author     Linus Torvalds <torvalds@linux-foundation.org>  2017-11-14 19:07:26 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-14 19:07:26 -0500
commit     47f521ba18190e4bfbb65ead3977af5756884427 (patch)
tree       54d6039d71149d8596b66a1d41cfd9eb7f334601
parent     b91593fa8531a7396551dd9c0a0c51e9b9b97ca9 (diff)
parent     0868b99c214a3d55486c700de7c3f770b7243e7c (diff)
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD update from Shaohua Li:
"This update mostly includes bug fixes:
- md-cluster now supports raid10 from Guoqing
- raid5 PPL fixes from Artur
- badblock regression fix from Bo
- suspend hang related fixes from Neil
- raid5 reshape fixes from Neil
- raid1 freeze deadlock fix from Nate
- memleak fixes from Zdenek
- bitmap related fixes from Me and Tao
- other fixes and cleanups"
* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (33 commits)
md: free unused memory after bitmap resize
md: release allocated bitset sync_set
md/bitmap: clear BITMAP_WRITE_ERROR bit before writing it to sb
md: be cautious about using ->curr_resync_completed for ->recovery_offset
badblocks: fix wrong return value in badblocks_set if badblocks are disabled
md: don't check MD_SB_CHANGE_CLEAN in md_allow_write
md-cluster: update document for raid10
md: remove redundant variable q
raid1: remove obsolete code in raid1_write_request
md-cluster: Use a small window for raid10 resync
md-cluster: Suspend writes in RAID10 if within range
md-cluster/raid10: set "do_balance = 0" if area is resyncing
md: use lockdep_assert_held
raid1: prevent freeze_array/wait_all_barriers deadlock
md: use TASK_IDLE instead of blocking signals
md: remove special meaning of ->quiesce(.., 2)
md: allow metadata update while suspending.
md: use mddev_suspend/resume instead of ->quiesce()
md: move suspend_hi/lo handling into core md code
md: don't call bitmap_create() while array is quiesced.
...
Diffstat:

 Documentation/md/md-cluster.txt                                  |   3
 MAINTAINERS                                                      |   7
 block/badblocks.c                                                |   2
 drivers/md/Kconfig                                               |   5
 drivers/md/Makefile                                              |   5
 drivers/md/dm-raid.c                                             |  12
 drivers/md/md-bitmap.c (renamed from drivers/md/bitmap.c)        |  27
 drivers/md/md-bitmap.h (renamed from drivers/md/bitmap.h)        |   0
 drivers/md/md-cluster.c                                          |  12
 drivers/md/md-faulty.c (renamed from drivers/md/faulty.c)        |   0
 drivers/md/md-linear.c (renamed from drivers/md/linear.c)        |   2
 drivers/md/md-linear.h (renamed from drivers/md/linear.h)        |   0
 drivers/md/md-multipath.c (renamed from drivers/md/multipath.c)  |   4
 drivers/md/md-multipath.h (renamed from drivers/md/multipath.h)  |   0
 drivers/md/md.c                                                  | 147
 drivers/md/md.h                                                  |  20
 drivers/md/raid0.c                                               |   2
 drivers/md/raid1.c                                               |  78
 drivers/md/raid10.c                                              | 169
 drivers/md/raid10.h                                              |   6
 drivers/md/raid5-cache.c                                         |  44
 drivers/md/raid5-log.h                                           |   2
 drivers/md/raid5-ppl.c                                           |   6
 drivers/md/raid5.c                                               |  79

24 files changed, 409 insertions(+), 223 deletions(-)
diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt
index 82ee51604e9a..e1055f105cf5 100644
--- a/Documentation/md/md-cluster.txt
+++ b/Documentation/md/md-cluster.txt
@@ -1,4 +1,5 @@
-The cluster MD is a shared-device RAID for a cluster.
+The cluster MD is a shared-device RAID for a cluster, it supports
+two levels: raid1 and raid10 (limited support).
 
 
 1. On-disk format
diff --git a/MAINTAINERS b/MAINTAINERS
index ba3d8c197d92..8604cf64a169 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4103,6 +4103,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git
 T:	quilt http://people.redhat.com/agk/patches/linux/editing/
 S:	Maintained
 F:	Documentation/device-mapper/
+F:	drivers/md/Makefile
+F:	drivers/md/Kconfig
 F:	drivers/md/dm*
 F:	drivers/md/persistent-data/
 F:	include/linux/device-mapper.h
@@ -12487,7 +12489,10 @@ M:	Shaohua Li <shli@kernel.org>
 L:	linux-raid@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
 S:	Supported
-F:	drivers/md/
+F:	drivers/md/Makefile
+F:	drivers/md/Kconfig
+F:	drivers/md/md*
+F:	drivers/md/raid*
 F:	include/linux/raid/
 F:	include/uapi/linux/raid/
 
diff --git a/block/badblocks.c b/block/badblocks.c
index 43c71166e1e2..91f7bcf979d3 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -178,7 +178,7 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
 
 	if (bb->shift < 0)
 		/* badblocks are disabled */
-		return 0;
+		return 1;
 
 	if (bb->shift) {
 		/* round the start down, and the end up */
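Context for the one-line fix above: badblocks_set() returns 0 when the range was recorded and 1 when it could not be, so with badblocks disabled (bb->shift < 0) returning 0 wrongly told callers the sectors had been remembered as bad. A minimal standalone sketch of that contract (the demo_* names are hypothetical, not kernel code):

```c
#include <stdio.h>

/* Stand-in for badblocks_set(): 0 on success, 1 when the range
 * cannot be recorded, including the disabled case fixed above. */
static int demo_badblocks_set(int shift, long long s, int sectors)
{
	if (shift < 0)
		/* badblocks are disabled: nothing was recorded */
		return 1;
	/* ... record [s, s + sectors) ... */
	return 0;
}

int main(void)
{
	/* A caller must not assume the range was recorded. */
	if (demo_badblocks_set(-1, 1024, 8))
		printf("range not recorded -> propagate the failure\n");
	else
		printf("range recorded -> I/O can avoid it later\n");
	return 0;
}
```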
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 4a249ee86364..83b9362be09c 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -178,7 +178,7 @@ config MD_FAULTY
 
 
 config MD_CLUSTER
-	tristate "Cluster Support for MD (EXPERIMENTAL)"
+	tristate "Cluster Support for MD"
 	depends on BLK_DEV_MD
 	depends on DLM
 	default n
@@ -188,7 +188,8 @@ config MD_CLUSTER
 	  nodes in the cluster can access the MD devices simultaneously.
 
 	  This brings the redundancy (and uptime) of RAID levels across the
-	  nodes of the cluster.
+	  nodes of the cluster. Currently, it can work with raid1 and raid10
+	  (limited support).
 
 	  If unsure, say N.
 
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index e94b6f9be941..f701bb211783 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -19,9 +19,12 @@ dm-cache-y	+= dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
 dm-cache-smq-y	+= dm-cache-policy-smq.o
 dm-era-y	+= dm-era-target.o
 dm-verity-y	+= dm-verity-target.o
-md-mod-y	+= md.o bitmap.o
+md-mod-y	+= md.o md-bitmap.o
 raid456-y	+= raid5.o raid5-cache.o raid5-ppl.o
 dm-zoned-y	+= dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
+linear-y	+= md-linear.o
+multipath-y	+= md-multipath.o
+faulty-y	+= md-faulty.o
 
 # Note: link order is important.  All raid personalities
 # and must come before md.o, as they each initialise
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index a25eebd98996..366c625b9591 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -12,7 +12,7 @@
 #include "raid1.h"
 #include "raid5.h"
 #include "raid10.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 
 #include <linux/device-mapper.h>
 
@@ -3630,8 +3630,11 @@ static void raid_postsuspend(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
-	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+		mddev_lock_nointr(&rs->md);
 		mddev_suspend(&rs->md);
+		mddev_unlock(&rs->md);
+	}
 
 	rs->md.ro = 1;
 }
@@ -3888,8 +3891,11 @@ static void raid_resume(struct dm_target *ti)
 	if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
-	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+		mddev_lock_nointr(mddev);
 		mddev_resume(mddev);
+		mddev_unlock(mddev);
+	}
 }
 
 static struct target_type raid_target = {
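A consequence of this series (see the md.c hunks below) is that mddev_suspend() and mddev_resume() now lockdep-assert that reconfig_mutex is held, which is why dm-raid grows the lock/unlock brackets above. The calling pattern for code outside md core, as a sketch (example_reconfigure is hypothetical):

```c
#include "md.h"	/* mddev_lock_nointr(), mddev_suspend(), ... */

static void example_reconfigure(struct mddev *mddev)
{
	mddev_lock_nointr(mddev);	/* take reconfig_mutex, no -EINTR */
	mddev_suspend(mddev);		/* asserts the mutex is held */
	/* ... change array state while no I/O is in flight ... */
	mddev_resume(mddev);
	mddev_unlock(mddev);		/* also wakes sb_wait now */
}
```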
diff --git a/drivers/md/bitmap.c b/drivers/md/md-bitmap.c
index 4d8ed74efadf..239c7bb3929b 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -29,7 +29,7 @@
 #include <linux/seq_file.h>
 #include <trace/events/block.h>
 #include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 
 static inline char *bmname(struct bitmap *bitmap)
 {
@@ -459,7 +459,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
 		/* rocking back to read-only */
 		bitmap->events_cleared = bitmap->mddev->events;
 	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
-	sb->state = cpu_to_le32(bitmap->flags);
+	/*
+	 * clear BITMAP_WRITE_ERROR bit to protect against the case that
+	 * a bitmap write error occurred but the later writes succeeded.
+	 */
+	sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR));
 	/* Just in case these have been changed via sysfs: */
 	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
 	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -625,7 +629,7 @@ re_read:
 		err = read_sb_page(bitmap->mddev,
 				   offset,
 				   sb_page,
-				   0, PAGE_SIZE);
+				   0, sizeof(bitmap_super_t));
 	}
 	if (err)
 		return err;
@@ -1816,6 +1820,12 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot)
 
 	BUG_ON(file && mddev->bitmap_info.offset);
 
+	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+		pr_notice("md/raid:%s: array with journal cannot have bitmap\n",
+			  mdname(mddev));
+		return ERR_PTR(-EBUSY);
+	}
+
 	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
 	if (!bitmap)
 		return ERR_PTR(-ENOMEM);
@@ -2123,7 +2133,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 		if (store.sb_page && bitmap->storage.sb_page)
 			memcpy(page_address(store.sb_page),
 			       page_address(bitmap->storage.sb_page),
-			       PAGE_SIZE);
+			       sizeof(bitmap_super_t));
 		bitmap_file_unmap(&bitmap->storage);
 		bitmap->storage = store;
 
@@ -2152,6 +2162,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 			for (k = 0; k < page; k++) {
 				kfree(new_bp[k].map);
 			}
+			kfree(new_bp);
 
 			/* restore some fields from old_counts */
 			bitmap->counts.bp = old_counts.bp;
@@ -2202,6 +2213,14 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
 		block += old_blocks;
 	}
 
+	if (bitmap->counts.bp != old_counts.bp) {
+		unsigned long k;
+		for (k = 0; k < old_counts.pages; k++)
+			if (!old_counts.bp[k].hijacked)
+				kfree(old_counts.bp[k].map);
+		kfree(old_counts.bp);
+	}
+
 	if (!init) {
 		int i;
 		while (block < (chunks << chunkshift)) {
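The two kfree() additions above close leaks of the same shape in bitmap_resize(): a two-level allocation (an array of counter pages, each with its own map) was torn down only one level deep, so the containing array, and for old_counts the per-page maps as well, leaked. A standalone sketch of the full teardown (hypothetical demo_* types, not the kernel's):

```c
#include <stdlib.h>

struct demo_page {
	char *map;	/* per-page allocation */
	int hijacked;	/* borrowed storage, not ours to free */
};

/* Free both levels: every owned map, then the page array itself.
 * Omitting either free() reproduces the leaks fixed above. */
static void demo_free_pages(struct demo_page *bp, unsigned long pages)
{
	unsigned long k;

	for (k = 0; k < pages; k++)
		if (!bp[k].hijacked)
			free(bp[k].map);
	free(bp);
}

int main(void)
{
	struct demo_page *bp = calloc(4, sizeof(*bp));

	if (!bp)
		return 1;
	bp[0].map = malloc(4096);
	demo_free_pages(bp, 4);
	return 0;
}
```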
diff --git a/drivers/md/bitmap.h b/drivers/md/md-bitmap.h
index 5df35ca90f58..5df35ca90f58 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/md-bitmap.h
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 03082e17c65c..79bfbc840385 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -15,7 +15,7 @@
 #include <linux/sched.h>
 #include <linux/raid/md_p.h>
 #include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 #include "md-cluster.h"
 
 #define LVB_SIZE	64
@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
 static void remove_suspend_info(struct mddev *mddev, int slot)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
+	mddev->pers->quiesce(mddev, 1);
 	spin_lock_irq(&cinfo->suspend_lock);
 	__remove_suspend_info(cinfo, slot);
 	spin_unlock_irq(&cinfo->suspend_lock);
-	mddev->pers->quiesce(mddev, 2);
+	mddev->pers->quiesce(mddev, 0);
 }
 
 
@@ -492,13 +493,12 @@ static void process_suspend_info(struct mddev *mddev,
 	s->lo = lo;
 	s->hi = hi;
 	mddev->pers->quiesce(mddev, 1);
-	mddev->pers->quiesce(mddev, 0);
 	spin_lock_irq(&cinfo->suspend_lock);
 	/* Remove existing entry (if exists) before adding */
 	__remove_suspend_info(cinfo, slot);
 	list_add(&s->list, &cinfo->suspend_list);
 	spin_unlock_irq(&cinfo->suspend_lock);
-	mddev->pers->quiesce(mddev, 2);
+	mddev->pers->quiesce(mddev, 0);
 }
 
 static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
@@ -1094,7 +1094,7 @@ static void metadata_update_cancel(struct mddev *mddev)
 /*
  * return 0 if all the bitmaps have the same sync_size
  */
-int cluster_check_sync_size(struct mddev *mddev)
+static int cluster_check_sync_size(struct mddev *mddev)
 {
 	int i, rv;
 	bitmap_super_t *sb;
@@ -1478,7 +1478,7 @@ static struct md_cluster_operations cluster_ops = {
 
 static int __init cluster_init(void)
 {
-	pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
+	pr_warn("md-cluster: support raid1 and raid10 (limited support)\n");
 	pr_info("Registering Cluster MD functions\n");
 	register_md_cluster_operations(&cluster_ops, THIS_MODULE);
 	return 0;
diff --git a/drivers/md/faulty.c b/drivers/md/md-faulty.c
index 38264b38420f..38264b38420f 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/md-faulty.c
diff --git a/drivers/md/linear.c b/drivers/md/md-linear.c
index c464fb48039a..773fc70dced7 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/md-linear.c
@@ -23,7 +23,7 @@
 #include <linux/slab.h>
 #include <trace/events/block.h>
 #include "md.h"
-#include "linear.h"
+#include "md-linear.h"
 
 /*
  * find which device holds a particular offset
diff --git a/drivers/md/linear.h b/drivers/md/md-linear.h
index 8381d651d4ed..8381d651d4ed 100644
--- a/drivers/md/linear.h
+++ b/drivers/md/md-linear.h
diff --git a/drivers/md/multipath.c b/drivers/md/md-multipath.c
index b68e0666b9b0..e40065bdbfc8 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/md-multipath.c
@@ -25,7 +25,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include "md.h"
-#include "multipath.h"
+#include "md-multipath.h"
 
 #define MAX_WORK_PER_DISK 128
 
@@ -243,7 +243,6 @@ static void print_multipath_conf (struct mpconf *conf)
 static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 {
 	struct mpconf *conf = mddev->private;
-	struct request_queue *q;
 	int err = -EEXIST;
 	int path;
 	struct multipath_info *p;
@@ -257,7 +256,6 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 
 	for (path = first; path <= last; path++)
 		if ((p=conf->multipaths+path)->rdev == NULL) {
-			q = rdev->bdev->bd_disk->queue;
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
 
diff --git a/drivers/md/multipath.h b/drivers/md/md-multipath.h
index 0adb941f485a..0adb941f485a 100644
--- a/drivers/md/multipath.h
+++ b/drivers/md/md-multipath.h
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 447ddcbc9566..09c3af3dcdca 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -69,7 +69,7 @@
 
 #include <trace/events/block.h>
 #include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 #include "md-cluster.h"
 
 #ifndef MODULE
@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
+static bool is_suspended(struct mddev *mddev, struct bio *bio)
+{
+	if (mddev->suspended)
+		return true;
+	if (bio_data_dir(bio) != WRITE)
+		return false;
+	if (mddev->suspend_lo >= mddev->suspend_hi)
+		return false;
+	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
+		return false;
+	if (bio_end_sector(bio) < mddev->suspend_lo)
+		return false;
+	return true;
+}
+
 void md_handle_request(struct mddev *mddev, struct bio *bio)
 {
 check_suspended:
 	rcu_read_lock();
-	if (mddev->suspended) {
+	if (is_suspended(mddev, bio)) {
 		DEFINE_WAIT(__wait);
 		for (;;) {
 			prepare_to_wait(&mddev->sb_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
-			if (!mddev->suspended)
+			if (!is_suspended(mddev, bio))
 				break;
 			rcu_read_unlock();
 			schedule();
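The new helper treats [suspend_lo, suspend_hi) as the blocked region and only stalls WRITE bios that can touch it; reads always pass, as does everything when the range is empty. A standalone rendering of the same overlap test, with worked cases (the sector numbers are illustrative):

```c
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the checks in is_suspended() above, in sector units;
 * end is the first sector past the bio, like bio_end_sector(). */
static bool overlaps_suspended(unsigned long long start,
			       unsigned long long end,
			       unsigned long long lo,
			       unsigned long long hi)
{
	if (lo >= hi)
		return false;	/* no suspended range configured */
	if (start >= hi)
		return false;	/* entirely above the range */
	if (end < lo)
		return false;	/* entirely below the range */
	return true;
}

int main(void)
{
	/* suspend_lo = 1000, suspend_hi = 2000 */
	printf("%d\n", overlaps_suspended(500, 900, 1000, 2000));   /* 0 */
	printf("%d\n", overlaps_suspended(1500, 1600, 1000, 2000)); /* 1 */
	printf("%d\n", overlaps_suspended(2000, 2100, 1000, 2000)); /* 0 */
	return 0;
}
```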
@@ -344,12 +359,17 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 void mddev_suspend(struct mddev *mddev)
 {
 	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
+	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (mddev->suspended++)
 		return;
 	synchronize_rcu();
 	wake_up(&mddev->sb_wait);
+	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
+	smp_mb__after_atomic();
 	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
 	mddev->pers->quiesce(mddev, 1);
+	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
+	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
 
 	del_timer_sync(&mddev->safemode_timer);
 }
@@ -357,6 +377,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
+	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (--mddev->suspended)
 		return;
 	wake_up(&mddev->sb_wait);
@@ -663,6 +684,7 @@ void mddev_unlock(struct mddev *mddev)
 	 */
 	spin_lock(&pers_lock);
 	md_wakeup_thread(mddev->thread);
+	wake_up(&mddev->sb_wait);
 	spin_unlock(&pers_lock);
 }
 EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -2313,7 +2335,7 @@ static void export_array(struct mddev *mddev)
 
 static bool set_in_sync(struct mddev *mddev)
 {
-	WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));
+	lockdep_assert_held(&mddev->lock);
 	if (!mddev->in_sync) {
 		mddev->sync_checkers++;
 		spin_unlock(&mddev->lock);
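The substitution above (repeated in md_set_array_sectors() further down) is the idiomatic assertion: spin_is_locked() only says whether anyone holds the lock and is constantly false on uniprocessor builds, which is what the old NR_CPUS != 1 guard papered over, while lockdep_assert_held() checks that the current context holds the lock and compiles away without CONFIG_LOCKDEP. In sketch form:

```c
/* Old, approximate: true if *anyone* holds the lock, and always
 * false on !SMP, hence the NR_CPUS != 1 escape hatch. */
WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));

/* New, precise: the current task must hold the lock; a no-op
 * unless lockdep is enabled. */
lockdep_assert_held(&mddev->lock);
```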
@@ -2432,10 +2454,18 @@ repeat:
 		}
 	}
 
-	/* First make sure individual recovery_offsets are correct */
+	/*
+	 * First make sure individual recovery_offsets are correct
+	 * curr_resync_completed can only be used during recovery.
+	 * During reshape/resync it might use array-addresses rather
+	 * that device addresses.
+	 */
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
 		    mddev->delta_disks >= 0 &&
+		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+		    test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
+		    !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 		    !test_bit(Journal, &rdev->flags) &&
 		    !test_bit(In_sync, &rdev->flags) &&
 		    mddev->curr_resync_completed > rdev->recovery_offset)
@@ -4824,7 +4854,7 @@ suspend_lo_show(struct mddev *mddev, char *page)
 static ssize_t
 suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 {
-	unsigned long long old, new;
+	unsigned long long new;
 	int err;
 
 	err = kstrtoull(buf, 10, &new);
@@ -4840,16 +4870,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 	if (mddev->pers == NULL ||
 	    mddev->pers->quiesce == NULL)
 		goto unlock;
-	old = mddev->suspend_lo;
+	mddev_suspend(mddev);
 	mddev->suspend_lo = new;
-	if (new >= old)
-		/* Shrinking suspended region */
-		mddev->pers->quiesce(mddev, 2);
-	else {
-		/* Expanding suspended region - need to wait */
-		mddev->pers->quiesce(mddev, 1);
-		mddev->pers->quiesce(mddev, 0);
-	}
+	mddev_resume(mddev);
+
 	err = 0;
 unlock:
 	mddev_unlock(mddev);
@@ -4867,7 +4891,7 @@ suspend_hi_show(struct mddev *mddev, char *page)
 static ssize_t
 suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 {
-	unsigned long long old, new;
+	unsigned long long new;
 	int err;
 
 	err = kstrtoull(buf, 10, &new);
@@ -4880,19 +4904,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 	if (err)
 		return err;
 	err = -EINVAL;
-	if (mddev->pers == NULL ||
-	    mddev->pers->quiesce == NULL)
+	if (mddev->pers == NULL)
 		goto unlock;
-	old = mddev->suspend_hi;
+
+	mddev_suspend(mddev);
 	mddev->suspend_hi = new;
-	if (new <= old)
-		/* Shrinking suspended region */
-		mddev->pers->quiesce(mddev, 2);
-	else {
-		/* Expanding suspended region - need to wait */
-		mddev->pers->quiesce(mddev, 1);
-		mddev->pers->quiesce(mddev, 0);
-	}
+	mddev_resume(mddev);
+
 	err = 0;
 unlock:
 	mddev_unlock(mddev);
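With mddev_suspend()/mddev_resume() doing the waiting, both sysfs stores reduce to suspend, update the bound, resume, and the old distinction between shrinking and expanding the region (the quiesce "state 2" shortcut) disappears. The attributes are the usual md sysfs files; a hypothetical user-space snippet that suspends writes to sectors [1000, 2000) of an array named md0 (root required, error handling elided):

```c
#include <stdio.h>

int main(void)
{
	/* The blocked range is [suspend_lo, suspend_hi), in sectors. */
	FILE *lo = fopen("/sys/block/md0/md/suspend_lo", "w");
	FILE *hi = fopen("/sys/block/md0/md/suspend_hi", "w");

	if (!lo || !hi)
		return 1;
	fprintf(lo, "1000\n");
	fprintf(hi, "2000\n");
	fclose(lo);
	fclose(hi);
	return 0;
}
```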
@@ -5834,8 +5852,14 @@ void md_stop(struct mddev *mddev)
 	 * This is called from dm-raid
 	 */
 	__md_stop(mddev);
-	if (mddev->bio_set)
+	if (mddev->bio_set) {
 		bioset_free(mddev->bio_set);
+		mddev->bio_set = NULL;
+	}
+	if (mddev->sync_set) {
+		bioset_free(mddev->sync_set);
+		mddev->sync_set = NULL;
+	}
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -6362,7 +6386,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
 			break;
 		}
 	}
-	if (has_journal) {
+	if (has_journal || mddev->bitmap) {
 		export_rdev(rdev);
 		return -EBUSY;
 	}
@@ -6618,22 +6642,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 		return -ENOENT; /* cannot remove what isn't there */
 	err = 0;
 	if (mddev->pers) {
-		mddev->pers->quiesce(mddev, 1);
 		if (fd >= 0) {
 			struct bitmap *bitmap;
 
 			bitmap = bitmap_create(mddev, -1);
+			mddev_suspend(mddev);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				err = bitmap_load(mddev);
 			} else
 				err = PTR_ERR(bitmap);
-		}
-		if (fd < 0 || err) {
+			if (err) {
+				bitmap_destroy(mddev);
+				fd = -1;
+			}
+			mddev_resume(mddev);
+		} else if (fd < 0) {
+			mddev_suspend(mddev);
 			bitmap_destroy(mddev);
-			fd = -1; /* make sure to put the file */
+			mddev_resume(mddev);
 		}
-		mddev->pers->quiesce(mddev, 0);
 	}
 	if (fd < 0) {
 		struct file *f = mddev->bitmap_info.file;
@@ -6735,7 +6763,7 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
 
 void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
 {
-	WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
+	lockdep_assert_held(&mddev->reconfig_mutex);
 
 	if (mddev->external_size)
 		return;
@@ -6917,8 +6945,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 					mddev->bitmap_info.default_offset;
 				mddev->bitmap_info.space =
 					mddev->bitmap_info.default_space;
-				mddev->pers->quiesce(mddev, 1);
 				bitmap = bitmap_create(mddev, -1);
+				mddev_suspend(mddev);
 				if (!IS_ERR(bitmap)) {
 					mddev->bitmap = bitmap;
 					rv = bitmap_load(mddev);
@@ -6926,7 +6954,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 					rv = PTR_ERR(bitmap);
 				if (rv)
 					bitmap_destroy(mddev);
-				mddev->pers->quiesce(mddev, 0);
+				mddev_resume(mddev);
 			} else {
 				/* remove the bitmap */
 				if (!mddev->bitmap) {
@@ -6949,9 +6977,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 					mddev->bitmap_info.nodes = 0;
 					md_cluster_ops->leave(mddev);
 				}
-				mddev->pers->quiesce(mddev, 1);
+				mddev_suspend(mddev);
 				bitmap_destroy(mddev);
-				mddev->pers->quiesce(mddev, 0);
+				mddev_resume(mddev);
 				mddev->bitmap_info.offset = 0;
 			}
 		}
@@ -7468,8 +7496,8 @@ void md_wakeup_thread(struct md_thread *thread)
 {
 	if (thread) {
 		pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
-		if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags))
-			wake_up(&thread->wqueue);
+		set_bit(THREAD_WAKEUP, &thread->flags);
+		wake_up(&thread->wqueue);
 	}
 }
 EXPORT_SYMBOL(md_wakeup_thread);
@@ -8039,7 +8067,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
 	if (did_change)
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
 	wait_event(mddev->sb_wait,
-		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
+		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
+		   mddev->suspended);
 	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
 		percpu_ref_put(&mddev->writes_pending);
 		return false;
@@ -8110,7 +8139,6 @@ void md_allow_write(struct mddev *mddev)
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
 		/* wait for the dirty state to be recorded in the metadata */
 		wait_event(mddev->sb_wait,
-			   !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
 			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	} else
 		spin_unlock(&mddev->lock);
@@ -8477,16 +8505,19 @@ void md_do_sync(struct md_thread *thread)
 	} else {
 		if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			mddev->curr_resync = MaxSector;
-		rcu_read_lock();
-		rdev_for_each_rcu(rdev, mddev)
-			if (rdev->raid_disk >= 0 &&
-			    mddev->delta_disks >= 0 &&
-			    !test_bit(Journal, &rdev->flags) &&
-			    !test_bit(Faulty, &rdev->flags) &&
-			    !test_bit(In_sync, &rdev->flags) &&
-			    rdev->recovery_offset < mddev->curr_resync)
-				rdev->recovery_offset = mddev->curr_resync;
-		rcu_read_unlock();
+		if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+		    test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
+			rcu_read_lock();
+			rdev_for_each_rcu(rdev, mddev)
+				if (rdev->raid_disk >= 0 &&
+				    mddev->delta_disks >= 0 &&
+				    !test_bit(Journal, &rdev->flags) &&
+				    !test_bit(Faulty, &rdev->flags) &&
+				    !test_bit(In_sync, &rdev->flags) &&
+				    rdev->recovery_offset < mddev->curr_resync)
+					rdev->recovery_offset = mddev->curr_resync;
+			rcu_read_unlock();
+		}
 	}
 }
 skip:
@@ -8813,6 +8844,16 @@ void md_check_recovery(struct mddev *mddev)
 	unlock:
 		wake_up(&mddev->sb_wait);
 		mddev_unlock(mddev);
+	} else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+		/* Write superblock - thread that called mddev_suspend()
+		 * holds reconfig_mutex for us.
+		 */
+		set_bit(MD_UPDATING_SB, &mddev->flags);
+		smp_mb__after_atomic();
+		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+			md_update_sb(mddev, 0);
+		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+		wake_up(&mddev->sb_wait);
 	}
 }
 EXPORT_SYMBOL(md_check_recovery);
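Together with the mddev_suspend() hunk earlier, this forms a small flag handshake: the suspending thread, which holds reconfig_mutex, opens a window by setting MD_ALLOW_SB_UPDATE; the md thread, which normally must take the mutex before writing the superblock, may step through that window after marking itself with MD_UPDATING_SB; the suspender then closes the window and waits for any in-flight update to drain. Condensed as a sketch, both halves taken from the hunks above:

```c
/* Suspending thread, in mddev_suspend(), holding reconfig_mutex: */
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
smp_mb__after_atomic();
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

/* md thread, in md_check_recovery(), when it cannot get the mutex: */
if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
	set_bit(MD_UPDATING_SB, &mddev->flags);
	smp_mb__after_atomic();
	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
		md_update_sb(mddev, 0);	/* safe: suspender holds the mutex */
	clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
	wake_up(&mddev->sb_wait);
}
```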
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d8287d3cd1bf..7d6bcf0eba0c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -237,6 +237,12 @@ enum mddev_flags {
 				 */
 	MD_HAS_PPL,		/* The raid array has PPL feature set */
 	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
+	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
+				 * the metadata without taking reconfig_mutex.
+				 */
+	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
+				 * without explicitly holding reconfig_mutex.
+				 */
 };
 
 enum mddev_sb_flags {
@@ -494,11 +500,6 @@ static inline void mddev_lock_nointr(struct mddev *mddev)
 	mutex_lock(&mddev->reconfig_mutex);
 }
 
-static inline int mddev_is_locked(struct mddev *mddev)
-{
-	return mutex_is_locked(&mddev->reconfig_mutex);
-}
-
 static inline int mddev_trylock(struct mddev *mddev)
 {
 	return mutex_trylock(&mddev->reconfig_mutex);
@@ -538,12 +539,11 @@ struct md_personality
 	int (*check_reshape) (struct mddev *mddev);
 	int (*start_reshape) (struct mddev *mddev);
 	void (*finish_reshape) (struct mddev *mddev);
-	/* quiesce moves between quiescence states
-	 * 0 - fully active
-	 * 1 - no new requests allowed
-	 * others - reserved
+	/* quiesce suspends or resumes internal processing.
+	 * 1 - stop new actions and wait for action io to complete
+	 * 0 - return to normal behaviour
 	 */
-	void (*quiesce) (struct mddev *mddev, int state);
+	void (*quiesce) (struct mddev *mddev, int quiesce);
 	/* takeover is used to transition an array from one
 	 * personality to another. The new personality must be able
 	 * to handle the data in the current layout.
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 5a00fc118470..5ecba9eef441 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev *mddev)
 	return ERR_PTR(-EINVAL);
 }
 
-static void raid0_quiesce(struct mddev *mddev, int state)
+static void raid0_quiesce(struct mddev *mddev, int quiesce)
 {
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f3f3e40dc9d8..cc9d337a1ed3 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -37,13 +37,12 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/ratelimit.h>
-#include <linux/sched/signal.h>
 
 #include <trace/events/block.h>
 
 #include "md.h"
 #include "raid1.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 
 #define UNSUPPORTED_MDDEV_FLAGS		\
 	((1L << MD_HAS_JOURNAL) |	\
@@ -990,14 +989,6 @@ static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
 	_wait_barrier(conf, idx);
 }
 
-static void wait_all_barriers(struct r1conf *conf)
-{
-	int idx;
-
-	for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
-		_wait_barrier(conf, idx);
-}
-
 static void _allow_barrier(struct r1conf *conf, int idx)
 {
 	atomic_dec(&conf->nr_pending[idx]);
@@ -1011,14 +1002,6 @@ static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
 	_allow_barrier(conf, idx);
 }
 
-static void allow_all_barriers(struct r1conf *conf)
-{
-	int idx;
-
-	for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
-		_allow_barrier(conf, idx);
-}
-
 /* conf->resync_lock should be held */
 static int get_unqueued_pending(struct r1conf *conf)
 {
@@ -1303,42 +1286,28 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	int first_clone;
 	int max_sectors;
 
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-
-
-	if ((bio_end_sector(bio) > mddev->suspend_lo &&
-	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
-	    (mddev_is_clustered(mddev) &&
+	if (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {
 
-		/*
-		 * As the suspend_* range is controlled by userspace, we want
-		 * an interruptible wait.
-		 */
 		DEFINE_WAIT(w);
 		for (;;) {
-			sigset_t full, old;
 			prepare_to_wait(&conf->wait_barrier,
-					&w, TASK_INTERRUPTIBLE);
-			if (bio_end_sector(bio) <= mddev->suspend_lo ||
-			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
-			    (mddev_is_clustered(mddev) &&
-			     !md_cluster_ops->area_resyncing(mddev, WRITE,
-				     bio->bi_iter.bi_sector,
-				     bio_end_sector(bio))))
+					&w, TASK_IDLE);
+			if (!md_cluster_ops->area_resyncing(mddev, WRITE,
+							    bio->bi_iter.bi_sector,
+							    bio_end_sector(bio)))
 				break;
-			sigfillset(&full);
-			sigprocmask(SIG_BLOCK, &full, &old);
 			schedule();
-			sigprocmask(SIG_SETMASK, &old, NULL);
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
 	wait_barrier(conf, bio->bi_iter.bi_sector);
 
 	r1_bio = alloc_r1bio(mddev, bio);
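The rewritten wait also drops the sigfillset()/sigprocmask() dance: TASK_IDLE is TASK_UNINTERRUPTIBLE | TASK_NOLOAD, so the sleeper ignores signals without any masking and does not inflate the load average. The resulting loop idiom, with a hypothetical condition_is_clear() standing in for the area_resyncing() check:

```c
DEFINE_WAIT(w);

for (;;) {
	prepare_to_wait(&conf->wait_barrier, &w, TASK_IDLE);
	if (condition_is_clear())	/* re-check after every wakeup */
		break;
	schedule();
}
finish_wait(&conf->wait_barrier, &w);
```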
@@ -1654,8 +1623,12 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-	wait_all_barriers(conf);
-	allow_all_barriers(conf);
+	int idx;
+
+	for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++) {
+		_wait_barrier(conf, idx);
+		_allow_barrier(conf, idx);
+	}
 
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;
@@ -3277,21 +3250,14 @@ static int raid1_reshape(struct mddev *mddev)
 	return 0;
 }
 
-static void raid1_quiesce(struct mddev *mddev, int state)
+static void raid1_quiesce(struct mddev *mddev, int quiesce)
 {
 	struct r1conf *conf = mddev->private;
 
-	switch(state) {
-	case 2: /* wake for suspend */
-		wake_up(&conf->wait_barrier);
-		break;
-	case 1:
+	if (quiesce)
 		freeze_array(conf, 0);
-		break;
-	case 0:
+	else
 		unfreeze_array(conf);
-		break;
-	}
 }
 
 static void *raid1_takeover(struct mddev *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 374df5796649..b9edbc747a95 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -29,7 +29,7 @@
 #include "md.h"
 #include "raid10.h"
 #include "raid0.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
 
 /*
  * RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -136,10 +136,13 @@ static void r10bio_pool_free(void *r10_bio, void *data)
 	kfree(r10_bio);
 }
 
+#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 /* amount of memory to reserve for resync requests */
 #define RESYNC_WINDOW (1024*1024)
 /* maximum number of concurrent requests, memory permitting */
 #define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
+#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
 
 /*
  * When performing a resync, we need to read and compare, so
@@ -383,12 +386,11 @@ static void raid10_end_read_request(struct bio *bio)
 {
 	int uptodate = !bio->bi_status;
 	struct r10bio *r10_bio = bio->bi_private;
-	int slot, dev;
+	int slot;
 	struct md_rdev *rdev;
 	struct r10conf *conf = r10_bio->mddev->private;
 
 	slot = r10_bio->read_slot;
-	dev = r10_bio->devs[slot].devnum;
 	rdev = r10_bio->devs[slot].rdev;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
@@ -748,7 +750,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 
 	raid10_find_phys(conf, r10_bio);
 	rcu_read_lock();
-	sectors = r10_bio->sectors;
 	best_slot = -1;
 	best_rdev = NULL;
 	best_dist = MaxSector;
@@ -761,8 +762,11 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 	 * the resync window. We take the first readable disk when
 	 * above the resync window.
 	 */
-	if (conf->mddev->recovery_cp < MaxSector
-	    && (this_sector + sectors >= conf->next_resync))
+	if ((conf->mddev->recovery_cp < MaxSector
+	     && (this_sector + sectors >= conf->next_resync)) ||
+	    (mddev_is_clustered(conf->mddev) &&
+	     md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
+					    this_sector + sectors)))
 		do_balance = 0;
 
 	for (slot = 0; slot < conf->copies ; slot++) {
@@ -1293,6 +1297,22 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 	sector_t sectors;
 	int max_sectors;
 
+	if ((mddev_is_clustered(mddev) &&
+	     md_cluster_ops->area_resyncing(mddev, WRITE,
+					    bio->bi_iter.bi_sector,
+					    bio_end_sector(bio)))) {
+		DEFINE_WAIT(w);
+		for (;;) {
+			prepare_to_wait(&conf->wait_barrier,
+					&w, TASK_IDLE);
+			if (!md_cluster_ops->area_resyncing(mddev, WRITE,
+				 bio->bi_iter.bi_sector, bio_end_sector(bio)))
+				break;
+			schedule();
+		}
+		finish_wait(&conf->wait_barrier, &w);
+	}
+
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
@@ -2575,7 +2595,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 	struct bio *bio;
 	struct r10conf *conf = mddev->private;
 	struct md_rdev *rdev = r10_bio->devs[slot].rdev;
-	sector_t bio_last_sector;
 
 	/* we got a read error. Maybe the drive is bad. Maybe just
 	 * the block and we can fix it.
@@ -2586,7 +2605,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 	 * frozen.
 	 */
 	bio = r10_bio->devs[slot].bio;
-	bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors;
 	bio_put(bio);
 	r10_bio->devs[slot].bio = NULL;
 
@@ -2826,6 +2844,43 @@ static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
 }
 
 /*
+ * Set cluster_sync_high since we need other nodes to add the
+ * range [cluster_sync_low, cluster_sync_high] to suspend list.
+ */
+static void raid10_set_cluster_sync_high(struct r10conf *conf)
+{
+	sector_t window_size;
+	int extra_chunk, chunks;
+
+	/*
+	 * First, here we define "stripe" as a unit which across
+	 * all member devices one time, so we get chunks by use
+	 * raid_disks / near_copies. Otherwise, if near_copies is
+	 * close to raid_disks, then resync window could increases
+	 * linearly with the increase of raid_disks, which means
+	 * we will suspend a really large IO window while it is not
+	 * necessary. If raid_disks is not divisible by near_copies,
+	 * an extra chunk is needed to ensure the whole "stripe" is
+	 * covered.
+	 */
+
+	chunks = conf->geo.raid_disks / conf->geo.near_copies;
+	if (conf->geo.raid_disks % conf->geo.near_copies == 0)
+		extra_chunk = 0;
+	else
+		extra_chunk = 1;
+	window_size = (chunks + extra_chunk) * conf->mddev->chunk_sectors;
+
+	/*
+	 * At least use a 32M window to align with raid1's resync window
+	 */
+	window_size = (CLUSTER_RESYNC_WINDOW_SECTORS > window_size) ?
+			CLUSTER_RESYNC_WINDOW_SECTORS : window_size;
+
+	conf->cluster_sync_high = conf->cluster_sync_low + window_size;
+}
+
+/*
  * perform a "sync" on one "block"
  *
  * We need to make sure that no normal I/O request - particularly write
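A worked example of the computation above, assuming (hypothetically) raid_disks = 5, near_copies = 2 and 512 KiB chunks (chunk_sectors = 1024): chunks = 5 / 2 = 2, the remainder forces extra_chunk = 1, so window_size = 3 * 1024 = 3072 sectors; that is below CLUSTER_RESYNC_WINDOW_SECTORS = (16 * 1024 * 1024) >> 9 = 32768 as defined in this patch, so the floor wins and cluster_sync_high = cluster_sync_low + 32768. A standalone check:

```c
#include <stdio.h>

#define RESYNC_WINDOW (1024*1024)
#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)

int main(void)
{
	/* hypothetical geometry: 5 disks, near_copies = 2, 512K chunks */
	int raid_disks = 5, near_copies = 2;
	unsigned long long chunk_sectors = 1024;
	int chunks = raid_disks / near_copies;
	int extra_chunk = (raid_disks % near_copies) ? 1 : 0;
	unsigned long long window = (chunks + extra_chunk) * chunk_sectors;

	if (window < CLUSTER_RESYNC_WINDOW_SECTORS)
		window = CLUSTER_RESYNC_WINDOW_SECTORS;
	printf("window = %llu sectors\n", window);	/* prints 32768 */
	return 0;
}
```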
@@ -2897,6 +2952,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, | |||
2897 | test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 2952 | test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
2898 | max_sector = mddev->resync_max_sectors; | 2953 | max_sector = mddev->resync_max_sectors; |
2899 | if (sector_nr >= max_sector) { | 2954 | if (sector_nr >= max_sector) { |
2955 | conf->cluster_sync_low = 0; | ||
2956 | conf->cluster_sync_high = 0; | ||
2957 | |||
2900 | /* If we aborted, we need to abort the | 2958 | /* If we aborted, we need to abort the |
2901 | * sync on the 'current' bitmap chunks (there can | 2959 | * sync on the 'current' bitmap chunks (there can |
2902 | * be several when recovering multiple devices). | 2960 | * be several when recovering multiple devices). |
@@ -3251,7 +3309,17 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3251 | /* resync. Schedule a read for every block at this virt offset */ | 3309 | /* resync. Schedule a read for every block at this virt offset */ |
3252 | int count = 0; | 3310 | int count = 0; |
3253 | 3311 | ||
3254 | bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0); | 3312 | /* |
3313 | * curr_resync_completed may not be updated in | ||
3314 | * time, and we set cluster_sync_low based on it. | ||
3315 | * Check against "sector_nr + 2 * RESYNC_SECTORS" as a | ||
3316 | * safety margin, which ensures curr_resync_completed is | ||
3317 | * updated in bitmap_cond_end_sync. | ||
3318 | */ | ||
3319 | bitmap_cond_end_sync(mddev->bitmap, sector_nr, | ||
3320 | mddev_is_clustered(mddev) && | ||
3321 | (sector_nr + 2 * RESYNC_SECTORS > | ||
3322 | conf->cluster_sync_high)); | ||
3255 | 3323 | ||
3256 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, | 3324 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, |
3257 | &sync_blocks, mddev->degraded) && | 3325 | &sync_blocks, mddev->degraded) && |
@@ -3385,6 +3453,52 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, | |||
3385 | } while (++page_idx < RESYNC_PAGES); | 3453 | } while (++page_idx < RESYNC_PAGES); |
3386 | r10_bio->sectors = nr_sectors; | 3454 | r10_bio->sectors = nr_sectors; |
3387 | 3455 | ||
3456 | if (mddev_is_clustered(mddev) && | ||
3457 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | ||
3458 | /* It is resync, not recovery */ | ||
3459 | if (conf->cluster_sync_high < sector_nr + nr_sectors) { | ||
3460 | conf->cluster_sync_low = mddev->curr_resync_completed; | ||
3461 | raid10_set_cluster_sync_high(conf); | ||
3462 | /* Send resync message */ | ||
3463 | md_cluster_ops->resync_info_update(mddev, | ||
3464 | conf->cluster_sync_low, | ||
3465 | conf->cluster_sync_high); | ||
3466 | } | ||
3467 | } else if (mddev_is_clustered(mddev)) { | ||
3468 | /* This is recovery, not resync */ | ||
3469 | sector_t sect_va1, sect_va2; | ||
3470 | bool broadcast_msg = false; | ||
3471 | |||
3472 | for (i = 0; i < conf->geo.raid_disks; i++) { | ||
3473 | /* | ||
3474 | * sector_nr is a device address for recovery, so we | ||
3475 | * need to translate it to an array address before comparing | ||
3476 | * it with cluster_sync_high. | ||
3477 | */ | ||
3478 | sect_va1 = raid10_find_virt(conf, sector_nr, i); | ||
3479 | |||
3480 | if (conf->cluster_sync_high < sect_va1 + nr_sectors) { | ||
3481 | broadcast_msg = true; | ||
3482 | /* | ||
3483 | * curr_resync_completed is similar to | ||
3484 | * sector_nr, so apply the same translation. | ||
3485 | */ | ||
3486 | sect_va2 = raid10_find_virt(conf, | ||
3487 | mddev->curr_resync_completed, i); | ||
3488 | |||
3489 | if (conf->cluster_sync_low == 0 || | ||
3490 | conf->cluster_sync_low > sect_va2) | ||
3491 | conf->cluster_sync_low = sect_va2; | ||
3492 | } | ||
3493 | } | ||
3494 | if (broadcast_msg) { | ||
3495 | raid10_set_cluster_sync_high(conf); | ||
3496 | md_cluster_ops->resync_info_update(mddev, | ||
3497 | conf->cluster_sync_low, | ||
3498 | conf->cluster_sync_high); | ||
3499 | } | ||
3500 | } | ||
3501 | |||
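
The recovery branch above performs a reduction over all member disks: each device address is translated to an array (virtual) address, and the lowest translated checkpoint becomes the new window start; the resync branch needs no translation because there sector_nr is already an array address. A minimal userspace sketch of that reduction, with a hypothetical find_virt() standing in for raid10_find_virt():

    /* Sketch only: find_virt() is a hypothetical stand-in for
     * raid10_find_virt(), mapping a device sector on disk i to an
     * array (virtual) sector. */
    typedef unsigned long long sector_t;

    extern sector_t find_virt(int disk, sector_t dev_sector);

    static int need_broadcast(int raid_disks, sector_t sector_nr,
                              sector_t nr_sectors, sector_t resync_completed,
                              sector_t *sync_low, sector_t sync_high)
    {
        int i, broadcast = 0;

        for (i = 0; i < raid_disks; i++) {
            /* translate the device address to an array address */
            sector_t va = find_virt(i, sector_nr);

            if (sync_high < va + nr_sectors) {
                /* translate the checkpoint the same way */
                sector_t va2 = find_virt(i, resync_completed);

                broadcast = 1;
                /* keep the lowest translation as the window start */
                if (*sync_low == 0 || *sync_low > va2)
                    *sync_low = va2;
            }
        }
        return broadcast;
    }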
3388 | while (biolist) { | 3502 | while (biolist) { |
3389 | bio = biolist; | 3503 | bio = biolist; |
3390 | biolist = biolist->bi_next; | 3504 | biolist = biolist->bi_next; |
@@ -3644,6 +3758,18 @@ static int raid10_run(struct mddev *mddev) | |||
3644 | if (!conf) | 3758 | if (!conf) |
3645 | goto out; | 3759 | goto out; |
3646 | 3760 | ||
3761 | if (mddev_is_clustered(conf->mddev)) { | ||
3762 | int fc, fo; | ||
3763 | |||
3764 | fc = (mddev->layout >> 8) & 255; | ||
3765 | fo = mddev->layout & (1<<16); | ||
3766 | if (fc > 1 || fo > 0) { | ||
3767 | pr_err( | ||
3768 | "only near layout is supported by clustered raid10\n"); | ||
3769 | goto out; | ||
3770 | } | ||
3771 | } | ||
3772 | |||
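
For reference, the layout word decoded above packs the raid10 geometry as near copies in bits 0-7, far copies in bits 8-15, and the far-offset flag in bit 16. A small self-contained sketch of the same check (0x102 is the default "near" n2 layout):

    #include <stdio.h>

    int main(void)
    {
        int layout = 0x102;               /* default n2 layout: far=1, near=2 */
        int nc = layout & 255;            /* near copies */
        int fc = (layout >> 8) & 255;     /* far copies */
        int fo = layout & (1 << 16);      /* far-offset flag */

        /* clustered raid10 only supports the plain "near" layout */
        if (fc > 1 || fo)
            printf("layout 0x%x: rejected for clustered raid10\n", layout);
        else
            printf("layout 0x%x: near layout, %d copies\n", layout, nc);
        return 0;
    }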
3647 | mddev->thread = conf->thread; | 3773 | mddev->thread = conf->thread; |
3648 | conf->thread = NULL; | 3774 | conf->thread = NULL; |
3649 | 3775 | ||
@@ -3832,18 +3958,14 @@ static void raid10_free(struct mddev *mddev, void *priv) | |||
3832 | kfree(conf); | 3958 | kfree(conf); |
3833 | } | 3959 | } |
3834 | 3960 | ||
3835 | static void raid10_quiesce(struct mddev *mddev, int state) | 3961 | static void raid10_quiesce(struct mddev *mddev, int quiesce) |
3836 | { | 3962 | { |
3837 | struct r10conf *conf = mddev->private; | 3963 | struct r10conf *conf = mddev->private; |
3838 | 3964 | ||
3839 | switch(state) { | 3965 | if (quiesce) |
3840 | case 1: | ||
3841 | raise_barrier(conf, 0); | 3966 | raise_barrier(conf, 0); |
3842 | break; | 3967 | else |
3843 | case 0: | ||
3844 | lower_barrier(conf); | 3968 | lower_barrier(conf); |
3845 | break; | ||
3846 | } | ||
3847 | } | 3969 | } |
3848 | 3970 | ||
3849 | static int raid10_resize(struct mddev *mddev, sector_t sectors) | 3971 | static int raid10_resize(struct mddev *mddev, sector_t sectors) |
@@ -4578,15 +4700,18 @@ static int handle_reshape_read_error(struct mddev *mddev, | |||
4578 | /* Use sync reads to get the blocks from somewhere else */ | 4700 | /* Use sync reads to get the blocks from somewhere else */ |
4579 | int sectors = r10_bio->sectors; | 4701 | int sectors = r10_bio->sectors; |
4580 | struct r10conf *conf = mddev->private; | 4702 | struct r10conf *conf = mddev->private; |
4581 | struct { | 4703 | struct r10bio *r10b; |
4582 | struct r10bio r10_bio; | ||
4583 | struct r10dev devs[conf->copies]; | ||
4584 | } on_stack; | ||
4585 | struct r10bio *r10b = &on_stack.r10_bio; | ||
4586 | int slot = 0; | 4704 | int slot = 0; |
4587 | int idx = 0; | 4705 | int idx = 0; |
4588 | struct page **pages; | 4706 | struct page **pages; |
4589 | 4707 | ||
4708 | r10b = kmalloc(sizeof(*r10b) + | ||
4709 | sizeof(struct r10dev) * conf->copies, GFP_NOIO); | ||
4710 | if (!r10b) { | ||
4711 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
4712 | return -ENOMEM; | ||
4713 | } | ||
4714 | |||
4590 | /* reshape IOs share pages from .devs[0].bio */ | 4715 | /* reshape IOs share pages from .devs[0].bio */ |
4591 | pages = get_resync_pages(r10_bio->devs[0].bio)->pages; | 4716 | pages = get_resync_pages(r10_bio->devs[0].bio)->pages; |
4592 | 4717 | ||
@@ -4635,11 +4760,13 @@ static int handle_reshape_read_error(struct mddev *mddev, | |||
4635 | /* couldn't read this block, must give up */ | 4760 | /* couldn't read this block, must give up */ |
4636 | set_bit(MD_RECOVERY_INTR, | 4761 | set_bit(MD_RECOVERY_INTR, |
4637 | &mddev->recovery); | 4762 | &mddev->recovery); |
4763 | kfree(r10b); | ||
4638 | return -EIO; | 4764 | return -EIO; |
4639 | } | 4765 | } |
4640 | sectors -= s; | 4766 | sectors -= s; |
4641 | idx++; | 4767 | idx++; |
4642 | } | 4768 | } |
4769 | kfree(r10b); | ||
4643 | return 0; | 4770 | return 0; |
4644 | } | 4771 | } |
4645 | 4772 | ||
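
The hunk above replaces a variable-length on-stack structure (a VLA inside a struct, a GCC extension with unbounded stack cost for large conf->copies) with a single heap allocation sized to include the trailing array; GFP_NOIO matters because this runs on an I/O error path, where reclaim must not recurse into the block layer. A generic sketch of the allocation pattern, with hypothetical types:

    #include <stdlib.h>

    struct r10dev_like { int devnum; };        /* hypothetical */

    struct r10bio_like {                       /* hypothetical */
        int sectors;
        struct r10dev_like devs[];             /* flexible array member */
    };

    static struct r10bio_like *alloc_r10b(int copies)
    {
        /* one allocation covers the header plus 'copies' trailing
         * entries, mirroring the kmalloc(sizeof(*r10b) + ...) above */
        return calloc(1, sizeof(struct r10bio_like) +
                         sizeof(struct r10dev_like) * copies);
    }

Every exit path must then free the buffer, which is why the hunk adds kfree(r10b) before the -EIO return as well as at the normal return.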
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index abceccab6671..db2ac22ac1b4 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -89,6 +89,12 @@ struct r10conf { | |||
89 | * the new thread here until we fully activate the array. | 89 | * the new thread here until we fully activate the array. |
90 | */ | 90 | */ |
91 | struct md_thread *thread; | 91 | struct md_thread *thread; |
92 | |||
93 | /* | ||
94 | * Keep track of cluster resync window to send to other nodes. | ||
95 | */ | ||
96 | sector_t cluster_sync_low; | ||
97 | sector_t cluster_sync_high; | ||
92 | }; | 98 | }; |
93 | 99 | ||
94 | /* | 100 | /* |
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 0b7406ac8ce1..f1c86d938502 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include "md.h" | 24 | #include "md.h" |
25 | #include "raid5.h" | 25 | #include "raid5.h" |
26 | #include "bitmap.h" | 26 | #include "md-bitmap.h" |
27 | #include "raid5-log.h" | 27 | #include "raid5-log.h" |
28 | 28 | ||
29 | /* | 29 | /* |
@@ -539,7 +539,7 @@ static void r5l_log_run_stripes(struct r5l_log *log) | |||
539 | { | 539 | { |
540 | struct r5l_io_unit *io, *next; | 540 | struct r5l_io_unit *io, *next; |
541 | 541 | ||
542 | assert_spin_locked(&log->io_list_lock); | 542 | lockdep_assert_held(&log->io_list_lock); |
543 | 543 | ||
544 | list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) { | 544 | list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) { |
545 | /* don't change list order */ | 545 | /* don't change list order */ |
@@ -555,7 +555,7 @@ static void r5l_move_to_end_ios(struct r5l_log *log) | |||
555 | { | 555 | { |
556 | struct r5l_io_unit *io, *next; | 556 | struct r5l_io_unit *io, *next; |
557 | 557 | ||
558 | assert_spin_locked(&log->io_list_lock); | 558 | lockdep_assert_held(&log->io_list_lock); |
559 | 559 | ||
560 | list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) { | 560 | list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) { |
561 | /* don't change list order */ | 561 | /* don't change list order */ |
@@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(struct work_struct *work) | |||
693 | struct r5l_log *log = container_of(work, struct r5l_log, | 693 | struct r5l_log *log = container_of(work, struct r5l_log, |
694 | disable_writeback_work); | 694 | disable_writeback_work); |
695 | struct mddev *mddev = log->rdev->mddev; | 695 | struct mddev *mddev = log->rdev->mddev; |
696 | struct r5conf *conf = mddev->private; | ||
697 | int locked = 0; | ||
696 | 698 | ||
697 | if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) | 699 | if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) |
698 | return; | 700 | return; |
@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(struct work_struct *work) | |||
701 | 703 | ||
702 | /* wait for superblock change before suspend */ | 704 | /* wait for superblock change before suspend */ |
703 | wait_event(mddev->sb_wait, | 705 | wait_event(mddev->sb_wait, |
704 | !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); | 706 | conf->log == NULL || |
705 | 707 | (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && | |
706 | mddev_suspend(mddev); | 708 | (locked = mddev_trylock(mddev)))); |
707 | log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; | 709 | if (locked) { |
708 | mddev_resume(mddev); | 710 | mddev_suspend(mddev); |
711 | log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; | ||
712 | mddev_resume(mddev); | ||
713 | mddev_unlock(mddev); | ||
714 | } | ||
709 | } | 715 | } |
710 | 716 | ||
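
The reworked wait above avoids suspending the array from a work item that might race with a holder of the mddev lock: the worker proceeds only once the pending-superblock condition clears and a non-blocking mddev_trylock() succeeds, and it bails out if the log is being torn down. A rough userspace analogue of the "condition plus trylock" loop, with pthread stand-ins rather than the kernel API:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t mddev_lock = PTHREAD_MUTEX_INITIALIZER;

    extern bool change_pending(void);  /* MD_SB_CHANGE_PENDING analogue */
    extern bool log_gone(void);        /* conf->log == NULL analogue */
    extern void wait_for_wakeup(void); /* park until sb_wait is kicked */

    /* Returns true with the lock held, or false if tear-down raced us. */
    static bool wait_and_trylock(void)
    {
        for (;;) {
            if (log_gone())
                return false;
            /* only proceed once the condition holds AND we got the lock */
            if (!change_pending() &&
                pthread_mutex_trylock(&mddev_lock) == 0)
                return true;
            wait_for_wakeup();
        }
    }

This also explains the r5l_exit_log() hunk below: tear-down must wake sb_wait so a parked worker re-evaluates conf->log and exits before flush_work() waits on it.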
711 | static void r5l_submit_current_io(struct r5l_log *log) | 717 | static void r5l_submit_current_io(struct r5l_log *log) |
@@ -1194,7 +1200,7 @@ static void r5l_run_no_mem_stripe(struct r5l_log *log) | |||
1194 | { | 1200 | { |
1195 | struct stripe_head *sh; | 1201 | struct stripe_head *sh; |
1196 | 1202 | ||
1197 | assert_spin_locked(&log->io_list_lock); | 1203 | lockdep_assert_held(&log->io_list_lock); |
1198 | 1204 | ||
1199 | if (!list_empty(&log->no_mem_stripes)) { | 1205 | if (!list_empty(&log->no_mem_stripes)) { |
1200 | sh = list_first_entry(&log->no_mem_stripes, | 1206 | sh = list_first_entry(&log->no_mem_stripes, |
@@ -1210,7 +1216,7 @@ static bool r5l_complete_finished_ios(struct r5l_log *log) | |||
1210 | struct r5l_io_unit *io, *next; | 1216 | struct r5l_io_unit *io, *next; |
1211 | bool found = false; | 1217 | bool found = false; |
1212 | 1218 | ||
1213 | assert_spin_locked(&log->io_list_lock); | 1219 | lockdep_assert_held(&log->io_list_lock); |
1214 | 1220 | ||
1215 | list_for_each_entry_safe(io, next, &log->finished_ios, log_sibling) { | 1221 | list_for_each_entry_safe(io, next, &log->finished_ios, log_sibling) { |
1216 | /* don't change list order */ | 1222 | /* don't change list order */ |
@@ -1382,7 +1388,7 @@ static void r5c_flush_stripe(struct r5conf *conf, struct stripe_head *sh) | |||
1382 | * raid5_release_stripe() while holding conf->device_lock | 1388 | * raid5_release_stripe() while holding conf->device_lock |
1383 | */ | 1389 | */ |
1384 | BUG_ON(test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); | 1390 | BUG_ON(test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); |
1385 | assert_spin_locked(&conf->device_lock); | 1391 | lockdep_assert_held(&conf->device_lock); |
1386 | 1392 | ||
1387 | list_del_init(&sh->lru); | 1393 | list_del_init(&sh->lru); |
1388 | atomic_inc(&sh->count); | 1394 | atomic_inc(&sh->count); |
@@ -1409,7 +1415,7 @@ void r5c_flush_cache(struct r5conf *conf, int num) | |||
1409 | int count; | 1415 | int count; |
1410 | struct stripe_head *sh, *next; | 1416 | struct stripe_head *sh, *next; |
1411 | 1417 | ||
1412 | assert_spin_locked(&conf->device_lock); | 1418 | lockdep_assert_held(&conf->device_lock); |
1413 | if (!conf->log) | 1419 | if (!conf->log) |
1414 | return; | 1420 | return; |
1415 | 1421 | ||
@@ -1583,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space) | |||
1583 | md_wakeup_thread(log->reclaim_thread); | 1589 | md_wakeup_thread(log->reclaim_thread); |
1584 | } | 1590 | } |
1585 | 1591 | ||
1586 | void r5l_quiesce(struct r5l_log *log, int state) | 1592 | void r5l_quiesce(struct r5l_log *log, int quiesce) |
1587 | { | 1593 | { |
1588 | struct mddev *mddev; | 1594 | struct mddev *mddev; |
1589 | if (!log || state == 2) | 1595 | if (!log) |
1590 | return; | 1596 | return; |
1591 | if (state == 0) | 1597 | |
1592 | kthread_unpark(log->reclaim_thread->tsk); | 1598 | if (quiesce) { |
1593 | else if (state == 1) { | ||
1594 | /* make sure r5l_write_super_and_discard_space exits */ | 1599 | /* make sure r5l_write_super_and_discard_space exits */ |
1595 | mddev = log->rdev->mddev; | 1600 | mddev = log->rdev->mddev; |
1596 | wake_up(&mddev->sb_wait); | 1601 | wake_up(&mddev->sb_wait); |
1597 | kthread_park(log->reclaim_thread->tsk); | 1602 | kthread_park(log->reclaim_thread->tsk); |
1598 | r5l_wake_reclaim(log, MaxSector); | 1603 | r5l_wake_reclaim(log, MaxSector); |
1599 | r5l_do_reclaim(log); | 1604 | r5l_do_reclaim(log); |
1600 | } | 1605 | } else |
1606 | kthread_unpark(log->reclaim_thread->tsk); | ||
1601 | } | 1607 | } |
1602 | 1608 | ||
1603 | bool r5l_log_disk_error(struct r5conf *conf) | 1609 | bool r5l_log_disk_error(struct r5conf *conf) |
@@ -3165,6 +3171,8 @@ void r5l_exit_log(struct r5conf *conf) | |||
3165 | conf->log = NULL; | 3171 | conf->log = NULL; |
3166 | synchronize_rcu(); | 3172 | synchronize_rcu(); |
3167 | 3173 | ||
3174 | /* Ensure disable_writeback_work wakes up and exits */ | ||
3175 | wake_up(&conf->mddev->sb_wait); | ||
3168 | flush_work(&log->disable_writeback_work); | 3176 | flush_work(&log->disable_writeback_work); |
3169 | md_unregister_thread(&log->reclaim_thread); | 3177 | md_unregister_thread(&log->reclaim_thread); |
3170 | mempool_destroy(log->meta_pool); | 3178 | mempool_destroy(log->meta_pool); |
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 7f9ad5f7cda0..284578b0a349 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h | |||
@@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct r5l_log *log); | |||
9 | extern void r5l_flush_stripe_to_raid(struct r5l_log *log); | 9 | extern void r5l_flush_stripe_to_raid(struct r5l_log *log); |
10 | extern void r5l_stripe_write_finished(struct stripe_head *sh); | 10 | extern void r5l_stripe_write_finished(struct stripe_head *sh); |
11 | extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); | 11 | extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); |
12 | extern void r5l_quiesce(struct r5l_log *log, int state); | 12 | extern void r5l_quiesce(struct r5l_log *log, int quiesce); |
13 | extern bool r5l_log_disk_error(struct r5conf *conf); | 13 | extern bool r5l_log_disk_error(struct r5conf *conf); |
14 | extern bool r5c_is_writeback(struct r5l_log *log); | 14 | extern bool r5c_is_writeback(struct r5l_log *log); |
15 | extern int | 15 | extern int |
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index cd026c88f7ef..628c0bf7b9fd 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c | |||
@@ -758,7 +758,8 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, | |||
758 | (unsigned long long)sector); | 758 | (unsigned long long)sector); |
759 | 759 | ||
760 | rdev = conf->disks[dd_idx].rdev; | 760 | rdev = conf->disks[dd_idx].rdev; |
761 | if (!rdev) { | 761 | if (!rdev || (!test_bit(In_sync, &rdev->flags) && |
762 | sector >= rdev->recovery_offset)) { | ||
762 | pr_debug("%s:%*s data member disk %d missing\n", | 763 | pr_debug("%s:%*s data member disk %d missing\n", |
763 | __func__, indent, "", dd_idx); | 764 | __func__, indent, "", dd_idx); |
764 | update_parity = false; | 765 | update_parity = false; |
@@ -1296,8 +1297,7 @@ int ppl_init_log(struct r5conf *conf) | |||
1296 | 1297 | ||
1297 | if (ret) { | 1298 | if (ret) { |
1298 | goto err; | 1299 | goto err; |
1299 | } else if (!mddev->pers && | 1300 | } else if (!mddev->pers && mddev->recovery_cp == 0 && |
1300 | mddev->recovery_cp == 0 && !mddev->degraded && | ||
1301 | ppl_conf->recovered_entries > 0 && | 1301 | ppl_conf->recovered_entries > 0 && |
1302 | ppl_conf->mismatch_count == 0) { | 1302 | ppl_conf->mismatch_count == 0) { |
1303 | /* | 1303 | /* |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7d9a50eed9db..31dc25e2871a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include <linux/ratelimit.h> | 55 | #include <linux/ratelimit.h> |
56 | #include <linux/nodemask.h> | 56 | #include <linux/nodemask.h> |
57 | #include <linux/flex_array.h> | 57 | #include <linux/flex_array.h> |
58 | #include <linux/sched/signal.h> | ||
59 | 58 | ||
60 | #include <trace/events/block.h> | 59 | #include <trace/events/block.h> |
61 | #include <linux/list_sort.h> | 60 | #include <linux/list_sort.h> |
@@ -63,7 +62,7 @@ | |||
63 | #include "md.h" | 62 | #include "md.h" |
64 | #include "raid5.h" | 63 | #include "raid5.h" |
65 | #include "raid0.h" | 64 | #include "raid0.h" |
66 | #include "bitmap.h" | 65 | #include "md-bitmap.h" |
67 | #include "raid5-log.h" | 66 | #include "raid5-log.h" |
68 | 67 | ||
69 | #define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED) | 68 | #define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED) |
@@ -1818,8 +1817,11 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
1818 | struct r5dev *dev = &sh->dev[i]; | 1817 | struct r5dev *dev = &sh->dev[i]; |
1819 | 1818 | ||
1820 | if (dev->written || i == pd_idx || i == qd_idx) { | 1819 | if (dev->written || i == pd_idx || i == qd_idx) { |
1821 | if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) | 1820 | if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { |
1822 | set_bit(R5_UPTODATE, &dev->flags); | 1821 | set_bit(R5_UPTODATE, &dev->flags); |
1822 | if (test_bit(STRIPE_EXPAND_READY, &sh->state)) | ||
1823 | set_bit(R5_Expanded, &dev->flags); | ||
1824 | } | ||
1823 | if (fua) | 1825 | if (fua) |
1824 | set_bit(R5_WantFUA, &dev->flags); | 1826 | set_bit(R5_WantFUA, &dev->flags); |
1825 | if (sync) | 1827 | if (sync) |
@@ -5682,28 +5684,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) | |||
5682 | goto retry; | 5684 | goto retry; |
5683 | } | 5685 | } |
5684 | 5686 | ||
5685 | if (rw == WRITE && | ||
5686 | logical_sector >= mddev->suspend_lo && | ||
5687 | logical_sector < mddev->suspend_hi) { | ||
5688 | raid5_release_stripe(sh); | ||
5689 | /* As the suspend_* range is controlled by | ||
5690 | * userspace, we want an interruptible | ||
5691 | * wait. | ||
5692 | */ | ||
5693 | prepare_to_wait(&conf->wait_for_overlap, | ||
5694 | &w, TASK_INTERRUPTIBLE); | ||
5695 | if (logical_sector >= mddev->suspend_lo && | ||
5696 | logical_sector < mddev->suspend_hi) { | ||
5697 | sigset_t full, old; | ||
5698 | sigfillset(&full); | ||
5699 | sigprocmask(SIG_BLOCK, &full, &old); | ||
5700 | schedule(); | ||
5701 | sigprocmask(SIG_SETMASK, &old, NULL); | ||
5702 | do_prepare = true; | ||
5703 | } | ||
5704 | goto retry; | ||
5705 | } | ||
5706 | |||
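
The deleted block was raid5's private copy of the userspace-controlled suspend_lo/suspend_hi handling, complete with a sigprocmask() dance to sleep without being disturbed by signals. Per this pull's "move suspend_hi/lo handling into core md code" and "use TASK_IDLE instead of blocking signals" changes, core md now performs that wait in TASK_IDLE (uninterruptible, but not counted toward load average, so no signal games are needed). A self-contained sketch of the overlap predicate that decides whether a write must wait; the field and function names here are hypothetical:

    #include <stdbool.h>

    typedef unsigned long long sector_t;

    struct suspend_range { sector_t lo, hi; };   /* hypothetical mddev fields */

    /* A write must wait only while it overlaps [lo, hi). */
    static bool must_wait(bool is_write, sector_t sector, sector_t nsectors,
                          struct suspend_range s)
    {
        return is_write &&
               s.lo < s.hi &&              /* a window is actually set */
               sector < s.hi &&            /* bio starts before the window ends */
               sector + nsectors > s.lo;   /* and ends after it starts */
    }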
5707 | if (test_bit(STRIPE_EXPANDING, &sh->state) || | 5687 | if (test_bit(STRIPE_EXPANDING, &sh->state) || |
5708 | !add_stripe_bio(sh, bi, dd_idx, rw, previous)) { | 5688 | !add_stripe_bio(sh, bi, dd_idx, rw, previous)) { |
5709 | /* Stripe is busy expanding or | 5689 | /* Stripe is busy expanding or |
@@ -5758,6 +5738,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk | |||
5758 | */ | 5738 | */ |
5759 | struct r5conf *conf = mddev->private; | 5739 | struct r5conf *conf = mddev->private; |
5760 | struct stripe_head *sh; | 5740 | struct stripe_head *sh; |
5741 | struct md_rdev *rdev; | ||
5761 | sector_t first_sector, last_sector; | 5742 | sector_t first_sector, last_sector; |
5762 | int raid_disks = conf->previous_raid_disks; | 5743 | int raid_disks = conf->previous_raid_disks; |
5763 | int data_disks = raid_disks - conf->max_degraded; | 5744 | int data_disks = raid_disks - conf->max_degraded; |
@@ -5880,6 +5861,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk | |||
5880 | return 0; | 5861 | return 0; |
5881 | mddev->reshape_position = conf->reshape_progress; | 5862 | mddev->reshape_position = conf->reshape_progress; |
5882 | mddev->curr_resync_completed = sector_nr; | 5863 | mddev->curr_resync_completed = sector_nr; |
5864 | if (!mddev->reshape_backwards) | ||
5865 | /* Can update recovery_offset */ | ||
5866 | rdev_for_each(rdev, mddev) | ||
5867 | if (rdev->raid_disk >= 0 && | ||
5868 | !test_bit(Journal, &rdev->flags) && | ||
5869 | !test_bit(In_sync, &rdev->flags) && | ||
5870 | rdev->recovery_offset < sector_nr) | ||
5871 | rdev->recovery_offset = sector_nr; | ||
5872 | |||
5883 | conf->reshape_checkpoint = jiffies; | 5873 | conf->reshape_checkpoint = jiffies; |
5884 | set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); | 5874 | set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); |
5885 | md_wakeup_thread(mddev->thread); | 5875 | md_wakeup_thread(mddev->thread); |
@@ -5978,6 +5968,14 @@ finish: | |||
5978 | goto ret; | 5968 | goto ret; |
5979 | mddev->reshape_position = conf->reshape_progress; | 5969 | mddev->reshape_position = conf->reshape_progress; |
5980 | mddev->curr_resync_completed = sector_nr; | 5970 | mddev->curr_resync_completed = sector_nr; |
5971 | if (!mddev->reshape_backwards) | ||
5972 | /* Can update recovery_offset */ | ||
5973 | rdev_for_each(rdev, mddev) | ||
5974 | if (rdev->raid_disk >= 0 && | ||
5975 | !test_bit(Journal, &rdev->flags) && | ||
5976 | !test_bit(In_sync, &rdev->flags) && | ||
5977 | rdev->recovery_offset < sector_nr) | ||
5978 | rdev->recovery_offset = sector_nr; | ||
5981 | conf->reshape_checkpoint = jiffies; | 5979 | conf->reshape_checkpoint = jiffies; |
5982 | set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); | 5980 | set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); |
5983 | md_wakeup_thread(mddev->thread); | 5981 | md_wakeup_thread(mddev->thread); |
@@ -7156,6 +7154,13 @@ static int raid5_run(struct mddev *mddev) | |||
7156 | min_offset_diff = diff; | 7154 | min_offset_diff = diff; |
7157 | } | 7155 | } |
7158 | 7156 | ||
7157 | if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) && | ||
7158 | (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { | ||
7159 | pr_notice("md/raid:%s: array cannot have both journal and bitmap\n", | ||
7160 | mdname(mddev)); | ||
7161 | return -EINVAL; | ||
7162 | } | ||
7163 | |||
7159 | if (mddev->reshape_position != MaxSector) { | 7164 | if (mddev->reshape_position != MaxSector) { |
7160 | /* Check that we can continue the reshape. | 7165 | /* Check that we can continue the reshape. |
7161 | * Difficulties arise if the stripe we would write to | 7166 | * Difficulties arise if the stripe we would write to |
@@ -7958,6 +7963,7 @@ static void end_reshape(struct r5conf *conf) | |||
7958 | { | 7963 | { |
7959 | 7964 | ||
7960 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { | 7965 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { |
7966 | struct md_rdev *rdev; | ||
7961 | 7967 | ||
7962 | spin_lock_irq(&conf->device_lock); | 7968 | spin_lock_irq(&conf->device_lock); |
7963 | conf->previous_raid_disks = conf->raid_disks; | 7969 | conf->previous_raid_disks = conf->raid_disks; |
@@ -7965,6 +7971,11 @@ static void end_reshape(struct r5conf *conf) | |||
7965 | smp_wmb(); | 7971 | smp_wmb(); |
7966 | conf->reshape_progress = MaxSector; | 7972 | conf->reshape_progress = MaxSector; |
7967 | conf->mddev->reshape_position = MaxSector; | 7973 | conf->mddev->reshape_position = MaxSector; |
7974 | rdev_for_each(rdev, conf->mddev) | ||
7975 | if (rdev->raid_disk >= 0 && | ||
7976 | !test_bit(Journal, &rdev->flags) && | ||
7977 | !test_bit(In_sync, &rdev->flags)) | ||
7978 | rdev->recovery_offset = MaxSector; | ||
7968 | spin_unlock_irq(&conf->device_lock); | 7979 | spin_unlock_irq(&conf->device_lock); |
7969 | wake_up(&conf->wait_for_overlap); | 7980 | wake_up(&conf->wait_for_overlap); |
7970 | 7981 | ||
@@ -8020,16 +8031,12 @@ static void raid5_finish_reshape(struct mddev *mddev) | |||
8020 | } | 8031 | } |
8021 | } | 8032 | } |
8022 | 8033 | ||
8023 | static void raid5_quiesce(struct mddev *mddev, int state) | 8034 | static void raid5_quiesce(struct mddev *mddev, int quiesce) |
8024 | { | 8035 | { |
8025 | struct r5conf *conf = mddev->private; | 8036 | struct r5conf *conf = mddev->private; |
8026 | 8037 | ||
8027 | switch(state) { | 8038 | if (quiesce) { |
8028 | case 2: /* resume for a suspend */ | 8039 | /* stop all writes */ |
8029 | wake_up(&conf->wait_for_overlap); | ||
8030 | break; | ||
8031 | |||
8032 | case 1: /* stop all writes */ | ||
8033 | lock_all_device_hash_locks_irq(conf); | 8040 | lock_all_device_hash_locks_irq(conf); |
8034 | /* '2' tells resync/reshape to pause so that all | 8041 | /* '2' tells resync/reshape to pause so that all |
8035 | * active stripes can drain | 8042 | * active stripes can drain |
@@ -8045,17 +8052,15 @@ static void raid5_quiesce(struct mddev *mddev, int state) | |||
8045 | unlock_all_device_hash_locks_irq(conf); | 8052 | unlock_all_device_hash_locks_irq(conf); |
8046 | /* allow reshape to continue */ | 8053 | /* allow reshape to continue */ |
8047 | wake_up(&conf->wait_for_overlap); | 8054 | wake_up(&conf->wait_for_overlap); |
8048 | break; | 8055 | } else { |
8049 | 8056 | /* re-enable writes */ | |
8050 | case 0: /* re-enable writes */ | ||
8051 | lock_all_device_hash_locks_irq(conf); | 8057 | lock_all_device_hash_locks_irq(conf); |
8052 | conf->quiesce = 0; | 8058 | conf->quiesce = 0; |
8053 | wake_up(&conf->wait_for_quiescent); | 8059 | wake_up(&conf->wait_for_quiescent); |
8054 | wake_up(&conf->wait_for_overlap); | 8060 | wake_up(&conf->wait_for_overlap); |
8055 | unlock_all_device_hash_locks_irq(conf); | 8061 | unlock_all_device_hash_locks_irq(conf); |
8056 | break; | ||
8057 | } | 8062 | } |
8058 | r5l_quiesce(conf->log, state); | 8063 | r5l_quiesce(conf->log, quiesce); |
8059 | } | 8064 | } |
8060 | 8065 | ||
8061 | static void *raid45_takeover_raid0(struct mddev *mddev, int level) | 8066 | static void *raid45_takeover_raid0(struct mddev *mddev, int level) |