author     colyli@suse.de <colyli@suse.de>             2017-02-17 14:05:56 -0500
committer  Shaohua Li <shli@fb.com>                    2017-02-20 01:04:24 -0500
commit     fd76863e37fef26fe05547fddfa6e3d05e1682e6 (patch)
tree       daa8035674d33b9bf86c168f1f341e5485eef83c    /drivers/md/raid1.c
parent     eae8263fb1f4256460270dd8f42334604dcdfac6 (diff)
RAID1: a new I/O barrier implementation to remove resync window
Commit 79ef3a8aa1cb ("raid1: Rewrite the implementation of iobarrier.")
introduced a sliding resync window for the raid1 I/O barrier. The idea is
to limit I/O barriers to a sliding resync window, so regular I/Os outside
that window no longer need to wait for the barrier. On a large raid1
device this helps a lot to improve parallel write I/O throughput while
background resync I/Os are running at the same time. The idea of the
sliding resync window is great, but its code complexity is a challenge.
The sliding resync window requires several variables to work
collectively, which is complex and very hard to get right. Just grep
"Fixes: 79ef3a8aa1" in the kernel git log: there are 8 more patches
fixing the original resync window patch. And this is not the end; any
further related modification may easily introduce more regressions.

Therefore I decided to implement a much simpler raid1 I/O barrier by
removing the resync window code; I believe life will be much easier.

The brief idea of the simpler barrier is,
 - Do not maintain a globally unique resync window.
 - Use multiple hash buckets to reduce I/O barrier conflicts: a regular
   I/O only has to wait for a resync I/O when both of them have the same
   barrier bucket index, and vice versa.
 - I/O barrier conflicts can be reduced to an acceptable number if there
   are enough barrier buckets.

Here is how the barrier buckets are designed,
 - BARRIER_UNIT_SECTOR_SIZE
   The whole LBA address space of a raid1 device is divided into
   multiple barrier units, each of size BARRIER_UNIT_SECTOR_SIZE. Bio
   requests won't cross the border of a barrier unit, which means the
   maximum bio size is BARRIER_UNIT_SECTOR_SIZE<<9 (64MB) in bytes.
   For random I/O, 64MB is large enough for both read and write
   requests; for sequential I/O, considering that the underlying block
   layer may merge them into larger requests, 64MB is still good enough.
   Neil also points out that for resync operation, "we want the resync
   to move from region to region fairly quickly so that the slowness
   caused by having to synchronize with the resync is averaged out over
   a fairly small time frame". At full speed, resyncing 64MB should take
   less than 1 second. When resync is competing with other I/O, it could
   take up to a few minutes. Therefore 64MB is a fairly good range for
   resync.
 - BARRIER_BUCKETS_NR
   There are BARRIER_BUCKETS_NR buckets in total, defined by,
        #define BARRIER_BUCKETS_NR_BITS   (PAGE_SHIFT - 2)
        #define BARRIER_BUCKETS_NR        (1<<BARRIER_BUCKETS_NR_BITS)
   This patch turns the following members of struct r1conf from
   integers into arrays of integers,
        -       int                     nr_pending;
        -       int                     nr_waiting;
        -       int                     nr_queued;
        -       int                     barrier;
        +       int                     *nr_pending;
        +       int                     *nr_waiting;
        +       int                     *nr_queued;
        +       int                     *barrier;
   The number of array elements is BARRIER_BUCKETS_NR. For a 4KB kernel
   page size, (PAGE_SHIFT - 2) means there are 1024 I/O barrier buckets,
   and each array of integers occupies a single memory page. With 1024
   buckets, a request smaller than the I/O barrier unit size has only a
   ~0.1% chance of having to wait for resync to pause, which is a small
   enough fraction. Also, requesting a single memory page is friendlier
   to the kernel page allocator than a larger allocation.
 - I/O barrier buckets are indexed by bio start sector
   If multiple I/O requests hit different I/O barrier units, they only
   need to compete for the I/O barrier with other I/Os that hit the same
   I/O barrier bucket index. The index of the barrier bucket which a bio
   should look for is calculated by sector_to_idx(), which is defined in
   raid1.h as an inline function,
        static inline int sector_to_idx(sector_t sector)
        {
                return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS,
                                BARRIER_BUCKETS_NR_BITS);
        }
   Here sector is the start sector number of a bio.
 - A single bio won't cross the boundary of an I/O barrier unit
   If a request goes across a barrier unit boundary, it will be split.
   A bio may be split in raid1_make_request() or raid1_sync_request(),
   if the number of sectors returned by align_to_barrier_unit_end() is
   smaller than the original bio size.
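
For reference, a minimal sketch of the barrier-unit constants that
sector_to_idx() relies on; the concrete value 17 below is not quoted from
the patch but inferred from the 64MB unit size described above
((1 << 17) sectors * 512 bytes = 64MB):

        /* Sketch only: the shift that defines one barrier unit. */
        #define BARRIER_UNIT_SECTOR_BITS        17
        #define BARRIER_UNIT_SECTOR_SIZE        (1<<BARRIER_UNIT_SECTOR_BITS)

With these values, sector_to_idx() simply drops the low 17 bits of the
start sector (the offset inside a 64MB unit) and hashes the remaining
unit number into one of the BARRIER_BUCKETS_NR buckets.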

Compared to the single sliding resync window,
 - Currently resync I/O grows linearly, therefore regular and resync I/O
   will conflict within a single barrier unit. So the I/O behavior is
   similar to the single sliding resync window.
 - But a barrier bucket is shared by all barrier units with an identical
   barrier bucket index, so the probability of conflict might be higher
   than with a single sliding resync window, in the condition that write
   I/Os always hit barrier units which have identical barrier bucket
   indexes with the resync I/Os. This is a very rare condition in real
   I/O workloads; I cannot imagine how it could happen in practice.
 - Therefore we can achieve a good enough low conflict rate with a much
   simpler barrier algorithm and implementation.

There are a few changes that should be noticed,
 - In raid1d(), I change the code that decreases conf->nr_queued[idx] so
   that it is done inside a single loop; it looks like this,
        spin_lock_irqsave(&conf->device_lock, flags);
        conf->nr_queued[idx]--;
        spin_unlock_irqrestore(&conf->device_lock, flags);
   This change generates more spin lock operations, but in the next
   patch of this patch set it will be replaced by a single line of code,
        atomic_dec(&conf->nr_queued[idx]);
   So we don't need to worry about the spin lock cost here.
 - Mainline raid1 code split the original raid1_make_request() into
   raid1_read_request() and raid1_write_request(). If the original bio
   goes across an I/O barrier unit boundary, the bio will be split
   before calling raid1_read_request() or raid1_write_request(). This
   makes the code logic simpler and clearer (see the sketch after this
   list for an illustration).
 - In this patch wait_barrier() is moved from raid1_make_request() to
   raid1_write_request(). In raid1_read_request(), the original
   wait_barrier() is replaced by wait_read_barrier(). The difference is
   that wait_read_barrier() only waits if the array is frozen; using
   different barrier functions in different code paths makes the code
   cleaner and easier to read.
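
As a purely illustrative sketch (not part of the patch), the snippet
below shows how a write that crosses a barrier unit boundary is cut and
bucketed by the helpers introduced in this patch, assuming the 64MB
(0x20000-sector) unit size discussed above:

        /*
         * Hypothetical example: a 0x20-sector write starting at sector
         * 0x1fff0 crosses the first barrier unit boundary at 0x20000.
         */
        sector_t start = 0x1fff0;
        sector_t len = align_to_barrier_unit_end(start, 0x20);
        /*
         * len == 0x10: only the sectors up to the unit boundary are
         * handled now; bio_split()/bio_chain() turn the remaining 0x10
         * sectors into a second request.
         */
        int idx_lo = sector_to_idx(start);              /* bucket of 1st piece */
        int idx_hi = sector_to_idx(start + len);        /* bucket of 2nd piece */
        /*
         * The two pieces hash from different unit numbers, so each one
         * only competes with resync I/O in its own barrier bucket.
         */

This mirrors what raid1_make_request() does in the loop added by this
patch: split at align_to_barrier_unit_end(), then hand each piece to
raid1_read_request() or raid1_write_request() separately.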

Changelog
V4:
 - Add alloc_r1bio() to remove redundant r1bio memory allocation code.
 - Fix many typos in patch comments.
 - Use (PAGE_SHIFT - ilog2(sizeof(int))) to define BARRIER_BUCKETS_NR_BITS.
V3:
 - Rebase the patch against latest upstream kernel code.
 - Many fixes by review comments from Neil,
   - Back to use pointers to replace arrays in struct r1conf
   - Remove total_barriers from struct r1conf
   - Add more patch comments to explain how/why the values of
     BARRIER_UNIT_SECTOR_SIZE and BARRIER_BUCKETS_NR are decided.
   - Use get_unqueued_pending() to replace get_all_pendings() and
     get_all_queued()
   - Increase bucket number from 512 to 1024
 - Change code comments format by review from Shaohua.
V2:
 - Use bio_split() to split the original bio if it goes across a barrier
   unit boundary, to make the code simpler, by suggestion from Shaohua
   and Neil.
 - Use hash_long() to replace the original linear hash, to avoid a
   possible conflict between resync I/O and sequential write I/O, by
   suggestion from Shaohua.
 - Add conf->total_barriers to record barrier depth, which is used to
   control the number of parallel sync I/O barriers, by suggestion from
   Shaohua.
 - In the V1 patch the barrier-bucket related members listed below were
   allocated in a memory page. To make the code simpler, the V2 patch
   moves the memory space into struct r1conf, like this,
        -       int                     nr_pending;
        -       int                     nr_waiting;
        -       int                     nr_queued;
        -       int                     barrier;
        +       int                     nr_pending[BARRIER_BUCKETS_NR];
        +       int                     nr_waiting[BARRIER_BUCKETS_NR];
        +       int                     nr_queued[BARRIER_BUCKETS_NR];
        +       int                     barrier[BARRIER_BUCKETS_NR];
   This change is by the suggestion from Shaohua.
 - Remove some irrelevant code comments, by suggestion from Guoqing.
 - Add a missing wait_barrier() before jumping to retry_write, in
   raid1_make_write_request().
V1:
 - Original RFC patch for comments.

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Guoqing Jiang <gqjiang@suse.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--  drivers/md/raid1.c  473
1 file changed, 261 insertions, 212 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ad5c9483bd50..40297fd17f7e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -71,9 +71,8 @@
71 */ 71 */
72static int max_queued_requests = 1024; 72static int max_queued_requests = 1024;
73 73
74static void allow_barrier(struct r1conf *conf, sector_t start_next_window, 74static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
75 sector_t bi_sector); 75static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
76static void lower_barrier(struct r1conf *conf);
77 76
78#define raid1_log(md, fmt, args...) \ 77#define raid1_log(md, fmt, args...) \
79 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) 78 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
@@ -100,7 +99,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
100#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9) 99#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
101#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW) 100#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
102#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9) 101#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
103#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
104 102
105static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) 103static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
106{ 104{
@@ -215,7 +213,7 @@ static void put_buf(struct r1bio *r1_bio)
215 213
216 mempool_free(r1_bio, conf->r1buf_pool); 214 mempool_free(r1_bio, conf->r1buf_pool);
217 215
218 lower_barrier(conf); 216 lower_barrier(conf, r1_bio->sector);
219} 217}
220 218
221static void reschedule_retry(struct r1bio *r1_bio) 219static void reschedule_retry(struct r1bio *r1_bio)
@@ -223,10 +221,12 @@ static void reschedule_retry(struct r1bio *r1_bio)
223 unsigned long flags; 221 unsigned long flags;
224 struct mddev *mddev = r1_bio->mddev; 222 struct mddev *mddev = r1_bio->mddev;
225 struct r1conf *conf = mddev->private; 223 struct r1conf *conf = mddev->private;
224 int idx;
226 225
226 idx = sector_to_idx(r1_bio->sector);
227 spin_lock_irqsave(&conf->device_lock, flags); 227 spin_lock_irqsave(&conf->device_lock, flags);
228 list_add(&r1_bio->retry_list, &conf->retry_list); 228 list_add(&r1_bio->retry_list, &conf->retry_list);
229 conf->nr_queued ++; 229 conf->nr_queued[idx]++;
230 spin_unlock_irqrestore(&conf->device_lock, flags); 230 spin_unlock_irqrestore(&conf->device_lock, flags);
231 231
232 wake_up(&conf->wait_barrier); 232 wake_up(&conf->wait_barrier);
@@ -243,7 +243,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
243 struct bio *bio = r1_bio->master_bio; 243 struct bio *bio = r1_bio->master_bio;
244 int done; 244 int done;
245 struct r1conf *conf = r1_bio->mddev->private; 245 struct r1conf *conf = r1_bio->mddev->private;
246 sector_t start_next_window = r1_bio->start_next_window;
247 sector_t bi_sector = bio->bi_iter.bi_sector; 246 sector_t bi_sector = bio->bi_iter.bi_sector;
248 247
249 if (bio->bi_phys_segments) { 248 if (bio->bi_phys_segments) {
@@ -269,7 +268,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
269 * Wake up any possible resync thread that waits for the device 268 * Wake up any possible resync thread that waits for the device
270 * to go idle. 269 * to go idle.
271 */ 270 */
272 allow_barrier(conf, start_next_window, bi_sector); 271 allow_barrier(conf, bi_sector);
273 } 272 }
274} 273}
275 274
@@ -517,6 +516,25 @@ static void raid1_end_write_request(struct bio *bio)
517 bio_put(to_put); 516 bio_put(to_put);
518} 517}
519 518
519static sector_t align_to_barrier_unit_end(sector_t start_sector,
520 sector_t sectors)
521{
522 sector_t len;
523
524 WARN_ON(sectors == 0);
525 /*
526 * len is the number of sectors from start_sector to end of the
527 * barrier unit which start_sector belongs to.
528 */
529 len = round_up(start_sector + 1, BARRIER_UNIT_SECTOR_SIZE) -
530 start_sector;
531
532 if (len > sectors)
533 len = sectors;
534
535 return len;
536}
537
520/* 538/*
521 * This routine returns the disk from which the requested read should 539 * This routine returns the disk from which the requested read should
522 * be done. There is a per-array 'next expected sequential IO' sector 540 * be done. There is a per-array 'next expected sequential IO' sector
@@ -813,168 +831,168 @@ static void flush_pending_writes(struct r1conf *conf)
813 */ 831 */
814static void raise_barrier(struct r1conf *conf, sector_t sector_nr) 832static void raise_barrier(struct r1conf *conf, sector_t sector_nr)
815{ 833{
834 int idx = sector_to_idx(sector_nr);
835
816 spin_lock_irq(&conf->resync_lock); 836 spin_lock_irq(&conf->resync_lock);
817 837
818 /* Wait until no block IO is waiting */ 838 /* Wait until no block IO is waiting */
819 wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, 839 wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting[idx],
820 conf->resync_lock); 840 conf->resync_lock);
821 841
822 /* block any new IO from starting */ 842 /* block any new IO from starting */
823 conf->barrier++; 843 conf->barrier[idx]++;
824 conf->next_resync = sector_nr;
825 844
826 /* For these conditions we must wait: 845 /* For these conditions we must wait:
827 * A: while the array is in frozen state 846 * A: while the array is in frozen state
828 * B: while barrier >= RESYNC_DEPTH, meaning resync reach 847 * B: while conf->nr_pending[idx] is not 0, meaning regular I/O
829 * the max count which allowed. 848 * existing in corresponding I/O barrier bucket.
830 * C: next_resync + RESYNC_SECTORS > start_next_window, meaning 849 * C: while conf->barrier[idx] >= RESYNC_DEPTH, meaning reaches
831 * next resync will reach to the window which normal bios are 850 * max resync count which allowed on current I/O barrier bucket.
832 * handling.
833 * D: while there are any active requests in the current window.
834 */ 851 */
835 wait_event_lock_irq(conf->wait_barrier, 852 wait_event_lock_irq(conf->wait_barrier,
836 !conf->array_frozen && 853 !conf->array_frozen &&
837 conf->barrier < RESYNC_DEPTH && 854 !conf->nr_pending[idx] &&
838 conf->current_window_requests == 0 && 855 conf->barrier[idx] < RESYNC_DEPTH,
839 (conf->start_next_window >=
840 conf->next_resync + RESYNC_SECTORS),
841 conf->resync_lock); 856 conf->resync_lock);
842 857
843 conf->nr_pending++; 858 conf->nr_pending[idx]++;
844 spin_unlock_irq(&conf->resync_lock); 859 spin_unlock_irq(&conf->resync_lock);
845} 860}
846 861
847static void lower_barrier(struct r1conf *conf) 862static void lower_barrier(struct r1conf *conf, sector_t sector_nr)
848{ 863{
849 unsigned long flags; 864 unsigned long flags;
850 BUG_ON(conf->barrier <= 0); 865 int idx = sector_to_idx(sector_nr);
866
867 BUG_ON(conf->barrier[idx] <= 0);
868
851 spin_lock_irqsave(&conf->resync_lock, flags); 869 spin_lock_irqsave(&conf->resync_lock, flags);
852 conf->barrier--; 870 conf->barrier[idx]--;
853 conf->nr_pending--; 871 conf->nr_pending[idx]--;
854 spin_unlock_irqrestore(&conf->resync_lock, flags); 872 spin_unlock_irqrestore(&conf->resync_lock, flags);
855 wake_up(&conf->wait_barrier); 873 wake_up(&conf->wait_barrier);
856} 874}
857 875
858static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio) 876static void _wait_barrier(struct r1conf *conf, int idx)
859{ 877{
860 bool wait = false; 878 spin_lock_irq(&conf->resync_lock);
861 879 if (conf->array_frozen || conf->barrier[idx]) {
862 if (conf->array_frozen || !bio) 880 conf->nr_waiting[idx]++;
863 wait = true; 881 /* Wait for the barrier to drop. */
864 else if (conf->barrier && bio_data_dir(bio) == WRITE) { 882 wait_event_lock_irq(
865 if ((conf->mddev->curr_resync_completed 883 conf->wait_barrier,
866 >= bio_end_sector(bio)) || 884 !conf->array_frozen && !conf->barrier[idx],
867 (conf->start_next_window + NEXT_NORMALIO_DISTANCE 885 conf->resync_lock);
868 <= bio->bi_iter.bi_sector)) 886 conf->nr_waiting[idx]--;
869 wait = false;
870 else
871 wait = true;
872 } 887 }
873 888
874 return wait; 889 conf->nr_pending[idx]++;
890 spin_unlock_irq(&conf->resync_lock);
875} 891}
876 892
877static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) 893static void wait_read_barrier(struct r1conf *conf, sector_t sector_nr)
878{ 894{
879 sector_t sector = 0; 895 int idx = sector_to_idx(sector_nr);
880 896
881 spin_lock_irq(&conf->resync_lock); 897 spin_lock_irq(&conf->resync_lock);
882 if (need_to_wait_for_sync(conf, bio)) { 898 if (conf->array_frozen) {
883 conf->nr_waiting++; 899 conf->nr_waiting[idx]++;
884 /* Wait for the barrier to drop. 900 /* Wait for array to unfreeze */
885 * However if there are already pending 901 wait_event_lock_irq(
886 * requests (preventing the barrier from 902 conf->wait_barrier,
887 * rising completely), and the 903 !conf->array_frozen,
888 * per-process bio queue isn't empty, 904 conf->resync_lock);
889 * then don't wait, as we need to empty 905 conf->nr_waiting[idx]--;
890 * that queue to allow conf->start_next_window
891 * to increase.
892 */
893 raid1_log(conf->mddev, "wait barrier");
894 wait_event_lock_irq(conf->wait_barrier,
895 !conf->array_frozen &&
896 (!conf->barrier ||
897 ((conf->start_next_window <
898 conf->next_resync + RESYNC_SECTORS) &&
899 current->bio_list &&
900 !bio_list_empty(current->bio_list))),
901 conf->resync_lock);
902 conf->nr_waiting--;
903 }
904
905 if (bio && bio_data_dir(bio) == WRITE) {
906 if (bio->bi_iter.bi_sector >= conf->next_resync) {
907 if (conf->start_next_window == MaxSector)
908 conf->start_next_window =
909 conf->next_resync +
910 NEXT_NORMALIO_DISTANCE;
911
912 if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
913 <= bio->bi_iter.bi_sector)
914 conf->next_window_requests++;
915 else
916 conf->current_window_requests++;
917 sector = conf->start_next_window;
918 }
919 } 906 }
920 907
921 conf->nr_pending++; 908 conf->nr_pending[idx]++;
922 spin_unlock_irq(&conf->resync_lock); 909 spin_unlock_irq(&conf->resync_lock);
923 return sector;
924} 910}
925 911
926static void allow_barrier(struct r1conf *conf, sector_t start_next_window, 912static void wait_barrier(struct r1conf *conf, sector_t sector_nr)
927 sector_t bi_sector) 913{
914 int idx = sector_to_idx(sector_nr);
915
916 _wait_barrier(conf, idx);
917}
918
919static void wait_all_barriers(struct r1conf *conf)
920{
921 int idx;
922
923 for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
924 _wait_barrier(conf, idx);
925}
926
927static void _allow_barrier(struct r1conf *conf, int idx)
928{ 928{
929 unsigned long flags; 929 unsigned long flags;
930 930
931 spin_lock_irqsave(&conf->resync_lock, flags); 931 spin_lock_irqsave(&conf->resync_lock, flags);
932 conf->nr_pending--; 932 conf->nr_pending[idx]--;
933 if (start_next_window) {
934 if (start_next_window == conf->start_next_window) {
935 if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
936 <= bi_sector)
937 conf->next_window_requests--;
938 else
939 conf->current_window_requests--;
940 } else
941 conf->current_window_requests--;
942
943 if (!conf->current_window_requests) {
944 if (conf->next_window_requests) {
945 conf->current_window_requests =
946 conf->next_window_requests;
947 conf->next_window_requests = 0;
948 conf->start_next_window +=
949 NEXT_NORMALIO_DISTANCE;
950 } else
951 conf->start_next_window = MaxSector;
952 }
953 }
954 spin_unlock_irqrestore(&conf->resync_lock, flags); 933 spin_unlock_irqrestore(&conf->resync_lock, flags);
955 wake_up(&conf->wait_barrier); 934 wake_up(&conf->wait_barrier);
956} 935}
957 936
937static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
938{
939 int idx = sector_to_idx(sector_nr);
940
941 _allow_barrier(conf, idx);
942}
943
944static void allow_all_barriers(struct r1conf *conf)
945{
946 int idx;
947
948 for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
949 _allow_barrier(conf, idx);
950}
951
952/* conf->resync_lock should be held */
953static int get_unqueued_pending(struct r1conf *conf)
954{
955 int idx, ret;
956
957 for (ret = 0, idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
958 ret += conf->nr_pending[idx] - conf->nr_queued[idx];
959
960 return ret;
961}
962
958static void freeze_array(struct r1conf *conf, int extra) 963static void freeze_array(struct r1conf *conf, int extra)
959{ 964{
960 /* stop syncio and normal IO and wait for everything to 965 /* Stop sync I/O and normal I/O and wait for everything to
961 * go quite. 966 * go quite.
962 * We wait until nr_pending match nr_queued+extra 967 * This is called in two situations:
963 * This is called in the context of one normal IO request 968 * 1) management command handlers (reshape, remove disk, quiesce).
964 * that has failed. Thus any sync request that might be pending 969 * 2) one normal I/O request failed.
965 * will be blocked by nr_pending, and we need to wait for 970
966 * pending IO requests to complete or be queued for re-try. 971 * After array_frozen is set to 1, new sync IO will be blocked at
967 * Thus the number queued (nr_queued) plus this request (extra) 972 * raise_barrier(), and new normal I/O will blocked at _wait_barrier()
968 * must match the number of pending IOs (nr_pending) before 973 * or wait_read_barrier(). The flying I/Os will either complete or be
969 * we continue. 974 * queued. When everything goes quite, there are only queued I/Os left.
975
976 * Every flying I/O contributes to a conf->nr_pending[idx], idx is the
977 * barrier bucket index which this I/O request hits. When all sync and
978 * normal I/O are queued, sum of all conf->nr_pending[] will match sum
979 * of all conf->nr_queued[]. But normal I/O failure is an exception,
980 * in handle_read_error(), we may call freeze_array() before trying to
981 * fix the read error. In this case, the error read I/O is not queued,
982 * so get_unqueued_pending() == 1.
983 *
984 * Therefore before this function returns, we need to wait until
985 * get_unqueued_pendings(conf) gets equal to extra. For
986 * normal I/O context, extra is 1, in rested situations extra is 0.
970 */ 987 */
971 spin_lock_irq(&conf->resync_lock); 988 spin_lock_irq(&conf->resync_lock);
972 conf->array_frozen = 1; 989 conf->array_frozen = 1;
973 raid1_log(conf->mddev, "wait freeze"); 990 raid1_log(conf->mddev, "wait freeze");
974 wait_event_lock_irq_cmd(conf->wait_barrier, 991 wait_event_lock_irq_cmd(
975 conf->nr_pending == conf->nr_queued+extra, 992 conf->wait_barrier,
976 conf->resync_lock, 993 get_unqueued_pending(conf) == extra,
977 flush_pending_writes(conf)); 994 conf->resync_lock,
995 flush_pending_writes(conf));
978 spin_unlock_irq(&conf->resync_lock); 996 spin_unlock_irq(&conf->resync_lock);
979} 997}
980static void unfreeze_array(struct r1conf *conf) 998static void unfreeze_array(struct r1conf *conf)
@@ -1070,11 +1088,28 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
1070 kfree(plug); 1088 kfree(plug);
1071} 1089}
1072 1090
1073static void raid1_read_request(struct mddev *mddev, struct bio *bio, 1091static inline struct r1bio *
1074 struct r1bio *r1_bio) 1092alloc_r1bio(struct mddev *mddev, struct bio *bio, sector_t sectors_handled)
1093{
1094 struct r1conf *conf = mddev->private;
1095 struct r1bio *r1_bio;
1096
1097 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1098
1099 r1_bio->master_bio = bio;
1100 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1101 r1_bio->state = 0;
1102 r1_bio->mddev = mddev;
1103 r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1104
1105 return r1_bio;
1106}
1107
1108static void raid1_read_request(struct mddev *mddev, struct bio *bio)
1075{ 1109{
1076 struct r1conf *conf = mddev->private; 1110 struct r1conf *conf = mddev->private;
1077 struct raid1_info *mirror; 1111 struct raid1_info *mirror;
1112 struct r1bio *r1_bio;
1078 struct bio *read_bio; 1113 struct bio *read_bio;
1079 struct bitmap *bitmap = mddev->bitmap; 1114 struct bitmap *bitmap = mddev->bitmap;
1080 const int op = bio_op(bio); 1115 const int op = bio_op(bio);
@@ -1083,8 +1118,29 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
1083 int max_sectors; 1118 int max_sectors;
1084 int rdisk; 1119 int rdisk;
1085 1120
1086 wait_barrier(conf, bio); 1121 /*
1122 * Still need barrier for READ in case that whole
1123 * array is frozen.
1124 */
1125 wait_read_barrier(conf, bio->bi_iter.bi_sector);
1126
1127 r1_bio = alloc_r1bio(mddev, bio, 0);
1087 1128
1129 /*
1130 * We might need to issue multiple reads to different
1131 * devices if there are bad blocks around, so we keep
1132 * track of the number of reads in bio->bi_phys_segments.
1133 * If this is 0, there is only one r1_bio and no locking
1134 * will be needed when requests complete. If it is
1135 * non-zero, then it is the number of not-completed requests.
1136 */
1137 bio->bi_phys_segments = 0;
1138 bio_clear_flag(bio, BIO_SEG_VALID);
1139
1140 /*
1141 * make_request() can abort the operation when read-ahead is being
1142 * used and no empty request is available.
1143 */
1088read_again: 1144read_again:
1089 rdisk = read_balance(conf, r1_bio, &max_sectors); 1145 rdisk = read_balance(conf, r1_bio, &max_sectors);
1090 1146
@@ -1106,7 +1162,6 @@ read_again:
1106 atomic_read(&bitmap->behind_writes) == 0); 1162 atomic_read(&bitmap->behind_writes) == 0);
1107 } 1163 }
1108 r1_bio->read_disk = rdisk; 1164 r1_bio->read_disk = rdisk;
1109 r1_bio->start_next_window = 0;
1110 1165
1111 read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); 1166 read_bio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
1112 bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector, 1167 bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
@@ -1151,22 +1206,16 @@ read_again:
1151 */ 1206 */
1152 reschedule_retry(r1_bio); 1207 reschedule_retry(r1_bio);
1153 1208
1154 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1209 r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
1155
1156 r1_bio->master_bio = bio;
1157 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1158 r1_bio->state = 0;
1159 r1_bio->mddev = mddev;
1160 r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1161 goto read_again; 1210 goto read_again;
1162 } else 1211 } else
1163 generic_make_request(read_bio); 1212 generic_make_request(read_bio);
1164} 1213}
1165 1214
1166static void raid1_write_request(struct mddev *mddev, struct bio *bio, 1215static void raid1_write_request(struct mddev *mddev, struct bio *bio)
1167 struct r1bio *r1_bio)
1168{ 1216{
1169 struct r1conf *conf = mddev->private; 1217 struct r1conf *conf = mddev->private;
1218 struct r1bio *r1_bio;
1170 int i, disks; 1219 int i, disks;
1171 struct bitmap *bitmap = mddev->bitmap; 1220 struct bitmap *bitmap = mddev->bitmap;
1172 unsigned long flags; 1221 unsigned long flags;
@@ -1180,7 +1229,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1180 int first_clone; 1229 int first_clone;
1181 int sectors_handled; 1230 int sectors_handled;
1182 int max_sectors; 1231 int max_sectors;
1183 sector_t start_next_window;
1184 1232
1185 /* 1233 /*
1186 * Register the new request and wait if the reconstruction 1234 * Register the new request and wait if the reconstruction
@@ -1216,7 +1264,19 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1216 } 1264 }
1217 finish_wait(&conf->wait_barrier, &w); 1265 finish_wait(&conf->wait_barrier, &w);
1218 } 1266 }
1219 start_next_window = wait_barrier(conf, bio); 1267 wait_barrier(conf, bio->bi_iter.bi_sector);
1268
1269 r1_bio = alloc_r1bio(mddev, bio, 0);
1270
1271 /* We might need to issue multiple writes to different
1272 * devices if there are bad blocks around, so we keep
1273 * track of the number of writes in bio->bi_phys_segments.
1274 * If this is 0, there is only one r1_bio and no locking
1275 * will be needed when requests complete. If it is
1276 * non-zero, then it is the number of not-completed requests.
1277 */
1278 bio->bi_phys_segments = 0;
1279 bio_clear_flag(bio, BIO_SEG_VALID);
1220 1280
1221 if (conf->pending_count >= max_queued_requests) { 1281 if (conf->pending_count >= max_queued_requests) {
1222 md_wakeup_thread(mddev->thread); 1282 md_wakeup_thread(mddev->thread);
@@ -1237,7 +1297,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1237 1297
1238 disks = conf->raid_disks * 2; 1298 disks = conf->raid_disks * 2;
1239 retry_write: 1299 retry_write:
1240 r1_bio->start_next_window = start_next_window;
1241 blocked_rdev = NULL; 1300 blocked_rdev = NULL;
1242 rcu_read_lock(); 1301 rcu_read_lock();
1243 max_sectors = r1_bio->sectors; 1302 max_sectors = r1_bio->sectors;
@@ -1304,25 +1363,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1304 if (unlikely(blocked_rdev)) { 1363 if (unlikely(blocked_rdev)) {
1305 /* Wait for this device to become unblocked */ 1364 /* Wait for this device to become unblocked */
1306 int j; 1365 int j;
1307 sector_t old = start_next_window;
1308 1366
1309 for (j = 0; j < i; j++) 1367 for (j = 0; j < i; j++)
1310 if (r1_bio->bios[j]) 1368 if (r1_bio->bios[j])
1311 rdev_dec_pending(conf->mirrors[j].rdev, mddev); 1369 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
1312 r1_bio->state = 0; 1370 r1_bio->state = 0;
1313 allow_barrier(conf, start_next_window, bio->bi_iter.bi_sector); 1371 allow_barrier(conf, bio->bi_iter.bi_sector);
1314 raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk); 1372 raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
1315 md_wait_for_blocked_rdev(blocked_rdev, mddev); 1373 md_wait_for_blocked_rdev(blocked_rdev, mddev);
1316 start_next_window = wait_barrier(conf, bio); 1374 wait_barrier(conf, bio->bi_iter.bi_sector);
1317 /*
1318 * We must make sure the multi r1bios of bio have
1319 * the same value of bi_phys_segments
1320 */
1321 if (bio->bi_phys_segments && old &&
1322 old != start_next_window)
1323 /* Wait for the former r1bio(s) to complete */
1324 wait_event(conf->wait_barrier,
1325 bio->bi_phys_segments == 1);
1326 goto retry_write; 1375 goto retry_write;
1327 } 1376 }
1328 1377
@@ -1440,12 +1489,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1440 /* We need another r1_bio. It has already been counted 1489 /* We need another r1_bio. It has already been counted
1441 * in bio->bi_phys_segments 1490 * in bio->bi_phys_segments
1442 */ 1491 */
1443 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1492 r1_bio = alloc_r1bio(mddev, bio, sectors_handled);
1444 r1_bio->master_bio = bio;
1445 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1446 r1_bio->state = 0;
1447 r1_bio->mddev = mddev;
1448 r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1449 goto retry_write; 1493 goto retry_write;
1450 } 1494 }
1451 1495
@@ -1457,36 +1501,25 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1457 1501
1458static void raid1_make_request(struct mddev *mddev, struct bio *bio) 1502static void raid1_make_request(struct mddev *mddev, struct bio *bio)
1459{ 1503{
1460 struct r1conf *conf = mddev->private; 1504 struct bio *split;
1461 struct r1bio *r1_bio; 1505 sector_t sectors;
1462 1506
1463 /* 1507 /* if bio exceeds barrier unit boundary, split it */
1464 * make_request() can abort the operation when read-ahead is being 1508 do {
1465 * used and no empty request is available. 1509 sectors = align_to_barrier_unit_end(
1466 * 1510 bio->bi_iter.bi_sector, bio_sectors(bio));
1467 */ 1511 if (sectors < bio_sectors(bio)) {
1468 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1512 split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
1469 1513 bio_chain(split, bio);
1470 r1_bio->master_bio = bio; 1514 } else {
1471 r1_bio->sectors = bio_sectors(bio); 1515 split = bio;
1472 r1_bio->state = 0; 1516 }
1473 r1_bio->mddev = mddev;
1474 r1_bio->sector = bio->bi_iter.bi_sector;
1475
1476 /*
1477 * We might need to issue multiple reads to different devices if there
1478 * are bad blocks around, so we keep track of the number of reads in
1479 * bio->bi_phys_segments. If this is 0, there is only one r1_bio and
1480 * no locking will be needed when requests complete. If it is
1481 * non-zero, then it is the number of not-completed requests.
1482 */
1483 bio->bi_phys_segments = 0;
1484 bio_clear_flag(bio, BIO_SEG_VALID);
1485 1517
1486 if (bio_data_dir(bio) == READ) 1518 if (bio_data_dir(split) == READ)
1487 raid1_read_request(mddev, bio, r1_bio); 1519 raid1_read_request(mddev, split);
1488 else 1520 else
1489 raid1_write_request(mddev, bio, r1_bio); 1521 raid1_write_request(mddev, split);
1522 } while (split != bio);
1490} 1523}
1491 1524
1492static void raid1_status(struct seq_file *seq, struct mddev *mddev) 1525static void raid1_status(struct seq_file *seq, struct mddev *mddev)
@@ -1577,19 +1610,11 @@ static void print_conf(struct r1conf *conf)
1577 1610
1578static void close_sync(struct r1conf *conf) 1611static void close_sync(struct r1conf *conf)
1579{ 1612{
1580 wait_barrier(conf, NULL); 1613 wait_all_barriers(conf);
1581 allow_barrier(conf, 0, 0); 1614 allow_all_barriers(conf);
1582 1615
1583 mempool_destroy(conf->r1buf_pool); 1616 mempool_destroy(conf->r1buf_pool);
1584 conf->r1buf_pool = NULL; 1617 conf->r1buf_pool = NULL;
1585
1586 spin_lock_irq(&conf->resync_lock);
1587 conf->next_resync = MaxSector - 2 * NEXT_NORMALIO_DISTANCE;
1588 conf->start_next_window = MaxSector;
1589 conf->current_window_requests +=
1590 conf->next_window_requests;
1591 conf->next_window_requests = 0;
1592 spin_unlock_irq(&conf->resync_lock);
1593} 1618}
1594 1619
1595static int raid1_spare_active(struct mddev *mddev) 1620static int raid1_spare_active(struct mddev *mddev)
@@ -2337,8 +2362,9 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
2337 2362
2338static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) 2363static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
2339{ 2364{
2340 int m; 2365 int m, idx;
2341 bool fail = false; 2366 bool fail = false;
2367
2342 for (m = 0; m < conf->raid_disks * 2 ; m++) 2368 for (m = 0; m < conf->raid_disks * 2 ; m++)
2343 if (r1_bio->bios[m] == IO_MADE_GOOD) { 2369 if (r1_bio->bios[m] == IO_MADE_GOOD) {
2344 struct md_rdev *rdev = conf->mirrors[m].rdev; 2370 struct md_rdev *rdev = conf->mirrors[m].rdev;
@@ -2364,7 +2390,8 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
2364 if (fail) { 2390 if (fail) {
2365 spin_lock_irq(&conf->device_lock); 2391 spin_lock_irq(&conf->device_lock);
2366 list_add(&r1_bio->retry_list, &conf->bio_end_io_list); 2392 list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
2367 conf->nr_queued++; 2393 idx = sector_to_idx(r1_bio->sector);
2394 conf->nr_queued[idx]++;
2368 spin_unlock_irq(&conf->device_lock); 2395 spin_unlock_irq(&conf->device_lock);
2369 md_wakeup_thread(conf->mddev->thread); 2396 md_wakeup_thread(conf->mddev->thread);
2370 } else { 2397 } else {
@@ -2460,15 +2487,8 @@ read_more:
2460 generic_make_request(bio); 2487 generic_make_request(bio);
2461 bio = NULL; 2488 bio = NULL;
2462 2489
2463 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 2490 r1_bio = alloc_r1bio(mddev, mbio, sectors_handled);
2464
2465 r1_bio->master_bio = mbio;
2466 r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
2467 r1_bio->state = 0;
2468 set_bit(R1BIO_ReadError, &r1_bio->state); 2491 set_bit(R1BIO_ReadError, &r1_bio->state);
2469 r1_bio->mddev = mddev;
2470 r1_bio->sector = mbio->bi_iter.bi_sector +
2471 sectors_handled;
2472 2492
2473 goto read_more; 2493 goto read_more;
2474 } else { 2494 } else {
@@ -2487,6 +2507,7 @@ static void raid1d(struct md_thread *thread)
2487 struct r1conf *conf = mddev->private; 2507 struct r1conf *conf = mddev->private;
2488 struct list_head *head = &conf->retry_list; 2508 struct list_head *head = &conf->retry_list;
2489 struct blk_plug plug; 2509 struct blk_plug plug;
2510 int idx;
2490 2511
2491 md_check_recovery(mddev); 2512 md_check_recovery(mddev);
2492 2513
@@ -2494,17 +2515,17 @@ static void raid1d(struct md_thread *thread)
2494 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { 2515 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
2495 LIST_HEAD(tmp); 2516 LIST_HEAD(tmp);
2496 spin_lock_irqsave(&conf->device_lock, flags); 2517 spin_lock_irqsave(&conf->device_lock, flags);
2497 if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { 2518 if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
2498 while (!list_empty(&conf->bio_end_io_list)) { 2519 list_splice_init(&conf->bio_end_io_list, &tmp);
2499 list_move(conf->bio_end_io_list.prev, &tmp);
2500 conf->nr_queued--;
2501 }
2502 }
2503 spin_unlock_irqrestore(&conf->device_lock, flags); 2520 spin_unlock_irqrestore(&conf->device_lock, flags);
2504 while (!list_empty(&tmp)) { 2521 while (!list_empty(&tmp)) {
2505 r1_bio = list_first_entry(&tmp, struct r1bio, 2522 r1_bio = list_first_entry(&tmp, struct r1bio,
2506 retry_list); 2523 retry_list);
2507 list_del(&r1_bio->retry_list); 2524 list_del(&r1_bio->retry_list);
2525 idx = sector_to_idx(r1_bio->sector);
2526 spin_lock_irqsave(&conf->device_lock, flags);
2527 conf->nr_queued[idx]--;
2528 spin_unlock_irqrestore(&conf->device_lock, flags);
2508 if (mddev->degraded) 2529 if (mddev->degraded)
2509 set_bit(R1BIO_Degraded, &r1_bio->state); 2530 set_bit(R1BIO_Degraded, &r1_bio->state);
2510 if (test_bit(R1BIO_WriteError, &r1_bio->state)) 2531 if (test_bit(R1BIO_WriteError, &r1_bio->state))
@@ -2525,7 +2546,8 @@ static void raid1d(struct md_thread *thread)
2525 } 2546 }
2526 r1_bio = list_entry(head->prev, struct r1bio, retry_list); 2547 r1_bio = list_entry(head->prev, struct r1bio, retry_list);
2527 list_del(head->prev); 2548 list_del(head->prev);
2528 conf->nr_queued--; 2549 idx = sector_to_idx(r1_bio->sector);
2550 conf->nr_queued[idx]--;
2529 spin_unlock_irqrestore(&conf->device_lock, flags); 2551 spin_unlock_irqrestore(&conf->device_lock, flags);
2530 2552
2531 mddev = r1_bio->mddev; 2553 mddev = r1_bio->mddev;
@@ -2564,7 +2586,6 @@ static int init_resync(struct r1conf *conf)
2564 conf->poolinfo); 2586 conf->poolinfo);
2565 if (!conf->r1buf_pool) 2587 if (!conf->r1buf_pool)
2566 return -ENOMEM; 2588 return -ENOMEM;
2567 conf->next_resync = 0;
2568 return 0; 2589 return 0;
2569} 2590}
2570 2591
@@ -2593,6 +2614,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2593 int still_degraded = 0; 2614 int still_degraded = 0;
2594 int good_sectors = RESYNC_SECTORS; 2615 int good_sectors = RESYNC_SECTORS;
2595 int min_bad = 0; /* number of sectors that are bad in all devices */ 2616 int min_bad = 0; /* number of sectors that are bad in all devices */
2617 int idx = sector_to_idx(sector_nr);
2596 2618
2597 if (!conf->r1buf_pool) 2619 if (!conf->r1buf_pool)
2598 if (init_resync(conf)) 2620 if (init_resync(conf))
@@ -2642,7 +2664,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2642 * If there is non-resync activity waiting for a turn, then let it 2664 * If there is non-resync activity waiting for a turn, then let it
2643 * though before starting on this new sync request. 2665 * though before starting on this new sync request.
2644 */ 2666 */
2645 if (conf->nr_waiting) 2667 if (conf->nr_waiting[idx])
2646 schedule_timeout_uninterruptible(1); 2668 schedule_timeout_uninterruptible(1);
2647 2669
2648 /* we are incrementing sector_nr below. To be safe, we check against 2670 /* we are incrementing sector_nr below. To be safe, we check against
@@ -2669,6 +2691,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2669 r1_bio->sector = sector_nr; 2691 r1_bio->sector = sector_nr;
2670 r1_bio->state = 0; 2692 r1_bio->state = 0;
2671 set_bit(R1BIO_IsSync, &r1_bio->state); 2693 set_bit(R1BIO_IsSync, &r1_bio->state);
2694 /* make sure good_sectors won't go across barrier unit boundary */
2695 good_sectors = align_to_barrier_unit_end(sector_nr, good_sectors);
2672 2696
2673 for (i = 0; i < conf->raid_disks * 2; i++) { 2697 for (i = 0; i < conf->raid_disks * 2; i++) {
2674 struct md_rdev *rdev; 2698 struct md_rdev *rdev;
@@ -2899,6 +2923,26 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2899 if (!conf) 2923 if (!conf)
2900 goto abort; 2924 goto abort;
2901 2925
2926 conf->nr_pending = kcalloc(BARRIER_BUCKETS_NR,
2927 sizeof(int), GFP_KERNEL);
2928 if (!conf->nr_pending)
2929 goto abort;
2930
2931 conf->nr_waiting = kcalloc(BARRIER_BUCKETS_NR,
2932 sizeof(int), GFP_KERNEL);
2933 if (!conf->nr_waiting)
2934 goto abort;
2935
2936 conf->nr_queued = kcalloc(BARRIER_BUCKETS_NR,
2937 sizeof(int), GFP_KERNEL);
2938 if (!conf->nr_queued)
2939 goto abort;
2940
2941 conf->barrier = kcalloc(BARRIER_BUCKETS_NR,
2942 sizeof(int), GFP_KERNEL);
2943 if (!conf->barrier)
2944 goto abort;
2945
2902 conf->mirrors = kzalloc(sizeof(struct raid1_info) 2946 conf->mirrors = kzalloc(sizeof(struct raid1_info)
2903 * mddev->raid_disks * 2, 2947 * mddev->raid_disks * 2,
2904 GFP_KERNEL); 2948 GFP_KERNEL);
@@ -2954,9 +2998,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2954 conf->pending_count = 0; 2998 conf->pending_count = 0;
2955 conf->recovery_disabled = mddev->recovery_disabled - 1; 2999 conf->recovery_disabled = mddev->recovery_disabled - 1;
2956 3000
2957 conf->start_next_window = MaxSector;
2958 conf->current_window_requests = conf->next_window_requests = 0;
2959
2960 err = -EIO; 3001 err = -EIO;
2961 for (i = 0; i < conf->raid_disks * 2; i++) { 3002 for (i = 0; i < conf->raid_disks * 2; i++) {
2962 3003
@@ -2999,6 +3040,10 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2999 kfree(conf->mirrors); 3040 kfree(conf->mirrors);
3000 safe_put_page(conf->tmppage); 3041 safe_put_page(conf->tmppage);
3001 kfree(conf->poolinfo); 3042 kfree(conf->poolinfo);
3043 kfree(conf->nr_pending);
3044 kfree(conf->nr_waiting);
3045 kfree(conf->nr_queued);
3046 kfree(conf->barrier);
3002 kfree(conf); 3047 kfree(conf);
3003 } 3048 }
3004 return ERR_PTR(err); 3049 return ERR_PTR(err);
@@ -3100,6 +3145,10 @@ static void raid1_free(struct mddev *mddev, void *priv)
3100 kfree(conf->mirrors); 3145 kfree(conf->mirrors);
3101 safe_put_page(conf->tmppage); 3146 safe_put_page(conf->tmppage);
3102 kfree(conf->poolinfo); 3147 kfree(conf->poolinfo);
3148 kfree(conf->nr_pending);
3149 kfree(conf->nr_waiting);
3150 kfree(conf->nr_queued);
3151 kfree(conf->barrier);
3103 kfree(conf); 3152 kfree(conf);
3104} 3153}
3105 3154