aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
authorMing Lei <tom.leiming@gmail.com>2017-03-16 12:12:26 -0400
committerShaohua Li <shli@fb.com>2017-03-24 13:41:36 -0400
commit98d30c5812c343c970b5997369b4f6b197c29b3d (patch)
tree9691a1c031edc1b6ae96a369d0a80ac2d15e2ac9 /drivers/md/raid1.c
parenta7234234d0d6373d0510582ab632efbf73243403 (diff)
md: raid1: don't use bio's vec table to manage resync pages
Now we allocate one page array for managing resync pages, instead of using bio's vec table to do that, and the old way is very hacky and won't work any more if multipage bvec is enabled. The introduced cost is that we need to allocate (128 + 16) * raid_disks bytes per r1_bio, and it is fine because the inflight r1_bio for resync shouldn't be much, as pointed by Shaohua. Also the bio_reset() in raid1_sync_request() is removed because all bios are freshly new now and not necessary to reset any more. This patch can be thought as a cleanup too Suggested-by: Shaohua Li <shli@kernel.org> Signed-off-by: Ming Lei <tom.leiming@gmail.com> Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c93
1 files changed, 64 insertions, 29 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7ee0911fba7d..89a384bdae29 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -81,6 +81,24 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
81#define raid1_log(md, fmt, args...) \ 81#define raid1_log(md, fmt, args...) \
82 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) 82 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
83 83
84/*
85 * 'struct resync_pages' stores actual pages used for doing the resync
86 * IO, and it is per-bio, so make .bi_private point to it.
87 */
88static inline struct resync_pages *get_resync_pages(struct bio *bio)
89{
90 return bio->bi_private;
91}
92
93/*
94 * for resync bio, r1bio pointer can be retrieved from the per-bio
95 * 'struct resync_pages'.
96 */
97static inline struct r1bio *get_resync_r1bio(struct bio *bio)
98{
99 return get_resync_pages(bio)->raid_bio;
100}
101
84static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) 102static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
85{ 103{
86 struct pool_info *pi = data; 104 struct pool_info *pi = data;
@@ -108,12 +126,18 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
108 struct r1bio *r1_bio; 126 struct r1bio *r1_bio;
109 struct bio *bio; 127 struct bio *bio;
110 int need_pages; 128 int need_pages;
111 int i, j; 129 int j;
130 struct resync_pages *rps;
112 131
113 r1_bio = r1bio_pool_alloc(gfp_flags, pi); 132 r1_bio = r1bio_pool_alloc(gfp_flags, pi);
114 if (!r1_bio) 133 if (!r1_bio)
115 return NULL; 134 return NULL;
116 135
136 rps = kmalloc(sizeof(struct resync_pages) * pi->raid_disks,
137 gfp_flags);
138 if (!rps)
139 goto out_free_r1bio;
140
117 /* 141 /*
118 * Allocate bios : 1 for reading, n-1 for writing 142 * Allocate bios : 1 for reading, n-1 for writing
119 */ 143 */
@@ -133,22 +157,22 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
133 need_pages = pi->raid_disks; 157 need_pages = pi->raid_disks;
134 else 158 else
135 need_pages = 1; 159 need_pages = 1;
136 for (j = 0; j < need_pages; j++) { 160 for (j = 0; j < pi->raid_disks; j++) {
161 struct resync_pages *rp = &rps[j];
162
137 bio = r1_bio->bios[j]; 163 bio = r1_bio->bios[j];
138 bio->bi_vcnt = RESYNC_PAGES; 164
139 165 if (j < need_pages) {
140 if (bio_alloc_pages(bio, gfp_flags)) 166 if (resync_alloc_pages(rp, gfp_flags))
141 goto out_free_pages; 167 goto out_free_pages;
142 } 168 } else {
143 /* If not user-requests, copy the page pointers to all bios */ 169 memcpy(rp, &rps[0], sizeof(*rp));
144 if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { 170 resync_get_all_pages(rp);
145 for (i = 0; i< RESYNC_PAGES; i++) 171 }
146 for (j = 1; j < pi->raid_disks; j++) { 172
147 struct page *page = 173 rp->idx = 0;
148 r1_bio->bios[0]->bi_io_vec[i].bv_page; 174 rp->raid_bio = r1_bio;
149 get_page(page); 175 bio->bi_private = rp;
150 r1_bio->bios[j]->bi_io_vec[i].bv_page = page;
151 }
152 } 176 }
153 177
154 r1_bio->master_bio = NULL; 178 r1_bio->master_bio = NULL;
@@ -157,11 +181,14 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
157 181
158out_free_pages: 182out_free_pages:
159 while (--j >= 0) 183 while (--j >= 0)
160 bio_free_pages(r1_bio->bios[j]); 184 resync_free_pages(&rps[j]);
161 185
162out_free_bio: 186out_free_bio:
163 while (++j < pi->raid_disks) 187 while (++j < pi->raid_disks)
164 bio_put(r1_bio->bios[j]); 188 bio_put(r1_bio->bios[j]);
189 kfree(rps);
190
191out_free_r1bio:
165 r1bio_pool_free(r1_bio, data); 192 r1bio_pool_free(r1_bio, data);
166 return NULL; 193 return NULL;
167} 194}
@@ -169,14 +196,18 @@ out_free_bio:
169static void r1buf_pool_free(void *__r1_bio, void *data) 196static void r1buf_pool_free(void *__r1_bio, void *data)
170{ 197{
171 struct pool_info *pi = data; 198 struct pool_info *pi = data;
172 int i,j; 199 int i;
173 struct r1bio *r1bio = __r1_bio; 200 struct r1bio *r1bio = __r1_bio;
201 struct resync_pages *rp = NULL;
174 202
175 for (i = 0; i < RESYNC_PAGES; i++) 203 for (i = pi->raid_disks; i--; ) {
176 for (j = pi->raid_disks; j-- ;) 204 rp = get_resync_pages(r1bio->bios[i]);
177 safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); 205 resync_free_pages(rp);
178 for (i=0 ; i < pi->raid_disks; i++)
179 bio_put(r1bio->bios[i]); 206 bio_put(r1bio->bios[i]);
207 }
208
209 /* resync pages array stored in the 1st bio's .bi_private */
210 kfree(rp);
180 211
181 r1bio_pool_free(r1bio, data); 212 r1bio_pool_free(r1bio, data);
182} 213}
@@ -1844,7 +1875,7 @@ abort:
1844 1875
1845static void end_sync_read(struct bio *bio) 1876static void end_sync_read(struct bio *bio)
1846{ 1877{
1847 struct r1bio *r1_bio = bio->bi_private; 1878 struct r1bio *r1_bio = get_resync_r1bio(bio);
1848 1879
1849 update_head_pos(r1_bio->read_disk, r1_bio); 1880 update_head_pos(r1_bio->read_disk, r1_bio);
1850 1881
@@ -1863,7 +1894,7 @@ static void end_sync_read(struct bio *bio)
1863static void end_sync_write(struct bio *bio) 1894static void end_sync_write(struct bio *bio)
1864{ 1895{
1865 int uptodate = !bio->bi_error; 1896 int uptodate = !bio->bi_error;
1866 struct r1bio *r1_bio = bio->bi_private; 1897 struct r1bio *r1_bio = get_resync_r1bio(bio);
1867 struct mddev *mddev = r1_bio->mddev; 1898 struct mddev *mddev = r1_bio->mddev;
1868 struct r1conf *conf = mddev->private; 1899 struct r1conf *conf = mddev->private;
1869 sector_t first_bad; 1900 sector_t first_bad;
@@ -2080,6 +2111,7 @@ static void process_checks(struct r1bio *r1_bio)
2080 int size; 2111 int size;
2081 int error; 2112 int error;
2082 struct bio *b = r1_bio->bios[i]; 2113 struct bio *b = r1_bio->bios[i];
2114 struct resync_pages *rp = get_resync_pages(b);
2083 if (b->bi_end_io != end_sync_read) 2115 if (b->bi_end_io != end_sync_read)
2084 continue; 2116 continue;
2085 /* fixup the bio for reuse, but preserve errno */ 2117 /* fixup the bio for reuse, but preserve errno */
@@ -2092,7 +2124,8 @@ static void process_checks(struct r1bio *r1_bio)
2092 conf->mirrors[i].rdev->data_offset; 2124 conf->mirrors[i].rdev->data_offset;
2093 b->bi_bdev = conf->mirrors[i].rdev->bdev; 2125 b->bi_bdev = conf->mirrors[i].rdev->bdev;
2094 b->bi_end_io = end_sync_read; 2126 b->bi_end_io = end_sync_read;
2095 b->bi_private = r1_bio; 2127 rp->raid_bio = r1_bio;
2128 b->bi_private = rp;
2096 2129
2097 size = b->bi_iter.bi_size; 2130 size = b->bi_iter.bi_size;
2098 for (j = 0; j < vcnt ; j++) { 2131 for (j = 0; j < vcnt ; j++) {
@@ -2746,7 +2779,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2746 for (i = 0; i < conf->raid_disks * 2; i++) { 2779 for (i = 0; i < conf->raid_disks * 2; i++) {
2747 struct md_rdev *rdev; 2780 struct md_rdev *rdev;
2748 bio = r1_bio->bios[i]; 2781 bio = r1_bio->bios[i];
2749 bio_reset(bio);
2750 2782
2751 rdev = rcu_dereference(conf->mirrors[i].rdev); 2783 rdev = rcu_dereference(conf->mirrors[i].rdev);
2752 if (rdev == NULL || 2784 if (rdev == NULL ||
@@ -2802,7 +2834,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2802 atomic_inc(&rdev->nr_pending); 2834 atomic_inc(&rdev->nr_pending);
2803 bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; 2835 bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
2804 bio->bi_bdev = rdev->bdev; 2836 bio->bi_bdev = rdev->bdev;
2805 bio->bi_private = r1_bio;
2806 if (test_bit(FailFast, &rdev->flags)) 2837 if (test_bit(FailFast, &rdev->flags))
2807 bio->bi_opf |= MD_FAILFAST; 2838 bio->bi_opf |= MD_FAILFAST;
2808 } 2839 }
@@ -2888,9 +2919,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2888 } 2919 }
2889 2920
2890 for (i = 0 ; i < conf->raid_disks * 2; i++) { 2921 for (i = 0 ; i < conf->raid_disks * 2; i++) {
2922 struct resync_pages *rp;
2923
2891 bio = r1_bio->bios[i]; 2924 bio = r1_bio->bios[i];
2925 rp = get_resync_pages(bio);
2892 if (bio->bi_end_io) { 2926 if (bio->bi_end_io) {
2893 page = bio->bi_io_vec[bio->bi_vcnt].bv_page; 2927 page = resync_fetch_page(rp, rp->idx++);
2894 2928
2895 /* 2929 /*
2896 * won't fail because the vec table is big 2930 * won't fail because the vec table is big
@@ -2902,7 +2936,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
2902 nr_sectors += len>>9; 2936 nr_sectors += len>>9;
2903 sector_nr += len>>9; 2937 sector_nr += len>>9;
2904 sync_blocks -= (len>>9); 2938 sync_blocks -= (len>>9);
2905 } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); 2939 } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES);
2940
2906 r1_bio->sectors = nr_sectors; 2941 r1_bio->sectors = nr_sectors;
2907 2942
2908 if (mddev_is_clustered(mddev) && 2943 if (mddev_is_clustered(mddev) &&