about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/raid1.c 128
-rw-r--r-- drivers/md/raid1.h 14
2 files changed, 129 insertions, 13 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 78da3392f577..d9ee4edd7a53 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -66,7 +66,8 @@
66 */ 66 */
67static int max_queued_requests = 1024; 67static int max_queued_requests = 1024;
68 68
69static void allow_barrier(struct r1conf *conf); 69static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
70 sector_t bi_sector);
70static void lower_barrier(struct r1conf *conf); 71static void lower_barrier(struct r1conf *conf);
71 72
72static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) 73static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
227 struct bio *bio = r1_bio->master_bio; 228 struct bio *bio = r1_bio->master_bio;
228 int done; 229 int done;
229 struct r1conf *conf = r1_bio->mddev->private; 230 struct r1conf *conf = r1_bio->mddev->private;
231 sector_t start_next_window = r1_bio->start_next_window;
232 sector_t bi_sector = bio->bi_sector;
230 233
231 if (bio->bi_phys_segments) { 234 if (bio->bi_phys_segments) {
232 unsigned long flags; 235 unsigned long flags;
@@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
234 bio->bi_phys_segments--; 237 bio->bi_phys_segments--;
235 done = (bio->bi_phys_segments == 0); 238 done = (bio->bi_phys_segments == 0);
236 spin_unlock_irqrestore(&conf->device_lock, flags); 239 spin_unlock_irqrestore(&conf->device_lock, flags);
240 /*
241 * make_request() might be waiting for
242 * bi_phys_segments to decrease
243 */
244 wake_up(&conf->wait_barrier);
237 } else 245 } else
238 done = 1; 246 done = 1;
239 247
@@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
245 * Wake up any possible resync thread that waits for the device 253 * Wake up any possible resync thread that waits for the device
246 * to go idle. 254 * to go idle.
247 */ 255 */
248 allow_barrier(conf); 256 allow_barrier(conf, start_next_window, bi_sector);
249 } 257 }
250} 258}
251 259
@@ -827,10 +835,19 @@ static void raise_barrier(struct r1conf *conf)
827 /* block any new IO from starting */ 835 /* block any new IO from starting */
828 conf->barrier++; 836 conf->barrier++;
829 837
830 /* Now wait for all pending IO to complete */ 838 /* For these conditions we must wait:
839 * A: while the array is in frozen state
840 * B: while barrier >= RESYNC_DEPTH, meaning resync has reached
841 * the maximum count allowed.
842 * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
843 * the next resync would reach into the window that normal bios
844 * are handling.
845 */
831 wait_event_lock_irq(conf->wait_barrier, 846 wait_event_lock_irq(conf->wait_barrier,
832 !conf->array_frozen && 847 !conf->array_frozen &&
833 !conf->nr_pending && conf->barrier < RESYNC_DEPTH, 848 conf->barrier < RESYNC_DEPTH &&
849 (conf->start_next_window >=
850 conf->next_resync + RESYNC_SECTORS),
834 conf->resync_lock); 851 conf->resync_lock);
835 852
836 spin_unlock_irq(&conf->resync_lock); 853 spin_unlock_irq(&conf->resync_lock);
@@ -846,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
846 wake_up(&conf->wait_barrier); 863 wake_up(&conf->wait_barrier);
847} 864}
848 865
849static void wait_barrier(struct r1conf *conf) 866static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
867{
868 bool wait = false;
869
870 if (conf->array_frozen || !bio)
871 wait = true;
872 else if (conf->barrier && bio_data_dir(bio) == WRITE) {
873 if (conf->next_resync < RESYNC_WINDOW_SECTORS)
874 wait = true;
875 else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
876 >= bio_end_sector(bio)) ||
877 (conf->next_resync + NEXT_NORMALIO_DISTANCE
878 <= bio->bi_sector))
879 wait = false;
880 else
881 wait = true;
882 }
883
884 return wait;
885}
886
887static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
850{ 888{
889 sector_t sector = 0;
890
851 spin_lock_irq(&conf->resync_lock); 891 spin_lock_irq(&conf->resync_lock);
852 if (conf->barrier) { 892 if (need_to_wait_for_sync(conf, bio)) {
853 conf->nr_waiting++; 893 conf->nr_waiting++;
854 /* Wait for the barrier to drop. 894 /* Wait for the barrier to drop.
855 * However if there are already pending 895 * However if there are already pending
@@ -863,21 +903,65 @@ static void wait_barrier(struct r1conf *conf)
863 wait_event_lock_irq(conf->wait_barrier, 903 wait_event_lock_irq(conf->wait_barrier,
864 !conf->array_frozen && 904 !conf->array_frozen &&
865 (!conf->barrier || 905 (!conf->barrier ||
866 (conf->nr_pending && 906 ((conf->start_next_window <
907 conf->next_resync + RESYNC_SECTORS) &&
867 current->bio_list && 908 current->bio_list &&
868 !bio_list_empty(current->bio_list))), 909 !bio_list_empty(current->bio_list))),
869 conf->resync_lock); 910 conf->resync_lock);
870 conf->nr_waiting--; 911 conf->nr_waiting--;
871 } 912 }
913
914 if (bio && bio_data_dir(bio) == WRITE) {
915 if (conf->next_resync + NEXT_NORMALIO_DISTANCE
916 <= bio->bi_sector) {
917 if (conf->start_next_window == MaxSector)
918 conf->start_next_window =
919 conf->next_resync +
920 NEXT_NORMALIO_DISTANCE;
921
922 if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
923 <= bio->bi_sector)
924 conf->next_window_requests++;
925 else
926 conf->current_window_requests++;
927 }
928 if (bio->bi_sector >= conf->start_next_window)
929 sector = conf->start_next_window;
930 }
931
872 conf->nr_pending++; 932 conf->nr_pending++;
873 spin_unlock_irq(&conf->resync_lock); 933 spin_unlock_irq(&conf->resync_lock);
934 return sector;
874} 935}
875 936
876static void allow_barrier(struct r1conf *conf) 937static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
938 sector_t bi_sector)
877{ 939{
878 unsigned long flags; 940 unsigned long flags;
941
879 spin_lock_irqsave(&conf->resync_lock, flags); 942 spin_lock_irqsave(&conf->resync_lock, flags);
880 conf->nr_pending--; 943 conf->nr_pending--;
944 if (start_next_window) {
945 if (start_next_window == conf->start_next_window) {
946 if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
947 <= bi_sector)
948 conf->next_window_requests--;
949 else
950 conf->current_window_requests--;
951 } else
952 conf->current_window_requests--;
953
954 if (!conf->current_window_requests) {
955 if (conf->next_window_requests) {
956 conf->current_window_requests =
957 conf->next_window_requests;
958 conf->next_window_requests = 0;
959 conf->start_next_window +=
960 NEXT_NORMALIO_DISTANCE;
961 } else
962 conf->start_next_window = MaxSector;
963 }
964 }
881 spin_unlock_irqrestore(&conf->resync_lock, flags); 965 spin_unlock_irqrestore(&conf->resync_lock, flags);
882 wake_up(&conf->wait_barrier); 966 wake_up(&conf->wait_barrier);
883} 967}
@@ -1012,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1012 int first_clone; 1096 int first_clone;
1013 int sectors_handled; 1097 int sectors_handled;
1014 int max_sectors; 1098 int max_sectors;
1099 sector_t start_next_window;
1015 1100
1016 /* 1101 /*
1017 * Register the new request and wait if the reconstruction 1102 * Register the new request and wait if the reconstruction
@@ -1041,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1041 finish_wait(&conf->wait_barrier, &w); 1126 finish_wait(&conf->wait_barrier, &w);
1042 } 1127 }
1043 1128
1044 wait_barrier(conf); 1129 start_next_window = wait_barrier(conf, bio);
1045 1130
1046 bitmap = mddev->bitmap; 1131 bitmap = mddev->bitmap;
1047 1132
@@ -1162,6 +1247,7 @@ read_again:
1162 1247
1163 disks = conf->raid_disks * 2; 1248 disks = conf->raid_disks * 2;
1164 retry_write: 1249 retry_write:
1250 r1_bio->start_next_window = start_next_window;
1165 blocked_rdev = NULL; 1251 blocked_rdev = NULL;
1166 rcu_read_lock(); 1252 rcu_read_lock();
1167 max_sectors = r1_bio->sectors; 1253 max_sectors = r1_bio->sectors;
@@ -1230,14 +1316,24 @@ read_again:
1230 if (unlikely(blocked_rdev)) { 1316 if (unlikely(blocked_rdev)) {
1231 /* Wait for this device to become unblocked */ 1317 /* Wait for this device to become unblocked */
1232 int j; 1318 int j;
1319 sector_t old = start_next_window;
1233 1320
1234 for (j = 0; j < i; j++) 1321 for (j = 0; j < i; j++)
1235 if (r1_bio->bios[j]) 1322 if (r1_bio->bios[j])
1236 rdev_dec_pending(conf->mirrors[j].rdev, mddev); 1323 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
1237 r1_bio->state = 0; 1324 r1_bio->state = 0;
1238 allow_barrier(conf); 1325 allow_barrier(conf, start_next_window, bio->bi_sector);
1239 md_wait_for_blocked_rdev(blocked_rdev, mddev); 1326 md_wait_for_blocked_rdev(blocked_rdev, mddev);
1240 wait_barrier(conf); 1327 start_next_window = wait_barrier(conf, bio);
1328 /*
1329 * We must make sure the multi r1bios of bio have
1330 * the same value of bi_phys_segments
1331 */
1332 if (bio->bi_phys_segments && old &&
1333 old != start_next_window)
1334 /* Wait for the former r1bio(s) to complete */
1335 wait_event(conf->wait_barrier,
1336 bio->bi_phys_segments == 1);
1241 goto retry_write; 1337 goto retry_write;
1242 } 1338 }
1243 1339
@@ -1437,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
1437 1533
1438static void close_sync(struct r1conf *conf) 1534static void close_sync(struct r1conf *conf)
1439{ 1535{
1440 wait_barrier(conf); 1536 wait_barrier(conf, NULL);
1441 allow_barrier(conf); 1537 allow_barrier(conf, 0, 0);
1442 1538
1443 mempool_destroy(conf->r1buf_pool); 1539 mempool_destroy(conf->r1buf_pool);
1444 conf->r1buf_pool = NULL; 1540 conf->r1buf_pool = NULL;
1541
1542 conf->next_resync = 0;
1543 conf->start_next_window = MaxSector;
1445} 1544}
1446 1545
1447static int raid1_spare_active(struct mddev *mddev) 1546static int raid1_spare_active(struct mddev *mddev)
@@ -2713,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2713 conf->pending_count = 0; 2812 conf->pending_count = 0;
2714 conf->recovery_disabled = mddev->recovery_disabled - 1; 2813 conf->recovery_disabled = mddev->recovery_disabled - 1;
2715 2814
2815 conf->start_next_window = MaxSector;
2816 conf->current_window_requests = conf->next_window_requests = 0;
2817
2716 err = -EIO; 2818 err = -EIO;
2717 for (i = 0; i < conf->raid_disks * 2; i++) { 2819 for (i = 0; i < conf->raid_disks * 2; i++) {
2718 2820
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 331a98a231b4..9bebca7bff2f 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -41,6 +41,19 @@ struct r1conf {
41 */ 41 */
42 sector_t next_resync; 42 sector_t next_resync;
43 43
44 /* When raid1 starts resync, we divide the array into four partitions
45 * |---------|--------------|---------------------|-------------|
46 * next_resync start_next_window end_window
47 * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
48 * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
49 * current_window_requests counts the normal I/O requests between
50 * start_next_window and end_window.
51 * next_window_requests counts the normal I/O requests after end_window.
52 * */
53 sector_t start_next_window;
54 int current_window_requests;
55 int next_window_requests;
56
44 spinlock_t device_lock; 57 spinlock_t device_lock;
45 58
46 /* list of 'struct r1bio' that need to be processed by raid1d, 59 /* list of 'struct r1bio' that need to be processed by raid1d,
@@ -112,6 +125,7 @@ struct r1bio {
112 * in this BehindIO request 125 * in this BehindIO request
113 */ 126 */
114 sector_t sector; 127 sector_t sector;
128 sector_t start_next_window;
115 int sectors; 129 int sectors;
116 unsigned long state; 130 unsigned long state;
117 struct mddev *mddev; 131 struct mddev *mddev;