 drivers/md/raid1.c | 128 +++++++++++++++++++++++++++++++++++++++++++------
 drivers/md/raid1.h |  14 ++++++
 2 files changed, 129 insertions(+), 13 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 78da3392f577..d9ee4edd7a53 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -66,7 +66,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                          sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
         struct bio *bio = r1_bio->master_bio;
         int done;
         struct r1conf *conf = r1_bio->mddev->private;
+        sector_t start_next_window = r1_bio->start_next_window;
+        sector_t bi_sector = bio->bi_sector;
 
         if (bio->bi_phys_segments) {
                 unsigned long flags;
@@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 bio->bi_phys_segments--;
                 done = (bio->bi_phys_segments == 0);
                 spin_unlock_irqrestore(&conf->device_lock, flags);
+                /*
+                 * make_request() might be waiting for
+                 * bi_phys_segments to decrease
+                 */
+                wake_up(&conf->wait_barrier);
         } else
                 done = 1;
 
@@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
                 */
-                allow_barrier(conf);
+                allow_barrier(conf, start_next_window, bi_sector);
         }
 }
 
@@ -827,10 +835,19 @@ static void raise_barrier(struct r1conf *conf)
         /* block any new IO from starting */
         conf->barrier++;
 
-        /* Now wait for all pending IO to complete */
+        /* For these conditions we must wait:
+         * A: while the array is in a frozen state
+         * B: while barrier >= RESYNC_DEPTH, meaning resync has
+         *    reached the maximum count allowed.
+         * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
+         *    the next resync will reach the window which normal bios
+         *    are handling.
+         */
         wait_event_lock_irq(conf->wait_barrier,
                             !conf->array_frozen &&
-                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                            conf->barrier < RESYNC_DEPTH &&
+                            (conf->start_next_window >=
+                             conf->next_resync + RESYNC_SECTORS),
                             conf->resync_lock);
 
         spin_unlock_irq(&conf->resync_lock);
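The rewritten raise_barrier() no longer waits for nr_pending to reach zero; resync only has to stay clear of the region where normal writes are in flight. The sketch below is a minimal userspace model of the new wait predicate. The struct and the constant values are illustrative stand-ins, not the kernel's definitions (RESYNC_DEPTH and RESYNC_SECTORS live in raid1.c):

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* illustrative stand-ins for the constants defined in raid1.c */
#define RESYNC_DEPTH    32
#define RESYNC_SECTORS  128

struct barrier_state {
        bool     array_frozen;       /* array I/O temporarily frozen */
        int      barrier;            /* in-flight resync requests */
        sector_t next_resync;        /* sector resync will handle next */
        sector_t start_next_window;  /* start of the current normal-I/O
                                      * window, MaxSector when idle */
};

/* resync may proceed only when none of conditions A, B, C hold */
static bool resync_may_proceed(const struct barrier_state *s)
{
        return !s->array_frozen &&                        /* A */
               s->barrier < RESYNC_DEPTH &&               /* B */
               s->start_next_window >=                    /* C */
                       s->next_resync + RESYNC_SECTORS;
}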
@@ -846,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
         wake_up(&conf->wait_barrier);
 }
 
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
+{
+        bool wait = false;
+
+        if (conf->array_frozen || !bio)
+                wait = true;
+        else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+                if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+                        wait = true;
+                else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+                                >= bio_end_sector(bio)) ||
+                         (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                                <= bio->bi_sector))
+                        wait = false;
+                else
+                        wait = true;
+        }
+
+        return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
 {
+        sector_t sector = 0;
+
         spin_lock_irq(&conf->resync_lock);
-        if (conf->barrier) {
+        if (need_to_wait_for_sync(conf, bio)) {
                 conf->nr_waiting++;
                 /* Wait for the barrier to drop.
                  * However if there are already pending
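need_to_wait_for_sync() gives reads a free pass and only blocks writes that could collide with resync: a WRITE proceeds immediately if it lies wholly behind the already-resynced region (with RESYNC_WINDOW_SECTORS of slack) or at least NEXT_NORMALIO_DISTANCE ahead of next_resync. A standalone sketch of that three-way test follows; the constant values are illustrative, the real ones are derived in raid1.c:

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* illustrative values; raid1.c derives the real ones */
#define RESYNC_WINDOW_SECTORS   4096
#define NEXT_NORMALIO_DISTANCE  (3 * RESYNC_WINDOW_SECTORS)

/* returns true when a WRITE spanning [wr_start, wr_end) must wait */
static bool write_must_wait(sector_t next_resync,
                            sector_t wr_start, sector_t wr_end)
{
        if (next_resync < RESYNC_WINDOW_SECTORS)
                return true;    /* resync just began; no safe zone behind it */
        if (next_resync - RESYNC_WINDOW_SECTORS >= wr_end)
                return false;   /* entirely behind the resynced region */
        if (next_resync + NEXT_NORMALIO_DISTANCE <= wr_start)
                return false;   /* far enough ahead, in a later window */
        return true;            /* overlaps the active resync region */
}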
@@ -863,21 +903,65 @@ static void wait_barrier(struct r1conf *conf)
                 wait_event_lock_irq(conf->wait_barrier,
                                     !conf->array_frozen &&
                                     (!conf->barrier ||
-                                    (conf->nr_pending &&
+                                    ((conf->start_next_window <
+                                      conf->next_resync + RESYNC_SECTORS) &&
                                      current->bio_list &&
                                      !bio_list_empty(current->bio_list))),
                                     conf->resync_lock);
                 conf->nr_waiting--;
         }
+
+        if (bio && bio_data_dir(bio) == WRITE) {
+                if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                    <= bio->bi_sector) {
+                        if (conf->start_next_window == MaxSector)
+                                conf->start_next_window =
+                                        conf->next_resync +
+                                        NEXT_NORMALIO_DISTANCE;
+
+                        if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+                            <= bio->bi_sector)
+                                conf->next_window_requests++;
+                        else
+                                conf->current_window_requests++;
+                }
+                if (bio->bi_sector >= conf->start_next_window)
+                        sector = conf->start_next_window;
+        }
+
         conf->nr_pending++;
         spin_unlock_irq(&conf->resync_lock);
+        return sector;
 }
 
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                          sector_t bi_sector)
 {
         unsigned long flags;
+
         spin_lock_irqsave(&conf->resync_lock, flags);
         conf->nr_pending--;
+        if (start_next_window) {
+                if (start_next_window == conf->start_next_window) {
+                        if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                            <= bi_sector)
+                                conf->next_window_requests--;
+                        else
+                                conf->current_window_requests--;
+                } else
+                        conf->current_window_requests--;
+
+                if (!conf->current_window_requests) {
+                        if (conf->next_window_requests) {
+                                conf->current_window_requests =
+                                        conf->next_window_requests;
+                                conf->next_window_requests = 0;
+                                conf->start_next_window +=
+                                        NEXT_NORMALIO_DISTANCE;
+                        } else
+                                conf->start_next_window = MaxSector;
+                }
+        }
         spin_unlock_irqrestore(&conf->resync_lock, flags);
         wake_up(&conf->wait_barrier);
 }
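wait_barrier() now returns the start_next_window value it charged the write against, and allow_barrier() retires that charge. Once the current window drains (current_window_requests hits zero), the window either slides forward by NEXT_NORMALIO_DISTANCE and inherits the next window's requests, or collapses to MaxSector when nothing is pending. A minimal sketch of that slide step, under the same illustrative constants as above:

#include <stdint.h>

typedef uint64_t sector_t;
#define NEXT_NORMALIO_DISTANCE  12288            /* illustrative */
#define MaxSector               ((sector_t)~0ULL)

struct window_state {
        sector_t start_next_window;
        int      current_window_requests;
        int      next_window_requests;
};

/* invoked (under conf->resync_lock in the real code) when a request
 * charged to the current window completes */
static void slide_window(struct window_state *w)
{
        if (w->current_window_requests)
                return;                          /* window still busy */
        if (w->next_window_requests) {
                /* promote the next window; its requests become current */
                w->current_window_requests = w->next_window_requests;
                w->next_window_requests = 0;
                w->start_next_window += NEXT_NORMALIO_DISTANCE;
        } else {
                /* no window writes left; resync is unconstrained */
                w->start_next_window = MaxSector;
        }
}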
@@ -1012,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         int first_clone;
         int sectors_handled;
         int max_sectors;
+        sector_t start_next_window;
 
         /*
          * Register the new request and wait if the reconstruction
@@ -1041,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                 finish_wait(&conf->wait_barrier, &w);
         }
 
-        wait_barrier(conf);
+        start_next_window = wait_barrier(conf, bio);
 
         bitmap = mddev->bitmap;
 
@@ -1162,6 +1247,7 @@ read_again:
 
         disks = conf->raid_disks * 2;
  retry_write:
+        r1_bio->start_next_window = start_next_window;
         blocked_rdev = NULL;
         rcu_read_lock();
         max_sectors = r1_bio->sectors;
@@ -1230,14 +1316,24 @@ read_again:
         if (unlikely(blocked_rdev)) {
                 /* Wait for this device to become unblocked */
                 int j;
+                sector_t old = start_next_window;
 
                 for (j = 0; j < i; j++)
                         if (r1_bio->bios[j])
                                 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                 r1_bio->state = 0;
-                allow_barrier(conf);
+                allow_barrier(conf, start_next_window, bio->bi_sector);
                 md_wait_for_blocked_rdev(blocked_rdev, mddev);
-                wait_barrier(conf);
+                start_next_window = wait_barrier(conf, bio);
+                /*
+                 * We must make sure the multiple r1bios of this bio
+                 * have the same value of bi_phys_segments
+                 */
+                if (bio->bi_phys_segments && old &&
+                    old != start_next_window)
+                        /* Wait for the former r1bio(s) to complete */
+                        wait_event(conf->wait_barrier,
+                                   bio->bi_phys_segments == 1);
                 goto retry_write;
         }
 
@@ -1437,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-        wait_barrier(conf);
-        allow_barrier(conf);
+        wait_barrier(conf, NULL);
+        allow_barrier(conf, 0, 0);
 
         mempool_destroy(conf->r1buf_pool);
         conf->r1buf_pool = NULL;
+
+        conf->next_resync = 0;
+        conf->start_next_window = MaxSector;
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2713,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
         conf->pending_count = 0;
         conf->recovery_disabled = mddev->recovery_disabled - 1;
 
+        conf->start_next_window = MaxSector;
+        conf->current_window_requests = conf->next_window_requests = 0;
+
         err = -EIO;
         for (i = 0; i < conf->raid_disks * 2; i++) {
 
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 331a98a231b4..9bebca7bff2f 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -41,6 +41,19 @@ struct r1conf {
          */
         sector_t                next_resync;
 
+        /* When raid1 starts a resync, we divide the array into four partitions:
+         * |---------|--------------|---------------------|-------------|
+         *        next_resync   start_next_window       end_window
+         * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
+         * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
+         * current_window_requests is the count of normal I/O between
+         *   start_next_window and end_window.
+         * next_window_requests is the count of normal I/O after end_window.
+         */
+        sector_t                start_next_window;
+        int                     current_window_requests;
+        int                     next_window_requests;
+
         spinlock_t              device_lock;
 
         /* list of 'struct r1bio' that need to be processed by raid1d,
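With concrete numbers the four partitions look like this; the program below just computes the boundaries from the definitions in the comment above, using an illustrative NEXT_NORMALIO_DISTANCE:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;
#define NEXT_NORMALIO_DISTANCE  12288            /* illustrative, sectors */

int main(void)
{
        sector_t next_resync       = 100000;
        sector_t start_next_window = next_resync + NEXT_NORMALIO_DISTANCE;
        sector_t end_window        = start_next_window + NEXT_NORMALIO_DISTANCE;

        printf("resynced:       [0, %llu)\n",
               (unsigned long long)next_resync);
        printf("resync zone:    [%llu, %llu)\n",
               (unsigned long long)next_resync,
               (unsigned long long)start_next_window);
        printf("current window: [%llu, %llu)\n",  /* current_window_requests */
               (unsigned long long)start_next_window,
               (unsigned long long)end_window);
        printf("next window:    [%llu, ...)\n",   /* next_window_requests */
               (unsigned long long)end_window);
        return 0;
}

With these values a write at sector 115000 would be charged to current_window_requests and one at sector 130000 to next_window_requests.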
@@ -112,6 +125,7 @@ struct r1bio {
                                          * in this BehindIO request
                                          */
         sector_t                sector;
+        sector_t                start_next_window;
         int                     sectors;
         unsigned long           state;
         struct mddev            *mddev;