diff options
Diffstat (limited to 'drivers/md/raid1.c')
| -rw-r--r-- | drivers/md/raid1.c | 162 |
1 files changed, 132 insertions, 30 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index af6681b19776..1e5a540995e9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -66,7 +66,8 @@ | |||
| 66 | */ | 66 | */ |
| 67 | static int max_queued_requests = 1024; | 67 | static int max_queued_requests = 1024; |
| 68 | 68 | ||
| 69 | static void allow_barrier(struct r1conf *conf); | 69 | static void allow_barrier(struct r1conf *conf, sector_t start_next_window, |
| 70 | sector_t bi_sector); | ||
| 70 | static void lower_barrier(struct r1conf *conf); | 71 | static void lower_barrier(struct r1conf *conf); |
| 71 | 72 | ||
| 72 | static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) | 73 | static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) |
| @@ -84,10 +85,12 @@ static void r1bio_pool_free(void *r1_bio, void *data) | |||
| 84 | } | 85 | } |
| 85 | 86 | ||
| 86 | #define RESYNC_BLOCK_SIZE (64*1024) | 87 | #define RESYNC_BLOCK_SIZE (64*1024) |
| 87 | //#define RESYNC_BLOCK_SIZE PAGE_SIZE | 88 | #define RESYNC_DEPTH 32 |
| 88 | #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) | 89 | #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) |
| 89 | #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) | 90 | #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) |
| 90 | #define RESYNC_WINDOW (2048*1024) | 91 | #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH) |
| 92 | #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9) | ||
| 93 | #define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS) | ||
| 91 | 94 | ||
| 92 | static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) | 95 | static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) |
| 93 | { | 96 | { |
| @@ -225,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio) | |||
| 225 | struct bio *bio = r1_bio->master_bio; | 228 | struct bio *bio = r1_bio->master_bio; |
| 226 | int done; | 229 | int done; |
| 227 | struct r1conf *conf = r1_bio->mddev->private; | 230 | struct r1conf *conf = r1_bio->mddev->private; |
| 231 | sector_t start_next_window = r1_bio->start_next_window; | ||
| 232 | sector_t bi_sector = bio->bi_sector; | ||
| 228 | 233 | ||
| 229 | if (bio->bi_phys_segments) { | 234 | if (bio->bi_phys_segments) { |
| 230 | unsigned long flags; | 235 | unsigned long flags; |
| @@ -232,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio) | |||
| 232 | bio->bi_phys_segments--; | 237 | bio->bi_phys_segments--; |
| 233 | done = (bio->bi_phys_segments == 0); | 238 | done = (bio->bi_phys_segments == 0); |
| 234 | spin_unlock_irqrestore(&conf->device_lock, flags); | 239 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 240 | /* | ||
| 241 | * make_request() might be waiting for | ||
| 242 | * bi_phys_segments to decrease | ||
| 243 | */ | ||
| 244 | wake_up(&conf->wait_barrier); | ||
| 235 | } else | 245 | } else |
| 236 | done = 1; | 246 | done = 1; |
| 237 | 247 | ||
| @@ -243,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio) | |||
| 243 | * Wake up any possible resync thread that waits for the device | 253 | * Wake up any possible resync thread that waits for the device |
| 244 | * to go idle. | 254 | * to go idle. |
| 245 | */ | 255 | */ |
| 246 | allow_barrier(conf); | 256 | allow_barrier(conf, start_next_window, bi_sector); |
| 247 | } | 257 | } |
| 248 | } | 258 | } |
| 249 | 259 | ||
| @@ -814,8 +824,6 @@ static void flush_pending_writes(struct r1conf *conf) | |||
| 814 | * there is no normal IO happening. It must arrange to call | 824 | * there is no normal IO happening. It must arrange to call |
| 815 | * lower_barrier when the particular background IO completes. | 825 | * lower_barrier when the particular background IO completes. |
| 816 | */ | 826 | */ |
| 817 | #define RESYNC_DEPTH 32 | ||
| 818 | |||
| 819 | static void raise_barrier(struct r1conf *conf) | 827 | static void raise_barrier(struct r1conf *conf) |
| 820 | { | 828 | { |
| 821 | spin_lock_irq(&conf->resync_lock); | 829 | spin_lock_irq(&conf->resync_lock); |
| @@ -827,9 +835,19 @@ static void raise_barrier(struct r1conf *conf) | |||
| 827 | /* block any new IO from starting */ | 835 | /* block any new IO from starting */ |
| 828 | conf->barrier++; | 836 | conf->barrier++; |
| 829 | 837 | ||
| 830 | /* Now wait for all pending IO to complete */ | 838 | /* For these conditions we must wait: |
| 839 | * A: while the array is in frozen state | ||
| 840 | * B: while barrier >= RESYNC_DEPTH, meaning resync has reached | ||
| 841 | * the maximum count allowed. | ||
| 842 | * C: next_resync + RESYNC_SECTORS > start_next_window, meaning | ||
| 843 | * the next resync would reach into the window that normal bios | ||
| 844 | * are handling. | ||
| 845 | */ | ||
| 831 | wait_event_lock_irq(conf->wait_barrier, | 846 | wait_event_lock_irq(conf->wait_barrier, |
| 832 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 847 | !conf->array_frozen && |
| 848 | conf->barrier < RESYNC_DEPTH && | ||
| 849 | (conf->start_next_window >= | ||
| 850 | conf->next_resync + RESYNC_SECTORS), | ||
| 833 | conf->resync_lock); | 851 | conf->resync_lock); |
| 834 | 852 | ||
| 835 | spin_unlock_irq(&conf->resync_lock); | 853 | spin_unlock_irq(&conf->resync_lock); |
| @@ -845,10 +863,33 @@ static void lower_barrier(struct r1conf *conf) | |||
| 845 | wake_up(&conf->wait_barrier); | 863 | wake_up(&conf->wait_barrier); |
| 846 | } | 864 | } |
| 847 | 865 | ||
| 848 | static void wait_barrier(struct r1conf *conf) | 866 | static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio) |
| 849 | { | 867 | { |
| 868 | bool wait = false; | ||
| 869 | |||
| 870 | if (conf->array_frozen || !bio) | ||
| 871 | wait = true; | ||
| 872 | else if (conf->barrier && bio_data_dir(bio) == WRITE) { | ||
| 873 | if (conf->next_resync < RESYNC_WINDOW_SECTORS) | ||
| 874 | wait = true; | ||
| 875 | else if ((conf->next_resync - RESYNC_WINDOW_SECTORS | ||
| 876 | >= bio_end_sector(bio)) || | ||
| 877 | (conf->next_resync + NEXT_NORMALIO_DISTANCE | ||
| 878 | <= bio->bi_sector)) | ||
| 879 | wait = false; | ||
| 880 | else | ||
| 881 | wait = true; | ||
| 882 | } | ||
| 883 | |||
| 884 | return wait; | ||
| 885 | } | ||
| 886 | |||
| 887 | static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) | ||
| 888 | { | ||
| 889 | sector_t sector = 0; | ||
| 890 | |||
| 850 | spin_lock_irq(&conf->resync_lock); | 891 | spin_lock_irq(&conf->resync_lock); |
| 851 | if (conf->barrier) { | 892 | if (need_to_wait_for_sync(conf, bio)) { |
| 852 | conf->nr_waiting++; | 893 | conf->nr_waiting++; |
| 853 | /* Wait for the barrier to drop. | 894 | /* Wait for the barrier to drop. |
| 854 | * However if there are already pending | 895 | * However if there are already pending |
| @@ -860,22 +901,67 @@ static void wait_barrier(struct r1conf *conf) | |||
| 860 | * count down. | 901 | * count down. |
| 861 | */ | 902 | */ |
| 862 | wait_event_lock_irq(conf->wait_barrier, | 903 | wait_event_lock_irq(conf->wait_barrier, |
| 863 | !conf->barrier || | 904 | !conf->array_frozen && |
| 864 | (conf->nr_pending && | 905 | (!conf->barrier || |
| 906 | ((conf->start_next_window < | ||
| 907 | conf->next_resync + RESYNC_SECTORS) && | ||
| 865 | current->bio_list && | 908 | current->bio_list && |
| 866 | !bio_list_empty(current->bio_list)), | 909 | !bio_list_empty(current->bio_list))), |
| 867 | conf->resync_lock); | 910 | conf->resync_lock); |
| 868 | conf->nr_waiting--; | 911 | conf->nr_waiting--; |
| 869 | } | 912 | } |
| 913 | |||
| 914 | if (bio && bio_data_dir(bio) == WRITE) { | ||
| 915 | if (conf->next_resync + NEXT_NORMALIO_DISTANCE | ||
| 916 | <= bio->bi_sector) { | ||
| 917 | if (conf->start_next_window == MaxSector) | ||
| 918 | conf->start_next_window = | ||
| 919 | conf->next_resync + | ||
| 920 | NEXT_NORMALIO_DISTANCE; | ||
| 921 | |||
| 922 | if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE) | ||
| 923 | <= bio->bi_sector) | ||
| 924 | conf->next_window_requests++; | ||
| 925 | else | ||
| 926 | conf->current_window_requests++; | ||
| 927 | } | ||
| 928 | if (bio->bi_sector >= conf->start_next_window) | ||
| 929 | sector = conf->start_next_window; | ||
| 930 | } | ||
| 931 | |||
| 870 | conf->nr_pending++; | 932 | conf->nr_pending++; |
| 871 | spin_unlock_irq(&conf->resync_lock); | 933 | spin_unlock_irq(&conf->resync_lock); |
| 934 | return sector; | ||
| 872 | } | 935 | } |
| 873 | 936 | ||
| 874 | static void allow_barrier(struct r1conf *conf) | 937 | static void allow_barrier(struct r1conf *conf, sector_t start_next_window, |
| 938 | sector_t bi_sector) | ||
| 875 | { | 939 | { |
| 876 | unsigned long flags; | 940 | unsigned long flags; |
| 941 | |||
| 877 | spin_lock_irqsave(&conf->resync_lock, flags); | 942 | spin_lock_irqsave(&conf->resync_lock, flags); |
| 878 | conf->nr_pending--; | 943 | conf->nr_pending--; |
| 944 | if (start_next_window) { | ||
| 945 | if (start_next_window == conf->start_next_window) { | ||
| 946 | if (conf->start_next_window + NEXT_NORMALIO_DISTANCE | ||
| 947 | <= bi_sector) | ||
| 948 | conf->next_window_requests--; | ||
| 949 | else | ||
| 950 | conf->current_window_requests--; | ||
| 951 | } else | ||
| 952 | conf->current_window_requests--; | ||
| 953 | |||
| 954 | if (!conf->current_window_requests) { | ||
| 955 | if (conf->next_window_requests) { | ||
| 956 | conf->current_window_requests = | ||
| 957 | conf->next_window_requests; | ||
| 958 | conf->next_window_requests = 0; | ||
| 959 | conf->start_next_window += | ||
| 960 | NEXT_NORMALIO_DISTANCE; | ||
| 961 | } else | ||
| 962 | conf->start_next_window = MaxSector; | ||
| 963 | } | ||
| 964 | } | ||
| 879 | spin_unlock_irqrestore(&conf->resync_lock, flags); | 965 | spin_unlock_irqrestore(&conf->resync_lock, flags); |
| 880 | wake_up(&conf->wait_barrier); | 966 | wake_up(&conf->wait_barrier); |
| 881 | } | 967 | } |
| @@ -884,8 +970,7 @@ static void freeze_array(struct r1conf *conf, int extra) | |||
| 884 | { | 970 | { |
| 885 | /* stop syncio and normal IO and wait for everything to | 971 | /* stop syncio and normal IO and wait for everything to |
| 886 | * go quiet. | 972 | * go quiet. |
| 887 | * We increment barrier and nr_waiting, and then | 973 | * We wait until nr_pending match nr_queued+extra |
| 888 | * wait until nr_pending match nr_queued+extra | ||
| 889 | * This is called in the context of one normal IO request | 974 | * This is called in the context of one normal IO request |
| 890 | * that has failed. Thus any sync request that might be pending | 975 | * that has failed. Thus any sync request that might be pending |
| 891 | * will be blocked by nr_pending, and we need to wait for | 976 | * will be blocked by nr_pending, and we need to wait for |
| @@ -895,8 +980,7 @@ static void freeze_array(struct r1conf *conf, int extra) | |||
| 895 | * we continue. | 980 | * we continue. |
| 896 | */ | 981 | */ |
| 897 | spin_lock_irq(&conf->resync_lock); | 982 | spin_lock_irq(&conf->resync_lock); |
| 898 | conf->barrier++; | 983 | conf->array_frozen = 1; |
| 899 | conf->nr_waiting++; | ||
| 900 | wait_event_lock_irq_cmd(conf->wait_barrier, | 984 | wait_event_lock_irq_cmd(conf->wait_barrier, |
| 901 | conf->nr_pending == conf->nr_queued+extra, | 985 | conf->nr_pending == conf->nr_queued+extra, |
| 902 | conf->resync_lock, | 986 | conf->resync_lock, |
| @@ -907,8 +991,7 @@ static void unfreeze_array(struct r1conf *conf) | |||
| 907 | { | 991 | { |
| 908 | /* reverse the effect of the freeze */ | 992 | /* reverse the effect of the freeze */ |
| 909 | spin_lock_irq(&conf->resync_lock); | 993 | spin_lock_irq(&conf->resync_lock); |
| 910 | conf->barrier--; | 994 | conf->array_frozen = 0; |
| 911 | conf->nr_waiting--; | ||
| 912 | wake_up(&conf->wait_barrier); | 995 | wake_up(&conf->wait_barrier); |
| 913 | spin_unlock_irq(&conf->resync_lock); | 996 | spin_unlock_irq(&conf->resync_lock); |
| 914 | } | 997 | } |
| @@ -1013,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
| 1013 | int first_clone; | 1096 | int first_clone; |
| 1014 | int sectors_handled; | 1097 | int sectors_handled; |
| 1015 | int max_sectors; | 1098 | int max_sectors; |
| 1099 | sector_t start_next_window; | ||
| 1016 | 1100 | ||
| 1017 | /* | 1101 | /* |
| 1018 | * Register the new request and wait if the reconstruction | 1102 | * Register the new request and wait if the reconstruction |
| @@ -1042,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
| 1042 | finish_wait(&conf->wait_barrier, &w); | 1126 | finish_wait(&conf->wait_barrier, &w); |
| 1043 | } | 1127 | } |
| 1044 | 1128 | ||
| 1045 | wait_barrier(conf); | 1129 | start_next_window = wait_barrier(conf, bio); |
| 1046 | 1130 | ||
| 1047 | bitmap = mddev->bitmap; | 1131 | bitmap = mddev->bitmap; |
| 1048 | 1132 | ||
| @@ -1163,6 +1247,7 @@ read_again: | |||
| 1163 | 1247 | ||
| 1164 | disks = conf->raid_disks * 2; | 1248 | disks = conf->raid_disks * 2; |
| 1165 | retry_write: | 1249 | retry_write: |
| 1250 | r1_bio->start_next_window = start_next_window; | ||
| 1166 | blocked_rdev = NULL; | 1251 | blocked_rdev = NULL; |
| 1167 | rcu_read_lock(); | 1252 | rcu_read_lock(); |
| 1168 | max_sectors = r1_bio->sectors; | 1253 | max_sectors = r1_bio->sectors; |
| @@ -1231,14 +1316,24 @@ read_again: | |||
| 1231 | if (unlikely(blocked_rdev)) { | 1316 | if (unlikely(blocked_rdev)) { |
| 1232 | /* Wait for this device to become unblocked */ | 1317 | /* Wait for this device to become unblocked */ |
| 1233 | int j; | 1318 | int j; |
| 1319 | sector_t old = start_next_window; | ||
| 1234 | 1320 | ||
| 1235 | for (j = 0; j < i; j++) | 1321 | for (j = 0; j < i; j++) |
| 1236 | if (r1_bio->bios[j]) | 1322 | if (r1_bio->bios[j]) |
| 1237 | rdev_dec_pending(conf->mirrors[j].rdev, mddev); | 1323 | rdev_dec_pending(conf->mirrors[j].rdev, mddev); |
| 1238 | r1_bio->state = 0; | 1324 | r1_bio->state = 0; |
| 1239 | allow_barrier(conf); | 1325 | allow_barrier(conf, start_next_window, bio->bi_sector); |
| 1240 | md_wait_for_blocked_rdev(blocked_rdev, mddev); | 1326 | md_wait_for_blocked_rdev(blocked_rdev, mddev); |
| 1241 | wait_barrier(conf); | 1327 | start_next_window = wait_barrier(conf, bio); |
| 1328 | /* | ||
| 1329 | * We must make sure the multi r1bios of bio have | ||
| 1330 | * the same value of bi_phys_segments | ||
| 1331 | */ | ||
| 1332 | if (bio->bi_phys_segments && old && | ||
| 1333 | old != start_next_window) | ||
| 1334 | /* Wait for the former r1bio(s) to complete */ | ||
| 1335 | wait_event(conf->wait_barrier, | ||
| 1336 | bio->bi_phys_segments == 1); | ||
| 1242 | goto retry_write; | 1337 | goto retry_write; |
| 1243 | } | 1338 | } |
| 1244 | 1339 | ||
| @@ -1438,11 +1533,14 @@ static void print_conf(struct r1conf *conf) | |||
| 1438 | 1533 | ||
| 1439 | static void close_sync(struct r1conf *conf) | 1534 | static void close_sync(struct r1conf *conf) |
| 1440 | { | 1535 | { |
| 1441 | wait_barrier(conf); | 1536 | wait_barrier(conf, NULL); |
| 1442 | allow_barrier(conf); | 1537 | allow_barrier(conf, 0, 0); |
| 1443 | 1538 | ||
| 1444 | mempool_destroy(conf->r1buf_pool); | 1539 | mempool_destroy(conf->r1buf_pool); |
| 1445 | conf->r1buf_pool = NULL; | 1540 | conf->r1buf_pool = NULL; |
| 1541 | |||
| 1542 | conf->next_resync = 0; | ||
| 1543 | conf->start_next_window = MaxSector; | ||
| 1446 | } | 1544 | } |
| 1447 | 1545 | ||
| 1448 | static int raid1_spare_active(struct mddev *mddev) | 1546 | static int raid1_spare_active(struct mddev *mddev) |
| @@ -2714,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) | |||
| 2714 | conf->pending_count = 0; | 2812 | conf->pending_count = 0; |
| 2715 | conf->recovery_disabled = mddev->recovery_disabled - 1; | 2813 | conf->recovery_disabled = mddev->recovery_disabled - 1; |
| 2716 | 2814 | ||
| 2815 | conf->start_next_window = MaxSector; | ||
| 2816 | conf->current_window_requests = conf->next_window_requests = 0; | ||
| 2817 | |||
| 2717 | err = -EIO; | 2818 | err = -EIO; |
| 2718 | for (i = 0; i < conf->raid_disks * 2; i++) { | 2819 | for (i = 0; i < conf->raid_disks * 2; i++) { |
| 2719 | 2820 | ||
| @@ -2871,8 +2972,8 @@ static int stop(struct mddev *mddev) | |||
| 2871 | atomic_read(&bitmap->behind_writes) == 0); | 2972 | atomic_read(&bitmap->behind_writes) == 0); |
| 2872 | } | 2973 | } |
| 2873 | 2974 | ||
| 2874 | raise_barrier(conf); | 2975 | freeze_array(conf, 0); |
| 2875 | lower_barrier(conf); | 2976 | unfreeze_array(conf); |
| 2876 | 2977 | ||
| 2877 | md_unregister_thread(&mddev->thread); | 2978 | md_unregister_thread(&mddev->thread); |
| 2878 | if (conf->r1bio_pool) | 2979 | if (conf->r1bio_pool) |
| @@ -3031,10 +3132,10 @@ static void raid1_quiesce(struct mddev *mddev, int state) | |||
| 3031 | wake_up(&conf->wait_barrier); | 3132 | wake_up(&conf->wait_barrier); |
| 3032 | break; | 3133 | break; |
| 3033 | case 1: | 3134 | case 1: |
| 3034 | raise_barrier(conf); | 3135 | freeze_array(conf, 0); |
| 3035 | break; | 3136 | break; |
| 3036 | case 0: | 3137 | case 0: |
| 3037 | lower_barrier(conf); | 3138 | unfreeze_array(conf); |
| 3038 | break; | 3139 | break; |
| 3039 | } | 3140 | } |
| 3040 | } | 3141 | } |
| @@ -3051,7 +3152,8 @@ static void *raid1_takeover(struct mddev *mddev) | |||
| 3051 | mddev->new_chunk_sectors = 0; | 3152 | mddev->new_chunk_sectors = 0; |
| 3052 | conf = setup_conf(mddev); | 3153 | conf = setup_conf(mddev); |
| 3053 | if (!IS_ERR(conf)) | 3154 | if (!IS_ERR(conf)) |
| 3054 | conf->barrier = 1; | 3155 | /* Array must appear to be quiesced */ |
| 3156 | conf->array_frozen = 1; | ||
| 3055 | return conf; | 3157 | return conf; |
| 3056 | } | 3158 | } |
| 3057 | return ERR_PTR(-EINVAL); | 3159 | return ERR_PTR(-EINVAL); |
