diff options
Diffstat (limited to 'drivers/block/drbd/drbd_bitmap.c')
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 146 |
1 files changed, 107 insertions, 39 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 3030201c69d8..b5c5ff53cb57 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) | |||
205 | static void bm_store_page_idx(struct page *page, unsigned long idx) | 205 | static void bm_store_page_idx(struct page *page, unsigned long idx) |
206 | { | 206 | { |
207 | BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); | 207 | BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); |
208 | page_private(page) |= idx; | 208 | set_page_private(page, idx); |
209 | } | 209 | } |
210 | 210 | ||
211 | static unsigned long bm_page_to_idx(struct page *page) | 211 | static unsigned long bm_page_to_idx(struct page *page) |
@@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) | |||
886 | struct bm_aio_ctx { | 886 | struct bm_aio_ctx { |
887 | struct drbd_conf *mdev; | 887 | struct drbd_conf *mdev; |
888 | atomic_t in_flight; | 888 | atomic_t in_flight; |
889 | struct completion done; | 889 | unsigned int done; |
890 | unsigned flags; | 890 | unsigned flags; |
891 | #define BM_AIO_COPY_PAGES 1 | 891 | #define BM_AIO_COPY_PAGES 1 |
892 | int error; | 892 | int error; |
893 | struct kref kref; | ||
893 | }; | 894 | }; |
894 | 895 | ||
896 | static void bm_aio_ctx_destroy(struct kref *kref) | ||
897 | { | ||
898 | struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); | ||
899 | |||
900 | put_ldev(ctx->mdev); | ||
901 | kfree(ctx); | ||
902 | } | ||
903 | |||
895 | /* bv_page may be a copy, or may be the original */ | 904 | /* bv_page may be a copy, or may be the original */ |
896 | static void bm_async_io_complete(struct bio *bio, int error) | 905 | static void bm_async_io_complete(struct bio *bio, int error) |
897 | { | 906 | { |
@@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error) | |||
930 | 939 | ||
931 | bm_page_unlock_io(mdev, idx); | 940 | bm_page_unlock_io(mdev, idx); |
932 | 941 | ||
933 | /* FIXME give back to page pool */ | ||
934 | if (ctx->flags & BM_AIO_COPY_PAGES) | 942 | if (ctx->flags & BM_AIO_COPY_PAGES) |
935 | put_page(bio->bi_io_vec[0].bv_page); | 943 | mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); |
936 | 944 | ||
937 | bio_put(bio); | 945 | bio_put(bio); |
938 | 946 | ||
939 | if (atomic_dec_and_test(&ctx->in_flight)) | 947 | if (atomic_dec_and_test(&ctx->in_flight)) { |
940 | complete(&ctx->done); | 948 | ctx->done = 1; |
949 | wake_up(&mdev->misc_wait); | ||
950 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
951 | } | ||
941 | } | 952 | } |
942 | 953 | ||
943 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) | 954 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) |
944 | { | 955 | { |
945 | /* we are process context. we always get a bio */ | 956 | struct bio *bio = bio_alloc_drbd(GFP_NOIO); |
946 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); | ||
947 | struct drbd_conf *mdev = ctx->mdev; | 957 | struct drbd_conf *mdev = ctx->mdev; |
948 | struct drbd_bitmap *b = mdev->bitmap; | 958 | struct drbd_bitmap *b = mdev->bitmap; |
949 | struct page *page; | 959 | struct page *page; |
@@ -966,10 +976,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
966 | bm_set_page_unchanged(b->bm_pages[page_nr]); | 976 | bm_set_page_unchanged(b->bm_pages[page_nr]); |
967 | 977 | ||
968 | if (ctx->flags & BM_AIO_COPY_PAGES) { | 978 | if (ctx->flags & BM_AIO_COPY_PAGES) { |
969 | /* FIXME alloc_page is good enough for now, but actually needs | ||
970 | * to use pre-allocated page pool */ | ||
971 | void *src, *dest; | 979 | void *src, *dest; |
972 | page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); | 980 | page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); |
973 | dest = kmap_atomic(page); | 981 | dest = kmap_atomic(page); |
974 | src = kmap_atomic(b->bm_pages[page_nr]); | 982 | src = kmap_atomic(b->bm_pages[page_nr]); |
975 | memcpy(dest, src, PAGE_SIZE); | 983 | memcpy(dest, src, PAGE_SIZE); |
@@ -981,6 +989,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
981 | 989 | ||
982 | bio->bi_bdev = mdev->ldev->md_bdev; | 990 | bio->bi_bdev = mdev->ldev->md_bdev; |
983 | bio->bi_sector = on_disk_sector; | 991 | bio->bi_sector = on_disk_sector; |
992 | /* bio_add_page of a single page to an empty bio will always succeed, | ||
993 | * according to api. Do we want to assert that? */ | ||
984 | bio_add_page(bio, page, len, 0); | 994 | bio_add_page(bio, page, len, 0); |
985 | bio->bi_private = ctx; | 995 | bio->bi_private = ctx; |
986 | bio->bi_end_io = bm_async_io_complete; | 996 | bio->bi_end_io = bm_async_io_complete; |
@@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
999 | /* | 1009 | /* |
1000 | * bm_rw: read/write the whole bitmap from/to its on disk location. | 1010 | * bm_rw: read/write the whole bitmap from/to its on disk location. |
1001 | */ | 1011 | */ |
1002 | static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) | 1012 | static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) |
1003 | { | 1013 | { |
1004 | struct bm_aio_ctx ctx = { | 1014 | struct bm_aio_ctx *ctx; |
1005 | .mdev = mdev, | ||
1006 | .in_flight = ATOMIC_INIT(1), | ||
1007 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | ||
1008 | .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, | ||
1009 | }; | ||
1010 | struct drbd_bitmap *b = mdev->bitmap; | 1015 | struct drbd_bitmap *b = mdev->bitmap; |
1011 | int num_pages, i, count = 0; | 1016 | int num_pages, i, count = 0; |
1012 | unsigned long now; | 1017 | unsigned long now; |
@@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1021 | * For lazy writeout, we don't care for ongoing changes to the bitmap, | 1026 | * For lazy writeout, we don't care for ongoing changes to the bitmap, |
1022 | * as we submit copies of pages anyways. | 1027 | * as we submit copies of pages anyways. |
1023 | */ | 1028 | */ |
1024 | if (!ctx.flags) | 1029 | |
1030 | ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); | ||
1031 | if (!ctx) | ||
1032 | return -ENOMEM; | ||
1033 | |||
1034 | *ctx = (struct bm_aio_ctx) { | ||
1035 | .mdev = mdev, | ||
1036 | .in_flight = ATOMIC_INIT(1), | ||
1037 | .done = 0, | ||
1038 | .flags = flags, | ||
1039 | .error = 0, | ||
1040 | .kref = { ATOMIC_INIT(2) }, | ||
1041 | }; | ||
1042 | |||
1043 | if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ | ||
1044 | dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); | ||
1045 | kfree(ctx); | ||
1046 | return -ENODEV; | ||
1047 | } | ||
1048 | |||
1049 | if (!ctx->flags) | ||
1025 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); | 1050 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); |
1026 | 1051 | ||
1027 | num_pages = b->bm_number_of_pages; | 1052 | num_pages = b->bm_number_of_pages; |
@@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1046 | continue; | 1071 | continue; |
1047 | } | 1072 | } |
1048 | } | 1073 | } |
1049 | atomic_inc(&ctx.in_flight); | 1074 | atomic_inc(&ctx->in_flight); |
1050 | bm_page_io_async(&ctx, i, rw); | 1075 | bm_page_io_async(ctx, i, rw); |
1051 | ++count; | 1076 | ++count; |
1052 | cond_resched(); | 1077 | cond_resched(); |
1053 | } | 1078 | } |
1054 | 1079 | ||
1055 | /* | 1080 | /* |
1056 | * We initialize ctx.in_flight to one to make sure bm_async_io_complete | 1081 | * We initialize ctx->in_flight to one to make sure bm_async_io_complete |
1057 | * will not complete() early, and decrement / test it here. If there | 1082 | * will not set ctx->done early, and decrement / test it here. If there |
1058 | * are still some bios in flight, we need to wait for them here. | 1083 | * are still some bios in flight, we need to wait for them here. |
1084 | * If all IO is done already (or nothing had been submitted), there is | ||
1085 | * no need to wait. Still, we need to put the kref associated with the | ||
1086 | * "in_flight reached zero, all done" event. | ||
1059 | */ | 1087 | */ |
1060 | if (!atomic_dec_and_test(&ctx.in_flight)) | 1088 | if (!atomic_dec_and_test(&ctx->in_flight)) |
1061 | wait_for_completion(&ctx.done); | 1089 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1090 | else | ||
1091 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1092 | |||
1062 | dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", | 1093 | dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", |
1063 | rw == WRITE ? "WRITE" : "READ", | 1094 | rw == WRITE ? "WRITE" : "READ", |
1064 | count, jiffies - now); | 1095 | count, jiffies - now); |
1065 | 1096 | ||
1066 | if (ctx.error) { | 1097 | if (ctx->error) { |
1067 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); | 1098 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); |
1068 | drbd_chk_io_error(mdev, 1, true); | 1099 | drbd_chk_io_error(mdev, 1, true); |
1069 | err = -EIO; /* ctx.error ? */ | 1100 | err = -EIO; /* ctx->error ? */ |
1070 | } | 1101 | } |
1071 | 1102 | ||
1103 | if (atomic_read(&ctx->in_flight)) | ||
1104 | err = -EIO; /* Disk failed during IO... */ | ||
1105 | |||
1072 | now = jiffies; | 1106 | now = jiffies; |
1073 | if (rw == WRITE) { | 1107 | if (rw == WRITE) { |
1074 | drbd_md_flush(mdev); | 1108 | drbd_md_flush(mdev); |
@@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1082 | dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", | 1116 | dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", |
1083 | ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); | 1117 | ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); |
1084 | 1118 | ||
1119 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1085 | return err; | 1120 | return err; |
1086 | } | 1121 | } |
1087 | 1122 | ||
@@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1091 | */ | 1126 | */ |
1092 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) | 1127 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) |
1093 | { | 1128 | { |
1094 | return bm_rw(mdev, READ, 0); | 1129 | return bm_rw(mdev, READ, 0, 0); |
1095 | } | 1130 | } |
1096 | 1131 | ||
1097 | /** | 1132 | /** |
@@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) | |||
1102 | */ | 1137 | */ |
1103 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | 1138 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) |
1104 | { | 1139 | { |
1105 | return bm_rw(mdev, WRITE, 0); | 1140 | return bm_rw(mdev, WRITE, 0, 0); |
1106 | } | 1141 | } |
1107 | 1142 | ||
1108 | /** | 1143 | /** |
@@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | |||
1112 | */ | 1147 | */ |
1113 | int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) | 1148 | int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) |
1114 | { | 1149 | { |
1115 | return bm_rw(mdev, WRITE, upper_idx); | 1150 | return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); |
1151 | } | ||
1152 | |||
1153 | /** | ||
1154 | * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. | ||
1155 | * @mdev: DRBD device. | ||
1156 | * | ||
1157 | * Will only write pages that have changed since last IO. | ||
1158 | * In contrast to drbd_bm_write(), this will copy the bitmap pages | ||
1159 | * to temporary writeout pages. It is intended to trigger a full write-out | ||
1160 | * while still allowing the bitmap to change, for example if a resync or online | ||
1161 | * verify is aborted due to a failed peer disk, while local IO continues, or | ||
1162 | * pending resync acks are still being processed. | ||
1163 | */ | ||
1164 | int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) | ||
1165 | { | ||
1166 | return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); | ||
1116 | } | 1167 | } |
1117 | 1168 | ||
1118 | 1169 | ||
@@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l | |||
1130 | */ | 1181 | */ |
1131 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) | 1182 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) |
1132 | { | 1183 | { |
1133 | struct bm_aio_ctx ctx = { | 1184 | struct bm_aio_ctx *ctx; |
1185 | int err; | ||
1186 | |||
1187 | if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { | ||
1188 | dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); | ||
1189 | return 0; | ||
1190 | } | ||
1191 | |||
1192 | ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); | ||
1193 | if (!ctx) | ||
1194 | return -ENOMEM; | ||
1195 | |||
1196 | *ctx = (struct bm_aio_ctx) { | ||
1134 | .mdev = mdev, | 1197 | .mdev = mdev, |
1135 | .in_flight = ATOMIC_INIT(1), | 1198 | .in_flight = ATOMIC_INIT(1), |
1136 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | 1199 | .done = 0, |
1137 | .flags = BM_AIO_COPY_PAGES, | 1200 | .flags = BM_AIO_COPY_PAGES, |
1201 | .error = 0, | ||
1202 | .kref = { ATOMIC_INIT(2) }, | ||
1138 | }; | 1203 | }; |
1139 | 1204 | ||
1140 | if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { | 1205 | if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ |
1141 | dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); | 1206 | dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); |
1142 | return 0; | 1207 | kfree(ctx); |
1208 | return -ENODEV; | ||
1143 | } | 1209 | } |
1144 | 1210 | ||
1145 | bm_page_io_async(&ctx, idx, WRITE_SYNC); | 1211 | bm_page_io_async(ctx, idx, WRITE_SYNC); |
1146 | wait_for_completion(&ctx.done); | 1212 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1147 | 1213 | ||
1148 | if (ctx.error) | 1214 | if (ctx->error) |
1149 | drbd_chk_io_error(mdev, 1, true); | 1215 | drbd_chk_io_error(mdev, 1, true); |
1150 | /* that should force detach, so the in memory bitmap will be | 1216 | /* that should force detach, so the in memory bitmap will be |
1151 | * gone in a moment as well. */ | 1217 | * gone in a moment as well. */ |
1152 | 1218 | ||
1153 | mdev->bm_writ_cnt++; | 1219 | mdev->bm_writ_cnt++; |
1154 | return ctx.error; | 1220 | err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; |
1221 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1222 | return err; | ||
1155 | } | 1223 | } |
1156 | 1224 | ||
1157 | /* NOTE | 1225 | /* NOTE |