-rw-r--r--  drivers/block/drbd/drbd_actlog.c   |  75
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c   | 115
-rw-r--r--  drivers/block/drbd/drbd_int.h      |  12
-rw-r--r--  drivers/block/drbd/drbd_main.c     |  77
-rw-r--r--  drivers/block/drbd/drbd_nl.c       |  28
-rw-r--r--  drivers/block/drbd/drbd_receiver.c |   2
-rw-r--r--  drivers/block/drbd/drbd_req.c      |  52
-rw-r--r--  drivers/block/drbd/drbd_req.h      |  19
-rw-r--r--  drivers/block/drbd/drbd_state.c    |   7
-rw-r--r--  drivers/block/drbd/drbd_worker.c   |   9
-rw-r--r--  include/linux/drbd_genl.h          |   9
-rw-r--r--  include/linux/drbd_limits.h        |   6

12 files changed, 321 insertions, 90 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aeb483daea06..58b5b61628fc 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -114,18 +114,44 @@ struct drbd_atodb_wait {
 
 static int w_al_write_transaction(struct drbd_work *, int);
 
+void *drbd_md_get_buffer(struct drbd_conf *mdev)
+{
+	int r;
+
+	wait_event(mdev->misc_wait,
+		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
+		   mdev->state.disk <= D_FAILED);
+
+	return r ? NULL : page_address(mdev->md_io_page);
+}
+
+void drbd_md_put_buffer(struct drbd_conf *mdev)
+{
+	if (atomic_dec_and_test(&mdev->md_io_in_use))
+		wake_up(&mdev->misc_wait);
+}
+
+static bool md_io_allowed(struct drbd_conf *mdev)
+{
+	enum drbd_disk_state ds = mdev->state.disk;
+	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
+}
+
+void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done)
+{
+	wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev));
+}
+
 static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 				 struct drbd_backing_dev *bdev,
 				 struct page *page, sector_t sector,
 				 int rw, int size)
 {
 	struct bio *bio;
-	struct drbd_md_io md_io;
 	int err;
 
-	md_io.mdev = mdev;
-	init_completion(&md_io.event);
-	md_io.error = 0;
+	mdev->md_io.done = 0;
+	mdev->md_io.error = -ENODEV;
 
 	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
 		rw |= REQ_FUA | REQ_FLUSH;
@@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 	err = -EIO;
 	if (bio_add_page(bio, page, size, 0) != size)
 		goto out;
-	bio->bi_private = &md_io;
+	bio->bi_private = &mdev->md_io;
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	bio_get(bio); /* one bio_put() is in the completion handler */
+	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
 	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
-	wait_for_completion(&md_io.event);
+	wait_until_done_or_disk_failure(mdev, &mdev->md_io.done);
 	if (bio_flagged(bio, BIO_UPTODATE))
-		err = md_io.error;
+		err = mdev->md_io.error;
 
 out:
 	bio_put(bio);
@@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 	int err;
 	struct page *iop = mdev->md_io_page;
 
-	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);
 
 	BUG_ON(!bdev->md_bdev);
 
@@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 		return 0;
 	}
 
-	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
-	buffer = page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
+	if (!buffer) {
+		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
+		aw->err = -EIO;
+		complete(&((struct update_al_work *)w)->event);
+		put_ldev(mdev);
+		return 1;
+	}
 
 	memset(buffer, 0, sizeof(*buffer));
 	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
@@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 		mdev->al_tr_number++;
 	}
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
 
@@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	/* lock out all other meta data io for now,
 	 * and make sure the page is mapped.
 	 */
-	mutex_lock(&mdev->md_io_mutex);
-	b = page_address(mdev->md_io_page);
+	b = drbd_md_get_buffer(mdev);
+	if (!b)
+		return 0;
 
 	/* Always use the full ringbuffer space for now.
 	 * possible optimization: read in all of it,
@@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 	/* IO error */
 	if (rv == -1) {
-		mutex_unlock(&mdev->md_io_mutex);
+		drbd_md_put_buffer(mdev);
 		return 0;
 	}
 
@@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!found_valid) {
 		if (found_initialized != mx)
 			dev_warn(DEV, "No usable activity log found.\n");
-		mutex_unlock(&mdev->md_io_mutex);
+		drbd_md_put_buffer(mdev);
 		return 1;
 	}
 
@@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		if (!expect(rv != 0))
 			goto cancel;
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 
@@ -643,7 +684,7 @@ cancel:
 		mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* ok, we are done with it */
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 
 	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
 		transactions, active_extents);
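
The drbd_md_get_buffer()/drbd_md_put_buffer() pair above replaces md_io_mutex: atomic_cmpxchg(&mdev->md_io_in_use, 0, 1) acts as a try-lock, and the surrounding wait_event() also returns once the disk state drops to D_FAILED, so a caller can no longer block forever on a dead device. Below is a rough user-space analogue of that pattern, as an illustrative sketch only (pthread-based; every name in it is invented, and it is not part of the patch):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int  md_io_in_use;      /* 0 = free, 1 = taken; stands in for mdev->md_io_in_use */
static bool disk_failed;       /* stands in for mdev->state.disk <= D_FAILED */
static char md_io_buffer[4096];

void *md_get_buffer(void)
{
	void *res = NULL;

	pthread_mutex_lock(&lock);
	while (md_io_in_use && !disk_failed)    /* wait_event(misc_wait, ...) */
		pthread_cond_wait(&cond, &lock);
	if (!md_io_in_use) {                    /* atomic_cmpxchg(0 -> 1) succeeded */
		md_io_in_use = 1;
		res = md_io_buffer;
	}                                       /* else: disk failed while waiting -> NULL */
	pthread_mutex_unlock(&lock);
	return res;
}

void md_put_buffer(void)
{
	pthread_mutex_lock(&lock);
	md_io_in_use = 0;                       /* atomic_dec_and_test(&md_io_in_use) */
	pthread_cond_broadcast(&cond);          /* wake_up(&mdev->misc_wait) */
	pthread_mutex_unlock(&lock);
}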
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 52c48143b22a..706e5220dd4a 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
 struct bm_aio_ctx {
 	struct drbd_conf *mdev;
 	atomic_t in_flight;
-	struct completion done;
+	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
 #define BM_AIO_WRITE_HINTED	2
 	int error;
+	struct kref kref;
 };
 
+static void bm_aio_ctx_destroy(struct kref *kref)
+{
+	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+
+	put_ldev(ctx->mdev);
+	kfree(ctx);
+}
+
 /* bv_page may be a copy, or may be the original */
 static void bm_async_io_complete(struct bio *bio, int error)
 {
@@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error)
 
 	bio_put(bio);
 
-	if (atomic_dec_and_test(&ctx->in_flight))
-		complete(&ctx->done);
+	if (atomic_dec_and_test(&ctx->in_flight)) {
+		ctx->done = 1;
+		wake_up(&mdev->misc_wait);
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	}
 }
 
 static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
 {
-	struct bio *bio = bio_alloc_drbd(GFP_KERNEL);
+	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 	struct drbd_conf *mdev = ctx->mdev;
 	struct drbd_bitmap *b = mdev->bitmap;
 	struct page *page;
@@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
  */
 static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
-		.mdev = mdev,
-		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
-		.flags = flags,
-	};
+	struct bm_aio_ctx *ctx;
 	struct drbd_bitmap *b = mdev->bitmap;
 	int num_pages, i, count = 0;
 	unsigned long now;
@@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
 	 * as we submit copies of pages anyways.
 	 */
-	if (!ctx.flags)
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
+		.mdev = mdev,
+		.in_flight = ATOMIC_INIT(1),
+		.done = 0,
+		.flags = flags,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
+	};
+
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (!ctx->flags)
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
 	num_pages = b->bm_number_of_pages;
@@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 				continue;
 			}
 		}
-		atomic_inc(&ctx.in_flight);
-		bm_page_io_async(&ctx, i, rw);
+		atomic_inc(&ctx->in_flight);
+		bm_page_io_async(ctx, i, rw);
 		++count;
 		cond_resched();
 	}
 
 	/*
-	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
-	 * will not complete() early, and decrement / test it here.  If there
+	 * We initialize ctx->in_flight to one to make sure bm_async_io_complete
+	 * will not set ctx->done early, and decrement / test it here.  If there
 	 * are still some bios in flight, we need to wait for them here.
+	 * If all IO is done already (or nothing had been submitted), there is
+	 * no need to wait.  Still, we need to put the kref associated with the
+	 * "in_flight reached zero, all done" event.
 	 */
-	if (!atomic_dec_and_test(&ctx.in_flight))
-		wait_for_completion(&ctx.done);
+	if (!atomic_dec_and_test(&ctx->in_flight))
+		wait_until_done_or_disk_failure(mdev, &ctx->done);
+	else
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
 	/* summary for global bitmap IO */
 	if (flags == 0)
 		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
 			 rw == WRITE ? "WRITE" : "READ",
 			 count, jiffies - now);
 
-	if (ctx.error) {
+	if (ctx->error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
 		drbd_chk_io_error(mdev, 1, true);
-		err = -EIO; /* ctx.error ? */
+		err = -EIO; /* ctx->error ? */
 	}
 
+	if (atomic_read(&ctx->in_flight))
+		err = -EIO; /* Disk failed during IO... */
+
 	now = jiffies;
 	if (rw == WRITE) {
 		drbd_md_flush(mdev);
@@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
+ out:
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	return err;
 }
 
@@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
  */
 int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
+	struct bm_aio_ctx *ctx;
+	int err;
+
+	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
+		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
+		return 0;
+	}
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
 		.mdev = mdev,
 		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+		.done = 0,
 		.flags = BM_AIO_COPY_PAGES,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
 	};
 
-	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
-		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
-		return 0;
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
+		err = -ENODEV;
+		goto out;
 	}
 
-	bm_page_io_async(&ctx, idx, WRITE_SYNC);
-	wait_for_completion(&ctx.done);
+	bm_page_io_async(ctx, idx, WRITE_SYNC);
+	wait_until_done_or_disk_failure(mdev, &ctx->done);
 
-	if (ctx.error)
+	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, true);
 	/* that should force detach, so the in memory bitmap will be
 	 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
-	return ctx.error;
+	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
+ out:
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	return err;
 }
 
 /* NOTE
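
bm_rw() and drbd_bm_write_page() now heap-allocate their bm_aio_ctx and start its kref at 2: one reference belongs to the submitter, one to the "in_flight reached zero" event in bm_async_io_complete(). Whichever side drops the last reference ends up in bm_aio_ctx_destroy(), which releases the ldev reference and frees the context — so the context stays valid even when the submitter abandons the wait on a failed disk. A minimal sketch of the two-reference idea (illustrative only, using C11 atomics rather than the kernel kref API; names are invented):

#include <stdatomic.h>
#include <stdlib.h>

struct aio_ctx {
	atomic_int refs;       /* plays the role of the new struct kref member */
	atomic_int in_flight;
	int error;
};

struct aio_ctx *ctx_create(void)
{
	struct aio_ctx *ctx = malloc(sizeof(*ctx));

	if (ctx) {
		atomic_init(&ctx->refs, 2);       /* submitter + "all IO done" event */
		atomic_init(&ctx->in_flight, 1);  /* mirrors .in_flight = ATOMIC_INIT(1) */
		ctx->error = 0;
	}
	return ctx;
}

void ctx_put(struct aio_ctx *ctx)
{
	if (atomic_fetch_sub(&ctx->refs, 1) == 1)  /* previous value 1 -> now zero */
		free(ctx);                         /* kref_put() -> bm_aio_ctx_destroy() */
}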
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6035784f0de3..4e582058a7c9 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -780,8 +780,7 @@ struct drbd_backing_dev {
 };
 
 struct drbd_md_io {
-	struct drbd_conf *mdev;
-	struct completion event;
+	unsigned int done;
 	int error;
 };
 
@@ -852,6 +851,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	struct drbd_tl_epoch *newest_tle;
 	struct drbd_tl_epoch *oldest_tle;
 	struct list_head out_of_sequence_requests;
+	struct list_head barrier_acked_requests;
 
 	struct crypto_hash *cram_hmac_tfm;
 	struct crypto_hash *integrity_tfm;  /* checksums we compute, updates protected by tconn->data->mutex */
@@ -978,7 +978,8 @@ struct drbd_conf {
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct mutex md_io_mutex;	/* protects the md_io_buffer */
+	struct drbd_md_io md_io;
+	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
 	struct lru_cache *act_log;	/* activity log */
@@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
 extern int drbd_resync_finished(struct drbd_conf *mdev);
 /* maybe rather drbd_main.c ? */
+extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
+extern void drbd_md_put_buffer(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
 extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int);
+extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done);
 extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
 static inline void ov_out_of_sync_print(struct drbd_conf *mdev)
@@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 	case D_OUTDATED:
 	case D_CONSISTENT:
 	case D_UP_TO_DATE:
+	case D_FAILED:
 		/* disk state is stable as well. */
 		break;
 
 	/* no new io accepted during transitional states */
 	case D_ATTACHING:
-	case D_FAILED:
 	case D_NEGOTIATING:
 	case D_UNKNOWN:
 	case D_MASK:
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 448de7bf8223..15384986e4a4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn)
 	tconn->oldest_tle = b;
 	tconn->newest_tle = b;
 	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);
+	INIT_LIST_HEAD(&tconn->barrier_acked_requests);
 
 	return 1;
 }
@@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
 	   These have been list_move'd to the out_of_sequence_requests list in
 	   _req_mod(, BARRIER_ACKED) above.
 	   */
-	list_del_init(&b->requests);
+	list_splice_init(&b->requests, &tconn->barrier_acked_requests);
 	mdev = b->w.mdev;
 
 	nob = b->next;
@@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 		b = tmp;
 		list_splice(&carry_reads, &b->requests);
 	}
-}
 
+	/* Actions operating on the disk state, also want to work on
+	   requests that got barrier acked. */
+	switch (what) {
+	case FAIL_FROZEN_DISK_IO:
+	case RESTART_FROZEN_DISK_IO:
+		list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			_req_mod(req, what);
+		}
+	case CONNECTION_LOST_WHILE_PENDING:
+	case RESEND:
+		break;
+	default:
+		conn_err(tconn, "what = %d in _tl_restart()\n", what);
+	}
+}
 
 /**
  * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 	spin_unlock_irq(&tconn->req_lock);
 }
 
+/**
+ * tl_apply() - Applies an event to all requests for a certain mdev in the TL
+ * @mdev:	DRBD device.
+ * @what:	The action/event to perform with all request objects
+ *
+ * @what might only be ABORT_DISK_IO.
+ */
+void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct drbd_tl_epoch *b;
+	struct list_head *le, *tle;
+	struct drbd_request *req;
+
+	D_ASSERT(what == ABORT_DISK_IO);
+
+	spin_lock_irq(&tconn->req_lock);
+	b = tconn->oldest_tle;
+	while (b) {
+		list_for_each_safe(le, tle, &b->requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			if (req->w.mdev == mdev)
+				_req_mod(req, what);
+		}
+		b = b->next;
+	}
+
+	list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+		req = list_entry(le, struct drbd_request, tl_requests);
+		if (req->w.mdev == mdev)
+			_req_mod(req, what);
+	}
+
+	spin_unlock_irq(&tconn->req_lock);
+}
+
 static int drbd_thread_setup(void *arg)
 {
 	struct drbd_thread *thi = (struct drbd_thread *) arg;
@@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
 	atomic_set(&mdev->ap_in_flight, 0);
+	atomic_set(&mdev->md_io_in_use, 0);
 
-	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->own_state_mutex);
 	mdev->state_mutex = &mdev->own_state_mutex;
 
@@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref)
 	struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
 	struct drbd_tconn *tconn = mdev->tconn;
 
+	del_timer_sync(&mdev->request_timer);
+
 	/* paranoia asserts */
 	D_ASSERT(mdev->open_cnt == 0);
 	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
@@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	if (!get_ldev_if_state(mdev, D_FAILED))
 		return;
 
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
 	memset(buffer, 0, 512);
 
 	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	 * since we updated it on metadata. */
 	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+out:
 	put_ldev(mdev);
 }
 
@@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
 		return ERR_IO_MD_DISK;
 
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
 
 	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
 		/* NOTE: can't do normal error processing here as this is
@@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
 
  err:
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+ out:
 	put_ldev(mdev);
 
 	return rv;
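
tl_apply() walks the two places a request can now live: the chain of drbd_tl_epoch objects starting at oldest_tle, and the new barrier_acked_requests list that tl_release() splices barrier-acked requests onto instead of dropping them. A simplified sketch of that traversal (illustrative only; the real code holds tconn->req_lock and uses the deletion-safe list_for_each_safe(), because _req_mod() may unlink the entry being visited):

struct request { struct request *next; int dev_id; };
struct epoch   { struct epoch *next; struct request *requests; };

static void apply_event(struct request *req)
{
	(void)req;    /* stands in for _req_mod(req, ABORT_DISK_IO) */
}

void tl_apply_sketch(struct epoch *oldest, struct request *barrier_acked, int dev_id)
{
	/* every epoch still in the transfer log... */
	for (struct epoch *b = oldest; b; b = b->next)
		for (struct request *req = b->requests; req; req = req->next)
			if (req->dev_id == dev_id)
				apply_event(req);

	/* ...plus requests already barrier-acked but not yet completed locally */
	for (struct request *req = barrier_acked; req; req = req->next)
		if (req->dev_id == dev_id)
			apply_event(req);
}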
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 97d1dab045d2..bf8d0b077624 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	synchronize_rcu();
 	kfree(old_disk_conf);
 	kfree(old_plan);
+	mod_timer(&mdev->request_timer, jiffies + HZ);
 	goto success;
 
 fail_unlock:
@@ -1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (rv < SS_SUCCESS)
 		goto force_diskless_dec;
 
+	mod_timer(&mdev->request_timer, jiffies + HZ);
+
 	if (mdev->state.role == R_PRIMARY)
 		mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
 	else
@@ -1667,10 +1670,17 @@
 	return 0;
 }
 
-static int adm_detach(struct drbd_conf *mdev)
+static int adm_detach(struct drbd_conf *mdev, int force)
 {
 	enum drbd_state_rv retcode;
 	int ret;
+
+	if (force) {
+		drbd_force_state(mdev, NS(disk, D_FAILED));
+		retcode = SS_SUCCESS;
+		goto out;
+	}
+
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
 	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
 	/* D_FAILED will transition to DISKLESS. */
@@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev)
 		retcode = SS_NOTHING_TO_DO;
 	if (ret)
 		retcode = ERR_INTR;
+out:
 	return retcode;
 }
 
@@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev)
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
+	struct detach_parms parms = { };
+	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = adm_detach(adm_ctx.mdev);
+	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
+		err = detach_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 
 	/* detach */
 	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
-		retcode = adm_detach(mdev);
+		retcode = adm_detach(mdev, 0);
 		if (retcode < SS_SUCCESS) {
 			drbd_msg_put_info("failed to detach");
 			goto out;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7218750d2937..3a7e54b8f418 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_pending_cnt, 0);
 	wake_up(&mdev->misc_wait);
 
-	del_timer(&mdev->request_timer);
-
 	del_timer_sync(&mdev->resync_timer);
 	resync_timer_fn((unsigned long)mdev);
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c4e4553f5c2c..8fa51cda3b7e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 {
 	const unsigned long s = req->rq_state;
 	struct drbd_conf *mdev = req->w.mdev;
-	/* only WRITES may end up here without a master bio (on barrier ack) */
-	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
 
 	/* we must not complete the master bio, while it is
 	 * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 	 * the receiver,
 	 * the bio_endio completion callbacks.
 	 */
-	if (s & RQ_LOCAL_PENDING)
+	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
 		return;
 	if (req->i.waiting) {
 		/* Retry all conflicting peer requests. */
@@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 		req->master_bio = NULL;
 	}
 
+	if (s & RQ_LOCAL_PENDING)
+		return;
+
 	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
 		/* this is disconnected (local only) operation,
 		 * or protocol C P_WRITE_ACK,
@@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		break;
 
 	case COMPLETED_OK:
-		if (bio_data_dir(req->master_bio) == WRITE)
+		if (req->rq_state & RQ_WRITE)
 			mdev->writ_cnt += req->i.size >> 9;
 		else
 			mdev->read_cnt += req->i.size >> 9;
@@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		put_ldev(mdev);
 		break;
 
+	case ABORT_DISK_IO:
+		req->rq_state |= RQ_LOCAL_ABORTED;
+		if (req->rq_state & RQ_WRITE)
+			_req_may_be_done_not_susp(req, m);
+		else
+			goto goto_queue_for_net_read;
+		break;
+
 	case WRITE_COMPLETED_WITH_ERROR:
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
@@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		__drbd_chk_io_error(mdev, false);
 		put_ldev(mdev);
 
+	goto_queue_for_net_read:
+
 		/* no point in retrying if there is no good remote data,
 		 * or we have no connection. */
 		if (mdev->state.pdsk != D_UP_TO_DATE) {
@@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data)
 	struct drbd_request *req; /* oldest request */
 	struct list_head *le;
 	struct net_conf *nc;
-	unsigned long et; /* effective timeout = ko_count * timeout */
+	unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */
 
 	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
-	et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+	ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+
+	if (get_ldev(mdev)) {
+		dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
+		put_ldev(mdev);
+	}
 	rcu_read_unlock();
 
-	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+	et = min_not_zero(dt, ent);
+
+	if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
 		return; /* Recurring timer stopped */
 
 	spin_lock_irq(&tconn->req_lock);
@@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data)
 
 	le = le->prev;
 	req = list_entry(le, struct drbd_request, tl_requests);
-	if (time_is_before_eq_jiffies(req->start_time + et)) {
-		if (req->rq_state & RQ_NET_PENDING) {
+	if (ent && req->rq_state & RQ_NET_PENDING) {
+		if (time_is_before_eq_jiffies(req->start_time + ent)) {
 			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
-		} else {
-			dev_warn(DEV, "Local backing block device frozen?\n");
-			mod_timer(&mdev->request_timer, jiffies + et);
+			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
+		}
+	}
+	if (dt && req->rq_state & RQ_LOCAL_PENDING) {
+		if (time_is_before_eq_jiffies(req->start_time + dt)) {
+			dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+			__drbd_chk_io_error(mdev, 1);
 		}
-	} else {
-		mod_timer(&mdev->request_timer, req->start_time + et);
 	}
-
 	spin_unlock_irq(&tconn->req_lock);
+	mod_timer(&mdev->request_timer, req->start_time + et);
 }
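
request_timer_fn() now derives its period from two knobs: the network effective timeout ent = timeout * HZ/10 * ko_count, and the new local disk timeout dt = disk_timeout * HZ/10. Both are configured in tenths of a second (hence the /10; compare DRBD_TIMEOUT_DEF 60 meaning 6 seconds in drbd_limits.h), and min_not_zero() lets a disabled (zero) knob drop out. A worked example under assumed values (HZ and the config numbers below are made up for illustration; min_not_zero is redefined locally, not the kernel macro):

#include <stdio.h>

#define HZ 250                       /* assumed; kernel-config dependent */
#define min_not_zero(x, y) \
	((x) == 0 ? (y) : ((y) == 0 ? (x) : ((x) < (y) ? (x) : (y))))

int main(void)
{
	unsigned long timeout = 60;      /* net timeout, 0.1s units -> 6 s */
	unsigned long ko_count = 7;
	unsigned long disk_timeout = 0;  /* 0 = disabled, the default */

	unsigned long ent = timeout * HZ / 10 * ko_count;  /* 10500 jiffies = 42 s */
	unsigned long dt  = disk_timeout * HZ / 10;        /* 0: knob disabled */
	unsigned long et  = min_not_zero(dt, ent);         /* -> 10500 */

	printf("ent=%lu dt=%lu -> et=%lu jiffies\n", ent, dt, et);
	return 0;
}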
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 5135c95fbf85..f6aff150addb 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -106,6 +106,7 @@ enum drbd_req_event {
 	READ_COMPLETED_WITH_ERROR,
 	READ_AHEAD_COMPLETED_WITH_ERROR,
 	WRITE_COMPLETED_WITH_ERROR,
+	ABORT_DISK_IO,
 	COMPLETED_OK,
 	RESEND,
 	FAIL_FROZEN_DISK_IO,
@@ -119,18 +120,21 @@
  * same time, so we should hold the request lock anyways.
  */
 enum drbd_req_state_bits {
-	/* 210
-	 * 000: no local possible
-	 * 001: to be submitted
+	/* 3210
+	 * 0000: no local possible
+	 * 0001: to be submitted
 	 *    UNUSED, we could map: 011: submitted, completion still pending
-	 * 110: completed ok
-	 * 010: completed with error
+	 * 0110: completed ok
+	 * 0010: completed with error
+	 * 1001: Aborted (before completion)
+	 * 1x10: Aborted and completed -> free
 	 */
 	__RQ_LOCAL_PENDING,
 	__RQ_LOCAL_COMPLETED,
 	__RQ_LOCAL_OK,
+	__RQ_LOCAL_ABORTED,
 
-	/* 76543
+	/* 87654
 	 * 00000: no network possible
 	 * 00001: to be send
 	 * 00011: to be send, on worker queue
@@ -209,8 +213,9 @@
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
 #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
 #define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
+#define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)
 
-#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */
+#define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)
 
 #define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
 #define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
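
With __RQ_LOCAL_ABORTED appended after __RQ_LOCAL_OK, the local request state now spans four bits, which is why RQ_LOCAL_MASK is rebased on the highest bit: ((RQ_LOCAL_ABORTED << 1) - 1) = 0x0f, where the old ((RQ_LOCAL_OK << 1) - 1) was 0x07. A two-assert check of that arithmetic (illustrative; enum names shortened from the kernel ones):

#include <assert.h>

enum { PENDING, COMPLETED, OK, ABORTED };  /* local-state bit positions 0..3 */
#define BIT(n) (1UL << (n))

int main(void)
{
	assert(((BIT(OK)      << 1) - 1) == 0x07);  /* old RQ_LOCAL_MASK */
	assert(((BIT(ABORTED) << 1) - 1) == 0x0f);  /* new RQ_LOCAL_MASK */
	return 0;
}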
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 4c13a6f4f184..f51cefdbeff3 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -29,6 +29,9 @@
 #include "drbd_int.h"
 #include "drbd_req.h"
 
+/* in drbd_main.c */
+extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what);
+
 struct after_state_chg_work {
 	struct drbd_work w;
 	union drbd_state os;
@@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		rcu_read_unlock();
 		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
 
+		/* Immediately allow completion of all application IO that waits
+		   for completion from the local disk. */
+		tl_apply(mdev, ABORT_DISK_IO);
+
 		/* current state still has to be D_FAILED,
 		 * there is only one way out: to D_DISKLESS,
 		 * and that may only happen after our put_ldev below. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 6410c55831e0..dac8d9bc4bec 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,11 +67,18 @@ rwlock_t global_state_lock;
 void drbd_md_io_complete(struct bio *bio, int error)
 {
 	struct drbd_md_io *md_io;
+	struct drbd_conf *mdev;
 
 	md_io = (struct drbd_md_io *)bio->bi_private;
+	mdev = container_of(md_io, struct drbd_conf, md_io);
+
 	md_io->error = error;
 
-	complete(&md_io->event);
+	md_io->done = 1;
+	wake_up(&mdev->misc_wait);
+	bio_put(bio);
+	drbd_md_put_buffer(mdev);
+	put_ldev(mdev);
 }
 
 /* reads on behalf of the partner,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index e879a9324380..2e6cefefe5e5 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
 	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field_def(20, DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
 	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
 )
 
+GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach)
+)
+
 /*
  * Notifications and commands (genlmsghdr->cmd)
  */
@@ -335,7 +340,9 @@ GENL_op(
 )
 
 GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+
 GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index f1046b13d9f6..ddd332db2a5d 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -50,6 +50,12 @@
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
 
+/* If backing disk takes longer than disk_timeout, mark the disk as failed */
+#define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
+#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
+#define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */
+#define DRBD_DISK_TIMEOUT_SCALE '1'
+
 /* active connection retries when C_WF_CONNECTION */
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120