author     Jens Axboe <axboe@kernel.dk>    2012-05-18 10:20:06 -0400
committer  Jens Axboe <axboe@kernel.dk>    2012-05-18 10:20:06 -0400
commit     4fd1ffaa122cf66bfb710ced43679413df4f3605 (patch)
tree       27bf967e16c4dcc34877e6d33672fb635a7926a2 /drivers/block
parent     13828dec457270b48f433142fce0efd1e85f2c5d (diff)
parent     92b4ca291f8676c9f323166a65fb7447774b2a46 (diff)
Merge branch 'for-jens' of git://git.drbd.org/linux-drbd into for-3.5/drivers
Philipp writes:
These are the updates we have in the drbd-8.3 tree. They are intended
for your "for-3.5/drivers" branch.
These changes include one new feature:
* Allow detach from frozen backing devices with the new --force option;
a configurable timeout for backing devices via the new disk-timeout option
And a large number of bug fixes:
* Fixed a write ordering problem on SyncTarget nodes for a write
to a block that gets resynced at the same time. The bug can
only be triggered by a device whose firmware actually
reorders writes to the same block
* Fixed a race between disconnect and receive_state that could cause
an IO lockup
* Fixed resend/resubmit for requests with disk or network timeout
* Make sure that hard state changes do not disturb the connection
establishment process (e.g. a detach due to an IO error). When the
bug was triggered it caused a retry in the connect process
* Postpone soft state changes so they do not disturb the connection
establishment process (e.g. becoming primary). When the bug
was triggered it could cause both nodes to go into SyncSource state
* Fixed a refcount leak that could cause failures when trying to
unload a protocol family module that was used by DRBD
* Dedicated page pool for meta data IOs
* Deny a normal detach (as opposed to a --force detach) if the user tries
to detach from the last UpToDate disk in the resource
* Fixed a possible protocol error that could be caused by
"unusual" BIOs.
* Enforce the disk-timeout option also on meta-data IO operations
* Implemented stable bitmap pages when we do a full write out of
the bitmap
* Fixed a rare compatibility issue with DRBD versions older than 8.3.7
when negotiating the bio_size
* Fixed a rare race condition where an empty resync could stall
if pause/unpause events happen in parallel
* Made the re-establishment of connections quicker after a broken pipe.
Previously a bug in the code caused it to waste the first
successfully established connection after a broken pipe event.
PS: I am postponing drbd-8.4 for mainline for one or two more kernel
development cycles (the ~400-patch set).
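
For context on the metadata-IO changes below (drbd_md_get_buffer(), drbd_md_put_buffer() and wait_until_done_or_disk_failure() in the drbd_actlog.c hunks): the old md_io_mutex is replaced by an atomic "in use" flag plus a bounded wait, so a detach from a frozen backing device no longer blocks forever on metadata IO. The following is a minimal, hypothetical userspace sketch of that ownership pattern using C11 atomics; the names and the busy-wait retry are illustrative only and are not the kernel implementation, which sleeps on a waitqueue and honours the disk-timeout setting.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int md_io_in_use;          /* 0 = buffer free, 1 = owned */
static char md_io_page[4096];            /* the single shared metadata buffer */

/* Take exclusive ownership of the buffer, or give up if the (simulated)
 * disk has failed, instead of blocking forever on a mutex. */
static void *md_get_buffer(bool disk_failed)
{
    int expected = 0;
    while (!atomic_compare_exchange_weak(&md_io_in_use, &expected, 1)) {
        if (disk_failed)
            return NULL;                 /* caller must cope with failure */
        expected = 0;                    /* retry; the real code sleeps on misc_wait */
    }
    return md_io_page;
}

static void md_put_buffer(void)
{
    atomic_store(&md_io_in_use, 0);      /* the real code also wakes up waiters */
}

int main(void)
{
    char *buf = md_get_buffer(false);
    if (buf) {
        snprintf(buf, sizeof(md_io_page), "al transaction");
        md_put_buffer();
    }
    return 0;
}
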
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/drbd/drbd_actlog.c    104
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c    146
-rw-r--r--  drivers/block/drbd/drbd_int.h        90
-rw-r--r--  drivers/block/drbd/drbd_main.c      357
-rw-r--r--  drivers/block/drbd/drbd_nl.c         48
-rw-r--r--  drivers/block/drbd/drbd_proc.c        2
-rw-r--r--  drivers/block/drbd/drbd_receiver.c   95
-rw-r--r--  drivers/block/drbd/drbd_req.c       132
-rw-r--r--  drivers/block/drbd/drbd_req.h        19
-rw-r--r--  drivers/block/drbd/drbd_worker.c     31
10 files changed, 741 insertions, 283 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index cf0e63dd97da..e54e31b02b88 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -65,39 +65,80 @@ struct drbd_atodb_wait {
65 | 65 | ||
66 | int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); | 66 | int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); |
67 | 67 | ||
68 | void *drbd_md_get_buffer(struct drbd_conf *mdev) | ||
69 | { | ||
70 | int r; | ||
71 | |||
72 | wait_event(mdev->misc_wait, | ||
73 | (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || | ||
74 | mdev->state.disk <= D_FAILED); | ||
75 | |||
76 | return r ? NULL : page_address(mdev->md_io_page); | ||
77 | } | ||
78 | |||
79 | void drbd_md_put_buffer(struct drbd_conf *mdev) | ||
80 | { | ||
81 | if (atomic_dec_and_test(&mdev->md_io_in_use)) | ||
82 | wake_up(&mdev->misc_wait); | ||
83 | } | ||
84 | |||
85 | static bool md_io_allowed(struct drbd_conf *mdev) | ||
86 | { | ||
87 | enum drbd_disk_state ds = mdev->state.disk; | ||
88 | return ds >= D_NEGOTIATING || ds == D_ATTACHING; | ||
89 | } | ||
90 | |||
91 | void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, | ||
92 | unsigned int *done) | ||
93 | { | ||
94 | long dt = bdev->dc.disk_timeout * HZ / 10; | ||
95 | if (dt == 0) | ||
96 | dt = MAX_SCHEDULE_TIMEOUT; | ||
97 | |||
98 | dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); | ||
99 | if (dt == 0) | ||
100 | dev_err(DEV, "meta-data IO operation timed out\n"); | ||
101 | } | ||
102 | |||
68 | static int _drbd_md_sync_page_io(struct drbd_conf *mdev, | 103 | static int _drbd_md_sync_page_io(struct drbd_conf *mdev, |
69 | struct drbd_backing_dev *bdev, | 104 | struct drbd_backing_dev *bdev, |
70 | struct page *page, sector_t sector, | 105 | struct page *page, sector_t sector, |
71 | int rw, int size) | 106 | int rw, int size) |
72 | { | 107 | { |
73 | struct bio *bio; | 108 | struct bio *bio; |
74 | struct drbd_md_io md_io; | ||
75 | int ok; | 109 | int ok; |
76 | 110 | ||
77 | md_io.mdev = mdev; | 111 | mdev->md_io.done = 0; |
78 | init_completion(&md_io.event); | 112 | mdev->md_io.error = -ENODEV; |
79 | md_io.error = 0; | ||
80 | 113 | ||
81 | if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) | 114 | if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) |
82 | rw |= REQ_FUA | REQ_FLUSH; | 115 | rw |= REQ_FUA | REQ_FLUSH; |
83 | rw |= REQ_SYNC; | 116 | rw |= REQ_SYNC; |
84 | 117 | ||
85 | bio = bio_alloc(GFP_NOIO, 1); | 118 | bio = bio_alloc_drbd(GFP_NOIO); |
86 | bio->bi_bdev = bdev->md_bdev; | 119 | bio->bi_bdev = bdev->md_bdev; |
87 | bio->bi_sector = sector; | 120 | bio->bi_sector = sector; |
88 | ok = (bio_add_page(bio, page, size, 0) == size); | 121 | ok = (bio_add_page(bio, page, size, 0) == size); |
89 | if (!ok) | 122 | if (!ok) |
90 | goto out; | 123 | goto out; |
91 | bio->bi_private = &md_io; | 124 | bio->bi_private = &mdev->md_io; |
92 | bio->bi_end_io = drbd_md_io_complete; | 125 | bio->bi_end_io = drbd_md_io_complete; |
93 | bio->bi_rw = rw; | 126 | bio->bi_rw = rw; |
94 | 127 | ||
128 | if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ | ||
129 | dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); | ||
130 | ok = 0; | ||
131 | goto out; | ||
132 | } | ||
133 | |||
134 | bio_get(bio); /* one bio_put() is in the completion handler */ | ||
135 | atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ | ||
95 | if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) | 136 | if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) |
96 | bio_endio(bio, -EIO); | 137 | bio_endio(bio, -EIO); |
97 | else | 138 | else |
98 | submit_bio(rw, bio); | 139 | submit_bio(rw, bio); |
99 | wait_for_completion(&md_io.event); | 140 | wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); |
100 | ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; | 141 | ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0; |
101 | 142 | ||
102 | out: | 143 | out: |
103 | bio_put(bio); | 144 | bio_put(bio); |
@@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, | |||
111 | int offset = 0; | 152 | int offset = 0; |
112 | struct page *iop = mdev->md_io_page; | 153 | struct page *iop = mdev->md_io_page; |
113 | 154 | ||
114 | D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); | 155 | D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); |
115 | 156 | ||
116 | BUG_ON(!bdev->md_bdev); | 157 | BUG_ON(!bdev->md_bdev); |
117 | 158 | ||
@@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
328 | return 1; | 369 | return 1; |
329 | } | 370 | } |
330 | 371 | ||
331 | mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ | 372 | buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ |
332 | buffer = (struct al_transaction *)page_address(mdev->md_io_page); | 373 | if (!buffer) { |
374 | dev_err(DEV, "disk failed while waiting for md_io buffer\n"); | ||
375 | complete(&((struct update_al_work *)w)->event); | ||
376 | put_ldev(mdev); | ||
377 | return 1; | ||
378 | } | ||
333 | 379 | ||
334 | buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); | 380 | buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); |
335 | buffer->tr_number = cpu_to_be32(mdev->al_tr_number); | 381 | buffer->tr_number = cpu_to_be32(mdev->al_tr_number); |
@@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
374 | D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); | 420 | D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); |
375 | mdev->al_tr_number++; | 421 | mdev->al_tr_number++; |
376 | 422 | ||
377 | mutex_unlock(&mdev->md_io_mutex); | 423 | drbd_md_put_buffer(mdev); |
378 | 424 | ||
379 | complete(&((struct update_al_work *)w)->event); | 425 | complete(&((struct update_al_work *)w)->event); |
380 | put_ldev(mdev); | 426 | put_ldev(mdev); |
@@ -443,8 +489,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
443 | /* lock out all other meta data io for now, | 489 | /* lock out all other meta data io for now, |
444 | * and make sure the page is mapped. | 490 | * and make sure the page is mapped. |
445 | */ | 491 | */ |
446 | mutex_lock(&mdev->md_io_mutex); | 492 | buffer = drbd_md_get_buffer(mdev); |
447 | buffer = page_address(mdev->md_io_page); | 493 | if (!buffer) |
494 | return 0; | ||
448 | 495 | ||
449 | /* Find the valid transaction in the log */ | 496 | /* Find the valid transaction in the log */ |
450 | for (i = 0; i <= mx; i++) { | 497 | for (i = 0; i <= mx; i++) { |
@@ -452,7 +499,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
452 | if (rv == 0) | 499 | if (rv == 0) |
453 | continue; | 500 | continue; |
454 | if (rv == -1) { | 501 | if (rv == -1) { |
455 | mutex_unlock(&mdev->md_io_mutex); | 502 | drbd_md_put_buffer(mdev); |
456 | return 0; | 503 | return 0; |
457 | } | 504 | } |
458 | cnr = be32_to_cpu(buffer->tr_number); | 505 | cnr = be32_to_cpu(buffer->tr_number); |
@@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
478 | 525 | ||
479 | if (!found_valid) { | 526 | if (!found_valid) { |
480 | dev_warn(DEV, "No usable activity log found.\n"); | 527 | dev_warn(DEV, "No usable activity log found.\n"); |
481 | mutex_unlock(&mdev->md_io_mutex); | 528 | drbd_md_put_buffer(mdev); |
482 | return 1; | 529 | return 1; |
483 | } | 530 | } |
484 | 531 | ||
@@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
493 | rv = drbd_al_read_tr(mdev, bdev, buffer, i); | 540 | rv = drbd_al_read_tr(mdev, bdev, buffer, i); |
494 | ERR_IF(rv == 0) goto cancel; | 541 | ERR_IF(rv == 0) goto cancel; |
495 | if (rv == -1) { | 542 | if (rv == -1) { |
496 | mutex_unlock(&mdev->md_io_mutex); | 543 | drbd_md_put_buffer(mdev); |
497 | return 0; | 544 | return 0; |
498 | } | 545 | } |
499 | 546 | ||
@@ -534,7 +581,7 @@ cancel: | |||
534 | mdev->al_tr_pos = 0; | 581 | mdev->al_tr_pos = 0; |
535 | 582 | ||
536 | /* ok, we are done with it */ | 583 | /* ok, we are done with it */ |
537 | mutex_unlock(&mdev->md_io_mutex); | 584 | drbd_md_put_buffer(mdev); |
538 | 585 | ||
539 | dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", | 586 | dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", |
540 | transactions, active_extents); | 587 | transactions, active_extents); |
@@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, | |||
671 | else | 718 | else |
672 | ext->rs_failed += count; | 719 | ext->rs_failed += count; |
673 | if (ext->rs_left < ext->rs_failed) { | 720 | if (ext->rs_left < ext->rs_failed) { |
674 | dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " | 721 | dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d " |
675 | "rs_failed=%d count=%d\n", | 722 | "rs_failed=%d count=%d cstate=%s\n", |
676 | (unsigned long long)sector, | 723 | (unsigned long long)sector, |
677 | ext->lce.lc_number, ext->rs_left, | 724 | ext->lce.lc_number, ext->rs_left, |
678 | ext->rs_failed, count); | 725 | ext->rs_failed, count, |
679 | dump_stack(); | 726 | drbd_conn_str(mdev->state.conn)); |
680 | 727 | ||
681 | lc_put(mdev->resync, &ext->lce); | 728 | /* We don't expect to be able to clear more bits |
682 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | 729 | * than have been set when we originally counted |
683 | return; | 730 | * the set bits to cache that value in ext->rs_left. |
731 | * Whatever the reason (disconnect during resync, | ||
732 | * delayed local completion of an application write), | ||
733 | * try to fix it up by recounting here. */ | ||
734 | ext->rs_left = drbd_bm_e_weight(mdev, enr); | ||
684 | } | 735 | } |
685 | } else { | 736 | } else { |
686 | /* Normally this element should be in the cache, | 737 | /* Normally this element should be in the cache, |
@@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) | |||
1192 | put_ldev(mdev); | 1243 | put_ldev(mdev); |
1193 | } | 1244 | } |
1194 | spin_unlock_irq(&mdev->al_lock); | 1245 | spin_unlock_irq(&mdev->al_lock); |
1246 | wake_up(&mdev->al_wait); | ||
1195 | 1247 | ||
1196 | return 0; | 1248 | return 0; |
1197 | } | 1249 | } |
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 3030201c69d8..b5c5ff53cb57 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
205 | static void bm_store_page_idx(struct page *page, unsigned long idx) | 205 | static void bm_store_page_idx(struct page *page, unsigned long idx) |
206 | { | 206 | { |
207 | BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); | 207 | BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); |
208 | page_private(page) |= idx; | 208 | set_page_private(page, idx); |
209 | } | 209 | } |
210 | 210 | ||
211 | static unsigned long bm_page_to_idx(struct page *page) | 211 | static unsigned long bm_page_to_idx(struct page *page) |
@@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) | |||
886 | struct bm_aio_ctx { | 886 | struct bm_aio_ctx { |
887 | struct drbd_conf *mdev; | 887 | struct drbd_conf *mdev; |
888 | atomic_t in_flight; | 888 | atomic_t in_flight; |
889 | struct completion done; | 889 | unsigned int done; |
890 | unsigned flags; | 890 | unsigned flags; |
891 | #define BM_AIO_COPY_PAGES 1 | 891 | #define BM_AIO_COPY_PAGES 1 |
892 | int error; | 892 | int error; |
893 | struct kref kref; | ||
893 | }; | 894 | }; |
894 | 895 | ||
896 | static void bm_aio_ctx_destroy(struct kref *kref) | ||
897 | { | ||
898 | struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); | ||
899 | |||
900 | put_ldev(ctx->mdev); | ||
901 | kfree(ctx); | ||
902 | } | ||
903 | |||
895 | /* bv_page may be a copy, or may be the original */ | 904 | /* bv_page may be a copy, or may be the original */ |
896 | static void bm_async_io_complete(struct bio *bio, int error) | 905 | static void bm_async_io_complete(struct bio *bio, int error) |
897 | { | 906 | { |
@@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error) | |||
930 | 939 | ||
931 | bm_page_unlock_io(mdev, idx); | 940 | bm_page_unlock_io(mdev, idx); |
932 | 941 | ||
933 | /* FIXME give back to page pool */ | ||
934 | if (ctx->flags & BM_AIO_COPY_PAGES) | 942 | if (ctx->flags & BM_AIO_COPY_PAGES) |
935 | put_page(bio->bi_io_vec[0].bv_page); | 943 | mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); |
936 | 944 | ||
937 | bio_put(bio); | 945 | bio_put(bio); |
938 | 946 | ||
939 | if (atomic_dec_and_test(&ctx->in_flight)) | 947 | if (atomic_dec_and_test(&ctx->in_flight)) { |
940 | complete(&ctx->done); | 948 | ctx->done = 1; |
949 | wake_up(&mdev->misc_wait); | ||
950 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
951 | } | ||
941 | } | 952 | } |
942 | 953 | ||
943 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) | 954 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) |
944 | { | 955 | { |
945 | /* we are process context. we always get a bio */ | 956 | struct bio *bio = bio_alloc_drbd(GFP_NOIO); |
946 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); | ||
947 | struct drbd_conf *mdev = ctx->mdev; | 957 | struct drbd_conf *mdev = ctx->mdev; |
948 | struct drbd_bitmap *b = mdev->bitmap; | 958 | struct drbd_bitmap *b = mdev->bitmap; |
949 | struct page *page; | 959 | struct page *page; |
@@ -966,10 +976,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
966 | bm_set_page_unchanged(b->bm_pages[page_nr]); | 976 | bm_set_page_unchanged(b->bm_pages[page_nr]); |
967 | 977 | ||
968 | if (ctx->flags & BM_AIO_COPY_PAGES) { | 978 | if (ctx->flags & BM_AIO_COPY_PAGES) { |
969 | /* FIXME alloc_page is good enough for now, but actually needs | ||
970 | * to use pre-allocated page pool */ | ||
971 | void *src, *dest; | 979 | void *src, *dest; |
972 | page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); | 980 | page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); |
973 | dest = kmap_atomic(page); | 981 | dest = kmap_atomic(page); |
974 | src = kmap_atomic(b->bm_pages[page_nr]); | 982 | src = kmap_atomic(b->bm_pages[page_nr]); |
975 | memcpy(dest, src, PAGE_SIZE); | 983 | memcpy(dest, src, PAGE_SIZE); |
@@ -981,6 +989,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
981 | 989 | ||
982 | bio->bi_bdev = mdev->ldev->md_bdev; | 990 | bio->bi_bdev = mdev->ldev->md_bdev; |
983 | bio->bi_sector = on_disk_sector; | 991 | bio->bi_sector = on_disk_sector; |
992 | /* bio_add_page of a single page to an empty bio will always succeed, | ||
993 | * according to api. Do we want to assert that? */ | ||
984 | bio_add_page(bio, page, len, 0); | 994 | bio_add_page(bio, page, len, 0); |
985 | bio->bi_private = ctx; | 995 | bio->bi_private = ctx; |
986 | bio->bi_end_io = bm_async_io_complete; | 996 | bio->bi_end_io = bm_async_io_complete; |
@@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
999 | /* | 1009 | /* |
1000 | * bm_rw: read/write the whole bitmap from/to its on disk location. | 1010 | * bm_rw: read/write the whole bitmap from/to its on disk location. |
1001 | */ | 1011 | */ |
1002 | static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) | 1012 | static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) |
1003 | { | 1013 | { |
1004 | struct bm_aio_ctx ctx = { | 1014 | struct bm_aio_ctx *ctx; |
1005 | .mdev = mdev, | ||
1006 | .in_flight = ATOMIC_INIT(1), | ||
1007 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | ||
1008 | .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, | ||
1009 | }; | ||
1010 | struct drbd_bitmap *b = mdev->bitmap; | 1015 | struct drbd_bitmap *b = mdev->bitmap; |
1011 | int num_pages, i, count = 0; | 1016 | int num_pages, i, count = 0; |
1012 | unsigned long now; | 1017 | unsigned long now; |
@@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1021 | * For lazy writeout, we don't care for ongoing changes to the bitmap, | 1026 | * For lazy writeout, we don't care for ongoing changes to the bitmap, |
1022 | * as we submit copies of pages anyways. | 1027 | * as we submit copies of pages anyways. |
1023 | */ | 1028 | */ |
1024 | if (!ctx.flags) | 1029 | |
1030 | ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); | ||
1031 | if (!ctx) | ||
1032 | return -ENOMEM; | ||
1033 | |||
1034 | *ctx = (struct bm_aio_ctx) { | ||
1035 | .mdev = mdev, | ||
1036 | .in_flight = ATOMIC_INIT(1), | ||
1037 | .done = 0, | ||
1038 | .flags = flags, | ||
1039 | .error = 0, | ||
1040 | .kref = { ATOMIC_INIT(2) }, | ||
1041 | }; | ||
1042 | |||
1043 | if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ | ||
1044 | dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); | ||
1045 | kfree(ctx); | ||
1046 | return -ENODEV; | ||
1047 | } | ||
1048 | |||
1049 | if (!ctx->flags) | ||
1025 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); | 1050 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); |
1026 | 1051 | ||
1027 | num_pages = b->bm_number_of_pages; | 1052 | num_pages = b->bm_number_of_pages; |
@@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1046 | continue; | 1071 | continue; |
1047 | } | 1072 | } |
1048 | } | 1073 | } |
1049 | atomic_inc(&ctx.in_flight); | 1074 | atomic_inc(&ctx->in_flight); |
1050 | bm_page_io_async(&ctx, i, rw); | 1075 | bm_page_io_async(ctx, i, rw); |
1051 | ++count; | 1076 | ++count; |
1052 | cond_resched(); | 1077 | cond_resched(); |
1053 | } | 1078 | } |
1054 | 1079 | ||
1055 | /* | 1080 | /* |
1056 | * We initialize ctx.in_flight to one to make sure bm_async_io_complete | 1081 | * We initialize ctx->in_flight to one to make sure bm_async_io_complete |
1057 | * will not complete() early, and decrement / test it here. If there | 1082 | * will not set ctx->done early, and decrement / test it here. If there |
1058 | * are still some bios in flight, we need to wait for them here. | 1083 | * are still some bios in flight, we need to wait for them here. |
1084 | * If all IO is done already (or nothing had been submitted), there is | ||
1085 | * no need to wait. Still, we need to put the kref associated with the | ||
1086 | * "in_flight reached zero, all done" event. | ||
1059 | */ | 1087 | */ |
1060 | if (!atomic_dec_and_test(&ctx.in_flight)) | 1088 | if (!atomic_dec_and_test(&ctx->in_flight)) |
1061 | wait_for_completion(&ctx.done); | 1089 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1090 | else | ||
1091 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1092 | |||
1062 | dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", | 1093 | dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", |
1063 | rw == WRITE ? "WRITE" : "READ", | 1094 | rw == WRITE ? "WRITE" : "READ", |
1064 | count, jiffies - now); | 1095 | count, jiffies - now); |
1065 | 1096 | ||
1066 | if (ctx.error) { | 1097 | if (ctx->error) { |
1067 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); | 1098 | dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); |
1068 | drbd_chk_io_error(mdev, 1, true); | 1099 | drbd_chk_io_error(mdev, 1, true); |
1069 | err = -EIO; /* ctx.error ? */ | 1100 | err = -EIO; /* ctx->error ? */ |
1070 | } | 1101 | } |
1071 | 1102 | ||
1103 | if (atomic_read(&ctx->in_flight)) | ||
1104 | err = -EIO; /* Disk failed during IO... */ | ||
1105 | |||
1072 | now = jiffies; | 1106 | now = jiffies; |
1073 | if (rw == WRITE) { | 1107 | if (rw == WRITE) { |
1074 | drbd_md_flush(mdev); | 1108 | drbd_md_flush(mdev); |
@@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1082 | dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", | 1116 | dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", |
1083 | ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); | 1117 | ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); |
1084 | 1118 | ||
1119 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1085 | return err; | 1120 | return err; |
1086 | } | 1121 | } |
1087 | 1122 | ||
@@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id | |||
1091 | */ | 1126 | */ |
1092 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) | 1127 | int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) |
1093 | { | 1128 | { |
1094 | return bm_rw(mdev, READ, 0); | 1129 | return bm_rw(mdev, READ, 0, 0); |
1095 | } | 1130 | } |
1096 | 1131 | ||
1097 | /** | 1132 | /** |
@@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) | |||
1102 | */ | 1137 | */ |
1103 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | 1138 | int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) |
1104 | { | 1139 | { |
1105 | return bm_rw(mdev, WRITE, 0); | 1140 | return bm_rw(mdev, WRITE, 0, 0); |
1106 | } | 1141 | } |
1107 | 1142 | ||
1108 | /** | 1143 | /** |
@@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) | |||
1112 | */ | 1147 | */ |
1113 | int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) | 1148 | int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) |
1114 | { | 1149 | { |
1115 | return bm_rw(mdev, WRITE, upper_idx); | 1150 | return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); |
1151 | } | ||
1152 | |||
1153 | /** | ||
1154 | * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. | ||
1155 | * @mdev: DRBD device. | ||
1156 | * | ||
1157 | * Will only write pages that have changed since last IO. | ||
1158 | * In contrast to drbd_bm_write(), this will copy the bitmap pages | ||
1159 | * to temporary writeout pages. It is intended to trigger a full write-out | ||
1160 | * while still allowing the bitmap to change, for example if a resync or online | ||
1161 | * verify is aborted due to a failed peer disk, while local IO continues, or | ||
1162 | * pending resync acks are still being processed. | ||
1163 | */ | ||
1164 | int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) | ||
1165 | { | ||
1166 | return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); | ||
1116 | } | 1167 | } |
1117 | 1168 | ||
1118 | 1169 | ||
@@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l | |||
1130 | */ | 1181 | */ |
1131 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) | 1182 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) |
1132 | { | 1183 | { |
1133 | struct bm_aio_ctx ctx = { | 1184 | struct bm_aio_ctx *ctx; |
1185 | int err; | ||
1186 | |||
1187 | if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { | ||
1188 | dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); | ||
1189 | return 0; | ||
1190 | } | ||
1191 | |||
1192 | ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); | ||
1193 | if (!ctx) | ||
1194 | return -ENOMEM; | ||
1195 | |||
1196 | *ctx = (struct bm_aio_ctx) { | ||
1134 | .mdev = mdev, | 1197 | .mdev = mdev, |
1135 | .in_flight = ATOMIC_INIT(1), | 1198 | .in_flight = ATOMIC_INIT(1), |
1136 | .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), | 1199 | .done = 0, |
1137 | .flags = BM_AIO_COPY_PAGES, | 1200 | .flags = BM_AIO_COPY_PAGES, |
1201 | .error = 0, | ||
1202 | .kref = { ATOMIC_INIT(2) }, | ||
1138 | }; | 1203 | }; |
1139 | 1204 | ||
1140 | if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { | 1205 | if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ |
1141 | dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); | 1206 | dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); |
1142 | return 0; | 1207 | kfree(ctx); |
1208 | return -ENODEV; | ||
1143 | } | 1209 | } |
1144 | 1210 | ||
1145 | bm_page_io_async(&ctx, idx, WRITE_SYNC); | 1211 | bm_page_io_async(ctx, idx, WRITE_SYNC); |
1146 | wait_for_completion(&ctx.done); | 1212 | wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); |
1147 | 1213 | ||
1148 | if (ctx.error) | 1214 | if (ctx->error) |
1149 | drbd_chk_io_error(mdev, 1, true); | 1215 | drbd_chk_io_error(mdev, 1, true); |
1150 | /* that should force detach, so the in memory bitmap will be | 1216 | /* that should force detach, so the in memory bitmap will be |
1151 | * gone in a moment as well. */ | 1217 | * gone in a moment as well. */ |
1152 | 1218 | ||
1153 | mdev->bm_writ_cnt++; | 1219 | mdev->bm_writ_cnt++; |
1154 | return ctx.error; | 1220 | err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; |
1221 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1222 | return err; | ||
1155 | } | 1223 | } |
1156 | 1224 | ||
1157 | /* NOTE | 1225 | /* NOTE |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8d680562ba73..02f013a073a7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -712,7 +712,6 @@ struct drbd_request {
712 | struct list_head tl_requests; /* ring list in the transfer log */ | 712 | struct list_head tl_requests; /* ring list in the transfer log */ |
713 | struct bio *master_bio; /* master bio pointer */ | 713 | struct bio *master_bio; /* master bio pointer */ |
714 | unsigned long rq_state; /* see comments above _req_mod() */ | 714 | unsigned long rq_state; /* see comments above _req_mod() */ |
715 | int seq_num; | ||
716 | unsigned long start_time; | 715 | unsigned long start_time; |
717 | }; | 716 | }; |
718 | 717 | ||
@@ -851,6 +850,7 @@ enum { | |||
851 | NEW_CUR_UUID, /* Create new current UUID when thawing IO */ | 850 | NEW_CUR_UUID, /* Create new current UUID when thawing IO */ |
852 | AL_SUSPENDED, /* Activity logging is currently suspended. */ | 851 | AL_SUSPENDED, /* Activity logging is currently suspended. */ |
853 | AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ | 852 | AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ |
853 | STATE_SENT, /* Do not change state/UUIDs while this is set */ | ||
854 | }; | 854 | }; |
855 | 855 | ||
856 | struct drbd_bitmap; /* opaque for drbd_conf */ | 856 | struct drbd_bitmap; /* opaque for drbd_conf */ |
@@ -862,31 +862,30 @@ enum bm_flag { | |||
862 | BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ | 862 | BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ |
863 | 863 | ||
864 | /* currently locked for bulk operation */ | 864 | /* currently locked for bulk operation */ |
865 | BM_LOCKED_MASK = 0x7, | 865 | BM_LOCKED_MASK = 0xf, |
866 | 866 | ||
867 | /* in detail, that is: */ | 867 | /* in detail, that is: */ |
868 | BM_DONT_CLEAR = 0x1, | 868 | BM_DONT_CLEAR = 0x1, |
869 | BM_DONT_SET = 0x2, | 869 | BM_DONT_SET = 0x2, |
870 | BM_DONT_TEST = 0x4, | 870 | BM_DONT_TEST = 0x4, |
871 | 871 | ||
872 | /* so we can mark it locked for bulk operation, | ||
873 | * and still allow all non-bulk operations */ | ||
874 | BM_IS_LOCKED = 0x8, | ||
875 | |||
872 | /* (test bit, count bit) allowed (common case) */ | 876 | /* (test bit, count bit) allowed (common case) */ |
873 | BM_LOCKED_TEST_ALLOWED = 0x3, | 877 | BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED, |
874 | 878 | ||
875 | /* testing bits, as well as setting new bits allowed, but clearing bits | 879 | /* testing bits, as well as setting new bits allowed, but clearing bits |
876 | * would be unexpected. Used during bitmap receive. Setting new bits | 880 | * would be unexpected. Used during bitmap receive. Setting new bits |
877 | * requires sending of "out-of-sync" information, though. */ | 881 | * requires sending of "out-of-sync" information, though. */ |
878 | BM_LOCKED_SET_ALLOWED = 0x1, | 882 | BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED, |
879 | 883 | ||
880 | /* clear is not expected while bitmap is locked for bulk operation */ | 884 | /* for drbd_bm_write_copy_pages, everything is allowed, |
885 | * only concurrent bulk operations are locked out. */ | ||
886 | BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED, | ||
881 | }; | 887 | }; |
882 | 888 | ||
883 | |||
884 | /* TODO sort members for performance | ||
885 | * MAYBE group them further */ | ||
886 | |||
887 | /* THINK maybe we actually want to use the default "event/%s" worker threads | ||
888 | * or similar in linux 2.6, which uses per cpu data and threads. | ||
889 | */ | ||
890 | struct drbd_work_queue { | 889 | struct drbd_work_queue { |
891 | struct list_head q; | 890 | struct list_head q; |
892 | struct semaphore s; /* producers up it, worker down()s it */ | 891 | struct semaphore s; /* producers up it, worker down()s it */ |
@@ -938,8 +937,7 @@ struct drbd_backing_dev { | |||
938 | }; | 937 | }; |
939 | 938 | ||
940 | struct drbd_md_io { | 939 | struct drbd_md_io { |
941 | struct drbd_conf *mdev; | 940 | unsigned int done; |
942 | struct completion event; | ||
943 | int error; | 941 | int error; |
944 | }; | 942 | }; |
945 | 943 | ||
@@ -1022,6 +1020,7 @@ struct drbd_conf { | |||
1022 | struct drbd_tl_epoch *newest_tle; | 1020 | struct drbd_tl_epoch *newest_tle; |
1023 | struct drbd_tl_epoch *oldest_tle; | 1021 | struct drbd_tl_epoch *oldest_tle; |
1024 | struct list_head out_of_sequence_requests; | 1022 | struct list_head out_of_sequence_requests; |
1023 | struct list_head barrier_acked_requests; | ||
1025 | struct hlist_head *tl_hash; | 1024 | struct hlist_head *tl_hash; |
1026 | unsigned int tl_hash_s; | 1025 | unsigned int tl_hash_s; |
1027 | 1026 | ||
@@ -1056,6 +1055,8 @@ struct drbd_conf { | |||
1056 | struct crypto_hash *csums_tfm; | 1055 | struct crypto_hash *csums_tfm; |
1057 | struct crypto_hash *verify_tfm; | 1056 | struct crypto_hash *verify_tfm; |
1058 | 1057 | ||
1058 | unsigned long last_reattach_jif; | ||
1059 | unsigned long last_reconnect_jif; | ||
1059 | struct drbd_thread receiver; | 1060 | struct drbd_thread receiver; |
1060 | struct drbd_thread worker; | 1061 | struct drbd_thread worker; |
1061 | struct drbd_thread asender; | 1062 | struct drbd_thread asender; |
@@ -1094,7 +1095,8 @@ struct drbd_conf { | |||
1094 | wait_queue_head_t ee_wait; | 1095 | wait_queue_head_t ee_wait; |
1095 | struct page *md_io_page; /* one page buffer for md_io */ | 1096 | struct page *md_io_page; /* one page buffer for md_io */ |
1096 | struct page *md_io_tmpp; /* for logical_block_size != 512 */ | 1097 | struct page *md_io_tmpp; /* for logical_block_size != 512 */ |
1097 | struct mutex md_io_mutex; /* protects the md_io_buffer */ | 1098 | struct drbd_md_io md_io; |
1099 | atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ | ||
1098 | spinlock_t al_lock; | 1100 | spinlock_t al_lock; |
1099 | wait_queue_head_t al_wait; | 1101 | wait_queue_head_t al_wait; |
1100 | struct lru_cache *act_log; /* activity log */ | 1102 | struct lru_cache *act_log; /* activity log */ |
@@ -1228,8 +1230,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); | |||
1228 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | 1230 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); |
1229 | extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); | 1231 | extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); |
1230 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); | 1232 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); |
1231 | extern int _drbd_send_state(struct drbd_conf *mdev); | 1233 | extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); |
1232 | extern int drbd_send_state(struct drbd_conf *mdev); | 1234 | extern int drbd_send_current_state(struct drbd_conf *mdev); |
1233 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | 1235 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, |
1234 | enum drbd_packets cmd, struct p_header80 *h, | 1236 | enum drbd_packets cmd, struct p_header80 *h, |
1235 | size_t size, unsigned msg_flags); | 1237 | size_t size, unsigned msg_flags); |
@@ -1461,6 +1463,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); | |||
1461 | extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); | 1463 | extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); |
1462 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); | 1464 | extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); |
1463 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); | 1465 | extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); |
1466 | extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); | ||
1464 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | 1467 | extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, |
1465 | unsigned long al_enr); | 1468 | unsigned long al_enr); |
1466 | extern size_t drbd_bm_words(struct drbd_conf *mdev); | 1469 | extern size_t drbd_bm_words(struct drbd_conf *mdev); |
@@ -1493,11 +1496,38 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ | |||
1493 | extern mempool_t *drbd_request_mempool; | 1496 | extern mempool_t *drbd_request_mempool; |
1494 | extern mempool_t *drbd_ee_mempool; | 1497 | extern mempool_t *drbd_ee_mempool; |
1495 | 1498 | ||
1496 | extern struct page *drbd_pp_pool; /* drbd's page pool */ | 1499 | /* drbd's page pool, used to buffer data received from the peer, |
1500 | * or data requested by the peer. | ||
1501 | * | ||
1502 | * This does not have an emergency reserve. | ||
1503 | * | ||
1504 | * When allocating from this pool, it first takes pages from the pool. | ||
1505 | * Only if the pool is depleted will try to allocate from the system. | ||
1506 | * | ||
1507 | * The assumption is that pages taken from this pool will be processed, | ||
1508 | * and given back, "quickly", and then can be recycled, so we can avoid | ||
1509 | * frequent calls to alloc_page(), and still will be able to make progress even | ||
1510 | * under memory pressure. | ||
1511 | */ | ||
1512 | extern struct page *drbd_pp_pool; | ||
1497 | extern spinlock_t drbd_pp_lock; | 1513 | extern spinlock_t drbd_pp_lock; |
1498 | extern int drbd_pp_vacant; | 1514 | extern int drbd_pp_vacant; |
1499 | extern wait_queue_head_t drbd_pp_wait; | 1515 | extern wait_queue_head_t drbd_pp_wait; |
1500 | 1516 | ||
1517 | /* We also need a standard (emergency-reserve backed) page pool | ||
1518 | * for meta data IO (activity log, bitmap). | ||
1519 | * We can keep it global, as long as it is used as "N pages at a time". | ||
1520 | * 128 should be plenty, currently we probably can get away with as few as 1. | ||
1521 | */ | ||
1522 | #define DRBD_MIN_POOL_PAGES 128 | ||
1523 | extern mempool_t *drbd_md_io_page_pool; | ||
1524 | |||
1525 | /* We also need to make sure we get a bio | ||
1526 | * when we need it for housekeeping purposes */ | ||
1527 | extern struct bio_set *drbd_md_io_bio_set; | ||
1528 | /* to allocate from that set */ | ||
1529 | extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); | ||
1530 | |||
1501 | extern rwlock_t global_state_lock; | 1531 | extern rwlock_t global_state_lock; |
1502 | 1532 | ||
1503 | extern struct drbd_conf *drbd_new_device(unsigned int minor); | 1533 | extern struct drbd_conf *drbd_new_device(unsigned int minor); |
@@ -1536,8 +1566,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); | |||
1536 | extern void suspend_other_sg(struct drbd_conf *mdev); | 1566 | extern void suspend_other_sg(struct drbd_conf *mdev); |
1537 | extern int drbd_resync_finished(struct drbd_conf *mdev); | 1567 | extern int drbd_resync_finished(struct drbd_conf *mdev); |
1538 | /* maybe rather drbd_main.c ? */ | 1568 | /* maybe rather drbd_main.c ? */ |
1569 | extern void *drbd_md_get_buffer(struct drbd_conf *mdev); | ||
1570 | extern void drbd_md_put_buffer(struct drbd_conf *mdev); | ||
1539 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, | 1571 | extern int drbd_md_sync_page_io(struct drbd_conf *mdev, |
1540 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | 1572 | struct drbd_backing_dev *bdev, sector_t sector, int rw); |
1573 | extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, | ||
1574 | unsigned int *done); | ||
1541 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); | 1575 | extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); |
1542 | extern void drbd_rs_controller_reset(struct drbd_conf *mdev); | 1576 | extern void drbd_rs_controller_reset(struct drbd_conf *mdev); |
1543 | 1577 | ||
@@ -1754,19 +1788,6 @@ static inline struct page *page_chain_next(struct page *page) | |||
1754 | #define page_chain_for_each_safe(page, n) \ | 1788 | #define page_chain_for_each_safe(page, n) \ |
1755 | for (; page && ({ n = page_chain_next(page); 1; }); page = n) | 1789 | for (; page && ({ n = page_chain_next(page); 1; }); page = n) |
1756 | 1790 | ||
1757 | static inline int drbd_bio_has_active_page(struct bio *bio) | ||
1758 | { | ||
1759 | struct bio_vec *bvec; | ||
1760 | int i; | ||
1761 | |||
1762 | __bio_for_each_segment(bvec, bio, i, 0) { | ||
1763 | if (page_count(bvec->bv_page) > 1) | ||
1764 | return 1; | ||
1765 | } | ||
1766 | |||
1767 | return 0; | ||
1768 | } | ||
1769 | |||
1770 | static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) | 1791 | static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) |
1771 | { | 1792 | { |
1772 | struct page *page = e->pages; | 1793 | struct page *page = e->pages; |
@@ -1777,7 +1798,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) | |||
1777 | return 0; | 1798 | return 0; |
1778 | } | 1799 | } |
1779 | 1800 | ||
1780 | |||
1781 | static inline void drbd_state_lock(struct drbd_conf *mdev) | 1801 | static inline void drbd_state_lock(struct drbd_conf *mdev) |
1782 | { | 1802 | { |
1783 | wait_event(mdev->misc_wait, | 1803 | wait_event(mdev->misc_wait, |
@@ -2230,7 +2250,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | |||
2230 | * Note: currently we don't support such large bitmaps on 32bit | 2250 | * Note: currently we don't support such large bitmaps on 32bit |
2231 | * arch anyways, but no harm done to be prepared for it here. | 2251 | * arch anyways, but no harm done to be prepared for it here. |
2232 | */ | 2252 | */ |
2233 | unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; | 2253 | unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10; |
2234 | unsigned long left = *bits_left >> shift; | 2254 | unsigned long left = *bits_left >> shift; |
2235 | unsigned long total = 1UL + (mdev->rs_total >> shift); | 2255 | unsigned long total = 1UL + (mdev->rs_total >> shift); |
2236 | unsigned long tmp = 1000UL - left * 1000UL/total; | 2256 | unsigned long tmp = 1000UL - left * 1000UL/total; |
@@ -2306,12 +2326,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) | |||
2306 | case D_OUTDATED: | 2326 | case D_OUTDATED: |
2307 | case D_CONSISTENT: | 2327 | case D_CONSISTENT: |
2308 | case D_UP_TO_DATE: | 2328 | case D_UP_TO_DATE: |
2329 | case D_FAILED: | ||
2309 | /* disk state is stable as well. */ | 2330 | /* disk state is stable as well. */ |
2310 | break; | 2331 | break; |
2311 | 2332 | ||
2312 | /* no new io accepted during tansitional states */ | 2333 | /* no new io accepted during tansitional states */ |
2313 | case D_ATTACHING: | 2334 | case D_ATTACHING: |
2314 | case D_FAILED: | ||
2315 | case D_NEGOTIATING: | 2335 | case D_NEGOTIATING: |
2316 | case D_UNKNOWN: | 2336 | case D_UNKNOWN: |
2317 | case D_MASK: | 2337 | case D_MASK: |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 211fc44f84be..920ede2829d6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -139,6 +139,8 @@ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
139 | struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ | 139 | struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ |
140 | mempool_t *drbd_request_mempool; | 140 | mempool_t *drbd_request_mempool; |
141 | mempool_t *drbd_ee_mempool; | 141 | mempool_t *drbd_ee_mempool; |
142 | mempool_t *drbd_md_io_page_pool; | ||
143 | struct bio_set *drbd_md_io_bio_set; | ||
142 | 144 | ||
143 | /* I do not use a standard mempool, because: | 145 | /* I do not use a standard mempool, because: |
144 | 1) I want to hand out the pre-allocated objects first. | 146 | 1) I want to hand out the pre-allocated objects first. |
@@ -159,7 +161,24 @@ static const struct block_device_operations drbd_ops = { | |||
159 | .release = drbd_release, | 161 | .release = drbd_release, |
160 | }; | 162 | }; |
161 | 163 | ||
162 | #define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) | 164 | static void bio_destructor_drbd(struct bio *bio) |
165 | { | ||
166 | bio_free(bio, drbd_md_io_bio_set); | ||
167 | } | ||
168 | |||
169 | struct bio *bio_alloc_drbd(gfp_t gfp_mask) | ||
170 | { | ||
171 | struct bio *bio; | ||
172 | |||
173 | if (!drbd_md_io_bio_set) | ||
174 | return bio_alloc(gfp_mask, 1); | ||
175 | |||
176 | bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); | ||
177 | if (!bio) | ||
178 | return NULL; | ||
179 | bio->bi_destructor = bio_destructor_drbd; | ||
180 | return bio; | ||
181 | } | ||
163 | 182 | ||
164 | #ifdef __CHECKER__ | 183 | #ifdef __CHECKER__ |
165 | /* When checking with sparse, and this is an inline function, sparse will | 184 | /* When checking with sparse, and this is an inline function, sparse will |
@@ -208,6 +227,7 @@ static int tl_init(struct drbd_conf *mdev) | |||
208 | mdev->oldest_tle = b; | 227 | mdev->oldest_tle = b; |
209 | mdev->newest_tle = b; | 228 | mdev->newest_tle = b; |
210 | INIT_LIST_HEAD(&mdev->out_of_sequence_requests); | 229 | INIT_LIST_HEAD(&mdev->out_of_sequence_requests); |
230 | INIT_LIST_HEAD(&mdev->barrier_acked_requests); | ||
211 | 231 | ||
212 | mdev->tl_hash = NULL; | 232 | mdev->tl_hash = NULL; |
213 | mdev->tl_hash_s = 0; | 233 | mdev->tl_hash_s = 0; |
@@ -246,9 +266,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) | |||
246 | new->n_writes = 0; | 266 | new->n_writes = 0; |
247 | 267 | ||
248 | newest_before = mdev->newest_tle; | 268 | newest_before = mdev->newest_tle; |
249 | /* never send a barrier number == 0, because that is special-cased | 269 | new->br_number = newest_before->br_number+1; |
250 | * when using TCQ for our write ordering code */ | ||
251 | new->br_number = (newest_before->br_number+1) ?: 1; | ||
252 | if (mdev->newest_tle != new) { | 270 | if (mdev->newest_tle != new) { |
253 | mdev->newest_tle->next = new; | 271 | mdev->newest_tle->next = new; |
254 | mdev->newest_tle = new; | 272 | mdev->newest_tle = new; |
@@ -311,7 +329,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, | |||
311 | These have been list_move'd to the out_of_sequence_requests list in | 329 | These have been list_move'd to the out_of_sequence_requests list in |
312 | _req_mod(, barrier_acked) above. | 330 | _req_mod(, barrier_acked) above. |
313 | */ | 331 | */ |
314 | list_del_init(&b->requests); | 332 | list_splice_init(&b->requests, &mdev->barrier_acked_requests); |
315 | 333 | ||
316 | nob = b->next; | 334 | nob = b->next; |
317 | if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { | 335 | if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { |
@@ -411,6 +429,23 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
411 | b = tmp; | 429 | b = tmp; |
412 | list_splice(&carry_reads, &b->requests); | 430 | list_splice(&carry_reads, &b->requests); |
413 | } | 431 | } |
432 | |||
433 | /* Actions operating on the disk state, also want to work on | ||
434 | requests that got barrier acked. */ | ||
435 | switch (what) { | ||
436 | case fail_frozen_disk_io: | ||
437 | case restart_frozen_disk_io: | ||
438 | list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { | ||
439 | req = list_entry(le, struct drbd_request, tl_requests); | ||
440 | _req_mod(req, what); | ||
441 | } | ||
442 | |||
443 | case connection_lost_while_pending: | ||
444 | case resend: | ||
445 | break; | ||
446 | default: | ||
447 | dev_err(DEV, "what = %d in _tl_restart()\n", what); | ||
448 | } | ||
414 | } | 449 | } |
415 | 450 | ||
416 | 451 | ||
@@ -458,6 +493,38 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
458 | } | 493 | } |
459 | 494 | ||
460 | /** | 495 | /** |
496 | * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL | ||
497 | * @mdev: DRBD device. | ||
498 | */ | ||
499 | void tl_abort_disk_io(struct drbd_conf *mdev) | ||
500 | { | ||
501 | struct drbd_tl_epoch *b; | ||
502 | struct list_head *le, *tle; | ||
503 | struct drbd_request *req; | ||
504 | |||
505 | spin_lock_irq(&mdev->req_lock); | ||
506 | b = mdev->oldest_tle; | ||
507 | while (b) { | ||
508 | list_for_each_safe(le, tle, &b->requests) { | ||
509 | req = list_entry(le, struct drbd_request, tl_requests); | ||
510 | if (!(req->rq_state & RQ_LOCAL_PENDING)) | ||
511 | continue; | ||
512 | _req_mod(req, abort_disk_io); | ||
513 | } | ||
514 | b = b->next; | ||
515 | } | ||
516 | |||
517 | list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { | ||
518 | req = list_entry(le, struct drbd_request, tl_requests); | ||
519 | if (!(req->rq_state & RQ_LOCAL_PENDING)) | ||
520 | continue; | ||
521 | _req_mod(req, abort_disk_io); | ||
522 | } | ||
523 | |||
524 | spin_unlock_irq(&mdev->req_lock); | ||
525 | } | ||
526 | |||
527 | /** | ||
461 | * cl_wide_st_chg() - true if the state change is a cluster wide one | 528 | * cl_wide_st_chg() - true if the state change is a cluster wide one |
462 | * @mdev: DRBD device. | 529 | * @mdev: DRBD device. |
463 | * @os: old (current) state. | 530 | * @os: old (current) state. |
@@ -470,7 +537,7 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, | |||
470 | ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || | 537 | ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || |
471 | (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | 538 | (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || |
472 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || | 539 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || |
473 | (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || | 540 | (os.disk != D_FAILED && ns.disk == D_FAILED))) || |
474 | (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || | 541 | (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || |
475 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); | 542 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); |
476 | } | 543 | } |
@@ -509,8 +576,16 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); | |||
509 | static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, | 576 | static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, |
510 | union drbd_state, | 577 | union drbd_state, |
511 | union drbd_state); | 578 | union drbd_state); |
579 | enum sanitize_state_warnings { | ||
580 | NO_WARNING, | ||
581 | ABORTED_ONLINE_VERIFY, | ||
582 | ABORTED_RESYNC, | ||
583 | CONNECTION_LOST_NEGOTIATING, | ||
584 | IMPLICITLY_UPGRADED_DISK, | ||
585 | IMPLICITLY_UPGRADED_PDSK, | ||
586 | }; | ||
512 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | 587 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, |
513 | union drbd_state ns, const char **warn_sync_abort); | 588 | union drbd_state ns, enum sanitize_state_warnings *warn); |
514 | int drbd_send_state_req(struct drbd_conf *, | 589 | int drbd_send_state_req(struct drbd_conf *, |
515 | union drbd_state, union drbd_state); | 590 | union drbd_state, union drbd_state); |
516 | 591 | ||
@@ -785,6 +860,13 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, | |||
785 | if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) | 860 | if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) |
786 | rv = SS_IN_TRANSIENT_STATE; | 861 | rv = SS_IN_TRANSIENT_STATE; |
787 | 862 | ||
863 | /* While establishing a connection only allow cstate to change. | ||
864 | Delay/refuse role changes, detach attach etc... */ | ||
865 | if (test_bit(STATE_SENT, &mdev->flags) && | ||
866 | !(os.conn == C_WF_REPORT_PARAMS || | ||
867 | (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) | ||
868 | rv = SS_IN_TRANSIENT_STATE; | ||
869 | |||
788 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) | 870 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) |
789 | rv = SS_NEED_CONNECTION; | 871 | rv = SS_NEED_CONNECTION; |
790 | 872 | ||
@@ -803,6 +885,21 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, | |||
803 | return rv; | 885 | return rv; |
804 | } | 886 | } |
805 | 887 | ||
888 | static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) | ||
889 | { | ||
890 | static const char *msg_table[] = { | ||
891 | [NO_WARNING] = "", | ||
892 | [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", | ||
893 | [ABORTED_RESYNC] = "Resync aborted.", | ||
894 | [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", | ||
895 | [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", | ||
896 | [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", | ||
897 | }; | ||
898 | |||
899 | if (warn != NO_WARNING) | ||
900 | dev_warn(DEV, "%s\n", msg_table[warn]); | ||
901 | } | ||
902 | |||
806 | /** | 903 | /** |
807 | * sanitize_state() - Resolves implicitly necessary additional changes to a state transition | 904 | * sanitize_state() - Resolves implicitly necessary additional changes to a state transition |
808 | * @mdev: DRBD device. | 905 | * @mdev: DRBD device. |
@@ -814,11 +911,14 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, | |||
814 | * to D_UNKNOWN. This rule and many more along those lines are in this function. | 911 | * to D_UNKNOWN. This rule and many more along those lines are in this function. |
815 | */ | 912 | */ |
816 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | 913 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, |
817 | union drbd_state ns, const char **warn_sync_abort) | 914 | union drbd_state ns, enum sanitize_state_warnings *warn) |
818 | { | 915 | { |
819 | enum drbd_fencing_p fp; | 916 | enum drbd_fencing_p fp; |
820 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; | 917 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; |
821 | 918 | ||
919 | if (warn) | ||
920 | *warn = NO_WARNING; | ||
921 | |||
822 | fp = FP_DONT_CARE; | 922 | fp = FP_DONT_CARE; |
823 | if (get_ldev(mdev)) { | 923 | if (get_ldev(mdev)) { |
824 | fp = mdev->ldev->dc.fencing; | 924 | fp = mdev->ldev->dc.fencing; |
@@ -833,18 +933,13 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
833 | /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. | 933 | /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. |
834 | * If you try to go into some Sync* state, that shall fail (elsewhere). */ | 934 | * If you try to go into some Sync* state, that shall fail (elsewhere). */ |
835 | if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && | 935 | if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && |
836 | ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) | 936 | ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED) |
837 | ns.conn = os.conn; | 937 | ns.conn = os.conn; |
838 | 938 | ||
839 | /* we cannot fail (again) if we already detached */ | 939 | /* we cannot fail (again) if we already detached */ |
840 | if (ns.disk == D_FAILED && os.disk == D_DISKLESS) | 940 | if (ns.disk == D_FAILED && os.disk == D_DISKLESS) |
841 | ns.disk = D_DISKLESS; | 941 | ns.disk = D_DISKLESS; |
842 | 942 | ||
843 | /* if we are only D_ATTACHING yet, | ||
844 | * we can (and should) go directly to D_DISKLESS. */ | ||
845 | if (ns.disk == D_FAILED && os.disk == D_ATTACHING) | ||
846 | ns.disk = D_DISKLESS; | ||
847 | |||
848 | /* After C_DISCONNECTING only C_STANDALONE may follow */ | 943 | /* After C_DISCONNECTING only C_STANDALONE may follow */ |
849 | if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) | 944 | if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) |
850 | ns.conn = os.conn; | 945 | ns.conn = os.conn; |
@@ -863,10 +958,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
863 | /* Abort resync if a disk fails/detaches */ | 958 | /* Abort resync if a disk fails/detaches */ |
864 | if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && | 959 | if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && |
865 | (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { | 960 | (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { |
866 | if (warn_sync_abort) | 961 | if (warn) |
867 | *warn_sync_abort = | 962 | *warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? |
868 | os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? | 963 | ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; |
869 | "Online-verify" : "Resync"; | ||
870 | ns.conn = C_CONNECTED; | 964 | ns.conn = C_CONNECTED; |
871 | } | 965 | } |
872 | 966 | ||
@@ -877,7 +971,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
877 | ns.disk = mdev->new_state_tmp.disk; | 971 | ns.disk = mdev->new_state_tmp.disk; |
878 | ns.pdsk = mdev->new_state_tmp.pdsk; | 972 | ns.pdsk = mdev->new_state_tmp.pdsk; |
879 | } else { | 973 | } else { |
880 | dev_alert(DEV, "Connection lost while negotiating, no data!\n"); | 974 | if (warn) |
975 | *warn = CONNECTION_LOST_NEGOTIATING; | ||
881 | ns.disk = D_DISKLESS; | 976 | ns.disk = D_DISKLESS; |
882 | ns.pdsk = D_UNKNOWN; | 977 | ns.pdsk = D_UNKNOWN; |
883 | } | 978 | } |
@@ -959,16 +1054,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state | |||
959 | ns.disk = disk_max; | 1054 | ns.disk = disk_max; |
960 | 1055 | ||
961 | if (ns.disk < disk_min) { | 1056 | if (ns.disk < disk_min) { |
962 | dev_warn(DEV, "Implicitly set disk from %s to %s\n", | 1057 | if (warn) |
963 | drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); | 1058 | *warn = IMPLICITLY_UPGRADED_DISK; |
964 | ns.disk = disk_min; | 1059 | ns.disk = disk_min; |
965 | } | 1060 | } |
966 | if (ns.pdsk > pdsk_max) | 1061 | if (ns.pdsk > pdsk_max) |
967 | ns.pdsk = pdsk_max; | 1062 | ns.pdsk = pdsk_max; |
968 | 1063 | ||
969 | if (ns.pdsk < pdsk_min) { | 1064 | if (ns.pdsk < pdsk_min) { |
970 | dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", | 1065 | if (warn) |
971 | drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); | 1066 | *warn = IMPLICITLY_UPGRADED_PDSK; |
972 | ns.pdsk = pdsk_min; | 1067 | ns.pdsk = pdsk_min; |
973 | } | 1068 | } |
974 | 1069 | ||
@@ -1045,12 +1140,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | |||
1045 | { | 1140 | { |
1046 | union drbd_state os; | 1141 | union drbd_state os; |
1047 | enum drbd_state_rv rv = SS_SUCCESS; | 1142 | enum drbd_state_rv rv = SS_SUCCESS; |
1048 | const char *warn_sync_abort = NULL; | 1143 | enum sanitize_state_warnings ssw; |
1049 | struct after_state_chg_work *ascw; | 1144 | struct after_state_chg_work *ascw; |
1050 | 1145 | ||
1051 | os = mdev->state; | 1146 | os = mdev->state; |
1052 | 1147 | ||
1053 | ns = sanitize_state(mdev, os, ns, &warn_sync_abort); | 1148 | ns = sanitize_state(mdev, os, ns, &ssw); |
1054 | 1149 | ||
1055 | if (ns.i == os.i) | 1150 | if (ns.i == os.i) |
1056 | return SS_NOTHING_TO_DO; | 1151 | return SS_NOTHING_TO_DO; |
@@ -1076,8 +1171,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | |||
1076 | return rv; | 1171 | return rv; |
1077 | } | 1172 | } |
1078 | 1173 | ||
1079 | if (warn_sync_abort) | 1174 | print_sanitize_warnings(mdev, ssw); |
1080 | dev_warn(DEV, "%s aborted.\n", warn_sync_abort); | ||
1081 | 1175 | ||
1082 | { | 1176 | { |
1083 | char *pbp, pb[300]; | 1177 | char *pbp, pb[300]; |
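With this hunk, __drbd_set_state() no longer prints a free-form warn_sync_abort string; sanitize_state() reports at most one enum sanitize_state_warnings value through its new out-parameter, and print_sanitize_warnings() turns it into a message in exactly one place. A minimal user-space sketch of that out-parameter pattern, with made-up names rather than the driver's code:

#include <stdio.h>

enum warn_code { NO_WARNING, VALUE_CLAMPED };

/* pure "sanitize" step: adjusts a value and reports at most one warning
 * through the optional out-parameter instead of printing by itself */
static int sanitize(int requested, enum warn_code *warn)
{
	if (warn)
		*warn = NO_WARNING;
	if (requested < 0) {
		if (warn)
			*warn = VALUE_CLAMPED;
		requested = 0;
	}
	return requested;
}

static void print_warning(enum warn_code w)
{
	static const char * const msg[] = {
		[VALUE_CLAMPED] = "Implicitly clamped value",
	};
	if (w != NO_WARNING)
		printf("%s\n", msg[w]);
}

int main(void)
{
	enum warn_code w;
	int v = sanitize(-5, &w);	/* the caller decides when to log */
	print_warning(w);
	return v == 0 ? 0 : 1;
}

The benefit mirrors the DRBD change: the sanitizing logic stays free of side effects, and callers that pass NULL simply opt out of the warning.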
@@ -1243,7 +1337,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | |||
1243 | drbd_thread_stop_nowait(&mdev->receiver); | 1337 | drbd_thread_stop_nowait(&mdev->receiver); |
1244 | 1338 | ||
1245 | /* Upon network failure, we need to restart the receiver. */ | 1339 | /* Upon network failure, we need to restart the receiver. */ |
1246 | if (os.conn > C_TEAR_DOWN && | 1340 | if (os.conn > C_WF_CONNECTION && |
1247 | ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) | 1341 | ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) |
1248 | drbd_thread_restart_nowait(&mdev->receiver); | 1342 | drbd_thread_restart_nowait(&mdev->receiver); |
1249 | 1343 | ||
@@ -1251,6 +1345,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | |||
1251 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) | 1345 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) |
1252 | drbd_resume_al(mdev); | 1346 | drbd_resume_al(mdev); |
1253 | 1347 | ||
1348 | /* remember last connect and attach times so request_timer_fn() won't | ||
1349 | * kill newly established sessions while we are still trying to thaw | ||
1350 | * previously frozen IO */ | ||
1351 | if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS) | ||
1352 | mdev->last_reconnect_jif = jiffies; | ||
1353 | if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && | ||
1354 | ns.disk > D_NEGOTIATING) | ||
1355 | mdev->last_reattach_jif = jiffies; | ||
1356 | |||
1254 | ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); | 1357 | ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); |
1255 | if (ascw) { | 1358 | if (ascw) { |
1256 | ascw->os = os; | 1359 | ascw->os = os; |
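The two timestamps recorded above exist so that the request timer does not declare a timeout against a connection or disk that has only just come back; the consumer, request_timer_fn(), is not part of this hunk. Purely as an illustration of such a guard (an assumed shape, not the actual function):

#include <stdbool.h>
#include <stdio.h>

/* a request is only considered stuck once the session itself has been
 * up for at least one full timeout period; the unsigned subtraction
 * tolerates counter wrap-around for the age check */
static bool session_old_enough(unsigned long now,
			       unsigned long last_reconnect,
			       unsigned long last_reattach,
			       unsigned long timeout)
{
	unsigned long newest = last_reconnect > last_reattach
			     ? last_reconnect : last_reattach;
	return now - newest >= timeout;
}

int main(void)
{
	/* reconnected 2 ticks ago with a 10-tick timeout: not stuck yet */
	printf("%d\n", session_old_enough(1002, 1000, 900, 10));	/* 0 */
	printf("%d\n", session_old_enough(1020, 1000, 900, 10));	/* 1 */
	return 0;
}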
@@ -1354,12 +1457,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1354 | /* Here we have the actions that are performed after a | 1457 | /* Here we have the actions that are performed after a |
1355 | state change. This function might sleep */ | 1458 | state change. This function might sleep */ |
1356 | 1459 | ||
1460 | if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING) | ||
1461 | mod_timer(&mdev->request_timer, jiffies + HZ); | ||
1462 | |||
1357 | nsm.i = -1; | 1463 | nsm.i = -1; |
1358 | if (ns.susp_nod) { | 1464 | if (ns.susp_nod) { |
1359 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) | 1465 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) |
1360 | what = resend; | 1466 | what = resend; |
1361 | 1467 | ||
1362 | if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) | 1468 | if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && |
1469 | ns.disk > D_NEGOTIATING) | ||
1363 | what = restart_frozen_disk_io; | 1470 | what = restart_frozen_disk_io; |
1364 | 1471 | ||
1365 | if (what != nothing) | 1472 | if (what != nothing) |
@@ -1408,7 +1515,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1408 | /* Do not change the order of the if above and the two below... */ | 1515 | /* Do not change the order of the if above and the two below... */ |
1409 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ | 1516 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ |
1410 | drbd_send_uuids(mdev); | 1517 | drbd_send_uuids(mdev); |
1411 | drbd_send_state(mdev); | 1518 | drbd_send_state(mdev, ns); |
1412 | } | 1519 | } |
1413 | /* No point in queuing send_bitmap if we don't have a connection | 1520 | /* No point in queuing send_bitmap if we don't have a connection |
1414 | * anymore, so check also the _current_ state, not only the new state | 1521 | * anymore, so check also the _current_ state, not only the new state |
@@ -1441,11 +1548,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1441 | } | 1548 | } |
1442 | 1549 | ||
1443 | if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { | 1550 | if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { |
1444 | if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { | 1551 | if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && |
1552 | mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { | ||
1445 | drbd_uuid_new_current(mdev); | 1553 | drbd_uuid_new_current(mdev); |
1446 | drbd_send_uuids(mdev); | 1554 | drbd_send_uuids(mdev); |
1447 | } | 1555 | } |
1448 | |||
1449 | /* D_DISKLESS Peer becomes secondary */ | 1556 | /* D_DISKLESS Peer becomes secondary */ |
1450 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) | 1557 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) |
1451 | /* We may still be Primary ourselves. | 1558 | /* We may still be Primary ourselves. |
@@ -1473,14 +1580,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1473 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { | 1580 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { |
1474 | drbd_send_sizes(mdev, 0, 0); /* to start sync... */ | 1581 | drbd_send_sizes(mdev, 0, 0); /* to start sync... */ |
1475 | drbd_send_uuids(mdev); | 1582 | drbd_send_uuids(mdev); |
1476 | drbd_send_state(mdev); | 1583 | drbd_send_state(mdev, ns); |
1477 | } | 1584 | } |
1478 | 1585 | ||
1479 | /* We want to pause/continue resync, tell peer. */ | 1586 | /* We want to pause/continue resync, tell peer. */ |
1480 | if (ns.conn >= C_CONNECTED && | 1587 | if (ns.conn >= C_CONNECTED && |
1481 | ((os.aftr_isp != ns.aftr_isp) || | 1588 | ((os.aftr_isp != ns.aftr_isp) || |
1482 | (os.user_isp != ns.user_isp))) | 1589 | (os.user_isp != ns.user_isp))) |
1483 | drbd_send_state(mdev); | 1590 | drbd_send_state(mdev, ns); |
1484 | 1591 | ||
1485 | /* In case one of the isp bits got set, suspend other devices. */ | 1592 | /* In case one of the isp bits got set, suspend other devices. */ |
1486 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && | 1593 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && |
@@ -1490,10 +1597,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1490 | /* Make sure the peer gets informed about eventual state | 1597 | /* Make sure the peer gets informed about eventual state |
1491 | changes (ISP bits) while we were in WFReportParams. */ | 1598 | changes (ISP bits) while we were in WFReportParams. */ |
1492 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) | 1599 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) |
1493 | drbd_send_state(mdev); | 1600 | drbd_send_state(mdev, ns); |
1494 | 1601 | ||
1495 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) | 1602 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) |
1496 | drbd_send_state(mdev); | 1603 | drbd_send_state(mdev, ns); |
1497 | 1604 | ||
1498 | /* We are in the progress to start a full sync... */ | 1605 | /* We are in the progress to start a full sync... */ |
1499 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | 1606 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || |
@@ -1513,33 +1620,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1513 | /* first half of local IO error, failure to attach, | 1620 | /* first half of local IO error, failure to attach, |
1514 | * or administrative detach */ | 1621 | * or administrative detach */ |
1515 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { | 1622 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { |
1516 | enum drbd_io_error_p eh; | 1623 | enum drbd_io_error_p eh = EP_PASS_ON; |
1517 | int was_io_error; | 1624 | int was_io_error = 0; |
1518 | /* corresponding get_ldev was in __drbd_set_state, to serialize | 1625 | /* corresponding get_ldev was in __drbd_set_state, to serialize |
1519 | * our cleanup here with the transition to D_DISKLESS, | 1626 | * our cleanup here with the transition to D_DISKLESS. |
1520 | * so it is safe to dreference ldev here. */ | 1627 | * But is is still not save to dreference ldev here, since |
1521 | eh = mdev->ldev->dc.on_io_error; | 1628 | * we might come from an failed Attach before ldev was set. */ |
1522 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); | 1629 | if (mdev->ldev) { |
1523 | 1630 | eh = mdev->ldev->dc.on_io_error; | |
1524 | /* current state still has to be D_FAILED, | 1631 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); |
1525 | * there is only one way out: to D_DISKLESS, | 1632 | |
1526 | * and that may only happen after our put_ldev below. */ | 1633 | /* Immediately allow completion of all application IO, that waits |
1527 | if (mdev->state.disk != D_FAILED) | 1634 | for completion from the local disk. */ |
1528 | dev_err(DEV, | 1635 | tl_abort_disk_io(mdev); |
1529 | "ASSERT FAILED: disk is %s during detach\n", | 1636 | |
1530 | drbd_disk_str(mdev->state.disk)); | 1637 | /* current state still has to be D_FAILED, |
1531 | 1638 | * there is only one way out: to D_DISKLESS, | |
1532 | if (drbd_send_state(mdev)) | 1639 | * and that may only happen after our put_ldev below. */ |
1533 | dev_warn(DEV, "Notified peer that I am detaching my disk\n"); | 1640 | if (mdev->state.disk != D_FAILED) |
1534 | else | 1641 | dev_err(DEV, |
1535 | dev_err(DEV, "Sending state for detaching disk failed\n"); | 1642 | "ASSERT FAILED: disk is %s during detach\n", |
1536 | 1643 | drbd_disk_str(mdev->state.disk)); | |
1537 | drbd_rs_cancel_all(mdev); | 1644 | |
1538 | 1645 | if (ns.conn >= C_CONNECTED) | |
1539 | /* In case we want to get something to stable storage still, | 1646 | drbd_send_state(mdev, ns); |
1540 | * this may be the last chance. | 1647 | |
1541 | * Following put_ldev may transition to D_DISKLESS. */ | 1648 | drbd_rs_cancel_all(mdev); |
1542 | drbd_md_sync(mdev); | 1649 | |
1650 | /* In case we want to get something to stable storage still, | ||
1651 | * this may be the last chance. | ||
1652 | * Following put_ldev may transition to D_DISKLESS. */ | ||
1653 | drbd_md_sync(mdev); | ||
1654 | } | ||
1543 | put_ldev(mdev); | 1655 | put_ldev(mdev); |
1544 | 1656 | ||
1545 | if (was_io_error && eh == EP_CALL_HELPER) | 1657 | if (was_io_error && eh == EP_CALL_HELPER) |
@@ -1561,16 +1673,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1561 | mdev->rs_failed = 0; | 1673 | mdev->rs_failed = 0; |
1562 | atomic_set(&mdev->rs_pending_cnt, 0); | 1674 | atomic_set(&mdev->rs_pending_cnt, 0); |
1563 | 1675 | ||
1564 | if (drbd_send_state(mdev)) | 1676 | if (ns.conn >= C_CONNECTED) |
1565 | dev_warn(DEV, "Notified peer that I'm now diskless.\n"); | 1677 | drbd_send_state(mdev, ns); |
1678 | |||
1566 | /* corresponding get_ldev in __drbd_set_state | 1679 | /* corresponding get_ldev in __drbd_set_state |
1567 | * this may finally trigger drbd_ldev_destroy. */ | 1680 | * this may finally trigger drbd_ldev_destroy. */ |
1568 | put_ldev(mdev); | 1681 | put_ldev(mdev); |
1569 | } | 1682 | } |
1570 | 1683 | ||
1571 | /* Notify peer that I had a local IO error, and did not detach. */ | 1684 | /* Notify peer that I had a local IO error, and did not detach. */ |
1572 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) | 1685 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) |
1573 | drbd_send_state(mdev); | 1686 | drbd_send_state(mdev, ns); |
1574 | 1687 | ||
1575 | /* Disks got bigger while they were detached */ | 1688 | /* Disks got bigger while they were detached */ |
1576 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && | 1689 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && |
@@ -1588,7 +1701,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1588 | /* sync target done with resync. Explicitly notify peer, even though | 1701 | /* sync target done with resync. Explicitly notify peer, even though |
1589 | * it should (at least for non-empty resyncs) already know itself. */ | 1702 | * it should (at least for non-empty resyncs) already know itself. */ |
1590 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | 1703 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) |
1591 | drbd_send_state(mdev); | 1704 | drbd_send_state(mdev, ns); |
1705 | |||
1706 | /* Wake up role changes that were delayed because of connection establishment */ ||
1707 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { | ||
1708 | clear_bit(STATE_SENT, &mdev->flags); | ||
1709 | wake_up(&mdev->state_wait); | ||
1710 | } | ||
1592 | 1711 | ||
1593 | /* This triggers bitmap writeout of potentially still unwritten pages | 1712 | /* This triggers bitmap writeout of potentially still unwritten pages |
1594 | * if the resync finished cleanly, or aborted because of peer disk | 1713 | * if the resync finished cleanly, or aborted because of peer disk |
@@ -1598,8 +1717,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1598 | * No harm done if some bits change during this phase. | 1717 | * No harm done if some bits change during this phase. |
1599 | */ | 1718 | */ |
1600 | if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { | 1719 | if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { |
1601 | drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, | 1720 | drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL, |
1602 | "write from resync_finished", BM_LOCKED_SET_ALLOWED); | 1721 | "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); |
1603 | put_ldev(mdev); | 1722 | put_ldev(mdev); |
1604 | } | 1723 | } |
1605 | 1724 | ||
@@ -2057,7 +2176,11 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) | |||
2057 | 2176 | ||
2058 | D_ASSERT(mdev->state.disk == D_UP_TO_DATE); | 2177 | D_ASSERT(mdev->state.disk == D_UP_TO_DATE); |
2059 | 2178 | ||
2060 | uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; | 2179 | uuid = mdev->ldev->md.uuid[UI_BITMAP]; |
2180 | if (uuid && uuid != UUID_JUST_CREATED) | ||
2181 | uuid = uuid + UUID_NEW_BM_OFFSET; | ||
2182 | else | ||
2183 | get_random_bytes(&uuid, sizeof(u64)); | ||
2061 | drbd_uuid_set(mdev, UI_BITMAP, uuid); | 2184 | drbd_uuid_set(mdev, UI_BITMAP, uuid); |
2062 | drbd_print_uuids(mdev, "updated sync UUID"); | 2185 | drbd_print_uuids(mdev, "updated sync UUID"); |
2063 | drbd_md_sync(mdev); | 2186 | drbd_md_sync(mdev); |
@@ -2089,6 +2212,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
2089 | max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ | 2212 | max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ |
2090 | } | 2213 | } |
2091 | 2214 | ||
2215 | /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */ | ||
2216 | if (mdev->agreed_pro_version <= 94) | ||
2217 | max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET); | ||
2218 | |||
2092 | p.d_size = cpu_to_be64(d_size); | 2219 | p.d_size = cpu_to_be64(d_size); |
2093 | p.u_size = cpu_to_be64(u_size); | 2220 | p.u_size = cpu_to_be64(u_size); |
2094 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); | 2221 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); |
@@ -2102,10 +2229,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
2102 | } | 2229 | } |
2103 | 2230 | ||
2104 | /** | 2231 | /** |
2105 | * drbd_send_state() - Sends the drbd state to the peer | 2232 | * drbd_send_current_state() - Sends the drbd state to the peer |
2106 | * @mdev: DRBD device. | 2233 | * @mdev: DRBD device. |
2107 | */ | 2234 | */ |
2108 | int drbd_send_state(struct drbd_conf *mdev) | 2235 | int drbd_send_current_state(struct drbd_conf *mdev) |
2109 | { | 2236 | { |
2110 | struct socket *sock; | 2237 | struct socket *sock; |
2111 | struct p_state p; | 2238 | struct p_state p; |
@@ -2131,6 +2258,37 @@ int drbd_send_state(struct drbd_conf *mdev) | |||
2131 | return ok; | 2258 | return ok; |
2132 | } | 2259 | } |
2133 | 2260 | ||
2261 | /** | ||
2262 | * drbd_send_state() - After a state change, sends the new state to the peer | ||
2263 | * @mdev: DRBD device. | ||
2264 | * @state: the state to send, not necessarily the current state. | ||
2265 | * | ||
2266 | * Each state change queues an "after_state_ch" work, which will eventually | ||
2267 | * send the resulting new state to the peer. If more state changes happen | ||
2268 | * between queuing and processing of the after_state_ch work, we still | ||
2269 | * want to send each intermediary state in the order it occurred. | ||
2270 | */ | ||
2271 | int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) | ||
2272 | { | ||
2273 | struct socket *sock; | ||
2274 | struct p_state p; | ||
2275 | int ok = 0; | ||
2276 | |||
2277 | mutex_lock(&mdev->data.mutex); | ||
2278 | |||
2279 | p.state = cpu_to_be32(state.i); | ||
2280 | sock = mdev->data.socket; | ||
2281 | |||
2282 | if (likely(sock != NULL)) { | ||
2283 | ok = _drbd_send_cmd(mdev, sock, P_STATE, | ||
2284 | (struct p_header80 *)&p, sizeof(p), 0); | ||
2285 | } | ||
2286 | |||
2287 | mutex_unlock(&mdev->data.mutex); | ||
2288 | |||
2289 | return ok; | ||
2290 | } | ||
2291 | |||
2134 | int drbd_send_state_req(struct drbd_conf *mdev, | 2292 | int drbd_send_state_req(struct drbd_conf *mdev, |
2135 | union drbd_state mask, union drbd_state val) | 2293 | union drbd_state mask, union drbd_state val) |
2136 | { | 2294 | { |
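The kernel-doc above is the key point of the new drbd_send_state() signature: each after_state_ch work item carries the state that was current when it was queued, so the peer sees every intermediate state in order even if mdev->state has already moved on. A tiny stand-alone illustration of why the snapshot matters (not DRBD code):

#include <stdio.h>

struct work { int snapshot; };

int main(void)
{
	int current_state = 1;			/* state A */
	struct work q[2];

	current_state = 2; q[0].snapshot = current_state;	/* change A -> B queued */
	current_state = 3; q[1].snapshot = current_state;	/* change B -> C queued */

	/* by the time the queued work runs, "current" is already C;
	 * sending the snapshots preserves the order B, C on the wire */
	for (int i = 0; i < 2; i++)
		printf("send snapshot %d (current is %d)\n",
		       q[i].snapshot, current_state);
	return 0;
}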
@@ -2615,7 +2773,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) | |||
2615 | struct bio_vec *bvec; | 2773 | struct bio_vec *bvec; |
2616 | int i; | 2774 | int i; |
2617 | /* hint all but last page with MSG_MORE */ | 2775 | /* hint all but last page with MSG_MORE */ |
2618 | __bio_for_each_segment(bvec, bio, i, 0) { | 2776 | bio_for_each_segment(bvec, bio, i) { |
2619 | if (!_drbd_no_send_page(mdev, bvec->bv_page, | 2777 | if (!_drbd_no_send_page(mdev, bvec->bv_page, |
2620 | bvec->bv_offset, bvec->bv_len, | 2778 | bvec->bv_offset, bvec->bv_len, |
2621 | i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) | 2779 | i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) |
@@ -2629,7 +2787,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) | |||
2629 | struct bio_vec *bvec; | 2787 | struct bio_vec *bvec; |
2630 | int i; | 2788 | int i; |
2631 | /* hint all but last page with MSG_MORE */ | 2789 | /* hint all but last page with MSG_MORE */ |
2632 | __bio_for_each_segment(bvec, bio, i, 0) { | 2790 | bio_for_each_segment(bvec, bio, i) { |
2633 | if (!_drbd_send_page(mdev, bvec->bv_page, | 2791 | if (!_drbd_send_page(mdev, bvec->bv_page, |
2634 | bvec->bv_offset, bvec->bv_len, | 2792 | bvec->bv_offset, bvec->bv_len, |
2635 | i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) | 2793 | i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) |
@@ -2695,8 +2853,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) | |||
2695 | 2853 | ||
2696 | p.sector = cpu_to_be64(req->sector); | 2854 | p.sector = cpu_to_be64(req->sector); |
2697 | p.block_id = (unsigned long)req; | 2855 | p.block_id = (unsigned long)req; |
2698 | p.seq_num = cpu_to_be32(req->seq_num = | 2856 | p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); |
2699 | atomic_add_return(1, &mdev->packet_seq)); | ||
2700 | 2857 | ||
2701 | dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); | 2858 | dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); |
2702 | 2859 | ||
@@ -2987,8 +3144,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
2987 | atomic_set(&mdev->rs_sect_in, 0); | 3144 | atomic_set(&mdev->rs_sect_in, 0); |
2988 | atomic_set(&mdev->rs_sect_ev, 0); | 3145 | atomic_set(&mdev->rs_sect_ev, 0); |
2989 | atomic_set(&mdev->ap_in_flight, 0); | 3146 | atomic_set(&mdev->ap_in_flight, 0); |
3147 | atomic_set(&mdev->md_io_in_use, 0); | ||
2990 | 3148 | ||
2991 | mutex_init(&mdev->md_io_mutex); | ||
2992 | mutex_init(&mdev->data.mutex); | 3149 | mutex_init(&mdev->data.mutex); |
2993 | mutex_init(&mdev->meta.mutex); | 3150 | mutex_init(&mdev->meta.mutex); |
2994 | sema_init(&mdev->data.work.s, 0); | 3151 | sema_init(&mdev->data.work.s, 0); |
@@ -3126,6 +3283,10 @@ static void drbd_destroy_mempools(void) | |||
3126 | 3283 | ||
3127 | /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ | 3284 | /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ |
3128 | 3285 | ||
3286 | if (drbd_md_io_bio_set) | ||
3287 | bioset_free(drbd_md_io_bio_set); | ||
3288 | if (drbd_md_io_page_pool) | ||
3289 | mempool_destroy(drbd_md_io_page_pool); | ||
3129 | if (drbd_ee_mempool) | 3290 | if (drbd_ee_mempool) |
3130 | mempool_destroy(drbd_ee_mempool); | 3291 | mempool_destroy(drbd_ee_mempool); |
3131 | if (drbd_request_mempool) | 3292 | if (drbd_request_mempool) |
@@ -3139,6 +3300,8 @@ static void drbd_destroy_mempools(void) | |||
3139 | if (drbd_al_ext_cache) | 3300 | if (drbd_al_ext_cache) |
3140 | kmem_cache_destroy(drbd_al_ext_cache); | 3301 | kmem_cache_destroy(drbd_al_ext_cache); |
3141 | 3302 | ||
3303 | drbd_md_io_bio_set = NULL; | ||
3304 | drbd_md_io_page_pool = NULL; | ||
3142 | drbd_ee_mempool = NULL; | 3305 | drbd_ee_mempool = NULL; |
3143 | drbd_request_mempool = NULL; | 3306 | drbd_request_mempool = NULL; |
3144 | drbd_ee_cache = NULL; | 3307 | drbd_ee_cache = NULL; |
@@ -3162,6 +3325,8 @@ static int drbd_create_mempools(void) | |||
3162 | drbd_bm_ext_cache = NULL; | 3325 | drbd_bm_ext_cache = NULL; |
3163 | drbd_al_ext_cache = NULL; | 3326 | drbd_al_ext_cache = NULL; |
3164 | drbd_pp_pool = NULL; | 3327 | drbd_pp_pool = NULL; |
3328 | drbd_md_io_page_pool = NULL; | ||
3329 | drbd_md_io_bio_set = NULL; | ||
3165 | 3330 | ||
3166 | /* caches */ | 3331 | /* caches */ |
3167 | drbd_request_cache = kmem_cache_create( | 3332 | drbd_request_cache = kmem_cache_create( |
@@ -3185,6 +3350,16 @@ static int drbd_create_mempools(void) | |||
3185 | goto Enomem; | 3350 | goto Enomem; |
3186 | 3351 | ||
3187 | /* mempools */ | 3352 | /* mempools */ |
3353 | #ifdef COMPAT_HAVE_BIOSET_CREATE | ||
3354 | drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); | ||
3355 | if (drbd_md_io_bio_set == NULL) | ||
3356 | goto Enomem; | ||
3357 | #endif | ||
3358 | |||
3359 | drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); | ||
3360 | if (drbd_md_io_page_pool == NULL) | ||
3361 | goto Enomem; | ||
3362 | |||
3188 | drbd_request_mempool = mempool_create(number, | 3363 | drbd_request_mempool = mempool_create(number, |
3189 | mempool_alloc_slab, mempool_free_slab, drbd_request_cache); | 3364 | mempool_alloc_slab, mempool_free_slab, drbd_request_cache); |
3190 | if (drbd_request_mempool == NULL) | 3365 | if (drbd_request_mempool == NULL) |
@@ -3262,6 +3437,8 @@ static void drbd_delete_device(unsigned int minor) | |||
3262 | if (!mdev) | 3437 | if (!mdev) |
3263 | return; | 3438 | return; |
3264 | 3439 | ||
3440 | del_timer_sync(&mdev->request_timer); | ||
3441 | |||
3265 | /* paranoia asserts */ | 3442 | /* paranoia asserts */ |
3266 | if (mdev->open_cnt != 0) | 3443 | if (mdev->open_cnt != 0) |
3267 | dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, | 3444 | dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, |
@@ -3666,8 +3843,10 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
3666 | if (!get_ldev_if_state(mdev, D_FAILED)) | 3843 | if (!get_ldev_if_state(mdev, D_FAILED)) |
3667 | return; | 3844 | return; |
3668 | 3845 | ||
3669 | mutex_lock(&mdev->md_io_mutex); | 3846 | buffer = drbd_md_get_buffer(mdev); |
3670 | buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); | 3847 | if (!buffer) |
3848 | goto out; | ||
3849 | |||
3671 | memset(buffer, 0, 512); | 3850 | memset(buffer, 0, 512); |
3672 | 3851 | ||
3673 | buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); | 3852 | buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); |
@@ -3698,7 +3877,8 @@ void drbd_md_sync(struct drbd_conf *mdev) | |||
3698 | * since we updated it on metadata. */ | 3877 | * since we updated it on metadata. */ |
3699 | mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); | 3878 | mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); |
3700 | 3879 | ||
3701 | mutex_unlock(&mdev->md_io_mutex); | 3880 | drbd_md_put_buffer(mdev); |
3881 | out: | ||
3702 | put_ldev(mdev); | 3882 | put_ldev(mdev); |
3703 | } | 3883 | } |
3704 | 3884 | ||
@@ -3718,8 +3898,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
3718 | if (!get_ldev_if_state(mdev, D_ATTACHING)) | 3898 | if (!get_ldev_if_state(mdev, D_ATTACHING)) |
3719 | return ERR_IO_MD_DISK; | 3899 | return ERR_IO_MD_DISK; |
3720 | 3900 | ||
3721 | mutex_lock(&mdev->md_io_mutex); | 3901 | buffer = drbd_md_get_buffer(mdev); |
3722 | buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); | 3902 | if (!buffer) |
3903 | goto out; | ||
3723 | 3904 | ||
3724 | if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { | 3905 | if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { |
3725 | /* NOTE: can't do normal error processing here as this is | 3906 | /* NOTE: can't do normal error processing here as this is |
@@ -3780,7 +3961,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) | |||
3780 | mdev->sync_conf.al_extents = 127; | 3961 | mdev->sync_conf.al_extents = 127; |
3781 | 3962 | ||
3782 | err: | 3963 | err: |
3783 | mutex_unlock(&mdev->md_io_mutex); | 3964 | drbd_md_put_buffer(mdev); |
3965 | out: | ||
3784 | put_ldev(mdev); | 3966 | put_ldev(mdev); |
3785 | 3967 | ||
3786 | return rv; | 3968 | return rv; |
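Both drbd_md_sync() above and drbd_md_read() here now take the meta-data page via drbd_md_get_buffer()/drbd_md_put_buffer() instead of holding md_io_mutex, matching the md_io_in_use counter initialised earlier in this diff; note the separate out: label so a failed get skips the put. A rough single-threaded user-space sketch of that shape (the real helpers live elsewhere in the driver and may behave differently):

#include <stdio.h>

static int md_io_in_use;		/* stands in for the driver's atomic_t */
static char md_buffer[512];

/* claim the single meta-data buffer; NULL means somebody else holds it
 * (no locking here, this is only an illustration of the call pattern) */
static void *md_get_buffer(void)
{
	if (md_io_in_use)
		return NULL;
	md_io_in_use = 1;
	return md_buffer;
}

static void md_put_buffer(void)
{
	md_io_in_use = 0;
}

int main(void)
{
	char *buffer = md_get_buffer();
	if (!buffer)
		goto out;		/* failure path: no put */

	buffer[0] = 0;			/* ... synchronous meta-data IO ... */
	md_put_buffer();		/* success path: release the buffer */
out:
	return 0;
}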
@@ -4183,12 +4365,11 @@ const char *drbd_buildtag(void) | |||
4183 | static char buildtag[38] = "\0uilt-in"; | 4365 | static char buildtag[38] = "\0uilt-in"; |
4184 | 4366 | ||
4185 | if (buildtag[0] == 0) { | 4367 | if (buildtag[0] == 0) { |
4186 | #ifdef CONFIG_MODULES | 4368 | #ifdef MODULE |
4187 | if (THIS_MODULE != NULL) | 4369 | sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); |
4188 | sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); | 4370 | #else |
4189 | else | 4371 | buildtag[0] = 'b'; |
4190 | #endif | 4372 | #endif |
4191 | buildtag[0] = 'b'; | ||
4192 | } | 4373 | } |
4193 | 4374 | ||
4194 | return buildtag; | 4375 | return buildtag; |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index abfaacaaf346..867bf1d82988 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data) | |||
289 | */ | 289 | */ |
290 | spin_lock_irq(&mdev->req_lock); | 290 | spin_lock_irq(&mdev->req_lock); |
291 | ns = mdev->state; | 291 | ns = mdev->state; |
292 | if (ns.conn < C_WF_REPORT_PARAMS) { | 292 | if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) { |
293 | ns.pdsk = nps; | 293 | ns.pdsk = nps; |
294 | _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); | 294 | _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); |
295 | } | 295 | } |
@@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
432 | /* if this was forced, we should consider sync */ | 432 | /* if this was forced, we should consider sync */ |
433 | if (forced) | 433 | if (forced) |
434 | drbd_send_uuids(mdev); | 434 | drbd_send_uuids(mdev); |
435 | drbd_send_state(mdev); | 435 | drbd_send_current_state(mdev); |
436 | } | 436 | } |
437 | 437 | ||
438 | drbd_md_sync(mdev); | 438 | drbd_md_sync(mdev); |
@@ -845,9 +845,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) | |||
845 | Because from 8.3.8 onwards the peer can use multiple | 845 | Because from 8.3.8 onwards the peer can use multiple |
846 | BIOs for a single peer_request */ | 846 | BIOs for a single peer_request */ |
847 | if (mdev->state.conn >= C_CONNECTED) { | 847 | if (mdev->state.conn >= C_CONNECTED) { |
848 | if (mdev->agreed_pro_version < 94) | 848 | if (mdev->agreed_pro_version < 94) { |
849 | peer = mdev->peer_max_bio_size; | 849 | peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); |
850 | else if (mdev->agreed_pro_version == 94) | 850 | /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ |
851 | } else if (mdev->agreed_pro_version == 94) | ||
851 | peer = DRBD_MAX_SIZE_H80_PACKET; | 852 | peer = DRBD_MAX_SIZE_H80_PACKET; |
852 | else /* drbd 8.3.8 onwards */ | 853 | else /* drbd 8.3.8 onwards */ |
853 | peer = DRBD_MAX_BIO_SIZE; | 854 | peer = DRBD_MAX_BIO_SIZE; |
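Together with the drbd_send_sizes() hunk earlier in this diff, both directions of the size negotiation are now capped at 32KiB (DRBD_MAX_SIZE_H80_PACKET) whenever the agreed protocol version is 94 or older, even if an old peer advertises more. A worked stand-alone version of the decision above (the DRBD_MAX_BIO_SIZE value here is only a placeholder):

#include <stdio.h>

#define DRBD_MAX_SIZE_H80_PACKET (32u * 1024)	/* 32 KiB, pre-8.3.8 limit */
#define DRBD_MAX_BIO_SIZE        (128u * 1024)	/* illustrative value only */

static unsigned effective_peer_limit(int agreed_pro_version,
				     unsigned peer_max_bio_size)
{
	if (agreed_pro_version < 94)
		return peer_max_bio_size < DRBD_MAX_SIZE_H80_PACKET
		     ? peer_max_bio_size : DRBD_MAX_SIZE_H80_PACKET;
	if (agreed_pro_version == 94)
		return DRBD_MAX_SIZE_H80_PACKET;
	return DRBD_MAX_BIO_SIZE;		/* drbd 8.3.8 onwards */
}

int main(void)
{
	/* an old (pre-8.3.8) peer claiming 128 KiB is still capped at 32 KiB */
	printf("%u\n", effective_peer_limit(90, 128u * 1024));
	return 0;
}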
@@ -1032,7 +1033,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1032 | dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", | 1033 | dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", |
1033 | (unsigned long long) drbd_get_max_capacity(nbc), | 1034 | (unsigned long long) drbd_get_max_capacity(nbc), |
1034 | (unsigned long long) nbc->dc.disk_size); | 1035 | (unsigned long long) nbc->dc.disk_size); |
1035 | retcode = ERR_DISK_TO_SMALL; | 1036 | retcode = ERR_DISK_TOO_SMALL; |
1036 | goto fail; | 1037 | goto fail; |
1037 | } | 1038 | } |
1038 | 1039 | ||
@@ -1046,7 +1047,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1046 | } | 1047 | } |
1047 | 1048 | ||
1048 | if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { | 1049 | if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { |
1049 | retcode = ERR_MD_DISK_TO_SMALL; | 1050 | retcode = ERR_MD_DISK_TOO_SMALL; |
1050 | dev_warn(DEV, "refusing attach: md-device too small, " | 1051 | dev_warn(DEV, "refusing attach: md-device too small, " |
1051 | "at least %llu sectors needed for this meta-disk type\n", | 1052 | "at least %llu sectors needed for this meta-disk type\n", |
1052 | (unsigned long long) min_md_device_sectors); | 1053 | (unsigned long long) min_md_device_sectors); |
@@ -1057,7 +1058,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1057 | * (we may currently be R_PRIMARY with no local disk...) */ | 1058 | * (we may currently be R_PRIMARY with no local disk...) */ |
1058 | if (drbd_get_max_capacity(nbc) < | 1059 | if (drbd_get_max_capacity(nbc) < |
1059 | drbd_get_capacity(mdev->this_bdev)) { | 1060 | drbd_get_capacity(mdev->this_bdev)) { |
1060 | retcode = ERR_DISK_TO_SMALL; | 1061 | retcode = ERR_DISK_TOO_SMALL; |
1061 | goto fail; | 1062 | goto fail; |
1062 | } | 1063 | } |
1063 | 1064 | ||
@@ -1138,7 +1139,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp | |||
1138 | if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && | 1139 | if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && |
1139 | drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { | 1140 | drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { |
1140 | dev_warn(DEV, "refusing to truncate a consistent device\n"); | 1141 | dev_warn(DEV, "refusing to truncate a consistent device\n"); |
1141 | retcode = ERR_DISK_TO_SMALL; | 1142 | retcode = ERR_DISK_TOO_SMALL; |
1142 | goto force_diskless_dec; | 1143 | goto force_diskless_dec; |
1143 | } | 1144 | } |
1144 | 1145 | ||
@@ -1336,17 +1337,34 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1336 | { | 1337 | { |
1337 | enum drbd_ret_code retcode; | 1338 | enum drbd_ret_code retcode; |
1338 | int ret; | 1339 | int ret; |
1340 | struct detach dt = {}; | ||
1341 | |||
1342 | if (!detach_from_tags(mdev, nlp->tag_list, &dt)) { | ||
1343 | reply->ret_code = ERR_MANDATORY_TAG; | ||
1344 | goto out; | ||
1345 | } | ||
1346 | |||
1347 | if (dt.detach_force) { | ||
1348 | drbd_force_state(mdev, NS(disk, D_FAILED)); | ||
1349 | reply->ret_code = SS_SUCCESS; | ||
1350 | goto out; | ||
1351 | } | ||
1352 | |||
1339 | drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ | 1353 | drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ |
1354 | drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */ | ||
1340 | retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); | 1355 | retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); |
1356 | drbd_md_put_buffer(mdev); | ||
1341 | /* D_FAILED will transition to DISKLESS. */ | 1357 | /* D_FAILED will transition to DISKLESS. */ |
1342 | ret = wait_event_interruptible(mdev->misc_wait, | 1358 | ret = wait_event_interruptible(mdev->misc_wait, |
1343 | mdev->state.disk != D_FAILED); | 1359 | mdev->state.disk != D_FAILED); |
1344 | drbd_resume_io(mdev); | 1360 | drbd_resume_io(mdev); |
1361 | |||
1345 | if ((int)retcode == (int)SS_IS_DISKLESS) | 1362 | if ((int)retcode == (int)SS_IS_DISKLESS) |
1346 | retcode = SS_NOTHING_TO_DO; | 1363 | retcode = SS_NOTHING_TO_DO; |
1347 | if (ret) | 1364 | if (ret) |
1348 | retcode = ERR_INTR; | 1365 | retcode = ERR_INTR; |
1349 | reply->ret_code = retcode; | 1366 | reply->ret_code = retcode; |
1367 | out: | ||
1350 | return 0; | 1368 | return 0; |
1351 | } | 1369 | } |
1352 | 1370 | ||
@@ -1711,7 +1729,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1711 | 1729 | ||
1712 | if (rs.no_resync && mdev->agreed_pro_version < 93) { | 1730 | if (rs.no_resync && mdev->agreed_pro_version < 93) { |
1713 | retcode = ERR_NEED_APV_93; | 1731 | retcode = ERR_NEED_APV_93; |
1714 | goto fail; | 1732 | goto fail_ldev; |
1715 | } | 1733 | } |
1716 | 1734 | ||
1717 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) | 1735 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) |
@@ -1738,6 +1756,10 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
1738 | fail: | 1756 | fail: |
1739 | reply->ret_code = retcode; | 1757 | reply->ret_code = retcode; |
1740 | return 0; | 1758 | return 0; |
1759 | |||
1760 | fail_ldev: | ||
1761 | put_ldev(mdev); | ||
1762 | goto fail; | ||
1741 | } | 1763 | } |
1742 | 1764 | ||
1743 | static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | 1765 | static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, |
@@ -1941,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
1941 | 1963 | ||
1942 | /* If there is still bitmap IO pending, probably because of a previous | 1964 | /* If there is still bitmap IO pending, probably because of a previous |
1943 | * resync just being finished, wait for it before requesting a new resync. */ | 1965 | * resync just being finished, wait for it before requesting a new resync. */ |
1966 | drbd_suspend_io(mdev); | ||
1944 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | 1967 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); |
1945 | 1968 | ||
1946 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); | 1969 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); |
@@ -1959,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl | |||
1959 | 1982 | ||
1960 | retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); | 1983 | retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); |
1961 | } | 1984 | } |
1985 | drbd_resume_io(mdev); | ||
1962 | 1986 | ||
1963 | reply->ret_code = retcode; | 1987 | reply->ret_code = retcode; |
1964 | return 0; | 1988 | return 0; |
@@ -1980,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re | |||
1980 | 2004 | ||
1981 | /* If there is still bitmap IO pending, probably because of a previous | 2005 | /* If there is still bitmap IO pending, probably because of a previous |
1982 | * resync just being finished, wait for it before requesting a new resync. */ | 2006 | * resync just being finished, wait for it before requesting a new resync. */ |
2007 | drbd_suspend_io(mdev); | ||
1983 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | 2008 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); |
1984 | 2009 | ||
1985 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); | 2010 | retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); |
@@ -1998,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re | |||
1998 | } else | 2023 | } else |
1999 | retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); | 2024 | retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); |
2000 | } | 2025 | } |
2026 | drbd_resume_io(mdev); | ||
2001 | 2027 | ||
2002 | reply->ret_code = retcode; | 2028 | reply->ret_code = retcode; |
2003 | return 0; | 2029 | return 0; |
@@ -2170,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, | |||
2170 | 2196 | ||
2171 | /* If there is still bitmap IO pending, e.g. previous resync or verify | 2197 | /* If there is still bitmap IO pending, e.g. previous resync or verify |
2172 | * just being finished, wait for it before requesting a new resync. */ | 2198 | * just being finished, wait for it before requesting a new resync. */ |
2199 | drbd_suspend_io(mdev); | ||
2173 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); | 2200 | wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); |
2174 | 2201 | ||
2175 | /* w_make_ov_request expects position to be aligned */ | 2202 | /* w_make_ov_request expects position to be aligned */ |
2176 | mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; | 2203 | mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; |
2177 | reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); | 2204 | reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); |
2205 | drbd_resume_io(mdev); | ||
2178 | return 0; | 2206 | return 0; |
2179 | } | 2207 | } |
2180 | 2208 | ||
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2959cdfb77f5..869bada2ed06 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) | |||
52 | if (unlikely(v >= 1000000)) { | 52 | if (unlikely(v >= 1000000)) { |
53 | /* cool: > GiByte/s */ | 53 | /* cool: > GiByte/s */ |
54 | seq_printf(seq, "%ld,", v / 1000000); | 54 | seq_printf(seq, "%ld,", v / 1000000); |
55 | v /= 1000000; | 55 | v %= 1000000; |
56 | seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); | 56 | seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); |
57 | } else if (likely(v >= 1000)) | 57 | } else if (likely(v >= 1000)) |
58 | seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); | 58 | seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); |
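The single-character fix above is easiest to check with a concrete value: for v = 1234567 the old code printed "1," and then divided again, so the remainder collapsed to 1 and the output read "1,000,001"; with the modulus the remaining 234567 survives and the output is "1,234,567". A user-space rendition of the corrected grouping (plain printf instead of seq_printf; the final else branch is assumed from context):

#include <stdio.h>

static void print_with_thousands_grouping(long v)
{
	if (v >= 1000000) {
		printf("%ld,", v / 1000000);
		v %= 1000000;		/* keep the remainder, do not divide again */
		printf("%03ld,%03ld", v / 1000, v % 1000);
	} else if (v >= 1000) {
		printf("%ld,%03ld", v / 1000, v % 1000);
	} else {
		printf("%ld", v);
	}
	printf("\n");
}

int main(void)
{
	print_with_thousands_grouping(1234567);	/* 1,234,567 */
	print_with_thousands_grouping(987);	/* 987 */
	return 0;
}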
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 43beaca53179..1d088c478150 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -466,6 +466,7 @@ static int drbd_accept(struct drbd_conf *mdev, const char **what, | |||
466 | goto out; | 466 | goto out; |
467 | } | 467 | } |
468 | (*newsock)->ops = sock->ops; | 468 | (*newsock)->ops = sock->ops; |
469 | __module_get((*newsock)->ops->owner); | ||
469 | 470 | ||
470 | out: | 471 | out: |
471 | return err; | 472 | return err; |
@@ -750,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev) | |||
750 | { | 751 | { |
751 | struct socket *s, *sock, *msock; | 752 | struct socket *s, *sock, *msock; |
752 | int try, h, ok; | 753 | int try, h, ok; |
754 | enum drbd_state_rv rv; | ||
753 | 755 | ||
754 | D_ASSERT(!mdev->data.socket); | 756 | D_ASSERT(!mdev->data.socket); |
755 | 757 | ||
@@ -888,25 +890,32 @@ retry: | |||
888 | } | 890 | } |
889 | } | 891 | } |
890 | 892 | ||
891 | if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) | ||
892 | return 0; | ||
893 | |||
894 | sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; | 893 | sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; |
895 | sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; | 894 | sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; |
896 | 895 | ||
897 | atomic_set(&mdev->packet_seq, 0); | 896 | atomic_set(&mdev->packet_seq, 0); |
898 | mdev->peer_seq = 0; | 897 | mdev->peer_seq = 0; |
899 | 898 | ||
900 | drbd_thread_start(&mdev->asender); | ||
901 | |||
902 | if (drbd_send_protocol(mdev) == -1) | 899 | if (drbd_send_protocol(mdev) == -1) |
903 | return -1; | 900 | return -1; |
901 | set_bit(STATE_SENT, &mdev->flags); | ||
904 | drbd_send_sync_param(mdev, &mdev->sync_conf); | 902 | drbd_send_sync_param(mdev, &mdev->sync_conf); |
905 | drbd_send_sizes(mdev, 0, 0); | 903 | drbd_send_sizes(mdev, 0, 0); |
906 | drbd_send_uuids(mdev); | 904 | drbd_send_uuids(mdev); |
907 | drbd_send_state(mdev); | 905 | drbd_send_current_state(mdev); |
908 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); | 906 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); |
909 | clear_bit(RESIZE_PENDING, &mdev->flags); | 907 | clear_bit(RESIZE_PENDING, &mdev->flags); |
908 | |||
909 | spin_lock_irq(&mdev->req_lock); | ||
910 | rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL); | ||
911 | if (mdev->state.conn != C_WF_REPORT_PARAMS) | ||
912 | clear_bit(STATE_SENT, &mdev->flags); | ||
913 | spin_unlock_irq(&mdev->req_lock); | ||
914 | |||
915 | if (rv < SS_SUCCESS) | ||
916 | return 0; | ||
917 | |||
918 | drbd_thread_start(&mdev->asender); | ||
910 | mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ | 919 | mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ |
911 | 920 | ||
912 | return 1; | 921 | return 1; |
@@ -957,7 +966,7 @@ static void drbd_flush(struct drbd_conf *mdev) | |||
957 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, | 966 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, |
958 | NULL); | 967 | NULL); |
959 | if (rv) { | 968 | if (rv) { |
960 | dev_err(DEV, "local disk flush failed with status %d\n", rv); | 969 | dev_info(DEV, "local disk flush failed with status %d\n", rv); |
961 | /* would rather check on EOPNOTSUPP, but that is not reliable. | 970 | /* would rather check on EOPNOTSUPP, but that is not reliable. |
962 | * don't try again for ANY return value != 0 | 971 | * don't try again for ANY return value != 0 |
963 | * if (rv == -EOPNOTSUPP) */ | 972 | * if (rv == -EOPNOTSUPP) */ |
@@ -1001,13 +1010,14 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, | |||
1001 | 1010 | ||
1002 | if (epoch_size != 0 && | 1011 | if (epoch_size != 0 && |
1003 | atomic_read(&epoch->active) == 0 && | 1012 | atomic_read(&epoch->active) == 0 && |
1004 | test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) { | 1013 | (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { |
1005 | if (!(ev & EV_CLEANUP)) { | 1014 | if (!(ev & EV_CLEANUP)) { |
1006 | spin_unlock(&mdev->epoch_lock); | 1015 | spin_unlock(&mdev->epoch_lock); |
1007 | drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); | 1016 | drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); |
1008 | spin_lock(&mdev->epoch_lock); | 1017 | spin_lock(&mdev->epoch_lock); |
1009 | } | 1018 | } |
1010 | dec_unacked(mdev); | 1019 | if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) |
1020 | dec_unacked(mdev); | ||
1011 | 1021 | ||
1012 | if (mdev->current_epoch != epoch) { | 1022 | if (mdev->current_epoch != epoch) { |
1013 | next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); | 1023 | next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); |
@@ -1096,7 +1106,11 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | |||
1096 | /* In most cases, we will only need one bio. But in case the lower | 1106 | /* In most cases, we will only need one bio. But in case the lower |
1097 | * level restrictions happen to be different at this offset on this | 1107 | * level restrictions happen to be different at this offset on this |
1098 | * side than those of the sending peer, we may need to submit the | 1108 | * side than those of the sending peer, we may need to submit the |
1099 | * request in more than one bio. */ | 1109 | * request in more than one bio. |
1110 | * | ||
1111 | * Plain bio_alloc is good enough here, this is no DRBD internally | ||
1112 | * generated bio, but a bio allocated on behalf of the peer. | ||
1113 | */ | ||
1100 | next_bio: | 1114 | next_bio: |
1101 | bio = bio_alloc(GFP_NOIO, nr_pages); | 1115 | bio = bio_alloc(GFP_NOIO, nr_pages); |
1102 | if (!bio) { | 1116 | if (!bio) { |
@@ -1583,6 +1597,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u | |||
1583 | return ok; | 1597 | return ok; |
1584 | } | 1598 | } |
1585 | 1599 | ||
1600 | static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e) | ||
1601 | { | ||
1602 | |||
1603 | struct drbd_epoch_entry *rs_e; | ||
1604 | bool rv = 0; | ||
1605 | |||
1606 | spin_lock_irq(&mdev->req_lock); | ||
1607 | list_for_each_entry(rs_e, &mdev->sync_ee, w.list) { | ||
1608 | if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) { | ||
1609 | rv = 1; | ||
1610 | break; | ||
1611 | } | ||
1612 | } | ||
1613 | spin_unlock_irq(&mdev->req_lock); | ||
1614 | |||
1615 | return rv; | ||
1616 | } | ||
1617 | |||
1586 | /* Called from receive_Data. | 1618 | /* Called from receive_Data. |
1587 | * Synchronize packets on sock with packets on msock. | 1619 | * Synchronize packets on sock with packets on msock. |
1588 | * | 1620 | * |
@@ -1826,6 +1858,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
1826 | list_add(&e->w.list, &mdev->active_ee); | 1858 | list_add(&e->w.list, &mdev->active_ee); |
1827 | spin_unlock_irq(&mdev->req_lock); | 1859 | spin_unlock_irq(&mdev->req_lock); |
1828 | 1860 | ||
1861 | if (mdev->state.conn == C_SYNC_TARGET) | ||
1862 | wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e)); | ||
1863 | |||
1829 | switch (mdev->net_conf->wire_protocol) { | 1864 | switch (mdev->net_conf->wire_protocol) { |
1830 | case DRBD_PROT_C: | 1865 | case DRBD_PROT_C: |
1831 | inc_unacked(mdev); | 1866 | inc_unacked(mdev); |
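With the hunks above, an application write received while we are SyncTarget now waits on ee_wait until overlapping_resync_write() finds no in-flight entry on sync_ee whose sector range intersects it, so the two writes cannot be reordered at the backing device. The interval test itself is the usual half-open overlap check; a self-contained sketch (the driver's overlaps() macro is defined in drbd_int.h, the version below is only illustrative):

#include <stdbool.h>
#include <stdio.h>

/* two byte ranges [sector*512, sector*512+size) overlap iff each one
 * starts before the other one ends */
static bool ranges_overlap(unsigned long long s1, unsigned s1_size,
			   unsigned long long s2, unsigned s2_size)
{
	unsigned long long a1 = s1 * 512, e1 = a1 + s1_size;
	unsigned long long a2 = s2 * 512, e2 = a2 + s2_size;
	return a1 < e2 && a2 < e1;
}

int main(void)
{
	/* application write at sector 8 (4 KiB) vs resync write at sector 12 */
	printf("%d\n", ranges_overlap(8, 4096, 12, 4096));	/* 1: overlaps */
	printf("%d\n", ranges_overlap(8, 4096, 16, 4096));	/* 0: disjoint */
	return 0;
}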
@@ -2420,7 +2455,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l | |||
2420 | mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; | 2455 | mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; |
2421 | mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; | 2456 | mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; |
2422 | 2457 | ||
2423 | dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); | 2458 | dev_info(DEV, "Lost last syncUUID packet, corrected:\n"); |
2424 | drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); | 2459 | drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); |
2425 | 2460 | ||
2426 | return -1; | 2461 | return -1; |
@@ -2806,10 +2841,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi | |||
2806 | 2841 | ||
2807 | if (apv >= 88) { | 2842 | if (apv >= 88) { |
2808 | if (apv == 88) { | 2843 | if (apv == 88) { |
2809 | if (data_size > SHARED_SECRET_MAX) { | 2844 | if (data_size > SHARED_SECRET_MAX || data_size == 0) { |
2810 | dev_err(DEV, "verify-alg too long, " | 2845 | dev_err(DEV, "verify-alg of wrong size, " |
2811 | "peer wants %u, accepting only %u byte\n", | 2846 | "peer wants %u, accepting only up to %u byte\n", |
2812 | data_size, SHARED_SECRET_MAX); | 2847 | data_size, SHARED_SECRET_MAX); |
2813 | return false; | 2848 | return false; |
2814 | } | 2849 | } |
2815 | 2850 | ||
@@ -3168,9 +3203,20 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3168 | os = ns = mdev->state; | 3203 | os = ns = mdev->state; |
3169 | spin_unlock_irq(&mdev->req_lock); | 3204 | spin_unlock_irq(&mdev->req_lock); |
3170 | 3205 | ||
3171 | /* peer says his disk is uptodate, while we think it is inconsistent, | 3206 | /* If some other part of the code (asender thread, timeout) |
3172 | * and this happens while we think we have a sync going on. */ | 3207 | * already decided to close the connection again, |
3173 | if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && | 3208 | * we must not "re-establish" it here. */ |
3209 | if (os.conn <= C_TEAR_DOWN) | ||
3210 | return false; | ||
3211 | |||
3212 | /* If this is the "end of sync" confirmation, usually the peer disk | ||
3213 | * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits | ||
3214 | * set) resync started in PausedSyncT, or if the timing of pause-/ | ||
3215 | * unpause-sync events has been "just right", the peer disk may | ||
3216 | * transition from D_CONSISTENT to D_UP_TO_DATE as well. | ||
3217 | */ | ||
3218 | if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && | ||
3219 | real_peer_disk == D_UP_TO_DATE && | ||
3174 | os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { | 3220 | os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { |
3175 | /* If we are (becoming) SyncSource, but peer is still in sync | 3221 | /* If we are (becoming) SyncSource, but peer is still in sync |
3176 | * preparation, ignore its uptodate-ness to avoid flapping, it | 3222 | * preparation, ignore its uptodate-ness to avoid flapping, it |
@@ -3288,7 +3334,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3288 | /* Nowadays only used when forcing a node into primary role and | 3334 | /* Nowadays only used when forcing a node into primary role and |
3289 | setting its disk to UpToDate with that */ | 3335 | setting its disk to UpToDate with that */ |
3290 | drbd_send_uuids(mdev); | 3336 | drbd_send_uuids(mdev); |
3291 | drbd_send_state(mdev); | 3337 | drbd_send_current_state(mdev); |
3292 | } | 3338 | } |
3293 | } | 3339 | } |
3294 | 3340 | ||
@@ -3776,6 +3822,13 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3776 | if (mdev->state.conn == C_STANDALONE) | 3822 | if (mdev->state.conn == C_STANDALONE) |
3777 | return; | 3823 | return; |
3778 | 3824 | ||
3825 | /* We are about to start the cleanup after connection loss. | ||
3826 | * Make sure drbd_make_request knows about that. | ||
3827 | * Usually we should be in some network failure state already, | ||
3828 | * but just in case we are not, we fix it up here. | ||
3829 | */ | ||
3830 | drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); | ||
3831 | |||
3779 | /* asender does not clean up anything. it must not interfere, either */ | 3832 | /* asender does not clean up anything. it must not interfere, either */ |
3780 | drbd_thread_stop(&mdev->asender); | 3833 | drbd_thread_stop(&mdev->asender); |
3781 | drbd_free_sock(mdev); | 3834 | drbd_free_sock(mdev); |
@@ -3803,8 +3856,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3803 | atomic_set(&mdev->rs_pending_cnt, 0); | 3856 | atomic_set(&mdev->rs_pending_cnt, 0); |
3804 | wake_up(&mdev->misc_wait); | 3857 | wake_up(&mdev->misc_wait); |
3805 | 3858 | ||
3806 | del_timer(&mdev->request_timer); | ||
3807 | |||
3808 | /* make sure syncer is stopped and w_resume_next_sg queued */ | 3859 | /* make sure syncer is stopped and w_resume_next_sg queued */ |
3809 | del_timer_sync(&mdev->resync_timer); | 3860 | del_timer_sync(&mdev->resync_timer); |
3810 | resync_timer_fn((unsigned long)mdev); | 3861 | resync_timer_fn((unsigned long)mdev); |
@@ -4433,7 +4484,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) | |||
4433 | 4484 | ||
4434 | if (mdev->state.conn == C_AHEAD && | 4485 | if (mdev->state.conn == C_AHEAD && |
4435 | atomic_read(&mdev->ap_in_flight) == 0 && | 4486 | atomic_read(&mdev->ap_in_flight) == 0 && |
4436 | !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { | 4487 | !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { |
4437 | mdev->start_resync_timer.expires = jiffies + HZ; | 4488 | mdev->start_resync_timer.expires = jiffies + HZ; |
4438 | add_timer(&mdev->start_resync_timer); | 4489 | add_timer(&mdev->start_resync_timer); |
4439 | } | 4490 | } |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4a0f314086e5..9c5c84946b05 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -37,6 +37,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req | |||
37 | const int rw = bio_data_dir(bio); | 37 | const int rw = bio_data_dir(bio); |
38 | int cpu; | 38 | int cpu; |
39 | cpu = part_stat_lock(); | 39 | cpu = part_stat_lock(); |
40 | part_round_stats(cpu, &mdev->vdisk->part0); | ||
40 | part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); | 41 | part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); |
41 | part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); | 42 | part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); |
42 | part_inc_in_flight(&mdev->vdisk->part0, rw); | 43 | part_inc_in_flight(&mdev->vdisk->part0, rw); |
@@ -214,8 +215,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) | |||
214 | { | 215 | { |
215 | const unsigned long s = req->rq_state; | 216 | const unsigned long s = req->rq_state; |
216 | struct drbd_conf *mdev = req->mdev; | 217 | struct drbd_conf *mdev = req->mdev; |
217 | /* only WRITES may end up here without a master bio (on barrier ack) */ | 218 | int rw = req->rq_state & RQ_WRITE ? WRITE : READ; |
218 | int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; | ||
219 | 219 | ||
220 | /* we must not complete the master bio, while it is | 220 | /* we must not complete the master bio, while it is |
221 | * still being processed by _drbd_send_zc_bio (drbd_send_dblock) | 221 | * still being processed by _drbd_send_zc_bio (drbd_send_dblock) |
@@ -230,7 +230,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) | |||
230 | return; | 230 | return; |
231 | if (s & RQ_NET_PENDING) | 231 | if (s & RQ_NET_PENDING) |
232 | return; | 232 | return; |
233 | if (s & RQ_LOCAL_PENDING) | 233 | if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) |
234 | return; | 234 | return; |
235 | 235 | ||
236 | if (req->master_bio) { | 236 | if (req->master_bio) { |
@@ -277,6 +277,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) | |||
277 | req->master_bio = NULL; | 277 | req->master_bio = NULL; |
278 | } | 278 | } |
279 | 279 | ||
280 | if (s & RQ_LOCAL_PENDING) | ||
281 | return; | ||
282 | |||
280 | if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { | 283 | if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { |
281 | /* this is disconnected (local only) operation, | 284 | /* this is disconnected (local only) operation, |
282 | * or protocol C P_WRITE_ACK, | 285 | * or protocol C P_WRITE_ACK, |
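The reordering in _req_may_be_done() above separates completing the master bio from freeing the request: a request that is still RQ_LOCAL_PENDING but RQ_LOCAL_ABORTED no longer blocks completion towards the upper layer, yet it must not be freed until the (possibly frozen) backing device finally reports the aborted IO done. A condensed sketch of that decision order, assuming illustrative bit values and printf() in place of the real completion and free paths:

    #include <stdio.h>

    #define RQ_LOCAL_PENDING  (1u << 0)
    #define RQ_LOCAL_ABORTED  (1u << 3)
    #define RQ_NET_PENDING    (1u << 4)     /* illustrative bit positions */

    static void req_may_be_done(unsigned int s)
    {
            if (s & RQ_NET_PENDING)
                    return;                          /* still waiting for peer */
            if ((s & RQ_LOCAL_PENDING) && !(s & RQ_LOCAL_ABORTED))
                    return;                          /* local IO still running */

            printf("complete master bio (state 0x%02x)\n", s);

            if (s & RQ_LOCAL_PENDING)
                    return;  /* aborted, lower-level IO not back yet: keep req */

            printf("free request (state 0x%02x)\n", s);
    }

    int main(void)
    {
            req_may_be_done(RQ_LOCAL_PENDING);                    /* waits            */
            req_may_be_done(RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED); /* completes, keeps */
            req_may_be_done(RQ_LOCAL_ABORTED);                    /* completes, frees */
            return 0;
    }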
@@ -429,7 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
429 | break; | 432 | break; |
430 | 433 | ||
431 | case completed_ok: | 434 | case completed_ok: |
432 | if (bio_data_dir(req->master_bio) == WRITE) | 435 | if (req->rq_state & RQ_WRITE) |
433 | mdev->writ_cnt += req->size>>9; | 436 | mdev->writ_cnt += req->size>>9; |
434 | else | 437 | else |
435 | mdev->read_cnt += req->size>>9; | 438 | mdev->read_cnt += req->size>>9; |
@@ -438,7 +441,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
438 | req->rq_state &= ~RQ_LOCAL_PENDING; | 441 | req->rq_state &= ~RQ_LOCAL_PENDING; |
439 | 442 | ||
440 | _req_may_be_done_not_susp(req, m); | 443 | _req_may_be_done_not_susp(req, m); |
441 | put_ldev(mdev); | 444 | break; |
445 | |||
446 | case abort_disk_io: | ||
447 | req->rq_state |= RQ_LOCAL_ABORTED; | ||
448 | if (req->rq_state & RQ_WRITE) | ||
449 | _req_may_be_done_not_susp(req, m); | ||
450 | else | ||
451 | goto goto_queue_for_net_read; | ||
442 | break; | 452 | break; |
443 | 453 | ||
444 | case write_completed_with_error: | 454 | case write_completed_with_error: |
@@ -447,7 +457,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
447 | 457 | ||
448 | __drbd_chk_io_error(mdev, false); | 458 | __drbd_chk_io_error(mdev, false); |
449 | _req_may_be_done_not_susp(req, m); | 459 | _req_may_be_done_not_susp(req, m); |
450 | put_ldev(mdev); | ||
451 | break; | 460 | break; |
452 | 461 | ||
453 | case read_ahead_completed_with_error: | 462 | case read_ahead_completed_with_error: |
@@ -455,7 +464,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
455 | req->rq_state |= RQ_LOCAL_COMPLETED; | 464 | req->rq_state |= RQ_LOCAL_COMPLETED; |
456 | req->rq_state &= ~RQ_LOCAL_PENDING; | 465 | req->rq_state &= ~RQ_LOCAL_PENDING; |
457 | _req_may_be_done_not_susp(req, m); | 466 | _req_may_be_done_not_susp(req, m); |
458 | put_ldev(mdev); | ||
459 | break; | 467 | break; |
460 | 468 | ||
461 | case read_completed_with_error: | 469 | case read_completed_with_error: |
@@ -467,7 +475,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
467 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); | 475 | D_ASSERT(!(req->rq_state & RQ_NET_MASK)); |
468 | 476 | ||
469 | __drbd_chk_io_error(mdev, false); | 477 | __drbd_chk_io_error(mdev, false); |
470 | put_ldev(mdev); | 478 | |
479 | goto_queue_for_net_read: | ||
471 | 480 | ||
472 | /* no point in retrying if there is no good remote data, | 481 | /* no point in retrying if there is no good remote data, |
473 | * or we have no connection. */ | 482 | * or we have no connection. */ |
@@ -556,10 +565,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
556 | drbd_queue_work(&mdev->data.work, &req->w); | 565 | drbd_queue_work(&mdev->data.work, &req->w); |
557 | break; | 566 | break; |
558 | 567 | ||
559 | case oos_handed_to_network: | 568 | case read_retry_remote_canceled: |
560 | /* actually the same */ | ||
561 | case send_canceled: | 569 | case send_canceled: |
562 | /* treat it the same */ | ||
563 | case send_failed: | 570 | case send_failed: |
564 | /* real cleanup will be done from tl_clear. just update flags | 571 | /* real cleanup will be done from tl_clear. just update flags |
565 | * so it is no longer marked as on the worker queue */ | 572 | * so it is no longer marked as on the worker queue */ |
@@ -589,17 +596,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
589 | } | 596 | } |
590 | req->rq_state &= ~RQ_NET_QUEUED; | 597 | req->rq_state &= ~RQ_NET_QUEUED; |
591 | req->rq_state |= RQ_NET_SENT; | 598 | req->rq_state |= RQ_NET_SENT; |
592 | /* because _drbd_send_zc_bio could sleep, and may want to | ||
593 | * dereference the bio even after the "write_acked_by_peer" and | ||
594 | * "completed_ok" events came in, once we return from | ||
595 | * _drbd_send_zc_bio (drbd_send_dblock), we have to check | ||
596 | * whether it is done already, and end it. */ | ||
597 | _req_may_be_done_not_susp(req, m); | 599 | _req_may_be_done_not_susp(req, m); |
598 | break; | 600 | break; |
599 | 601 | ||
600 | case read_retry_remote_canceled: | 602 | case oos_handed_to_network: |
603 | /* Was not set PENDING, no longer QUEUED, so is now DONE | ||
604 | * as far as this connection is concerned. */ | ||
601 | req->rq_state &= ~RQ_NET_QUEUED; | 605 | req->rq_state &= ~RQ_NET_QUEUED; |
602 | /* fall through, in case we raced with drbd_disconnect */ | 606 | req->rq_state |= RQ_NET_DONE; |
607 | _req_may_be_done_not_susp(req, m); | ||
608 | break; | ||
609 | |||
603 | case connection_lost_while_pending: | 610 | case connection_lost_while_pending: |
604 | /* transfer log cleanup after connection loss */ | 611 | /* transfer log cleanup after connection loss */ |
605 | /* assert something? */ | 612 | /* assert something? */ |
@@ -616,8 +623,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
616 | _req_may_be_done(req, m); /* Allowed while state.susp */ | 623 | _req_may_be_done(req, m); /* Allowed while state.susp */ |
617 | break; | 624 | break; |
618 | 625 | ||
619 | case write_acked_by_peer_and_sis: | ||
620 | req->rq_state |= RQ_NET_SIS; | ||
621 | case conflict_discarded_by_peer: | 626 | case conflict_discarded_by_peer: |
622 | /* for discarded conflicting writes of multiple primaries, | 627 | /* for discarded conflicting writes of multiple primaries, |
623 | * there is no need to keep anything in the tl, potential | 628 | * there is no need to keep anything in the tl, potential |
@@ -628,18 +633,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
628 | (unsigned long long)req->sector, req->size); | 633 | (unsigned long long)req->sector, req->size); |
629 | req->rq_state |= RQ_NET_DONE; | 634 | req->rq_state |= RQ_NET_DONE; |
630 | /* fall through */ | 635 | /* fall through */ |
636 | case write_acked_by_peer_and_sis: | ||
631 | case write_acked_by_peer: | 637 | case write_acked_by_peer: |
638 | if (what == write_acked_by_peer_and_sis) | ||
639 | req->rq_state |= RQ_NET_SIS; | ||
632 | /* protocol C; successfully written on peer. | 640 | /* protocol C; successfully written on peer. |
633 | * Nothing to do here. | 641 | * Nothing more to do here. |
634 | * We want to keep the tl in place for all protocols, to cater | 642 | * We want to keep the tl in place for all protocols, to cater |
635 | * for volatile write-back caches on lower level devices. | 643 | * for volatile write-back caches on lower level devices. */ |
636 | * | ||
637 | * A barrier request is expected to have forced all prior | ||
638 | * requests onto stable storage, so completion of a barrier | ||
639 | * request could set NET_DONE right here, and not wait for the | ||
640 | * P_BARRIER_ACK, but that is an unnecessary optimization. */ | ||
641 | 644 | ||
642 | /* this makes it effectively the same as for: */ | ||
643 | case recv_acked_by_peer: | 645 | case recv_acked_by_peer: |
644 | /* protocol B; pretends to be successfully written on peer. | 646 | /* protocol B; pretends to be successfully written on peer. |
645 | * see also notes above in handed_over_to_network about | 647 | * see also notes above in handed_over_to_network about |
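Relocating the write_acked_by_peer_and_sis label keeps the existing fall-through (a discarded conflicting write still gets RQ_NET_DONE) while letting the set-in-sync variant share the write_acked_by_peer body; the `what == write_acked_by_peer_and_sis` test replaces the dedicated case that used to set RQ_NET_SIS before falling through. A schematic of that switch layout with stand-in event and flag names:

    #include <stdio.h>

    enum event { DISCARDED, ACKED_AND_SIS, ACKED, RECV_ACKED };

    #define RQ_NET_SIS   (1u << 0)
    #define RQ_NET_DONE  (1u << 1)
    #define RQ_NET_OK    (1u << 2)

    static unsigned int req_mod(enum event what)
    {
            unsigned int rq_state = 0;

            switch (what) {
            case DISCARDED:
                    rq_state |= RQ_NET_DONE;
                    /* fall through */
            case ACKED_AND_SIS:
            case ACKED:
                    if (what == ACKED_AND_SIS)
                            rq_state |= RQ_NET_SIS;
                    /* fall through: protocol C ack behaves like protocol B ack */
            case RECV_ACKED:
                    rq_state |= RQ_NET_OK;
                    break;
            }
            return rq_state;
    }

    int main(void)
    {
            printf("discarded: 0x%02x\n", req_mod(DISCARDED));
            printf("acked+sis: 0x%02x\n", req_mod(ACKED_AND_SIS));
            printf("acked:     0x%02x\n", req_mod(ACKED));
            return 0;
    }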
@@ -773,6 +775,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns | |||
773 | int local, remote, send_oos = 0; | 775 | int local, remote, send_oos = 0; |
774 | int err = -EIO; | 776 | int err = -EIO; |
775 | int ret = 0; | 777 | int ret = 0; |
778 | union drbd_state s; | ||
776 | 779 | ||
777 | /* allocate outside of all locks; */ | 780 | /* allocate outside of all locks; */ |
778 | req = drbd_req_new(mdev, bio); | 781 | req = drbd_req_new(mdev, bio); |
@@ -834,8 +837,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns | |||
834 | drbd_al_begin_io(mdev, sector); | 837 | drbd_al_begin_io(mdev, sector); |
835 | } | 838 | } |
836 | 839 | ||
837 | remote = remote && drbd_should_do_remote(mdev->state); | 840 | s = mdev->state; |
838 | send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); | 841 | remote = remote && drbd_should_do_remote(s); |
842 | send_oos = rw == WRITE && drbd_should_send_oos(s); | ||
839 | D_ASSERT(!(remote && send_oos)); | 843 | D_ASSERT(!(remote && send_oos)); |
840 | 844 | ||
841 | if (!(local || remote) && !is_susp(mdev->state)) { | 845 | if (!(local || remote) && !is_susp(mdev->state)) { |
@@ -867,7 +871,7 @@ allocate_barrier: | |||
867 | 871 | ||
868 | if (is_susp(mdev->state)) { | 872 | if (is_susp(mdev->state)) { |
869 | /* If we got suspended, use the retry mechanism of | 873 | /* If we got suspended, use the retry mechanism of |
870 | generic_make_request() to restart processing of this | 874 | drbd_make_request() to restart processing of this |
871 | bio. In the next call to drbd_make_request | 875 | bio. In the next call to drbd_make_request |
872 | we sleep in inc_ap_bio() */ | 876 | we sleep in inc_ap_bio() */ |
873 | ret = 1; | 877 | ret = 1; |
@@ -1091,7 +1095,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) | |||
1091 | */ | 1095 | */ |
1092 | D_ASSERT(bio->bi_size > 0); | 1096 | D_ASSERT(bio->bi_size > 0); |
1093 | D_ASSERT((bio->bi_size & 0x1ff) == 0); | 1097 | D_ASSERT((bio->bi_size & 0x1ff) == 0); |
1094 | D_ASSERT(bio->bi_idx == 0); | ||
1095 | 1098 | ||
1096 | /* to make some things easier, force alignment of requests within the | 1099 | /* to make some things easier, force alignment of requests within the |
1097 | * granularity of our hash tables */ | 1100 | * granularity of our hash tables */ |
@@ -1099,8 +1102,9 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) | |||
1099 | e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; | 1102 | e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; |
1100 | 1103 | ||
1101 | if (likely(s_enr == e_enr)) { | 1104 | if (likely(s_enr == e_enr)) { |
1102 | inc_ap_bio(mdev, 1); | 1105 | do { |
1103 | drbd_make_request_common(mdev, bio, start_time); | 1106 | inc_ap_bio(mdev, 1); |
1107 | } while (drbd_make_request_common(mdev, bio, start_time)); | ||
1104 | return; | 1108 | return; |
1105 | } | 1109 | } |
1106 | 1110 | ||
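drbd_make_request_common() can now ask its caller to retry: it returns nonzero when the device was suspended and the bio has to be resubmitted, so drbd_make_request() simply re-takes the application-bio reference in a do/while loop instead of bouncing the bio back through the block layer. A small sketch of that retry contract; the counters and the "suspended twice" behaviour are illustrative only:

    #include <stdio.h>

    static int ap_bio = 0;        /* stand-in for the application-bio count    */
    static int suspended = 2;     /* pretend the device is suspended twice     */

    static void inc_ap_bio(int count)
    {
            ap_bio += count;      /* in DRBD this may sleep until IO is allowed */
    }

    /* Returns nonzero if the caller must retry the same bio, 0 when the
     * request was queued (or failed) and the reference was consumed. */
    static int make_request_common(const char *bio)
    {
            if (suspended) {
                    suspended--;
                    ap_bio--;     /* give the reference back before retrying  */
                    printf("suspended, retrying %s\n", bio);
                    return 1;
            }
            printf("queued %s (ap_bio now %d)\n", bio, ap_bio);
            return 0;
    }

    int main(void)
    {
            do {
                    inc_ap_bio(1);
            } while (make_request_common("bio@sector 2048"));
            return 0;
    }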
@@ -1196,36 +1200,66 @@ void request_timer_fn(unsigned long data) | |||
1196 | struct drbd_conf *mdev = (struct drbd_conf *) data; | 1200 | struct drbd_conf *mdev = (struct drbd_conf *) data; |
1197 | struct drbd_request *req; /* oldest request */ | 1201 | struct drbd_request *req; /* oldest request */ |
1198 | struct list_head *le; | 1202 | struct list_head *le; |
1199 | unsigned long et = 0; /* effective timeout = ko_count * timeout */ | 1203 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ |
1204 | unsigned long now; | ||
1200 | 1205 | ||
1201 | if (get_net_conf(mdev)) { | 1206 | if (get_net_conf(mdev)) { |
1202 | et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; | 1207 | if (mdev->state.conn >= C_WF_REPORT_PARAMS) |
1208 | ent = mdev->net_conf->timeout*HZ/10 | ||
1209 | * mdev->net_conf->ko_count; | ||
1203 | put_net_conf(mdev); | 1210 | put_net_conf(mdev); |
1204 | } | 1211 | } |
1205 | if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) | 1212 | if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */ |
1213 | dt = mdev->ldev->dc.disk_timeout * HZ / 10; | ||
1214 | put_ldev(mdev); | ||
1215 | } | ||
1216 | et = min_not_zero(dt, ent); | ||
1217 | |||
1218 | if (!et) | ||
1206 | return; /* Recurring timer stopped */ | 1219 | return; /* Recurring timer stopped */ |
1207 | 1220 | ||
1221 | now = jiffies; | ||
1222 | |||
1208 | spin_lock_irq(&mdev->req_lock); | 1223 | spin_lock_irq(&mdev->req_lock); |
1209 | le = &mdev->oldest_tle->requests; | 1224 | le = &mdev->oldest_tle->requests; |
1210 | if (list_empty(le)) { | 1225 | if (list_empty(le)) { |
1211 | spin_unlock_irq(&mdev->req_lock); | 1226 | spin_unlock_irq(&mdev->req_lock); |
1212 | mod_timer(&mdev->request_timer, jiffies + et); | 1227 | mod_timer(&mdev->request_timer, now + et); |
1213 | return; | 1228 | return; |
1214 | } | 1229 | } |
1215 | 1230 | ||
1216 | le = le->prev; | 1231 | le = le->prev; |
1217 | req = list_entry(le, struct drbd_request, tl_requests); | 1232 | req = list_entry(le, struct drbd_request, tl_requests); |
1218 | if (time_is_before_eq_jiffies(req->start_time + et)) { | ||
1219 | if (req->rq_state & RQ_NET_PENDING) { | ||
1220 | dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); | ||
1221 | _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); | ||
1222 | } else { | ||
1223 | dev_warn(DEV, "Local backing block device frozen?\n"); | ||
1224 | mod_timer(&mdev->request_timer, jiffies + et); | ||
1225 | } | ||
1226 | } else { | ||
1227 | mod_timer(&mdev->request_timer, req->start_time + et); | ||
1228 | } | ||
1229 | 1233 | ||
1234 | /* The request is considered timed out, if | ||
1235 | * - we have some effective timeout from the configuration, | ||
1236 | * with above state restrictions applied, | ||
1237 | * - the oldest request is waiting for a response from the network | ||
1238 | * resp. the local disk, | ||
1239 | * - the oldest request is in fact older than the effective timeout, | ||
1240 | * - the connection was established (resp. disk was attached) | ||
1241 | * for longer than the timeout already. | ||
1242 | * Note that for 32bit jiffies and very stable connections/disks, | ||
1243 | * we may have a wrap around, which is caught by | ||
1244 | * !time_in_range(now, last_..._jif, last_..._jif + timeout). | ||
1245 | * | ||
1246 | * Side effect: once per 32bit wrap-around interval, which means every | ||
1247 | * ~198 days with 250 HZ, we have a window where the timeout would need | ||
1248 | * to expire twice (worst case) to become effective. Good enough. | ||
1249 | */ | ||
1250 | if (ent && req->rq_state & RQ_NET_PENDING && | ||
1251 | time_after(now, req->start_time + ent) && | ||
1252 | !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) { | ||
1253 | dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); | ||
1254 | _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); | ||
1255 | } | ||
1256 | if (dt && req->rq_state & RQ_LOCAL_PENDING && | ||
1257 | time_after(now, req->start_time + dt) && | ||
1258 | !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { | ||
1259 | dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); | ||
1260 | __drbd_chk_io_error(mdev, 1); | ||
1261 | } | ||
1262 | nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; | ||
1230 | spin_unlock_irq(&mdev->req_lock); | 1263 | spin_unlock_irq(&mdev->req_lock); |
1264 | mod_timer(&mdev->request_timer, nt); | ||
1231 | } | 1265 | } |
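The rewritten request_timer_fn() combines two independently configured limits: the network effective timeout (timeout * ko-count, only once the connection reached C_WF_REPORT_PARAMS) and the new disk-timeout (only while a backing device is attached). The smaller non-zero value re-arms the timer, and the time_in_range() guard keeps a stale last_reconnect_jif / last_reattach_jif from triggering a timeout right after a 32-bit jiffies wrap. A userspace sketch of that selection and check, with assumed analogues of min_not_zero(), time_after() and time_in_range() on a wrapping unsigned long counter; this is not the kernel implementation:

    #include <stdio.h>

    /* Userspace analogues of the kernel time helpers on a wrapping counter. */
    #define time_after(a, b)       ((long)((b) - (a)) < 0)
    #define time_after_eq(a, b)    ((long)((a) - (b)) >= 0)
    #define time_before_eq(a, b)   time_after_eq(b, a)
    #define time_in_range(a, b, c) (time_after_eq(a, b) && time_before_eq(a, c))

    static unsigned long min_not_zero_ul(unsigned long a, unsigned long b)
    {
            if (!a) return b;
            if (!b) return a;
            return a < b ? a : b;
    }

    int main(void)
    {
            unsigned long HZ  = 250;
            unsigned long ent = 6 * HZ * 7;    /* e.g. timeout 6s, ko-count 7  */
            unsigned long dt  = 0;             /* disk-timeout disabled        */
            unsigned long et  = min_not_zero_ul(dt, ent);

            unsigned long now = 1000000;
            unsigned long req_start      = now - et - 1;  /* oldest request    */
            unsigned long last_reconnect = now - 10 * et; /* connection is old */

            if (!et) {
                    printf("both timeouts disabled, timer stops\n");
                    return 0;
            }
            if (ent && time_after(now, req_start + ent) &&
                !time_in_range(now, last_reconnect, last_reconnect + ent))
                    printf("network timeout: peer did not answer in time\n");

            /* re-arm: one full period from now, or relative to a younger
             * oldest request */
            unsigned long nt = (time_after(now, req_start + et) ? now : req_start) + et;
            printf("re-arm timer at %lu (now %lu)\n", nt, now);
            return 0;
    }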
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 68a234a5fdc5..3d2111919486 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -105,6 +105,7 @@ enum drbd_req_event { | |||
105 | read_completed_with_error, | 105 | read_completed_with_error, |
106 | read_ahead_completed_with_error, | 106 | read_ahead_completed_with_error, |
107 | write_completed_with_error, | 107 | write_completed_with_error, |
108 | abort_disk_io, | ||
108 | completed_ok, | 109 | completed_ok, |
109 | resend, | 110 | resend, |
110 | fail_frozen_disk_io, | 111 | fail_frozen_disk_io, |
@@ -118,18 +119,21 @@ enum drbd_req_event { | |||
118 | * same time, so we should hold the request lock anyways. | 119 | * same time, so we should hold the request lock anyways. |
119 | */ | 120 | */ |
120 | enum drbd_req_state_bits { | 121 | enum drbd_req_state_bits { |
121 | /* 210 | 122 | /* 3210 |
122 | * 000: no local possible | 123 | * 0000: no local possible |
123 | * 001: to be submitted | 124 | * 0001: to be submitted |
124 | * UNUSED, we could map: 011: submitted, completion still pending | 125 | * UNUSED, we could map: 011: submitted, completion still pending |
125 | * 110: completed ok | 126 | * 0110: completed ok |
126 | * 010: completed with error | 127 | * 0010: completed with error |
128 | * 1001: Aborted (before completion) | ||
129 | * 1x10: Aborted and completed -> free | ||
127 | */ | 130 | */ |
128 | __RQ_LOCAL_PENDING, | 131 | __RQ_LOCAL_PENDING, |
129 | __RQ_LOCAL_COMPLETED, | 132 | __RQ_LOCAL_COMPLETED, |
130 | __RQ_LOCAL_OK, | 133 | __RQ_LOCAL_OK, |
134 | __RQ_LOCAL_ABORTED, | ||
131 | 135 | ||
132 | /* 76543 | 136 | /* 87654 |
133 | * 00000: no network possible | 137 | * 00000: no network possible |
134 | * 00001: to be send | 138 | * 00001: to be send |
135 | * 00011: to be send, on worker queue | 139 | * 00011: to be send, on worker queue |
@@ -199,8 +203,9 @@ enum drbd_req_state_bits { | |||
199 | #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) | 203 | #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) |
200 | #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) | 204 | #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) |
201 | #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) | 205 | #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) |
206 | #define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) | ||
202 | 207 | ||
203 | #define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ | 208 | #define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) |
204 | 209 | ||
205 | #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) | 210 | #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) |
206 | #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) | 211 | #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) |
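With __RQ_LOCAL_ABORTED the local part of rq_state grows from three to four bits, which is why RQ_LOCAL_MASK is now derived from the highest local bit instead of being hard-coded to 0x07. A self-contained sketch of the same derivation and of the "aborted before completion" vs. "aborted and completed" states from the comment above (the names mirror drbd_req.h but are redefined here purely for illustration):

    #include <stdio.h>

    enum {
            __RQ_LOCAL_PENDING,
            __RQ_LOCAL_COMPLETED,
            __RQ_LOCAL_OK,
            __RQ_LOCAL_ABORTED,
    };

    #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
    #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
    #define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
    #define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)

    /* covers all four local bits: ((1 << 3) << 1) - 1 == 0x0f */
    #define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1) - 1)

    int main(void)
    {
            unsigned long s = RQ_LOCAL_ABORTED | RQ_LOCAL_PENDING;  /* "1001" */

            printf("RQ_LOCAL_MASK = 0x%02lx\n", RQ_LOCAL_MASK);
            printf("aborted before completion: 0x%02lx\n", s & RQ_LOCAL_MASK);

            s &= ~RQ_LOCAL_PENDING;        /* local completion finally arrives */
            s |= RQ_LOCAL_COMPLETED;       /* "1x10": aborted and completed    */
            printf("aborted and completed:     0x%02lx\n", s & RQ_LOCAL_MASK);
            return 0;
    }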
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4d3e6f6213ba..620c70ff2231 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -70,11 +70,29 @@ rwlock_t global_state_lock; | |||
70 | void drbd_md_io_complete(struct bio *bio, int error) | 70 | void drbd_md_io_complete(struct bio *bio, int error) |
71 | { | 71 | { |
72 | struct drbd_md_io *md_io; | 72 | struct drbd_md_io *md_io; |
73 | struct drbd_conf *mdev; | ||
73 | 74 | ||
74 | md_io = (struct drbd_md_io *)bio->bi_private; | 75 | md_io = (struct drbd_md_io *)bio->bi_private; |
76 | mdev = container_of(md_io, struct drbd_conf, md_io); | ||
77 | |||
75 | md_io->error = error; | 78 | md_io->error = error; |
76 | 79 | ||
77 | complete(&md_io->event); | 80 | /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able |
81 | * to timeout on the lower level device, and eventually detach from it. | ||
82 | * If this io completion runs after that timeout expired, this | ||
83 | * drbd_md_put_buffer() may allow us to finally try and re-attach. | ||
84 | * During normal operation, this only puts that extra reference | ||
85 | * down to 1 again. | ||
86 | * Make sure we first drop the reference, and only then signal | ||
87 | * completion, or we may (in drbd_al_read_log()) cycle so fast into the | ||
88 | * next drbd_md_sync_page_io(), that we trigger the | ||
89 | * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there. | ||
90 | */ | ||
91 | drbd_md_put_buffer(mdev); | ||
92 | md_io->done = 1; | ||
93 | wake_up(&mdev->misc_wait); | ||
94 | bio_put(bio); | ||
95 | put_ldev(mdev); | ||
78 | } | 96 | } |
79 | 97 | ||
80 | /* reads on behalf of the partner, | 98 | /* reads on behalf of the partner, |
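The enlarged drbd_md_io_complete() encodes an ordering rule: drop the extra buffer reference taken in _drbd_md_sync_page_io() first, and only then publish completion, otherwise a fast waiter in drbd_al_read_log() could start the next meta-data IO while the reference count still looks busy. A userspace sketch of that ordering with C11 atomics; the counter and flag below merely model drbd_md_put_buffer() and the wait queue, they are not the kernel code:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int md_io_in_use = 1;   /* long-lived owner reference        */
    static atomic_int md_io_done   = 0;   /* completion flag the waiter checks */

    static void md_put_buffer(void)       /* analogue of drbd_md_put_buffer()  */
    {
            atomic_fetch_sub(&md_io_in_use, 1);
    }

    static void md_io_complete(void)
    {
            /* 1. drop the extra reference grabbed before submitting the bio   */
            md_put_buffer();
            /* 2. only now signal completion, so a waiter that immediately     */
            /*    issues the next meta-data IO sees md_io_in_use back at 1     */
            atomic_store(&md_io_done, 1);
    }

    int main(void)
    {
            atomic_fetch_add(&md_io_in_use, 1);   /* submit: take extra ref    */
            md_io_complete();                     /* completion path           */

            if (atomic_load(&md_io_done) && atomic_load(&md_io_in_use) == 1)
                    printf("safe to start the next meta-data IO\n");
            return 0;
    }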
@@ -226,6 +244,7 @@ void drbd_endio_pri(struct bio *bio, int error) | |||
226 | spin_lock_irqsave(&mdev->req_lock, flags); | 244 | spin_lock_irqsave(&mdev->req_lock, flags); |
227 | __req_mod(req, what, &m); | 245 | __req_mod(req, what, &m); |
228 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 246 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
247 | put_ldev(mdev); | ||
229 | 248 | ||
230 | if (m.bio) | 249 | if (m.bio) |
231 | complete_master_bio(mdev, &m); | 250 | complete_master_bio(mdev, &m); |
@@ -290,7 +309,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * | |||
290 | sg_init_table(&sg, 1); | 309 | sg_init_table(&sg, 1); |
291 | crypto_hash_init(&desc); | 310 | crypto_hash_init(&desc); |
292 | 311 | ||
293 | __bio_for_each_segment(bvec, bio, i, 0) { | 312 | bio_for_each_segment(bvec, bio, i) { |
294 | sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); | 313 | sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); |
295 | crypto_hash_update(&desc, &sg, sg.length); | 314 | crypto_hash_update(&desc, &sg, sg.length); |
296 | } | 315 | } |
@@ -728,7 +747,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
728 | } | 747 | } |
729 | 748 | ||
730 | drbd_start_resync(mdev, C_SYNC_SOURCE); | 749 | drbd_start_resync(mdev, C_SYNC_SOURCE); |
731 | clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); | 750 | clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags); |
732 | return 1; | 751 | return 1; |
733 | } | 752 | } |
734 | 753 | ||
@@ -1519,14 +1538,14 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1519 | } | 1538 | } |
1520 | 1539 | ||
1521 | drbd_state_lock(mdev); | 1540 | drbd_state_lock(mdev); |
1522 | 1541 | write_lock_irq(&global_state_lock); | |
1523 | if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { | 1542 | if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { |
1543 | write_unlock_irq(&global_state_lock); | ||
1524 | drbd_state_unlock(mdev); | 1544 | drbd_state_unlock(mdev); |
1525 | return; | 1545 | return; |
1526 | } | 1546 | } |
1527 | 1547 | ||
1528 | write_lock_irq(&global_state_lock); | 1548 | ns.i = mdev->state.i; |
1529 | ns = mdev->state; | ||
1530 | 1549 | ||
1531 | ns.aftr_isp = !_drbd_may_sync_now(mdev); | 1550 | ns.aftr_isp = !_drbd_may_sync_now(mdev); |
1532 | 1551 | ||