diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2011-10-27 10:52:30 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-05-09 09:15:56 -0400 |
commit | f479ea06613514814449f28cba6488e31698e406 (patch) | |
tree | b7baebab348fe1deaa6454623564359797afe09b | |
parent | a2e9138197405a4c051630416ceebf98158e631d (diff) |
drbd: send intermediate state change results to the peer
DRBD state changes schedule after_state_ch() actions to a worker thread,
which decides, based on the old and new states of that change, whether to send
an informational state update packet (P_STATE) to the peer.
If it decides to drbd_send_state(), it would however always send the
_current_ state, which, if a second state change happens before the
after_state_ch() of the first ran, may "fast-forward" the peer's view
about this node. In most cases that is harmless, but sometimes this can
confuse DRBD, for example into not actually starting a necessary resync
if you do a very tight detach/attach loop on a Connected Secondary.
Fix this by always sending the "new" state of the respective state
transition which scheduled this after_state_ch() work.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 53 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 4 |
4 files changed, 47 insertions, 16 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c7976a77dfba..31dee20f3411 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -1229,8 +1229,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); | |||
1229 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | 1229 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); |
1230 | extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); | 1230 | extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); |
1231 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); | 1231 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); |
1232 | extern int _drbd_send_state(struct drbd_conf *mdev); | 1232 | extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); |
1233 | extern int drbd_send_state(struct drbd_conf *mdev); | 1233 | extern int drbd_send_current_state(struct drbd_conf *mdev); |
1234 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | 1234 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, |
1235 | enum drbd_packets cmd, struct p_header80 *h, | 1235 | enum drbd_packets cmd, struct p_header80 *h, |
1236 | size_t size, unsigned msg_flags); | 1236 | size_t size, unsigned msg_flags); |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 64318d4ca9ec..3a5b4dec529f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -1487,7 +1487,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1487 | /* Do not change the order of the if above and the two below... */ | 1487 | /* Do not change the order of the if above and the two below... */ |
1488 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ | 1488 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ |
1489 | drbd_send_uuids(mdev); | 1489 | drbd_send_uuids(mdev); |
1490 | drbd_send_state(mdev); | 1490 | drbd_send_state(mdev, ns); |
1491 | } | 1491 | } |
1492 | /* No point in queuing send_bitmap if we don't have a connection | 1492 | /* No point in queuing send_bitmap if we don't have a connection |
1493 | * anymore, so check also the _current_ state, not only the new state | 1493 | * anymore, so check also the _current_ state, not only the new state |
@@ -1552,14 +1552,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1552 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { | 1552 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { |
1553 | drbd_send_sizes(mdev, 0, 0); /* to start sync... */ | 1553 | drbd_send_sizes(mdev, 0, 0); /* to start sync... */ |
1554 | drbd_send_uuids(mdev); | 1554 | drbd_send_uuids(mdev); |
1555 | drbd_send_state(mdev); | 1555 | drbd_send_state(mdev, ns); |
1556 | } | 1556 | } |
1557 | 1557 | ||
1558 | /* We want to pause/continue resync, tell peer. */ | 1558 | /* We want to pause/continue resync, tell peer. */ |
1559 | if (ns.conn >= C_CONNECTED && | 1559 | if (ns.conn >= C_CONNECTED && |
1560 | ((os.aftr_isp != ns.aftr_isp) || | 1560 | ((os.aftr_isp != ns.aftr_isp) || |
1561 | (os.user_isp != ns.user_isp))) | 1561 | (os.user_isp != ns.user_isp))) |
1562 | drbd_send_state(mdev); | 1562 | drbd_send_state(mdev, ns); |
1563 | 1563 | ||
1564 | /* In case one of the isp bits got set, suspend other devices. */ | 1564 | /* In case one of the isp bits got set, suspend other devices. */ |
1565 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && | 1565 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && |
@@ -1569,10 +1569,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1569 | /* Make sure the peer gets informed about eventual state | 1569 | /* Make sure the peer gets informed about eventual state |
1570 | changes (ISP bits) while we were in WFReportParams. */ | 1570 | changes (ISP bits) while we were in WFReportParams. */ |
1571 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) | 1571 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) |
1572 | drbd_send_state(mdev); | 1572 | drbd_send_state(mdev, ns); |
1573 | 1573 | ||
1574 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) | 1574 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) |
1575 | drbd_send_state(mdev); | 1575 | drbd_send_state(mdev, ns); |
1576 | 1576 | ||
1577 | /* We are in the progress to start a full sync... */ | 1577 | /* We are in the progress to start a full sync... */ |
1578 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | 1578 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || |
@@ -1612,7 +1612,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1612 | "ASSERT FAILED: disk is %s during detach\n", | 1612 | "ASSERT FAILED: disk is %s during detach\n", |
1613 | drbd_disk_str(mdev->state.disk)); | 1613 | drbd_disk_str(mdev->state.disk)); |
1614 | 1614 | ||
1615 | if (drbd_send_state(mdev)) | 1615 | if (drbd_send_state(mdev, ns)) |
1616 | dev_info(DEV, "Notified peer that I am detaching my disk\n"); | 1616 | dev_info(DEV, "Notified peer that I am detaching my disk\n"); |
1617 | 1617 | ||
1618 | drbd_rs_cancel_all(mdev); | 1618 | drbd_rs_cancel_all(mdev); |
@@ -1642,7 +1642,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1642 | mdev->rs_failed = 0; | 1642 | mdev->rs_failed = 0; |
1643 | atomic_set(&mdev->rs_pending_cnt, 0); | 1643 | atomic_set(&mdev->rs_pending_cnt, 0); |
1644 | 1644 | ||
1645 | if (drbd_send_state(mdev)) | 1645 | if (drbd_send_state(mdev, ns)) |
1646 | dev_info(DEV, "Notified peer that I'm now diskless.\n"); | 1646 | dev_info(DEV, "Notified peer that I'm now diskless.\n"); |
1647 | /* corresponding get_ldev in __drbd_set_state | 1647 | /* corresponding get_ldev in __drbd_set_state |
1648 | * this may finally trigger drbd_ldev_destroy. */ | 1648 | * this may finally trigger drbd_ldev_destroy. */ |
@@ -1651,7 +1651,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1651 | 1651 | ||
1652 | /* Notify peer that I had a local IO error, and did not detached.. */ | 1652 | /* Notify peer that I had a local IO error, and did not detached.. */ |
1653 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) | 1653 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) |
1654 | drbd_send_state(mdev); | 1654 | drbd_send_state(mdev, ns); |
1655 | 1655 | ||
1656 | /* Disks got bigger while they were detached */ | 1656 | /* Disks got bigger while they were detached */ |
1657 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && | 1657 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && |
@@ -1669,7 +1669,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1669 | /* sync target done with resync. Explicitly notify peer, even though | 1669 | /* sync target done with resync. Explicitly notify peer, even though |
1670 | * it should (at least for non-empty resyncs) already know itself. */ | 1670 | * it should (at least for non-empty resyncs) already know itself. */ |
1671 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | 1671 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) |
1672 | drbd_send_state(mdev); | 1672 | drbd_send_state(mdev, ns); |
1673 | 1673 | ||
1674 | /* This triggers bitmap writeout of potentially still unwritten pages | 1674 | /* This triggers bitmap writeout of potentially still unwritten pages |
1675 | * if the resync finished cleanly, or aborted because of peer disk | 1675 | * if the resync finished cleanly, or aborted because of peer disk |
@@ -2191,10 +2191,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl | |||
2191 | } | 2191 | } |
2192 | 2192 | ||
2193 | /** | 2193 | /** |
2194 | * drbd_send_state() - Sends the drbd state to the peer | 2194 | * drbd_send_current_state() - Sends the drbd state to the peer |
2195 | * @mdev: DRBD device. | 2195 | * @mdev: DRBD device. |
2196 | */ | 2196 | */ |
2197 | int drbd_send_state(struct drbd_conf *mdev) | 2197 | int drbd_send_current_state(struct drbd_conf *mdev) |
2198 | { | 2198 | { |
2199 | struct socket *sock; | 2199 | struct socket *sock; |
2200 | struct p_state p; | 2200 | struct p_state p; |
@@ -2220,6 +2220,37 @@ int drbd_send_state(struct drbd_conf *mdev) | |||
2220 | return ok; | 2220 | return ok; |
2221 | } | 2221 | } |
2222 | 2222 | ||
2223 | /** | ||
2224 | * drbd_send_state() - After a state change, sends the new state to the peer | ||
2225 | * @mdev: DRBD device. | ||
2226 | * @state: the state to send, not necessarily the current state. | ||
2227 | * | ||
2228 | * Each state change queues an "after_state_ch" work, which will eventually | ||
2229 | * send the resulting new state to the peer. If more state changes happen | ||
2230 | * between queuing and processing of the after_state_ch work, we still | ||
2231 | * want to send each intermediary state in the order it occurred. | ||
2232 | */ | ||
2233 | int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) | ||
2234 | { | ||
2235 | struct socket *sock; | ||
2236 | struct p_state p; | ||
2237 | int ok = 0; | ||
2238 | |||
2239 | mutex_lock(&mdev->data.mutex); | ||
2240 | |||
2241 | p.state = cpu_to_be32(state.i); | ||
2242 | sock = mdev->data.socket; | ||
2243 | |||
2244 | if (likely(sock != NULL)) { | ||
2245 | ok = _drbd_send_cmd(mdev, sock, P_STATE, | ||
2246 | (struct p_header80 *)&p, sizeof(p), 0); | ||
2247 | } | ||
2248 | |||
2249 | mutex_unlock(&mdev->data.mutex); | ||
2250 | |||
2251 | return ok; | ||
2252 | } | ||
2253 | |||
2223 | int drbd_send_state_req(struct drbd_conf *mdev, | 2254 | int drbd_send_state_req(struct drbd_conf *mdev, |
2224 | union drbd_state mask, union drbd_state val) | 2255 | union drbd_state mask, union drbd_state val) |
2225 | { | 2256 | { |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 00a82ab7ab98..1bbbad302ae7 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) | |||
432 | /* if this was forced, we should consider sync */ | 432 | /* if this was forced, we should consider sync */ |
433 | if (forced) | 433 | if (forced) |
434 | drbd_send_uuids(mdev); | 434 | drbd_send_uuids(mdev); |
435 | drbd_send_state(mdev); | 435 | drbd_send_current_state(mdev); |
436 | } | 436 | } |
437 | 437 | ||
438 | drbd_md_sync(mdev); | 438 | drbd_md_sync(mdev); |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1a48e02b83bc..f0d86cb300cf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -899,7 +899,7 @@ retry: | |||
899 | drbd_send_sync_param(mdev, &mdev->sync_conf); | 899 | drbd_send_sync_param(mdev, &mdev->sync_conf); |
900 | drbd_send_sizes(mdev, 0, 0); | 900 | drbd_send_sizes(mdev, 0, 0); |
901 | drbd_send_uuids(mdev); | 901 | drbd_send_uuids(mdev); |
902 | drbd_send_state(mdev); | 902 | drbd_send_current_state(mdev); |
903 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); | 903 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); |
904 | clear_bit(RESIZE_PENDING, &mdev->flags); | 904 | clear_bit(RESIZE_PENDING, &mdev->flags); |
905 | 905 | ||
@@ -3294,7 +3294,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned | |||
3294 | /* Nowadays only used when forcing a node into primary role and | 3294 | /* Nowadays only used when forcing a node into primary role and |
3295 | setting its disk to UpToDate with that */ | 3295 | setting its disk to UpToDate with that */ |
3296 | drbd_send_uuids(mdev); | 3296 | drbd_send_uuids(mdev); |
3297 | drbd_send_state(mdev); | 3297 | drbd_send_current_state(mdev); |
3298 | } | 3298 | } |
3299 | } | 3299 | } |
3300 | 3300 | ||