diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2011-03-09 16:44:55 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-05-24 04:03:30 -0400 |
commit | f36af18c7b4ea1ba333c09b606bb4a7e5af66b4d (patch) | |
tree | 6983acfaa859fee028b2927f8f520a27e1785c4d /drivers/block/drbd/drbd_receiver.c | |
parent | 53ea433145d9a56c7ad5e69f21f5662053e00e84 (diff) |
drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
If there is no replication traffic within the idle timeout
(ping-int seconds), DRBD will send a P_PING,
and adjust the timeout to ping-timeout.
If there is no P_PING_ACK received within this ping-timeout,
DRBD finally drops the connection, and tries to re-establish it.
To decide which timeout was active, we compared the socket's current receive
timeout with the ping-timeout, and dropped the connection if the two were equal.
By default, ping-int is 10 seconds, ping-timeout is 500 ms.
Unfortunately, if you configure ping-timeout to be the same as ping-int,
expiry of the idle timeout was mistaken for a missing ping ack,
and caused an immediate reconnection attempt.
Fix:
Allow both timeouts to be equal, use a local variable
to store which timeout is active.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fd26666c0b08..0b17d426c32b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -4554,6 +4554,7 @@ int drbd_asender(struct drbd_thread *thi) | |||
4554 | int received = 0; | 4554 | int received = 0; |
4555 | int expect = sizeof(struct p_header80); | 4555 | int expect = sizeof(struct p_header80); |
4556 | int empty; | 4556 | int empty; |
4557 | int ping_timeout_active = 0; | ||
4557 | 4558 | ||
4558 | sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); | 4559 | sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); |
4559 | 4560 | ||
@@ -4566,6 +4567,7 @@ int drbd_asender(struct drbd_thread *thi) | |||
4566 | ERR_IF(!drbd_send_ping(mdev)) goto reconnect; | 4567 | ERR_IF(!drbd_send_ping(mdev)) goto reconnect; |
4567 | mdev->meta.socket->sk->sk_rcvtimeo = | 4568 | mdev->meta.socket->sk->sk_rcvtimeo = |
4568 | mdev->net_conf->ping_timeo*HZ/10; | 4569 | mdev->net_conf->ping_timeo*HZ/10; |
4570 | ping_timeout_active = 1; | ||
4569 | } | 4571 | } |
4570 | 4572 | ||
4571 | /* conditionally cork; | 4573 | /* conditionally cork; |
@@ -4620,8 +4622,7 @@ int drbd_asender(struct drbd_thread *thi) | |||
4620 | dev_err(DEV, "meta connection shut down by peer.\n"); | 4622 | dev_err(DEV, "meta connection shut down by peer.\n"); |
4621 | goto reconnect; | 4623 | goto reconnect; |
4622 | } else if (rv == -EAGAIN) { | 4624 | } else if (rv == -EAGAIN) { |
4623 | if (mdev->meta.socket->sk->sk_rcvtimeo == | 4625 | if (ping_timeout_active) { |
4624 | mdev->net_conf->ping_timeo*HZ/10) { | ||
4625 | dev_err(DEV, "PingAck did not arrive in time.\n"); | 4626 | dev_err(DEV, "PingAck did not arrive in time.\n"); |
4626 | goto reconnect; | 4627 | goto reconnect; |
4627 | } | 4628 | } |
@@ -4660,6 +4661,11 @@ int drbd_asender(struct drbd_thread *thi) | |||
4660 | if (!cmd->process(mdev, h)) | 4661 | if (!cmd->process(mdev, h)) |
4661 | goto reconnect; | 4662 | goto reconnect; |
4662 | 4663 | ||
4664 | /* the idle_timeout (ping-int) | ||
4665 | * has been restored in got_PingAck() */ | ||
4666 | if (cmd == get_asender_cmd(P_PING_ACK)) | ||
4667 | ping_timeout_active = 0; | ||
4668 | |||
4663 | buf = h; | 4669 | buf = h; |
4664 | received = 0; | 4670 | received = 0; |
4665 | expect = sizeof(struct p_header80); | 4671 | expect = sizeof(struct p_header80); |