aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2011-03-09 16:44:55 -0500
committer: Philipp Reisner <philipp.reisner@linbit.com> 2011-05-24 04:03:30 -0400
commit: f36af18c7b4ea1ba333c09b606bb4a7e5af66b4d (patch)
tree: 6983acfaa859fee028b2927f8f520a27e1785c4d
parent: 53ea433145d9a56c7ad5e69f21f5662053e00e84 (diff)
drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
If there is no replication traffic within the idle timeout (ping-int seconds), DRBD will send a P_PING and adjust the receive timeout to ping-timeout. If no P_PING_ACK is received within this ping-timeout, DRBD finally drops the connection and tries to re-establish it.

To decide which timeout was active, we compared the socket's current receive timeout with the ping-timeout, and dropped the connection if the two were equal. By default, ping-int is 10 seconds and ping-timeout is 500 ms. Unfortunately, if you configure ping-timeout to be the same as ping-int, expiry of the idle timeout was mistaken for a missing ping ack, and caused an immediate reconnection attempt.

Fix: Allow both timeouts to be equal; use a local variable to store which timeout is active.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 10
1 files changed, 8 insertions, 2 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index fd26666c0b08..0b17d426c32b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4554,6 +4554,7 @@ int drbd_asender(struct drbd_thread *thi)
4554 int received = 0; 4554 int received = 0;
4555 int expect = sizeof(struct p_header80); 4555 int expect = sizeof(struct p_header80);
4556 int empty; 4556 int empty;
4557 int ping_timeout_active = 0;
4557 4558
4558 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); 4559 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
4559 4560
@@ -4566,6 +4567,7 @@ int drbd_asender(struct drbd_thread *thi)
4566 ERR_IF(!drbd_send_ping(mdev)) goto reconnect; 4567 ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
4567 mdev->meta.socket->sk->sk_rcvtimeo = 4568 mdev->meta.socket->sk->sk_rcvtimeo =
4568 mdev->net_conf->ping_timeo*HZ/10; 4569 mdev->net_conf->ping_timeo*HZ/10;
4570 ping_timeout_active = 1;
4569 } 4571 }
4570 4572
4571 /* conditionally cork; 4573 /* conditionally cork;
@@ -4620,8 +4622,7 @@ int drbd_asender(struct drbd_thread *thi)
4620 dev_err(DEV, "meta connection shut down by peer.\n"); 4622 dev_err(DEV, "meta connection shut down by peer.\n");
4621 goto reconnect; 4623 goto reconnect;
4622 } else if (rv == -EAGAIN) { 4624 } else if (rv == -EAGAIN) {
4623 if (mdev->meta.socket->sk->sk_rcvtimeo == 4625 if (ping_timeout_active) {
4624 mdev->net_conf->ping_timeo*HZ/10) {
4625 dev_err(DEV, "PingAck did not arrive in time.\n"); 4626 dev_err(DEV, "PingAck did not arrive in time.\n");
4626 goto reconnect; 4627 goto reconnect;
4627 } 4628 }
@@ -4660,6 +4661,11 @@ int drbd_asender(struct drbd_thread *thi)
4660 if (!cmd->process(mdev, h)) 4661 if (!cmd->process(mdev, h))
4661 goto reconnect; 4662 goto reconnect;
4662 4663
4664 /* the idle_timeout (ping-int)
4665 * has been restored in got_PingAck() */
4666 if (cmd == get_asender_cmd(P_PING_ACK))
4667 ping_timeout_active = 0;
4668
4663 buf = h; 4669 buf = h;
4664 received = 0; 4670 received = 0;
4665 expect = sizeof(struct p_header80); 4671 expect = sizeof(struct p_header80);