aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Graf <tgraf@infradead.org>2011-07-06 20:28:35 -0400
committerDavid S. Miller <davem@davemloft.net>2011-07-07 17:08:44 -0400
commitf8d9605243280f1870dd2c6c37a735b925c15f3c (patch)
tree2d6a3ce33c503bce8fca71489d4c4dc266579469
parent31cb852809c86541c817538c98003678546dfa58 (diff)
sctp: Enforce retransmission limit during shutdown
When initiating a graceful shutdown while having data chunks on the retransmission queue with a peer which is in zero window mode the shutdown is never completed because the retransmission error count is reset periodically by the following two rules: - Do not timeout association while doing zero window probe. - Reset overall error count when a heartbeat request has been acknowledged. The graceful shutdown will wait for all outstanding TSN to be acknowledged before sending the SHUTDOWN request. This never happens due to the peer's zero window not acknowledging the continuously retransmitted data chunks. Although the error counter is incremented for each failed retransmission, the receiving of the SACK announcing the zero window clears the error count again immediately. Also heartbeat requests continue to be sent periodically. The peer acknowledges these requests causing the error counter to be reset as well. This patch changes behaviour to only reset the overall error counter for the above rules while not in shutdown. After reaching the maximum number of retransmission attempts, the T5 shutdown guard timer is scheduled to give the receiver some additional time to recover. The timer is stopped as soon as the receiver acknowledges any data. The issue can be easily reproduced by establishing a sctp association over the loopback device, constantly queueing data at the sender while not reading any at the receiver. Wait for the window to reach zero, then initiate a shutdown by killing both processes simultaneously. The association will never be freed and the chunks on the retransmission queue will be retransmitted indefinitely. Signed-off-by: Thomas Graf <tgraf@infradead.org> Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sctp/command.h1
-rw-r--r--net/sctp/outqueue.c20
-rw-r--r--net/sctp/sm_sideeffect.c20
-rw-r--r--net/sctp/sm_statefuns.c32
-rw-r--r--net/sctp/sm_statetable.c2
5 files changed, 62 insertions, 13 deletions
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index dd6847e5d6e4..6506458ccd33 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -63,6 +63,7 @@ typedef enum {
63 SCTP_CMD_ECN_ECNE, /* Do delayed ECNE processing. */ 63 SCTP_CMD_ECN_ECNE, /* Do delayed ECNE processing. */
64 SCTP_CMD_ECN_CWR, /* Do delayed CWR processing. */ 64 SCTP_CMD_ECN_CWR, /* Do delayed CWR processing. */
65 SCTP_CMD_TIMER_START, /* Start a timer. */ 65 SCTP_CMD_TIMER_START, /* Start a timer. */
66 SCTP_CMD_TIMER_START_ONCE, /* Start a timer once */
66 SCTP_CMD_TIMER_RESTART, /* Restart a timer. */ 67 SCTP_CMD_TIMER_RESTART, /* Restart a timer. */
67 SCTP_CMD_TIMER_STOP, /* Stop a timer. */ 68 SCTP_CMD_TIMER_STOP, /* Stop a timer. */
68 SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */ 69 SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 1c88c8911dc5..d03682109b7a 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1582,6 +1582,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1582#endif /* SCTP_DEBUG */ 1582#endif /* SCTP_DEBUG */
1583 if (transport) { 1583 if (transport) {
1584 if (bytes_acked) { 1584 if (bytes_acked) {
1585 struct sctp_association *asoc = transport->asoc;
1586
1585 /* We may have counted DATA that was migrated 1587 /* We may have counted DATA that was migrated
1586 * to this transport due to DEL-IP operation. 1588 * to this transport due to DEL-IP operation.
1587 * Subtract those bytes, since the were never 1589 * Subtract those bytes, since the were never
@@ -1600,6 +1602,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1600 transport->error_count = 0; 1602 transport->error_count = 0;
1601 transport->asoc->overall_error_count = 0; 1603 transport->asoc->overall_error_count = 0;
1602 1604
1605 /*
1606 * While in SHUTDOWN PENDING, we may have started
1607 * the T5 shutdown guard timer after reaching the
1608 * retransmission limit. Stop that timer as soon
1609 * as the receiver acknowledged any data.
1610 */
1611 if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING &&
1612 del_timer(&asoc->timers
1613 [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]))
1614 sctp_association_put(asoc);
1615
1603 /* Mark the destination transport address as 1616 /* Mark the destination transport address as
1604 * active if it is not so marked. 1617 * active if it is not so marked.
1605 */ 1618 */
@@ -1629,10 +1642,15 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1629 * A sender is doing zero window probing when the 1642 * A sender is doing zero window probing when the
1630 * receiver's advertised window is zero, and there is 1643 * receiver's advertised window is zero, and there is
1631 * only one data chunk in flight to the receiver. 1644 * only one data chunk in flight to the receiver.
1645 *
1646 * Allow the association to timeout while in SHUTDOWN
1647 * PENDING or SHUTDOWN RECEIVED in case the receiver
1648 * stays in zero window mode forever.
1632 */ 1649 */
1633 if (!q->asoc->peer.rwnd && 1650 if (!q->asoc->peer.rwnd &&
1634 !list_empty(&tlist) && 1651 !list_empty(&tlist) &&
1635 (sack_ctsn+2 == q->asoc->next_tsn)) { 1652 (sack_ctsn+2 == q->asoc->next_tsn) &&
1653 q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) {
1636 SCTP_DEBUG_PRINTK("%s: SACK received for zero " 1654 SCTP_DEBUG_PRINTK("%s: SACK received for zero "
1637 "window probe: %u\n", 1655 "window probe: %u\n",
1638 __func__, sack_ctsn); 1656 __func__, sack_ctsn);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 534c2e5feb05..6e0f88295aaf 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -670,10 +670,19 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
670 /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the 670 /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
671 * HEARTBEAT should clear the error counter of the destination 671 * HEARTBEAT should clear the error counter of the destination
672 * transport address to which the HEARTBEAT was sent. 672 * transport address to which the HEARTBEAT was sent.
673 * The association's overall error count is also cleared.
674 */ 673 */
675 t->error_count = 0; 674 t->error_count = 0;
676 t->asoc->overall_error_count = 0; 675
676 /*
677 * Although RFC4960 specifies that the overall error count must
678 * be cleared when a HEARTBEAT ACK is received, we make an
679 * exception while in SHUTDOWN PENDING. If the peer keeps its
680 * window shut forever, we may never be able to transmit our
681 * outstanding data and rely on the retransmission limit be reached
682 * to shutdown the association.
683 */
684 if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING)
685 t->asoc->overall_error_count = 0;
677 686
678 /* Clear the hb_sent flag to signal that we had a good 687 /* Clear the hb_sent flag to signal that we had a good
679 * acknowledgement. 688 * acknowledgement.
@@ -1437,6 +1446,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1437 sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr); 1446 sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr);
1438 break; 1447 break;
1439 1448
1449 case SCTP_CMD_TIMER_START_ONCE:
1450 timer = &asoc->timers[cmd->obj.to];
1451
1452 if (timer_pending(timer))
1453 break;
1454 /* fall through */
1455
1440 case SCTP_CMD_TIMER_START: 1456 case SCTP_CMD_TIMER_START:
1441 timer = &asoc->timers[cmd->obj.to]; 1457 timer = &asoc->timers[cmd->obj.to];
1442 timeout = asoc->timeouts[cmd->obj.to]; 1458 timeout = asoc->timeouts[cmd->obj.to];
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a297283154d5..246117142b5c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5154,7 +5154,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
5154 * The sender of the SHUTDOWN MAY also start an overall guard timer 5154 * The sender of the SHUTDOWN MAY also start an overall guard timer
5155 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. 5155 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
5156 */ 5156 */
5157 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, 5157 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
5158 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); 5158 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
5159 5159
5160 if (asoc->autoclose) 5160 if (asoc->autoclose)
@@ -5299,14 +5299,28 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
5299 SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); 5299 SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
5300 5300
5301 if (asoc->overall_error_count >= asoc->max_retrans) { 5301 if (asoc->overall_error_count >= asoc->max_retrans) {
5302 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, 5302 if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
5303 SCTP_ERROR(ETIMEDOUT)); 5303 /*
5304 /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ 5304 * We are here likely because the receiver had its rwnd
5305 sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, 5305 * closed for a while and we have not been able to
5306 SCTP_PERR(SCTP_ERROR_NO_ERROR)); 5306 * transmit the locally queued data within the maximum
5307 SCTP_INC_STATS(SCTP_MIB_ABORTEDS); 5307 * retransmission attempts limit. Start the T5
5308 SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); 5308 * shutdown guard timer to give the receiver one last
5309 return SCTP_DISPOSITION_DELETE_TCB; 5309 * chance and some additional time to recover before
5310 * aborting.
5311 */
5312 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START_ONCE,
5313 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
5314 } else {
5315 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
5316 SCTP_ERROR(ETIMEDOUT));
5317 /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
5318 sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
5319 SCTP_PERR(SCTP_ERROR_NO_ERROR));
5320 SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
5321 SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
5322 return SCTP_DISPOSITION_DELETE_TCB;
5323 }
5310 } 5324 }
5311 5325
5312 /* E1) For the destination address for which the timer 5326 /* E1) For the destination address for which the timer
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 0338dc6fdc9d..7c211a7f90f4 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -827,7 +827,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
827 /* SCTP_STATE_ESTABLISHED */ \ 827 /* SCTP_STATE_ESTABLISHED */ \
828 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 828 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
829 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 829 /* SCTP_STATE_SHUTDOWN_PENDING */ \
830 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ 830 TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
831 /* SCTP_STATE_SHUTDOWN_SENT */ \ 831 /* SCTP_STATE_SHUTDOWN_SENT */ \
832 TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ 832 TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
833 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 833 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \