From 5591d286281fdfb57914f5fad3ca001d44ce8fc6 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Limit feature negotiation to connection setup phase This patch starts the new implementation of feature negotiation: 1. Although it is theoretically possible to perform feature negotiation at any time (and RFC 4340 supports this), in practice this is prohibitively complex, as it requires to put traffic on hold for each new negotiation. 2. As a byproduct of restricting feature negotiation to connection setup, the feature-negotiation retransmit timer is no longer required. This part is now mapped onto the protocol-level retransmission. Details indicating why timers are no longer needed can be found on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/feature_negotiation/\ implementation_notes.html This patch disables anytime negotiation, subsequent patches work out full feature negotiation support for connection setup. Signed-off-by: Gerrit Renker --- net/dccp/timer.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net/dccp/timer.c') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 54b3c7e9e016..162d1e683c39 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -87,17 +87,6 @@ static void dccp_retransmit_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - /* retransmit timer is used for feature negotiation throughout - * connection. In this case, no packet is re-transmitted, but rather an - * ack is generated and pending changes are placed into its options. - */ - if (sk->sk_send_head == NULL) { - dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); - if (sk->sk_state == DCCP_OPEN) - dccp_send_ack(sk); - goto backoff; - } - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. @@ -126,7 +115,6 @@ static void dccp_retransmit_timer(struct sock *sk) return; } -backoff: icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); -- cgit v1.2.2 From e7937772d7a2b0127cc4cbc67bc594e139fdaf63 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Extend CCID packet dequeueing interface This extends the packet dequeuing interface of dccp_write_xmit() to allow 1. CCIDs to take care of timing when the next packet may be sent; 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds). The main purpose is to take CCID2 out of its polling mode (when it is network- limited, it tries every millisecond to send, without interruption). The interface can also be used to support other CCIDs. The mode of operation for (2) is as follows: * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(), * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full), * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER', * dccp_write_xmit() returns without further action; * after some time the wait-condition for CCID becomes true, * that CCID schedules the tasklet, * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(), * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now", * packet is sent, and possibly more (since dccp_write_xmit() loops). Code reuse: the taskled function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. If the tasklet finds that the socket is locked, it re-schedules the tasklet function (not the tasklet) after one jiffy. Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets). Signed-off-by: Gerrit Renker --- net/dccp/timer.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'net/dccp/timer.c') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 162d1e683c39..9369aca4b0e9 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -237,32 +237,35 @@ out: sock_put(sk); } -/* Transmit-delay timer: used by the CCIDs to delay actual send time */ -static void dccp_write_xmit_timer(unsigned long data) +/** + * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface + * See the comments above %ccid_dequeueing_decision for supported modes. + */ +static void dccp_write_xmitlet(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) - sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); + sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); else dccp_write_xmit(sk, 0); bh_unlock_sock(sk); - sock_put(sk); } -static void dccp_init_write_xmit_timer(struct sock *sk) +static void dccp_write_xmit_timer(unsigned long data) { - struct dccp_sock *dp = dccp_sk(sk); - - setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, - (unsigned long)sk); + dccp_write_xmitlet(data); + sock_put((struct sock *)data); } void dccp_init_xmit_timers(struct sock *sk) { - dccp_init_write_xmit_timer(sk); + struct dccp_sock *dp = dccp_sk(sk); + + tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); + setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, + (unsigned long)sk); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } -- cgit v1.2.2 From 146993cf5174472644ed11bd5fb539f0af8bfa49 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Refine the wait-for-ccid mechanism This extends the existing wait-for-ccid routine so that it may be used with different types of CCID. It further addresses the problems listed below. The code looks if the write queue is non-empty and grants the TX CCID up to `timeout' jiffies to drain the queue. It will instead purge that queue if * the delay suggested by the CCID exceeds the time budget; * a socket error occurred while waiting for the CCID; * there is a signal pending (eg. annoyed user pressed Control-C); * the CCID does not support delays (we don't know how long it will take). D e t a i l s [can be removed] ------------------------------- DCCP's sending mechanism functions a bit like non-blocking I/O: dccp_sendmsg() will enqueue up to net.dccp.default.tx_qlen packets (default=5), without waiting for them to be released to the network. Rate-based CCIDs, such as CCID3/4, can impose sending delays of up to maximally 64 seconds (t_mbi in RFC 3448). Hence the write queue may still contain packets when the application closes. Since the write queue is congestion-controlled by the CCID, draining the queue is also under control of the CCID. There are several problems that needed to be addressed: 1) The queue-drain mechanism only works with rate-based CCIDs. If CCID2 for example has a full TX queue and becomes network-limited just as the application wants to close, then waiting for CCID2 to become unblocked could lead to an indefinite delay (i.e., application "hangs"). 2) Since each TX CCID in turn uses a feedback mechanism, there may be changes in its sending policy while the queue is being drained. This can lead to further delays during which the application will not be able to terminate. 3) The minimum wait time for CCID3/4 can be expected to be the queue length times the current inter-packet delay. For example if tx_qlen=100 and a delay of 15 ms is used for each packet, then the application would have to wait for a minimum of 1.5 seconds before being allowed to exit. 4) There is no way for the user/application to control this behaviour. It would be good to use the timeout argument of dccp_close() as an upper bound. Then the maximum time that an application is willing to wait for its CCIDs to can be set via the SO_LINGER option. These problems are addressed by giving the CCID a grace period of up to the `timeout' value. The wait-for-ccid function is, as before, used when the application (a) has read all the data in its receive buffer and (b) if SO_LINGER was set with a non-zero linger time, or (c) the socket is either in the OPEN (active close) or in the PASSIVE_CLOSEREQ state (client application closes after receiving CloseReq). In addition, there is a catch-all case by calling __skb_queue_purge() after waiting for the CCID. This is necessary since the write queue may still have data when (a) the host has been passively-closed, (b) abnormal termination (unread data, zero linger time), (c) wait-for-ccid could not finish within the given time limit. Signed-off-by: Gerrit Renker --- net/dccp/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp/timer.c') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 9369aca4b0e9..e02d5a94f4c0 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -249,7 +249,7 @@ static void dccp_write_xmitlet(unsigned long data) if (sock_owned_by_user(sk)) sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); else - dccp_write_xmit(sk, 0); + dccp_write_xmit(sk); bh_unlock_sock(sk); } -- cgit v1.2.2 From f76fd327a8b32d3ad5b51639faf6f54d18be0981 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-3: Runtime verification of timer resolution The DCCP base time resolution is 10 microseconds (RFC 4340, 13.1 ... 13.3). Using a timer with a lower resolution was found to trigger the following bug warnings/problems on high-speed networks (e.g. local loopback): * RTT samples are rounded down to 0 if below resolution; * in some cases, negative RTT samples were observed; * the CCID-3 feedback timer complains that the feedback interval is 0, since the feedback interval is in the order of 1 RTT or less and RTT measurement rounded this down to 0; On an Intel computer this will for instance happen when using a boot-time parameter of "clocksource=jiffies". The following system log messages were observed: 11:24:00 kernel: BUG: delta (0) <= 0 at ccid3_hc_rx_send_feedback() 11:26:12 kernel: BUG: delta (0) <= 0 at ccid3_hc_rx_send_feedback() 11:26:30 kernel: dccp_sample_rtt: unusable RTT sample 0, using min 11:26:30 last message repeated 5 times This patch defines a global constant for the time resolution, adds this in timer.c, and checks the available clock resolution at CCID-3 module load time. When the resolution is worse than 10 microseconds, module loading exits with a message "socket type not supported". Signed-off-by: Gerrit Renker --- net/dccp/timer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/dccp/timer.c') diff --git a/net/dccp/timer.c b/net/dccp/timer.c index e02d5a94f4c0..16359e29e7f5 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -281,8 +281,7 @@ u32 dccp_timestamp(void) { s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); - do_div(delta, 10); - return delta; + return div_u64(delta, DCCP_TIME_RESOLUTION); } EXPORT_SYMBOL_GPL(dccp_timestamp); -- cgit v1.2.2