aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:30:19 -0400
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:45:38 -0400
commit146993cf5174472644ed11bd5fb539f0af8bfa49 (patch)
treeb2c5343ad610fe113425a3663f0dc3ddb478911b /net
parente7937772d7a2b0127cc4cbc67bc594e139fdaf63 (diff)
dccp: Refine the wait-for-ccid mechanism
This extends the existing wait-for-ccid routine so that it may be used with different types of CCID. It further addresses the problems listed below. The code checks whether the write queue is non-empty and grants the TX CCID up to `timeout' jiffies to drain the queue. It will instead purge that queue if * the delay suggested by the CCID exceeds the time budget; * a socket error occurred while waiting for the CCID; * there is a signal pending (e.g. annoyed user pressed Control-C); * the CCID does not support delays (we don't know how long it will take). D e t a i l s [can be removed] ------------------------------- DCCP's sending mechanism functions a bit like non-blocking I/O: dccp_sendmsg() will enqueue up to net.dccp.default.tx_qlen packets (default=5), without waiting for them to be released to the network. Rate-based CCIDs, such as CCID3/4, can impose sending delays of up to 64 seconds (t_mbi in RFC 3448). Hence the write queue may still contain packets when the application closes. Since the write queue is congestion-controlled by the CCID, draining the queue is also under control of the CCID. There are several problems that needed to be addressed: 1) The queue-drain mechanism only works with rate-based CCIDs. If CCID2 for example has a full TX queue and becomes network-limited just as the application wants to close, then waiting for CCID2 to become unblocked could lead to an indefinite delay (i.e., application "hangs"). 2) Since each TX CCID in turn uses a feedback mechanism, there may be changes in its sending policy while the queue is being drained. This can lead to further delays during which the application will not be able to terminate. 3) The minimum wait time for CCID3/4 can be expected to be the queue length times the current inter-packet delay. For example if tx_qlen=100 and a delay of 15 ms is used for each packet, then the application would have to wait for a minimum of 1.5 seconds before being allowed to exit. 
4) There is no way for the user/application to control this behaviour. It would be good to use the timeout argument of dccp_close() as an upper bound. Then the maximum time that an application is willing to wait for its CCIDs to drain the write queue can be set via the SO_LINGER option. These problems are addressed by giving the CCID a grace period of up to the `timeout' value. The wait-for-ccid function is, as before, used when the application (a) has read all the data in its receive buffer and (b) if SO_LINGER was set with a non-zero linger time, or (c) the socket is either in the OPEN (active close) or in the PASSIVE_CLOSEREQ state (client application closes after receiving CloseReq). In addition, there is a catch-all case: __skb_queue_purge() is called after waiting for the CCID. This is necessary since the write queue may still have data when (a) the host has been passively closed, (b) the termination is abnormal (unread data, zero linger time), or (c) wait-for-ccid could not finish within the given time limit. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net')
-rw-r--r--net/dccp/dccp.h3
-rw-r--r--net/dccp/output.c115
-rw-r--r--net/dccp/proto.c15
-rw-r--r--net/dccp/timer.c2
4 files changed, 82 insertions, 53 deletions
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1e65378eea3f..74c90cd27677 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -234,8 +234,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
234extern void dccp_send_sync(struct sock *sk, const u64 seq, 234extern void dccp_send_sync(struct sock *sk, const u64 seq,
235 const enum dccp_pkt_type pkt_type); 235 const enum dccp_pkt_type pkt_type);
236 236
237extern void dccp_write_xmit(struct sock *sk, int block); 237extern void dccp_write_xmit(struct sock *sk);
238extern void dccp_write_space(struct sock *sk); 238extern void dccp_write_space(struct sock *sk);
239extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
239 240
240extern void dccp_init_xmit_timers(struct sock *sk); 241extern void dccp_init_xmit_timers(struct sock *sk);
241static inline void dccp_clear_xmit_timers(struct sock *sk) 242static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 9afd58e39e23..39056dc61355 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -206,49 +206,29 @@ void dccp_write_space(struct sock *sk)
206} 206}
207 207
208/** 208/**
209 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet 209 * dccp_wait_for_ccid - Await CCID send permission
210 * @sk: socket to wait for 210 * @sk: socket to wait for
211 * @skb: current skb to pass on for waiting 211 * @delay: timeout in jiffies
212 * @delay: sleep timeout in milliseconds (> 0) 212 * This is used by CCIDs which need to delay the send time in process context.
213 * This function is called by default when the socket is closed, and
214 * when a non-zero linger time is set on the socket. For consistency
215 */ 213 */
216static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) 214static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
217{ 215{
218 struct dccp_sock *dp = dccp_sk(sk);
219 DEFINE_WAIT(wait); 216 DEFINE_WAIT(wait);
220 unsigned long jiffdelay; 217 long remaining;
221 int rc;
222
223 do {
224 dccp_pr_debug("delayed send by %d msec\n", delay);
225 jiffdelay = msecs_to_jiffies(delay);
226
227 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
228 218
229 sk->sk_write_pending++; 219 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
230 release_sock(sk); 220 sk->sk_write_pending++;
231 schedule_timeout(jiffdelay); 221 release_sock(sk);
232 lock_sock(sk);
233 sk->sk_write_pending--;
234 222
235 if (sk->sk_err) 223 remaining = schedule_timeout(delay);
236 goto do_error;
237 if (signal_pending(current))
238 goto do_interrupted;
239 224
240 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 225 lock_sock(sk);
241 } while ((delay = rc) > 0); 226 sk->sk_write_pending--;
242out:
243 finish_wait(sk->sk_sleep, &wait); 227 finish_wait(sk->sk_sleep, &wait);
244 return rc; 228
245 229 if (signal_pending(current) || sk->sk_err)
246do_error: 230 return -1;
247 rc = -EPIPE; 231 return remaining;
248 goto out;
249do_interrupted:
250 rc = -EINTR;
251 goto out;
252} 232}
253 233
254/** 234/**
@@ -311,7 +291,53 @@ static void dccp_xmit_packet(struct sock *sk)
311 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); 291 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
312} 292}
313 293
314void dccp_write_xmit(struct sock *sk, int block) 294/**
295 * dccp_flush_write_queue - Drain queue at end of connection
296 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
297 * happen that the TX queue is not empty at the end of a connection. We give the
298 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
299 * returns with a non-empty write queue, it will be purged later.
300 */
301void dccp_flush_write_queue(struct sock *sk, long *time_budget)
302{
303 struct dccp_sock *dp = dccp_sk(sk);
304 struct sk_buff *skb;
305 long delay, rc;
306
307 while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
308 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
309
310 switch (ccid_packet_dequeue_eval(rc)) {
311 case CCID_PACKET_WILL_DEQUEUE_LATER:
312 /*
313 * If the CCID determines when to send, the next sending
314 * time is unknown or the CCID may not even send again
315 * (e.g. remote host crashes or lost Ack packets).
316 */
317 DCCP_WARN("CCID did not manage to send all packets\n");
318 return;
319 case CCID_PACKET_DELAY:
320 delay = msecs_to_jiffies(rc);
321 if (delay > *time_budget)
322 return;
323 rc = dccp_wait_for_ccid(sk, delay);
324 if (rc < 0)
325 return;
326 *time_budget -= (delay - rc);
327 /* check again if we can send now */
328 break;
329 case CCID_PACKET_SEND_AT_ONCE:
330 dccp_xmit_packet(sk);
331 break;
332 case CCID_PACKET_ERR:
333 skb_dequeue(&sk->sk_write_queue);
334 kfree_skb(skb);
335 dccp_pr_debug("packet discarded due to err=%ld\n", rc);
336 }
337 }
338}
339
340void dccp_write_xmit(struct sock *sk)
315{ 341{
316 struct dccp_sock *dp = dccp_sk(sk); 342 struct dccp_sock *dp = dccp_sk(sk);
317 struct sk_buff *skb; 343 struct sk_buff *skb;
@@ -323,19 +349,9 @@ void dccp_write_xmit(struct sock *sk, int block)
323 case CCID_PACKET_WILL_DEQUEUE_LATER: 349 case CCID_PACKET_WILL_DEQUEUE_LATER:
324 return; 350 return;
325 case CCID_PACKET_DELAY: 351 case CCID_PACKET_DELAY:
326 if (!block) { 352 sk_reset_timer(sk, &dp->dccps_xmit_timer,
327 sk_reset_timer(sk, &dp->dccps_xmit_timer, 353 jiffies + msecs_to_jiffies(rc));
328 msecs_to_jiffies(rc)+jiffies); 354 return;
329 return;
330 }
331 rc = dccp_wait_for_ccid(sk, skb, rc);
332 if (rc && rc != -EINTR) {
333 DCCP_BUG("err=%d after dccp_wait_for_ccid", rc);
334 skb_dequeue(&sk->sk_write_queue);
335 kfree_skb(skb);
336 break;
337 }
338 /* fall through */
339 case CCID_PACKET_SEND_AT_ONCE: 355 case CCID_PACKET_SEND_AT_ONCE:
340 dccp_xmit_packet(sk); 356 dccp_xmit_packet(sk);
341 break; 357 break;
@@ -660,7 +676,6 @@ void dccp_send_close(struct sock *sk, const int active)
660 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; 676 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
661 677
662 if (active) { 678 if (active) {
663 dccp_write_xmit(sk, 1);
664 dccp_skb_entail(sk, skb); 679 dccp_skb_entail(sk, skb);
665 dccp_transmit_skb(sk, skb_clone(skb, prio)); 680 dccp_transmit_skb(sk, skb_clone(skb, prio));
666 /* 681 /*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 11905e0cf8f7..8c125ffab1c5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -735,7 +735,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
735 goto out_discard; 735 goto out_discard;
736 736
737 skb_queue_tail(&sk->sk_write_queue, skb); 737 skb_queue_tail(&sk->sk_write_queue, skb);
738 dccp_write_xmit(sk,0); 738 dccp_write_xmit(sk);
739out_release: 739out_release:
740 release_sock(sk); 740 release_sock(sk);
741 return rc ? : len; 741 return rc ? : len;
@@ -958,9 +958,22 @@ void dccp_close(struct sock *sk, long timeout)
958 /* Check zero linger _after_ checking for unread data. */ 958 /* Check zero linger _after_ checking for unread data. */
959 sk->sk_prot->disconnect(sk, 0); 959 sk->sk_prot->disconnect(sk, 0);
960 } else if (sk->sk_state != DCCP_CLOSED) { 960 } else if (sk->sk_state != DCCP_CLOSED) {
961 /*
962 * Normal connection termination. May need to wait if there are
963 * still packets in the TX queue that are delayed by the CCID.
964 */
965 dccp_flush_write_queue(sk, &timeout);
961 dccp_terminate_connection(sk); 966 dccp_terminate_connection(sk);
962 } 967 }
963 968
969 /*
970 * Flush write queue. This may be necessary in several cases:
971 * - we have been closed by the peer but still have application data;
972 * - abortive termination (unread data or zero linger time),
973 * - normal termination but queue could not be flushed within time limit
974 */
975 __skb_queue_purge(&sk->sk_write_queue);
976
964 sk_stream_wait_close(sk, timeout); 977 sk_stream_wait_close(sk, timeout);
965 978
966adjudge_to_death: 979adjudge_to_death:
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 9369aca4b0e9..e02d5a94f4c0 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -249,7 +249,7 @@ static void dccp_write_xmitlet(unsigned long data)
249 if (sock_owned_by_user(sk)) 249 if (sock_owned_by_user(sk))
250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); 250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
251 else 251 else
252 dccp_write_xmit(sk, 0); 252 dccp_write_xmit(sk);
253 bh_unlock_sock(sk); 253 bh_unlock_sock(sk);
254} 254}
255 255