about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Gerrit Renker <gerrit@erg.abdn.ac.uk> 2010-10-27 15:16:27 -0400
committer: David S. Miller <davem@davemloft.net> 2010-10-28 13:27:01 -0400
commit: b1fcf55eea541af9efa5d39f5a0d1aec8ceca55d (patch)
tree: a021b6abde9c784d67ee0de3bb7fb31f7d5b2e9f
parent: dc841e30eaea9f9f83c9ab1ee0b3ef9e5c95ce8a (diff)
dccp: Refine the wait-for-ccid mechanism
This extends the existing wait-for-ccid routine so that it may be used with different types of CCID, addressing the following problems: 1) The queue-drain mechanism only works with rate-based CCIDs. If CCID-2 for example has a full TX queue and becomes network-limited just as the application wants to close, then waiting for CCID-2 to become unblocked could lead to an indefinite delay (i.e., application "hangs"). 2) Since each TX CCID in turn uses a feedback mechanism, there may be changes in its sending policy while the queue is being drained. This can lead to further delays during which the application will not be able to terminate. 3) The minimum wait time for CCID-3/4 can be expected to be the queue length times the current inter-packet delay. For example if tx_qlen=100 and a delay of 15 ms is used for each packet, then the application would have to wait for a minimum of 1.5 seconds before being allowed to exit. 4) There is no way for the user/application to control this behaviour. It would be good to use the timeout argument of dccp_close() as an upper bound. Then the maximum time that an application is willing to wait for its CCIDs to finish can be set via the SO_LINGER option. These problems are addressed by giving the CCID a grace period of up to the `timeout' value. The wait-for-ccid function is, as before, used when the application (a) has read all the data in its receive buffer and (b) if SO_LINGER was set with a non-zero linger time, or (c) the socket is either in the OPEN (active close) or in the PASSIVE_CLOSEREQ state (client application closes after receiving CloseReq). In addition, there is a catch-all case of __skb_queue_purge() after waiting for the CCID. This is necessary since the write queue may still have data when (a) the host has been passively-closed, (b) abnormal termination (unread data, zero linger time), (c) wait-for-ccid could not finish within the given time limit. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/dccp/dccp.h 5
-rw-r--r-- net/dccp/output.c 115
-rw-r--r-- net/dccp/proto.c 21
-rw-r--r-- net/dccp/timer.c 2
4 files changed, 89 insertions, 54 deletions
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3eb264b60823..a8ed459508b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,8 +243,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
243extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
244 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
245 245
246extern void dccp_write_xmit(struct sock *sk, int block); 246extern void dccp_write_xmit(struct sock *sk);
247extern void dccp_write_space(struct sock *sk); 247extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
248 249
249extern void dccp_init_xmit_timers(struct sock *sk); 250extern void dccp_init_xmit_timers(struct sock *sk);
250static inline void dccp_clear_xmit_timers(struct sock *sk) 251static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 11418a9a389d..45b91853f5ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -209,49 +209,29 @@ void dccp_write_space(struct sock *sk)
209} 209}
210 210
211/** 211/**
212 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet 212 * dccp_wait_for_ccid - Await CCID send permission
213 * @sk: socket to wait for 213 * @sk: socket to wait for
214 * @skb: current skb to pass on for waiting 214 * @delay: timeout in jiffies
215 * @delay: sleep timeout in milliseconds (> 0) 215 * This is used by CCIDs which need to delay the send time in process context.
216 * This function is called by default when the socket is closed, and
217 * when a non-zero linger time is set on the socket. For consistency
218 */ 216 */
219static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) 217static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
220{ 218{
221 struct dccp_sock *dp = dccp_sk(sk);
222 DEFINE_WAIT(wait); 219 DEFINE_WAIT(wait);
223 unsigned long jiffdelay; 220 long remaining;
224 int rc;
225
226 do {
227 dccp_pr_debug("delayed send by %d msec\n", delay);
228 jiffdelay = msecs_to_jiffies(delay);
229
230 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
231 221
232 sk->sk_write_pending++; 222 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
233 release_sock(sk); 223 sk->sk_write_pending++;
234 schedule_timeout(jiffdelay); 224 release_sock(sk);
235 lock_sock(sk);
236 sk->sk_write_pending--;
237 225
238 if (sk->sk_err) 226 remaining = schedule_timeout(delay);
239 goto do_error;
240 if (signal_pending(current))
241 goto do_interrupted;
242 227
243 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 228 lock_sock(sk);
244 } while ((delay = rc) > 0); 229 sk->sk_write_pending--;
245out:
246 finish_wait(sk_sleep(sk), &wait); 230 finish_wait(sk_sleep(sk), &wait);
247 return rc; 231
248 232 if (signal_pending(current) || sk->sk_err)
249do_error: 233 return -1;
250 rc = -EPIPE; 234 return remaining;
251 goto out;
252do_interrupted:
253 rc = -EINTR;
254 goto out;
255} 235}
256 236
257/** 237/**
@@ -305,7 +285,53 @@ static void dccp_xmit_packet(struct sock *sk)
305 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); 285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
306} 286}
307 287
308void dccp_write_xmit(struct sock *sk, int block) 288/**
289 * dccp_flush_write_queue - Drain queue at end of connection
290 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
291 * happen that the TX queue is not empty at the end of a connection. We give the
292 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
293 * returns with a non-empty write queue, it will be purged later.
294 */
295void dccp_flush_write_queue(struct sock *sk, long *time_budget)
296{
297 struct dccp_sock *dp = dccp_sk(sk);
298 struct sk_buff *skb;
299 long delay, rc;
300
301 while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
302 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
303
304 switch (ccid_packet_dequeue_eval(rc)) {
305 case CCID_PACKET_WILL_DEQUEUE_LATER:
306 /*
307 * If the CCID determines when to send, the next sending
308 * time is unknown or the CCID may not even send again
309 * (e.g. remote host crashes or lost Ack packets).
310 */
311 DCCP_WARN("CCID did not manage to send all packets\n");
312 return;
313 case CCID_PACKET_DELAY:
314 delay = msecs_to_jiffies(rc);
315 if (delay > *time_budget)
316 return;
317 rc = dccp_wait_for_ccid(sk, delay);
318 if (rc < 0)
319 return;
320 *time_budget -= (delay - rc);
321 /* check again if we can send now */
322 break;
323 case CCID_PACKET_SEND_AT_ONCE:
324 dccp_xmit_packet(sk);
325 break;
326 case CCID_PACKET_ERR:
327 skb_dequeue(&sk->sk_write_queue);
328 kfree_skb(skb);
329 dccp_pr_debug("packet discarded due to err=%ld\n", rc);
330 }
331 }
332}
333
334void dccp_write_xmit(struct sock *sk)
309{ 335{
310 struct dccp_sock *dp = dccp_sk(sk); 336 struct dccp_sock *dp = dccp_sk(sk);
311 struct sk_buff *skb; 337 struct sk_buff *skb;
@@ -317,19 +343,9 @@ void dccp_write_xmit(struct sock *sk, int block)
317 case CCID_PACKET_WILL_DEQUEUE_LATER: 343 case CCID_PACKET_WILL_DEQUEUE_LATER:
318 return; 344 return;
319 case CCID_PACKET_DELAY: 345 case CCID_PACKET_DELAY:
320 if (!block) { 346 sk_reset_timer(sk, &dp->dccps_xmit_timer,
321 sk_reset_timer(sk, &dp->dccps_xmit_timer, 347 jiffies + msecs_to_jiffies(rc));
322 msecs_to_jiffies(rc)+jiffies); 348 return;
323 return;
324 }
325 rc = dccp_wait_for_ccid(sk, skb, rc);
326 if (rc && rc != -EINTR) {
327 DCCP_BUG("err=%d after dccp_wait_for_ccid", rc);
328 skb_dequeue(&sk->sk_write_queue);
329 kfree_skb(skb);
330 break;
331 }
332 /* fall through */
333 case CCID_PACKET_SEND_AT_ONCE: 349 case CCID_PACKET_SEND_AT_ONCE:
334 dccp_xmit_packet(sk); 350 dccp_xmit_packet(sk);
335 break; 351 break;
@@ -648,7 +664,6 @@ void dccp_send_close(struct sock *sk, const int active)
648 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; 664 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
649 665
650 if (active) { 666 if (active) {
651 dccp_write_xmit(sk, 1);
652 dccp_skb_entail(sk, skb); 667 dccp_skb_entail(sk, skb);
653 dccp_transmit_skb(sk, skb_clone(skb, prio)); 668 dccp_transmit_skb(sk, skb_clone(skb, prio));
654 /* 669 /*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7e5fc04eb6d1..ef343d53fcea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -726,7 +726,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
726 goto out_discard; 726 goto out_discard;
727 727
728 skb_queue_tail(&sk->sk_write_queue, skb); 728 skb_queue_tail(&sk->sk_write_queue, skb);
729 dccp_write_xmit(sk,0); 729 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the
732 * network. Window-based CCIDs do not use this timer.
733 */
734 if (!timer_pending(&dp->dccps_xmit_timer))
735 dccp_write_xmit(sk);
730out_release: 736out_release:
731 release_sock(sk); 737 release_sock(sk);
732 return rc ? : len; 738 return rc ? : len;
@@ -951,9 +957,22 @@ void dccp_close(struct sock *sk, long timeout)
951 /* Check zero linger _after_ checking for unread data. */ 957 /* Check zero linger _after_ checking for unread data. */
952 sk->sk_prot->disconnect(sk, 0); 958 sk->sk_prot->disconnect(sk, 0);
953 } else if (sk->sk_state != DCCP_CLOSED) { 959 } else if (sk->sk_state != DCCP_CLOSED) {
960 /*
961 * Normal connection termination. May need to wait if there are
962 * still packets in the TX queue that are delayed by the CCID.
963 */
964 dccp_flush_write_queue(sk, &timeout);
954 dccp_terminate_connection(sk); 965 dccp_terminate_connection(sk);
955 } 966 }
956 967
968 /*
969 * Flush write queue. This may be necessary in several cases:
970 * - we have been closed by the peer but still have application data;
971 * - abortive termination (unread data or zero linger time),
972 * - normal termination but queue could not be flushed within time limit
973 */
974 __skb_queue_purge(&sk->sk_write_queue);
975
957 sk_stream_wait_close(sk, timeout); 976 sk_stream_wait_close(sk, timeout);
958 977
959adjudge_to_death: 978adjudge_to_death:
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 916f9d1dab36..7587870b7040 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -249,7 +249,7 @@ static void dccp_write_xmitlet(unsigned long data)
249 if (sock_owned_by_user(sk)) 249 if (sock_owned_by_user(sk))
250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); 250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
251 else 251 else
252 dccp_write_xmit(sk, 0); 252 dccp_write_xmit(sk);
253 bh_unlock_sock(sk); 253 bh_unlock_sock(sk);
254} 254}
255 255