aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Gerrit Renker <gerrit@erg.abdn.ac.uk> 2008-09-04 01:30:19 -0400
committer: Gerrit Renker <gerrit@erg.abdn.ac.uk> 2008-09-04 01:45:38 -0400
commit: e7937772d7a2b0127cc4cbc67bc594e139fdaf63 (patch)
tree: 3d56098b6fcdecbf70453d74c2065a8e33134d1e
parent: f4a66ca4d2ff093c0f9111b449a248ffb8209b4d (diff)
dccp: Extend CCID packet dequeueing interface
This extends the packet dequeueing interface of dccp_write_xmit() to allow
 1. CCIDs to take care of timing when the next packet may be sent;
 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds).

The main purpose is to take CCID2 out of its polling mode (when it is network-limited, it tries every millisecond to send, without interruption). The interface can also be used to support other CCIDs.

The mode of operation for (2) is as follows:
 * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(),
 * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full),
 * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER',
 * dccp_write_xmit() returns without further action;
 * after some time the wait-condition for CCID becomes true,
 * that CCID schedules the tasklet,
 * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(),
 * since the wait-condition is now true, ccid_hc_tx_send_packet() returns "send now",
 * packet is sent, and possibly more (since dccp_write_xmit() loops).

Code reuse: the tasklet function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. If the tasklet finds that the socket is locked, it re-schedules the tasklet function (not the tasklet) after one jiffy.

Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-rw-r--r--  include/linux/dccp.h  4
-rw-r--r--  net/dccp/output.c  129
-rw-r--r--  net/dccp/timer.c  25
3 files changed, 98 insertions, 60 deletions
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 83197b601a4f..eed52bcd35d0 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -463,7 +463,8 @@ struct dccp_ackvec;
463 * @dccps_hc_tx_insert_options - sender wants to add options when sending 463 * @dccps_hc_tx_insert_options - sender wants to add options when sending
464 * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) 464 * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
465 * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" 465 * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
466 * @dccps_xmit_timer - timer for when CCID is not ready to send 466 * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
467 * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
467 * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) 468 * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
468 */ 469 */
469struct dccp_sock { 470struct dccp_sock {
@@ -504,6 +505,7 @@ struct dccp_sock {
504 __u8 dccps_hc_tx_insert_options:1; 505 __u8 dccps_hc_tx_insert_options:1;
505 __u8 dccps_server_timewait:1; 506 __u8 dccps_server_timewait:1;
506 __u8 dccps_sync_scheduled:1; 507 __u8 dccps_sync_scheduled:1;
508 struct tasklet_struct dccps_xmitlet;
507 struct timer_list dccps_xmit_timer; 509 struct timer_list dccps_xmit_timer;
508}; 510};
509 511
diff --git a/net/dccp/output.c b/net/dccp/output.c
index bfda071559f4..9afd58e39e23 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -251,65 +251,98 @@ do_interrupted:
251 goto out; 251 goto out;
252} 252}
253 253
254/**
255 * dccp_xmit_packet - Send data packet under control of CCID
256 * Transmits next-queued payload and informs CCID to account for the packet.
257 */
258static void dccp_xmit_packet(struct sock *sk)
259{
260 int err, len;
261 struct dccp_sock *dp = dccp_sk(sk);
262 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
263
264 if (unlikely(skb == NULL))
265 return;
266 len = skb->len;
267
268 if (sk->sk_state == DCCP_PARTOPEN) {
269 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
270 /*
271 * See 8.1.5 - Handshake Completion.
272 *
273 * For robustness we resend Confirm options until the client has
274 * entered OPEN. During the initial feature negotiation, the MPS
275 * is smaller than usual, reduced by the Change/Confirm options.
276 */
277 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
278 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
279 dccp_send_ack(sk);
280 dccp_feat_list_purge(&dp->dccps_featneg);
281 }
282
283 inet_csk_schedule_ack(sk);
284 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
285 inet_csk(sk)->icsk_rto,
286 DCCP_RTO_MAX);
287 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
288 } else if (dccp_ack_pending(sk)) {
289 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
290 } else {
291 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
292 }
293
294 err = dccp_transmit_skb(sk, skb);
295 if (err)
296 dccp_pr_debug("transmit_skb() returned err=%d\n", err);
297 /*
298 * Register this one as sent even if an error occurred. To the remote
299 * end a local packet drop is indistinguishable from network loss, i.e.
300 * any local drop will eventually be reported via receiver feedback.
301 */
302 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
303
304 /*
305 * If the CCID needs to transfer additional header options out-of-band
306 * (e.g. Ack Vectors or feature-negotiation options), it activates this
307 * flag to schedule a Sync. The Sync will automatically incorporate all
308 * currently pending header options, thus clearing the backlog.
309 */
310 if (dp->dccps_sync_scheduled)
311 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
312}
313
254void dccp_write_xmit(struct sock *sk, int block) 314void dccp_write_xmit(struct sock *sk, int block)
255{ 315{
256 struct dccp_sock *dp = dccp_sk(sk); 316 struct dccp_sock *dp = dccp_sk(sk);
257 struct sk_buff *skb; 317 struct sk_buff *skb;
258 318
259 while ((skb = skb_peek(&sk->sk_write_queue))) { 319 while ((skb = skb_peek(&sk->sk_write_queue))) {
260 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 320 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
261 321
262 if (err > 0) { 322 switch (ccid_packet_dequeue_eval(rc)) {
323 case CCID_PACKET_WILL_DEQUEUE_LATER:
324 return;
325 case CCID_PACKET_DELAY:
263 if (!block) { 326 if (!block) {
264 sk_reset_timer(sk, &dp->dccps_xmit_timer, 327 sk_reset_timer(sk, &dp->dccps_xmit_timer,
265 msecs_to_jiffies(err)+jiffies); 328 msecs_to_jiffies(rc)+jiffies);
329 return;
330 }
331 rc = dccp_wait_for_ccid(sk, skb, rc);
332 if (rc && rc != -EINTR) {
333 DCCP_BUG("err=%d after dccp_wait_for_ccid", rc);
334 skb_dequeue(&sk->sk_write_queue);
335 kfree_skb(skb);
266 break; 336 break;
267 } else 337 }
268 err = dccp_wait_for_ccid(sk, skb, err); 338 /* fall through */
269 if (err && err != -EINTR) 339 case CCID_PACKET_SEND_AT_ONCE:
270 DCCP_BUG("err=%d after dccp_wait_for_ccid", err); 340 dccp_xmit_packet(sk);
271 } 341 break;
272 342 case CCID_PACKET_ERR:
273 skb_dequeue(&sk->sk_write_queue); 343 skb_dequeue(&sk->sk_write_queue);
274 if (err == 0) {
275 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
276 const int len = skb->len;
277
278 if (sk->sk_state == DCCP_PARTOPEN) {
279 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
280 /*
281 * See 8.1.5 - Handshake Completion.
282 *
283 * For robustness we resend Confirm options until the client has
284 * entered OPEN. During the initial feature negotiation, the MPS
285 * is smaller than usual, reduced by the Change/Confirm options.
286 */
287 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
288 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
289 dccp_send_ack(sk);
290 dccp_feat_list_purge(&dp->dccps_featneg);
291 }
292
293 inet_csk_schedule_ack(sk);
294 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
295 inet_csk(sk)->icsk_rto,
296 DCCP_RTO_MAX);
297 dcb->dccpd_type = DCCP_PKT_DATAACK;
298 } else if (dccp_ack_pending(sk))
299 dcb->dccpd_type = DCCP_PKT_DATAACK;
300 else
301 dcb->dccpd_type = DCCP_PKT_DATA;
302
303 err = dccp_transmit_skb(sk, skb);
304 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
305 if (err)
306 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
307 err);
308 if (dp->dccps_sync_scheduled)
309 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
310 } else {
311 dccp_pr_debug("packet discarded due to err=%d\n", err);
312 kfree_skb(skb); 344 kfree_skb(skb);
345 dccp_pr_debug("packet discarded due to err=%d\n", rc);
313 } 346 }
314 } 347 }
315} 348}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 162d1e683c39..9369aca4b0e9 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
237 sock_put(sk); 237 sock_put(sk);
238} 238}
239 239
240/* Transmit-delay timer: used by the CCIDs to delay actual send time */ 240/**
241static void dccp_write_xmit_timer(unsigned long data) 241 * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
242 * See the comments above %ccid_dequeueing_decision for supported modes.
243 */
244static void dccp_write_xmitlet(unsigned long data)
242{ 245{
243 struct sock *sk = (struct sock *)data; 246 struct sock *sk = (struct sock *)data;
244 struct dccp_sock *dp = dccp_sk(sk);
245 247
246 bh_lock_sock(sk); 248 bh_lock_sock(sk);
247 if (sock_owned_by_user(sk)) 249 if (sock_owned_by_user(sk))
248 sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); 250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
249 else 251 else
250 dccp_write_xmit(sk, 0); 252 dccp_write_xmit(sk, 0);
251 bh_unlock_sock(sk); 253 bh_unlock_sock(sk);
252 sock_put(sk);
253} 254}
254 255
255static void dccp_init_write_xmit_timer(struct sock *sk) 256static void dccp_write_xmit_timer(unsigned long data)
256{ 257{
257 struct dccp_sock *dp = dccp_sk(sk); 258 dccp_write_xmitlet(data);
258 259 sock_put((struct sock *)data);
259 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
260 (unsigned long)sk);
261} 260}
262 261
263void dccp_init_xmit_timers(struct sock *sk) 262void dccp_init_xmit_timers(struct sock *sk)
264{ 263{
265 dccp_init_write_xmit_timer(sk); 264 struct dccp_sock *dp = dccp_sk(sk);
265
266 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
267 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
268 (unsigned long)sk);
266 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, 269 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
267 &dccp_keepalive_timer); 270 &dccp_keepalive_timer);
268} 271}