diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-04 01:30:19 -0400 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-04 01:45:38 -0400 |
commit | e7937772d7a2b0127cc4cbc67bc594e139fdaf63 (patch) | |
tree | 3d56098b6fcdecbf70453d74c2065a8e33134d1e | |
parent | f4a66ca4d2ff093c0f9111b449a248ffb8209b4d (diff) |
dccp: Extend CCID packet dequeueing interface
This extends the packet dequeuing interface of dccp_write_xmit() to allow
1. CCIDs to take care of timing when the next packet may be sent;
2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds).
The main purpose is to take CCID2 out of its polling mode (when it is network-
limited, it tries every millisecond to send, without interruption).
The interface can also be used to support other CCIDs.
The mode of operation for (1) is as follows:
* new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(),
* ccid_hc_tx_send_packet() detects that it may not send (e.g. window full),
* it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER',
* dccp_write_xmit() returns without further action;
* after some time the wait-condition for CCID becomes true,
* that CCID schedules the tasklet,
* tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(),
* since the wait-condition is now true, ccid_hc_tx_send_packet() returns "send now",
* packet is sent, and possibly more (since dccp_write_xmit() loops).
Code reuse: the tasklet function calls dccp_write_xmit(), the timer function
reduces to a wrapper around the same code.
If the tasklet finds that the socket is locked, it re-schedules the tasklet
function (not the tasklet itself) via dccps_xmit_timer after one jiffy.
Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a
local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets).
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-rw-r--r-- | include/linux/dccp.h | 4 | ||||
-rw-r--r-- | net/dccp/output.c | 129 | ||||
-rw-r--r-- | net/dccp/timer.c | 25 |
3 files changed, 98 insertions, 60 deletions
diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 83197b601a4f..eed52bcd35d0 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h | |||
@@ -463,7 +463,8 @@ struct dccp_ackvec; | |||
463 | * @dccps_hc_tx_insert_options - sender wants to add options when sending | 463 | * @dccps_hc_tx_insert_options - sender wants to add options when sending |
464 | * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) | 464 | * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) |
465 | * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" | 465 | * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" |
466 | * @dccps_xmit_timer - timer for when CCID is not ready to send | 466 | * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets |
467 | * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) | ||
467 | * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) | 468 | * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) |
468 | */ | 469 | */ |
469 | struct dccp_sock { | 470 | struct dccp_sock { |
@@ -504,6 +505,7 @@ struct dccp_sock { | |||
504 | __u8 dccps_hc_tx_insert_options:1; | 505 | __u8 dccps_hc_tx_insert_options:1; |
505 | __u8 dccps_server_timewait:1; | 506 | __u8 dccps_server_timewait:1; |
506 | __u8 dccps_sync_scheduled:1; | 507 | __u8 dccps_sync_scheduled:1; |
508 | struct tasklet_struct dccps_xmitlet; | ||
507 | struct timer_list dccps_xmit_timer; | 509 | struct timer_list dccps_xmit_timer; |
508 | }; | 510 | }; |
509 | 511 | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c index bfda071559f4..9afd58e39e23 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -251,65 +251,98 @@ do_interrupted: | |||
251 | goto out; | 251 | goto out; |
252 | } | 252 | } |
253 | 253 | ||
254 | /** | ||
255 | * dccp_xmit_packet - Send data packet under control of CCID | ||
256 | * Transmits next-queued payload and informs CCID to account for the packet. | ||
257 | */ | ||
258 | static void dccp_xmit_packet(struct sock *sk) | ||
259 | { | ||
260 | int err, len; | ||
261 | struct dccp_sock *dp = dccp_sk(sk); | ||
262 | struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); | ||
263 | |||
264 | if (unlikely(skb == NULL)) | ||
265 | return; | ||
266 | len = skb->len; | ||
267 | |||
268 | if (sk->sk_state == DCCP_PARTOPEN) { | ||
269 | const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; | ||
270 | /* | ||
271 | * See 8.1.5 - Handshake Completion. | ||
272 | * | ||
273 | * For robustness we resend Confirm options until the client has | ||
274 | * entered OPEN. During the initial feature negotiation, the MPS | ||
275 | * is smaller than usual, reduced by the Change/Confirm options. | ||
276 | */ | ||
277 | if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { | ||
278 | DCCP_WARN("Payload too large (%d) for featneg.\n", len); | ||
279 | dccp_send_ack(sk); | ||
280 | dccp_feat_list_purge(&dp->dccps_featneg); | ||
281 | } | ||
282 | |||
283 | inet_csk_schedule_ack(sk); | ||
284 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
285 | inet_csk(sk)->icsk_rto, | ||
286 | DCCP_RTO_MAX); | ||
287 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; | ||
288 | } else if (dccp_ack_pending(sk)) { | ||
289 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; | ||
290 | } else { | ||
291 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; | ||
292 | } | ||
293 | |||
294 | err = dccp_transmit_skb(sk, skb); | ||
295 | if (err) | ||
296 | dccp_pr_debug("transmit_skb() returned err=%d\n", err); | ||
297 | /* | ||
298 | * Register this one as sent even if an error occurred. To the remote | ||
299 | * end a local packet drop is indistinguishable from network loss, i.e. | ||
300 | * any local drop will eventually be reported via receiver feedback. | ||
301 | */ | ||
302 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); | ||
303 | |||
304 | /* | ||
305 | * If the CCID needs to transfer additional header options out-of-band | ||
306 | * (e.g. Ack Vectors or feature-negotiation options), it activates this | ||
307 | * flag to schedule a Sync. The Sync will automatically incorporate all | ||
308 | * currently pending header options, thus clearing the backlog. | ||
309 | */ | ||
310 | if (dp->dccps_sync_scheduled) | ||
311 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); | ||
312 | } | ||
313 | |||
254 | void dccp_write_xmit(struct sock *sk, int block) | 314 | void dccp_write_xmit(struct sock *sk, int block) |
255 | { | 315 | { |
256 | struct dccp_sock *dp = dccp_sk(sk); | 316 | struct dccp_sock *dp = dccp_sk(sk); |
257 | struct sk_buff *skb; | 317 | struct sk_buff *skb; |
258 | 318 | ||
259 | while ((skb = skb_peek(&sk->sk_write_queue))) { | 319 | while ((skb = skb_peek(&sk->sk_write_queue))) { |
260 | int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 320 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); |
261 | 321 | ||
262 | if (err > 0) { | 322 | switch (ccid_packet_dequeue_eval(rc)) { |
323 | case CCID_PACKET_WILL_DEQUEUE_LATER: | ||
324 | return; | ||
325 | case CCID_PACKET_DELAY: | ||
263 | if (!block) { | 326 | if (!block) { |
264 | sk_reset_timer(sk, &dp->dccps_xmit_timer, | 327 | sk_reset_timer(sk, &dp->dccps_xmit_timer, |
265 | msecs_to_jiffies(err)+jiffies); | 328 | msecs_to_jiffies(rc)+jiffies); |
329 | return; | ||
330 | } | ||
331 | rc = dccp_wait_for_ccid(sk, skb, rc); | ||
332 | if (rc && rc != -EINTR) { | ||
333 | DCCP_BUG("err=%d after dccp_wait_for_ccid", rc); | ||
334 | skb_dequeue(&sk->sk_write_queue); | ||
335 | kfree_skb(skb); | ||
266 | break; | 336 | break; |
267 | } else | 337 | } |
268 | err = dccp_wait_for_ccid(sk, skb, err); | 338 | /* fall through */ |
269 | if (err && err != -EINTR) | 339 | case CCID_PACKET_SEND_AT_ONCE: |
270 | DCCP_BUG("err=%d after dccp_wait_for_ccid", err); | 340 | dccp_xmit_packet(sk); |
271 | } | 341 | break; |
272 | 342 | case CCID_PACKET_ERR: | |
273 | skb_dequeue(&sk->sk_write_queue); | 343 | skb_dequeue(&sk->sk_write_queue); |
274 | if (err == 0) { | ||
275 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
276 | const int len = skb->len; | ||
277 | |||
278 | if (sk->sk_state == DCCP_PARTOPEN) { | ||
279 | const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; | ||
280 | /* | ||
281 | * See 8.1.5 - Handshake Completion. | ||
282 | * | ||
283 | * For robustness we resend Confirm options until the client has | ||
284 | * entered OPEN. During the initial feature negotiation, the MPS | ||
285 | * is smaller than usual, reduced by the Change/Confirm options. | ||
286 | */ | ||
287 | if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { | ||
288 | DCCP_WARN("Payload too large (%d) for featneg.\n", len); | ||
289 | dccp_send_ack(sk); | ||
290 | dccp_feat_list_purge(&dp->dccps_featneg); | ||
291 | } | ||
292 | |||
293 | inet_csk_schedule_ack(sk); | ||
294 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
295 | inet_csk(sk)->icsk_rto, | ||
296 | DCCP_RTO_MAX); | ||
297 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
298 | } else if (dccp_ack_pending(sk)) | ||
299 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
300 | else | ||
301 | dcb->dccpd_type = DCCP_PKT_DATA; | ||
302 | |||
303 | err = dccp_transmit_skb(sk, skb); | ||
304 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); | ||
305 | if (err) | ||
306 | DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", | ||
307 | err); | ||
308 | if (dp->dccps_sync_scheduled) | ||
309 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); | ||
310 | } else { | ||
311 | dccp_pr_debug("packet discarded due to err=%d\n", err); | ||
312 | kfree_skb(skb); | 344 | kfree_skb(skb); |
345 | dccp_pr_debug("packet discarded due to err=%d\n", rc); | ||
313 | } | 346 | } |
314 | } | 347 | } |
315 | } | 348 | } |
diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 162d1e683c39..9369aca4b0e9 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c | |||
@@ -237,32 +237,35 @@ out: | |||
237 | sock_put(sk); | 237 | sock_put(sk); |
238 | } | 238 | } |
239 | 239 | ||
240 | /* Transmit-delay timer: used by the CCIDs to delay actual send time */ | 240 | /** |
241 | static void dccp_write_xmit_timer(unsigned long data) | 241 | * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface |
242 | * See the comments above %ccid_dequeueing_decision for supported modes. | ||
243 | */ | ||
244 | static void dccp_write_xmitlet(unsigned long data) | ||
242 | { | 245 | { |
243 | struct sock *sk = (struct sock *)data; | 246 | struct sock *sk = (struct sock *)data; |
244 | struct dccp_sock *dp = dccp_sk(sk); | ||
245 | 247 | ||
246 | bh_lock_sock(sk); | 248 | bh_lock_sock(sk); |
247 | if (sock_owned_by_user(sk)) | 249 | if (sock_owned_by_user(sk)) |
248 | sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); | 250 | sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); |
249 | else | 251 | else |
250 | dccp_write_xmit(sk, 0); | 252 | dccp_write_xmit(sk, 0); |
251 | bh_unlock_sock(sk); | 253 | bh_unlock_sock(sk); |
252 | sock_put(sk); | ||
253 | } | 254 | } |
254 | 255 | ||
255 | static void dccp_init_write_xmit_timer(struct sock *sk) | 256 | static void dccp_write_xmit_timer(unsigned long data) |
256 | { | 257 | { |
257 | struct dccp_sock *dp = dccp_sk(sk); | 258 | dccp_write_xmitlet(data); |
258 | 259 | sock_put((struct sock *)data); | |
259 | setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, | ||
260 | (unsigned long)sk); | ||
261 | } | 260 | } |
262 | 261 | ||
263 | void dccp_init_xmit_timers(struct sock *sk) | 262 | void dccp_init_xmit_timers(struct sock *sk) |
264 | { | 263 | { |
265 | dccp_init_write_xmit_timer(sk); | 264 | struct dccp_sock *dp = dccp_sk(sk); |
265 | |||
266 | tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); | ||
267 | setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, | ||
268 | (unsigned long)sk); | ||
266 | inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, | 269 | inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, |
267 | &dccp_keepalive_timer); | 270 | &dccp_keepalive_timer); |
268 | } | 271 | } |