Diffstat (limited to 'net/dccp/output.c')
-rw-r--r--	net/dccp/output.c | 279
1 file changed, 177 insertions(+), 102 deletions(-)

diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3df..2532797a8009 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -26,11 +26,13 @@ static inline void dccp_event_ack_sent(struct sock *sk)
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
-static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
+/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
+static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	skb_set_owner_w(skb, sk);
 	WARN_ON(sk->sk_send_head);
 	sk->sk_send_head = skb;
+	return skb_clone(sk->sk_send_head, gfp_any());
 }
 
 /*
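
Since dccp_skb_entail() now returns a clone that is ready to go out, the
entail-and-transmit call sites later in this patch collapse into a single
expression:

	/* queue the original for retransmission, send a fresh clone now */
	dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
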
@@ -161,21 +163,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	u32 ccmps = dccp_determine_ccmps(dp);
-	int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
+	u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
 
 	/* Account for header lengths and IPv4/v6 option overhead */
 	cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
 		    sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
 
 	/*
-	 * FIXME: this should come from the CCID infrastructure, where, say,
-	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
-	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
-	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
-	 * make it a multiple of 4
+	 * Leave enough headroom for common DCCP header options.
+	 * This only considers options which may appear on DCCP-Data packets, as
+	 * per table 3 in RFC 4340, 5.8. When running out of space for other
+	 * options (eg. Ack Vector which can take up to 255 bytes), it is better
+	 * to schedule a separate Ack. Thus we leave headroom for the following:
+	 *  - 1 byte for Slow Receiver (11.6)
+	 *  - 6 bytes for Timestamp (13.1)
+	 *  - 10 bytes for Timestamp Echo (13.3)
+	 *  - 8 bytes for NDP count (7.7, when activated)
+	 *  - 6 bytes for Data Checksum (9.3)
+	 *  - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
 	 */
-
-	cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+	cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
+			   (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
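
Worked example of the reserved headroom: with NDP counts active and an Ack
Vector-enabled CCID, and taking DCCPAV_MIN_OPTLEN as a 16-byte average Ack
Vector estimate (an assumption; the exact value lives in ackvec.h), the sum is
1 + 6 + 10 + 8 + 6 + 16 = 47, which roundup(47, 4) pads to 48 bytes. With both
features off it is 1 + 6 + 10 + 6 = 23, padded to 24 bytes.
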
@@ -200,95 +208,158 @@ void dccp_write_space(struct sock *sk)
 }
 
 /**
- * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * dccp_wait_for_ccid - Await CCID send permission
  * @sk: socket to wait for
- * @skb: current skb to pass on for waiting
- * @delay: sleep timeout in milliseconds (> 0)
- * This function is called by default when the socket is closed, and
- * when a non-zero linger time is set on the socket. For consistency
+ * @delay: timeout in jiffies
+ * This is used by CCIDs which need to delay the send time in process context.
  */
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
+static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
 	DEFINE_WAIT(wait);
-	unsigned long jiffdelay;
-	int rc;
+	long remaining;
 
-	do {
-		dccp_pr_debug("delayed send by %d msec\n", delay);
-		jiffdelay = msecs_to_jiffies(delay);
+	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	sk->sk_write_pending++;
+	release_sock(sk);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	remaining = schedule_timeout(delay);
 
-		sk->sk_write_pending++;
-		release_sock(sk);
-		schedule_timeout(jiffdelay);
-		lock_sock(sk);
-		sk->sk_write_pending--;
+	lock_sock(sk);
+	sk->sk_write_pending--;
+	finish_wait(sk->sk_sleep, &wait);
 
-		if (sk->sk_err)
-			goto do_error;
-		if (signal_pending(current))
-			goto do_interrupted;
+	if (signal_pending(current) || sk->sk_err)
+		return -1;
+	return remaining;
+}
 
-		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-	} while ((delay = rc) > 0);
-out:
-	finish_wait(sk->sk_sleep, &wait);
-	return rc;
-
-do_error:
-	rc = -EPIPE;
-	goto out;
-do_interrupted:
-	rc = -EINTR;
-	goto out;
+/**
+ * dccp_xmit_packet - Send data packet under control of CCID
+ * Transmits next-queued payload and informs CCID to account for the packet.
+ */
+static void dccp_xmit_packet(struct sock *sk)
+{
+	int err, len;
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb = dccp_qpolicy_pop(sk);
+
+	if (unlikely(skb == NULL))
+		return;
+	len = skb->len;
+
+	if (sk->sk_state == DCCP_PARTOPEN) {
+		const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+		/*
+		 * See 8.1.5 - Handshake Completion.
+		 *
+		 * For robustness we resend Confirm options until the client has
+		 * entered OPEN. During the initial feature negotiation, the MPS
+		 * is smaller than usual, reduced by the Change/Confirm options.
+		 */
+		if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+			DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+			dccp_send_ack(sk);
+			dccp_feat_list_purge(&dp->dccps_featneg);
+		}
+
+		inet_csk_schedule_ack(sk);
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  inet_csk(sk)->icsk_rto,
+					  DCCP_RTO_MAX);
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+	} else if (dccp_ack_pending(sk)) {
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+	} else {
+		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
+	}
+
+	err = dccp_transmit_skb(sk, skb);
+	if (err)
+		dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+	/*
+	 * Register this one as sent even if an error occurred. To the remote
+	 * end a local packet drop is indistinguishable from network loss, i.e.
+	 * any local drop will eventually be reported via receiver feedback.
+	 */
+	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+
+	/*
+	 * If the CCID needs to transfer additional header options out-of-band
+	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
+	 * flag to schedule a Sync. The Sync will automatically incorporate all
+	 * currently pending header options, thus clearing the backlog.
+	 */
+	if (dp->dccps_sync_scheduled)
+		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
 }
 
-void dccp_write_xmit(struct sock *sk, int block)
+/**
+ * dccp_flush_write_queue - Drain queue at end of connection
+ * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
+ * happen that the TX queue is not empty at the end of a connection. We give the
+ * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
+ * returns with a non-empty write queue, it will be purged later.
+ */
+void dccp_flush_write_queue(struct sock *sk, long *time_budget)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
+	long delay, rc;
+
+	while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
+		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
-	while ((skb = skb_peek(&sk->sk_write_queue))) {
-		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
-		if (err > 0) {
-			if (!block) {
-				sk_reset_timer(sk, &dp->dccps_xmit_timer,
-						msecs_to_jiffies(err)+jiffies);
-				break;
-			} else
-				err = dccp_wait_for_ccid(sk, skb, err);
-			if (err && err != -EINTR)
-				DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
+		switch (ccid_packet_dequeue_eval(rc)) {
+		case CCID_PACKET_WILL_DEQUEUE_LATER:
+			/*
+			 * If the CCID determines when to send, the next sending
+			 * time is unknown or the CCID may not even send again
+			 * (e.g. remote host crashes or lost Ack packets).
+			 */
+			DCCP_WARN("CCID did not manage to send all packets\n");
+			return;
+		case CCID_PACKET_DELAY:
+			delay = msecs_to_jiffies(rc);
+			if (delay > *time_budget)
+				return;
+			rc = dccp_wait_for_ccid(sk, delay);
+			if (rc < 0)
+				return;
+			*time_budget -= (delay - rc);
+			/* check again if we can send now */
+			break;
+		case CCID_PACKET_SEND_AT_ONCE:
+			dccp_xmit_packet(sk);
+			break;
+		case CCID_PACKET_ERR:
+			skb_dequeue(&sk->sk_write_queue);
+			kfree_skb(skb);
+			dccp_pr_debug("packet discarded due to err=%ld\n", rc);
 		}
+	}
+}
 
-		skb_dequeue(&sk->sk_write_queue);
-		if (err == 0) {
-			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-			const int len = skb->len;
+void dccp_write_xmit(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
 
-			if (sk->sk_state == DCCP_PARTOPEN) {
-				/* See 8.1.5. Handshake Completion */
-				inet_csk_schedule_ack(sk);
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  inet_csk(sk)->icsk_rto,
-						  DCCP_RTO_MAX);
-				dcb->dccpd_type = DCCP_PKT_DATAACK;
-			} else if (dccp_ack_pending(sk))
-				dcb->dccpd_type = DCCP_PKT_DATAACK;
-			else
-				dcb->dccpd_type = DCCP_PKT_DATA;
+	while ((skb = dccp_qpolicy_top(sk))) {
+		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
-			err = dccp_transmit_skb(sk, skb);
-			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
-			if (err)
-				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
-					 err);
-		} else {
-			dccp_pr_debug("packet discarded due to err=%d\n", err);
-			kfree_skb(skb);
+		switch (ccid_packet_dequeue_eval(rc)) {
+		case CCID_PACKET_WILL_DEQUEUE_LATER:
+			return;
+		case CCID_PACKET_DELAY:
+			sk_reset_timer(sk, &dp->dccps_xmit_timer,
+				       jiffies + msecs_to_jiffies(rc));
+			return;
+		case CCID_PACKET_SEND_AT_ONCE:
+			dccp_xmit_packet(sk);
+			break;
+		case CCID_PACKET_ERR:
+			dccp_qpolicy_drop(sk, skb);
+			dccp_pr_debug("packet discarded due to err=%d\n", rc);
 		}
 	}
 }
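
Both loops above branch on the tx CCID's send_packet() return code via
ccid_packet_dequeue_eval(). A minimal sketch of that mapping, consistent with
the case labels used here (the constant values and the delay ceiling are
assumptions, not quoted from ccid.h):

	enum ccid_dequeueing_decision {
		CCID_PACKET_SEND_AT_ONCE       = 0x00000, /* "green light"  */
		CCID_PACKET_DELAY_MAX          = 0x0FFFF, /* max delay, ms  */
		CCID_PACKET_DELAY              = 0x10000, /* "yellow light" */
		CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID dequeues  */
		CCID_PACKET_ERR                = 0xF0000, /* error          */
	};

	static inline int ccid_packet_dequeue_eval(const int return_code)
	{
		if (return_code < 0)            /* CCID signalled an error */
			return CCID_PACKET_ERR;
		if (return_code == 0)           /* packet may go out now   */
			return CCID_PACKET_SEND_AT_ONCE;
		if (return_code <= CCID_PACKET_DELAY_MAX)
			return CCID_PACKET_DELAY; /* value is a msec delay */
		return return_code;             /* e.g. WILL_DEQUEUE_LATER */
	}
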
@@ -339,10 +410,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
 	DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
 
-	if (dccp_insert_options_rsk(dreq, skb)) {
-		kfree_skb(skb);
-		return NULL;
-	}
+	/* Resolve feature dependencies resulting from choice of CCID */
+	if (dccp_feat_server_ccid_dependencies(dreq))
+		goto response_failed;
+
+	if (dccp_insert_options_rsk(dreq, skb))
+		goto response_failed;
 
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -363,6 +436,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	inet_rsk(req)->acked = 1;
 	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 	return skb;
+response_failed:
+	kfree_skb(skb);
+	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(dccp_make_response);
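
The dependency step matters because the negotiated CCID can imply further
features: CCID-2, for instance, requires Ack Vectors (RFC 4341). Schematically,
with a hypothetical helper name:

	/* hypothetical sketch: enable features implied by the server's CCID */
	if (server_tx_ccid == DCCPC_CCID2)
		enable_dependent_feature(dreq, DCCPF_SEND_ACK_VECTOR);
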
@@ -447,8 +523,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
 /*
  * Do all connect socket setups that can be done AF independent.
  */
-static inline void dccp_connect_init(struct sock *sk)
+int dccp_connect(struct sock *sk)
 {
+	struct sk_buff *skb;
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -458,19 +535,13 @@ static inline void dccp_connect_init(struct sock *sk)
 
 	dccp_sync_mss(sk, dst_mtu(dst));
 
+	/* do not connect if feature negotiation setup fails */
+	if (dccp_feat_finalise_settings(dccp_sk(sk)))
+		return -EPROTO;
+
 	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
 	dp->dccps_gar = dp->dccps_iss;
 
-	icsk->icsk_retransmits = 0;
-}
-
-int dccp_connect(struct sock *sk)
-{
-	struct sk_buff *skb;
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	dccp_connect_init(sk);
-
 	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
 	if (unlikely(skb == NULL))
 		return -ENOBUFS;
@@ -480,11 +551,11 @@ int dccp_connect(struct sock *sk)
 
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
 
-	dccp_skb_entail(sk, skb);
-	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
+	dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
 	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the REQUEST until an answer. */
+	icsk->icsk_retransmits = 0;
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 				  icsk->icsk_rto, DCCP_RTO_MAX);
 	return 0;
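
The Request entailed above stays on sk->sk_send_head, which is what the
retransmission timer works from; schematically, the resend path just clones it
again (a sketch of the existing dccp_retransmit_skb() behaviour, unchanged by
this patch):

	/* the entailed packet is never consumed, only cloned per (re)send */
	dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC));
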
@@ -571,6 +642,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
 	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
 
+	/*
+	 * Clear the flag in case the Sync was scheduled for out-of-band data,
+	 * such as carrying a long Ack Vector.
+	 */
+	dccp_sk(sk)->dccps_sync_scheduled = 0;
+
 	dccp_transmit_skb(sk, skb);
 }
 
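
The flag cleared here is the one dccp_xmit_packet() tests after each data
packet; the receive side can request such a Sync when header-option backlog
builds up. A hypothetical call site (the condition name is illustrative only):

	/* e.g. when a long Ack Vector no longer fits into Data packets */
	if (ackvec_backlog_exceeds_data_mps)	/* illustrative condition */
		dccp_sk(sk)->dccps_sync_scheduled = 1;
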
@@ -599,9 +676,7 @@ void dccp_send_close(struct sock *sk, const int active)
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
-		dccp_write_xmit(sk, 1);
-		dccp_skb_entail(sk, skb);
-		dccp_transmit_skb(sk, skb_clone(skb, prio));
+		skb = dccp_skb_entail(sk, skb);
 		/*
 		 * Retransmission timer for active-close: RFC 4340, 8.3 requires
 		 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -614,6 +689,6 @@ void dccp_send_close(struct sock *sk, const int active)
 		 */
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
-	} else
-		dccp_transmit_skb(sk, skb);
+	}
+	dccp_transmit_skb(sk, skb);
 }
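
With the blocking mode gone from dccp_write_xmit(), draining on close is the
job of dccp_flush_write_queue(). A plausible caller shape in dccp_close()
(hedged sketch; 'timeout' is the value passed into dccp_close(), and
dccp_terminate_connection() is assumed to be the teardown helper):

	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal termination: wait, bounded by 'timeout', while the
		 * CCID still delays packets sitting in the TX queue.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}
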