Diffstat (limited to 'net/dccp/output.c')
 net/dccp/output.c | 279 +++++++++++++++-------------------------------------
 1 file changed, 102 insertions(+), 177 deletions(-)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2532797a8009..d06945c7d3df 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -26,13 +26,11 @@ static inline void dccp_event_ack_sent(struct sock *sk)
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
-/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
-static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
+static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	skb_set_owner_w(skb, sk);
 	WARN_ON(sk->sk_send_head);
 	sk->sk_send_head = skb;
-	return skb_clone(sk->sk_send_head, gfp_any());
 }
 
 /*
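Both versions of dccp_skb_entail() above implement the same retransmission pattern: the original skb is parked on sk->sk_send_head so the retransmit timer can resend it, and only a private copy is handed to the IP layer. The removed variant returns the clone so callers can entail and send in one expression; the restored variant leaves the cloning to the caller. A minimal sketch of the caller-side pattern, using only names that appear in this diff:

	dccp_skb_entail(sk, skb);	/* keep original on sk_send_head for rexmit */
	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));	/* send a copy now */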
@@ -163,27 +161,21 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	u32 ccmps = dccp_determine_ccmps(dp);
-	u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
+	int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
 
 	/* Account for header lengths and IPv4/v6 option overhead */
 	cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
 		    sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
 
 	/*
-	 * Leave enough headroom for common DCCP header options.
-	 * This only considers options which may appear on DCCP-Data packets, as
-	 * per table 3 in RFC 4340, 5.8. When running out of space for other
-	 * options (eg. Ack Vector which can take up to 255 bytes), it is better
-	 * to schedule a separate Ack. Thus we leave headroom for the following:
-	 * - 1 byte for Slow Receiver (11.6)
-	 * - 6 bytes for Timestamp (13.1)
-	 * - 10 bytes for Timestamp Echo (13.3)
-	 * - 8 bytes for NDP count (7.7, when activated)
-	 * - 6 bytes for Data Checksum (9.3)
-	 * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
+	 * FIXME: this should come from the CCID infrastructure, where, say,
+	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
+	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
+	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
+	 * make it a multiple of 4
 	 */
-	cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
-			   (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
+
+	cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
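The restored estimate reserves a fixed option budget rather than deriving it from the options in use on the connection: 5 (NDP) + 6 (Timestamp) + 10 (Timestamp Echo) + 6 (Elapsed Time) + 6 (Loss Event Rate) + 6 (Receive Rate) = 39 bytes, and the "+ 3" before the truncating division rounds that up to the next multiple of 4. A short sketch of the arithmetic (plain C, for illustration only):

	int opts = 5 + 6 + 10 + 6 + 6 + 6;	/* 39 bytes, rough estimate     */
	int reserved = ((opts + 3) / 4) * 4;	/* integer division: 42/4 = 10, */
						/* so 40 bytes off cur_mps      */

The removed code computed the same kind of bound with roundup(), but from the connection's actual state (NDP count, whether an Ack Vector is in use).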
@@ -208,158 +200,95 @@ void dccp_write_space(struct sock *sk)
 }
 
 /**
- * dccp_wait_for_ccid - Await CCID send permission
+ * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
  * @sk:    socket to wait for
- * @delay: timeout in jiffies
- * This is used by CCIDs which need to delay the send time in process context.
+ * @skb:   current skb to pass on for waiting
+ * @delay: sleep timeout in milliseconds (> 0)
+ * This function is called by default when the socket is closed, and
+ * when a non-zero linger time is set on the socket. For consistency
  */
-static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
+static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
 	DEFINE_WAIT(wait);
-	long remaining;
-
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
-	sk->sk_write_pending++;
-	release_sock(sk);
-
-	remaining = schedule_timeout(delay);
-
-	lock_sock(sk);
-	sk->sk_write_pending--;
-	finish_wait(sk->sk_sleep, &wait);
-
-	if (signal_pending(current) || sk->sk_err)
-		return -1;
-	return remaining;
-}
-
-/**
- * dccp_xmit_packet - Send data packet under control of CCID
- * Transmits next-queued payload and informs CCID to account for the packet.
- */
-static void dccp_xmit_packet(struct sock *sk)
-{
-	int err, len;
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb = dccp_qpolicy_pop(sk);
-
-	if (unlikely(skb == NULL))
-		return;
-	len = skb->len;
-
-	if (sk->sk_state == DCCP_PARTOPEN) {
-		const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
-		/*
-		 * See 8.1.5 - Handshake Completion.
-		 *
-		 * For robustness we resend Confirm options until the client has
-		 * entered OPEN. During the initial feature negotiation, the MPS
-		 * is smaller than usual, reduced by the Change/Confirm options.
-		 */
-		if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
-			DCCP_WARN("Payload too large (%d) for featneg.\n", len);
-			dccp_send_ack(sk);
-			dccp_feat_list_purge(&dp->dccps_featneg);
-		}
-
-		inet_csk_schedule_ack(sk);
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-					  inet_csk(sk)->icsk_rto,
-					  DCCP_RTO_MAX);
-		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
-	} else if (dccp_ack_pending(sk)) {
-		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
-	} else {
-		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
-	}
-
-	err = dccp_transmit_skb(sk, skb);
-	if (err)
-		dccp_pr_debug("transmit_skb() returned err=%d\n", err);
-	/*
-	 * Register this one as sent even if an error occurred. To the remote
-	 * end a local packet drop is indistinguishable from network loss, i.e.
-	 * any local drop will eventually be reported via receiver feedback.
-	 */
-	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
-
-	/*
-	 * If the CCID needs to transfer additional header options out-of-band
-	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
-	 * flag to schedule a Sync. The Sync will automatically incorporate all
-	 * currently pending header options, thus clearing the backlog.
-	 */
-	if (dp->dccps_sync_scheduled)
-		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
-}
-
-/**
- * dccp_flush_write_queue - Drain queue at end of connection
- * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
- * happen that the TX queue is not empty at the end of a connection. We give the
- * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
- * returns with a non-empty write queue, it will be purged later.
- */
-void dccp_flush_write_queue(struct sock *sk, long *time_budget)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb;
-	long delay, rc;
-
-	while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
-		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
-		switch (ccid_packet_dequeue_eval(rc)) {
-		case CCID_PACKET_WILL_DEQUEUE_LATER:
-			/*
-			 * If the CCID determines when to send, the next sending
-			 * time is unknown or the CCID may not even send again
-			 * (e.g. remote host crashes or lost Ack packets).
-			 */
-			DCCP_WARN("CCID did not manage to send all packets\n");
-			return;
-		case CCID_PACKET_DELAY:
-			delay = msecs_to_jiffies(rc);
-			if (delay > *time_budget)
-				return;
-			rc = dccp_wait_for_ccid(sk, delay);
-			if (rc < 0)
-				return;
-			*time_budget -= (delay - rc);
-			/* check again if we can send now */
-			break;
-		case CCID_PACKET_SEND_AT_ONCE:
-			dccp_xmit_packet(sk);
-			break;
-		case CCID_PACKET_ERR:
-			skb_dequeue(&sk->sk_write_queue);
-			kfree_skb(skb);
-			dccp_pr_debug("packet discarded due to err=%ld\n", rc);
-		}
-	}
-}
-
-void dccp_write_xmit(struct sock *sk)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb;
-
-	while ((skb = dccp_qpolicy_top(sk))) {
-		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
-		switch (ccid_packet_dequeue_eval(rc)) {
-		case CCID_PACKET_WILL_DEQUEUE_LATER:
-			return;
-		case CCID_PACKET_DELAY:
-			sk_reset_timer(sk, &dp->dccps_xmit_timer,
-				       jiffies + msecs_to_jiffies(rc));
-			return;
-		case CCID_PACKET_SEND_AT_ONCE:
-			dccp_xmit_packet(sk);
-			break;
-		case CCID_PACKET_ERR:
-			dccp_qpolicy_drop(sk, skb);
-			dccp_pr_debug("packet discarded due to err=%d\n", rc);
+	unsigned long jiffdelay;
+	int rc;
+
+	do {
+		dccp_pr_debug("delayed send by %d msec\n", delay);
+		jiffdelay = msecs_to_jiffies(delay);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		sk->sk_write_pending++;
+		release_sock(sk);
+		schedule_timeout(jiffdelay);
+		lock_sock(sk);
+		sk->sk_write_pending--;
+
+		if (sk->sk_err)
+			goto do_error;
+		if (signal_pending(current))
+			goto do_interrupted;
+
+		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+	} while ((delay = rc) > 0);
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return rc;
+
+do_error:
+	rc = -EPIPE;
+	goto out;
+do_interrupted:
+	rc = -EINTR;
+	goto out;
+}
+
+void dccp_write_xmit(struct sock *sk, int block)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
+
+	while ((skb = skb_peek(&sk->sk_write_queue))) {
+		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+
+		if (err > 0) {
+			if (!block) {
+				sk_reset_timer(sk, &dp->dccps_xmit_timer,
+						msecs_to_jiffies(err)+jiffies);
+				break;
+			} else
+				err = dccp_wait_for_ccid(sk, skb, err);
+			if (err && err != -EINTR)
+				DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
+		}
+
+		skb_dequeue(&sk->sk_write_queue);
+		if (err == 0) {
+			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+			const int len = skb->len;
+
+			if (sk->sk_state == DCCP_PARTOPEN) {
+				/* See 8.1.5. Handshake Completion */
+				inet_csk_schedule_ack(sk);
+				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+							  inet_csk(sk)->icsk_rto,
+							  DCCP_RTO_MAX);
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			} else if (dccp_ack_pending(sk))
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			else
+				dcb->dccpd_type = DCCP_PKT_DATA;
+
+			err = dccp_transmit_skb(sk, skb);
+			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+			if (err)
+				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
+					 err);
+		} else {
+			dccp_pr_debug("packet discarded due to err=%d\n", err);
+			kfree_skb(skb);
 		}
 	}
 }
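Both implementations in the hunk above dispatch on the same ccid_hc_tx_send_packet() convention: 0 means the CCID permits sending now, a positive value is a delay in milliseconds, and a negative value is an error. The removed code routes the raw value through ccid_packet_dequeue_eval() into explicit CCID_PACKET_* cases, while the restored code tests the sign directly. A condensed, non-authoritative sketch of the shared contract:

	int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);

	if (rc == 0) {
		/* permission granted: dequeue and transmit the skb now */
	} else if (rc > 0) {
		/* busy: retry in rc msec (timer, or process-context sleep) */
	} else {
		/* hard error reported by the CCID: discard the skb */
	}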
@@ -410,12 +339,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
 	DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
 
-	/* Resolve feature dependencies resulting from choice of CCID */
-	if (dccp_feat_server_ccid_dependencies(dreq))
-		goto response_failed;
-
-	if (dccp_insert_options_rsk(dreq, skb))
-		goto response_failed;
+	if (dccp_insert_options_rsk(dreq, skb)) {
+		kfree_skb(skb);
+		return NULL;
+	}
 
 	/* Build and checksum header */
 	dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -436,9 +363,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	inet_rsk(req)->acked = 1;
 	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 	return skb;
-response_failed:
-	kfree_skb(skb);
-	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(dccp_make_response);
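In dccp_make_response() the removed version needs the response_failed label because two setup steps can fail (CCID feature-dependency resolution and option insertion), whereas the restored version has a single failure point and inlines the cleanup. Condensed from the removed lines, the shared-cleanup idiom reads:

	if (dccp_feat_server_ccid_dependencies(dreq))
		goto response_failed;
	if (dccp_insert_options_rsk(dreq, skb))
		goto response_failed;
	/* ... build and checksum header, return skb ... */
response_failed:
	kfree_skb(skb);
	return NULL;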
@@ -523,9 +447,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
 /*
  * Do all connect socket setups that can be done AF independent.
  */
-int dccp_connect(struct sock *sk)
+static inline void dccp_connect_init(struct sock *sk)
 {
-	struct sk_buff *skb;
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -535,13 +458,19 @@ int dccp_connect(struct sock *sk)
 
 	dccp_sync_mss(sk, dst_mtu(dst));
 
-	/* do not connect if feature negotiation setup fails */
-	if (dccp_feat_finalise_settings(dccp_sk(sk)))
-		return -EPROTO;
-
 	/* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
 	dp->dccps_gar = dp->dccps_iss;
 
+	icsk->icsk_retransmits = 0;
+}
+
+int dccp_connect(struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	dccp_connect_init(sk);
+
 	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
 	if (unlikely(skb == NULL))
 		return -ENOBUFS;
@@ -551,11 +480,11 @@ int dccp_connect(struct sock *sk)
 
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
 
-	dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
+	dccp_skb_entail(sk, skb);
+	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
 	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the REQUEST until an answer. */
-	icsk->icsk_retransmits = 0;
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 				  icsk->icsk_rto, DCCP_RTO_MAX);
 	return 0;
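Note the error-semantics change in dccp_connect(): the removed version could abort the connect with -EPROTO when feature negotiation could not be finalised, via this guard (quoted from the removed lines):

	/* do not connect if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dccp_sk(sk)))
		return -EPROTO;

With the restored split into dccp_connect_init() plus dccp_connect(), initialisation cannot fail, leaving -ENOBUFS from the skb allocation as the only error path.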
@@ -642,12 +571,6 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
 	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
 
-	/*
-	 * Clear the flag in case the Sync was scheduled for out-of-band data,
-	 * such as carrying a long Ack Vector.
-	 */
-	dccp_sk(sk)->dccps_sync_scheduled = 0;
-
 	dccp_transmit_skb(sk, skb);
 }
 
@@ -676,7 +599,9 @@ void dccp_send_close(struct sock *sk, const int active)
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
 
 	if (active) {
-		skb = dccp_skb_entail(sk, skb);
+		dccp_write_xmit(sk, 1);
+		dccp_skb_entail(sk, skb);
+		dccp_transmit_skb(sk, skb_clone(skb, prio));
 		/*
 		 * Retransmission timer for active-close: RFC 4340, 8.3 requires
 		 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -689,6 +614,6 @@ void dccp_send_close(struct sock *sk, const int active)
 		 */
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
-	}
-	dccp_transmit_skb(sk, skb);
+	} else
+		dccp_transmit_skb(sk, skb);
 }
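After the final two hunks, the restored active-close path drains the write queue in blocking mode before the Close is entailed for retransmission, while the passive path just transmits once. Reassembled from the lines above (a sketch of the resulting code, not a verbatim excerpt):

	if (active) {
		dccp_write_xmit(sk, 1);		/* drain queued data, blocking */
		dccp_skb_entail(sk, skb);	/* keep Close for retransmission */
		dccp_transmit_skb(sk, skb_clone(skb, prio));
		/* then arm ICSK_TIME_RETRANS per RFC 4340, 8.3 */
	} else
		dccp_transmit_skb(sk, skb);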