about summary refs log tree commit diff stats
path: root/net/dccp/output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp/output.c')
-rw-r--r-- net/dccp/output.c 279
1 files changed, 177 insertions, 102 deletions
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d06945c7d3df..2532797a8009 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -26,11 +26,13 @@ static inline void dccp_event_ack_sent(struct sock *sk)
26 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 26 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
27} 27}
28 28
29static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) 29/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
30static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
30{ 31{
31 skb_set_owner_w(skb, sk); 32 skb_set_owner_w(skb, sk);
32 WARN_ON(sk->sk_send_head); 33 WARN_ON(sk->sk_send_head);
33 sk->sk_send_head = skb; 34 sk->sk_send_head = skb;
35 return skb_clone(sk->sk_send_head, gfp_any());
34} 36}
35 37
36/* 38/*
@@ -161,21 +163,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
161 struct inet_connection_sock *icsk = inet_csk(sk); 163 struct inet_connection_sock *icsk = inet_csk(sk);
162 struct dccp_sock *dp = dccp_sk(sk); 164 struct dccp_sock *dp = dccp_sk(sk);
163 u32 ccmps = dccp_determine_ccmps(dp); 165 u32 ccmps = dccp_determine_ccmps(dp);
164 int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; 166 u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
165 167
166 /* Account for header lengths and IPv4/v6 option overhead */ 168 /* Account for header lengths and IPv4/v6 option overhead */
167 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + 169 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
168 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); 170 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
169 171
170 /* 172 /*
171 * FIXME: this should come from the CCID infrastructure, where, say, 173 * Leave enough headroom for common DCCP header options.
172 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets 174 * This only considers options which may appear on DCCP-Data packets, as
173 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED 175 * per table 3 in RFC 4340, 5.8. When running out of space for other
174 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to 176 * options (eg. Ack Vector which can take up to 255 bytes), it is better
175 * make it a multiple of 4 177 * to schedule a separate Ack. Thus we leave headroom for the following:
178 * - 1 byte for Slow Receiver (11.6)
179 * - 6 bytes for Timestamp (13.1)
180 * - 10 bytes for Timestamp Echo (13.3)
181 * - 8 bytes for NDP count (7.7, when activated)
182 * - 6 bytes for Data Checksum (9.3)
183 * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
176 */ 184 */
177 185 cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
178 cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; 186 (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
179 187
180 /* And store cached results */ 188 /* And store cached results */
181 icsk->icsk_pmtu_cookie = pmtu; 189 icsk->icsk_pmtu_cookie = pmtu;
@@ -200,95 +208,158 @@ void dccp_write_space(struct sock *sk)
200} 208}
201 209
202/** 210/**
203 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet 211 * dccp_wait_for_ccid - Await CCID send permission
204 * @sk: socket to wait for 212 * @sk: socket to wait for
205 * @skb: current skb to pass on for waiting 213 * @delay: timeout in jiffies
206 * @delay: sleep timeout in milliseconds (> 0) 214 * This is used by CCIDs which need to delay the send time in process context.
207 * This function is called by default when the socket is closed, and
208 * when a non-zero linger time is set on the socket. For consistency
209 */ 215 */
210static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) 216static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
211{ 217{
212 struct dccp_sock *dp = dccp_sk(sk);
213 DEFINE_WAIT(wait); 218 DEFINE_WAIT(wait);
214 unsigned long jiffdelay; 219 long remaining;
215 int rc;
216 220
217 do { 221 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
218 dccp_pr_debug("delayed send by %d msec\n", delay); 222 sk->sk_write_pending++;
219 jiffdelay = msecs_to_jiffies(delay); 223 release_sock(sk);
220 224
221 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 225 remaining = schedule_timeout(delay);
222 226
223 sk->sk_write_pending++; 227 lock_sock(sk);
224 release_sock(sk); 228 sk->sk_write_pending--;
225 schedule_timeout(jiffdelay); 229 finish_wait(sk->sk_sleep, &wait);
226 lock_sock(sk);
227 sk->sk_write_pending--;
228 230
229 if (sk->sk_err) 231 if (signal_pending(current) || sk->sk_err)
230 goto do_error; 232 return -1;
231 if (signal_pending(current)) 233 return remaining;
232 goto do_interrupted; 234}
233 235
234 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 236/**
235 } while ((delay = rc) > 0); 237 * dccp_xmit_packet - Send data packet under control of CCID
236out: 238 * Transmits next-queued payload and informs CCID to account for the packet.
237 finish_wait(sk->sk_sleep, &wait); 239 */
238 return rc; 240static void dccp_xmit_packet(struct sock *sk)
239 241{
240do_error: 242 int err, len;
241 rc = -EPIPE; 243 struct dccp_sock *dp = dccp_sk(sk);
242 goto out; 244 struct sk_buff *skb = dccp_qpolicy_pop(sk);
243do_interrupted: 245
244 rc = -EINTR; 246 if (unlikely(skb == NULL))
245 goto out; 247 return;
248 len = skb->len;
249
250 if (sk->sk_state == DCCP_PARTOPEN) {
251 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
252 /*
253 * See 8.1.5 - Handshake Completion.
254 *
255 * For robustness we resend Confirm options until the client has
256 * entered OPEN. During the initial feature negotiation, the MPS
257 * is smaller than usual, reduced by the Change/Confirm options.
258 */
259 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
260 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
261 dccp_send_ack(sk);
262 dccp_feat_list_purge(&dp->dccps_featneg);
263 }
264
265 inet_csk_schedule_ack(sk);
266 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
267 inet_csk(sk)->icsk_rto,
268 DCCP_RTO_MAX);
269 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
270 } else if (dccp_ack_pending(sk)) {
271 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
272 } else {
273 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
274 }
275
276 err = dccp_transmit_skb(sk, skb);
277 if (err)
278 dccp_pr_debug("transmit_skb() returned err=%d\n", err);
279 /*
280 * Register this one as sent even if an error occurred. To the remote
281 * end a local packet drop is indistinguishable from network loss, i.e.
282 * any local drop will eventually be reported via receiver feedback.
283 */
284 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
285
286 /*
287 * If the CCID needs to transfer additional header options out-of-band
288 * (e.g. Ack Vectors or feature-negotiation options), it activates this
289 * flag to schedule a Sync. The Sync will automatically incorporate all
290 * currently pending header options, thus clearing the backlog.
291 */
292 if (dp->dccps_sync_scheduled)
293 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
246} 294}
247 295
248void dccp_write_xmit(struct sock *sk, int block) 296/**
297 * dccp_flush_write_queue - Drain queue at end of connection
298 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
299 * happen that the TX queue is not empty at the end of a connection. We give the
300 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
301 * returns with a non-empty write queue, it will be purged later.
302 */
303void dccp_flush_write_queue(struct sock *sk, long *time_budget)
249{ 304{
250 struct dccp_sock *dp = dccp_sk(sk); 305 struct dccp_sock *dp = dccp_sk(sk);
251 struct sk_buff *skb; 306 struct sk_buff *skb;
307 long delay, rc;
308
309 while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
310 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
252 311
253 while ((skb = skb_peek(&sk->sk_write_queue))) { 312 switch (ccid_packet_dequeue_eval(rc)) {
254 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 313 case CCID_PACKET_WILL_DEQUEUE_LATER:
255 314 /*
256 if (err > 0) { 315 * If the CCID determines when to send, the next sending
257 if (!block) { 316 * time is unknown or the CCID may not even send again
258 sk_reset_timer(sk, &dp->dccps_xmit_timer, 317 * (e.g. remote host crashes or lost Ack packets).
259 msecs_to_jiffies(err)+jiffies); 318 */
260 break; 319 DCCP_WARN("CCID did not manage to send all packets\n");
261 } else 320 return;
262 err = dccp_wait_for_ccid(sk, skb, err); 321 case CCID_PACKET_DELAY:
263 if (err && err != -EINTR) 322 delay = msecs_to_jiffies(rc);
264 DCCP_BUG("err=%d after dccp_wait_for_ccid", err); 323 if (delay > *time_budget)
324 return;
325 rc = dccp_wait_for_ccid(sk, delay);
326 if (rc < 0)
327 return;
328 *time_budget -= (delay - rc);
329 /* check again if we can send now */
330 break;
331 case CCID_PACKET_SEND_AT_ONCE:
332 dccp_xmit_packet(sk);
333 break;
334 case CCID_PACKET_ERR:
335 skb_dequeue(&sk->sk_write_queue);
336 kfree_skb(skb);
337 dccp_pr_debug("packet discarded due to err=%ld\n", rc);
265 } 338 }
339 }
340}
266 341
267 skb_dequeue(&sk->sk_write_queue); 342void dccp_write_xmit(struct sock *sk)
268 if (err == 0) { 343{
269 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 344 struct dccp_sock *dp = dccp_sk(sk);
270 const int len = skb->len; 345 struct sk_buff *skb;
271 346
272 if (sk->sk_state == DCCP_PARTOPEN) { 347 while ((skb = dccp_qpolicy_top(sk))) {
273 /* See 8.1.5. Handshake Completion */ 348 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
274 inet_csk_schedule_ack(sk); 349
275 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 350 switch (ccid_packet_dequeue_eval(rc)) {
276 inet_csk(sk)->icsk_rto, 351 case CCID_PACKET_WILL_DEQUEUE_LATER:
277 DCCP_RTO_MAX); 352 return;
278 dcb->dccpd_type = DCCP_PKT_DATAACK; 353 case CCID_PACKET_DELAY:
279 } else if (dccp_ack_pending(sk)) 354 sk_reset_timer(sk, &dp->dccps_xmit_timer,
280 dcb->dccpd_type = DCCP_PKT_DATAACK; 355 jiffies + msecs_to_jiffies(rc));
281 else 356 return;
282 dcb->dccpd_type = DCCP_PKT_DATA; 357 case CCID_PACKET_SEND_AT_ONCE:
283 358 dccp_xmit_packet(sk);
284 err = dccp_transmit_skb(sk, skb); 359 break;
285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); 360 case CCID_PACKET_ERR:
286 if (err) 361 dccp_qpolicy_drop(sk, skb);
287 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", 362 dccp_pr_debug("packet discarded due to err=%d\n", rc);
288 err);
289 } else {
290 dccp_pr_debug("packet discarded due to err=%d\n", err);
291 kfree_skb(skb);
292 } 363 }
293 } 364 }
294} 365}
@@ -339,10 +410,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
339 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 410 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
340 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; 411 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
341 412
342 if (dccp_insert_options_rsk(dreq, skb)) { 413 /* Resolve feature dependencies resulting from choice of CCID */
343 kfree_skb(skb); 414 if (dccp_feat_server_ccid_dependencies(dreq))
344 return NULL; 415 goto response_failed;
345 } 416
417 if (dccp_insert_options_rsk(dreq, skb))
418 goto response_failed;
346 419
347 /* Build and checksum header */ 420 /* Build and checksum header */
348 dh = dccp_zeroed_hdr(skb, dccp_header_size); 421 dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -363,6 +436,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
363 inet_rsk(req)->acked = 1; 436 inet_rsk(req)->acked = 1;
364 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 437 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
365 return skb; 438 return skb;
439response_failed:
440 kfree_skb(skb);
441 return NULL;
366} 442}
367 443
368EXPORT_SYMBOL_GPL(dccp_make_response); 444EXPORT_SYMBOL_GPL(dccp_make_response);
@@ -447,8 +523,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
447/* 523/*
448 * Do all connect socket setups that can be done AF independent. 524 * Do all connect socket setups that can be done AF independent.
449 */ 525 */
450static inline void dccp_connect_init(struct sock *sk) 526int dccp_connect(struct sock *sk)
451{ 527{
528 struct sk_buff *skb;
452 struct dccp_sock *dp = dccp_sk(sk); 529 struct dccp_sock *dp = dccp_sk(sk);
453 struct dst_entry *dst = __sk_dst_get(sk); 530 struct dst_entry *dst = __sk_dst_get(sk);
454 struct inet_connection_sock *icsk = inet_csk(sk); 531 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -458,19 +535,13 @@ static inline void dccp_connect_init(struct sock *sk)
458 535
459 dccp_sync_mss(sk, dst_mtu(dst)); 536 dccp_sync_mss(sk, dst_mtu(dst));
460 537
538 /* do not connect if feature negotiation setup fails */
539 if (dccp_feat_finalise_settings(dccp_sk(sk)))
540 return -EPROTO;
541
461 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ 542 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
462 dp->dccps_gar = dp->dccps_iss; 543 dp->dccps_gar = dp->dccps_iss;
463 544
464 icsk->icsk_retransmits = 0;
465}
466
467int dccp_connect(struct sock *sk)
468{
469 struct sk_buff *skb;
470 struct inet_connection_sock *icsk = inet_csk(sk);
471
472 dccp_connect_init(sk);
473
474 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); 545 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
475 if (unlikely(skb == NULL)) 546 if (unlikely(skb == NULL))
476 return -ENOBUFS; 547 return -ENOBUFS;
@@ -480,11 +551,11 @@ int dccp_connect(struct sock *sk)
480 551
481 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; 552 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
482 553
483 dccp_skb_entail(sk, skb); 554 dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
484 dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
485 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); 555 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
486 556
487 /* Timer for repeating the REQUEST until an answer. */ 557 /* Timer for repeating the REQUEST until an answer. */
558 icsk->icsk_retransmits = 0;
488 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 559 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
489 icsk->icsk_rto, DCCP_RTO_MAX); 560 icsk->icsk_rto, DCCP_RTO_MAX);
490 return 0; 561 return 0;
@@ -571,6 +642,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
571 DCCP_SKB_CB(skb)->dccpd_type = pkt_type; 642 DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
572 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; 643 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
573 644
645 /*
646 * Clear the flag in case the Sync was scheduled for out-of-band data,
647 * such as carrying a long Ack Vector.
648 */
649 dccp_sk(sk)->dccps_sync_scheduled = 0;
650
574 dccp_transmit_skb(sk, skb); 651 dccp_transmit_skb(sk, skb);
575} 652}
576 653
@@ -599,9 +676,7 @@ void dccp_send_close(struct sock *sk, const int active)
599 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; 676 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
600 677
601 if (active) { 678 if (active) {
602 dccp_write_xmit(sk, 1); 679 skb = dccp_skb_entail(sk, skb);
603 dccp_skb_entail(sk, skb);
604 dccp_transmit_skb(sk, skb_clone(skb, prio));
605 /* 680 /*
606 * Retransmission timer for active-close: RFC 4340, 8.3 requires 681 * Retransmission timer for active-close: RFC 4340, 8.3 requires
607 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ 682 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -614,6 +689,6 @@ void dccp_send_close(struct sock *sk, const int active)
614 */ 689 */
615 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 690 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
616 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); 691 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
617 } else 692 }
618 dccp_transmit_skb(sk, skb); 693 dccp_transmit_skb(sk, skb);
619} 694}