about summary refs log tree commit diff stats
path: root/net/dccp/output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp/output.c')
-rw-r--r--  net/dccp/output.c  279
1 files changed, 102 insertions, 177 deletions
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2532797a8009..d06945c7d3df 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -26,13 +26,11 @@ static inline void dccp_event_ack_sent(struct sock *sk)
26 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 26 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
27} 27}
28 28
29/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ 29static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
30static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
31{ 30{
32 skb_set_owner_w(skb, sk); 31 skb_set_owner_w(skb, sk);
33 WARN_ON(sk->sk_send_head); 32 WARN_ON(sk->sk_send_head);
34 sk->sk_send_head = skb; 33 sk->sk_send_head = skb;
35 return skb_clone(sk->sk_send_head, gfp_any());
36} 34}
37 35
38/* 36/*
@@ -163,27 +161,21 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
163 struct inet_connection_sock *icsk = inet_csk(sk); 161 struct inet_connection_sock *icsk = inet_csk(sk);
164 struct dccp_sock *dp = dccp_sk(sk); 162 struct dccp_sock *dp = dccp_sk(sk);
165 u32 ccmps = dccp_determine_ccmps(dp); 163 u32 ccmps = dccp_determine_ccmps(dp);
166 u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; 164 int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
167 165
168 /* Account for header lengths and IPv4/v6 option overhead */ 166 /* Account for header lengths and IPv4/v6 option overhead */
169 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + 167 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
170 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); 168 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
171 169
172 /* 170 /*
173 * Leave enough headroom for common DCCP header options. 171 * FIXME: this should come from the CCID infrastructure, where, say,
174 * This only considers options which may appear on DCCP-Data packets, as 172 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
175 * per table 3 in RFC 4340, 5.8. When running out of space for other 173 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
176 * options (eg. Ack Vector which can take up to 255 bytes), it is better 174 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
177 * to schedule a separate Ack. Thus we leave headroom for the following: 175 * make it a multiple of 4
178 * - 1 byte for Slow Receiver (11.6)
179 * - 6 bytes for Timestamp (13.1)
180 * - 10 bytes for Timestamp Echo (13.3)
181 * - 8 bytes for NDP count (7.7, when activated)
182 * - 6 bytes for Data Checksum (9.3)
183 * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
184 */ 176 */
185 cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 + 177
186 (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4); 178 cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
187 179
188 /* And store cached results */ 180 /* And store cached results */
189 icsk->icsk_pmtu_cookie = pmtu; 181 icsk->icsk_pmtu_cookie = pmtu;
@@ -208,158 +200,95 @@ void dccp_write_space(struct sock *sk)
208} 200}
209 201
210/** 202/**
211 * dccp_wait_for_ccid - Await CCID send permission 203 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
212 * @sk: socket to wait for 204 * @sk: socket to wait for
213 * @delay: timeout in jiffies 205 * @skb: current skb to pass on for waiting
214 * This is used by CCIDs which need to delay the send time in process context. 206 * @delay: sleep timeout in milliseconds (> 0)
207 * This function is called by default when the socket is closed, and
208 * when a non-zero linger time is set on the socket. For consistency
215 */ 209 */
216static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) 210static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
217{ 211{
212 struct dccp_sock *dp = dccp_sk(sk);
218 DEFINE_WAIT(wait); 213 DEFINE_WAIT(wait);
219 long remaining; 214 unsigned long jiffdelay;
220 215 int rc;
221 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
222 sk->sk_write_pending++;
223 release_sock(sk);
224 216
225 remaining = schedule_timeout(delay); 217 do {
226 218 dccp_pr_debug("delayed send by %d msec\n", delay);
227 lock_sock(sk); 219 jiffdelay = msecs_to_jiffies(delay);
228 sk->sk_write_pending--;
229 finish_wait(sk->sk_sleep, &wait);
230 220
231 if (signal_pending(current) || sk->sk_err) 221 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
232 return -1;
233 return remaining;
234}
235
236/**
237 * dccp_xmit_packet - Send data packet under control of CCID
238 * Transmits next-queued payload and informs CCID to account for the packet.
239 */
240static void dccp_xmit_packet(struct sock *sk)
241{
242 int err, len;
243 struct dccp_sock *dp = dccp_sk(sk);
244 struct sk_buff *skb = dccp_qpolicy_pop(sk);
245 222
246 if (unlikely(skb == NULL)) 223 sk->sk_write_pending++;
247 return; 224 release_sock(sk);
248 len = skb->len; 225 schedule_timeout(jiffdelay);
226 lock_sock(sk);
227 sk->sk_write_pending--;
249 228
250 if (sk->sk_state == DCCP_PARTOPEN) { 229 if (sk->sk_err)
251 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; 230 goto do_error;
252 /* 231 if (signal_pending(current))
253 * See 8.1.5 - Handshake Completion. 232 goto do_interrupted;
254 *
255 * For robustness we resend Confirm options until the client has
256 * entered OPEN. During the initial feature negotiation, the MPS
257 * is smaller than usual, reduced by the Change/Confirm options.
258 */
259 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
260 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
261 dccp_send_ack(sk);
262 dccp_feat_list_purge(&dp->dccps_featneg);
263 }
264 233
265 inet_csk_schedule_ack(sk); 234 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
266 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 235 } while ((delay = rc) > 0);
267 inet_csk(sk)->icsk_rto, 236out:
268 DCCP_RTO_MAX); 237 finish_wait(sk->sk_sleep, &wait);
269 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; 238 return rc;
270 } else if (dccp_ack_pending(sk)) { 239
271 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; 240do_error:
272 } else { 241 rc = -EPIPE;
273 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; 242 goto out;
274 } 243do_interrupted:
275 244 rc = -EINTR;
276 err = dccp_transmit_skb(sk, skb); 245 goto out;
277 if (err)
278 dccp_pr_debug("transmit_skb() returned err=%d\n", err);
279 /*
280 * Register this one as sent even if an error occurred. To the remote
281 * end a local packet drop is indistinguishable from network loss, i.e.
282 * any local drop will eventually be reported via receiver feedback.
283 */
284 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
285
286 /*
287 * If the CCID needs to transfer additional header options out-of-band
288 * (e.g. Ack Vectors or feature-negotiation options), it activates this
289 * flag to schedule a Sync. The Sync will automatically incorporate all
290 * currently pending header options, thus clearing the backlog.
291 */
292 if (dp->dccps_sync_scheduled)
293 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
294} 246}
295 247
296/** 248void dccp_write_xmit(struct sock *sk, int block)
297 * dccp_flush_write_queue - Drain queue at end of connection
298 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
299 * happen that the TX queue is not empty at the end of a connection. We give the
300 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
301 * returns with a non-empty write queue, it will be purged later.
302 */
303void dccp_flush_write_queue(struct sock *sk, long *time_budget)
304{ 249{
305 struct dccp_sock *dp = dccp_sk(sk); 250 struct dccp_sock *dp = dccp_sk(sk);
306 struct sk_buff *skb; 251 struct sk_buff *skb;
307 long delay, rc;
308
309 while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
310 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
311 252
312 switch (ccid_packet_dequeue_eval(rc)) { 253 while ((skb = skb_peek(&sk->sk_write_queue))) {
313 case CCID_PACKET_WILL_DEQUEUE_LATER: 254 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
314 /* 255
315 * If the CCID determines when to send, the next sending 256 if (err > 0) {
316 * time is unknown or the CCID may not even send again 257 if (!block) {
317 * (e.g. remote host crashes or lost Ack packets). 258 sk_reset_timer(sk, &dp->dccps_xmit_timer,
318 */ 259 msecs_to_jiffies(err)+jiffies);
319 DCCP_WARN("CCID did not manage to send all packets\n"); 260 break;
320 return; 261 } else
321 case CCID_PACKET_DELAY: 262 err = dccp_wait_for_ccid(sk, skb, err);
322 delay = msecs_to_jiffies(rc); 263 if (err && err != -EINTR)
323 if (delay > *time_budget) 264 DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
324 return;
325 rc = dccp_wait_for_ccid(sk, delay);
326 if (rc < 0)
327 return;
328 *time_budget -= (delay - rc);
329 /* check again if we can send now */
330 break;
331 case CCID_PACKET_SEND_AT_ONCE:
332 dccp_xmit_packet(sk);
333 break;
334 case CCID_PACKET_ERR:
335 skb_dequeue(&sk->sk_write_queue);
336 kfree_skb(skb);
337 dccp_pr_debug("packet discarded due to err=%ld\n", rc);
338 } 265 }
339 }
340}
341 266
342void dccp_write_xmit(struct sock *sk) 267 skb_dequeue(&sk->sk_write_queue);
343{ 268 if (err == 0) {
344 struct dccp_sock *dp = dccp_sk(sk); 269 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
345 struct sk_buff *skb; 270 const int len = skb->len;
346 271
347 while ((skb = dccp_qpolicy_top(sk))) { 272 if (sk->sk_state == DCCP_PARTOPEN) {
348 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 273 /* See 8.1.5. Handshake Completion */
349 274 inet_csk_schedule_ack(sk);
350 switch (ccid_packet_dequeue_eval(rc)) { 275 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
351 case CCID_PACKET_WILL_DEQUEUE_LATER: 276 inet_csk(sk)->icsk_rto,
352 return; 277 DCCP_RTO_MAX);
353 case CCID_PACKET_DELAY: 278 dcb->dccpd_type = DCCP_PKT_DATAACK;
354 sk_reset_timer(sk, &dp->dccps_xmit_timer, 279 } else if (dccp_ack_pending(sk))
355 jiffies + msecs_to_jiffies(rc)); 280 dcb->dccpd_type = DCCP_PKT_DATAACK;
356 return; 281 else
357 case CCID_PACKET_SEND_AT_ONCE: 282 dcb->dccpd_type = DCCP_PKT_DATA;
358 dccp_xmit_packet(sk); 283
359 break; 284 err = dccp_transmit_skb(sk, skb);
360 case CCID_PACKET_ERR: 285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
361 dccp_qpolicy_drop(sk, skb); 286 if (err)
362 dccp_pr_debug("packet discarded due to err=%d\n", rc); 287 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
288 err);
289 } else {
290 dccp_pr_debug("packet discarded due to err=%d\n", err);
291 kfree_skb(skb);
363 } 292 }
364 } 293 }
365} 294}
@@ -410,12 +339,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
410 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 339 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
411 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; 340 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
412 341
413 /* Resolve feature dependencies resulting from choice of CCID */ 342 if (dccp_insert_options_rsk(dreq, skb)) {
414 if (dccp_feat_server_ccid_dependencies(dreq)) 343 kfree_skb(skb);
415 goto response_failed; 344 return NULL;
416 345 }
417 if (dccp_insert_options_rsk(dreq, skb))
418 goto response_failed;
419 346
420 /* Build and checksum header */ 347 /* Build and checksum header */
421 dh = dccp_zeroed_hdr(skb, dccp_header_size); 348 dh = dccp_zeroed_hdr(skb, dccp_header_size);
@@ -436,9 +363,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
436 inet_rsk(req)->acked = 1; 363 inet_rsk(req)->acked = 1;
437 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 364 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
438 return skb; 365 return skb;
439response_failed:
440 kfree_skb(skb);
441 return NULL;
442} 366}
443 367
444EXPORT_SYMBOL_GPL(dccp_make_response); 368EXPORT_SYMBOL_GPL(dccp_make_response);
@@ -523,9 +447,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
523/* 447/*
524 * Do all connect socket setups that can be done AF independent. 448 * Do all connect socket setups that can be done AF independent.
525 */ 449 */
526int dccp_connect(struct sock *sk) 450static inline void dccp_connect_init(struct sock *sk)
527{ 451{
528 struct sk_buff *skb;
529 struct dccp_sock *dp = dccp_sk(sk); 452 struct dccp_sock *dp = dccp_sk(sk);
530 struct dst_entry *dst = __sk_dst_get(sk); 453 struct dst_entry *dst = __sk_dst_get(sk);
531 struct inet_connection_sock *icsk = inet_csk(sk); 454 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -535,13 +458,19 @@ int dccp_connect(struct sock *sk)
535 458
536 dccp_sync_mss(sk, dst_mtu(dst)); 459 dccp_sync_mss(sk, dst_mtu(dst));
537 460
538 /* do not connect if feature negotiation setup fails */
539 if (dccp_feat_finalise_settings(dccp_sk(sk)))
540 return -EPROTO;
541
542 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ 461 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
543 dp->dccps_gar = dp->dccps_iss; 462 dp->dccps_gar = dp->dccps_iss;
544 463
464 icsk->icsk_retransmits = 0;
465}
466
467int dccp_connect(struct sock *sk)
468{
469 struct sk_buff *skb;
470 struct inet_connection_sock *icsk = inet_csk(sk);
471
472 dccp_connect_init(sk);
473
545 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); 474 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
546 if (unlikely(skb == NULL)) 475 if (unlikely(skb == NULL))
547 return -ENOBUFS; 476 return -ENOBUFS;
@@ -551,11 +480,11 @@ int dccp_connect(struct sock *sk)
551 480
552 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; 481 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
553 482
554 dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); 483 dccp_skb_entail(sk, skb);
484 dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
555 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); 485 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
556 486
557 /* Timer for repeating the REQUEST until an answer. */ 487 /* Timer for repeating the REQUEST until an answer. */
558 icsk->icsk_retransmits = 0;
559 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 488 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
560 icsk->icsk_rto, DCCP_RTO_MAX); 489 icsk->icsk_rto, DCCP_RTO_MAX);
561 return 0; 490 return 0;
@@ -642,12 +571,6 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
642 DCCP_SKB_CB(skb)->dccpd_type = pkt_type; 571 DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
643 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; 572 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
644 573
645 /*
646 * Clear the flag in case the Sync was scheduled for out-of-band data,
647 * such as carrying a long Ack Vector.
648 */
649 dccp_sk(sk)->dccps_sync_scheduled = 0;
650
651 dccp_transmit_skb(sk, skb); 574 dccp_transmit_skb(sk, skb);
652} 575}
653 576
@@ -676,7 +599,9 @@ void dccp_send_close(struct sock *sk, const int active)
676 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; 599 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
677 600
678 if (active) { 601 if (active) {
679 skb = dccp_skb_entail(sk, skb); 602 dccp_write_xmit(sk, 1);
603 dccp_skb_entail(sk, skb);
604 dccp_transmit_skb(sk, skb_clone(skb, prio));
680 /* 605 /*
681 * Retransmission timer for active-close: RFC 4340, 8.3 requires 606 * Retransmission timer for active-close: RFC 4340, 8.3 requires
682 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ 607 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -689,6 +614,6 @@ void dccp_send_close(struct sock *sk, const int active)
689 */ 614 */
690 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 615 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
691 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); 616 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
692 } 617 } else
693 dccp_transmit_skb(sk, skb); 618 dccp_transmit_skb(sk, skb);
694} 619}