author		Ingo Molnar <mingo@elte.hu>	2008-07-21 11:19:50 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-21 11:19:50 -0400
commit		eb6a12c2428d21a9f3e0f1a50e927d5fd80fc3d0 (patch)
tree		5ac6f43899648abeab1d43aad3107f664e7f13d5 /net/ipv4/tcp_output.c
parent		c4762aba0b1f72659aae9ce37b772ca8bd8f06f4 (diff)
parent		14b395e35d1afdd8019d11b92e28041fad591b71 (diff)

Merge branch 'linus' into cpus4096-for-linus

Conflicts:
	net/sunrpc/svc.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	466
1 files changed, 251 insertions, 215 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ecb4810..1fa683c0ba9b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,8 +5,6 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -347,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	TCP_SKB_CB(skb)->end_seq = seq;
 }
 
-static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
-					 __u32 tstamp, __u8 **md5_hash)
-{
-	if (tp->rx_opt.tstamp_ok) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_TIMESTAMP << 8) |
-			       TCPOLEN_TIMESTAMP);
-		*ptr++ = htonl(tstamp);
-		*ptr++ = htonl(tp->rx_opt.ts_recent);
+#define OPTION_SACK_ADVERTISE	(1 << 0)
+#define OPTION_TS		(1 << 1)
+#define OPTION_MD5		(1 << 2)
+
+struct tcp_out_options {
+	u8 options;		/* bit field of OPTION_* */
+	u8 ws;			/* window scale, 0 to disable */
+	u8 num_sack_blocks;	/* number of SACK blocks to include */
+	u16 mss;		/* 0 to disable */
+	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+};
+
+static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+			      const struct tcp_out_options *opts,
+			      __u8 **md5_hash) {
+	if (unlikely(OPTION_MD5 & opts->options)) {
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_NOP << 16) |
+			       (TCPOPT_MD5SIG << 8) |
+			       TCPOLEN_MD5SIG);
+		*md5_hash = (__u8 *)ptr;
+		ptr += 4;
+	} else {
+		*md5_hash = NULL;
+	}
+
+	if (likely(OPTION_TS & opts->options)) {
+		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+				       (TCPOLEN_SACK_PERM << 16) |
+				       (TCPOPT_TIMESTAMP << 8) |
+				       TCPOLEN_TIMESTAMP);
+		} else {
+			*ptr++ = htonl((TCPOPT_NOP << 24) |
+				       (TCPOPT_NOP << 16) |
+				       (TCPOPT_TIMESTAMP << 8) |
+				       TCPOLEN_TIMESTAMP);
+		}
+		*ptr++ = htonl(opts->tsval);
+		*ptr++ = htonl(opts->tsecr);
+	}
+
+	if (unlikely(opts->mss)) {
+		*ptr++ = htonl((TCPOPT_MSS << 24) |
+			       (TCPOLEN_MSS << 16) |
+			       opts->mss);
+	}
+
+	if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
+		     !(OPTION_TS & opts->options))) {
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_NOP << 16) |
+			       (TCPOPT_SACK_PERM << 8) |
+			       TCPOLEN_SACK_PERM);
+	}
+
+	if (unlikely(opts->ws)) {
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_WINDOW << 16) |
+			       (TCPOLEN_WINDOW << 8) |
+			       opts->ws);
 	}
-	if (tp->rx_opt.eff_sacks) {
-		struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+
+	if (unlikely(opts->num_sack_blocks)) {
+		struct tcp_sack_block *sp = tp->rx_opt.dsack ?
+			tp->duplicate_sack : tp->selective_acks;
 		int this_sack;
 
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK << 8) |
-			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+			       (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
 						     TCPOLEN_SACK_PERBLOCK)));
 
-		for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+		for (this_sack = 0; this_sack < opts->num_sack_blocks;
+		     ++this_sack) {
 			*ptr++ = htonl(sp[this_sack].start_seq);
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
@@ -378,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 			tp->rx_opt.eff_sacks--;
 		}
 	}
+}
+
+static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+				struct tcp_out_options *opts,
+				struct tcp_md5sig_key **md5) {
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned size = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
-	if (md5_hash) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+	*md5 = tp->af_specific->md5_lookup(sk, sk);
+	if (*md5) {
+		opts->options |= OPTION_MD5;
+		size += TCPOLEN_MD5SIG_ALIGNED;
 	}
+#else
+	*md5 = NULL;
 #endif
+
+	/* We always get an MSS option. The option bytes which will be seen in
+	 * normal data packets should timestamps be used, must be in the MSS
+	 * advertised. But we subtract them from tp->mss_cache so that
+	 * calculations in tcp_sendmsg are simpler etc. So account for this
+	 * fact here if necessary. If we don't do this correctly, as a
+	 * receiver we won't recognize data packets as being full sized when we
+	 * should, and thus we won't abide by the delayed ACK rules correctly.
+	 * SACKs don't matter, we never delay an ACK when we have any of those
+	 * going out. */
+	opts->mss = tcp_advertise_mss(sk);
+	size += TCPOLEN_MSS_ALIGNED;
+
+	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
+		opts->options |= OPTION_TS;
+		opts->tsval = TCP_SKB_CB(skb)->when;
+		opts->tsecr = tp->rx_opt.ts_recent;
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	}
+	if (likely(sysctl_tcp_window_scaling)) {
+		opts->ws = tp->rx_opt.rcv_wscale;
+		size += TCPOLEN_WSCALE_ALIGNED;
+	}
+	if (likely(sysctl_tcp_sack)) {
+		opts->options |= OPTION_SACK_ADVERTISE;
+		if (unlikely(!OPTION_TS & opts->options))
+			size += TCPOLEN_SACKPERM_ALIGNED;
+	}
+
+	return size;
 }
 
-/* Construct a tcp options header for a SYN or SYN_ACK packet.
- * If this is every changed make sure to change the definition of
- * MAX_SYN_SIZE to match the new maximum number of options that you
- * can generate.
- *
- * Note - that with the RFC2385 TCP option, we make room for the
- * 16 byte MD5 hash. This will be filled in later, so the pointer for the
- * location to be filled is passed back up.
- */
-static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
-				  int offer_wscale, int wscale, __u32 tstamp,
-				  __u32 ts_recent, __u8 **md5_hash)
-{
-	/* We always get an MSS option.
-	 * The option bytes which will be seen in normal data
-	 * packets should timestamps be used, must be in the MSS
-	 * advertised. But we subtract them from tp->mss_cache so
-	 * that calculations in tcp_sendmsg are simpler etc.
-	 * So account for this fact here if necessary. If we
-	 * don't do this correctly, as a receiver we won't
-	 * recognize data packets as being full sized when we
-	 * should, and thus we won't abide by the delayed ACK
-	 * rules correctly.
-	 * SACKs don't matter, we never delay an ACK when we
-	 * have any of those going out.
-	 */
-	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
-	if (ts) {
-		if (sack)
-			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
-				       (TCPOLEN_SACK_PERM << 16) |
-				       (TCPOPT_TIMESTAMP << 8) |
-				       TCPOLEN_TIMESTAMP);
-		else
-			*ptr++ = htonl((TCPOPT_NOP << 24) |
-				       (TCPOPT_NOP << 16) |
-				       (TCPOPT_TIMESTAMP << 8) |
-				       TCPOLEN_TIMESTAMP);
-		*ptr++ = htonl(tstamp);		/* TSVAL */
-		*ptr++ = htonl(ts_recent);	/* TSECR */
-	} else if (sack)
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_SACK_PERM << 8) |
-			       TCPOLEN_SACK_PERM);
-	if (offer_wscale)
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_WINDOW << 16) |
-			       (TCPOLEN_WINDOW << 8) |
-			       (wscale));
+static unsigned tcp_synack_options(struct sock *sk,
+				   struct request_sock *req,
+				   unsigned mss, struct sk_buff *skb,
+				   struct tcp_out_options *opts,
+				   struct tcp_md5sig_key **md5) {
+	unsigned size = 0;
+	struct inet_request_sock *ireq = inet_rsk(req);
+	char doing_ts;
+
 #ifdef CONFIG_TCP_MD5SIG
-	/*
-	 * If MD5 is enabled, then we set the option, and include the size
-	 * (always 18). The actual MD5 hash is added just before the
-	 * packet is sent.
-	 */
-	if (md5_hash) {
-		*ptr++ = htonl((TCPOPT_NOP << 24) |
-			       (TCPOPT_NOP << 16) |
-			       (TCPOPT_MD5SIG << 8) |
-			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *)ptr;
+	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
+	if (*md5) {
+		opts->options |= OPTION_MD5;
+		size += TCPOLEN_MD5SIG_ALIGNED;
 	}
+#else
+	*md5 = NULL;
 #endif
+
+	/* we can't fit any SACK blocks in a packet with MD5 + TS
+	   options. There was discussion about disabling SACK rather than TS in
+	   order to fit in better with old, buggy kernels, but that was deemed
+	   to be unnecessary. */
+	doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);
+
+	opts->mss = mss;
+	size += TCPOLEN_MSS_ALIGNED;
+
+	if (likely(ireq->wscale_ok)) {
+		opts->ws = ireq->rcv_wscale;
+		size += TCPOLEN_WSCALE_ALIGNED;
+	}
+	if (likely(doing_ts)) {
+		opts->options |= OPTION_TS;
+		opts->tsval = TCP_SKB_CB(skb)->when;
+		opts->tsecr = req->ts_recent;
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	}
+	if (likely(ireq->sack_ok)) {
+		opts->options |= OPTION_SACK_ADVERTISE;
+		if (unlikely(!doing_ts))
+			size += TCPOLEN_SACKPERM_ALIGNED;
+	}
+
+	return size;
+}
+
+static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
+					struct tcp_out_options *opts,
+					struct tcp_md5sig_key **md5) {
+	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned size = 0;
+
+#ifdef CONFIG_TCP_MD5SIG
+	*md5 = tp->af_specific->md5_lookup(sk, sk);
+	if (unlikely(*md5)) {
+		opts->options |= OPTION_MD5;
+		size += TCPOLEN_MD5SIG_ALIGNED;
+	}
+#else
+	*md5 = NULL;
+#endif
+
+	if (likely(tp->rx_opt.tstamp_ok)) {
+		opts->options |= OPTION_TS;
+		opts->tsval = tcb ? tcb->when : 0;
+		opts->tsecr = tp->rx_opt.ts_recent;
+		size += TCPOLEN_TSTAMP_ALIGNED;
+	}
+
+	if (unlikely(tp->rx_opt.eff_sacks)) {
+		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
+		opts->num_sack_blocks =
+			min_t(unsigned, tp->rx_opt.eff_sacks,
+			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+			      TCPOLEN_SACK_PERBLOCK);
+		size += TCPOLEN_SACK_BASE_ALIGNED +
+			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+	}
+
+	return size;
 }
 
 /* This routine actually transmits TCP packets queued in by
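[Annotation] The three tcp_*_options() helpers above only account sizes; tcp_options_write() does the wire encoding, emitting whole 32-bit words via htonl() so that th->doff, which counts the header in 32-bit words, always comes out exact. Below is a standalone userspace sketch of that packing for the timestamp word, using the option kind/length values from RFC 793 and RFC 1323 (NOP = 1, TIMESTAMP = 8, length 10); the constants are redefined locally so the sketch compiles outside the kernel.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define TCPOPT_NOP		1	/* padding, RFC 793 */
#define TCPOPT_TIMESTAMP	8	/* RFC 1323 */
#define TCPOLEN_TIMESTAMP	10	/* kind + len + two 4-byte stamps */

int main(void)
{
	/* The same expression tcp_options_write() uses for the
	 * timestamp option preceded by two NOPs of padding. */
	uint32_t word = htonl((TCPOPT_NOP << 24) |
			      (TCPOPT_NOP << 16) |
			      (TCPOPT_TIMESTAMP << 8) |
			      TCPOLEN_TIMESTAMP);
	unsigned char wire[4];

	memcpy(wire, &word, sizeof(wire));
	/* prints "01 01 08 0a" on any host, big or little endian */
	printf("%02x %02x %02x %02x\n", wire[0], wire[1], wire[2], wire[3]);
	return 0;
}

One wrinkle worth flagging in tcp_syn_options(): the test `!OPTION_TS & opts->options` binds as `(!OPTION_TS) & opts->options`, which is constant 0, so the SACKPERM bytes are never counted on that path; the condition presumably means `!(OPTION_TS & opts->options)`, the same thing tcp_synack_options() expresses with its doing_ts flag.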
@@ -473,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct inet_sock *inet;
 	struct tcp_sock *tp;
 	struct tcp_skb_cb *tcb;
-	int tcp_header_size;
-#ifdef CONFIG_TCP_MD5SIG
+	struct tcp_out_options opts;
+	unsigned tcp_options_size, tcp_header_size;
 	struct tcp_md5sig_key *md5;
 	__u8 *md5_hash_location;
-#endif
 	struct tcphdr *th;
-	int sysctl_flags;
 	int err;
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -502,50 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	inet = inet_sk(sk);
 	tp = tcp_sk(sk);
 	tcb = TCP_SKB_CB(skb);
-	tcp_header_size = tp->tcp_header_len;
-
-#define SYSCTL_FLAG_TSTAMPS	0x1
-#define SYSCTL_FLAG_WSCALE	0x2
-#define SYSCTL_FLAG_SACK	0x4
+	memset(&opts, 0, sizeof(opts));
 
-	sysctl_flags = 0;
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
-		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-		if (sysctl_tcp_timestamps) {
-			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-		}
-		if (sysctl_tcp_window_scaling) {
-			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-			sysctl_flags |= SYSCTL_FLAG_WSCALE;
-		}
-		if (sysctl_tcp_sack) {
-			sysctl_flags |= SYSCTL_FLAG_SACK;
-			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-		}
-	} else if (unlikely(tp->rx_opt.eff_sacks)) {
-		/* A SACK is 2 pad bytes, a 2 byte header, plus
-		 * 2 32-bit sequence numbers for each SACK block.
-		 */
-		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-				    (tp->rx_opt.eff_sacks *
-				     TCPOLEN_SACK_PERBLOCK));
-	}
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+	else
+		tcp_options_size = tcp_established_options(sk, skb, &opts,
+							   &md5);
+	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
-#ifdef CONFIG_TCP_MD5SIG
-	/*
-	 * Are we doing MD5 on this segment? If so - make
-	 * room for it.
-	 */
-	md5 = tp->af_specific->md5_lookup(sk, sk);
-	if (md5)
-		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
 	skb_set_owner_w(skb, sk);
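[Annotation] tcp_header_size is now derived from the option helpers rather than hand-summed flag tests. The helpers keep themselves inside the protocol's hard limit: th->doff is a 4-bit field, so a TCP header can be at most 60 bytes, leaving MAX_TCP_OPTION_SPACE = 40 bytes for options, and tcp_established_options() clamps num_sack_blocks against whatever of that space remains. A standalone sketch of the arithmetic, with constant values as found in include/net/tcp.h of this period:

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE		40	/* 60-byte max header - 20 fixed */
#define TCPOLEN_TSTAMP_ALIGNED		12
#define TCPOLEN_MD5SIG_ALIGNED		20
#define TCPOLEN_SACK_BASE_ALIGNED	4
#define TCPOLEN_SACK_PERBLOCK		8

/* Mirrors the clamp in tcp_established_options(). */
static unsigned int max_sack_blocks(unsigned int size_so_far)
{
	unsigned int remaining = MAX_TCP_OPTION_SPACE - size_so_far;

	return (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
	       TCPOLEN_SACK_PERBLOCK;
}

int main(void)
{
	/* timestamps alone: (40 - 12 - 4) / 8 = 3 blocks */
	printf("TS only:  %u\n", max_sack_blocks(TCPOLEN_TSTAMP_ALIGNED));
	/* MD5 + timestamps: (40 - 32 - 4) / 8 = 0 blocks, which is the
	 * "can't fit any SACK blocks with MD5 + TS" case the comment in
	 * tcp_synack_options() refers to */
	printf("MD5 + TS: %u\n", max_sack_blocks(TCPOLEN_MD5SIG_ALIGNED +
						 TCPOLEN_TSTAMP_ALIGNED));
	return 0;
}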
@@ -576,39 +650,16 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		th->urg = 1;
 	}
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
-		tcp_syn_build_options((__be32 *)(th + 1),
-				      tcp_advertise_mss(sk),
-				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-				      (sysctl_flags & SYSCTL_FLAG_SACK),
-				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-				      tp->rx_opt.rcv_wscale,
-				      tcb->when,
-				      tp->rx_opt.ts_recent,
-
-#ifdef CONFIG_TCP_MD5SIG
-				      md5 ? &md5_hash_location :
-#endif
-				      NULL);
-	} else {
-		tcp_build_and_update_options((__be32 *)(th + 1),
-					     tp, tcb->when,
-#ifdef CONFIG_TCP_MD5SIG
-					     md5 ? &md5_hash_location :
-#endif
-					     NULL);
+	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
-	}
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
+		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 		tp->af_specific->calc_md5_hash(md5_hash_location,
-					       md5,
-					       sk, NULL, NULL,
-					       tcp_hdr(skb),
-					       sk->sk_protocol,
-					       skb->len);
+					       md5, sk, NULL, skb);
 	}
 #endif
 
@@ -621,7 +672,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_event_data_sent(tp, skb, sk);
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+		TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (likely(err <= 0))
@@ -630,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_enter_cwr(sk, 1);
 
 	return net_xmit_eval(err);
-
-#undef SYSCTL_FLAG_TSTAMPS
-#undef SYSCTL_FLAG_WSCALE
-#undef SYSCTL_FLAG_SACK
 }
 
 /* This routine just queue's the buffer
@@ -974,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	u32 mss_now;
 	u16 xmit_size_goal;
 	int doing_tso = 0;
+	unsigned header_len;
+	struct tcp_out_options opts;
+	struct tcp_md5sig_key *md5;
 
 	mss_now = tp->mss_cache;
 
@@ -986,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 		mss_now = tcp_sync_mss(sk, mtu);
 	}
 
-	if (tp->rx_opt.eff_sacks)
-		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
-
-#ifdef CONFIG_TCP_MD5SIG
-	if (tp->af_specific->md5_lookup(sk, sk))
-		mss_now -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
+	header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+		     sizeof(struct tcphdr);
+	/* The mss_cache is sized based on tp->tcp_header_len, which assumes
+	 * some common options. If this is an odd packet (because we have SACK
+	 * blocks etc) then our calculated header_len will be different, and
+	 * we have to adjust mss_now correspondingly */
+	if (header_len != tp->tcp_header_len) {
+		int delta = (int) header_len - tp->tcp_header_len;
+		mss_now -= delta;
+	}
 
 	xmit_size_goal = mss_now;
 
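[Annotation] The delta adjustment replaces the two hand-maintained subtractions (SACK space and MD5 space) with a single comparison against tp->tcp_header_len, so any future option automatically participates. A worked example under assumed typical values (1500-byte MTU, timestamps negotiated, two SACK blocks queued); TCPHDR_SIZE is a local stand-in for sizeof(struct tcphdr):

#include <stdio.h>

#define TCPHDR_SIZE			20
#define TCPOLEN_TSTAMP_ALIGNED		12
#define TCPOLEN_SACK_BASE_ALIGNED	4
#define TCPOLEN_SACK_PERBLOCK		8

int main(void)
{
	/* mss_cache was sized against the common-case header: 20 bytes of
	 * struct tcphdr plus the aligned timestamp option. */
	int tcp_header_len = TCPHDR_SIZE + TCPOLEN_TSTAMP_ALIGNED;  /* 32 */
	int mss_now = 1448;	/* 1500 - 20 IP - 20 TCP - 12 TS */
	int num_sack_blocks = 2;

	/* what tcp_established_options() would report for this packet */
	int header_len = TCPHDR_SIZE + TCPOLEN_TSTAMP_ALIGNED +
			 TCPOLEN_SACK_BASE_ALIGNED +
			 num_sack_blocks * TCPOLEN_SACK_PERBLOCK;   /* 52 */

	if (header_len != tcp_header_len)
		mss_now -= header_len - tcp_header_len;

	printf("payload per segment: %d\n", mss_now);	/* 1428 */
	return 0;
}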
@@ -1913,7 +1965,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
-		TCP_INC_STATS(TCP_MIB_RETRANSSEGS);
+		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
 
 		tp->total_retrans++;
 
@@ -1988,14 +2040,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
 		if (sacked & TCPCB_LOST) {
 			if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+				int mib_idx;
+
 				if (tcp_retransmit_skb(sk, skb)) {
 					tp->retransmit_skb_hint = NULL;
 					return;
 				}
 				if (icsk->icsk_ca_state != TCP_CA_Loss)
-					NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
+					mib_idx = LINUX_MIB_TCPFASTRETRANS;
 				else
-					NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
+					mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+				NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 				if (skb == tcp_write_queue_head(sk))
 					inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
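[Annotation] The retransmit counters are folded into one increment site: pick the MIB index first, then bump it once with the socket's network namespace. The same shape in miniature, with invented enum names standing in for the LINUX_MIB_* constants:

#include <stdio.h>

enum { MIB_FASTRETRANS, MIB_SLOWSTARTRETRANS, MIB_MAX };
static unsigned long mib[MIB_MAX];

static void retransmit_accounting(int in_loss_state)
{
	int mib_idx;

	if (!in_loss_state)
		mib_idx = MIB_FASTRETRANS;
	else
		mib_idx = MIB_SLOWSTARTRETRANS;
	mib[mib_idx]++;		/* one increment site instead of two */
}

int main(void)
{
	retransmit_accounting(0);
	retransmit_accounting(1);
	printf("%lu %lu\n", mib[MIB_FASTRETRANS], mib[MIB_SLOWSTARTRETRANS]);
	return 0;
}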
@@ -2065,7 +2120,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 					  inet_csk(sk)->icsk_rto,
 					  TCP_RTO_MAX);
 
-		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
 	}
 }
 
@@ -2119,7 +2174,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* NOTE: No TCP options attached and we never retransmit this. */
 	skb = alloc_skb(MAX_TCP_HEADER, priority);
 	if (!skb) {
-		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
 		return;
 	}
 
@@ -2130,9 +2185,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Send it off. */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
-		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
 
-	TCP_INC_STATS(TCP_MIB_OUTRSTS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
 }
 
 /* WARNING: This routine must only be called when we have already sent
@@ -2180,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
 	int tcp_header_size;
+	struct tcp_out_options opts;
 	struct sk_buff *skb;
-#ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *md5;
 	__u8 *md5_hash_location;
-#endif
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
 	if (skb == NULL)
@@ -2195,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb->dst = dst_clone(dst);
 
-	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
-			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
-			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
-			   /* SACK_PERM is in the place of NOP NOP of TS */
-			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+		__u8 rcv_wscale;
+		/* Set this up on the first call only */
+		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+		/* tcp_full_space because it is guaranteed to be the first packet */
+		tcp_select_initial_window(tcp_full_space(sk),
+			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			&req->rcv_wnd,
+			&req->window_clamp,
+			ireq->wscale_ok,
+			&rcv_wscale);
+		ireq->rcv_wscale = rcv_wscale;
+	}
+
+	memset(&opts, 0, sizeof(opts));
+	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	tcp_header_size = tcp_synack_options(sk, req,
+					     dst_metric(dst, RTAX_ADVMSS),
+					     skb, &opts, &md5) +
+			  sizeof(struct tcphdr);
 
-#ifdef CONFIG_TCP_MD5SIG
-	/* Are we doing MD5 on this segment? If so - make room for it */
-	md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
-	if (md5)
-		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
 
@@ -2224,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 					     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale);
-		ireq->rcv_wscale = rcv_wscale;
-	}
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
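[Annotation] The receive-window setup did not change, it only moved: tcp_synack_options() needs ireq->rcv_wscale and TCP_SKB_CB(skb)->when before it can size and fill the options, so the block now runs ahead of the option computation instead of after the header is written. One construct in it deserves a gloss: `tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW)` is GCC's two-operand conditional, where `a ?: b` yields a when a is nonzero and b otherwise, evaluating a only once. A minimal demonstration (GNU extension, not standard C; variable names are invented for the sketch):

#include <stdio.h>

int main(void)
{
	int window_clamp = 0;		/* not yet set */
	int dst_window = 65535;		/* route metric fallback */

	/* equivalent to: window_clamp ? window_clamp : dst_window,
	 * but window_clamp is evaluated a single time */
	int clamp = window_clamp ?: dst_window;

	printf("%d\n", clamp);		/* 65535 */
	return 0;
}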
@@ -2245,29 +2295,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
 	else
 #endif
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
-			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
-			      TCP_SKB_CB(skb)->when,
-			      req->ts_recent,
-			      (
-#ifdef CONFIG_TCP_MD5SIG
-			       md5 ? &md5_hash_location :
-#endif
-			       NULL)
-			      );
-
+	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
 	th->doff = (tcp_header_size >> 2);
-	TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
 	if (md5) {
 		tp->af_specific->calc_md5_hash(md5_hash_location,
-					       md5,
-					       NULL, dst, req,
-					       tcp_hdr(skb), sk->sk_protocol,
-					       skb->len);
+					       md5, NULL, req, skb);
 	}
 #endif
 
@@ -2367,7 +2403,7 @@ int tcp_connect(struct sock *sk)
 	 */
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
-	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,