diff options
author | Christoph Paasch <christoph.paasch@uclouvain.be> | 2013-03-17 04:23:34 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-03-17 14:35:13 -0400 |
commit | 1a2c6181c4a1922021b4d7df373bba612c3e5f04 (patch) | |
tree | 1346c9b1db83495a98fbcf95f8e521c67fc55cb9 /net/ipv4/tcp_output.c | |
parent | 94d8f2b133c9ff97105adc1233d1a35e16e1e7a6 (diff) |
tcp: Remove TCPCT
TCPCT uses option-number 253, which is reserved for experimental use and
should not be used in production environments.
Further, TCPCT does not fully implement RFC 6013.
As a nice side-effect, removing TCPCT increases TCP's performance for
very short flows:
Measured by running an apache-benchmark with -c 100 -n 100000, sending
HTTP requests for files of 1KB size.
before this patch:
average (among 7 runs) of 20845.5 Requests/Second
after:
average (among 7 runs) of 21403.6 Requests/Second
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 219 |
1 files changed, 6 insertions, 213 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e7742f0b5d2..ac5871ebe086 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -65,9 +65,6 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; | |||
65 | /* By default, RFC2861 behavior. */ | 65 | /* By default, RFC2861 behavior. */ |
66 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 66 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
67 | 67 | ||
68 | int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ | ||
69 | EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); | ||
70 | |||
71 | static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | 68 | static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, |
72 | int push_one, gfp_t gfp); | 69 | int push_one, gfp_t gfp); |
73 | 70 | ||
@@ -386,7 +383,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) | |||
386 | #define OPTION_TS (1 << 1) | 383 | #define OPTION_TS (1 << 1) |
387 | #define OPTION_MD5 (1 << 2) | 384 | #define OPTION_MD5 (1 << 2) |
388 | #define OPTION_WSCALE (1 << 3) | 385 | #define OPTION_WSCALE (1 << 3) |
389 | #define OPTION_COOKIE_EXTENSION (1 << 4) | ||
390 | #define OPTION_FAST_OPEN_COOKIE (1 << 8) | 386 | #define OPTION_FAST_OPEN_COOKIE (1 << 8) |
391 | 387 | ||
392 | struct tcp_out_options { | 388 | struct tcp_out_options { |
@@ -400,36 +396,6 @@ struct tcp_out_options { | |||
400 | struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ | 396 | struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ |
401 | }; | 397 | }; |
402 | 398 | ||
403 | /* The sysctl int routines are generic, so check consistency here. | ||
404 | */ | ||
405 | static u8 tcp_cookie_size_check(u8 desired) | ||
406 | { | ||
407 | int cookie_size; | ||
408 | |||
409 | if (desired > 0) | ||
410 | /* previously specified */ | ||
411 | return desired; | ||
412 | |||
413 | cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); | ||
414 | if (cookie_size <= 0) | ||
415 | /* no default specified */ | ||
416 | return 0; | ||
417 | |||
418 | if (cookie_size <= TCP_COOKIE_MIN) | ||
419 | /* value too small, specify minimum */ | ||
420 | return TCP_COOKIE_MIN; | ||
421 | |||
422 | if (cookie_size >= TCP_COOKIE_MAX) | ||
423 | /* value too large, specify maximum */ | ||
424 | return TCP_COOKIE_MAX; | ||
425 | |||
426 | if (cookie_size & 1) | ||
427 | /* 8-bit multiple, illegal, fix it */ | ||
428 | cookie_size++; | ||
429 | |||
430 | return (u8)cookie_size; | ||
431 | } | ||
432 | |||
433 | /* Write previously computed TCP options to the packet. | 399 | /* Write previously computed TCP options to the packet. |
434 | * | 400 | * |
435 | * Beware: Something in the Internet is very sensitive to the ordering of | 401 | * Beware: Something in the Internet is very sensitive to the ordering of |
@@ -448,27 +414,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
448 | { | 414 | { |
449 | u16 options = opts->options; /* mungable copy */ | 415 | u16 options = opts->options; /* mungable copy */ |
450 | 416 | ||
451 | /* Having both authentication and cookies for security is redundant, | ||
452 | * and there's certainly not enough room. Instead, the cookie-less | ||
453 | * extension variant is proposed. | ||
454 | * | ||
455 | * Consider the pessimal case with authentication. The options | ||
456 | * could look like: | ||
457 | * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 | ||
458 | */ | ||
459 | if (unlikely(OPTION_MD5 & options)) { | 417 | if (unlikely(OPTION_MD5 & options)) { |
460 | if (unlikely(OPTION_COOKIE_EXTENSION & options)) { | 418 | *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | |
461 | *ptr++ = htonl((TCPOPT_COOKIE << 24) | | 419 | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); |
462 | (TCPOLEN_COOKIE_BASE << 16) | | ||
463 | (TCPOPT_MD5SIG << 8) | | ||
464 | TCPOLEN_MD5SIG); | ||
465 | } else { | ||
466 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
467 | (TCPOPT_NOP << 16) | | ||
468 | (TCPOPT_MD5SIG << 8) | | ||
469 | TCPOLEN_MD5SIG); | ||
470 | } | ||
471 | options &= ~OPTION_COOKIE_EXTENSION; | ||
472 | /* overload cookie hash location */ | 420 | /* overload cookie hash location */ |
473 | opts->hash_location = (__u8 *)ptr; | 421 | opts->hash_location = (__u8 *)ptr; |
474 | ptr += 4; | 422 | ptr += 4; |
@@ -497,44 +445,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
497 | *ptr++ = htonl(opts->tsecr); | 445 | *ptr++ = htonl(opts->tsecr); |
498 | } | 446 | } |
499 | 447 | ||
500 | /* Specification requires after timestamp, so do it now. | ||
501 | * | ||
502 | * Consider the pessimal case without authentication. The options | ||
503 | * could look like: | ||
504 | * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 | ||
505 | */ | ||
506 | if (unlikely(OPTION_COOKIE_EXTENSION & options)) { | ||
507 | __u8 *cookie_copy = opts->hash_location; | ||
508 | u8 cookie_size = opts->hash_size; | ||
509 | |||
510 | /* 8-bit multiple handled in tcp_cookie_size_check() above, | ||
511 | * and elsewhere. | ||
512 | */ | ||
513 | if (0x2 & cookie_size) { | ||
514 | __u8 *p = (__u8 *)ptr; | ||
515 | |||
516 | /* 16-bit multiple */ | ||
517 | *p++ = TCPOPT_COOKIE; | ||
518 | *p++ = TCPOLEN_COOKIE_BASE + cookie_size; | ||
519 | *p++ = *cookie_copy++; | ||
520 | *p++ = *cookie_copy++; | ||
521 | ptr++; | ||
522 | cookie_size -= 2; | ||
523 | } else { | ||
524 | /* 32-bit multiple */ | ||
525 | *ptr++ = htonl(((TCPOPT_NOP << 24) | | ||
526 | (TCPOPT_NOP << 16) | | ||
527 | (TCPOPT_COOKIE << 8) | | ||
528 | TCPOLEN_COOKIE_BASE) + | ||
529 | cookie_size); | ||
530 | } | ||
531 | |||
532 | if (cookie_size > 0) { | ||
533 | memcpy(ptr, cookie_copy, cookie_size); | ||
534 | ptr += (cookie_size / 4); | ||
535 | } | ||
536 | } | ||
537 | |||
538 | if (unlikely(OPTION_SACK_ADVERTISE & options)) { | 448 | if (unlikely(OPTION_SACK_ADVERTISE & options)) { |
539 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 449 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
540 | (TCPOPT_NOP << 16) | | 450 | (TCPOPT_NOP << 16) | |
@@ -593,11 +503,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
593 | struct tcp_md5sig_key **md5) | 503 | struct tcp_md5sig_key **md5) |
594 | { | 504 | { |
595 | struct tcp_sock *tp = tcp_sk(sk); | 505 | struct tcp_sock *tp = tcp_sk(sk); |
596 | struct tcp_cookie_values *cvp = tp->cookie_values; | ||
597 | unsigned int remaining = MAX_TCP_OPTION_SPACE; | 506 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
598 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? | ||
599 | tcp_cookie_size_check(cvp->cookie_desired) : | ||
600 | 0; | ||
601 | struct tcp_fastopen_request *fastopen = tp->fastopen_req; | 507 | struct tcp_fastopen_request *fastopen = tp->fastopen_req; |
602 | 508 | ||
603 | #ifdef CONFIG_TCP_MD5SIG | 509 | #ifdef CONFIG_TCP_MD5SIG |
@@ -649,52 +555,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
649 | tp->syn_fastopen = 1; | 555 | tp->syn_fastopen = 1; |
650 | } | 556 | } |
651 | } | 557 | } |
652 | /* Note that timestamps are required by the specification. | ||
653 | * | ||
654 | * Odd numbers of bytes are prohibited by the specification, ensuring | ||
655 | * that the cookie is 16-bit aligned, and the resulting cookie pair is | ||
656 | * 32-bit aligned. | ||
657 | */ | ||
658 | if (*md5 == NULL && | ||
659 | (OPTION_TS & opts->options) && | ||
660 | cookie_size > 0) { | ||
661 | int need = TCPOLEN_COOKIE_BASE + cookie_size; | ||
662 | |||
663 | if (0x2 & need) { | ||
664 | /* 32-bit multiple */ | ||
665 | need += 2; /* NOPs */ | ||
666 | |||
667 | if (need > remaining) { | ||
668 | /* try shrinking cookie to fit */ | ||
669 | cookie_size -= 2; | ||
670 | need -= 4; | ||
671 | } | ||
672 | } | ||
673 | while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { | ||
674 | cookie_size -= 4; | ||
675 | need -= 4; | ||
676 | } | ||
677 | if (TCP_COOKIE_MIN <= cookie_size) { | ||
678 | opts->options |= OPTION_COOKIE_EXTENSION; | ||
679 | opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; | ||
680 | opts->hash_size = cookie_size; | ||
681 | |||
682 | /* Remember for future incarnations. */ | ||
683 | cvp->cookie_desired = cookie_size; | ||
684 | |||
685 | if (cvp->cookie_desired != cvp->cookie_pair_size) { | ||
686 | /* Currently use random bytes as a nonce, | ||
687 | * assuming these are completely unpredictable | ||
688 | * by hostile users of the same system. | ||
689 | */ | ||
690 | get_random_bytes(&cvp->cookie_pair[0], | ||
691 | cookie_size); | ||
692 | cvp->cookie_pair_size = cookie_size; | ||
693 | } | ||
694 | 558 | ||
695 | remaining -= need; | ||
696 | } | ||
697 | } | ||
698 | return MAX_TCP_OPTION_SPACE - remaining; | 559 | return MAX_TCP_OPTION_SPACE - remaining; |
699 | } | 560 | } |
700 | 561 | ||
@@ -704,14 +565,10 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
704 | unsigned int mss, struct sk_buff *skb, | 565 | unsigned int mss, struct sk_buff *skb, |
705 | struct tcp_out_options *opts, | 566 | struct tcp_out_options *opts, |
706 | struct tcp_md5sig_key **md5, | 567 | struct tcp_md5sig_key **md5, |
707 | struct tcp_extend_values *xvp, | ||
708 | struct tcp_fastopen_cookie *foc) | 568 | struct tcp_fastopen_cookie *foc) |
709 | { | 569 | { |
710 | struct inet_request_sock *ireq = inet_rsk(req); | 570 | struct inet_request_sock *ireq = inet_rsk(req); |
711 | unsigned int remaining = MAX_TCP_OPTION_SPACE; | 571 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
712 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | ||
713 | xvp->cookie_plus : | ||
714 | 0; | ||
715 | 572 | ||
716 | #ifdef CONFIG_TCP_MD5SIG | 573 | #ifdef CONFIG_TCP_MD5SIG |
717 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); | 574 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); |
@@ -759,28 +616,7 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
759 | remaining -= need; | 616 | remaining -= need; |
760 | } | 617 | } |
761 | } | 618 | } |
762 | /* Similar rationale to tcp_syn_options() applies here, too. | 619 | |
763 | * If the <SYN> options fit, the same options should fit now! | ||
764 | */ | ||
765 | if (*md5 == NULL && | ||
766 | ireq->tstamp_ok && | ||
767 | cookie_plus > TCPOLEN_COOKIE_BASE) { | ||
768 | int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ | ||
769 | |||
770 | if (0x2 & need) { | ||
771 | /* 32-bit multiple */ | ||
772 | need += 2; /* NOPs */ | ||
773 | } | ||
774 | if (need <= remaining) { | ||
775 | opts->options |= OPTION_COOKIE_EXTENSION; | ||
776 | opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; | ||
777 | remaining -= need; | ||
778 | } else { | ||
779 | /* There's no error return, so flag it. */ | ||
780 | xvp->cookie_out_never = 1; /* true */ | ||
781 | opts->hash_size = 0; | ||
782 | } | ||
783 | } | ||
784 | return MAX_TCP_OPTION_SPACE - remaining; | 620 | return MAX_TCP_OPTION_SPACE - remaining; |
785 | } | 621 | } |
786 | 622 | ||
@@ -2802,32 +2638,24 @@ int tcp_send_synack(struct sock *sk) | |||
2802 | * sk: listener socket | 2638 | * sk: listener socket |
2803 | * dst: dst entry attached to the SYNACK | 2639 | * dst: dst entry attached to the SYNACK |
2804 | * req: request_sock pointer | 2640 | * req: request_sock pointer |
2805 | * rvp: request_values pointer | ||
2806 | * | 2641 | * |
2807 | * Allocate one skb and build a SYNACK packet. | 2642 | * Allocate one skb and build a SYNACK packet. |
2808 | * @dst is consumed : Caller should not use it again. | 2643 | * @dst is consumed : Caller should not use it again. |
2809 | */ | 2644 | */ |
2810 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2645 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2811 | struct request_sock *req, | 2646 | struct request_sock *req, |
2812 | struct request_values *rvp, | ||
2813 | struct tcp_fastopen_cookie *foc) | 2647 | struct tcp_fastopen_cookie *foc) |
2814 | { | 2648 | { |
2815 | struct tcp_out_options opts; | 2649 | struct tcp_out_options opts; |
2816 | struct tcp_extend_values *xvp = tcp_xv(rvp); | ||
2817 | struct inet_request_sock *ireq = inet_rsk(req); | 2650 | struct inet_request_sock *ireq = inet_rsk(req); |
2818 | struct tcp_sock *tp = tcp_sk(sk); | 2651 | struct tcp_sock *tp = tcp_sk(sk); |
2819 | const struct tcp_cookie_values *cvp = tp->cookie_values; | ||
2820 | struct tcphdr *th; | 2652 | struct tcphdr *th; |
2821 | struct sk_buff *skb; | 2653 | struct sk_buff *skb; |
2822 | struct tcp_md5sig_key *md5; | 2654 | struct tcp_md5sig_key *md5; |
2823 | int tcp_header_size; | 2655 | int tcp_header_size; |
2824 | int mss; | 2656 | int mss; |
2825 | int s_data_desired = 0; | ||
2826 | 2657 | ||
2827 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) | 2658 | skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); |
2828 | s_data_desired = cvp->s_data_desired; | ||
2829 | skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, | ||
2830 | sk_gfp_atomic(sk, GFP_ATOMIC)); | ||
2831 | if (unlikely(!skb)) { | 2659 | if (unlikely(!skb)) { |
2832 | dst_release(dst); | 2660 | dst_release(dst); |
2833 | return NULL; | 2661 | return NULL; |
@@ -2869,9 +2697,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2869 | else | 2697 | else |
2870 | #endif | 2698 | #endif |
2871 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2699 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2872 | tcp_header_size = tcp_synack_options(sk, req, mss, | 2700 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, |
2873 | skb, &opts, &md5, xvp, foc) | 2701 | foc) + sizeof(*th); |
2874 | + sizeof(*th); | ||
2875 | 2702 | ||
2876 | skb_push(skb, tcp_header_size); | 2703 | skb_push(skb, tcp_header_size); |
2877 | skb_reset_transport_header(skb); | 2704 | skb_reset_transport_header(skb); |
@@ -2889,40 +2716,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2889 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, | 2716 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, |
2890 | TCPHDR_SYN | TCPHDR_ACK); | 2717 | TCPHDR_SYN | TCPHDR_ACK); |
2891 | 2718 | ||
2892 | if (OPTION_COOKIE_EXTENSION & opts.options) { | ||
2893 | if (s_data_desired) { | ||
2894 | u8 *buf = skb_put(skb, s_data_desired); | ||
2895 | |||
2896 | /* copy data directly from the listening socket. */ | ||
2897 | memcpy(buf, cvp->s_data_payload, s_data_desired); | ||
2898 | TCP_SKB_CB(skb)->end_seq += s_data_desired; | ||
2899 | } | ||
2900 | |||
2901 | if (opts.hash_size > 0) { | ||
2902 | __u32 workspace[SHA_WORKSPACE_WORDS]; | ||
2903 | u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; | ||
2904 | u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; | ||
2905 | |||
2906 | /* Secret recipe depends on the Timestamp, (future) | ||
2907 | * Sequence and Acknowledgment Numbers, Initiator | ||
2908 | * Cookie, and others handled by IP variant caller. | ||
2909 | */ | ||
2910 | *tail-- ^= opts.tsval; | ||
2911 | *tail-- ^= tcp_rsk(req)->rcv_isn + 1; | ||
2912 | *tail-- ^= TCP_SKB_CB(skb)->seq + 1; | ||
2913 | |||
2914 | /* recommended */ | ||
2915 | *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); | ||
2916 | *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ | ||
2917 | |||
2918 | sha_transform((__u32 *)&xvp->cookie_bakery[0], | ||
2919 | (char *)mess, | ||
2920 | &workspace[0]); | ||
2921 | opts.hash_location = | ||
2922 | (__u8 *)&xvp->cookie_bakery[0]; | ||
2923 | } | ||
2924 | } | ||
2925 | |||
2926 | th->seq = htonl(TCP_SKB_CB(skb)->seq); | 2719 | th->seq = htonl(TCP_SKB_CB(skb)->seq); |
2927 | /* XXX data is queued and acked as is. No buffer/window check */ | 2720 | /* XXX data is queued and acked as is. No buffer/window check */ |
2928 | th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); | 2721 | th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); |