aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author Christoph Paasch <christoph.paasch@uclouvain.be> 2013-03-17 04:23:34 -0400
committer David S. Miller <davem@davemloft.net> 2013-03-17 14:35:13 -0400
commit 1a2c6181c4a1922021b4d7df373bba612c3e5f04 (patch)
tree 1346c9b1db83495a98fbcf95f8e521c67fc55cb9 /net/ipv4/tcp_output.c
parent 94d8f2b133c9ff97105adc1233d1a35e16e1e7a6 (diff)
tcp: Remove TCPCT
TCPCT uses option number 253, which is reserved for experimental use and should not be used in production environments. Further, TCPCT does not fully implement RFC 6013. As a nice side effect, removing TCPCT increases TCP's performance for very short flows. Doing an apache-benchmark run with -c 100 -n 100000, sending HTTP requests for files of 1KB size: before this patch, an average (among 7 runs) of 20845.5 requests/second; after this patch, an average (among 7 runs) of 21403.6 requests/second. Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c | 219
1 files changed, 6 insertions, 213 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e7742f0b5d2..ac5871ebe086 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,9 +65,6 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
65/* By default, RFC2861 behavior. */ 65/* By default, RFC2861 behavior. */
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67 67
68int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
69EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
70
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp); 69 int push_one, gfp_t gfp);
73 70
@@ -386,7 +383,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
386#define OPTION_TS (1 << 1) 383#define OPTION_TS (1 << 1)
387#define OPTION_MD5 (1 << 2) 384#define OPTION_MD5 (1 << 2)
388#define OPTION_WSCALE (1 << 3) 385#define OPTION_WSCALE (1 << 3)
389#define OPTION_COOKIE_EXTENSION (1 << 4)
390#define OPTION_FAST_OPEN_COOKIE (1 << 8) 386#define OPTION_FAST_OPEN_COOKIE (1 << 8)
391 387
392struct tcp_out_options { 388struct tcp_out_options {
@@ -400,36 +396,6 @@ struct tcp_out_options {
400 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ 396 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
401}; 397};
402 398
403/* The sysctl int routines are generic, so check consistency here.
404 */
405static u8 tcp_cookie_size_check(u8 desired)
406{
407 int cookie_size;
408
409 if (desired > 0)
410 /* previously specified */
411 return desired;
412
413 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
414 if (cookie_size <= 0)
415 /* no default specified */
416 return 0;
417
418 if (cookie_size <= TCP_COOKIE_MIN)
419 /* value too small, specify minimum */
420 return TCP_COOKIE_MIN;
421
422 if (cookie_size >= TCP_COOKIE_MAX)
423 /* value too large, specify maximum */
424 return TCP_COOKIE_MAX;
425
426 if (cookie_size & 1)
427 /* 8-bit multiple, illegal, fix it */
428 cookie_size++;
429
430 return (u8)cookie_size;
431}
432
433/* Write previously computed TCP options to the packet. 399/* Write previously computed TCP options to the packet.
434 * 400 *
435 * Beware: Something in the Internet is very sensitive to the ordering of 401 * Beware: Something in the Internet is very sensitive to the ordering of
@@ -448,27 +414,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
448{ 414{
449 u16 options = opts->options; /* mungable copy */ 415 u16 options = opts->options; /* mungable copy */
450 416
451 /* Having both authentication and cookies for security is redundant,
452 * and there's certainly not enough room. Instead, the cookie-less
453 * extension variant is proposed.
454 *
455 * Consider the pessimal case with authentication. The options
456 * could look like:
457 * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
458 */
459 if (unlikely(OPTION_MD5 & options)) { 417 if (unlikely(OPTION_MD5 & options)) {
460 if (unlikely(OPTION_COOKIE_EXTENSION & options)) { 418 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
461 *ptr++ = htonl((TCPOPT_COOKIE << 24) | 419 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
462 (TCPOLEN_COOKIE_BASE << 16) |
463 (TCPOPT_MD5SIG << 8) |
464 TCPOLEN_MD5SIG);
465 } else {
466 *ptr++ = htonl((TCPOPT_NOP << 24) |
467 (TCPOPT_NOP << 16) |
468 (TCPOPT_MD5SIG << 8) |
469 TCPOLEN_MD5SIG);
470 }
471 options &= ~OPTION_COOKIE_EXTENSION;
472 /* overload cookie hash location */ 420 /* overload cookie hash location */
473 opts->hash_location = (__u8 *)ptr; 421 opts->hash_location = (__u8 *)ptr;
474 ptr += 4; 422 ptr += 4;
@@ -497,44 +445,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
497 *ptr++ = htonl(opts->tsecr); 445 *ptr++ = htonl(opts->tsecr);
498 } 446 }
499 447
500 /* Specification requires after timestamp, so do it now.
501 *
502 * Consider the pessimal case without authentication. The options
503 * could look like:
504 * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
505 */
506 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
507 __u8 *cookie_copy = opts->hash_location;
508 u8 cookie_size = opts->hash_size;
509
510 /* 8-bit multiple handled in tcp_cookie_size_check() above,
511 * and elsewhere.
512 */
513 if (0x2 & cookie_size) {
514 __u8 *p = (__u8 *)ptr;
515
516 /* 16-bit multiple */
517 *p++ = TCPOPT_COOKIE;
518 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
519 *p++ = *cookie_copy++;
520 *p++ = *cookie_copy++;
521 ptr++;
522 cookie_size -= 2;
523 } else {
524 /* 32-bit multiple */
525 *ptr++ = htonl(((TCPOPT_NOP << 24) |
526 (TCPOPT_NOP << 16) |
527 (TCPOPT_COOKIE << 8) |
528 TCPOLEN_COOKIE_BASE) +
529 cookie_size);
530 }
531
532 if (cookie_size > 0) {
533 memcpy(ptr, cookie_copy, cookie_size);
534 ptr += (cookie_size / 4);
535 }
536 }
537
538 if (unlikely(OPTION_SACK_ADVERTISE & options)) { 448 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
539 *ptr++ = htonl((TCPOPT_NOP << 24) | 449 *ptr++ = htonl((TCPOPT_NOP << 24) |
540 (TCPOPT_NOP << 16) | 450 (TCPOPT_NOP << 16) |
@@ -593,11 +503,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
593 struct tcp_md5sig_key **md5) 503 struct tcp_md5sig_key **md5)
594{ 504{
595 struct tcp_sock *tp = tcp_sk(sk); 505 struct tcp_sock *tp = tcp_sk(sk);
596 struct tcp_cookie_values *cvp = tp->cookie_values;
597 unsigned int remaining = MAX_TCP_OPTION_SPACE; 506 unsigned int remaining = MAX_TCP_OPTION_SPACE;
598 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
599 tcp_cookie_size_check(cvp->cookie_desired) :
600 0;
601 struct tcp_fastopen_request *fastopen = tp->fastopen_req; 507 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
602 508
603#ifdef CONFIG_TCP_MD5SIG 509#ifdef CONFIG_TCP_MD5SIG
@@ -649,52 +555,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
649 tp->syn_fastopen = 1; 555 tp->syn_fastopen = 1;
650 } 556 }
651 } 557 }
652 /* Note that timestamps are required by the specification.
653 *
654 * Odd numbers of bytes are prohibited by the specification, ensuring
655 * that the cookie is 16-bit aligned, and the resulting cookie pair is
656 * 32-bit aligned.
657 */
658 if (*md5 == NULL &&
659 (OPTION_TS & opts->options) &&
660 cookie_size > 0) {
661 int need = TCPOLEN_COOKIE_BASE + cookie_size;
662
663 if (0x2 & need) {
664 /* 32-bit multiple */
665 need += 2; /* NOPs */
666
667 if (need > remaining) {
668 /* try shrinking cookie to fit */
669 cookie_size -= 2;
670 need -= 4;
671 }
672 }
673 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
674 cookie_size -= 4;
675 need -= 4;
676 }
677 if (TCP_COOKIE_MIN <= cookie_size) {
678 opts->options |= OPTION_COOKIE_EXTENSION;
679 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
680 opts->hash_size = cookie_size;
681
682 /* Remember for future incarnations. */
683 cvp->cookie_desired = cookie_size;
684
685 if (cvp->cookie_desired != cvp->cookie_pair_size) {
686 /* Currently use random bytes as a nonce,
687 * assuming these are completely unpredictable
688 * by hostile users of the same system.
689 */
690 get_random_bytes(&cvp->cookie_pair[0],
691 cookie_size);
692 cvp->cookie_pair_size = cookie_size;
693 }
694 558
695 remaining -= need;
696 }
697 }
698 return MAX_TCP_OPTION_SPACE - remaining; 559 return MAX_TCP_OPTION_SPACE - remaining;
699} 560}
700 561
@@ -704,14 +565,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
704 unsigned int mss, struct sk_buff *skb, 565 unsigned int mss, struct sk_buff *skb,
705 struct tcp_out_options *opts, 566 struct tcp_out_options *opts,
706 struct tcp_md5sig_key **md5, 567 struct tcp_md5sig_key **md5,
707 struct tcp_extend_values *xvp,
708 struct tcp_fastopen_cookie *foc) 568 struct tcp_fastopen_cookie *foc)
709{ 569{
710 struct inet_request_sock *ireq = inet_rsk(req); 570 struct inet_request_sock *ireq = inet_rsk(req);
711 unsigned int remaining = MAX_TCP_OPTION_SPACE; 571 unsigned int remaining = MAX_TCP_OPTION_SPACE;
712 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
713 xvp->cookie_plus :
714 0;
715 572
716#ifdef CONFIG_TCP_MD5SIG 573#ifdef CONFIG_TCP_MD5SIG
717 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); 574 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -759,28 +616,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
759 remaining -= need; 616 remaining -= need;
760 } 617 }
761 } 618 }
762 /* Similar rationale to tcp_syn_options() applies here, too. 619
763 * If the <SYN> options fit, the same options should fit now!
764 */
765 if (*md5 == NULL &&
766 ireq->tstamp_ok &&
767 cookie_plus > TCPOLEN_COOKIE_BASE) {
768 int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
769
770 if (0x2 & need) {
771 /* 32-bit multiple */
772 need += 2; /* NOPs */
773 }
774 if (need <= remaining) {
775 opts->options |= OPTION_COOKIE_EXTENSION;
776 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
777 remaining -= need;
778 } else {
779 /* There's no error return, so flag it. */
780 xvp->cookie_out_never = 1; /* true */
781 opts->hash_size = 0;
782 }
783 }
784 return MAX_TCP_OPTION_SPACE - remaining; 620 return MAX_TCP_OPTION_SPACE - remaining;
785} 621}
786 622
@@ -2802,32 +2638,24 @@ int tcp_send_synack(struct sock *sk)
2802 * sk: listener socket 2638 * sk: listener socket
2803 * dst: dst entry attached to the SYNACK 2639 * dst: dst entry attached to the SYNACK
2804 * req: request_sock pointer 2640 * req: request_sock pointer
2805 * rvp: request_values pointer
2806 * 2641 *
2807 * Allocate one skb and build a SYNACK packet. 2642 * Allocate one skb and build a SYNACK packet.
2808 * @dst is consumed : Caller should not use it again. 2643 * @dst is consumed : Caller should not use it again.
2809 */ 2644 */
2810struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2645struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2811 struct request_sock *req, 2646 struct request_sock *req,
2812 struct request_values *rvp,
2813 struct tcp_fastopen_cookie *foc) 2647 struct tcp_fastopen_cookie *foc)
2814{ 2648{
2815 struct tcp_out_options opts; 2649 struct tcp_out_options opts;
2816 struct tcp_extend_values *xvp = tcp_xv(rvp);
2817 struct inet_request_sock *ireq = inet_rsk(req); 2650 struct inet_request_sock *ireq = inet_rsk(req);
2818 struct tcp_sock *tp = tcp_sk(sk); 2651 struct tcp_sock *tp = tcp_sk(sk);
2819 const struct tcp_cookie_values *cvp = tp->cookie_values;
2820 struct tcphdr *th; 2652 struct tcphdr *th;
2821 struct sk_buff *skb; 2653 struct sk_buff *skb;
2822 struct tcp_md5sig_key *md5; 2654 struct tcp_md5sig_key *md5;
2823 int tcp_header_size; 2655 int tcp_header_size;
2824 int mss; 2656 int mss;
2825 int s_data_desired = 0;
2826 2657
2827 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) 2658 skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
2828 s_data_desired = cvp->s_data_desired;
2829 skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
2830 sk_gfp_atomic(sk, GFP_ATOMIC));
2831 if (unlikely(!skb)) { 2659 if (unlikely(!skb)) {
2832 dst_release(dst); 2660 dst_release(dst);
2833 return NULL; 2661 return NULL;
@@ -2869,9 +2697,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2869 else 2697 else
2870#endif 2698#endif
2871 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2699 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2872 tcp_header_size = tcp_synack_options(sk, req, mss, 2700 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2873 skb, &opts, &md5, xvp, foc) 2701 foc) + sizeof(*th);
2874 + sizeof(*th);
2875 2702
2876 skb_push(skb, tcp_header_size); 2703 skb_push(skb, tcp_header_size);
2877 skb_reset_transport_header(skb); 2704 skb_reset_transport_header(skb);
@@ -2889,40 +2716,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2889 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2716 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2890 TCPHDR_SYN | TCPHDR_ACK); 2717 TCPHDR_SYN | TCPHDR_ACK);
2891 2718
2892 if (OPTION_COOKIE_EXTENSION & opts.options) {
2893 if (s_data_desired) {
2894 u8 *buf = skb_put(skb, s_data_desired);
2895
2896 /* copy data directly from the listening socket. */
2897 memcpy(buf, cvp->s_data_payload, s_data_desired);
2898 TCP_SKB_CB(skb)->end_seq += s_data_desired;
2899 }
2900
2901 if (opts.hash_size > 0) {
2902 __u32 workspace[SHA_WORKSPACE_WORDS];
2903 u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
2904 u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
2905
2906 /* Secret recipe depends on the Timestamp, (future)
2907 * Sequence and Acknowledgment Numbers, Initiator
2908 * Cookie, and others handled by IP variant caller.
2909 */
2910 *tail-- ^= opts.tsval;
2911 *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
2912 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2913
2914 /* recommended */
2915 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2916 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
2917
2918 sha_transform((__u32 *)&xvp->cookie_bakery[0],
2919 (char *)mess,
2920 &workspace[0]);
2921 opts.hash_location =
2922 (__u8 *)&xvp->cookie_bakery[0];
2923 }
2924 }
2925
2926 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2719 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2927 /* XXX data is queued and acked as is. No buffer/window check */ 2720 /* XXX data is queued and acked as is. No buffer/window check */
2928 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); 2721 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);