diff options
| -rw-r--r-- | include/net/tcp.h | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 432 |
2 files changed, 238 insertions, 196 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index 31f5bbfc59bc..8983386356a5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
| @@ -50,6 +50,7 @@ extern atomic_t tcp_orphan_count; | |||
| 50 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); | 50 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
| 51 | 51 | ||
| 52 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | 52 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
| 53 | #define MAX_TCP_OPTION_SPACE 40 | ||
| 53 | 54 | ||
| 54 | /* | 55 | /* |
| 55 | * Never offer a window over 32767 without using window scaling. Some | 56 | * Never offer a window over 32767 without using window scaling. Some |
| @@ -184,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
| 184 | #define TCPOLEN_SACK_BASE_ALIGNED 4 | 185 | #define TCPOLEN_SACK_BASE_ALIGNED 4 |
| 185 | #define TCPOLEN_SACK_PERBLOCK 8 | 186 | #define TCPOLEN_SACK_PERBLOCK 8 |
| 186 | #define TCPOLEN_MD5SIG_ALIGNED 20 | 187 | #define TCPOLEN_MD5SIG_ALIGNED 20 |
| 188 | #define TCPOLEN_MSS_ALIGNED 4 | ||
| 187 | 189 | ||
| 188 | /* Flags in tp->nonagle */ | 190 | /* Flags in tp->nonagle */ |
| 189 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ | 191 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 958ff486165f..1fa683c0ba9b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
| @@ -345,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
| 345 | TCP_SKB_CB(skb)->end_seq = seq; | 345 | TCP_SKB_CB(skb)->end_seq = seq; |
| 346 | } | 346 | } |
| 347 | 347 | ||
| 348 | static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, | 348 | #define OPTION_SACK_ADVERTISE (1 << 0) |
| 349 | __u32 tstamp, __u8 **md5_hash) | 349 | #define OPTION_TS (1 << 1) |
| 350 | { | 350 | #define OPTION_MD5 (1 << 2) |
| 351 | if (tp->rx_opt.tstamp_ok) { | 351 | |
| 352 | struct tcp_out_options { | ||
| 353 | u8 options; /* bit field of OPTION_* */ | ||
| 354 | u8 ws; /* window scale, 0 to disable */ | ||
| 355 | u8 num_sack_blocks; /* number of SACK blocks to include */ | ||
| 356 | u16 mss; /* 0 to disable */ | ||
| 357 | __u32 tsval, tsecr; /* need to include OPTION_TS */ | ||
| 358 | }; | ||
| 359 | |||
| 360 | static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | ||
| 361 | const struct tcp_out_options *opts, | ||
| 362 | __u8 **md5_hash) { | ||
| 363 | if (unlikely(OPTION_MD5 & opts->options)) { | ||
| 352 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 364 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
| 353 | (TCPOPT_NOP << 16) | | 365 | (TCPOPT_NOP << 16) | |
| 354 | (TCPOPT_TIMESTAMP << 8) | | 366 | (TCPOPT_MD5SIG << 8) | |
| 355 | TCPOLEN_TIMESTAMP); | 367 | TCPOLEN_MD5SIG); |
| 356 | *ptr++ = htonl(tstamp); | 368 | *md5_hash = (__u8 *)ptr; |
| 357 | *ptr++ = htonl(tp->rx_opt.ts_recent); | 369 | ptr += 4; |
| 370 | } else { | ||
| 371 | *md5_hash = NULL; | ||
| 358 | } | 372 | } |
| 359 | if (tp->rx_opt.eff_sacks) { | 373 | |
| 360 | struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; | 374 | if (likely(OPTION_TS & opts->options)) { |
| 375 | if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { | ||
| 376 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | ||
| 377 | (TCPOLEN_SACK_PERM << 16) | | ||
| 378 | (TCPOPT_TIMESTAMP << 8) | | ||
| 379 | TCPOLEN_TIMESTAMP); | ||
| 380 | } else { | ||
| 381 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 382 | (TCPOPT_NOP << 16) | | ||
| 383 | (TCPOPT_TIMESTAMP << 8) | | ||
| 384 | TCPOLEN_TIMESTAMP); | ||
| 385 | } | ||
| 386 | *ptr++ = htonl(opts->tsval); | ||
| 387 | *ptr++ = htonl(opts->tsecr); | ||
| 388 | } | ||
| 389 | |||
| 390 | if (unlikely(opts->mss)) { | ||
| 391 | *ptr++ = htonl((TCPOPT_MSS << 24) | | ||
| 392 | (TCPOLEN_MSS << 16) | | ||
| 393 | opts->mss); | ||
| 394 | } | ||
| 395 | |||
| 396 | if (unlikely(OPTION_SACK_ADVERTISE & opts->options && | ||
| 397 | !(OPTION_TS & opts->options))) { | ||
| 398 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 399 | (TCPOPT_NOP << 16) | | ||
| 400 | (TCPOPT_SACK_PERM << 8) | | ||
| 401 | TCPOLEN_SACK_PERM); | ||
| 402 | } | ||
| 403 | |||
| 404 | if (unlikely(opts->ws)) { | ||
| 405 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 406 | (TCPOPT_WINDOW << 16) | | ||
| 407 | (TCPOLEN_WINDOW << 8) | | ||
| 408 | opts->ws); | ||
| 409 | } | ||
| 410 | |||
| 411 | if (unlikely(opts->num_sack_blocks)) { | ||
| 412 | struct tcp_sack_block *sp = tp->rx_opt.dsack ? | ||
| 413 | tp->duplicate_sack : tp->selective_acks; | ||
| 361 | int this_sack; | 414 | int this_sack; |
| 362 | 415 | ||
| 363 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 416 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
| 364 | (TCPOPT_NOP << 16) | | 417 | (TCPOPT_NOP << 16) | |
| 365 | (TCPOPT_SACK << 8) | | 418 | (TCPOPT_SACK << 8) | |
| 366 | (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * | 419 | (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * |
| 367 | TCPOLEN_SACK_PERBLOCK))); | 420 | TCPOLEN_SACK_PERBLOCK))); |
| 368 | 421 | ||
| 369 | for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { | 422 | for (this_sack = 0; this_sack < opts->num_sack_blocks; |
| 423 | ++this_sack) { | ||
| 370 | *ptr++ = htonl(sp[this_sack].start_seq); | 424 | *ptr++ = htonl(sp[this_sack].start_seq); |
| 371 | *ptr++ = htonl(sp[this_sack].end_seq); | 425 | *ptr++ = htonl(sp[this_sack].end_seq); |
| 372 | } | 426 | } |
| @@ -376,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, | |||
| 376 | tp->rx_opt.eff_sacks--; | 430 | tp->rx_opt.eff_sacks--; |
| 377 | } | 431 | } |
| 378 | } | 432 | } |
| 433 | } | ||
| 434 | |||
| 435 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | ||
| 436 | struct tcp_out_options *opts, | ||
| 437 | struct tcp_md5sig_key **md5) { | ||
| 438 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 439 | unsigned size = 0; | ||
| 440 | |||
| 379 | #ifdef CONFIG_TCP_MD5SIG | 441 | #ifdef CONFIG_TCP_MD5SIG |
| 380 | if (md5_hash) { | 442 | *md5 = tp->af_specific->md5_lookup(sk, sk); |
| 381 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 443 | if (*md5) { |
| 382 | (TCPOPT_NOP << 16) | | 444 | opts->options |= OPTION_MD5; |
| 383 | (TCPOPT_MD5SIG << 8) | | 445 | size += TCPOLEN_MD5SIG_ALIGNED; |
| 384 | TCPOLEN_MD5SIG); | ||
| 385 | *md5_hash = (__u8 *)ptr; | ||
| 386 | } | 446 | } |
| 447 | #else | ||
| 448 | *md5 = NULL; | ||
| 387 | #endif | 449 | #endif |
| 450 | |||
| 451 | /* We always get an MSS option. The option bytes which will be seen in | ||
| 452 | * normal data packets should timestamps be used, must be in the MSS | ||
| 453 | * advertised. But we subtract them from tp->mss_cache so that | ||
| 454 | * calculations in tcp_sendmsg are simpler etc. So account for this | ||
| 455 | * fact here if necessary. If we don't do this correctly, as a | ||
| 456 | * receiver we won't recognize data packets as being full sized when we | ||
| 457 | * should, and thus we won't abide by the delayed ACK rules correctly. | ||
| 458 | * SACKs don't matter, we never delay an ACK when we have any of those | ||
| 459 | * going out. */ | ||
| 460 | opts->mss = tcp_advertise_mss(sk); | ||
| 461 | size += TCPOLEN_MSS_ALIGNED; | ||
| 462 | |||
| 463 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { | ||
| 464 | opts->options |= OPTION_TS; | ||
| 465 | opts->tsval = TCP_SKB_CB(skb)->when; | ||
| 466 | opts->tsecr = tp->rx_opt.ts_recent; | ||
| 467 | size += TCPOLEN_TSTAMP_ALIGNED; | ||
| 468 | } | ||
| 469 | if (likely(sysctl_tcp_window_scaling)) { | ||
| 470 | opts->ws = tp->rx_opt.rcv_wscale; | ||
| 471 | size += TCPOLEN_WSCALE_ALIGNED; | ||
| 472 | } | ||
| 473 | if (likely(sysctl_tcp_sack)) { | ||
| 474 | opts->options |= OPTION_SACK_ADVERTISE; | ||
| 475 | if (unlikely(!OPTION_TS & opts->options)) | ||
| 476 | size += TCPOLEN_SACKPERM_ALIGNED; | ||
| 477 | } | ||
| 478 | |||
| 479 | return size; | ||
| 388 | } | 480 | } |
| 389 | 481 | ||
| 390 | /* Construct a tcp options header for a SYN or SYN_ACK packet. | 482 | static unsigned tcp_synack_options(struct sock *sk, |
| 391 | * If this is every changed make sure to change the definition of | 483 | struct request_sock *req, |
| 392 | * MAX_SYN_SIZE to match the new maximum number of options that you | 484 | unsigned mss, struct sk_buff *skb, |
| 393 | * can generate. | 485 | struct tcp_out_options *opts, |
| 394 | * | 486 | struct tcp_md5sig_key **md5) { |
| 395 | * Note - that with the RFC2385 TCP option, we make room for the | 487 | unsigned size = 0; |
| 396 | * 16 byte MD5 hash. This will be filled in later, so the pointer for the | 488 | struct inet_request_sock *ireq = inet_rsk(req); |
| 397 | * location to be filled is passed back up. | 489 | char doing_ts; |
| 398 | */ | 490 | |
| 399 | static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, | ||
| 400 | int offer_wscale, int wscale, __u32 tstamp, | ||
| 401 | __u32 ts_recent, __u8 **md5_hash) | ||
| 402 | { | ||
| 403 | /* We always get an MSS option. | ||
| 404 | * The option bytes which will be seen in normal data | ||
| 405 | * packets should timestamps be used, must be in the MSS | ||
| 406 | * advertised. But we subtract them from tp->mss_cache so | ||
| 407 | * that calculations in tcp_sendmsg are simpler etc. | ||
| 408 | * So account for this fact here if necessary. If we | ||
| 409 | * don't do this correctly, as a receiver we won't | ||
| 410 | * recognize data packets as being full sized when we | ||
| 411 | * should, and thus we won't abide by the delayed ACK | ||
| 412 | * rules correctly. | ||
| 413 | * SACKs don't matter, we never delay an ACK when we | ||
| 414 | * have any of those going out. | ||
| 415 | */ | ||
| 416 | *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); | ||
| 417 | if (ts) { | ||
| 418 | if (sack) | ||
| 419 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | ||
| 420 | (TCPOLEN_SACK_PERM << 16) | | ||
| 421 | (TCPOPT_TIMESTAMP << 8) | | ||
| 422 | TCPOLEN_TIMESTAMP); | ||
| 423 | else | ||
| 424 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 425 | (TCPOPT_NOP << 16) | | ||
| 426 | (TCPOPT_TIMESTAMP << 8) | | ||
| 427 | TCPOLEN_TIMESTAMP); | ||
| 428 | *ptr++ = htonl(tstamp); /* TSVAL */ | ||
| 429 | *ptr++ = htonl(ts_recent); /* TSECR */ | ||
| 430 | } else if (sack) | ||
| 431 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 432 | (TCPOPT_NOP << 16) | | ||
| 433 | (TCPOPT_SACK_PERM << 8) | | ||
| 434 | TCPOLEN_SACK_PERM); | ||
| 435 | if (offer_wscale) | ||
| 436 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 437 | (TCPOPT_WINDOW << 16) | | ||
| 438 | (TCPOLEN_WINDOW << 8) | | ||
| 439 | (wscale)); | ||
| 440 | #ifdef CONFIG_TCP_MD5SIG | 491 | #ifdef CONFIG_TCP_MD5SIG |
| 441 | /* | 492 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); |
| 442 | * If MD5 is enabled, then we set the option, and include the size | 493 | if (*md5) { |
| 443 | * (always 18). The actual MD5 hash is added just before the | 494 | opts->options |= OPTION_MD5; |
| 444 | * packet is sent. | 495 | size += TCPOLEN_MD5SIG_ALIGNED; |
| 445 | */ | ||
| 446 | if (md5_hash) { | ||
| 447 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
| 448 | (TCPOPT_NOP << 16) | | ||
| 449 | (TCPOPT_MD5SIG << 8) | | ||
| 450 | TCPOLEN_MD5SIG); | ||
| 451 | *md5_hash = (__u8 *)ptr; | ||
| 452 | } | 496 | } |
| 497 | #else | ||
| 498 | *md5 = NULL; | ||
| 453 | #endif | 499 | #endif |
| 500 | |||
| 501 | /* we can't fit any SACK blocks in a packet with MD5 + TS | ||
| 502 | options. There was discussion about disabling SACK rather than TS in | ||
| 503 | order to fit in better with old, buggy kernels, but that was deemed | ||
| 504 | to be unnecessary. */ | ||
| 505 | doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); | ||
| 506 | |||
| 507 | opts->mss = mss; | ||
| 508 | size += TCPOLEN_MSS_ALIGNED; | ||
| 509 | |||
| 510 | if (likely(ireq->wscale_ok)) { | ||
| 511 | opts->ws = ireq->rcv_wscale; | ||
| 512 | size += TCPOLEN_WSCALE_ALIGNED; | ||
| 513 | } | ||
| 514 | if (likely(doing_ts)) { | ||
| 515 | opts->options |= OPTION_TS; | ||
| 516 | opts->tsval = TCP_SKB_CB(skb)->when; | ||
| 517 | opts->tsecr = req->ts_recent; | ||
| 518 | size += TCPOLEN_TSTAMP_ALIGNED; | ||
| 519 | } | ||
| 520 | if (likely(ireq->sack_ok)) { | ||
| 521 | opts->options |= OPTION_SACK_ADVERTISE; | ||
| 522 | if (unlikely(!doing_ts)) | ||
| 523 | size += TCPOLEN_SACKPERM_ALIGNED; | ||
| 524 | } | ||
| 525 | |||
| 526 | return size; | ||
| 527 | } | ||
| 528 | |||
| 529 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | ||
| 530 | struct tcp_out_options *opts, | ||
| 531 | struct tcp_md5sig_key **md5) { | ||
| 532 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | ||
| 533 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 534 | unsigned size = 0; | ||
| 535 | |||
| 536 | #ifdef CONFIG_TCP_MD5SIG | ||
| 537 | *md5 = tp->af_specific->md5_lookup(sk, sk); | ||
| 538 | if (unlikely(*md5)) { | ||
| 539 | opts->options |= OPTION_MD5; | ||
| 540 | size += TCPOLEN_MD5SIG_ALIGNED; | ||
| 541 | } | ||
| 542 | #else | ||
| 543 | *md5 = NULL; | ||
| 544 | #endif | ||
| 545 | |||
| 546 | if (likely(tp->rx_opt.tstamp_ok)) { | ||
| 547 | opts->options |= OPTION_TS; | ||
| 548 | opts->tsval = tcb ? tcb->when : 0; | ||
| 549 | opts->tsecr = tp->rx_opt.ts_recent; | ||
| 550 | size += TCPOLEN_TSTAMP_ALIGNED; | ||
| 551 | } | ||
| 552 | |||
| 553 | if (unlikely(tp->rx_opt.eff_sacks)) { | ||
| 554 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | ||
| 555 | opts->num_sack_blocks = | ||
| 556 | min_t(unsigned, tp->rx_opt.eff_sacks, | ||
| 557 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | ||
| 558 | TCPOLEN_SACK_PERBLOCK); | ||
| 559 | size += TCPOLEN_SACK_BASE_ALIGNED + | ||
| 560 | opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; | ||
| 561 | } | ||
| 562 | |||
| 563 | return size; | ||
| 454 | } | 564 | } |
| 455 | 565 | ||
| 456 | /* This routine actually transmits TCP packets queued in by | 566 | /* This routine actually transmits TCP packets queued in by |
| @@ -471,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 471 | struct inet_sock *inet; | 581 | struct inet_sock *inet; |
| 472 | struct tcp_sock *tp; | 582 | struct tcp_sock *tp; |
| 473 | struct tcp_skb_cb *tcb; | 583 | struct tcp_skb_cb *tcb; |
| 474 | int tcp_header_size; | 584 | struct tcp_out_options opts; |
| 475 | #ifdef CONFIG_TCP_MD5SIG | 585 | unsigned tcp_options_size, tcp_header_size; |
| 476 | struct tcp_md5sig_key *md5; | 586 | struct tcp_md5sig_key *md5; |
| 477 | __u8 *md5_hash_location; | 587 | __u8 *md5_hash_location; |
| 478 | #endif | ||
| 479 | struct tcphdr *th; | 588 | struct tcphdr *th; |
| 480 | int sysctl_flags; | ||
| 481 | int err; | 589 | int err; |
| 482 | 590 | ||
| 483 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | 591 | BUG_ON(!skb || !tcp_skb_pcount(skb)); |
| @@ -500,52 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 500 | inet = inet_sk(sk); | 608 | inet = inet_sk(sk); |
| 501 | tp = tcp_sk(sk); | 609 | tp = tcp_sk(sk); |
| 502 | tcb = TCP_SKB_CB(skb); | 610 | tcb = TCP_SKB_CB(skb); |
| 503 | tcp_header_size = tp->tcp_header_len; | 611 | memset(&opts, 0, sizeof(opts)); |
| 504 | |||
| 505 | #define SYSCTL_FLAG_TSTAMPS 0x1 | ||
| 506 | #define SYSCTL_FLAG_WSCALE 0x2 | ||
| 507 | #define SYSCTL_FLAG_SACK 0x4 | ||
| 508 | 612 | ||
| 509 | sysctl_flags = 0; | 613 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) |
| 510 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 614 | tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); |
| 511 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; | 615 | else |
| 512 | if (sysctl_tcp_timestamps) { | 616 | tcp_options_size = tcp_established_options(sk, skb, &opts, |
| 513 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; | 617 | &md5); |
| 514 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; | 618 | tcp_header_size = tcp_options_size + sizeof(struct tcphdr); |
| 515 | } | ||
| 516 | if (sysctl_tcp_window_scaling) { | ||
| 517 | tcp_header_size += TCPOLEN_WSCALE_ALIGNED; | ||
| 518 | sysctl_flags |= SYSCTL_FLAG_WSCALE; | ||
| 519 | } | ||
| 520 | if (sysctl_tcp_sack) { | ||
| 521 | sysctl_flags |= SYSCTL_FLAG_SACK; | ||
| 522 | if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) | ||
| 523 | tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; | ||
| 524 | } | ||
| 525 | } else if (unlikely(tp->rx_opt.eff_sacks)) { | ||
| 526 | /* A SACK is 2 pad bytes, a 2 byte header, plus | ||
| 527 | * 2 32-bit sequence numbers for each SACK block. | ||
| 528 | */ | ||
| 529 | tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + | ||
| 530 | (tp->rx_opt.eff_sacks * | ||
| 531 | TCPOLEN_SACK_PERBLOCK)); | ||
| 532 | } | ||
| 533 | 619 | ||
| 534 | if (tcp_packets_in_flight(tp) == 0) | 620 | if (tcp_packets_in_flight(tp) == 0) |
| 535 | tcp_ca_event(sk, CA_EVENT_TX_START); | 621 | tcp_ca_event(sk, CA_EVENT_TX_START); |
| 536 | 622 | ||
| 537 | #ifdef CONFIG_TCP_MD5SIG | ||
| 538 | /* | ||
| 539 | * Are we doing MD5 on this segment? If so - make | ||
| 540 | * room for it. | ||
| 541 | */ | ||
| 542 | md5 = tp->af_specific->md5_lookup(sk, sk); | ||
| 543 | if (md5) { | ||
| 544 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; | ||
| 545 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | ||
| 546 | } | ||
| 547 | #endif | ||
| 548 | |||
| 549 | skb_push(skb, tcp_header_size); | 623 | skb_push(skb, tcp_header_size); |
| 550 | skb_reset_transport_header(skb); | 624 | skb_reset_transport_header(skb); |
| 551 | skb_set_owner_w(skb, sk); | 625 | skb_set_owner_w(skb, sk); |
| @@ -576,33 +650,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 576 | th->urg = 1; | 650 | th->urg = 1; |
| 577 | } | 651 | } |
| 578 | 652 | ||
| 579 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 653 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
| 580 | tcp_syn_build_options((__be32 *)(th + 1), | 654 | if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) |
| 581 | tcp_advertise_mss(sk), | ||
| 582 | (sysctl_flags & SYSCTL_FLAG_TSTAMPS), | ||
| 583 | (sysctl_flags & SYSCTL_FLAG_SACK), | ||
| 584 | (sysctl_flags & SYSCTL_FLAG_WSCALE), | ||
| 585 | tp->rx_opt.rcv_wscale, | ||
| 586 | tcb->when, | ||
| 587 | tp->rx_opt.ts_recent, | ||
| 588 | |||
| 589 | #ifdef CONFIG_TCP_MD5SIG | ||
| 590 | md5 ? &md5_hash_location : | ||
| 591 | #endif | ||
| 592 | NULL); | ||
| 593 | } else { | ||
| 594 | tcp_build_and_update_options((__be32 *)(th + 1), | ||
| 595 | tp, tcb->when, | ||
| 596 | #ifdef CONFIG_TCP_MD5SIG | ||
| 597 | md5 ? &md5_hash_location : | ||
| 598 | #endif | ||
| 599 | NULL); | ||
| 600 | TCP_ECN_send(sk, skb, tcp_header_size); | 655 | TCP_ECN_send(sk, skb, tcp_header_size); |
| 601 | } | ||
| 602 | 656 | ||
| 603 | #ifdef CONFIG_TCP_MD5SIG | 657 | #ifdef CONFIG_TCP_MD5SIG |
| 604 | /* Calculate the MD5 hash, as we have all we need now */ | 658 | /* Calculate the MD5 hash, as we have all we need now */ |
| 605 | if (md5) { | 659 | if (md5) { |
| 660 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | ||
| 606 | tp->af_specific->calc_md5_hash(md5_hash_location, | 661 | tp->af_specific->calc_md5_hash(md5_hash_location, |
| 607 | md5, sk, NULL, skb); | 662 | md5, sk, NULL, skb); |
| 608 | } | 663 | } |
| @@ -626,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
| 626 | tcp_enter_cwr(sk, 1); | 681 | tcp_enter_cwr(sk, 1); |
| 627 | 682 | ||
| 628 | return net_xmit_eval(err); | 683 | return net_xmit_eval(err); |
| 629 | |||
| 630 | #undef SYSCTL_FLAG_TSTAMPS | ||
| 631 | #undef SYSCTL_FLAG_WSCALE | ||
| 632 | #undef SYSCTL_FLAG_SACK | ||
| 633 | } | 684 | } |
| 634 | 685 | ||
| 635 | /* This routine just queue's the buffer | 686 | /* This routine just queue's the buffer |
| @@ -970,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
| 970 | u32 mss_now; | 1021 | u32 mss_now; |
| 971 | u16 xmit_size_goal; | 1022 | u16 xmit_size_goal; |
| 972 | int doing_tso = 0; | 1023 | int doing_tso = 0; |
| 1024 | unsigned header_len; | ||
| 1025 | struct tcp_out_options opts; | ||
| 1026 | struct tcp_md5sig_key *md5; | ||
| 973 | 1027 | ||
| 974 | mss_now = tp->mss_cache; | 1028 | mss_now = tp->mss_cache; |
| 975 | 1029 | ||
| @@ -982,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
| 982 | mss_now = tcp_sync_mss(sk, mtu); | 1036 | mss_now = tcp_sync_mss(sk, mtu); |
| 983 | } | 1037 | } |
| 984 | 1038 | ||
| 985 | if (tp->rx_opt.eff_sacks) | 1039 | header_len = tcp_established_options(sk, NULL, &opts, &md5) + |
| 986 | mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + | 1040 | sizeof(struct tcphdr); |
| 987 | (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); | 1041 | /* The mss_cache is sized based on tp->tcp_header_len, which assumes |
| 988 | 1042 | * some common options. If this is an odd packet (because we have SACK | |
| 989 | #ifdef CONFIG_TCP_MD5SIG | 1043 | * blocks etc) then our calculated header_len will be different, and |
| 990 | if (tp->af_specific->md5_lookup(sk, sk)) | 1044 | * we have to adjust mss_now correspondingly */ |
| 991 | mss_now -= TCPOLEN_MD5SIG_ALIGNED; | 1045 | if (header_len != tp->tcp_header_len) { |
| 992 | #endif | 1046 | int delta = (int) header_len - tp->tcp_header_len; |
| 1047 | mss_now -= delta; | ||
| 1048 | } | ||
| 993 | 1049 | ||
| 994 | xmit_size_goal = mss_now; | 1050 | xmit_size_goal = mss_now; |
| 995 | 1051 | ||
| @@ -2179,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2179 | struct tcp_sock *tp = tcp_sk(sk); | 2235 | struct tcp_sock *tp = tcp_sk(sk); |
| 2180 | struct tcphdr *th; | 2236 | struct tcphdr *th; |
| 2181 | int tcp_header_size; | 2237 | int tcp_header_size; |
| 2238 | struct tcp_out_options opts; | ||
| 2182 | struct sk_buff *skb; | 2239 | struct sk_buff *skb; |
| 2183 | #ifdef CONFIG_TCP_MD5SIG | ||
| 2184 | struct tcp_md5sig_key *md5; | 2240 | struct tcp_md5sig_key *md5; |
| 2185 | __u8 *md5_hash_location; | 2241 | __u8 *md5_hash_location; |
| 2186 | #endif | ||
| 2187 | 2242 | ||
| 2188 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); | 2243 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); |
| 2189 | if (skb == NULL) | 2244 | if (skb == NULL) |
| @@ -2194,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2194 | 2249 | ||
| 2195 | skb->dst = dst_clone(dst); | 2250 | skb->dst = dst_clone(dst); |
| 2196 | 2251 | ||
| 2197 | tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + | 2252 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ |
| 2198 | (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + | 2253 | __u8 rcv_wscale; |
| 2199 | (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + | 2254 | /* Set this up on the first call only */ |
| 2200 | /* SACK_PERM is in the place of NOP NOP of TS */ | 2255 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); |
| 2201 | ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); | 2256 | /* tcp_full_space because it is guaranteed to be the first packet */ |
| 2257 | tcp_select_initial_window(tcp_full_space(sk), | ||
| 2258 | dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | ||
| 2259 | &req->rcv_wnd, | ||
| 2260 | &req->window_clamp, | ||
| 2261 | ireq->wscale_ok, | ||
| 2262 | &rcv_wscale); | ||
| 2263 | ireq->rcv_wscale = rcv_wscale; | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | memset(&opts, 0, sizeof(opts)); | ||
| 2267 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
| 2268 | tcp_header_size = tcp_synack_options(sk, req, | ||
| 2269 | dst_metric(dst, RTAX_ADVMSS), | ||
| 2270 | skb, &opts, &md5) + | ||
| 2271 | sizeof(struct tcphdr); | ||
| 2202 | 2272 | ||
| 2203 | #ifdef CONFIG_TCP_MD5SIG | ||
| 2204 | /* Are we doing MD5 on this segment? If so - make room for it */ | ||
| 2205 | md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); | ||
| 2206 | if (md5) | ||
| 2207 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; | ||
| 2208 | #endif | ||
| 2209 | skb_push(skb, tcp_header_size); | 2273 | skb_push(skb, tcp_header_size); |
| 2210 | skb_reset_transport_header(skb); | 2274 | skb_reset_transport_header(skb); |
| 2211 | 2275 | ||
| @@ -2223,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2223 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); | 2287 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); |
| 2224 | th->seq = htonl(TCP_SKB_CB(skb)->seq); | 2288 | th->seq = htonl(TCP_SKB_CB(skb)->seq); |
| 2225 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); | 2289 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); |
| 2226 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ | ||
| 2227 | __u8 rcv_wscale; | ||
| 2228 | /* Set this up on the first call only */ | ||
| 2229 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); | ||
| 2230 | /* tcp_full_space because it is guaranteed to be the first packet */ | ||
| 2231 | tcp_select_initial_window(tcp_full_space(sk), | ||
| 2232 | dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | ||
| 2233 | &req->rcv_wnd, | ||
| 2234 | &req->window_clamp, | ||
| 2235 | ireq->wscale_ok, | ||
| 2236 | &rcv_wscale); | ||
| 2237 | ireq->rcv_wscale = rcv_wscale; | ||
| 2238 | } | ||
| 2239 | 2290 | ||
| 2240 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ | 2291 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
| 2241 | th->window = htons(min(req->rcv_wnd, 65535U)); | 2292 | th->window = htons(min(req->rcv_wnd, 65535U)); |
| @@ -2244,18 +2295,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2244 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); | 2295 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); |
| 2245 | else | 2296 | else |
| 2246 | #endif | 2297 | #endif |
| 2247 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2298 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
| 2248 | tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, | ||
| 2249 | ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, | ||
| 2250 | TCP_SKB_CB(skb)->when, | ||
| 2251 | req->ts_recent, | ||
| 2252 | ( | ||
| 2253 | #ifdef CONFIG_TCP_MD5SIG | ||
| 2254 | md5 ? &md5_hash_location : | ||
| 2255 | #endif | ||
| 2256 | NULL) | ||
| 2257 | ); | ||
| 2258 | |||
| 2259 | th->doff = (tcp_header_size >> 2); | 2299 | th->doff = (tcp_header_size >> 2); |
| 2260 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); | 2300 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); |
| 2261 | 2301 | ||
