diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-21 11:19:50 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-21 11:19:50 -0400 |
commit | eb6a12c2428d21a9f3e0f1a50e927d5fd80fc3d0 (patch) | |
tree | 5ac6f43899648abeab1d43aad3107f664e7f13d5 /net/ipv4/tcp_output.c | |
parent | c4762aba0b1f72659aae9ce37b772ca8bd8f06f4 (diff) | |
parent | 14b395e35d1afdd8019d11b92e28041fad591b71 (diff) |
Merge branch 'linus' into cpus4096-for-linus
Conflicts:
net/sunrpc/svc.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 466 |
1 files changed, 251 insertions, 215 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ad993ecb4810..1fa683c0ba9b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -5,8 +5,6 @@ | |||
5 | * | 5 | * |
6 | * Implementation of the Transmission Control Protocol(TCP). | 6 | * Implementation of the Transmission Control Protocol(TCP). |
7 | * | 7 | * |
8 | * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $ | ||
9 | * | ||
10 | * Authors: Ross Biro | 8 | * Authors: Ross Biro |
11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
12 | * Mark Evans, <evansmp@uhura.aston.ac.uk> | 10 | * Mark Evans, <evansmp@uhura.aston.ac.uk> |
@@ -347,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
347 | TCP_SKB_CB(skb)->end_seq = seq; | 345 | TCP_SKB_CB(skb)->end_seq = seq; |
348 | } | 346 | } |
349 | 347 | ||
350 | static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, | 348 | #define OPTION_SACK_ADVERTISE (1 << 0) |
351 | __u32 tstamp, __u8 **md5_hash) | 349 | #define OPTION_TS (1 << 1) |
352 | { | 350 | #define OPTION_MD5 (1 << 2) |
353 | if (tp->rx_opt.tstamp_ok) { | 351 | |
352 | struct tcp_out_options { | ||
353 | u8 options; /* bit field of OPTION_* */ | ||
354 | u8 ws; /* window scale, 0 to disable */ | ||
355 | u8 num_sack_blocks; /* number of SACK blocks to include */ | ||
356 | u16 mss; /* 0 to disable */ | ||
357 | __u32 tsval, tsecr; /* need to include OPTION_TS */ | ||
358 | }; | ||
359 | |||
360 | static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | ||
361 | const struct tcp_out_options *opts, | ||
362 | __u8 **md5_hash) { | ||
363 | if (unlikely(OPTION_MD5 & opts->options)) { | ||
364 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
365 | (TCPOPT_NOP << 16) | | ||
366 | (TCPOPT_MD5SIG << 8) | | ||
367 | TCPOLEN_MD5SIG); | ||
368 | *md5_hash = (__u8 *)ptr; | ||
369 | ptr += 4; | ||
370 | } else { | ||
371 | *md5_hash = NULL; | ||
372 | } | ||
373 | |||
374 | if (likely(OPTION_TS & opts->options)) { | ||
375 | if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { | ||
376 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | ||
377 | (TCPOLEN_SACK_PERM << 16) | | ||
378 | (TCPOPT_TIMESTAMP << 8) | | ||
379 | TCPOLEN_TIMESTAMP); | ||
380 | } else { | ||
381 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
382 | (TCPOPT_NOP << 16) | | ||
383 | (TCPOPT_TIMESTAMP << 8) | | ||
384 | TCPOLEN_TIMESTAMP); | ||
385 | } | ||
386 | *ptr++ = htonl(opts->tsval); | ||
387 | *ptr++ = htonl(opts->tsecr); | ||
388 | } | ||
389 | |||
390 | if (unlikely(opts->mss)) { | ||
391 | *ptr++ = htonl((TCPOPT_MSS << 24) | | ||
392 | (TCPOLEN_MSS << 16) | | ||
393 | opts->mss); | ||
394 | } | ||
395 | |||
396 | if (unlikely(OPTION_SACK_ADVERTISE & opts->options && | ||
397 | !(OPTION_TS & opts->options))) { | ||
354 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 398 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
355 | (TCPOPT_NOP << 16) | | 399 | (TCPOPT_NOP << 16) | |
356 | (TCPOPT_TIMESTAMP << 8) | | 400 | (TCPOPT_SACK_PERM << 8) | |
357 | TCPOLEN_TIMESTAMP); | 401 | TCPOLEN_SACK_PERM); |
358 | *ptr++ = htonl(tstamp); | 402 | } |
359 | *ptr++ = htonl(tp->rx_opt.ts_recent); | 403 | |
404 | if (unlikely(opts->ws)) { | ||
405 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
406 | (TCPOPT_WINDOW << 16) | | ||
407 | (TCPOLEN_WINDOW << 8) | | ||
408 | opts->ws); | ||
360 | } | 409 | } |
361 | if (tp->rx_opt.eff_sacks) { | 410 | |
362 | struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; | 411 | if (unlikely(opts->num_sack_blocks)) { |
412 | struct tcp_sack_block *sp = tp->rx_opt.dsack ? | ||
413 | tp->duplicate_sack : tp->selective_acks; | ||
363 | int this_sack; | 414 | int this_sack; |
364 | 415 | ||
365 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 416 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
366 | (TCPOPT_NOP << 16) | | 417 | (TCPOPT_NOP << 16) | |
367 | (TCPOPT_SACK << 8) | | 418 | (TCPOPT_SACK << 8) | |
368 | (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * | 419 | (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * |
369 | TCPOLEN_SACK_PERBLOCK))); | 420 | TCPOLEN_SACK_PERBLOCK))); |
370 | 421 | ||
371 | for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { | 422 | for (this_sack = 0; this_sack < opts->num_sack_blocks; |
423 | ++this_sack) { | ||
372 | *ptr++ = htonl(sp[this_sack].start_seq); | 424 | *ptr++ = htonl(sp[this_sack].start_seq); |
373 | *ptr++ = htonl(sp[this_sack].end_seq); | 425 | *ptr++ = htonl(sp[this_sack].end_seq); |
374 | } | 426 | } |
@@ -378,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, | |||
378 | tp->rx_opt.eff_sacks--; | 430 | tp->rx_opt.eff_sacks--; |
379 | } | 431 | } |
380 | } | 432 | } |
433 | } | ||
434 | |||
435 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | ||
436 | struct tcp_out_options *opts, | ||
437 | struct tcp_md5sig_key **md5) { | ||
438 | struct tcp_sock *tp = tcp_sk(sk); | ||
439 | unsigned size = 0; | ||
440 | |||
381 | #ifdef CONFIG_TCP_MD5SIG | 441 | #ifdef CONFIG_TCP_MD5SIG |
382 | if (md5_hash) { | 442 | *md5 = tp->af_specific->md5_lookup(sk, sk); |
383 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 443 | if (*md5) { |
384 | (TCPOPT_NOP << 16) | | 444 | opts->options |= OPTION_MD5; |
385 | (TCPOPT_MD5SIG << 8) | | 445 | size += TCPOLEN_MD5SIG_ALIGNED; |
386 | TCPOLEN_MD5SIG); | ||
387 | *md5_hash = (__u8 *)ptr; | ||
388 | } | 446 | } |
447 | #else | ||
448 | *md5 = NULL; | ||
389 | #endif | 449 | #endif |
450 | |||
451 | /* We always get an MSS option. The option bytes which will be seen in | ||
452 | * normal data packets should timestamps be used, must be in the MSS | ||
453 | * advertised. But we subtract them from tp->mss_cache so that | ||
454 | * calculations in tcp_sendmsg are simpler etc. So account for this | ||
455 | * fact here if necessary. If we don't do this correctly, as a | ||
456 | * receiver we won't recognize data packets as being full sized when we | ||
457 | * should, and thus we won't abide by the delayed ACK rules correctly. | ||
458 | * SACKs don't matter, we never delay an ACK when we have any of those | ||
459 | * going out. */ | ||
460 | opts->mss = tcp_advertise_mss(sk); | ||
461 | size += TCPOLEN_MSS_ALIGNED; | ||
462 | |||
463 | if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { | ||
464 | opts->options |= OPTION_TS; | ||
465 | opts->tsval = TCP_SKB_CB(skb)->when; | ||
466 | opts->tsecr = tp->rx_opt.ts_recent; | ||
467 | size += TCPOLEN_TSTAMP_ALIGNED; | ||
468 | } | ||
469 | if (likely(sysctl_tcp_window_scaling)) { | ||
470 | opts->ws = tp->rx_opt.rcv_wscale; | ||
471 | size += TCPOLEN_WSCALE_ALIGNED; | ||
472 | } | ||
473 | if (likely(sysctl_tcp_sack)) { | ||
474 | opts->options |= OPTION_SACK_ADVERTISE; | ||
475 | if (unlikely(!OPTION_TS & opts->options)) | ||
476 | size += TCPOLEN_SACKPERM_ALIGNED; | ||
477 | } | ||
478 | |||
479 | return size; | ||
390 | } | 480 | } |
391 | 481 | ||
392 | /* Construct a tcp options header for a SYN or SYN_ACK packet. | 482 | static unsigned tcp_synack_options(struct sock *sk, |
393 | * If this is ever changed make sure to change the definition of | 483 | struct request_sock *req, |
394 | * MAX_SYN_SIZE to match the new maximum number of options that you | 484 | unsigned mss, struct sk_buff *skb, |
395 | * can generate. | 485 | struct tcp_out_options *opts, |
396 | * | 486 | struct tcp_md5sig_key **md5) { |
397 | * Note - that with the RFC2385 TCP option, we make room for the | 487 | unsigned size = 0; |
398 | * 16 byte MD5 hash. This will be filled in later, so the pointer for the | 488 | struct inet_request_sock *ireq = inet_rsk(req); |
399 | * location to be filled is passed back up. | 489 | char doing_ts; |
400 | */ | 490 | |
401 | static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, | ||
402 | int offer_wscale, int wscale, __u32 tstamp, | ||
403 | __u32 ts_recent, __u8 **md5_hash) | ||
404 | { | ||
405 | /* We always get an MSS option. | ||
406 | * The option bytes which will be seen in normal data | ||
407 | * packets should timestamps be used, must be in the MSS | ||
408 | * advertised. But we subtract them from tp->mss_cache so | ||
409 | * that calculations in tcp_sendmsg are simpler etc. | ||
410 | * So account for this fact here if necessary. If we | ||
411 | * don't do this correctly, as a receiver we won't | ||
412 | * recognize data packets as being full sized when we | ||
413 | * should, and thus we won't abide by the delayed ACK | ||
414 | * rules correctly. | ||
415 | * SACKs don't matter, we never delay an ACK when we | ||
416 | * have any of those going out. | ||
417 | */ | ||
418 | *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); | ||
419 | if (ts) { | ||
420 | if (sack) | ||
421 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | ||
422 | (TCPOLEN_SACK_PERM << 16) | | ||
423 | (TCPOPT_TIMESTAMP << 8) | | ||
424 | TCPOLEN_TIMESTAMP); | ||
425 | else | ||
426 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
427 | (TCPOPT_NOP << 16) | | ||
428 | (TCPOPT_TIMESTAMP << 8) | | ||
429 | TCPOLEN_TIMESTAMP); | ||
430 | *ptr++ = htonl(tstamp); /* TSVAL */ | ||
431 | *ptr++ = htonl(ts_recent); /* TSECR */ | ||
432 | } else if (sack) | ||
433 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
434 | (TCPOPT_NOP << 16) | | ||
435 | (TCPOPT_SACK_PERM << 8) | | ||
436 | TCPOLEN_SACK_PERM); | ||
437 | if (offer_wscale) | ||
438 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
439 | (TCPOPT_WINDOW << 16) | | ||
440 | (TCPOLEN_WINDOW << 8) | | ||
441 | (wscale)); | ||
442 | #ifdef CONFIG_TCP_MD5SIG | 491 | #ifdef CONFIG_TCP_MD5SIG |
443 | /* | 492 | *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); |
444 | * If MD5 is enabled, then we set the option, and include the size | 493 | if (*md5) { |
445 | * (always 18). The actual MD5 hash is added just before the | 494 | opts->options |= OPTION_MD5; |
446 | * packet is sent. | 495 | size += TCPOLEN_MD5SIG_ALIGNED; |
447 | */ | ||
448 | if (md5_hash) { | ||
449 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
450 | (TCPOPT_NOP << 16) | | ||
451 | (TCPOPT_MD5SIG << 8) | | ||
452 | TCPOLEN_MD5SIG); | ||
453 | *md5_hash = (__u8 *)ptr; | ||
454 | } | 496 | } |
497 | #else | ||
498 | *md5 = NULL; | ||
455 | #endif | 499 | #endif |
500 | |||
501 | /* we can't fit any SACK blocks in a packet with MD5 + TS | ||
502 | options. There was discussion about disabling SACK rather than TS in | ||
503 | order to fit in better with old, buggy kernels, but that was deemed | ||
504 | to be unnecessary. */ | ||
505 | doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); | ||
506 | |||
507 | opts->mss = mss; | ||
508 | size += TCPOLEN_MSS_ALIGNED; | ||
509 | |||
510 | if (likely(ireq->wscale_ok)) { | ||
511 | opts->ws = ireq->rcv_wscale; | ||
512 | size += TCPOLEN_WSCALE_ALIGNED; | ||
513 | } | ||
514 | if (likely(doing_ts)) { | ||
515 | opts->options |= OPTION_TS; | ||
516 | opts->tsval = TCP_SKB_CB(skb)->when; | ||
517 | opts->tsecr = req->ts_recent; | ||
518 | size += TCPOLEN_TSTAMP_ALIGNED; | ||
519 | } | ||
520 | if (likely(ireq->sack_ok)) { | ||
521 | opts->options |= OPTION_SACK_ADVERTISE; | ||
522 | if (unlikely(!doing_ts)) | ||
523 | size += TCPOLEN_SACKPERM_ALIGNED; | ||
524 | } | ||
525 | |||
526 | return size; | ||
527 | } | ||
528 | |||
529 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | ||
530 | struct tcp_out_options *opts, | ||
531 | struct tcp_md5sig_key **md5) { | ||
532 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | ||
533 | struct tcp_sock *tp = tcp_sk(sk); | ||
534 | unsigned size = 0; | ||
535 | |||
536 | #ifdef CONFIG_TCP_MD5SIG | ||
537 | *md5 = tp->af_specific->md5_lookup(sk, sk); | ||
538 | if (unlikely(*md5)) { | ||
539 | opts->options |= OPTION_MD5; | ||
540 | size += TCPOLEN_MD5SIG_ALIGNED; | ||
541 | } | ||
542 | #else | ||
543 | *md5 = NULL; | ||
544 | #endif | ||
545 | |||
546 | if (likely(tp->rx_opt.tstamp_ok)) { | ||
547 | opts->options |= OPTION_TS; | ||
548 | opts->tsval = tcb ? tcb->when : 0; | ||
549 | opts->tsecr = tp->rx_opt.ts_recent; | ||
550 | size += TCPOLEN_TSTAMP_ALIGNED; | ||
551 | } | ||
552 | |||
553 | if (unlikely(tp->rx_opt.eff_sacks)) { | ||
554 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | ||
555 | opts->num_sack_blocks = | ||
556 | min_t(unsigned, tp->rx_opt.eff_sacks, | ||
557 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | ||
558 | TCPOLEN_SACK_PERBLOCK); | ||
559 | size += TCPOLEN_SACK_BASE_ALIGNED + | ||
560 | opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; | ||
561 | } | ||
562 | |||
563 | return size; | ||
456 | } | 564 | } |
457 | 565 | ||
458 | /* This routine actually transmits TCP packets queued in by | 566 | /* This routine actually transmits TCP packets queued in by |
@@ -473,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
473 | struct inet_sock *inet; | 581 | struct inet_sock *inet; |
474 | struct tcp_sock *tp; | 582 | struct tcp_sock *tp; |
475 | struct tcp_skb_cb *tcb; | 583 | struct tcp_skb_cb *tcb; |
476 | int tcp_header_size; | 584 | struct tcp_out_options opts; |
477 | #ifdef CONFIG_TCP_MD5SIG | 585 | unsigned tcp_options_size, tcp_header_size; |
478 | struct tcp_md5sig_key *md5; | 586 | struct tcp_md5sig_key *md5; |
479 | __u8 *md5_hash_location; | 587 | __u8 *md5_hash_location; |
480 | #endif | ||
481 | struct tcphdr *th; | 588 | struct tcphdr *th; |
482 | int sysctl_flags; | ||
483 | int err; | 589 | int err; |
484 | 590 | ||
485 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | 591 | BUG_ON(!skb || !tcp_skb_pcount(skb)); |
@@ -502,50 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
502 | inet = inet_sk(sk); | 608 | inet = inet_sk(sk); |
503 | tp = tcp_sk(sk); | 609 | tp = tcp_sk(sk); |
504 | tcb = TCP_SKB_CB(skb); | 610 | tcb = TCP_SKB_CB(skb); |
505 | tcp_header_size = tp->tcp_header_len; | 611 | memset(&opts, 0, sizeof(opts)); |
506 | |||
507 | #define SYSCTL_FLAG_TSTAMPS 0x1 | ||
508 | #define SYSCTL_FLAG_WSCALE 0x2 | ||
509 | #define SYSCTL_FLAG_SACK 0x4 | ||
510 | 612 | ||
511 | sysctl_flags = 0; | 613 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) |
512 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 614 | tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); |
513 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; | 615 | else |
514 | if (sysctl_tcp_timestamps) { | 616 | tcp_options_size = tcp_established_options(sk, skb, &opts, |
515 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; | 617 | &md5); |
516 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; | 618 | tcp_header_size = tcp_options_size + sizeof(struct tcphdr); |
517 | } | ||
518 | if (sysctl_tcp_window_scaling) { | ||
519 | tcp_header_size += TCPOLEN_WSCALE_ALIGNED; | ||
520 | sysctl_flags |= SYSCTL_FLAG_WSCALE; | ||
521 | } | ||
522 | if (sysctl_tcp_sack) { | ||
523 | sysctl_flags |= SYSCTL_FLAG_SACK; | ||
524 | if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) | ||
525 | tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; | ||
526 | } | ||
527 | } else if (unlikely(tp->rx_opt.eff_sacks)) { | ||
528 | /* A SACK is 2 pad bytes, a 2 byte header, plus | ||
529 | * 2 32-bit sequence numbers for each SACK block. | ||
530 | */ | ||
531 | tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + | ||
532 | (tp->rx_opt.eff_sacks * | ||
533 | TCPOLEN_SACK_PERBLOCK)); | ||
534 | } | ||
535 | 619 | ||
536 | if (tcp_packets_in_flight(tp) == 0) | 620 | if (tcp_packets_in_flight(tp) == 0) |
537 | tcp_ca_event(sk, CA_EVENT_TX_START); | 621 | tcp_ca_event(sk, CA_EVENT_TX_START); |
538 | 622 | ||
539 | #ifdef CONFIG_TCP_MD5SIG | ||
540 | /* | ||
541 | * Are we doing MD5 on this segment? If so - make | ||
542 | * room for it. | ||
543 | */ | ||
544 | md5 = tp->af_specific->md5_lookup(sk, sk); | ||
545 | if (md5) | ||
546 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; | ||
547 | #endif | ||
548 | |||
549 | skb_push(skb, tcp_header_size); | 623 | skb_push(skb, tcp_header_size); |
550 | skb_reset_transport_header(skb); | 624 | skb_reset_transport_header(skb); |
551 | skb_set_owner_w(skb, sk); | 625 | skb_set_owner_w(skb, sk); |
@@ -576,39 +650,16 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
576 | th->urg = 1; | 650 | th->urg = 1; |
577 | } | 651 | } |
578 | 652 | ||
579 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 653 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
580 | tcp_syn_build_options((__be32 *)(th + 1), | 654 | if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) |
581 | tcp_advertise_mss(sk), | ||
582 | (sysctl_flags & SYSCTL_FLAG_TSTAMPS), | ||
583 | (sysctl_flags & SYSCTL_FLAG_SACK), | ||
584 | (sysctl_flags & SYSCTL_FLAG_WSCALE), | ||
585 | tp->rx_opt.rcv_wscale, | ||
586 | tcb->when, | ||
587 | tp->rx_opt.ts_recent, | ||
588 | |||
589 | #ifdef CONFIG_TCP_MD5SIG | ||
590 | md5 ? &md5_hash_location : | ||
591 | #endif | ||
592 | NULL); | ||
593 | } else { | ||
594 | tcp_build_and_update_options((__be32 *)(th + 1), | ||
595 | tp, tcb->when, | ||
596 | #ifdef CONFIG_TCP_MD5SIG | ||
597 | md5 ? &md5_hash_location : | ||
598 | #endif | ||
599 | NULL); | ||
600 | TCP_ECN_send(sk, skb, tcp_header_size); | 655 | TCP_ECN_send(sk, skb, tcp_header_size); |
601 | } | ||
602 | 656 | ||
603 | #ifdef CONFIG_TCP_MD5SIG | 657 | #ifdef CONFIG_TCP_MD5SIG |
604 | /* Calculate the MD5 hash, as we have all we need now */ | 658 | /* Calculate the MD5 hash, as we have all we need now */ |
605 | if (md5) { | 659 | if (md5) { |
660 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | ||
606 | tp->af_specific->calc_md5_hash(md5_hash_location, | 661 | tp->af_specific->calc_md5_hash(md5_hash_location, |
607 | md5, | 662 | md5, sk, NULL, skb); |
608 | sk, NULL, NULL, | ||
609 | tcp_hdr(skb), | ||
610 | sk->sk_protocol, | ||
611 | skb->len); | ||
612 | } | 663 | } |
613 | #endif | 664 | #endif |
614 | 665 | ||
@@ -621,7 +672,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
621 | tcp_event_data_sent(tp, skb, sk); | 672 | tcp_event_data_sent(tp, skb, sk); |
622 | 673 | ||
623 | if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) | 674 | if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) |
624 | TCP_INC_STATS(TCP_MIB_OUTSEGS); | 675 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); |
625 | 676 | ||
626 | err = icsk->icsk_af_ops->queue_xmit(skb, 0); | 677 | err = icsk->icsk_af_ops->queue_xmit(skb, 0); |
627 | if (likely(err <= 0)) | 678 | if (likely(err <= 0)) |
@@ -630,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
630 | tcp_enter_cwr(sk, 1); | 681 | tcp_enter_cwr(sk, 1); |
631 | 682 | ||
632 | return net_xmit_eval(err); | 683 | return net_xmit_eval(err); |
633 | |||
634 | #undef SYSCTL_FLAG_TSTAMPS | ||
635 | #undef SYSCTL_FLAG_WSCALE | ||
636 | #undef SYSCTL_FLAG_SACK | ||
637 | } | 684 | } |
638 | 685 | ||
639 | /* This routine just queues the buffer | 686 | /* This routine just queues the buffer |
@@ -974,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
974 | u32 mss_now; | 1021 | u32 mss_now; |
975 | u16 xmit_size_goal; | 1022 | u16 xmit_size_goal; |
976 | int doing_tso = 0; | 1023 | int doing_tso = 0; |
1024 | unsigned header_len; | ||
1025 | struct tcp_out_options opts; | ||
1026 | struct tcp_md5sig_key *md5; | ||
977 | 1027 | ||
978 | mss_now = tp->mss_cache; | 1028 | mss_now = tp->mss_cache; |
979 | 1029 | ||
@@ -986,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
986 | mss_now = tcp_sync_mss(sk, mtu); | 1036 | mss_now = tcp_sync_mss(sk, mtu); |
987 | } | 1037 | } |
988 | 1038 | ||
989 | if (tp->rx_opt.eff_sacks) | 1039 | header_len = tcp_established_options(sk, NULL, &opts, &md5) + |
990 | mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + | 1040 | sizeof(struct tcphdr); |
991 | (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); | 1041 | /* The mss_cache is sized based on tp->tcp_header_len, which assumes |
992 | 1042 | * some common options. If this is an odd packet (because we have SACK | |
993 | #ifdef CONFIG_TCP_MD5SIG | 1043 | * blocks etc) then our calculated header_len will be different, and |
994 | if (tp->af_specific->md5_lookup(sk, sk)) | 1044 | * we have to adjust mss_now correspondingly */ |
995 | mss_now -= TCPOLEN_MD5SIG_ALIGNED; | 1045 | if (header_len != tp->tcp_header_len) { |
996 | #endif | 1046 | int delta = (int) header_len - tp->tcp_header_len; |
1047 | mss_now -= delta; | ||
1048 | } | ||
997 | 1049 | ||
998 | xmit_size_goal = mss_now; | 1050 | xmit_size_goal = mss_now; |
999 | 1051 | ||
@@ -1913,7 +1965,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1913 | 1965 | ||
1914 | if (err == 0) { | 1966 | if (err == 0) { |
1915 | /* Update global TCP statistics. */ | 1967 | /* Update global TCP statistics. */ |
1916 | TCP_INC_STATS(TCP_MIB_RETRANSSEGS); | 1968 | TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); |
1917 | 1969 | ||
1918 | tp->total_retrans++; | 1970 | tp->total_retrans++; |
1919 | 1971 | ||
@@ -1988,14 +2040,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1988 | 2040 | ||
1989 | if (sacked & TCPCB_LOST) { | 2041 | if (sacked & TCPCB_LOST) { |
1990 | if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { | 2042 | if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { |
2043 | int mib_idx; | ||
2044 | |||
1991 | if (tcp_retransmit_skb(sk, skb)) { | 2045 | if (tcp_retransmit_skb(sk, skb)) { |
1992 | tp->retransmit_skb_hint = NULL; | 2046 | tp->retransmit_skb_hint = NULL; |
1993 | return; | 2047 | return; |
1994 | } | 2048 | } |
1995 | if (icsk->icsk_ca_state != TCP_CA_Loss) | 2049 | if (icsk->icsk_ca_state != TCP_CA_Loss) |
1996 | NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); | 2050 | mib_idx = LINUX_MIB_TCPFASTRETRANS; |
1997 | else | 2051 | else |
1998 | NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); | 2052 | mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; |
2053 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
1999 | 2054 | ||
2000 | if (skb == tcp_write_queue_head(sk)) | 2055 | if (skb == tcp_write_queue_head(sk)) |
2001 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2056 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
@@ -2065,7 +2120,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
2065 | inet_csk(sk)->icsk_rto, | 2120 | inet_csk(sk)->icsk_rto, |
2066 | TCP_RTO_MAX); | 2121 | TCP_RTO_MAX); |
2067 | 2122 | ||
2068 | NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); | 2123 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS); |
2069 | } | 2124 | } |
2070 | } | 2125 | } |
2071 | 2126 | ||
@@ -2119,7 +2174,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2119 | /* NOTE: No TCP options attached and we never retransmit this. */ | 2174 | /* NOTE: No TCP options attached and we never retransmit this. */ |
2120 | skb = alloc_skb(MAX_TCP_HEADER, priority); | 2175 | skb = alloc_skb(MAX_TCP_HEADER, priority); |
2121 | if (!skb) { | 2176 | if (!skb) { |
2122 | NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); | 2177 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
2123 | return; | 2178 | return; |
2124 | } | 2179 | } |
2125 | 2180 | ||
@@ -2130,9 +2185,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2130 | /* Send it off. */ | 2185 | /* Send it off. */ |
2131 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2186 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2132 | if (tcp_transmit_skb(sk, skb, 0, priority)) | 2187 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
2133 | NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); | 2188 | NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
2134 | 2189 | ||
2135 | TCP_INC_STATS(TCP_MIB_OUTRSTS); | 2190 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); |
2136 | } | 2191 | } |
2137 | 2192 | ||
2138 | /* WARNING: This routine must only be called when we have already sent | 2193 | /* WARNING: This routine must only be called when we have already sent |
@@ -2180,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2180 | struct tcp_sock *tp = tcp_sk(sk); | 2235 | struct tcp_sock *tp = tcp_sk(sk); |
2181 | struct tcphdr *th; | 2236 | struct tcphdr *th; |
2182 | int tcp_header_size; | 2237 | int tcp_header_size; |
2238 | struct tcp_out_options opts; | ||
2183 | struct sk_buff *skb; | 2239 | struct sk_buff *skb; |
2184 | #ifdef CONFIG_TCP_MD5SIG | ||
2185 | struct tcp_md5sig_key *md5; | 2240 | struct tcp_md5sig_key *md5; |
2186 | __u8 *md5_hash_location; | 2241 | __u8 *md5_hash_location; |
2187 | #endif | ||
2188 | 2242 | ||
2189 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); | 2243 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); |
2190 | if (skb == NULL) | 2244 | if (skb == NULL) |
@@ -2195,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2195 | 2249 | ||
2196 | skb->dst = dst_clone(dst); | 2250 | skb->dst = dst_clone(dst); |
2197 | 2251 | ||
2198 | tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + | 2252 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ |
2199 | (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + | 2253 | __u8 rcv_wscale; |
2200 | (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + | 2254 | /* Set this up on the first call only */ |
2201 | /* SACK_PERM is in the place of NOP NOP of TS */ | 2255 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); |
2202 | ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); | 2256 | /* tcp_full_space because it is guaranteed to be the first packet */ |
2257 | tcp_select_initial_window(tcp_full_space(sk), | ||
2258 | dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | ||
2259 | &req->rcv_wnd, | ||
2260 | &req->window_clamp, | ||
2261 | ireq->wscale_ok, | ||
2262 | &rcv_wscale); | ||
2263 | ireq->rcv_wscale = rcv_wscale; | ||
2264 | } | ||
2265 | |||
2266 | memset(&opts, 0, sizeof(opts)); | ||
2267 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | ||
2268 | tcp_header_size = tcp_synack_options(sk, req, | ||
2269 | dst_metric(dst, RTAX_ADVMSS), | ||
2270 | skb, &opts, &md5) + | ||
2271 | sizeof(struct tcphdr); | ||
2203 | 2272 | ||
2204 | #ifdef CONFIG_TCP_MD5SIG | ||
2205 | /* Are we doing MD5 on this segment? If so - make room for it */ | ||
2206 | md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); | ||
2207 | if (md5) | ||
2208 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; | ||
2209 | #endif | ||
2210 | skb_push(skb, tcp_header_size); | 2273 | skb_push(skb, tcp_header_size); |
2211 | skb_reset_transport_header(skb); | 2274 | skb_reset_transport_header(skb); |
2212 | 2275 | ||
@@ -2224,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2224 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); | 2287 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); |
2225 | th->seq = htonl(TCP_SKB_CB(skb)->seq); | 2288 | th->seq = htonl(TCP_SKB_CB(skb)->seq); |
2226 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); | 2289 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); |
2227 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ | ||
2228 | __u8 rcv_wscale; | ||
2229 | /* Set this up on the first call only */ | ||
2230 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); | ||
2231 | /* tcp_full_space because it is guaranteed to be the first packet */ | ||
2232 | tcp_select_initial_window(tcp_full_space(sk), | ||
2233 | dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | ||
2234 | &req->rcv_wnd, | ||
2235 | &req->window_clamp, | ||
2236 | ireq->wscale_ok, | ||
2237 | &rcv_wscale); | ||
2238 | ireq->rcv_wscale = rcv_wscale; | ||
2239 | } | ||
2240 | 2290 | ||
2241 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ | 2291 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
2242 | th->window = htons(min(req->rcv_wnd, 65535U)); | 2292 | th->window = htons(min(req->rcv_wnd, 65535U)); |
@@ -2245,29 +2295,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2245 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); | 2295 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); |
2246 | else | 2296 | else |
2247 | #endif | 2297 | #endif |
2248 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2298 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
2249 | tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, | ||
2250 | ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, | ||
2251 | TCP_SKB_CB(skb)->when, | ||
2252 | req->ts_recent, | ||
2253 | ( | ||
2254 | #ifdef CONFIG_TCP_MD5SIG | ||
2255 | md5 ? &md5_hash_location : | ||
2256 | #endif | ||
2257 | NULL) | ||
2258 | ); | ||
2259 | |||
2260 | th->doff = (tcp_header_size >> 2); | 2299 | th->doff = (tcp_header_size >> 2); |
2261 | TCP_INC_STATS(TCP_MIB_OUTSEGS); | 2300 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); |
2262 | 2301 | ||
2263 | #ifdef CONFIG_TCP_MD5SIG | 2302 | #ifdef CONFIG_TCP_MD5SIG |
2264 | /* Okay, we have all we need - do the md5 hash if needed */ | 2303 | /* Okay, we have all we need - do the md5 hash if needed */ |
2265 | if (md5) { | 2304 | if (md5) { |
2266 | tp->af_specific->calc_md5_hash(md5_hash_location, | 2305 | tp->af_specific->calc_md5_hash(md5_hash_location, |
2267 | md5, | 2306 | md5, NULL, req, skb); |
2268 | NULL, dst, req, | ||
2269 | tcp_hdr(skb), sk->sk_protocol, | ||
2270 | skb->len); | ||
2271 | } | 2307 | } |
2272 | #endif | 2308 | #endif |
2273 | 2309 | ||
@@ -2367,7 +2403,7 @@ int tcp_connect(struct sock *sk) | |||
2367 | */ | 2403 | */ |
2368 | tp->snd_nxt = tp->write_seq; | 2404 | tp->snd_nxt = tp->write_seq; |
2369 | tp->pushed_seq = tp->write_seq; | 2405 | tp->pushed_seq = tp->write_seq; |
2370 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); | 2406 | TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); |
2371 | 2407 | ||
2372 | /* Timer for repeating the SYN until an answer. */ | 2408 | /* Timer for repeating the SYN until an answer. */ |
2373 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2409 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |