aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c287
1 files changed, 18 insertions, 269 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e22020790709..dcb116dde216 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
409 409
410 icsk->icsk_sync_mss = tcp_sync_mss; 410 icsk->icsk_sync_mss = tcp_sync_mss;
411 411
412 /* TCP Cookie Transactions */
413 if (sysctl_tcp_cookie_size > 0) {
414 /* Default, cookies without s_data_payload. */
415 tp->cookie_values =
416 kzalloc(sizeof(*tp->cookie_values),
417 sk->sk_allocation);
418 if (tp->cookie_values != NULL)
419 kref_init(&tp->cookie_values->kref);
420 }
421 /* Presumed zeroed, in order of appearance: 412 /* Presumed zeroed, in order of appearance:
422 * cookie_in_always, cookie_out_never, 413 * cookie_in_always, cookie_out_never,
423 * s_data_constant, s_data_in, s_data_out 414 * s_data_constant, s_data_in, s_data_out
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2397 release_sock(sk); 2388 release_sock(sk);
2398 return err; 2389 return err;
2399 } 2390 }
2400 case TCP_COOKIE_TRANSACTIONS: {
2401 struct tcp_cookie_transactions ctd;
2402 struct tcp_cookie_values *cvp = NULL;
2403
2404 if (sizeof(ctd) > optlen)
2405 return -EINVAL;
2406 if (copy_from_user(&ctd, optval, sizeof(ctd)))
2407 return -EFAULT;
2408
2409 if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
2410 ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
2411 return -EINVAL;
2412
2413 if (ctd.tcpct_cookie_desired == 0) {
2414 /* default to global value */
2415 } else if ((0x1 & ctd.tcpct_cookie_desired) ||
2416 ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
2417 ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
2418 return -EINVAL;
2419 }
2420
2421 if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
2422 /* Supercedes all other values */
2423 lock_sock(sk);
2424 if (tp->cookie_values != NULL) {
2425 kref_put(&tp->cookie_values->kref,
2426 tcp_cookie_values_release);
2427 tp->cookie_values = NULL;
2428 }
2429 tp->rx_opt.cookie_in_always = 0; /* false */
2430 tp->rx_opt.cookie_out_never = 1; /* true */
2431 release_sock(sk);
2432 return err;
2433 }
2434
2435 /* Allocate ancillary memory before locking.
2436 */
2437 if (ctd.tcpct_used > 0 ||
2438 (tp->cookie_values == NULL &&
2439 (sysctl_tcp_cookie_size > 0 ||
2440 ctd.tcpct_cookie_desired > 0 ||
2441 ctd.tcpct_s_data_desired > 0))) {
2442 cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
2443 GFP_KERNEL);
2444 if (cvp == NULL)
2445 return -ENOMEM;
2446
2447 kref_init(&cvp->kref);
2448 }
2449 lock_sock(sk);
2450 tp->rx_opt.cookie_in_always =
2451 (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
2452 tp->rx_opt.cookie_out_never = 0; /* false */
2453
2454 if (tp->cookie_values != NULL) {
2455 if (cvp != NULL) {
2456 /* Changed values are recorded by a changed
2457 * pointer, ensuring the cookie will differ,
2458 * without separately hashing each value later.
2459 */
2460 kref_put(&tp->cookie_values->kref,
2461 tcp_cookie_values_release);
2462 } else {
2463 cvp = tp->cookie_values;
2464 }
2465 }
2466
2467 if (cvp != NULL) {
2468 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2469
2470 if (ctd.tcpct_used > 0) {
2471 memcpy(cvp->s_data_payload, ctd.tcpct_value,
2472 ctd.tcpct_used);
2473 cvp->s_data_desired = ctd.tcpct_used;
2474 cvp->s_data_constant = 1; /* true */
2475 } else {
2476 /* No constant payload data. */
2477 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2478 cvp->s_data_constant = 0; /* false */
2479 }
2480
2481 tp->cookie_values = cvp;
2482 }
2483 release_sock(sk);
2484 return err;
2485 }
2486 default: 2391 default:
2487 /* fallthru */ 2392 /* fallthru */
2488 break; 2393 break;
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2902 return -EFAULT; 2807 return -EFAULT;
2903 return 0; 2808 return 0;
2904 2809
2905 case TCP_COOKIE_TRANSACTIONS: {
2906 struct tcp_cookie_transactions ctd;
2907 struct tcp_cookie_values *cvp = tp->cookie_values;
2908
2909 if (get_user(len, optlen))
2910 return -EFAULT;
2911 if (len < sizeof(ctd))
2912 return -EINVAL;
2913
2914 memset(&ctd, 0, sizeof(ctd));
2915 ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
2916 TCP_COOKIE_IN_ALWAYS : 0)
2917 | (tp->rx_opt.cookie_out_never ?
2918 TCP_COOKIE_OUT_NEVER : 0);
2919
2920 if (cvp != NULL) {
2921 ctd.tcpct_flags |= (cvp->s_data_in ?
2922 TCP_S_DATA_IN : 0)
2923 | (cvp->s_data_out ?
2924 TCP_S_DATA_OUT : 0);
2925
2926 ctd.tcpct_cookie_desired = cvp->cookie_desired;
2927 ctd.tcpct_s_data_desired = cvp->s_data_desired;
2928
2929 memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
2930 cvp->cookie_pair_size);
2931 ctd.tcpct_used = cvp->cookie_pair_size;
2932 }
2933
2934 if (put_user(sizeof(ctd), optlen))
2935 return -EFAULT;
2936 if (copy_to_user(optval, &ctd, sizeof(ctd)))
2937 return -EFAULT;
2938 return 0;
2939 }
2940 case TCP_THIN_LINEAR_TIMEOUTS: 2810 case TCP_THIN_LINEAR_TIMEOUTS:
2941 val = tp->thin_lto; 2811 val = tp->thin_lto;
2942 break; 2812 break;
@@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3015 __be32 delta; 2885 __be32 delta;
3016 unsigned int oldlen; 2886 unsigned int oldlen;
3017 unsigned int mss; 2887 unsigned int mss;
2888 struct sk_buff *gso_skb = skb;
2889 __sum16 newcheck;
3018 2890
3019 if (!pskb_may_pull(skb, sizeof(*th))) 2891 if (!pskb_may_pull(skb, sizeof(*th)))
3020 goto out; 2892 goto out;
@@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3044 SKB_GSO_TCP_ECN | 2916 SKB_GSO_TCP_ECN |
3045 SKB_GSO_TCPV6 | 2917 SKB_GSO_TCPV6 |
3046 SKB_GSO_GRE | 2918 SKB_GSO_GRE |
2919 SKB_GSO_UDP_TUNNEL |
3047 0) || 2920 0) ||
3048 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 2921 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
3049 goto out; 2922 goto out;
@@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3064 th = tcp_hdr(skb); 2937 th = tcp_hdr(skb);
3065 seq = ntohl(th->seq); 2938 seq = ntohl(th->seq);
3066 2939
2940 newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
2941 (__force u32)delta));
2942
3067 do { 2943 do {
3068 th->fin = th->psh = 0; 2944 th->fin = th->psh = 0;
2945 th->check = newcheck;
3069 2946
3070 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
3071 (__force u32)delta));
3072 if (skb->ip_summed != CHECKSUM_PARTIAL) 2947 if (skb->ip_summed != CHECKSUM_PARTIAL)
3073 th->check = 2948 th->check =
3074 csum_fold(csum_partial(skb_transport_header(skb), 2949 csum_fold(csum_partial(skb_transport_header(skb),
@@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
3082 th->cwr = 0; 2957 th->cwr = 0;
3083 } while (skb->next); 2958 } while (skb->next);
3084 2959
2960 /* Following permits TCP Small Queues to work well with GSO :
2961 * The callback to TCP stack will be called at the time last frag
2962 * is freed at TX completion, and not right now when gso_skb
2963 * is freed by GSO engine
2964 */
2965 if (gso_skb->destructor == tcp_wfree) {
2966 swap(gso_skb->sk, skb->sk);
2967 swap(gso_skb->destructor, skb->destructor);
2968 swap(gso_skb->truesize, skb->truesize);
2969 }
2970
3085 delta = htonl(oldlen + (skb->tail - skb->transport_header) + 2971 delta = htonl(oldlen + (skb->tail - skb->transport_header) +
3086 skb->data_len); 2972 skb->data_len);
3087 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 2973 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
@@ -3408,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
3408 3294
3409#endif 3295#endif
3410 3296
3411/* Each Responder maintains up to two secret values concurrently for
3412 * efficient secret rollover. Each secret value has 4 states:
3413 *
3414 * Generating. (tcp_secret_generating != tcp_secret_primary)
3415 * Generates new Responder-Cookies, but not yet used for primary
3416 * verification. This is a short-term state, typically lasting only
3417 * one round trip time (RTT).
3418 *
3419 * Primary. (tcp_secret_generating == tcp_secret_primary)
3420 * Used both for generation and primary verification.
3421 *
3422 * Retiring. (tcp_secret_retiring != tcp_secret_secondary)
3423 * Used for verification, until the first failure that can be
3424 * verified by the newer Generating secret. At that time, this
3425 * cookie's state is changed to Secondary, and the Generating
3426 * cookie's state is changed to Primary. This is a short-term state,
3427 * typically lasting only one round trip time (RTT).
3428 *
3429 * Secondary. (tcp_secret_retiring == tcp_secret_secondary)
3430 * Used for secondary verification, after primary verification
3431 * failures. This state lasts no more than twice the Maximum Segment
3432 * Lifetime (2MSL). Then, the secret is discarded.
3433 */
3434struct tcp_cookie_secret {
3435 /* The secret is divided into two parts. The digest part is the
3436 * equivalent of previously hashing a secret and saving the state,
3437 * and serves as an initialization vector (IV). The message part
3438 * serves as the trailing secret.
3439 */
3440 u32 secrets[COOKIE_WORKSPACE_WORDS];
3441 unsigned long expires;
3442};
3443
3444#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
3445#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
3446#define TCP_SECRET_LIFE (HZ * 600)
3447
3448static struct tcp_cookie_secret tcp_secret_one;
3449static struct tcp_cookie_secret tcp_secret_two;
3450
3451/* Essentially a circular list, without dynamic allocation. */
3452static struct tcp_cookie_secret *tcp_secret_generating;
3453static struct tcp_cookie_secret *tcp_secret_primary;
3454static struct tcp_cookie_secret *tcp_secret_retiring;
3455static struct tcp_cookie_secret *tcp_secret_secondary;
3456
3457static DEFINE_SPINLOCK(tcp_secret_locker);
3458
3459/* Select a pseudo-random word in the cookie workspace.
3460 */
3461static inline u32 tcp_cookie_work(const u32 *ws, const int n)
3462{
3463 return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
3464}
3465
3466/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
3467 * Called in softirq context.
3468 * Returns: 0 for success.
3469 */
3470int tcp_cookie_generator(u32 *bakery)
3471{
3472 unsigned long jiffy = jiffies;
3473
3474 if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
3475 spin_lock_bh(&tcp_secret_locker);
3476 if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
3477 /* refreshed by another */
3478 memcpy(bakery,
3479 &tcp_secret_generating->secrets[0],
3480 COOKIE_WORKSPACE_WORDS);
3481 } else {
3482 /* still needs refreshing */
3483 get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
3484
3485 /* The first time, paranoia assumes that the
3486 * randomization function isn't as strong. But,
3487 * this secret initialization is delayed until
3488 * the last possible moment (packet arrival).
3489 * Although that time is observable, it is
3490 * unpredictably variable. Mash in the most
3491 * volatile clock bits available, and expire the
3492 * secret extra quickly.
3493 */
3494 if (unlikely(tcp_secret_primary->expires ==
3495 tcp_secret_secondary->expires)) {
3496 struct timespec tv;
3497
3498 getnstimeofday(&tv);
3499 bakery[COOKIE_DIGEST_WORDS+0] ^=
3500 (u32)tv.tv_nsec;
3501
3502 tcp_secret_secondary->expires = jiffy
3503 + TCP_SECRET_1MSL
3504 + (0x0f & tcp_cookie_work(bakery, 0));
3505 } else {
3506 tcp_secret_secondary->expires = jiffy
3507 + TCP_SECRET_LIFE
3508 + (0xff & tcp_cookie_work(bakery, 1));
3509 tcp_secret_primary->expires = jiffy
3510 + TCP_SECRET_2MSL
3511 + (0x1f & tcp_cookie_work(bakery, 2));
3512 }
3513 memcpy(&tcp_secret_secondary->secrets[0],
3514 bakery, COOKIE_WORKSPACE_WORDS);
3515
3516 rcu_assign_pointer(tcp_secret_generating,
3517 tcp_secret_secondary);
3518 rcu_assign_pointer(tcp_secret_retiring,
3519 tcp_secret_primary);
3520 /*
3521 * Neither call_rcu() nor synchronize_rcu() needed.
3522 * Retiring data is not freed. It is replaced after
3523 * further (locked) pointer updates, and a quiet time
3524 * (minimum 1MSL, maximum LIFE - 2MSL).
3525 */
3526 }
3527 spin_unlock_bh(&tcp_secret_locker);
3528 } else {
3529 rcu_read_lock_bh();
3530 memcpy(bakery,
3531 &rcu_dereference(tcp_secret_generating)->secrets[0],
3532 COOKIE_WORKSPACE_WORDS);
3533 rcu_read_unlock_bh();
3534 }
3535 return 0;
3536}
3537EXPORT_SYMBOL(tcp_cookie_generator);
3538
3539void tcp_done(struct sock *sk) 3297void tcp_done(struct sock *sk)
3540{ 3298{
3541 struct request_sock *req = tcp_sk(sk)->fastopen_rsk; 3299 struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3590,7 +3348,6 @@ void __init tcp_init(void)
3590 unsigned long limit; 3348 unsigned long limit;
3591 int max_rshare, max_wshare, cnt; 3349 int max_rshare, max_wshare, cnt;
3592 unsigned int i; 3350 unsigned int i;
3593 unsigned long jiffy = jiffies;
3594 3351
3595 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); 3352 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
3596 3353
@@ -3666,13 +3423,5 @@ void __init tcp_init(void)
3666 3423
3667 tcp_register_congestion_control(&tcp_reno); 3424 tcp_register_congestion_control(&tcp_reno);
3668 3425
3669 memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
3670 memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
3671 tcp_secret_one.expires = jiffy; /* past due */
3672 tcp_secret_two.expires = jiffy; /* past due */
3673 tcp_secret_generating = &tcp_secret_one;
3674 tcp_secret_primary = &tcp_secret_one;
3675 tcp_secret_retiring = &tcp_secret_two;
3676 tcp_secret_secondary = &tcp_secret_two;
3677 tcp_tasklet_init(); 3426 tcp_tasklet_init();
3678} 3427}