Diffstat (limited to 'net/ipv4/tcp.c')
 -rw-r--r--  net/ipv4/tcp.c | 287
 1 file changed, 18 insertions, 269 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e22020790709..dcb116dde216 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
 
         icsk->icsk_sync_mss = tcp_sync_mss;
 
-        /* TCP Cookie Transactions */
-        if (sysctl_tcp_cookie_size > 0) {
-                /* Default, cookies without s_data_payload. */
-                tp->cookie_values =
-                        kzalloc(sizeof(*tp->cookie_values),
-                                sk->sk_allocation);
-                if (tp->cookie_values != NULL)
-                        kref_init(&tp->cookie_values->kref);
-        }
         /* Presumed zeroed, in order of appearance:
          * cookie_in_always, cookie_out_never,
          * s_data_constant, s_data_in, s_data_out
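The allocation removed here used the standard kref pattern: a zeroed object whose reference count starts at one and which is freed by a release callback on the final kref_put(). A minimal sketch of that pattern follows; the struct layout and helper names are illustrative stand-ins, not the kernel's tcp_cookie_values definitions.

#include <linux/kref.h>
#include <linux/slab.h>

struct cookie_values_sketch {
        struct kref kref;
        /* ... cookie and payload fields would follow ... */
};

static void cookie_values_release_sketch(struct kref *kref)
{
        /* runs once, when the last reference is dropped */
        kfree(container_of(kref, struct cookie_values_sketch, kref));
}

static struct cookie_values_sketch *cookie_values_alloc(gfp_t gfp)
{
        struct cookie_values_sketch *cvp = kzalloc(sizeof(*cvp), gfp);

        if (cvp)
                kref_init(&cvp->kref);  /* refcount starts at 1 */
        return cvp;
}

/* A holder drops its reference the way the setsockopt hunk below does:
 *      kref_put(&cvp->kref, cookie_values_release_sketch);
 */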
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                 release_sock(sk);
                 return err;
         }
-        case TCP_COOKIE_TRANSACTIONS: {
-                struct tcp_cookie_transactions ctd;
-                struct tcp_cookie_values *cvp = NULL;
-
-                if (sizeof(ctd) > optlen)
-                        return -EINVAL;
-                if (copy_from_user(&ctd, optval, sizeof(ctd)))
-                        return -EFAULT;
-
-                if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
-                    ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
-                        return -EINVAL;
-
-                if (ctd.tcpct_cookie_desired == 0) {
-                        /* default to global value */
-                } else if ((0x1 & ctd.tcpct_cookie_desired) ||
-                           ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
-                           ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
-                        return -EINVAL;
-                }
-
-                if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
-                        /* Supercedes all other values */
-                        lock_sock(sk);
-                        if (tp->cookie_values != NULL) {
-                                kref_put(&tp->cookie_values->kref,
-                                         tcp_cookie_values_release);
-                                tp->cookie_values = NULL;
-                        }
-                        tp->rx_opt.cookie_in_always = 0; /* false */
-                        tp->rx_opt.cookie_out_never = 1; /* true */
-                        release_sock(sk);
-                        return err;
-                }
-
-                /* Allocate ancillary memory before locking.
-                 */
-                if (ctd.tcpct_used > 0 ||
-                    (tp->cookie_values == NULL &&
-                     (sysctl_tcp_cookie_size > 0 ||
-                      ctd.tcpct_cookie_desired > 0 ||
-                      ctd.tcpct_s_data_desired > 0))) {
-                        cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
-                                      GFP_KERNEL);
-                        if (cvp == NULL)
-                                return -ENOMEM;
-
-                        kref_init(&cvp->kref);
-                }
-                lock_sock(sk);
-                tp->rx_opt.cookie_in_always =
-                        (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
-                tp->rx_opt.cookie_out_never = 0; /* false */
-
-                if (tp->cookie_values != NULL) {
-                        if (cvp != NULL) {
-                                /* Changed values are recorded by a changed
-                                 * pointer, ensuring the cookie will differ,
-                                 * without separately hashing each value later.
-                                 */
-                                kref_put(&tp->cookie_values->kref,
-                                         tcp_cookie_values_release);
-                        } else {
-                                cvp = tp->cookie_values;
-                        }
-                }
-
-                if (cvp != NULL) {
-                        cvp->cookie_desired = ctd.tcpct_cookie_desired;
-
-                        if (ctd.tcpct_used > 0) {
-                                memcpy(cvp->s_data_payload, ctd.tcpct_value,
-                                       ctd.tcpct_used);
-                                cvp->s_data_desired = ctd.tcpct_used;
-                                cvp->s_data_constant = 1; /* true */
-                        } else {
-                                /* No constant payload data. */
-                                cvp->s_data_desired = ctd.tcpct_s_data_desired;
-                                cvp->s_data_constant = 0; /* false */
-                        }
-
-                        tp->cookie_values = cvp;
-                }
-                release_sock(sk);
-                return err;
-        }
         default:
                 /* fallthru */
                 break;
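For context, this is roughly how userspace drove the option being removed. The struct and flag names match the deleted code above; the call sequence itself is only a sketch and assumes pre-removal kernel headers that still provide struct tcp_cookie_transactions and the TCP_COOKIE_* constants.

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>   /* assumed to still carry the TCPCT definitions */

/* Ask the kernel to always request cookies, with no constant payload.
 * Returns 0 on success, -1 with errno set otherwise. */
static int enable_cookie_transactions(int fd)
{
        struct tcp_cookie_transactions ctd;

        memset(&ctd, 0, sizeof(ctd));
        ctd.tcpct_flags = TCP_COOKIE_IN_ALWAYS; /* cookie_in_always */
        ctd.tcpct_cookie_desired = 0;           /* 0 = global default size */
        ctd.tcpct_s_data_desired = 0;           /* no s_data payload */

        return setsockopt(fd, IPPROTO_TCP, TCP_COOKIE_TRANSACTIONS,
                          &ctd, sizeof(ctd));
}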
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                         return -EFAULT;
                 return 0;
 
-        case TCP_COOKIE_TRANSACTIONS: {
-                struct tcp_cookie_transactions ctd;
-                struct tcp_cookie_values *cvp = tp->cookie_values;
-
-                if (get_user(len, optlen))
-                        return -EFAULT;
-                if (len < sizeof(ctd))
-                        return -EINVAL;
-
-                memset(&ctd, 0, sizeof(ctd));
-                ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
-                                   TCP_COOKIE_IN_ALWAYS : 0)
-                                | (tp->rx_opt.cookie_out_never ?
-                                   TCP_COOKIE_OUT_NEVER : 0);
-
-                if (cvp != NULL) {
-                        ctd.tcpct_flags |= (cvp->s_data_in ?
-                                            TCP_S_DATA_IN : 0)
-                                         | (cvp->s_data_out ?
-                                            TCP_S_DATA_OUT : 0);
-
-                        ctd.tcpct_cookie_desired = cvp->cookie_desired;
-                        ctd.tcpct_s_data_desired = cvp->s_data_desired;
-
-                        memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
-                               cvp->cookie_pair_size);
-                        ctd.tcpct_used = cvp->cookie_pair_size;
-                }
-
-                if (put_user(sizeof(ctd), optlen))
-                        return -EFAULT;
-                if (copy_to_user(optval, &ctd, sizeof(ctd)))
-                        return -EFAULT;
-                return 0;
-        }
         case TCP_THIN_LINEAR_TIMEOUTS:
                 val = tp->thin_lto;
                 break;
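The matching read side, again only as a sketch against pre-removal headers: the kernel reported the negotiated flags, the desired sizes, and the cookie pair, with tcpct_used giving the number of valid bytes in tcpct_value.

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>   /* assumed to still carry the TCPCT definitions */

static int read_cookie_transactions(int fd, struct tcp_cookie_transactions *ctd)
{
        socklen_t len = sizeof(*ctd);

        if (getsockopt(fd, IPPROTO_TCP, TCP_COOKIE_TRANSACTIONS, ctd, &len))
                return -1;
        /* ctd->tcpct_used now holds the length of the returned cookie pair. */
        return 0;
}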
@@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
         __be32 delta;
         unsigned int oldlen;
         unsigned int mss;
+        struct sk_buff *gso_skb = skb;
+        __sum16 newcheck;
 
         if (!pskb_may_pull(skb, sizeof(*th)))
                 goto out;
@@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                                SKB_GSO_TCP_ECN |
                                SKB_GSO_TCPV6 |
                                SKB_GSO_GRE |
+                               SKB_GSO_UDP_TUNNEL |
                                0) ||
                              !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
                         goto out;
@@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
         th = tcp_hdr(skb);
         seq = ntohl(th->seq);
 
+        newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+                                               (__force u32)delta));
+
         do {
                 th->fin = th->psh = 0;
+                th->check = newcheck;
 
-                th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
-                                       (__force u32)delta));
                 if (skb->ip_summed != CHECKSUM_PARTIAL)
                         th->check =
                              csum_fold(csum_partial(skb_transport_header(skb),
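The hoisted newcheck relies on the usual incremental-checksum property: every segment starts from a copy of the same TCP header, and the pseudo-header length changes by the same delta for all but the last segment, so the patched checksum is identical and can be computed once before the loop. A simplified userland illustration of the patching step (plain integer helpers rather than the kernel's __wsum/csum_fold machinery, and ignoring the one's-complement zero corner case):

#include <stdint.h>

/* Fold a 32-bit one's-complement accumulator down to 16 bits. */
static uint16_t csum_fold16(uint32_t sum)
{
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
}

/* Patch a stored checksum after the summed data changed by "delta";
 * the result can be reused for every segment that shares that delta. */
static uint16_t patch_check(uint16_t old_check, uint32_t delta)
{
        uint32_t sum = (uint16_t)~old_check;    /* back to the raw sum */

        return (uint16_t)~csum_fold16(sum + delta);
}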
@@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                 th->cwr = 0;
         } while (skb->next);
 
+        /* Following permits TCP Small Queues to work well with GSO :
+         * The callback to TCP stack will be called at the time last frag
+         * is freed at TX completion, and not right now when gso_skb
+         * is freed by GSO engine
+         */
+        if (gso_skb->destructor == tcp_wfree) {
+                swap(gso_skb->sk, skb->sk);
+                swap(gso_skb->destructor, skb->destructor);
+                swap(gso_skb->truesize, skb->truesize);
+        }
+
         delta = htonl(oldlen + (skb->tail - skb->transport_header) +
                       skb->data_len);
         th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
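The swap() calls above are an ownership-transfer trick: the socket pointer, destructor and truesize accounting move from the original GSO skb to the last segment, which is the buffer freed last at TX completion, so the TCP Small Queues callback fires only when the NIC is really finished. Stripped of skb details, the pattern looks roughly like this; the type and function names are illustrative only.

struct tx_buf {
        void *owner;                            /* e.g. the owning socket */
        void (*destructor)(struct tx_buf *);    /* completion callback */
        unsigned int truesize;                  /* memory charged to the owner */
};

/* Move completion bookkeeping from the original buffer to the one that
 * will be freed last, mirroring the three swap() calls in the hunk above. */
static void hand_off_completion(struct tx_buf *orig, struct tx_buf *last)
{
        struct tx_buf tmp = *last;

        last->owner = orig->owner;
        last->destructor = orig->destructor;
        last->truesize = orig->truesize;

        orig->owner = tmp.owner;
        orig->destructor = tmp.destructor;
        orig->truesize = tmp.truesize;
}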
@@ -3408,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
-/* Each Responder maintains up to two secret values concurrently for
- * efficient secret rollover. Each secret value has 4 states:
- *
- * Generating. (tcp_secret_generating != tcp_secret_primary)
- *    Generates new Responder-Cookies, but not yet used for primary
- *    verification. This is a short-term state, typically lasting only
- *    one round trip time (RTT).
- *
- * Primary. (tcp_secret_generating == tcp_secret_primary)
- *    Used both for generation and primary verification.
- *
- * Retiring. (tcp_secret_retiring != tcp_secret_secondary)
- *    Used for verification, until the first failure that can be
- *    verified by the newer Generating secret. At that time, this
- *    cookie's state is changed to Secondary, and the Generating
- *    cookie's state is changed to Primary. This is a short-term state,
- *    typically lasting only one round trip time (RTT).
- *
- * Secondary. (tcp_secret_retiring == tcp_secret_secondary)
- *    Used for secondary verification, after primary verification
- *    failures. This state lasts no more than twice the Maximum Segment
- *    Lifetime (2MSL). Then, the secret is discarded.
- */
-struct tcp_cookie_secret {
-        /* The secret is divided into two parts. The digest part is the
-         * equivalent of previously hashing a secret and saving the state,
-         * and serves as an initialization vector (IV). The message part
-         * serves as the trailing secret.
-         */
-        u32 secrets[COOKIE_WORKSPACE_WORDS];
-        unsigned long expires;
-};
-
-#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
-#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
-#define TCP_SECRET_LIFE (HZ * 600)
-
-static struct tcp_cookie_secret tcp_secret_one;
-static struct tcp_cookie_secret tcp_secret_two;
-
-/* Essentially a circular list, without dynamic allocation. */
-static struct tcp_cookie_secret *tcp_secret_generating;
-static struct tcp_cookie_secret *tcp_secret_primary;
-static struct tcp_cookie_secret *tcp_secret_retiring;
-static struct tcp_cookie_secret *tcp_secret_secondary;
-
-static DEFINE_SPINLOCK(tcp_secret_locker);
-
-/* Select a pseudo-random word in the cookie workspace.
- */
-static inline u32 tcp_cookie_work(const u32 *ws, const int n)
-{
-        return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
-}
-
-/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
- * Called in softirq context.
- * Returns: 0 for success.
- */
-int tcp_cookie_generator(u32 *bakery)
-{
-        unsigned long jiffy = jiffies;
-
-        if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
-                spin_lock_bh(&tcp_secret_locker);
-                if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
-                        /* refreshed by another */
-                        memcpy(bakery,
-                               &tcp_secret_generating->secrets[0],
-                               COOKIE_WORKSPACE_WORDS);
-                } else {
-                        /* still needs refreshing */
-                        get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
-
-                        /* The first time, paranoia assumes that the
-                         * randomization function isn't as strong. But,
-                         * this secret initialization is delayed until
-                         * the last possible moment (packet arrival).
-                         * Although that time is observable, it is
-                         * unpredictably variable. Mash in the most
-                         * volatile clock bits available, and expire the
-                         * secret extra quickly.
-                         */
-                        if (unlikely(tcp_secret_primary->expires ==
-                                     tcp_secret_secondary->expires)) {
-                                struct timespec tv;
-
-                                getnstimeofday(&tv);
-                                bakery[COOKIE_DIGEST_WORDS+0] ^=
-                                        (u32)tv.tv_nsec;
-
-                                tcp_secret_secondary->expires = jiffy
-                                        + TCP_SECRET_1MSL
-                                        + (0x0f & tcp_cookie_work(bakery, 0));
-                        } else {
-                                tcp_secret_secondary->expires = jiffy
-                                        + TCP_SECRET_LIFE
-                                        + (0xff & tcp_cookie_work(bakery, 1));
-                                tcp_secret_primary->expires = jiffy
-                                        + TCP_SECRET_2MSL
-                                        + (0x1f & tcp_cookie_work(bakery, 2));
-                        }
-                        memcpy(&tcp_secret_secondary->secrets[0],
-                               bakery, COOKIE_WORKSPACE_WORDS);
-
-                        rcu_assign_pointer(tcp_secret_generating,
-                                           tcp_secret_secondary);
-                        rcu_assign_pointer(tcp_secret_retiring,
-                                           tcp_secret_primary);
-                        /*
-                         * Neither call_rcu() nor synchronize_rcu() needed.
-                         * Retiring data is not freed. It is replaced after
-                         * further (locked) pointer updates, and a quiet time
-                         * (minimum 1MSL, maximum LIFE - 2MSL).
-                         */
-                }
-                spin_unlock_bh(&tcp_secret_locker);
-        } else {
-                rcu_read_lock_bh();
-                memcpy(bakery,
-                       &rcu_dereference(tcp_secret_generating)->secrets[0],
-                       COOKIE_WORKSPACE_WORDS);
-                rcu_read_unlock_bh();
-        }
-        return 0;
-}
-EXPORT_SYMBOL(tcp_cookie_generator);
-
 void tcp_done(struct sock *sk)
 {
         struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3590,7 +3348,6 @@ void __init tcp_init(void)
         unsigned long limit;
         int max_rshare, max_wshare, cnt;
         unsigned int i;
-        unsigned long jiffy = jiffies;
 
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -3666,13 +3423,5 @@ void __init tcp_init(void)
 
         tcp_register_congestion_control(&tcp_reno);
 
-        memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
-        memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
-        tcp_secret_one.expires = jiffy; /* past due */
-        tcp_secret_two.expires = jiffy; /* past due */
-        tcp_secret_generating = &tcp_secret_one;
-        tcp_secret_primary = &tcp_secret_one;
-        tcp_secret_retiring = &tcp_secret_two;
-        tcp_secret_secondary = &tcp_secret_two;
         tcp_tasklet_init();
 }