diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 289 |
1 files changed, 279 insertions, 10 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1813bc71088..c8666b70cde0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -264,6 +264,7 @@ | |||
264 | #include <linux/cache.h> | 264 | #include <linux/cache.h> |
265 | #include <linux/err.h> | 265 | #include <linux/err.h> |
266 | #include <linux/crypto.h> | 266 | #include <linux/crypto.h> |
267 | #include <linux/time.h> | ||
267 | 268 | ||
268 | #include <net/icmp.h> | 269 | #include <net/icmp.h> |
269 | #include <net/tcp.h> | 270 | #include <net/tcp.h> |
@@ -2042,7 +2043,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2042 | __skb_queue_purge(&sk->sk_async_wait_queue); | 2043 | __skb_queue_purge(&sk->sk_async_wait_queue); |
2043 | #endif | 2044 | #endif |
2044 | 2045 | ||
2045 | inet->dport = 0; | 2046 | inet->inet_dport = 0; |
2046 | 2047 | ||
2047 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | 2048 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
2048 | inet_reset_saddr(sk); | 2049 | inet_reset_saddr(sk); |
@@ -2059,6 +2060,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2059 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | 2060 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
2060 | tp->snd_cwnd_cnt = 0; | 2061 | tp->snd_cwnd_cnt = 0; |
2061 | tp->bytes_acked = 0; | 2062 | tp->bytes_acked = 0; |
2063 | tp->window_clamp = 0; | ||
2062 | tcp_set_ca_state(sk, TCP_CA_Open); | 2064 | tcp_set_ca_state(sk, TCP_CA_Open); |
2063 | tcp_clear_retrans(tp); | 2065 | tcp_clear_retrans(tp); |
2064 | inet_csk_delack_init(sk); | 2066 | inet_csk_delack_init(sk); |
@@ -2066,7 +2068,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2066 | memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); | 2068 | memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); |
2067 | __sk_dst_reset(sk); | 2069 | __sk_dst_reset(sk); |
2068 | 2070 | ||
2069 | WARN_ON(inet->num && !icsk->icsk_bind_hash); | 2071 | WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); |
2070 | 2072 | ||
2071 | sk->sk_error_report(sk); | 2073 | sk->sk_error_report(sk); |
2072 | return err; | 2074 | return err; |
@@ -2083,8 +2085,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2083 | int val; | 2085 | int val; |
2084 | int err = 0; | 2086 | int err = 0; |
2085 | 2087 | ||
2086 | /* This is a string value all the others are int's */ | 2088 | /* These are data/string values, all the others are ints */ |
2087 | if (optname == TCP_CONGESTION) { | 2089 | switch (optname) { |
2090 | case TCP_CONGESTION: { | ||
2088 | char name[TCP_CA_NAME_MAX]; | 2091 | char name[TCP_CA_NAME_MAX]; |
2089 | 2092 | ||
2090 | if (optlen < 1) | 2093 | if (optlen < 1) |
@@ -2101,6 +2104,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2101 | release_sock(sk); | 2104 | release_sock(sk); |
2102 | return err; | 2105 | return err; |
2103 | } | 2106 | } |
2107 | case TCP_COOKIE_TRANSACTIONS: { | ||
2108 | struct tcp_cookie_transactions ctd; | ||
2109 | struct tcp_cookie_values *cvp = NULL; | ||
2110 | |||
2111 | if (sizeof(ctd) > optlen) | ||
2112 | return -EINVAL; | ||
2113 | if (copy_from_user(&ctd, optval, sizeof(ctd))) | ||
2114 | return -EFAULT; | ||
2115 | |||
2116 | if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || | ||
2117 | ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) | ||
2118 | return -EINVAL; | ||
2119 | |||
2120 | if (ctd.tcpct_cookie_desired == 0) { | ||
2121 | /* default to global value */ | ||
2122 | } else if ((0x1 & ctd.tcpct_cookie_desired) || | ||
2123 | ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || | ||
2124 | ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { | ||
2125 | return -EINVAL; | ||
2126 | } | ||
2127 | |||
2128 | if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { | ||
2129 | /* Supersedes all other values */ | |
2130 | lock_sock(sk); | ||
2131 | if (tp->cookie_values != NULL) { | ||
2132 | kref_put(&tp->cookie_values->kref, | ||
2133 | tcp_cookie_values_release); | ||
2134 | tp->cookie_values = NULL; | ||
2135 | } | ||
2136 | tp->rx_opt.cookie_in_always = 0; /* false */ | ||
2137 | tp->rx_opt.cookie_out_never = 1; /* true */ | ||
2138 | release_sock(sk); | ||
2139 | return err; | ||
2140 | } | ||
2141 | |||
2142 | /* Allocate ancillary memory before locking. | ||
2143 | */ | ||
2144 | if (ctd.tcpct_used > 0 || | ||
2145 | (tp->cookie_values == NULL && | ||
2146 | (sysctl_tcp_cookie_size > 0 || | ||
2147 | ctd.tcpct_cookie_desired > 0 || | ||
2148 | ctd.tcpct_s_data_desired > 0))) { | ||
2149 | cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, | ||
2150 | GFP_KERNEL); | ||
2151 | if (cvp == NULL) | ||
2152 | return -ENOMEM; | ||
2153 | } | ||
2154 | lock_sock(sk); | ||
2155 | tp->rx_opt.cookie_in_always = | ||
2156 | (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); | ||
2157 | tp->rx_opt.cookie_out_never = 0; /* false */ | ||
2158 | |||
2159 | if (tp->cookie_values != NULL) { | ||
2160 | if (cvp != NULL) { | ||
2161 | /* Changed values are recorded by a changed | ||
2162 | * pointer, ensuring the cookie will differ, | ||
2163 | * without separately hashing each value later. | ||
2164 | */ | ||
2165 | kref_put(&tp->cookie_values->kref, | ||
2166 | tcp_cookie_values_release); | ||
2167 | kref_init(&cvp->kref); | ||
2168 | tp->cookie_values = cvp; | ||
2169 | } else { | ||
2170 | cvp = tp->cookie_values; | ||
2171 | } | ||
2172 | } | ||
2173 | if (cvp != NULL) { | ||
2174 | cvp->cookie_desired = ctd.tcpct_cookie_desired; | ||
2175 | |||
2176 | if (ctd.tcpct_used > 0) { | ||
2177 | memcpy(cvp->s_data_payload, ctd.tcpct_value, | ||
2178 | ctd.tcpct_used); | ||
2179 | cvp->s_data_desired = ctd.tcpct_used; | ||
2180 | cvp->s_data_constant = 1; /* true */ | ||
2181 | } else { | ||
2182 | /* No constant payload data. */ | ||
2183 | cvp->s_data_desired = ctd.tcpct_s_data_desired; | ||
2184 | cvp->s_data_constant = 0; /* false */ | ||
2185 | } | ||
2186 | } | ||
2187 | release_sock(sk); | ||
2188 | return err; | ||
2189 | } | ||
2190 | default: | ||
2191 | /* fallthru */ | ||
2192 | break; | ||
2193 | }; | ||
2104 | 2194 | ||
2105 | if (optlen < sizeof(int)) | 2195 | if (optlen < sizeof(int)) |
2106 | return -EINVAL; | 2196 | return -EINVAL; |
@@ -2425,6 +2515,47 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2425 | if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) | 2515 | if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) |
2426 | return -EFAULT; | 2516 | return -EFAULT; |
2427 | return 0; | 2517 | return 0; |
2518 | |||
2519 | case TCP_COOKIE_TRANSACTIONS: { | ||
2520 | struct tcp_cookie_transactions ctd; | ||
2521 | struct tcp_cookie_values *cvp = tp->cookie_values; | ||
2522 | |||
2523 | if (get_user(len, optlen)) | ||
2524 | return -EFAULT; | ||
2525 | if (len < sizeof(ctd)) | ||
2526 | return -EINVAL; | ||
2527 | |||
2528 | memset(&ctd, 0, sizeof(ctd)); | ||
2529 | ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? | ||
2530 | TCP_COOKIE_IN_ALWAYS : 0) | ||
2531 | | (tp->rx_opt.cookie_out_never ? | ||
2532 | TCP_COOKIE_OUT_NEVER : 0); | ||
2533 | |||
2534 | if (cvp != NULL) { | ||
2535 | ctd.tcpct_flags |= (cvp->s_data_in ? | ||
2536 | TCP_S_DATA_IN : 0) | ||
2537 | | (cvp->s_data_out ? | ||
2538 | TCP_S_DATA_OUT : 0); | ||
2539 | |||
2540 | ctd.tcpct_cookie_desired = cvp->cookie_desired; | ||
2541 | ctd.tcpct_s_data_desired = cvp->s_data_desired; | ||
2542 | |||
2543 | /* Cookie(s) saved, return as nonce */ | ||
2544 | if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) { | ||
2545 | /* impossible? */ | ||
2546 | return -EINVAL; | ||
2547 | } | ||
2548 | memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], | ||
2549 | cvp->cookie_pair_size); | ||
2550 | ctd.tcpct_used = cvp->cookie_pair_size; | ||
2551 | } | ||
2552 | |||
2553 | if (put_user(sizeof(ctd), optlen)) | ||
2554 | return -EFAULT; | ||
2555 | if (copy_to_user(optval, &ctd, sizeof(ctd))) | ||
2556 | return -EFAULT; | ||
2557 | return 0; | ||
2558 | } | ||
2428 | default: | 2559 | default: |
2429 | return -ENOPROTOOPT; | 2560 | return -ENOPROTOOPT; |
2430 | } | 2561 | } |
@@ -2847,6 +2978,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key); | |||
2847 | 2978 | ||
2848 | #endif | 2979 | #endif |
2849 | 2980 | ||
2981 | /** | ||
2982 | * Each Responder maintains up to two secret values concurrently for | ||
2983 | * efficient secret rollover. Each secret value has 4 states: | ||
2984 | * | ||
2985 | * Generating. (tcp_secret_generating != tcp_secret_primary) | ||
2986 | * Generates new Responder-Cookies, but not yet used for primary | ||
2987 | * verification. This is a short-term state, typically lasting only | ||
2988 | * one round trip time (RTT). | ||
2989 | * | ||
2990 | * Primary. (tcp_secret_generating == tcp_secret_primary) | ||
2991 | * Used both for generation and primary verification. | ||
2992 | * | ||
2993 | * Retiring. (tcp_secret_retiring != tcp_secret_secondary) | ||
2994 | * Used for verification, until the first failure that can be | ||
2995 | * verified by the newer Generating secret. At that time, this | ||
2996 | * cookie's state is changed to Secondary, and the Generating | ||
2997 | * cookie's state is changed to Primary. This is a short-term state, | ||
2998 | * typically lasting only one round trip time (RTT). | ||
2999 | * | ||
3000 | * Secondary. (tcp_secret_retiring == tcp_secret_secondary) | ||
3001 | * Used for secondary verification, after primary verification | ||
3002 | * failures. This state lasts no more than twice the Maximum Segment | ||
3003 | * Lifetime (2MSL). Then, the secret is discarded. | ||
3004 | */ | ||
3005 | struct tcp_cookie_secret { | ||
3006 | /* The secret is divided into two parts. The digest part is the | ||
3007 | * equivalent of previously hashing a secret and saving the state, | ||
3008 | * and serves as an initialization vector (IV). The message part | ||
3009 | * serves as the trailing secret. | ||
3010 | */ | ||
3011 | u32 secrets[COOKIE_WORKSPACE_WORDS]; | ||
3012 | unsigned long expires; | ||
3013 | }; | ||
3014 | |||
3015 | #define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) | ||
3016 | #define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) | ||
3017 | #define TCP_SECRET_LIFE (HZ * 600) | ||
3018 | |||
3019 | static struct tcp_cookie_secret tcp_secret_one; | ||
3020 | static struct tcp_cookie_secret tcp_secret_two; | ||
3021 | |||
3022 | /* Essentially a circular list, without dynamic allocation. */ | ||
3023 | static struct tcp_cookie_secret *tcp_secret_generating; | ||
3024 | static struct tcp_cookie_secret *tcp_secret_primary; | ||
3025 | static struct tcp_cookie_secret *tcp_secret_retiring; | ||
3026 | static struct tcp_cookie_secret *tcp_secret_secondary; | ||
3027 | |||
3028 | static DEFINE_SPINLOCK(tcp_secret_locker); | ||
3029 | |||
3030 | /* Select a pseudo-random word in the cookie workspace. | ||
3031 | */ | ||
3032 | static inline u32 tcp_cookie_work(const u32 *ws, const int n) | ||
3033 | { | ||
3034 | return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; | ||
3035 | } | ||
3036 | |||
3037 | /* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. | ||
3038 | * Called in softirq context. | ||
3039 | * Returns: 0 for success. | ||
3040 | */ | ||
3041 | int tcp_cookie_generator(u32 *bakery) | ||
3042 | { | ||
3043 | unsigned long jiffy = jiffies; | ||
3044 | |||
3045 | if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { | ||
3046 | spin_lock_bh(&tcp_secret_locker); | ||
3047 | if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { | ||
3048 | /* refreshed by another */ | ||
3049 | memcpy(bakery, | ||
3050 | &tcp_secret_generating->secrets[0], | ||
3051 | COOKIE_WORKSPACE_WORDS); | ||
3052 | } else { | ||
3053 | /* still needs refreshing */ | ||
3054 | get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); | ||
3055 | |||
3056 | /* The first time, paranoia assumes that the | ||
3057 | * randomization function isn't as strong. But, | ||
3058 | * this secret initialization is delayed until | ||
3059 | * the last possible moment (packet arrival). | ||
3060 | * Although that time is observable, it is | ||
3061 | * unpredictably variable. Mash in the most | ||
3062 | * volatile clock bits available, and expire the | ||
3063 | * secret extra quickly. | ||
3064 | */ | ||
3065 | if (unlikely(tcp_secret_primary->expires == | ||
3066 | tcp_secret_secondary->expires)) { | ||
3067 | struct timespec tv; | ||
3068 | |||
3069 | getnstimeofday(&tv); | ||
3070 | bakery[COOKIE_DIGEST_WORDS+0] ^= | ||
3071 | (u32)tv.tv_nsec; | ||
3072 | |||
3073 | tcp_secret_secondary->expires = jiffy | ||
3074 | + TCP_SECRET_1MSL | ||
3075 | + (0x0f & tcp_cookie_work(bakery, 0)); | ||
3076 | } else { | ||
3077 | tcp_secret_secondary->expires = jiffy | ||
3078 | + TCP_SECRET_LIFE | ||
3079 | + (0xff & tcp_cookie_work(bakery, 1)); | ||
3080 | tcp_secret_primary->expires = jiffy | ||
3081 | + TCP_SECRET_2MSL | ||
3082 | + (0x1f & tcp_cookie_work(bakery, 2)); | ||
3083 | } | ||
3084 | memcpy(&tcp_secret_secondary->secrets[0], | ||
3085 | bakery, COOKIE_WORKSPACE_WORDS); | ||
3086 | |||
3087 | rcu_assign_pointer(tcp_secret_generating, | ||
3088 | tcp_secret_secondary); | ||
3089 | rcu_assign_pointer(tcp_secret_retiring, | ||
3090 | tcp_secret_primary); | ||
3091 | /* | ||
3092 | * Neither call_rcu() nor synchronize_rcu() needed. | ||
3093 | * Retiring data is not freed. It is replaced after | ||
3094 | * further (locked) pointer updates, and a quiet time | ||
3095 | * (minimum 1MSL, maximum LIFE - 2MSL). | ||
3096 | */ | ||
3097 | } | ||
3098 | spin_unlock_bh(&tcp_secret_locker); | ||
3099 | } else { | ||
3100 | rcu_read_lock_bh(); | ||
3101 | memcpy(bakery, | ||
3102 | &rcu_dereference(tcp_secret_generating)->secrets[0], | ||
3103 | COOKIE_WORKSPACE_WORDS); | ||
3104 | rcu_read_unlock_bh(); | ||
3105 | } | ||
3106 | return 0; | ||
3107 | } | ||
3108 | EXPORT_SYMBOL(tcp_cookie_generator); | ||
3109 | |||
2850 | void tcp_done(struct sock *sk) | 3110 | void tcp_done(struct sock *sk) |
2851 | { | 3111 | { |
2852 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | 3112 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) |
@@ -2881,6 +3141,7 @@ void __init tcp_init(void) | |||
2881 | struct sk_buff *skb = NULL; | 3141 | struct sk_buff *skb = NULL; |
2882 | unsigned long nr_pages, limit; | 3142 | unsigned long nr_pages, limit; |
2883 | int order, i, max_share; | 3143 | int order, i, max_share; |
3144 | unsigned long jiffy = jiffies; | ||
2884 | 3145 | ||
2885 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3146 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
2886 | 3147 | ||
@@ -2903,11 +3164,10 @@ void __init tcp_init(void) | |||
2903 | (totalram_pages >= 128 * 1024) ? | 3164 | (totalram_pages >= 128 * 1024) ? |
2904 | 13 : 15, | 3165 | 13 : 15, |
2905 | 0, | 3166 | 0, |
2906 | &tcp_hashinfo.ehash_size, | ||
2907 | NULL, | 3167 | NULL, |
3168 | &tcp_hashinfo.ehash_mask, | ||
2908 | thash_entries ? 0 : 512 * 1024); | 3169 | thash_entries ? 0 : 512 * 1024); |
2909 | tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; | 3170 | for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { |
2910 | for (i = 0; i < tcp_hashinfo.ehash_size; i++) { | ||
2911 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); | 3171 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); |
2912 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); | 3172 | INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); |
2913 | } | 3173 | } |
@@ -2916,7 +3176,7 @@ void __init tcp_init(void) | |||
2916 | tcp_hashinfo.bhash = | 3176 | tcp_hashinfo.bhash = |
2917 | alloc_large_system_hash("TCP bind", | 3177 | alloc_large_system_hash("TCP bind", |
2918 | sizeof(struct inet_bind_hashbucket), | 3178 | sizeof(struct inet_bind_hashbucket), |
2919 | tcp_hashinfo.ehash_size, | 3179 | tcp_hashinfo.ehash_mask + 1, |
2920 | (totalram_pages >= 128 * 1024) ? | 3180 | (totalram_pages >= 128 * 1024) ? |
2921 | 13 : 15, | 3181 | 13 : 15, |
2922 | 0, | 3182 | 0, |
@@ -2971,10 +3231,19 @@ void __init tcp_init(void) | |||
2971 | sysctl_tcp_rmem[2] = max(87380, max_share); | 3231 | sysctl_tcp_rmem[2] = max(87380, max_share); |
2972 | 3232 | ||
2973 | printk(KERN_INFO "TCP: Hash tables configured " | 3233 | printk(KERN_INFO "TCP: Hash tables configured " |
2974 | "(established %d bind %d)\n", | 3234 | "(established %u bind %u)\n", |
2975 | tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); | 3235 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); |
2976 | 3236 | ||
2977 | tcp_register_congestion_control(&tcp_reno); | 3237 | tcp_register_congestion_control(&tcp_reno); |
3238 | |||
3239 | memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); | ||
3240 | memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); | ||
3241 | tcp_secret_one.expires = jiffy; /* past due */ | ||
3242 | tcp_secret_two.expires = jiffy; /* past due */ | ||
3243 | tcp_secret_generating = &tcp_secret_one; | ||
3244 | tcp_secret_primary = &tcp_secret_one; | ||
3245 | tcp_secret_retiring = &tcp_secret_two; | ||
3246 | tcp_secret_secondary = &tcp_secret_two; | ||
2978 | } | 3247 | } |
2979 | 3248 | ||
2980 | EXPORT_SYMBOL(tcp_close); | 3249 | EXPORT_SYMBOL(tcp_close); |