aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c289
1 files changed, 279 insertions, 10 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1813bc71088..c8666b70cde0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -264,6 +264,7 @@
264#include <linux/cache.h> 264#include <linux/cache.h>
265#include <linux/err.h> 265#include <linux/err.h>
266#include <linux/crypto.h> 266#include <linux/crypto.h>
267#include <linux/time.h>
267 268
268#include <net/icmp.h> 269#include <net/icmp.h>
269#include <net/tcp.h> 270#include <net/tcp.h>
@@ -2042,7 +2043,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2042 __skb_queue_purge(&sk->sk_async_wait_queue); 2043 __skb_queue_purge(&sk->sk_async_wait_queue);
2043#endif 2044#endif
2044 2045
2045 inet->dport = 0; 2046 inet->inet_dport = 0;
2046 2047
2047 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 2048 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2048 inet_reset_saddr(sk); 2049 inet_reset_saddr(sk);
@@ -2059,6 +2060,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2059 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 2060 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
2060 tp->snd_cwnd_cnt = 0; 2061 tp->snd_cwnd_cnt = 0;
2061 tp->bytes_acked = 0; 2062 tp->bytes_acked = 0;
2063 tp->window_clamp = 0;
2062 tcp_set_ca_state(sk, TCP_CA_Open); 2064 tcp_set_ca_state(sk, TCP_CA_Open);
2063 tcp_clear_retrans(tp); 2065 tcp_clear_retrans(tp);
2064 inet_csk_delack_init(sk); 2066 inet_csk_delack_init(sk);
@@ -2066,7 +2068,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2066 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); 2068 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2067 __sk_dst_reset(sk); 2069 __sk_dst_reset(sk);
2068 2070
2069 WARN_ON(inet->num && !icsk->icsk_bind_hash); 2071 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2070 2072
2071 sk->sk_error_report(sk); 2073 sk->sk_error_report(sk);
2072 return err; 2074 return err;
@@ -2083,8 +2085,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2083 int val; 2085 int val;
2084 int err = 0; 2086 int err = 0;
2085 2087
2086 /* This is a string value all the others are int's */ 2088 /* These are data/string values, all the others are ints */
2087 if (optname == TCP_CONGESTION) { 2089 switch (optname) {
2090 case TCP_CONGESTION: {
2088 char name[TCP_CA_NAME_MAX]; 2091 char name[TCP_CA_NAME_MAX];
2089 2092
2090 if (optlen < 1) 2093 if (optlen < 1)
@@ -2101,6 +2104,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2101 release_sock(sk); 2104 release_sock(sk);
2102 return err; 2105 return err;
2103 } 2106 }
2107 case TCP_COOKIE_TRANSACTIONS: {
2108 struct tcp_cookie_transactions ctd;
2109 struct tcp_cookie_values *cvp = NULL;
2110
2111 if (sizeof(ctd) > optlen)
2112 return -EINVAL;
2113 if (copy_from_user(&ctd, optval, sizeof(ctd)))
2114 return -EFAULT;
2115
2116 if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
2117 ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
2118 return -EINVAL;
2119
2120 if (ctd.tcpct_cookie_desired == 0) {
2121 /* default to global value */
2122 } else if ((0x1 & ctd.tcpct_cookie_desired) ||
2123 ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
2124 ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
2125 return -EINVAL;
2126 }
2127
2128 if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
2129 /* Supersedes all other values */
2130 lock_sock(sk);
2131 if (tp->cookie_values != NULL) {
2132 kref_put(&tp->cookie_values->kref,
2133 tcp_cookie_values_release);
2134 tp->cookie_values = NULL;
2135 }
2136 tp->rx_opt.cookie_in_always = 0; /* false */
2137 tp->rx_opt.cookie_out_never = 1; /* true */
2138 release_sock(sk);
2139 return err;
2140 }
2141
2142 /* Allocate ancillary memory before locking.
2143 */
2144 if (ctd.tcpct_used > 0 ||
2145 (tp->cookie_values == NULL &&
2146 (sysctl_tcp_cookie_size > 0 ||
2147 ctd.tcpct_cookie_desired > 0 ||
2148 ctd.tcpct_s_data_desired > 0))) {
2149 cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
2150 GFP_KERNEL);
2151 if (cvp == NULL)
2152 return -ENOMEM;
2153 }
2154 lock_sock(sk);
2155 tp->rx_opt.cookie_in_always =
2156 (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
2157 tp->rx_opt.cookie_out_never = 0; /* false */
2158
2159 if (tp->cookie_values != NULL) {
2160 if (cvp != NULL) {
2161 /* Changed values are recorded by a changed
2162 * pointer, ensuring the cookie will differ,
2163 * without separately hashing each value later.
2164 */
2165 kref_put(&tp->cookie_values->kref,
2166 tcp_cookie_values_release);
2167 kref_init(&cvp->kref);
2168 tp->cookie_values = cvp;
2169 } else {
2170 cvp = tp->cookie_values;
2171 }
2172 }
2173 if (cvp != NULL) {
2174 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2175
2176 if (ctd.tcpct_used > 0) {
2177 memcpy(cvp->s_data_payload, ctd.tcpct_value,
2178 ctd.tcpct_used);
2179 cvp->s_data_desired = ctd.tcpct_used;
2180 cvp->s_data_constant = 1; /* true */
2181 } else {
2182 /* No constant payload data. */
2183 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2184 cvp->s_data_constant = 0; /* false */
2185 }
2186 }
2187 release_sock(sk);
2188 return err;
2189 }
2190 default:
2191 /* fallthru */
2192 break;
2193 };
2104 2194
2105 if (optlen < sizeof(int)) 2195 if (optlen < sizeof(int))
2106 return -EINVAL; 2196 return -EINVAL;
@@ -2425,6 +2515,47 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2425 if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) 2515 if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
2426 return -EFAULT; 2516 return -EFAULT;
2427 return 0; 2517 return 0;
2518
2519 case TCP_COOKIE_TRANSACTIONS: {
2520 struct tcp_cookie_transactions ctd;
2521 struct tcp_cookie_values *cvp = tp->cookie_values;
2522
2523 if (get_user(len, optlen))
2524 return -EFAULT;
2525 if (len < sizeof(ctd))
2526 return -EINVAL;
2527
2528 memset(&ctd, 0, sizeof(ctd));
2529 ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
2530 TCP_COOKIE_IN_ALWAYS : 0)
2531 | (tp->rx_opt.cookie_out_never ?
2532 TCP_COOKIE_OUT_NEVER : 0);
2533
2534 if (cvp != NULL) {
2535 ctd.tcpct_flags |= (cvp->s_data_in ?
2536 TCP_S_DATA_IN : 0)
2537 | (cvp->s_data_out ?
2538 TCP_S_DATA_OUT : 0);
2539
2540 ctd.tcpct_cookie_desired = cvp->cookie_desired;
2541 ctd.tcpct_s_data_desired = cvp->s_data_desired;
2542
2543 /* Cookie(s) saved, return as nonce */
2544 if (sizeof(ctd.tcpct_value) < cvp->cookie_pair_size) {
2545 /* impossible? */
2546 return -EINVAL;
2547 }
2548 memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
2549 cvp->cookie_pair_size);
2550 ctd.tcpct_used = cvp->cookie_pair_size;
2551 }
2552
2553 if (put_user(sizeof(ctd), optlen))
2554 return -EFAULT;
2555 if (copy_to_user(optval, &ctd, sizeof(ctd)))
2556 return -EFAULT;
2557 return 0;
2558 }
2428 default: 2559 default:
2429 return -ENOPROTOOPT; 2560 return -ENOPROTOOPT;
2430 } 2561 }
@@ -2847,6 +2978,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
2847 2978
2848#endif 2979#endif
2849 2980
2981/**
2982 * Each Responder maintains up to two secret values concurrently for
2983 * efficient secret rollover. Each secret value has 4 states:
2984 *
2985 * Generating. (tcp_secret_generating != tcp_secret_primary)
2986 * Generates new Responder-Cookies, but not yet used for primary
2987 * verification. This is a short-term state, typically lasting only
2988 * one round trip time (RTT).
2989 *
2990 * Primary. (tcp_secret_generating == tcp_secret_primary)
2991 * Used both for generation and primary verification.
2992 *
2993 * Retiring. (tcp_secret_retiring != tcp_secret_secondary)
2994 * Used for verification, until the first failure that can be
2995 * verified by the newer Generating secret. At that time, this
2996 * cookie's state is changed to Secondary, and the Generating
2997 * cookie's state is changed to Primary. This is a short-term state,
2998 * typically lasting only one round trip time (RTT).
2999 *
3000 * Secondary. (tcp_secret_retiring == tcp_secret_secondary)
3001 * Used for secondary verification, after primary verification
3002 * failures. This state lasts no more than twice the Maximum Segment
3003 * Lifetime (2MSL). Then, the secret is discarded.
3004 */
3005struct tcp_cookie_secret {
3006 /* The secret is divided into two parts. The digest part is the
3007 * equivalent of previously hashing a secret and saving the state,
3008 * and serves as an initialization vector (IV). The message part
3009 * serves as the trailing secret.
3010 */
3011 u32 secrets[COOKIE_WORKSPACE_WORDS];
3012 unsigned long expires;
3013};
3014
3015#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
3016#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
3017#define TCP_SECRET_LIFE (HZ * 600)
3018
3019static struct tcp_cookie_secret tcp_secret_one;
3020static struct tcp_cookie_secret tcp_secret_two;
3021
3022/* Essentially a circular list, without dynamic allocation. */
3023static struct tcp_cookie_secret *tcp_secret_generating;
3024static struct tcp_cookie_secret *tcp_secret_primary;
3025static struct tcp_cookie_secret *tcp_secret_retiring;
3026static struct tcp_cookie_secret *tcp_secret_secondary;
3027
3028static DEFINE_SPINLOCK(tcp_secret_locker);
3029
3030/* Select a pseudo-random word in the cookie workspace.
3031 */
3032static inline u32 tcp_cookie_work(const u32 *ws, const int n)
3033{
3034 return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
3035}
3036
3037/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
3038 * Called in softirq context.
3039 * Returns: 0 for success.
3040 */
3041int tcp_cookie_generator(u32 *bakery)
3042{
3043 unsigned long jiffy = jiffies;
3044
3045 if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
3046 spin_lock_bh(&tcp_secret_locker);
3047 if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
3048 /* refreshed by another */
3049 memcpy(bakery,
3050 &tcp_secret_generating->secrets[0],
3051 COOKIE_WORKSPACE_WORDS);
3052 } else {
3053 /* still needs refreshing */
3054 get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
3055
3056 /* The first time, paranoia assumes that the
3057 * randomization function isn't as strong. But,
3058 * this secret initialization is delayed until
3059 * the last possible moment (packet arrival).
3060 * Although that time is observable, it is
3061 * unpredictably variable. Mash in the most
3062 * volatile clock bits available, and expire the
3063 * secret extra quickly.
3064 */
3065 if (unlikely(tcp_secret_primary->expires ==
3066 tcp_secret_secondary->expires)) {
3067 struct timespec tv;
3068
3069 getnstimeofday(&tv);
3070 bakery[COOKIE_DIGEST_WORDS+0] ^=
3071 (u32)tv.tv_nsec;
3072
3073 tcp_secret_secondary->expires = jiffy
3074 + TCP_SECRET_1MSL
3075 + (0x0f & tcp_cookie_work(bakery, 0));
3076 } else {
3077 tcp_secret_secondary->expires = jiffy
3078 + TCP_SECRET_LIFE
3079 + (0xff & tcp_cookie_work(bakery, 1));
3080 tcp_secret_primary->expires = jiffy
3081 + TCP_SECRET_2MSL
3082 + (0x1f & tcp_cookie_work(bakery, 2));
3083 }
3084 memcpy(&tcp_secret_secondary->secrets[0],
3085 bakery, COOKIE_WORKSPACE_WORDS);
3086
3087 rcu_assign_pointer(tcp_secret_generating,
3088 tcp_secret_secondary);
3089 rcu_assign_pointer(tcp_secret_retiring,
3090 tcp_secret_primary);
3091 /*
3092 * Neither call_rcu() nor synchronize_rcu() needed.
3093 * Retiring data is not freed. It is replaced after
3094 * further (locked) pointer updates, and a quiet time
3095 * (minimum 1MSL, maximum LIFE - 2MSL).
3096 */
3097 }
3098 spin_unlock_bh(&tcp_secret_locker);
3099 } else {
3100 rcu_read_lock_bh();
3101 memcpy(bakery,
3102 &rcu_dereference(tcp_secret_generating)->secrets[0],
3103 COOKIE_WORKSPACE_WORDS);
3104 rcu_read_unlock_bh();
3105 }
3106 return 0;
3107}
3108EXPORT_SYMBOL(tcp_cookie_generator);
3109
2850void tcp_done(struct sock *sk) 3110void tcp_done(struct sock *sk)
2851{ 3111{
2852 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) 3112 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
@@ -2881,6 +3141,7 @@ void __init tcp_init(void)
2881 struct sk_buff *skb = NULL; 3141 struct sk_buff *skb = NULL;
2882 unsigned long nr_pages, limit; 3142 unsigned long nr_pages, limit;
2883 int order, i, max_share; 3143 int order, i, max_share;
3144 unsigned long jiffy = jiffies;
2884 3145
2885 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); 3146 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
2886 3147
@@ -2903,11 +3164,10 @@ void __init tcp_init(void)
2903 (totalram_pages >= 128 * 1024) ? 3164 (totalram_pages >= 128 * 1024) ?
2904 13 : 15, 3165 13 : 15,
2905 0, 3166 0,
2906 &tcp_hashinfo.ehash_size,
2907 NULL, 3167 NULL,
3168 &tcp_hashinfo.ehash_mask,
2908 thash_entries ? 0 : 512 * 1024); 3169 thash_entries ? 0 : 512 * 1024);
2909 tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; 3170 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
2910 for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
2911 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); 3171 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
2912 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); 3172 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
2913 } 3173 }
@@ -2916,7 +3176,7 @@ void __init tcp_init(void)
2916 tcp_hashinfo.bhash = 3176 tcp_hashinfo.bhash =
2917 alloc_large_system_hash("TCP bind", 3177 alloc_large_system_hash("TCP bind",
2918 sizeof(struct inet_bind_hashbucket), 3178 sizeof(struct inet_bind_hashbucket),
2919 tcp_hashinfo.ehash_size, 3179 tcp_hashinfo.ehash_mask + 1,
2920 (totalram_pages >= 128 * 1024) ? 3180 (totalram_pages >= 128 * 1024) ?
2921 13 : 15, 3181 13 : 15,
2922 0, 3182 0,
@@ -2971,10 +3231,19 @@ void __init tcp_init(void)
2971 sysctl_tcp_rmem[2] = max(87380, max_share); 3231 sysctl_tcp_rmem[2] = max(87380, max_share);
2972 3232
2973 printk(KERN_INFO "TCP: Hash tables configured " 3233 printk(KERN_INFO "TCP: Hash tables configured "
2974 "(established %d bind %d)\n", 3234 "(established %u bind %u)\n",
2975 tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); 3235 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
2976 3236
2977 tcp_register_congestion_control(&tcp_reno); 3237 tcp_register_congestion_control(&tcp_reno);
3238
3239 memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
3240 memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
3241 tcp_secret_one.expires = jiffy; /* past due */
3242 tcp_secret_two.expires = jiffy; /* past due */
3243 tcp_secret_generating = &tcp_secret_one;
3244 tcp_secret_primary = &tcp_secret_one;
3245 tcp_secret_retiring = &tcp_secret_two;
3246 tcp_secret_secondary = &tcp_secret_two;
2978} 3247}
2979 3248
2980EXPORT_SYMBOL(tcp_close); 3249EXPORT_SYMBOL(tcp_close);