commit aecdc33e111b2c447b622e287c6003726daa1426
tree   3e7657eae4b785e1a1fb5dfb225dbae0b2f0cfc6 /include/net/tcp.h
parent a20acf99f75e49271381d65db097c9763060a1e8
parent a3a6cab5ea10cca64d036851fe0d932448f2fe4f
Author:    Linus Torvalds <torvalds@linux-foundation.org>  2012-10-02 16:38:27 -0400
Committer: Linus Torvalds <torvalds@linux-foundation.org>  2012-10-02 16:38:27 -0400
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking changes from David Miller:

 1) GRE now works over ipv6, from Dmitry Kozlov.

 2) Make SCTP more network namespace aware, from Eric Biederman.

 3) TEAM driver now works with non-ethernet devices, from Jiri Pirko.

 4) Make openvswitch network namespace aware, from Pravin B Shelar.

 5) IPV6 NAT implementation, from Patrick McHardy.

 6) Server side support for TCP Fast Open, from Jerry Chu and others.

 7) Packet BPF filter supports MOD and XOR, from Eric Dumazet and Daniel
    Borkmann.

 8) Increase the loopback default MTU to 64K, from Eric Dumazet.

 9) Use a per-task rather than per-socket page fragment allocator for
    outgoing networking traffic.  This benefits processes that have very
    many mostly idle sockets, which is quite common.  From Eric Dumazet.

10) Use up to 32K for page fragment allocations, with fallbacks to
    smaller sizes when higher order page allocations fail.  Benefits are
    a) less segments for driver to process b) less calls to page
    allocator c) less waste of space.  From Eric Dumazet.

11) Allow GRO to be used on GRE tunnels, from Eric Dumazet.

12) VXLAN device driver, one way to handle VLAN issues such as the
    limitation of 4096 VLAN IDs yet still have some level of isolation.
    From Stephen Hemminger.

13) As usual there is a large boatload of driver changes, with the scale
    perhaps tilted towards the wireless side this time around.

Fix up various fairly trivial conflicts, mostly caused by the user
namespace changes.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1012 commits)
  hyperv: Add buffer for extended info after the RNDIS response message.
  hyperv: Report actual status in receive completion packet
  hyperv: Remove extra allocated space for recv_pkt_list elements
  hyperv: Fix page buffer handling in rndis_filter_send_request()
  hyperv: Fix the missing return value in rndis_filter_set_packet_filter()
  hyperv: Fix the max_xfer_size in RNDIS initialization
  vxlan: put UDP socket in correct namespace
  vxlan: Depend on CONFIG_INET
  sfc: Fix the reported priorities of different filter types
  sfc: Remove EFX_FILTER_FLAG_RX_OVERRIDE_IP
  sfc: Fix loopback self-test with separate_tx_channels=1
  sfc: Fix MCDI structure field lookup
  sfc: Add parentheses around use of bitfield macro arguments
  sfc: Fix null function pointer in efx_sriov_channel_type
  vxlan: virtual extensible lan
  igmp: export symbol ip_mc_leave_group
  netlink: add attributes to fdb interface
  tg3: unconditionally select HWMON support when tg3 is enabled.
  Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT"
  gre: fix sparse warning
  ...
Diffstat (limited to 'include/net/tcp.h')
 include/net/tcp.h | 90
 1 file changed, 75 insertions(+), 15 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9a0021d16d91..6feeccd83dd7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -98,11 +98,21 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
  * 15 is ~13-30min depending on RTO.
  */

-#define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
-				 * connection: ~180sec is RFC minimum	*/
+#define TCP_SYN_RETRIES	 6	/* This is how many retries are done
+				 * when active opening a connection.
+				 * RFC1122 says the minimum retry MUST
+				 * be at least 180secs.  Nevertheless
+				 * this value corresponds to
+				 * 63secs of retransmission with the
+				 * current initial RTO.
+				 */

-#define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
-				 * connection: ~180sec is RFC minimum	*/
+#define TCP_SYNACK_RETRIES 5	/* This is how many retries are done
+				 * when passive opening a connection.
+				 * This corresponds to 31secs of
+				 * retransmission with the current
+				 * initial RTO.
+				 */

 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
 				  * state, about 60 seconds	*/
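Both new comments assume the current initial RTO of 1 second
(TCP_TIMEOUT_INIT, lowered from the old 3 seconds in line with RFC 6298).
With exponential backoff the waits sum to 1+2+4+8+16+32 = 63 seconds for
6 active-open retries and 1+2+4+8+16 = 31 seconds for 5 passive-open
retries. A minimal userspace sketch of that arithmetic (illustrative
only, not kernel code):

#include <stdio.h>

/* Total seconds spent retransmitting with exponential backoff,
 * starting from a 1s initial RTO. */
static unsigned int backoff_total_secs(unsigned int retries)
{
	unsigned int rto = 1, total = 0, i;

	for (i = 0; i < retries; i++) {
		total += rto;	/* wait one full RTO before each retry */
		rto *= 2;	/* then double it */
	}
	return total;
}

int main(void)
{
	printf("TCP_SYN_RETRIES=6    -> %us\n", backoff_total_secs(6)); /* 63 */
	printf("TCP_SYNACK_RETRIES=5 -> %us\n", backoff_total_secs(5)); /* 31 */
	return 0;
}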
@@ -214,8 +224,24 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);

 /* Bit Flags for sysctl_tcp_fastopen */
 #define	TFO_CLIENT_ENABLE	1
+#define	TFO_SERVER_ENABLE	2
 #define	TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */

+/* Process SYN data but skip cookie validation */
+#define	TFO_SERVER_COOKIE_NOT_CHKED	0x100
+/* Accept SYN data w/o any cookie option */
+#define	TFO_SERVER_COOKIE_NOT_REQD	0x200
+
+/* Force enable TFO on all listeners, i.e., not requiring the
+ * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
+ */
+#define	TFO_SERVER_WO_SOCKOPT1	0x400
+#define	TFO_SERVER_WO_SOCKOPT2	0x800
+/* Always create TFO child sockets on a TFO listener even when
+ * cookie/data not present. (For testing purpose!)
+ */
+#define	TFO_SERVER_ALWAYS	0x1000
+
 extern struct inet_timewait_death_row tcp_death_row;

 /* sysctl variables for tcp */
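The client-side flags stay in the low byte while the server-side flags
occupy a separate range starting at 0x100, so writing 3 (TFO_CLIENT_ENABLE
| TFO_SERVER_ENABLE) to /proc/sys/net/ipv4/tcp_fastopen turns on both
directions. Unless one of the TFO_SERVER_WO_SOCKOPT bits is set, a
listener must still opt in with the TCP_FASTOPEN socket option added in
this cycle (older libc headers may not define it yet). A hedged sketch of
that opt-in, with error handling elided and an arbitrary qlen value:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int tfo_listener(unsigned short port)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port   = htons(port),	/* sin_addr zeroed = INADDR_ANY */
	};
	/* Max number of Fast Open requests that may sit in the queue
	 * before completing the 3WHS; this bounds the resources that
	 * spoofed SYNs carrying data can pin down. */
	int qlen = 16;

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
	listen(fd, 128);
	return fd;
}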
@@ -398,7 +424,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *
 					    const struct tcphdr *th);
 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
 				   struct request_sock *req,
-				   struct request_sock **prev);
+				   struct request_sock **prev,
+				   bool fastopen);
 extern int tcp_child_process(struct sock *parent, struct sock *child,
 			     struct sk_buff *skb);
 extern bool tcp_use_frto(struct sock *sk);
@@ -411,12 +438,6 @@ extern void tcp_metrics_init(void);
 extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
 extern bool tcp_remember_stamp(struct sock *sk);
 extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
-extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
-				   struct tcp_fastopen_cookie *cookie,
-				   int *syn_loss, unsigned long *last_syn_loss);
-extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
-				   struct tcp_fastopen_cookie *cookie,
-				   bool syn_lost);
 extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
 extern void tcp_disable_fack(struct tcp_sock *tp);
 extern void tcp_close(struct sock *sk, long timeout);
@@ -458,7 +479,8 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 extern int tcp_connect(struct sock *sk);
 extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 					struct request_sock *req,
-					struct request_values *rvp);
+					struct request_values *rvp,
+					struct tcp_fastopen_cookie *foc);
 extern int tcp_disconnect(struct sock *sk, int flags);

 void tcp_connect_init(struct sock *sk);
@@ -527,6 +549,7 @@ extern void tcp_send_delayed_ack(struct sock *sk);
 extern void tcp_cwnd_application_limited(struct sock *sk);
 extern void tcp_resume_early_retransmit(struct sock *sk);
 extern void tcp_rearm_rto(struct sock *sk);
+extern void tcp_reset(struct sock *sk);

 /* tcp_timer.c */
 extern void tcp_init_xmit_timers(struct sock *);
@@ -576,6 +599,7 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 extern int tcp_mss_to_mtu(struct sock *sk, int mss);
 extern void tcp_mtup_init(struct sock *sk);
 extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
+extern void tcp_init_buffer_space(struct sock *sk);

 static inline void tcp_bound_rto(const struct sock *sk)
 {
@@ -889,15 +913,21 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
 	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
 }

+static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
+{
+	return (TCPF_CA_CWR | TCPF_CA_Recovery) &
+	       (1 << inet_csk(sk)->icsk_ca_state);
+}
+
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
- * The exception is rate halving phase, when cwnd is decreasing towards
+ * The exception is cwnd reduction phase, when cwnd is decreasing towards
  * ssthresh.
  */
 static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);

-	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
+	if (tcp_in_cwnd_reduction(sk))
 		return tp->snd_ssthresh;
 	else
 		return max(tp->snd_ssthresh,
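The new helper leans on the kernel convention that every TCP_CA_* state
has a one-hot TCPF_CA_* counterpart (TCPF_CA_CWR == 1 << TCP_CA_CWR), so
"is the state one of CWR or Recovery?" collapses into a single AND; the
comment's switch from "rate halving" to "cwnd reduction" matches the
PRR-style reduction those two states now share. A standalone sketch of
the bitmask idiom, mirroring the kernel's enum ordering:

/* Same trick outside the kernel: states numbered as in enum
 * tcp_ca_state, set membership tested with one mask operation. */
enum ca_state { CA_Open, CA_Disorder, CA_CWR, CA_Recovery, CA_Loss };

#define F_CWR		(1 << CA_CWR)
#define F_Recovery	(1 << CA_Recovery)

static int in_cwnd_reduction(enum ca_state s)
{
	return ((F_CWR | F_Recovery) & (1 << s)) != 0;
}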
@@ -1094,6 +1124,8 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tcp_rsk(req)->snt_synack = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
 	ireq->tstamp_ok = rx_opt->tstamp_ok;
@@ -1106,6 +1138,15 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->loc_port = tcp_hdr(skb)->dest;
 }

+/* Compute time elapsed between SYNACK and the ACK completing 3WHS */
+static inline void tcp_synack_rtt_meas(struct sock *sk,
+				       struct request_sock *req)
+{
+	if (tcp_rsk(req)->snt_synack)
+		tcp_valid_rtt_meas(sk,
+		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
+}
+
 extern void tcp_enter_memory_pressure(struct sock *sk);

 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
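Taken together with the previous hunk, which zeroes snt_synack in
tcp_openreq_init(), this is Karn's rule applied to the handshake: an RTT
sample is taken only while snt_synack still holds the timestamp of an
unretransmitted SYNACK (presumably the retransmit path clears it back to
0), so the ACK of a retransmitted SYNACK cannot feed an ambiguous sample
into RTO estimation. A self-contained sketch of the sampling rule (the
names here are illustrative, not the kernel's):

struct handshake {
	unsigned long snt_synack;	/* send time of SYNACK; 0 once retransmitted */
};

static void synack_sent(struct handshake *h, unsigned long now, int retransmit)
{
	h->snt_synack = retransmit ? 0 : now;
}

/* Returns the RTT sample on the final ACK, or -1 if the measurement
 * is ambiguous and must be skipped (Karn's rule). */
static long synack_rtt(const struct handshake *h, unsigned long now)
{
	return h->snt_synack ? (long)(now - h->snt_synack) : -1;
}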
@@ -1298,15 +1339,34 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
 			    const struct tcp_md5sig_key *key);

+/* From tcp_fastopen.c */
+extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+				   struct tcp_fastopen_cookie *cookie,
+				   int *syn_loss, unsigned long *last_syn_loss);
+extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
+				   struct tcp_fastopen_cookie *cookie,
+				   bool syn_lost);
 struct tcp_fastopen_request {
 	/* Fast Open cookie. Size 0 means a cookie request */
 	struct tcp_fastopen_cookie	cookie;
 	struct msghdr			*data;  /* data in MSG_FASTOPEN */
 	u16				copied;	/* queued in tcp_connect() */
 };
-
 void tcp_free_fastopen_req(struct tcp_sock *tp);

+extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc);
+
+#define TCP_FASTOPEN_KEY_LENGTH 16
+
+/* Fastopen key context */
+struct tcp_fastopen_context {
+	struct crypto_cipher __rcu	*tfm;
+	__u8				key[TCP_FASTOPEN_KEY_LENGTH];
+	struct rcu_head			rcu;
+};
+
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
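This last block ties the server-side cookie scheme together: one 16-byte
(AES-128 sized) key, published through tcp_fastopen_ctx under RCU and
replaced via tcp_fastopen_reset_cipher(). The implementation lives in
net/ipv4/tcp_fastopen.c and is not part of this diff, so the following is
only a hedged sketch of what tcp_fastopen_cookie_gen() plausibly does
with this context: encrypt the peer's IPv4 address under the key, which
lets the server validate a returned cookie by recomputation, without
per-client state (kernel context assumed; TCP_FASTOPEN_COOKIE_SIZE is the
8-byte cookie length from include/linux/tcp.h):

#include <linux/crypto.h>
#include <net/tcp.h>

void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc)
{
	/* Pad the 4-byte address to a full 16-byte AES block. */
	__be32 peer_addr[4] = { addr, 0, 0, 0 };
	struct tcp_fastopen_context *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(tcp_fastopen_ctx);
	if (ctx) {
		/* Writes one cipher block into foc->val (which is sized
		 * TCP_FASTOPEN_COOKIE_MAX, so the block fits). */
		crypto_cipher_encrypt_one(ctx->tfm, foc->val,
					  (__u8 *)peer_addr);
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
	}
	rcu_read_unlock();
}

Whatever the exact derivation, rotating the key through
tcp_fastopen_reset_cipher() implicitly invalidates every cookie issued
under the old key.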