Diffstat (limited to 'include/net/tcp.h')
 include/net/tcp.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 71 insertions(+), 5 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f50f29faf76f..8d6b983d5099 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -262,8 +262,6 @@ extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
-extern int sysctl_tcp_mtu_probing;
-extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_thin_linear_timeouts;
@@ -274,6 +272,7 @@ extern int sysctl_tcp_challenge_ack_limit;
 extern unsigned int sysctl_tcp_notsent_lowat;
 extern int sysctl_tcp_min_tso_segs;
 extern int sysctl_tcp_autocorking;
+extern int sysctl_tcp_invalid_ratelimit;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -448,6 +447,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(struct sock *sk,
                                       struct request_sock *req,
                                       struct sk_buff *skb);
+void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                   struct request_sock *req,
                                   struct dst_entry *dst);
@@ -636,6 +636,11 @@ static inline u32 tcp_rto_min_us(struct sock *sk)
         return jiffies_to_usecs(tcp_rto_min(sk));
 }
 
+static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
+{
+        return dst_metric_locked(dst, RTAX_CC_ALGO);
+}
+
 /* Compute the actual receive window we are currently advertising.
  * Rcv_nxt can be after the window if our peer push more data
  * than the offered window.
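
Note: taken together with tcp_ca_openreq_child() above, this helper suggests a route can both suggest and lock the congestion control used by passively-opened sockets. A minimal sketch of how a child socket might honor that, assuming the RTAX_CC_ALGO metric stores a CA key (TCP_CA_UNSPEC and tcp_ca_find_key() are introduced later in this patch; icsk_ca_dst_locked is an assumed field, not part of this header):

/* Hedged sketch only -- not the actual tcp_ca_openreq_child() body. */
static void sketch_ca_openreq_child(struct sock *sk,
                                    const struct dst_entry *dst)
{
        u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);

        if (ca_key != TCP_CA_UNSPEC) {
                struct tcp_congestion_ops *ca = tcp_ca_find_key(ca_key);

                if (ca) {
                        inet_csk(sk)->icsk_ca_ops = ca;
                        /* Remember whether the route pins this choice
                         * (assumed icsk_ca_dst_locked field).
                         */
                        inet_csk(sk)->icsk_ca_dst_locked =
                                tcp_ca_dst_locked(dst);
                }
        }
}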
@@ -787,6 +792,8 @@ enum tcp_ca_ack_event_flags {
 #define TCP_CA_MAX      128
 #define TCP_CA_BUF_MAX  (TCP_CA_NAME_MAX*TCP_CA_MAX)
 
+#define TCP_CA_UNSPEC   0
+
 /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
@@ -794,7 +801,8 @@ enum tcp_ca_ack_event_flags {
 
 struct tcp_congestion_ops {
         struct list_head        list;
-        unsigned long flags;
+        u32 key;
+        u32 flags;
 
         /* initialize private data (optional) */
         void (*init)(struct sock *sk);
@@ -834,13 +842,24 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name);
-void tcp_slow_start(struct tcp_sock *tp, u32 acked);
-void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
+u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
+void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
 u32 tcp_reno_ssthresh(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
+struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
+u32 tcp_ca_get_key_by_name(const char *name);
+#ifdef CONFIG_INET
+char *tcp_ca_get_name_by_key(u32 key, char *buffer);
+#else
+static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
+{
+        return NULL;
+}
+#endif
+
 static inline bool tcp_ca_needs_ecn(const struct sock *sk)
 {
         const struct inet_connection_sock *icsk = inet_csk(sk);
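
Note: the tcp_slow_start()/tcp_cong_avoid_ai() signature changes point at stretch-ACK handling: slow start now reports how many ACKed segments it could not consume, and additive increase takes that remainder. A hedged sketch of how a Reno-style cong_avoid could chain the two under that reading (hypothetical_reno_cong_avoid is illustrative; tcp_is_cwnd_limited() is assumed from elsewhere in this header):

/* Sketch under the assumptions above -- not the patched tcp_reno code. */
static void hypothetical_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (!tcp_is_cwnd_limited(sk))
                return;

        if (tp->snd_cwnd <= tp->snd_ssthresh) {
                /* Grow exponentially; keep whatever slow start
                 * could not use for the linear phase below.
                 */
                acked = tcp_slow_start(tp, acked);
                if (!acked)
                        return;
        }
        /* One cwnd increment per cwnd's worth of ACKed segments. */
        tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}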
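Note: the new u32 key gives each algorithm a fixed-size handle that fits in a route metric (RTAX_CC_ALGO) instead of a name string. How the key is derived is not visible in this header; hashing ca->name at registration time is one plausible scheme, sketched here with jhash() as an illustrative choice:

#include <linux/jhash.h>

/* Assumption: a registration-time hash; the real derivation may differ. */
static void sketch_assign_ca_key(struct tcp_congestion_ops *ca)
{
        ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));

        /* TCP_CA_UNSPEC (0) must stay reserved for "no algorithm
         * specified"; rejecting a colliding name is one illustrative
         * policy.
         */
        if (ca->key == TCP_CA_UNSPEC)
                pr_err("%s: key collides with TCP_CA_UNSPEC\n", ca->name);
}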
@@ -1124,6 +1143,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
         tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
         tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
         tcp_rsk(req)->snt_synack = tcp_time_stamp;
+        tcp_rsk(req)->last_oow_ack_time = 0;
         req->mss = rx_opt->mss_clamp;
         req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
         ireq->tstamp_ok = rx_opt->tstamp_ok;
@@ -1216,6 +1236,37 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
         return true;
 }
 
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+static inline bool tcp_oow_rate_limited(struct net *net,
+                                        const struct sk_buff *skb,
+                                        int mib_idx, u32 *last_oow_ack_time)
+{
+        /* Data packets without SYNs are not likely part of an ACK loop. */
+        if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+            !tcp_hdr(skb)->syn)
+                goto not_rate_limited;
+
+        if (*last_oow_ack_time) {
+                s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+                if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+                        NET_INC_STATS_BH(net, mib_idx);
+                        return true;    /* rate-limited: don't send yet! */
+                }
+        }
+
+        *last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+        return false;   /* not rate-limited: go ahead, send dupack now! */
+}
+
 static inline void tcp_mib_init(struct net *net)
 {
         /* See RFC 2012 */
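
Note: the helper centralizes the check so each out-of-window path can carry its own MIB counter and timestamp; elapsed is measured in tcp_time_stamp units, so sysctl_tcp_invalid_ratelimit is presumably stored in the same jiffies-based units. A hedged sketch of a caller (the function name and mib_idx wiring are illustrative, and the actual call sites are outside this header; tp->last_oow_ack_time is assumed to be the per-socket counterpart of the request-sock field initialized above):

/* Illustrative caller only -- not actual kernel code. */
static void sketch_send_dupack(struct sock *sk, const struct sk_buff *skb,
                               int mib_idx)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (tcp_oow_rate_limited(sock_net(sk), skb, mib_idx,
                                 &tp->last_oow_ack_time))
                return;         /* suppress this dupack */

        tcp_send_ack(sk);
}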
@@ -1693,4 +1744,19 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
         return dopt;
 }
 
+/* locally generated TCP pure ACKs have skb->truesize == 2
+ * (check tcp_send_ack() in net/ipv4/tcp_output.c )
+ * This is much faster than dissecting the packet to find out.
+ * (Think of GRE encapsulations, IPv4, IPv6, ...)
+ */
+static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
+{
+        return skb->truesize == 2;
+}
+
+static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
+{
+        skb->truesize = 2;
+}
+
 #endif /* _TCP_H */
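
Note: the truesize sentinel works because any normally-accounted skb has truesize well above 2 (it includes at least sizeof(struct sk_buff)), so tcp_send_ack() can tag its skbs with skb_set_tcp_pure_ack() and fast paths can test for a pure ACK without parsing headers through arbitrary encapsulation. A hedged sketch of the kind of consumer this enables (should_pace() is illustrative, not an existing kernel function):

/* Illustrative consumer: a pacing decision that lets bare ACKs
 * bypass any pacing delay (assumed policy, not actual qdisc code).
 */
static bool should_pace(const struct sk_buff *skb)
{
        /* Pure ACKs carry no payload; delaying them only slows the
         * peer's sending rate, so let them go out immediately.
         */
        return !skb_is_tcp_pure_ack(skb);
}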
