diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 91 |
1 files changed, 56 insertions, 35 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..22ef5f9fd2ff 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; | |||
282 | struct percpu_counter tcp_orphan_count; | 282 | struct percpu_counter tcp_orphan_count; |
283 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 283 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
284 | 284 | ||
285 | long sysctl_tcp_mem[3] __read_mostly; | ||
286 | int sysctl_tcp_wmem[3] __read_mostly; | 285 | int sysctl_tcp_wmem[3] __read_mostly; |
287 | int sysctl_tcp_rmem[3] __read_mostly; | 286 | int sysctl_tcp_rmem[3] __read_mostly; |
288 | 287 | ||
289 | EXPORT_SYMBOL(sysctl_tcp_mem); | ||
290 | EXPORT_SYMBOL(sysctl_tcp_rmem); | 288 | EXPORT_SYMBOL(sysctl_tcp_rmem); |
291 | EXPORT_SYMBOL(sysctl_tcp_wmem); | 289 | EXPORT_SYMBOL(sysctl_tcp_wmem); |
292 | 290 | ||
@@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, | |||
888 | } | 886 | } |
889 | EXPORT_SYMBOL(tcp_sendpage); | 887 | EXPORT_SYMBOL(tcp_sendpage); |
890 | 888 | ||
891 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 889 | static inline int select_size(const struct sock *sk, bool sg) |
892 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | ||
893 | |||
894 | static inline int select_size(const struct sock *sk, int sg) | ||
895 | { | 890 | { |
896 | const struct tcp_sock *tp = tcp_sk(sk); | 891 | const struct tcp_sock *tp = tcp_sk(sk); |
897 | int tmp = tp->mss_cache; | 892 | int tmp = tp->mss_cache; |
898 | 893 | ||
899 | if (sg) { | 894 | if (sg) { |
900 | if (sk_can_gso(sk)) | 895 | if (sk_can_gso(sk)) { |
901 | tmp = 0; | 896 | /* Small frames wont use a full page: |
902 | else { | 897 | * Payload will immediately follow tcp header. |
898 | */ | ||
899 | tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); | ||
900 | } else { | ||
903 | int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); | 901 | int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); |
904 | 902 | ||
905 | if (tmp >= pgbreak && | 903 | if (tmp >= pgbreak && |
@@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
917 | struct iovec *iov; | 915 | struct iovec *iov; |
918 | struct tcp_sock *tp = tcp_sk(sk); | 916 | struct tcp_sock *tp = tcp_sk(sk); |
919 | struct sk_buff *skb; | 917 | struct sk_buff *skb; |
920 | int iovlen, flags; | 918 | int iovlen, flags, err, copied; |
921 | int mss_now, size_goal; | 919 | int mss_now, size_goal; |
922 | int sg, err, copied; | 920 | bool sg; |
923 | long timeo; | 921 | long timeo; |
924 | 922 | ||
925 | lock_sock(sk); | 923 | lock_sock(sk); |
@@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
946 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 944 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
947 | goto out_err; | 945 | goto out_err; |
948 | 946 | ||
949 | sg = sk->sk_route_caps & NETIF_F_SG; | 947 | sg = !!(sk->sk_route_caps & NETIF_F_SG); |
950 | 948 | ||
951 | while (--iovlen >= 0) { | 949 | while (--iovlen >= 0) { |
952 | size_t seglen = iov->iov_len; | 950 | size_t seglen = iov->iov_len; |
@@ -1005,8 +1003,13 @@ new_segment: | |||
1005 | } else { | 1003 | } else { |
1006 | int merge = 0; | 1004 | int merge = 0; |
1007 | int i = skb_shinfo(skb)->nr_frags; | 1005 | int i = skb_shinfo(skb)->nr_frags; |
1008 | struct page *page = TCP_PAGE(sk); | 1006 | struct page *page = sk->sk_sndmsg_page; |
1009 | int off = TCP_OFF(sk); | 1007 | int off; |
1008 | |||
1009 | if (page && page_count(page) == 1) | ||
1010 | sk->sk_sndmsg_off = 0; | ||
1011 | |||
1012 | off = sk->sk_sndmsg_off; | ||
1010 | 1013 | ||
1011 | if (skb_can_coalesce(skb, i, page, off) && | 1014 | if (skb_can_coalesce(skb, i, page, off) && |
1012 | off != PAGE_SIZE) { | 1015 | off != PAGE_SIZE) { |
@@ -1023,7 +1026,7 @@ new_segment: | |||
1023 | } else if (page) { | 1026 | } else if (page) { |
1024 | if (off == PAGE_SIZE) { | 1027 | if (off == PAGE_SIZE) { |
1025 | put_page(page); | 1028 | put_page(page); |
1026 | TCP_PAGE(sk) = page = NULL; | 1029 | sk->sk_sndmsg_page = page = NULL; |
1027 | off = 0; | 1030 | off = 0; |
1028 | } | 1031 | } |
1029 | } else | 1032 | } else |
@@ -1049,9 +1052,9 @@ new_segment: | |||
1049 | /* If this page was new, give it to the | 1052 | /* If this page was new, give it to the |
1050 | * socket so it does not get leaked. | 1053 | * socket so it does not get leaked. |
1051 | */ | 1054 | */ |
1052 | if (!TCP_PAGE(sk)) { | 1055 | if (!sk->sk_sndmsg_page) { |
1053 | TCP_PAGE(sk) = page; | 1056 | sk->sk_sndmsg_page = page; |
1054 | TCP_OFF(sk) = 0; | 1057 | sk->sk_sndmsg_off = 0; |
1055 | } | 1058 | } |
1056 | goto do_error; | 1059 | goto do_error; |
1057 | } | 1060 | } |
@@ -1061,15 +1064,15 @@ new_segment: | |||
1061 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | 1064 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); |
1062 | } else { | 1065 | } else { |
1063 | skb_fill_page_desc(skb, i, page, off, copy); | 1066 | skb_fill_page_desc(skb, i, page, off, copy); |
1064 | if (TCP_PAGE(sk)) { | 1067 | if (sk->sk_sndmsg_page) { |
1065 | get_page(page); | 1068 | get_page(page); |
1066 | } else if (off + copy < PAGE_SIZE) { | 1069 | } else if (off + copy < PAGE_SIZE) { |
1067 | get_page(page); | 1070 | get_page(page); |
1068 | TCP_PAGE(sk) = page; | 1071 | sk->sk_sndmsg_page = page; |
1069 | } | 1072 | } |
1070 | } | 1073 | } |
1071 | 1074 | ||
1072 | TCP_OFF(sk) = off + copy; | 1075 | sk->sk_sndmsg_off = off + copy; |
1073 | } | 1076 | } |
1074 | 1077 | ||
1075 | if (!copied) | 1078 | if (!copied) |
@@ -1873,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how) | |||
1873 | } | 1876 | } |
1874 | EXPORT_SYMBOL(tcp_shutdown); | 1877 | EXPORT_SYMBOL(tcp_shutdown); |
1875 | 1878 | ||
1879 | bool tcp_check_oom(struct sock *sk, int shift) | ||
1880 | { | ||
1881 | bool too_many_orphans, out_of_socket_memory; | ||
1882 | |||
1883 | too_many_orphans = tcp_too_many_orphans(sk, shift); | ||
1884 | out_of_socket_memory = tcp_out_of_memory(sk); | ||
1885 | |||
1886 | if (too_many_orphans && net_ratelimit()) | ||
1887 | pr_info("TCP: too many orphaned sockets\n"); | ||
1888 | if (out_of_socket_memory && net_ratelimit()) | ||
1889 | pr_info("TCP: out of memory -- consider tuning tcp_mem\n"); | ||
1890 | return too_many_orphans || out_of_socket_memory; | ||
1891 | } | ||
1892 | |||
1876 | void tcp_close(struct sock *sk, long timeout) | 1893 | void tcp_close(struct sock *sk, long timeout) |
1877 | { | 1894 | { |
1878 | struct sk_buff *skb; | 1895 | struct sk_buff *skb; |
@@ -2012,10 +2029,7 @@ adjudge_to_death: | |||
2012 | } | 2029 | } |
2013 | if (sk->sk_state != TCP_CLOSE) { | 2030 | if (sk->sk_state != TCP_CLOSE) { |
2014 | sk_mem_reclaim(sk); | 2031 | sk_mem_reclaim(sk); |
2015 | if (tcp_too_many_orphans(sk, 0)) { | 2032 | if (tcp_check_oom(sk, 0)) { |
2016 | if (net_ratelimit()) | ||
2017 | printk(KERN_INFO "TCP: too many of orphaned " | ||
2018 | "sockets\n"); | ||
2019 | tcp_set_state(sk, TCP_CLOSE); | 2033 | tcp_set_state(sk, TCP_CLOSE); |
2020 | tcp_send_active_reset(sk, GFP_ATOMIC); | 2034 | tcp_send_active_reset(sk, GFP_ATOMIC); |
2021 | NET_INC_STATS_BH(sock_net(sk), | 2035 | NET_INC_STATS_BH(sock_net(sk), |
@@ -2653,7 +2667,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2667 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
2654 | #endif | 2668 | #endif |
2655 | 2669 | ||
2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) | 2670 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, |
2671 | netdev_features_t features) | ||
2657 | { | 2672 | { |
2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2673 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2659 | struct tcphdr *th; | 2674 | struct tcphdr *th; |
@@ -3212,11 +3227,21 @@ static int __init set_thash_entries(char *str) | |||
3212 | } | 3227 | } |
3213 | __setup("thash_entries=", set_thash_entries); | 3228 | __setup("thash_entries=", set_thash_entries); |
3214 | 3229 | ||
3230 | void tcp_init_mem(struct net *net) | ||
3231 | { | ||
3232 | unsigned long limit = nr_free_buffer_pages() / 8; | ||
3233 | limit = max(limit, 128UL); | ||
3234 | net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; | ||
3235 | net->ipv4.sysctl_tcp_mem[1] = limit; | ||
3236 | net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; | ||
3237 | } | ||
3238 | |||
3215 | void __init tcp_init(void) | 3239 | void __init tcp_init(void) |
3216 | { | 3240 | { |
3217 | struct sk_buff *skb = NULL; | 3241 | struct sk_buff *skb = NULL; |
3218 | unsigned long limit; | 3242 | unsigned long limit; |
3219 | int i, max_share, cnt; | 3243 | int max_share, cnt; |
3244 | unsigned int i; | ||
3220 | unsigned long jiffy = jiffies; | 3245 | unsigned long jiffy = jiffies; |
3221 | 3246 | ||
3222 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); | 3247 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); |
@@ -3259,7 +3284,7 @@ void __init tcp_init(void) | |||
3259 | &tcp_hashinfo.bhash_size, | 3284 | &tcp_hashinfo.bhash_size, |
3260 | NULL, | 3285 | NULL, |
3261 | 64 * 1024); | 3286 | 64 * 1024); |
3262 | tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size; | 3287 | tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; |
3263 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { | 3288 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { |
3264 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); | 3289 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); |
3265 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); | 3290 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); |
@@ -3272,14 +3297,10 @@ void __init tcp_init(void) | |||
3272 | sysctl_tcp_max_orphans = cnt / 2; | 3297 | sysctl_tcp_max_orphans = cnt / 2; |
3273 | sysctl_max_syn_backlog = max(128, cnt / 256); | 3298 | sysctl_max_syn_backlog = max(128, cnt / 256); |
3274 | 3299 | ||
3275 | limit = nr_free_buffer_pages() / 8; | 3300 | tcp_init_mem(&init_net); |
3276 | limit = max(limit, 128UL); | ||
3277 | sysctl_tcp_mem[0] = limit / 4 * 3; | ||
3278 | sysctl_tcp_mem[1] = limit; | ||
3279 | sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; | ||
3280 | |||
3281 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | 3301 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ |
3282 | limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); | 3302 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); |
3303 | limit = max(limit, 128UL); | ||
3283 | max_share = min(4UL*1024*1024, limit); | 3304 | max_share = min(4UL*1024*1024, limit); |
3284 | 3305 | ||
3285 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; | 3306 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; |