Diffstat (limited to 'net/ipv4/tcp.c')
 -rw-r--r--  net/ipv4/tcp.c  91
 1 file changed, 56 insertions(+), 35 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..22ef5f9fd2ff 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
-long sysctl_tcp_mem[3] __read_mostly;
 int sysctl_tcp_wmem[3] __read_mostly;
 int sysctl_tcp_rmem[3] __read_mostly;
 
-EXPORT_SYMBOL(sysctl_tcp_mem);
 EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
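The global sysctl_tcp_mem array and its export go away because the limits move into per-network-namespace state; the tcp_init_mem() hunk near the end of this diff writes them through net->ipv4.sysctl_tcp_mem. A minimal sketch of that layout, with every unrelated field elided (the struct names here are illustrative stand-ins, not the kernel's full definitions):

struct netns_ipv4_sketch {
        long sysctl_tcp_mem[3];         /* pages: min, pressure, max */
};

struct net_sketch {
        struct netns_ipv4_sketch ipv4;  /* reached as net->ipv4.sysctl_tcp_mem[i] */
};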
@@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
-#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
-#define TCP_OFF(sk)	(sk->sk_sndmsg_off)
-
-static inline int select_size(const struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, bool sg)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
         int tmp = tp->mss_cache;
 
         if (sg) {
-                if (sk_can_gso(sk))
-                        tmp = 0;
-                else {
+                if (sk_can_gso(sk)) {
+                        /* Small frames won't use a full page:
+                         * payload will immediately follow the TCP header.
+                         */
+                        tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+                } else {
                         int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
 
                         if (tmp >= pgbreak &&
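The new GSO branch sizes the linear part so a small write fits a single 2048-byte allocation: SKB_WITH_OVERHEAD() subtracts the aligned skb_shared_info footprint, and MAX_TCP_HEADER is reserved up front for headers. A standalone sketch of the arithmetic, using illustrative stand-in constants rather than the kernel's real values:

#include <stdio.h>

#define MAX_TCP_HEADER       320  /* assumed header room, illustrative */
#define SHINFO_SIZE          192  /* stand-in for sizeof(struct skb_shared_info) */
#define SKB_DATA_ALIGN(x)    (((x) + 63UL) & ~63UL)  /* assumed 64-byte alignment */
#define SKB_WITH_OVERHEAD(x) ((x) - SKB_DATA_ALIGN(SHINFO_SIZE))

int main(void)
{
        /* Linear payload budget once header room and the trailing
         * skb_shared_info are carved out of a 2 KB allocation. */
        long linear = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
        printf("linear payload budget: %ld bytes\n", linear);
        return 0;
}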
@@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         struct iovec *iov;
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
-        int iovlen, flags;
+        int iovlen, flags, err, copied;
         int mss_now, size_goal;
-        int sg, err, copied;
+        bool sg;
         long timeo;
 
         lock_sock(sk);
@@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                 goto out_err;
 
-        sg = sk->sk_route_caps & NETIF_F_SG;
+        sg = !!(sk->sk_route_caps & NETIF_F_SG);
 
         while (--iovlen >= 0) {
                 size_t seglen = iov->iov_len;
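With sg now declared bool, the !! makes the narrowing from the NETIF_F_SG mask explicit; a C99 bool assignment would normalize the value anyway, so the idiom mainly documents intent. A tiny standalone demo (the mask value is illustrative, not the real NETIF_F_SG bit):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        unsigned int caps = 0x8000;       /* illustrative capability mask */
        bool sg = !!(caps & 0x8000);      /* collapses any nonzero bit to 1 */
        printf("sg=%d\n", sg);            /* prints 1, not 0x8000 */
        return 0;
}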
@@ -1005,8 +1003,13 @@ new_segment:
                 } else {
                         int merge = 0;
                         int i = skb_shinfo(skb)->nr_frags;
-                        struct page *page = TCP_PAGE(sk);
-                        int off = TCP_OFF(sk);
+                        struct page *page = sk->sk_sndmsg_page;
+                        int off;
+
+                        if (page && page_count(page) == 1)
+                                sk->sk_sndmsg_off = 0;
+
+                        off = sk->sk_sndmsg_off;
 
                         if (skb_can_coalesce(skb, i, page, off) &&
                             off != PAGE_SIZE) {
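The added page_count() test lets the cached sk_sndmsg_page be rewound: once every skb fragment that pointed into the page has been freed, the socket holds the sole reference, so it can refill the page from offset 0 instead of allocating a new one. A userspace sketch of the idea, with stand-in names for the sk_sndmsg_* fields:

#include <stdio.h>

struct cached_page {
        int refcount;   /* stand-in for page_count(page) */
        int off;        /* stand-in for sk->sk_sndmsg_off */
};

static int next_offset(struct cached_page *p)
{
        if (p->refcount == 1)   /* sole owner: earlier frags released */
                p->off = 0;     /* safe to reuse the page from the start */
        return p->off;
}

int main(void)
{
        struct cached_page p = { .refcount = 1, .off = 1024 };
        printf("write at offset %d\n", next_offset(&p));  /* prints 0 */
        return 0;
}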
@@ -1023,7 +1026,7 @@ new_segment:
                         } else if (page) {
                                 if (off == PAGE_SIZE) {
                                         put_page(page);
-                                        TCP_PAGE(sk) = page = NULL;
+                                        sk->sk_sndmsg_page = page = NULL;
                                         off = 0;
                                 }
                         } else
@@ -1049,9 +1052,9 @@ new_segment:
                                 /* If this page was new, give it to the
                                  * socket so it does not get leaked.
                                  */
-                                if (!TCP_PAGE(sk)) {
-                                        TCP_PAGE(sk) = page;
-                                        TCP_OFF(sk) = 0;
+                                if (!sk->sk_sndmsg_page) {
+                                        sk->sk_sndmsg_page = page;
+                                        sk->sk_sndmsg_off = 0;
                                 }
                                 goto do_error;
                         }
@@ -1061,15 +1064,15 @@ new_segment:
                                 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                         } else {
                                 skb_fill_page_desc(skb, i, page, off, copy);
-                                if (TCP_PAGE(sk)) {
+                                if (sk->sk_sndmsg_page) {
                                         get_page(page);
                                 } else if (off + copy < PAGE_SIZE) {
                                         get_page(page);
-                                        TCP_PAGE(sk) = page;
+                                        sk->sk_sndmsg_page = page;
                                 }
                         }
 
-                        TCP_OFF(sk) = off + copy;
+                        sk->sk_sndmsg_off = off + copy;
                 }
 
                 if (!copied)
@@ -1873,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how)
 }
 EXPORT_SYMBOL(tcp_shutdown);
 
+bool tcp_check_oom(struct sock *sk, int shift)
+{
+        bool too_many_orphans, out_of_socket_memory;
+
+        too_many_orphans = tcp_too_many_orphans(sk, shift);
+        out_of_socket_memory = tcp_out_of_memory(sk);
+
+        if (too_many_orphans && net_ratelimit())
+                pr_info("TCP: too many orphaned sockets\n");
+        if (out_of_socket_memory && net_ratelimit())
+                pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
+        return too_many_orphans || out_of_socket_memory;
+}
+
 void tcp_close(struct sock *sk, long timeout)
 {
         struct sk_buff *skb;
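tcp_check_oom() evaluates both exhaustion conditions before logging, so one pass can report orphan pressure and memory pressure independently, each under its own net_ratelimit() check, and return a single verdict that call sites can share (the tcp_close() hunk below is one such caller). A userspace analogue of the pattern, with stub predicates standing in for tcp_too_many_orphans() and tcp_out_of_memory():

#include <stdbool.h>
#include <stdio.h>

static bool too_many_orphans_stub(void) { return false; }
static bool out_of_memory_stub(void)    { return true; }

static bool check_oom_sketch(void)
{
        /* Evaluate both conditions first so both messages can fire. */
        bool orphans = too_many_orphans_stub();
        bool oom = out_of_memory_stub();

        if (orphans)
                fprintf(stderr, "TCP: too many orphaned sockets\n");
        if (oom)
                fprintf(stderr, "TCP: out of memory -- consider tuning tcp_mem\n");
        return orphans || oom;
}

int main(void)
{
        printf("oom: %d\n", check_oom_sketch());  /* prints 1 */
        return 0;
}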
@@ -2012,10 +2029,7 @@ adjudge_to_death:
         }
         if (sk->sk_state != TCP_CLOSE) {
                 sk_mem_reclaim(sk);
-                if (tcp_too_many_orphans(sk, 0)) {
-                        if (net_ratelimit())
-                                printk(KERN_INFO "TCP: too many of orphaned "
-                                       "sockets\n");
+                if (tcp_check_oom(sk, 0)) {
                         tcp_set_state(sk, TCP_CLOSE);
                         tcp_send_active_reset(sk, GFP_ATOMIC);
                         NET_INC_STATS_BH(sock_net(sk),
@@ -2653,7 +2667,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+                                netdev_features_t features)
 {
         struct sk_buff *segs = ERR_PTR(-EINVAL);
         struct tcphdr *th;
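The prototype changes because device feature masks outgrew 32 bits: netdev_features_t is a wider (64-bit) integer typedef, so keeping u32 here would silently truncate high feature flags. A standalone illustration of the failure mode (bit 32 is an arbitrary example, not a specific NETIF_F_* flag):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t wide = UINT64_C(1) << 32;   /* feature bit above the u32 range */
        uint32_t narrow = (uint32_t)wide;    /* truncates to 0 */
        printf("wide=%" PRIu64 " narrow=%" PRIu32 "\n", wide, narrow);
        return 0;
}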
@@ -3212,11 +3227,21 @@ static int __init set_thash_entries(char *str)
 }
 __setup("thash_entries=", set_thash_entries);
 
+void tcp_init_mem(struct net *net)
+{
+        unsigned long limit = nr_free_buffer_pages() / 8;
+        limit = max(limit, 128UL);
+        net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
+        net->ipv4.sysctl_tcp_mem[1] = limit;
+        net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
+}
+
 void __init tcp_init(void)
 {
         struct sk_buff *skb = NULL;
         unsigned long limit;
-        int i, max_share, cnt;
+        int max_share, cnt;
+        unsigned int i;
         unsigned long jiffy = jiffies;
 
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
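tcp_init_mem() keeps the original sizing policy, now applied per namespace: the pressure threshold is 1/8 of the free buffer pages (clamped to at least 128), the low watermark is 3/4 of that, and the hard limit is twice the low watermark, i.e. 1.5x the threshold. A standalone rerun of the arithmetic, where the page count is a made-up example value rather than a kernel API call:

#include <stdio.h>

int main(void)
{
        unsigned long pages = 2048UL * 1024;  /* hypothetical: 8 GB of 4 KB pages */
        unsigned long limit = pages / 8;      /* pressure threshold in pages */
        if (limit < 128UL)
                limit = 128UL;
        unsigned long mem0 = limit / 4 * 3;   /* low watermark: 75% */
        unsigned long mem1 = limit;           /* pressure threshold */
        unsigned long mem2 = mem0 * 2;        /* hard limit: 150% of threshold */
        printf("tcp_mem = %lu %lu %lu pages\n", mem0, mem1, mem2);
        return 0;
}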
@@ -3259,7 +3284,7 @@ void __init tcp_init(void)
                                         &tcp_hashinfo.bhash_size,
                                         NULL,
                                         64 * 1024);
-        tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
+        tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
         for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
                 spin_lock_init(&tcp_hashinfo.bhash[i].lock);
                 INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
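bhash_size comes back from alloc_large_system_hash() as a log2, which this line expands; shifting a signed 1 is undefined behaviour once the exponent reaches 31, so the unsigned literal keeps the expansion well-defined (and pairs with i becoming unsigned int above). A minimal demonstration of the distinction:

#include <stdio.h>

int main(void)
{
        unsigned int n = 31;
        /* 1 << 31 overflows a signed int (undefined behaviour);
         * the unsigned literal is well-defined. */
        unsigned int ok = 1U << n;
        printf("%u\n", ok);   /* 2147483648 */
        return 0;
}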
@@ -3272,14 +3297,10 @@ void __init tcp_init(void)
         sysctl_tcp_max_orphans = cnt / 2;
         sysctl_max_syn_backlog = max(128, cnt / 256);
 
-        limit = nr_free_buffer_pages() / 8;
-        limit = max(limit, 128UL);
-        sysctl_tcp_mem[0] = limit / 4 * 3;
-        sysctl_tcp_mem[1] = limit;
-        sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
-
+        tcp_init_mem(&init_net);
         /* Set per-socket limits to no more than 1/128 the pressure threshold */
-        limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
+        limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10);
+        limit = max(limit, 128UL);
         max_share = min(4UL*1024*1024, limit);
 
         sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
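The rewritten per-socket cap avoids reading sysctl_tcp_mem (now per-namespace) while preserving the value: the old (pages / 8) << (PAGE_SHIFT - 7) and the new pages << (PAGE_SHIFT - 10) both reduce to pages * PAGE_SIZE / 1024, ignoring the minimum clamp. A quick standalone check, with an illustrative page count and a PAGE_SHIFT of 12 (4 KB pages):

#include <stdio.h>

int main(void)
{
        unsigned long pages = 2048UL * 1024;
        unsigned long page_shift = 12;
        unsigned long old_limit = (pages / 8) << (page_shift - 7);
        unsigned long new_limit = pages << (page_shift - 10);
        printf("old=%lu new=%lu equal=%d\n",
               old_limit, new_limit, old_limit == new_limit);  /* equal=1 */
        return 0;
}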