diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 2 | ||||
-rw-r--r-- | net/ipv4/arp.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 2 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 23 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 45 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 5 |
8 files changed, 56 insertions, 35 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index aa2a2c79776f..d183262943d9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -409,7 +409,7 @@ config INET_TCP_DIAG | |||
409 | 409 | ||
410 | config INET_UDP_DIAG | 410 | config INET_UDP_DIAG |
411 | tristate "UDP: socket monitoring interface" | 411 | tristate "UDP: socket monitoring interface" |
412 | depends on INET_DIAG | 412 | depends on INET_DIAG && (IPV6 || IPV6=n) |
413 | default n | 413 | default n |
414 | ---help--- | 414 | ---help--- |
415 | Support for UDP socket monitoring interface used by the ss tool. | 415 | Support for UDP socket monitoring interface used by the ss tool. |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 59402be133f0..63e49890ad31 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -863,7 +863,8 @@ static int arp_process(struct sk_buff *skb) | |||
863 | if (addr_type == RTN_UNICAST && | 863 | if (addr_type == RTN_UNICAST && |
864 | (arp_fwd_proxy(in_dev, dev, rt) || | 864 | (arp_fwd_proxy(in_dev, dev, rt) || |
865 | arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || | 865 | arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || |
866 | pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { | 866 | (rt->dst.dev != dev && |
867 | pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { | ||
867 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); | 868 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); |
868 | if (n) | 869 | if (n) |
869 | neigh_release(n); | 870 | neigh_release(n); |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1e60f7679075..42dd1a90edea 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb) | |||
573 | } | 573 | } |
574 | if (srrptr + 3 <= srrspace) { | 574 | if (srrptr + 3 <= srrspace) { |
575 | opt->is_changed = 1; | 575 | opt->is_changed = 1; |
576 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); | ||
577 | ip_hdr(skb)->daddr = opt->nexthop; | 576 | ip_hdr(skb)->daddr = opt->nexthop; |
577 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); | ||
578 | optptr[2] = srrptr+4; | 578 | optptr[2] = srrptr+4; |
579 | } else if (net_ratelimit()) | 579 | } else if (net_ratelimit()) |
580 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); | 580 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4cb9cd2f2c39..7a7724da9bff 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -778,7 +778,6 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); | |||
778 | static __net_init int ipv4_sysctl_init_net(struct net *net) | 778 | static __net_init int ipv4_sysctl_init_net(struct net *net) |
779 | { | 779 | { |
780 | struct ctl_table *table; | 780 | struct ctl_table *table; |
781 | unsigned long limit; | ||
782 | 781 | ||
783 | table = ipv4_net_table; | 782 | table = ipv4_net_table; |
784 | if (!net_eq(net, &init_net)) { | 783 | if (!net_eq(net, &init_net)) { |
@@ -815,11 +814,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
815 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; | 814 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; |
816 | 815 | ||
817 | tcp_init_mem(net); | 816 | tcp_init_mem(net); |
818 | limit = nr_free_buffer_pages() / 8; | ||
819 | limit = max(limit, 128UL); | ||
820 | net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; | ||
821 | net->ipv4.sysctl_tcp_mem[1] = limit; | ||
822 | net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; | ||
823 | 817 | ||
824 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 818 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, |
825 | net_ipv4_ctl_path, table); | 819 | net_ipv4_ctl_path, table); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 06373b4a449a..37755ccc0e96 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1876,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how) | |||
1876 | } | 1876 | } |
1877 | EXPORT_SYMBOL(tcp_shutdown); | 1877 | EXPORT_SYMBOL(tcp_shutdown); |
1878 | 1878 | ||
1879 | bool tcp_check_oom(struct sock *sk, int shift) | ||
1880 | { | ||
1881 | bool too_many_orphans, out_of_socket_memory; | ||
1882 | |||
1883 | too_many_orphans = tcp_too_many_orphans(sk, shift); | ||
1884 | out_of_socket_memory = tcp_out_of_memory(sk); | ||
1885 | |||
1886 | if (too_many_orphans && net_ratelimit()) | ||
1887 | pr_info("TCP: too many orphaned sockets\n"); | ||
1888 | if (out_of_socket_memory && net_ratelimit()) | ||
1889 | pr_info("TCP: out of memory -- consider tuning tcp_mem\n"); | ||
1890 | return too_many_orphans || out_of_socket_memory; | ||
1891 | } | ||
1892 | |||
1879 | void tcp_close(struct sock *sk, long timeout) | 1893 | void tcp_close(struct sock *sk, long timeout) |
1880 | { | 1894 | { |
1881 | struct sk_buff *skb; | 1895 | struct sk_buff *skb; |
@@ -2015,10 +2029,7 @@ adjudge_to_death: | |||
2015 | } | 2029 | } |
2016 | if (sk->sk_state != TCP_CLOSE) { | 2030 | if (sk->sk_state != TCP_CLOSE) { |
2017 | sk_mem_reclaim(sk); | 2031 | sk_mem_reclaim(sk); |
2018 | if (tcp_too_many_orphans(sk, 0)) { | 2032 | if (tcp_check_oom(sk, 0)) { |
2019 | if (net_ratelimit()) | ||
2020 | printk(KERN_INFO "TCP: too many of orphaned " | ||
2021 | "sockets\n"); | ||
2022 | tcp_set_state(sk, TCP_CLOSE); | 2033 | tcp_set_state(sk, TCP_CLOSE); |
2023 | tcp_send_active_reset(sk, GFP_ATOMIC); | 2034 | tcp_send_active_reset(sk, GFP_ATOMIC); |
2024 | NET_INC_STATS_BH(sock_net(sk), | 2035 | NET_INC_STATS_BH(sock_net(sk), |
@@ -3218,7 +3229,6 @@ __setup("thash_entries=", set_thash_entries); | |||
3218 | 3229 | ||
3219 | void tcp_init_mem(struct net *net) | 3230 | void tcp_init_mem(struct net *net) |
3220 | { | 3231 | { |
3221 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | ||
3222 | unsigned long limit = nr_free_buffer_pages() / 8; | 3232 | unsigned long limit = nr_free_buffer_pages() / 8; |
3223 | limit = max(limit, 128UL); | 3233 | limit = max(limit, 128UL); |
3224 | net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; | 3234 | net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; |
@@ -3287,7 +3297,8 @@ void __init tcp_init(void) | |||
3287 | sysctl_max_syn_backlog = max(128, cnt / 256); | 3297 | sysctl_max_syn_backlog = max(128, cnt / 256); |
3288 | 3298 | ||
3289 | tcp_init_mem(&init_net); | 3299 | tcp_init_mem(&init_net); |
3290 | limit = nr_free_buffer_pages() / 8; | 3300 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ |
3301 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); | ||
3291 | limit = max(limit, 128UL); | 3302 | limit = max(limit, 128UL); |
3292 | max_share = min(4UL*1024*1024, limit); | 3303 | max_share = min(4UL*1024*1024, limit); |
3293 | 3304 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 976034f82320..53c8ce4046b2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -1307,25 +1307,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1307 | return in_sack; | 1307 | return in_sack; |
1308 | } | 1308 | } |
1309 | 1309 | ||
1310 | static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | 1310 | /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ |
1311 | struct tcp_sacktag_state *state, | 1311 | static u8 tcp_sacktag_one(struct sock *sk, |
1312 | struct tcp_sacktag_state *state, u8 sacked, | ||
1313 | u32 start_seq, u32 end_seq, | ||
1312 | int dup_sack, int pcount) | 1314 | int dup_sack, int pcount) |
1313 | { | 1315 | { |
1314 | struct tcp_sock *tp = tcp_sk(sk); | 1316 | struct tcp_sock *tp = tcp_sk(sk); |
1315 | u8 sacked = TCP_SKB_CB(skb)->sacked; | ||
1316 | int fack_count = state->fack_count; | 1317 | int fack_count = state->fack_count; |
1317 | 1318 | ||
1318 | /* Account D-SACK for retransmitted packet. */ | 1319 | /* Account D-SACK for retransmitted packet. */ |
1319 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1320 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
1320 | if (tp->undo_marker && tp->undo_retrans && | 1321 | if (tp->undo_marker && tp->undo_retrans && |
1321 | after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) | 1322 | after(end_seq, tp->undo_marker)) |
1322 | tp->undo_retrans--; | 1323 | tp->undo_retrans--; |
1323 | if (sacked & TCPCB_SACKED_ACKED) | 1324 | if (sacked & TCPCB_SACKED_ACKED) |
1324 | state->reord = min(fack_count, state->reord); | 1325 | state->reord = min(fack_count, state->reord); |
1325 | } | 1326 | } |
1326 | 1327 | ||
1327 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ | 1328 | /* Nothing to do; acked frame is about to be dropped (was ACKed). */ |
1328 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | 1329 | if (!after(end_seq, tp->snd_una)) |
1329 | return sacked; | 1330 | return sacked; |
1330 | 1331 | ||
1331 | if (!(sacked & TCPCB_SACKED_ACKED)) { | 1332 | if (!(sacked & TCPCB_SACKED_ACKED)) { |
@@ -1344,13 +1345,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
1344 | /* New sack for not retransmitted frame, | 1345 | /* New sack for not retransmitted frame, |
1345 | * which was in hole. It is reordering. | 1346 | * which was in hole. It is reordering. |
1346 | */ | 1347 | */ |
1347 | if (before(TCP_SKB_CB(skb)->seq, | 1348 | if (before(start_seq, |
1348 | tcp_highest_sack_seq(tp))) | 1349 | tcp_highest_sack_seq(tp))) |
1349 | state->reord = min(fack_count, | 1350 | state->reord = min(fack_count, |
1350 | state->reord); | 1351 | state->reord); |
1351 | 1352 | ||
1352 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ | 1353 | /* SACK enhanced F-RTO (RFC4138; Appendix B) */ |
1353 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) | 1354 | if (!after(end_seq, tp->frto_highmark)) |
1354 | state->flag |= FLAG_ONLY_ORIG_SACKED; | 1355 | state->flag |= FLAG_ONLY_ORIG_SACKED; |
1355 | } | 1356 | } |
1356 | 1357 | ||
@@ -1368,8 +1369,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
1368 | 1369 | ||
1369 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ | 1370 | /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ |
1370 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && | 1371 | if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && |
1371 | before(TCP_SKB_CB(skb)->seq, | 1372 | before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) |
1372 | TCP_SKB_CB(tp->lost_skb_hint)->seq)) | ||
1373 | tp->lost_cnt_hint += pcount; | 1373 | tp->lost_cnt_hint += pcount; |
1374 | 1374 | ||
1375 | if (fack_count > tp->fackets_out) | 1375 | if (fack_count > tp->fackets_out) |
@@ -1388,6 +1388,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, | |||
1388 | return sacked; | 1388 | return sacked; |
1389 | } | 1389 | } |
1390 | 1390 | ||
1391 | /* Shift newly-SACKed bytes from this skb to the immediately previous | ||
1392 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. | ||
1393 | */ | ||
1391 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1394 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
1392 | struct tcp_sacktag_state *state, | 1395 | struct tcp_sacktag_state *state, |
1393 | unsigned int pcount, int shifted, int mss, | 1396 | unsigned int pcount, int shifted, int mss, |
@@ -1395,10 +1398,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1395 | { | 1398 | { |
1396 | struct tcp_sock *tp = tcp_sk(sk); | 1399 | struct tcp_sock *tp = tcp_sk(sk); |
1397 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1400 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
1401 | u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ | ||
1402 | u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ | ||
1398 | 1403 | ||
1399 | BUG_ON(!pcount); | 1404 | BUG_ON(!pcount); |
1400 | 1405 | ||
1401 | if (skb == tp->lost_skb_hint) | 1406 | /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */ |
1407 | if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint)) | ||
1402 | tp->lost_cnt_hint += pcount; | 1408 | tp->lost_cnt_hint += pcount; |
1403 | 1409 | ||
1404 | TCP_SKB_CB(prev)->end_seq += shifted; | 1410 | TCP_SKB_CB(prev)->end_seq += shifted; |
@@ -1424,8 +1430,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1424 | skb_shinfo(skb)->gso_type = 0; | 1430 | skb_shinfo(skb)->gso_type = 0; |
1425 | } | 1431 | } |
1426 | 1432 | ||
1427 | /* We discard results */ | 1433 | /* Adjust counters and hints for the newly sacked sequence range but |
1428 | tcp_sacktag_one(skb, sk, state, dup_sack, pcount); | 1434 | * discard the return value since prev is already marked. |
1435 | */ | ||
1436 | tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, | ||
1437 | start_seq, end_seq, dup_sack, pcount); | ||
1429 | 1438 | ||
1430 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ | 1439 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ |
1431 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); | 1440 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); |
@@ -1664,10 +1673,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1664 | break; | 1673 | break; |
1665 | 1674 | ||
1666 | if (in_sack) { | 1675 | if (in_sack) { |
1667 | TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, | 1676 | TCP_SKB_CB(skb)->sacked = |
1668 | state, | 1677 | tcp_sacktag_one(sk, |
1669 | dup_sack, | 1678 | state, |
1670 | tcp_skb_pcount(skb)); | 1679 | TCP_SKB_CB(skb)->sacked, |
1680 | TCP_SKB_CB(skb)->seq, | ||
1681 | TCP_SKB_CB(skb)->end_seq, | ||
1682 | dup_sack, | ||
1683 | tcp_skb_pcount(skb)); | ||
1671 | 1684 | ||
1672 | if (!before(TCP_SKB_CB(skb)->seq, | 1685 | if (!before(TCP_SKB_CB(skb)->seq, |
1673 | tcp_highest_sack_seq(tp))) | 1686 | tcp_highest_sack_seq(tp))) |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 337ba4cca052..94d683a61cba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -651,6 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
651 | arg.iov[0].iov_len, IPPROTO_TCP, 0); | 651 | arg.iov[0].iov_len, IPPROTO_TCP, 0); |
652 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; | 652 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
653 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; | 653 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; |
654 | /* When socket is gone, all binding information is lost. | ||
655 | * routing might fail in this case. using iif for oif to | ||
656 | * make sure we can deliver it | ||
657 | */ | ||
658 | arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); | ||
654 | 659 | ||
655 | net = dev_net(skb_dst(skb)->dev); | 660 | net = dev_net(skb_dst(skb)->dev); |
656 | arg.tos = ip_hdr(skb)->tos; | 661 | arg.tos = ip_hdr(skb)->tos; |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a516d1e399df..cd2e0723266d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -77,10 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) | |||
77 | if (sk->sk_err_soft) | 77 | if (sk->sk_err_soft) |
78 | shift++; | 78 | shift++; |
79 | 79 | ||
80 | if (tcp_too_many_orphans(sk, shift)) { | 80 | if (tcp_check_oom(sk, shift)) { |
81 | if (net_ratelimit()) | ||
82 | printk(KERN_INFO "Out of socket memory\n"); | ||
83 | |||
84 | /* Catch exceptional cases, when connection requires reset. | 81 | /* Catch exceptional cases, when connection requires reset. |
85 | * 1. Last segment was sent recently. */ | 82 | * 1. Last segment was sent recently. */ |
86 | if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || | 83 | if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || |