aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig2
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c6
-rw-r--r--net/ipv4/tcp.c23
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_timer.c5
8 files changed, 56 insertions, 35 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index aa2a2c79776f..d183262943d9 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,7 +409,7 @@ config INET_TCP_DIAG
409 409
410config INET_UDP_DIAG 410config INET_UDP_DIAG
411 tristate "UDP: socket monitoring interface" 411 tristate "UDP: socket monitoring interface"
412 depends on INET_DIAG 412 depends on INET_DIAG && (IPV6 || IPV6=n)
413 default n 413 default n
414 ---help--- 414 ---help---
415 Support for UDP socket monitoring interface used by the ss tool. 415 Support for UDP socket monitoring interface used by the ss tool.
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59402be133f0..63e49890ad31 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -863,7 +863,8 @@ static int arp_process(struct sk_buff *skb)
863 if (addr_type == RTN_UNICAST && 863 if (addr_type == RTN_UNICAST &&
864 (arp_fwd_proxy(in_dev, dev, rt) || 864 (arp_fwd_proxy(in_dev, dev, rt) ||
865 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || 865 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
866 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { 866 (rt->dst.dev != dev &&
867 pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
867 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 868 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
868 if (n) 869 if (n)
869 neigh_release(n); 870 neigh_release(n);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1e60f7679075..42dd1a90edea 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb)
573 } 573 }
574 if (srrptr + 3 <= srrspace) { 574 if (srrptr + 3 <= srrspace) {
575 opt->is_changed = 1; 575 opt->is_changed = 1;
576 ip_rt_get_source(&optptr[srrptr-1], skb, rt);
577 ip_hdr(skb)->daddr = opt->nexthop; 576 ip_hdr(skb)->daddr = opt->nexthop;
577 ip_rt_get_source(&optptr[srrptr-1], skb, rt);
578 optptr[2] = srrptr+4; 578 optptr[2] = srrptr+4;
579 } else if (net_ratelimit()) 579 } else if (net_ratelimit())
580 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); 580 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4cb9cd2f2c39..7a7724da9bff 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -778,7 +778,6 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
778static __net_init int ipv4_sysctl_init_net(struct net *net) 778static __net_init int ipv4_sysctl_init_net(struct net *net)
779{ 779{
780 struct ctl_table *table; 780 struct ctl_table *table;
781 unsigned long limit;
782 781
783 table = ipv4_net_table; 782 table = ipv4_net_table;
784 if (!net_eq(net, &init_net)) { 783 if (!net_eq(net, &init_net)) {
@@ -815,11 +814,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
815 net->ipv4.sysctl_rt_cache_rebuild_count = 4; 814 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
816 815
817 tcp_init_mem(net); 816 tcp_init_mem(net);
818 limit = nr_free_buffer_pages() / 8;
819 limit = max(limit, 128UL);
820 net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
821 net->ipv4.sysctl_tcp_mem[1] = limit;
822 net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
823 817
824 net->ipv4.ipv4_hdr = register_net_sysctl_table(net, 818 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
825 net_ipv4_ctl_path, table); 819 net_ipv4_ctl_path, table);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 06373b4a449a..37755ccc0e96 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1876,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how)
1876} 1876}
1877EXPORT_SYMBOL(tcp_shutdown); 1877EXPORT_SYMBOL(tcp_shutdown);
1878 1878
1879bool tcp_check_oom(struct sock *sk, int shift)
1880{
1881 bool too_many_orphans, out_of_socket_memory;
1882
1883 too_many_orphans = tcp_too_many_orphans(sk, shift);
1884 out_of_socket_memory = tcp_out_of_memory(sk);
1885
1886 if (too_many_orphans && net_ratelimit())
1887 pr_info("TCP: too many orphaned sockets\n");
1888 if (out_of_socket_memory && net_ratelimit())
1889 pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
1890 return too_many_orphans || out_of_socket_memory;
1891}
1892
1879void tcp_close(struct sock *sk, long timeout) 1893void tcp_close(struct sock *sk, long timeout)
1880{ 1894{
1881 struct sk_buff *skb; 1895 struct sk_buff *skb;
@@ -2015,10 +2029,7 @@ adjudge_to_death:
2015 } 2029 }
2016 if (sk->sk_state != TCP_CLOSE) { 2030 if (sk->sk_state != TCP_CLOSE) {
2017 sk_mem_reclaim(sk); 2031 sk_mem_reclaim(sk);
2018 if (tcp_too_many_orphans(sk, 0)) { 2032 if (tcp_check_oom(sk, 0)) {
2019 if (net_ratelimit())
2020 printk(KERN_INFO "TCP: too many of orphaned "
2021 "sockets\n");
2022 tcp_set_state(sk, TCP_CLOSE); 2033 tcp_set_state(sk, TCP_CLOSE);
2023 tcp_send_active_reset(sk, GFP_ATOMIC); 2034 tcp_send_active_reset(sk, GFP_ATOMIC);
2024 NET_INC_STATS_BH(sock_net(sk), 2035 NET_INC_STATS_BH(sock_net(sk),
@@ -3218,7 +3229,6 @@ __setup("thash_entries=", set_thash_entries);
3218 3229
3219void tcp_init_mem(struct net *net) 3230void tcp_init_mem(struct net *net)
3220{ 3231{
3221 /* Set per-socket limits to no more than 1/128 the pressure threshold */
3222 unsigned long limit = nr_free_buffer_pages() / 8; 3232 unsigned long limit = nr_free_buffer_pages() / 8;
3223 limit = max(limit, 128UL); 3233 limit = max(limit, 128UL);
3224 net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; 3234 net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
@@ -3287,7 +3297,8 @@ void __init tcp_init(void)
3287 sysctl_max_syn_backlog = max(128, cnt / 256); 3297 sysctl_max_syn_backlog = max(128, cnt / 256);
3288 3298
3289 tcp_init_mem(&init_net); 3299 tcp_init_mem(&init_net);
3290 limit = nr_free_buffer_pages() / 8; 3300 /* Set per-socket limits to no more than 1/128 the pressure threshold */
3301 limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10);
3291 limit = max(limit, 128UL); 3302 limit = max(limit, 128UL);
3292 max_share = min(4UL*1024*1024, limit); 3303 max_share = min(4UL*1024*1024, limit);
3293 3304
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 976034f82320..53c8ce4046b2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1307,25 +1307,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1307 return in_sack; 1307 return in_sack;
1308} 1308}
1309 1309
1310static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, 1310/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
1311 struct tcp_sacktag_state *state, 1311static u8 tcp_sacktag_one(struct sock *sk,
1312 struct tcp_sacktag_state *state, u8 sacked,
1313 u32 start_seq, u32 end_seq,
1312 int dup_sack, int pcount) 1314 int dup_sack, int pcount)
1313{ 1315{
1314 struct tcp_sock *tp = tcp_sk(sk); 1316 struct tcp_sock *tp = tcp_sk(sk);
1315 u8 sacked = TCP_SKB_CB(skb)->sacked;
1316 int fack_count = state->fack_count; 1317 int fack_count = state->fack_count;
1317 1318
1318 /* Account D-SACK for retransmitted packet. */ 1319 /* Account D-SACK for retransmitted packet. */
1319 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1320 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1320 if (tp->undo_marker && tp->undo_retrans && 1321 if (tp->undo_marker && tp->undo_retrans &&
1321 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) 1322 after(end_seq, tp->undo_marker))
1322 tp->undo_retrans--; 1323 tp->undo_retrans--;
1323 if (sacked & TCPCB_SACKED_ACKED) 1324 if (sacked & TCPCB_SACKED_ACKED)
1324 state->reord = min(fack_count, state->reord); 1325 state->reord = min(fack_count, state->reord);
1325 } 1326 }
1326 1327
1327 /* Nothing to do; acked frame is about to be dropped (was ACKed). */ 1328 /* Nothing to do; acked frame is about to be dropped (was ACKed). */
1328 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) 1329 if (!after(end_seq, tp->snd_una))
1329 return sacked; 1330 return sacked;
1330 1331
1331 if (!(sacked & TCPCB_SACKED_ACKED)) { 1332 if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1344,13 +1345,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
1344 /* New sack for not retransmitted frame, 1345 /* New sack for not retransmitted frame,
1345 * which was in hole. It is reordering. 1346 * which was in hole. It is reordering.
1346 */ 1347 */
1347 if (before(TCP_SKB_CB(skb)->seq, 1348 if (before(start_seq,
1348 tcp_highest_sack_seq(tp))) 1349 tcp_highest_sack_seq(tp)))
1349 state->reord = min(fack_count, 1350 state->reord = min(fack_count,
1350 state->reord); 1351 state->reord);
1351 1352
1352 /* SACK enhanced F-RTO (RFC4138; Appendix B) */ 1353 /* SACK enhanced F-RTO (RFC4138; Appendix B) */
1353 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) 1354 if (!after(end_seq, tp->frto_highmark))
1354 state->flag |= FLAG_ONLY_ORIG_SACKED; 1355 state->flag |= FLAG_ONLY_ORIG_SACKED;
1355 } 1356 }
1356 1357
@@ -1368,8 +1369,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
1368 1369
1369 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ 1370 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
1370 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && 1371 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1371 before(TCP_SKB_CB(skb)->seq, 1372 before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
1372 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1373 tp->lost_cnt_hint += pcount; 1373 tp->lost_cnt_hint += pcount;
1374 1374
1375 if (fack_count > tp->fackets_out) 1375 if (fack_count > tp->fackets_out)
@@ -1388,6 +1388,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
1388 return sacked; 1388 return sacked;
1389} 1389}
1390 1390
1391/* Shift newly-SACKed bytes from this skb to the immediately previous
1392 * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
1393 */
1391static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, 1394static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1392 struct tcp_sacktag_state *state, 1395 struct tcp_sacktag_state *state,
1393 unsigned int pcount, int shifted, int mss, 1396 unsigned int pcount, int shifted, int mss,
@@ -1395,10 +1398,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1395{ 1398{
1396 struct tcp_sock *tp = tcp_sk(sk); 1399 struct tcp_sock *tp = tcp_sk(sk);
1397 struct sk_buff *prev = tcp_write_queue_prev(sk, skb); 1400 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1401 u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
1402 u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
1398 1403
1399 BUG_ON(!pcount); 1404 BUG_ON(!pcount);
1400 1405
1401 if (skb == tp->lost_skb_hint) 1406 /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
1407 if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
1402 tp->lost_cnt_hint += pcount; 1408 tp->lost_cnt_hint += pcount;
1403 1409
1404 TCP_SKB_CB(prev)->end_seq += shifted; 1410 TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1424,8 +1430,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1424 skb_shinfo(skb)->gso_type = 0; 1430 skb_shinfo(skb)->gso_type = 0;
1425 } 1431 }
1426 1432
1427 /* We discard results */ 1433 /* Adjust counters and hints for the newly sacked sequence range but
1428 tcp_sacktag_one(skb, sk, state, dup_sack, pcount); 1434 * discard the return value since prev is already marked.
1435 */
1436 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1437 start_seq, end_seq, dup_sack, pcount);
1429 1438
1430 /* Difference in this won't matter, both ACKed by the same cumul. ACK */ 1439 /* Difference in this won't matter, both ACKed by the same cumul. ACK */
1431 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); 1440 TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1664,10 +1673,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1664 break; 1673 break;
1665 1674
1666 if (in_sack) { 1675 if (in_sack) {
1667 TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, 1676 TCP_SKB_CB(skb)->sacked =
1668 state, 1677 tcp_sacktag_one(sk,
1669 dup_sack, 1678 state,
1670 tcp_skb_pcount(skb)); 1679 TCP_SKB_CB(skb)->sacked,
1680 TCP_SKB_CB(skb)->seq,
1681 TCP_SKB_CB(skb)->end_seq,
1682 dup_sack,
1683 tcp_skb_pcount(skb));
1671 1684
1672 if (!before(TCP_SKB_CB(skb)->seq, 1685 if (!before(TCP_SKB_CB(skb)->seq,
1673 tcp_highest_sack_seq(tp))) 1686 tcp_highest_sack_seq(tp)))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 337ba4cca052..94d683a61cba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -651,6 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
651 arg.iov[0].iov_len, IPPROTO_TCP, 0); 651 arg.iov[0].iov_len, IPPROTO_TCP, 0);
652 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 652 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
653 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 653 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
654 /* When socket is gone, all binding information is lost.
655 * routing might fail in this case. using iif for oif to
656 * make sure we can deliver it
657 */
658 arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
654 659
655 net = dev_net(skb_dst(skb)->dev); 660 net = dev_net(skb_dst(skb)->dev);
656 arg.tos = ip_hdr(skb)->tos; 661 arg.tos = ip_hdr(skb)->tos;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a516d1e399df..cd2e0723266d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -77,10 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
77 if (sk->sk_err_soft) 77 if (sk->sk_err_soft)
78 shift++; 78 shift++;
79 79
80 if (tcp_too_many_orphans(sk, shift)) { 80 if (tcp_check_oom(sk, shift)) {
81 if (net_ratelimit())
82 printk(KERN_INFO "Out of socket memory\n");
83
84 /* Catch exceptional cases, when connection requires reset. 81 /* Catch exceptional cases, when connection requires reset.
85 * 1. Last segment was sent recently. */ 82 * 1. Last segment was sent recently. */
86 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || 83 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||