Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/af_inet.c			 7
-rw-r--r--	net/ipv4/devinet.c			 3
-rw-r--r--	net/ipv4/ip_forward.c			71
-rw-r--r--	net/ipv4/ip_output.c			 3
-rw-r--r--	net/ipv4/ip_tunnel.c			82
-rw-r--r--	net/ipv4/ip_tunnel_core.c		47
-rw-r--r--	net/ipv4/ipconfig.c			 2
-rw-r--r--	net/ipv4/netfilter/Kconfig		 5
-rw-r--r--	net/ipv4/netfilter/Makefile		 1
-rw-r--r--	net/ipv4/netfilter/nf_nat_h323.c	 5
-rw-r--r--	net/ipv4/netfilter/nf_nat_snmp_basic.c	 4
-rw-r--r--	net/ipv4/netfilter/nft_reject_ipv4.c	75
-rw-r--r--	net/ipv4/route.c			13
-rw-r--r--	net/ipv4/tcp.c				10
-rw-r--r--	net/ipv4/tcp_cong.c			 3
-rw-r--r--	net/ipv4/tcp_input.c			21
-rw-r--r--	net/ipv4/tcp_output.c			37
-rw-r--r--	net/ipv4/udp_offload.c			17
18 files changed, 290 insertions, 116 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ecd2c3f245ce..19ab78aca547 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
-	/* Note : following gso_segment() might change skb->encapsulation */
-	udpfrag = !skb->encapsulation && proto == IPPROTO_UDP;
+	if (skb->encapsulation &&
+	    skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+		udpfrag = proto == IPPROTO_UDP && encap;
+	else
+		udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
 
 	ops = rcu_dereference(inet_offloads[proto]);
 	if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
-	       + nla_total_size(4);  /* IFA_FLAGS */
+	       + nla_total_size(4)  /* IFA_FLAGS */
+	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
 }
 
 static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217a8afd..f3869c186d97 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+	       !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu || skb->local_df)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+	unsigned int mtu;
+
+	if (skb->local_df || !skb_is_gso(skb))
+		return false;
+
+	mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
+
+	/* if seglen > mtu, do software segmentation for IP fragmentation on
+	 * output. DF bit cannot be set since ip_forward would have sent
+	 * icmp error.
+	 */
+	return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	netdev_features_t features;
+	struct sk_buff *segs;
+	int ret = 0;
+
+	features = netif_skb_dev_features(skb, dst->dev);
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = dst_output(segs);
+
+		if (err && ret == 0)
+			ret = err;
+		segs = nskb;
+	} while (segs);
+
+	return ret;
+}
+
 static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	if (ip_gso_exceeds_dst_mtu(skb))
+		return ip_forward_finish_gso(skb);
+
 	return dst_output(skb);
 }
 
@@ -91,8 +159,7 @@ int ip_forward(struct sk_buff *skb)
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
 	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
-	if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
-		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+	if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
 		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(mtu));
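
The helpers above work because skb_gso_network_seglen() reports the size of the segments a GSO skb will produce on the wire, not the size of the aggregate skb, which is what the old skb->len check compared. A minimal sketch of the assumed semantics (illustrative user-space code, not the kernel helper itself):

	/* Each segment a GSO packet resegments into carries the network and
	 * transport headers plus at most gso_size bytes of payload; this is
	 * the value ip_exceeds_mtu() compares against the egress MTU.
	 */
	static unsigned int example_gso_network_seglen(unsigned int nhlen,
						       unsigned int thlen,
						       unsigned int gso_size)
	{
		return nhlen + thlen + gso_size;
	}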
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8971780aec7c..73c6b63bba74 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
-	to->nf_trace = from->nf_trace;
-#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	to->ipvs_property = from->ipvs_property;
 #endif
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..78a89e61925d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -93,83 +93,32 @@ static void tunnel_dst_reset(struct ip_tunnel *t)
 	tunnel_dst_set(t, NULL);
 }
 
-static void tunnel_dst_reset_all(struct ip_tunnel *t)
+void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
 {
 	int i;
 
 	for_each_possible_cpu(i)
 		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
 }
+EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
 
-static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
 {
 	struct dst_entry *dst;
 
 	rcu_read_lock();
 	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
-	if (dst)
+	if (dst) {
+		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+			rcu_read_unlock();
+			tunnel_dst_reset(t);
+			return NULL;
+		}
 		dst_hold(dst);
-	rcu_read_unlock();
-	return dst;
-}
-
-static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
-{
-	struct dst_entry *dst = tunnel_dst_get(t);
-
-	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-		tunnel_dst_reset(t);
-		return NULL;
-	}
-
-	return dst;
-}
-
-/* Often modified stats are per cpu, other are shared (netdev->stats) */
-struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
-						struct rtnl_link_stats64 *tot)
-{
-	int i;
-
-	for_each_possible_cpu(i) {
-		const struct pcpu_sw_netstats *tstats =
-						   per_cpu_ptr(dev->tstats, i);
-		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
-		unsigned int start;
-
-		do {
-			start = u64_stats_fetch_begin_bh(&tstats->syncp);
-			rx_packets = tstats->rx_packets;
-			tx_packets = tstats->tx_packets;
-			rx_bytes = tstats->rx_bytes;
-			tx_bytes = tstats->tx_bytes;
-		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes += rx_bytes;
-		tot->tx_bytes += tx_bytes;
-	}
-
-	tot->multicast = dev->stats.multicast;
-
-	tot->rx_crc_errors = dev->stats.rx_crc_errors;
-	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
-	tot->rx_length_errors = dev->stats.rx_length_errors;
-	tot->rx_frame_errors = dev->stats.rx_frame_errors;
-	tot->rx_errors = dev->stats.rx_errors;
-
-	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
-	tot->tx_dropped = dev->stats.tx_dropped;
-	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
-	tot->tx_errors = dev->stats.tx_errors;
-
-	tot->collisions = dev->stats.collisions;
-
-	return tot;
+	}
+	rcu_read_unlock();
+	return (struct rtable *)dst;
 }
-EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
 
 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
 				__be16 flags, __be32 key)
@@ -584,7 +533,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct flowi4 fl4;
 	u8 tos, ttl;
 	__be16 df;
-	struct rtable *rt = NULL;	/* Route to the other host */
+	struct rtable *rt;		/* Route to the other host */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst;
 	int err;
@@ -657,8 +606,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
 			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
 
-	if (connected)
-		rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
+	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
 
 	if (!rt) {
 		rt = ip_route_output_key(tunnel->net, &fl4);
@@ -766,7 +714,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
 		if (set_mtu)
 			dev->mtu = mtu;
 	}
-	tunnel_dst_reset_all(t);
+	ip_tunnel_dst_reset_all(t);
 	netdev_state_change(dev);
 }
 
@@ -1095,7 +1043,7 @@ void ip_tunnel_uninit(struct net_device *dev)
 	if (itn->fb_tunnel_dev != dev)
 		ip_tunnel_del(netdev_priv(dev));
 
-	tunnel_dst_reset_all(tunnel);
+	ip_tunnel_dst_reset_all(tunnel);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
 
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6156f4ef5e91..6f847dd56dbc 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -108,7 +108,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
 	nf_reset(skb);
 	secpath_reset(skb);
 	skb_clear_hash_if_not_l4(skb);
-	skb_dst_drop(skb);
 	skb->vlan_tci = 0;
 	skb_set_queue_mapping(skb, 0);
 	skb->pkt_type = PACKET_HOST;
@@ -148,3 +147,49 @@ error:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+
+/* Often modified stats are per cpu, other are shared (netdev->stats) */
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+						struct rtnl_link_stats64 *tot)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_sw_netstats *tstats =
+						   per_cpu_ptr(dev->tstats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes += rx_bytes;
+		tot->tx_bytes += tx_bytes;
+	}
+
+	tot->multicast = dev->stats.multicast;
+
+	tot->rx_crc_errors = dev->stats.rx_crc_errors;
+	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_frame_errors = dev->stats.rx_frame_errors;
+	tot->rx_errors = dev->stats.rx_errors;
+
+	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+	tot->tx_dropped = dev->stats.tx_dropped;
+	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+	tot->tx_errors = dev->stats.tx_errors;
+
+	tot->collisions = dev->stats.collisions;
+
+	return tot;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
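
ip_tunnel_get_stats64() moves verbatim from ip_tunnel.c (see the deletion in the previous file) into ip_tunnel_core.c. A sketch of how a tunnel driver typically consumes the helper (the surrounding ops struct is illustrative, not part of this diff):

	static const struct net_device_ops example_tunnel_netdev_ops = {
		/* folds each CPU's pcpu_sw_netstats into the rtnl totals */
		.ndo_get_stats64 = ip_tunnel_get_stats64,
	};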
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138fa523..b3e86ea7b71b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -273,7 +273,7 @@ static int __init ic_open_devs(void)
 
 		msleep(1);
 
-		if time_before(jiffies, next_msg)
+		if (time_before(jiffies, next_msg))
 			continue;
 
 		elapsed = jiffies_to_msecs(jiffies - start);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
 
+config NFT_REJECT_IPV4
+	depends on NF_TABLES_IPV4
+	default NFT_REJECT
+	tristate
+
 config NF_TABLES_ARP
 	depends on NF_TABLES
 	tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # generic IP tables
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nf_ct_expect_related(rtcp_exp);
 		if (ret == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (ret == -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			continue;
+		} else if (ret < 0) {
 			nf_ct_unexpect_related(rtp_exp);
 			nated_port = 0;
 			break;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index d551e31b416e..7c676671329d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
 		map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
 	} else {
 		/* DNAT replies */
-		map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
-		map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+		map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+		map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
 	}
 
 	if (map.from == map.to)
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e79718a382f2
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		nf_send_unreach(pkt->skb, priv->icmp_code);
+		break;
+	case NFT_REJECT_TCP_RST:
+		nf_send_reset(pkt->skb, pkt->ops->hooknum);
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+	.type		= &nft_reject_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.eval		= nft_reject_ipv4_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "reject",
+	.ops		= &nft_reject_ipv4_ops,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+	return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+	nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
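
Usage note (not part of the diff): assuming the standard nft(8) reject-statement syntax, a rule such as

	nft add rule ip filter input tcp dport 22 reject with tcp reset

would exercise the NFT_REJECT_TCP_RST branch of nft_reject_ipv4_eval() above, while a plain "reject" takes the NFT_REJECT_ICMP_UNREACH branch.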
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 25071b48921c..4c011ec69ed4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1597,6 +1597,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway = 0;
 	rth->rt_uses_gateway = 0;
 	INIT_LIST_HEAD(&rth->rt_uncached);
+	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
@@ -1695,10 +1696,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	err = fib_lookup(net, &fl4, &res);
-	if (err != 0)
+	if (err != 0) {
+		if (!IN_DEV_FORWARD(in_dev))
+			err = -EHOSTUNREACH;
 		goto no_route;
-
-	RT_CACHE_STAT_INC(in_slow_tot);
+	}
 
 	if (res.type == RTN_BROADCAST)
 		goto brd_input;
@@ -1712,8 +1714,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto local_input;
 	}
 
-	if (!IN_DEV_FORWARD(in_dev))
+	if (!IN_DEV_FORWARD(in_dev)) {
+		err = -EHOSTUNREACH;
 		goto no_route;
+	}
 	if (res.type != RTN_UNICAST)
 		goto martian_destination;
 
@@ -1768,6 +1772,7 @@ local_input:
 	rth->rt_gateway = 0;
 	rth->rt_uses_gateway = 0;
 	INIT_LIST_HEAD(&rth->rt_uncached);
+	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..97c8f5620c43 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
 	}
 }
 
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+				int *copied, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int err, flags;
@@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
 	if (unlikely(tp->fastopen_req == NULL))
 		return -ENOBUFS;
 	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = size;
 
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
 	err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
 				    msg->msg_namelen, flags);
-	*size = tp->fastopen_req->copied;
+	*copied = tp->fastopen_req->copied;
 	tcp_free_fastopen_req(tp);
 	return err;
 }
@@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	flags = msg->msg_flags;
 	if (flags & MSG_FASTOPEN) {
-		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
 		else if (err)
@@ -2229,7 +2231,7 @@ adjudge_to_death:
 	/* This is a (useful) BSD violating of the RFC. There is a
 	 * problem with TCP as specified in that the other end could
 	 * keep a socket open forever with no application left this end.
-	 * We use a 3 minute timeout (about the same as BSD) then kill
+	 * We use a 1 minute timeout (about the same as BSD) then kill
 	 * our end. If they send after that then tough - BUT: long enough
 	 * that we won't make the old 4*rto = almost no time - whoops
 	 * reset mistake.
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ad37bf18ae4b..2388275adb9b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
 	left = tp->snd_cwnd - in_flight;
 	if (sk_can_gso(sk) &&
 	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left * tp->mss_cache < sk->sk_gso_max_size &&
-	    left < sk->sk_gso_max_segs)
+	    left < tp->xmit_size_goal_segs)
 		return true;
 	return left <= tcp_max_tso_deferred_mss(tp);
 }
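
The hunk replaces two device-level bounds (sk_gso_max_size/sk_gso_max_segs) with the connection's own TSO sizing goal. A user-space restatement of the new check (a sketch; the parameters mirror tp->snd_cwnd, in_flight, sysctl_tcp_tso_win_divisor and tp->xmit_size_goal_segs from the hunk):

	#include <stdbool.h>
	#include <stdint.h>

	static bool example_is_cwnd_limited(uint32_t snd_cwnd, uint32_t in_flight,
					    uint32_t tso_win_divisor,
					    uint32_t xmit_size_goal_segs,
					    bool can_gso)
	{
		uint32_t left = snd_cwnd - in_flight;	/* unused cwnd, in packets */

		/* e.g. cwnd = 80, in_flight = 70, divisor = 3, goal = 16:
		 * left = 10, 10 * 3 < 80 and 10 < 16 -> still cwnd-limited.
		 * (The kernel additionally falls back to
		 * left <= tcp_max_tso_deferred_mss(tp).) */
		return can_gso &&
		       left * tso_win_divisor < snd_cwnd &&
		       left < xmit_size_goal_segs;
	}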
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..eeaac399420d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	long m = mrtt; /* RTT */
+	u32 srtt = tp->srtt;
 
 	/* The following amusing code comes from Jacobson's
 	 * article in SIGCOMM '88. Note that rtt and mdev
@@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if (m == 0)
-		m = 1;
-	if (tp->srtt != 0) {
-		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
-		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
+	if (srtt != 0) {
+		m -= (srtt >> 3);	/* m is now error in rtt est */
+		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
 			m -= (tp->mdev >> 2);   /* similar update on mdev */
@@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		srtt = m << 3;		/* take the measured time to be rtt */
 		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
+	tp->srtt = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
-	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	/* Correction for small srtt and scheduling constraints.
+	 * For small rtt, consider noise is too high, and use
+	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
+	 *
 	 * We probably need usec resolution in the future.
 	 * Note: This also takes care of possible srtt=0 case,
 	 * when tcp_rtt_estimator() was not yet called.
@@ -1943,8 +1945,9 @@ void tcp_enter_loss(struct sock *sk, int how)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
 			tp->undo_marker = 0;
+
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
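
The srtt hunks keep Jacobson's SIGCOMM '88 smoothing but compute it in a local variable so a genuine measurement is never stored as 0 (srtt == 0 is reserved for "no estimate yet", which the tcp_schedule_loss_probe() change in tcp_output.c below relies on). A self-contained restatement of the arithmetic (user-space sketch; the mdev/rttvar handling is omitted):

	#include <stdint.h>

	/* srtt is kept as fixed point scaled by 8 (<< 3), so after
	 * "m -= srtt >> 3", "srtt += m" is srtt = 7/8*srtt + 1/8*sample. */
	static uint32_t example_srtt_update(uint32_t srtt, uint32_t sample)
	{
		long m = sample;

		if (srtt != 0) {
			m -= (srtt >> 3);	/* error vs. current estimate */
			srtt += m;		/* 7/8 old + 1/8 new */
		} else {
			srtt = m << 3;		/* first measurement */
		}
		return srtt ? srtt : 1;		/* 0 means "no estimate" */
	}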
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..f0eb4e337ec8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
 	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+			       0, GFP_ATOMIC);
 }
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -863,8 +864,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
@@ -1904,7 +1905,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-			break;
+			/* It is possible TX completion already happened
+			 * before we set TSQ_THROTTLED, so we must
+			 * test again the condition.
+			 * We abuse smp_mb__after_clear_bit() because
+			 * there is no smp_mb__after_set_bit() yet
+			 */
+			smp_mb__after_clear_bit();
+			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+				break;
 		}
 
 		limit = mss_now;
@@ -1977,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -2328,6 +2337,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int cur_mss;
+	int err;
 
 	/* Inconslusive MTU probe */
 	if (icsk->icsk_mtup.probe_size) {
@@ -2391,11 +2401,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
 						   GFP_ATOMIC);
-		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			      -ENOBUFS;
+		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			     -ENOBUFS;
 	} else {
-		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
+
+	if (likely(!err))
+		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+	return err;
 }
 
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2899,7 +2913,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
 		MAX_TCP_OPTION_SPACE;
 
-	syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+	space = min_t(size_t, space, fo->size);
+
+	/* limit to order-0 allocations */
+	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+	syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
 				   sk->sk_allocation);
 	if (syn_data == NULL)
 		goto fallback;
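
The TSQ hunk above is the classic set-flag / barrier / re-check pattern against a concurrent TX-completion path that clears the flag: kernel set_bit() implies no ordering, hence the explicit barrier before re-reading sk_wmem_alloc. A generic sketch of the ordering it depends on (C11 atomics, illustrative only; an explicit fence stands in for smp_mb__after_clear_bit()):

	#include <stdatomic.h>
	#include <stdbool.h>

	static bool should_throttle(atomic_uint *wmem_alloc, atomic_uint *flags,
				    unsigned int limit, unsigned int throttled_flag)
	{
		if (atomic_load(wmem_alloc) <= limit)
			return false;
		atomic_fetch_or(flags, throttled_flag);	/* publish "throttled" */
		atomic_thread_fence(memory_order_seq_cst);	/* order flag vs. re-read */
		/* TX completion may have freed memory before it could see the
		 * flag; re-check so the queue is never left stalled. */
		return atomic_load(wmem_alloc) > limit;
	}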
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
 static DEFINE_SPINLOCK(udp_offload_lock);
 static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
 
+#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
+
 struct udp_offload_priv {
 	struct udp_offload	*offload;
 	struct rcu_head		rcu;
@@ -100,8 +102,7 @@ out:
 
 int udp_add_offload(struct udp_offload *uo)
 {
-	struct udp_offload_priv __rcu **head = &udp_offload_base;
-	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
+	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
 
 	if (!new_offload)
 		return -ENOMEM;
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
 	new_offload->offload = uo;
 
 	spin_lock(&udp_offload_lock);
-	rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
-	rcu_assign_pointer(*head, new_offload);
+	new_offload->next = udp_offload_base;
+	rcu_assign_pointer(udp_offload_base, new_offload);
 	spin_unlock(&udp_offload_lock);
 
 	return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
 
 	spin_lock(&udp_offload_lock);
 
-	uo_priv = rcu_dereference(*head);
+	uo_priv = udp_deref_protected(*head);
 	for (; uo_priv != NULL;
-	     uo_priv = rcu_dereference(*head)) {
-
+	     uo_priv = udp_deref_protected(*head)) {
 		if (uo_priv->offload == uo) {
-			rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
+			rcu_assign_pointer(*head,
+					   udp_deref_protected(uo_priv->next));
 			goto unlock;
 		}
 		head = &uo_priv->next;
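
udp_deref_protected() lets the writer side, which holds udp_offload_lock, dereference the __rcu list pointers without an RCU read-side critical section while still getting lockdep checking; plain rcu_dereference() there was wrong since the writer is not in rcu_read_lock(). For contrast, a sketch of the reader side the list is published for (assumed shape; the real lookup in this file also matches the destination port):

	rcu_read_lock();
	for (uo_priv = rcu_dereference(udp_offload_base); uo_priv;
	     uo_priv = rcu_dereference(uo_priv->next)) {
		if (uo_priv->offload->port == uh->dest)
			break;	/* found a matching UDP offload handler */
	}
	rcu_read_unlock();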