Diffstat (limited to 'net/ipv4')
42 files changed, 1342 insertions, 999 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 05c57f0fcabe..dbc10d84161f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -307,6 +307,10 @@ config NET_IPVTI
 	  the notion of a secure tunnel for IPSEC and then use routing protocol
 	  on top.
 
+config NET_UDP_TUNNEL
+	tristate
+	default n
+
 config INET_AH
 	tristate "IP: AH transformation"
 	select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f032688d20d3..8ee1cd4053ee 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NET_IPIP) += ipip.o
 gre-y := gre_demux.o
 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
 obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
 obj-$(CONFIG_NET_IPVTI) += ip_vti.o
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
 obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d5e6836cf772..d156b3c5f363 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1429,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	int proto = iph->protocol;
 	int err = -ENOSYS;
 
+	if (skb->encapsulation)
+		skb_set_inner_network_header(skb, nhoff);
+
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
 
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index a3095fdefbed..90c0e8386116 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
+	inet_set_txhash(sk);
 	inet->inet_id = jiffies;
 
 	sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e9449376b58e..214882e7d6de 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -180,11 +180,12 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
 #ifdef CONFIG_SYSCTL
-static void devinet_sysctl_register(struct in_device *idev);
+static int devinet_sysctl_register(struct in_device *idev);
 static void devinet_sysctl_unregister(struct in_device *idev);
 #else
-static void devinet_sysctl_register(struct in_device *idev)
+static int devinet_sysctl_register(struct in_device *idev)
 {
+	return 0;
 }
 static void devinet_sysctl_unregister(struct in_device *idev)
 {
@@ -232,6 +233,7 @@ EXPORT_SYMBOL(in_dev_finish_destroy);
 static struct in_device *inetdev_init(struct net_device *dev)
 {
 	struct in_device *in_dev;
+	int err = -ENOMEM;
 
 	ASSERT_RTNL();
 
@@ -252,7 +254,13 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	/* Account for reference dev->ip_ptr (below) */
 	in_dev_hold(in_dev);
 
-	devinet_sysctl_register(in_dev);
+	err = devinet_sysctl_register(in_dev);
+	if (err) {
+		in_dev->dead = 1;
+		in_dev_put(in_dev);
+		in_dev = NULL;
+		goto out;
+	}
 	ip_mc_init_dev(in_dev);
 	if (dev->flags & IFF_UP)
 		ip_mc_up(in_dev);
@@ -260,7 +268,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	/* we can receive as soon as ip_ptr is set -- do this last */
 	rcu_assign_pointer(dev->ip_ptr, in_dev);
 out:
-	return in_dev;
+	return in_dev ?: ERR_PTR(err);
 out_kfree:
 	kfree(in_dev);
 	in_dev = NULL;
@@ -1347,8 +1355,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	if (!in_dev) {
 		if (event == NETDEV_REGISTER) {
 			in_dev = inetdev_init(dev);
-			if (!in_dev)
-				return notifier_from_errno(-ENOMEM);
+			if (IS_ERR(in_dev))
+				return notifier_from_errno(PTR_ERR(in_dev));
 			if (dev->flags & IFF_LOOPBACK) {
 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
@@ -2182,11 +2190,21 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 	kfree(t);
 }
 
-static void devinet_sysctl_register(struct in_device *idev)
+static int devinet_sysctl_register(struct in_device *idev)
 {
-	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
-	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
+	int err;
+
+	if (!sysctl_dev_name_is_allowed(idev->dev->name))
+		return -EINVAL;
+
+	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
+	if (err)
+		return err;
+	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
				  &idev->cnf);
+	if (err)
+		neigh_sysctl_unregister(idev->arp_parms);
+	return err;
 }
 
 static void devinet_sysctl_unregister(struct in_device *idev)
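
Note on the devinet.c change above: inetdev_init() now reports *why* it failed by returning an errno encoded in the pointer itself, which the notifier unpacks with IS_ERR()/PTR_ERR(). Below is a minimal, self-contained sketch of that ERR_PTR encoding; it is a userspace re-implementation for illustration only, not the kernel's <linux/err.h>:

/* Sketch of the ERR_PTR/IS_ERR pattern used by inetdev_init().
 * Standalone re-implementation for illustration. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

/* Caller sees either a valid object or an encoded errno, never NULL. */
static void *make_object(int fail)
{
	if (fail)
		return ERR_PTR(-EINVAL);	/* stands in for a failed sysctl registration */
	return malloc(16);			/* stands in for the in_device */
}

int main(void)
{
	void *obj = make_object(1);

	if (IS_ERR(obj))
		printf("error: %ld\n", PTR_ERR(obj));	/* prints -22 */
	else
		free(obj);
	return 0;
}

The encoding works because the top 4095 values of the address space are never valid pointers, so errnos can travel through the same return value.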
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4e9619bca732..0485bf7f8f03 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 
 	skb_push(skb, hdr_len);
 
+	skb_reset_transport_header(skb);
 	greh = (struct gre_base_hdr *)skb->data;
 	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
 	greh->protocol = tpi->proto;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index eb92deb12666..6556263c8fa5 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -74,7 +74,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
 	segs = skb_mac_gso_segment(skb, enc_features);
-	if (!segs || IS_ERR(segs)) {
+	if (IS_ERR_OR_NULL(segs)) {
 		skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
 		goto out;
 	}
@@ -263,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
 	int err = -ENOENT;
 	__be16 type;
 
+	skb->encapsulation = 1;
+	skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
+
 	type = greh->protocol;
 	if (greh->flags & GRE_KEY)
 		grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79c3d947a481..42b7bcf8045b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -739,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb)
			/* fall through */
		case 0:
			info = ntohs(icmph->un.frag.mtu);
-			if (!info)
-				goto out;
		}
		break;
	case ICMP_SR_FAILED:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6748d420f714..f10eab462282 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1321,7 +1321,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	atomic_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
-	setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
+	setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
 	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
 #endif
 
@@ -1944,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 	rtnl_lock();
 	in_dev = ip_mc_find_dev(net, imr);
+	if (!in_dev) {
+		ret = -ENODEV;
+		goto out;
+	}
 	ifindex = imr->imr_ifindex;
 	for (imlp = &inet->mc_list;
	     (iml = rtnl_dereference(*imlp)) != NULL;
@@ -1961,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 		*imlp = iml->next_rcu;
 
-		if (in_dev)
-			ip_mc_dec_group(in_dev, group);
+		ip_mc_dec_group(in_dev, group);
 		rtnl_unlock();
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
 		kfree_rcu(iml, rcu);
 		return 0;
 	}
-	if (!in_dev)
-		ret = -ENODEV;
+out:
 	rtnl_unlock();
 	return ret;
 }
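
Note on the igmp.c change above: ip_mc_leave_group() now fails fast with -ENODEV before walking the membership list, and releases the lock in a single place behind a goto label. A standalone sketch of that single-exit pattern, with hypothetical helpers and a pthread mutex standing in for the RTNL lock:

/* Sketch of the "goto out" single-exit pattern; illustrative only. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *find_dev(int exists) { return exists ? (void *)1 : NULL; }

static int leave_group(int dev_exists)
{
	int ret = -EADDRNOTAVAIL;	/* default result, as in the kernel function */
	void *dev;

	pthread_mutex_lock(&lock);
	dev = find_dev(dev_exists);
	if (!dev) {
		ret = -ENODEV;
		goto out;		/* no device: skip the list walk entirely */
	}
	/* ... search the membership list here, return 0 on success ... */
out:
	pthread_mutex_unlock(&lock);	/* one unlock site for every path */
	return ret;
}

int main(void)
{
	printf("%d\n", leave_group(0));	/* prints -ENODEV (-19 on Linux) */
	return 0;
}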
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3b01959bf4bb..62b1f73749dc 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,12 @@
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
 
+#define INETFRAGS_EVICT_BUCKETS 128
+#define INETFRAGS_EVICT_MAX	512
+
+/* don't rebuild inetfrag table with new secret more often than this */
+#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
+
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
  *	   0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = {
 };
 EXPORT_SYMBOL(ip_frag_ecn_table);
 
-static void inet_frag_secret_rebuild(unsigned long dummy)
+static unsigned int
+inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
+{
+	return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
+}
+
+static bool inet_frag_may_rebuild(struct inet_frags *f)
+{
+	return time_after(jiffies,
+	       f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
+}
+
+static void inet_frag_secret_rebuild(struct inet_frags *f)
 {
-	struct inet_frags *f = (struct inet_frags *)dummy;
-	unsigned long now = jiffies;
 	int i;
 
-	/* Per bucket lock NOT needed here, due to write lock protection */
-	write_lock(&f->lock);
+	write_seqlock_bh(&f->rnd_seqlock);
+
+	if (!inet_frag_may_rebuild(f))
+		goto out;
 
 	get_random_bytes(&f->rnd, sizeof(u32));
+
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_bucket *hb;
 		struct inet_frag_queue *q;
 		struct hlist_node *n;
 
 		hb = &f->hash[i];
+		spin_lock(&hb->chain_lock);
+
 		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
-			unsigned int hval = f->hashfn(q);
+			unsigned int hval = inet_frag_hashfn(f, q);
 
 			if (hval != i) {
 				struct inet_frag_bucket *hb_dest;
@@ -72,76 +93,195 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 
 				/* Relink to new hash chain. */
 				hb_dest = &f->hash[hval];
+
+				/* This is the only place where we take
+				 * another chain_lock while already holding
+				 * one. As this will not run concurrently,
+				 * we cannot deadlock on hb_dest lock below, if its
+				 * already locked it will be released soon since
+				 * other caller cannot be waiting for hb lock
+				 * that we've taken above.
+				 */
+				spin_lock_nested(&hb_dest->chain_lock,
+						 SINGLE_DEPTH_NESTING);
 				hlist_add_head(&q->list, &hb_dest->chain);
+				spin_unlock(&hb_dest->chain_lock);
 			}
 		}
+		spin_unlock(&hb->chain_lock);
 	}
-	write_unlock(&f->lock);
 
-	mod_timer(&f->secret_timer, now + f->secret_interval);
+	f->rebuild = false;
+	f->last_rebuild_jiffies = jiffies;
+out:
+	write_sequnlock_bh(&f->rnd_seqlock);
+}
+
+static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
+{
+	return q->net->low_thresh == 0 ||
+	       frag_mem_limit(q->net) >= q->net->low_thresh;
+}
+
+static unsigned int
+inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
+{
+	struct inet_frag_queue *fq;
+	struct hlist_node *n;
+	unsigned int evicted = 0;
+	HLIST_HEAD(expired);
+
+evict_again:
+	spin_lock(&hb->chain_lock);
+
+	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
+		if (!inet_fragq_should_evict(fq))
+			continue;
+
+		if (!del_timer(&fq->timer)) {
+			/* q expiring right now thus increment its refcount so
+			 * it won't be freed under us and wait until the timer
+			 * has finished executing then destroy it
+			 */
+			atomic_inc(&fq->refcnt);
+			spin_unlock(&hb->chain_lock);
+			del_timer_sync(&fq->timer);
+			WARN_ON(atomic_read(&fq->refcnt) != 1);
+			inet_frag_put(fq, f);
+			goto evict_again;
+		}
+
+		/* suppress xmit of (icmp) error packet */
+		fq->last_in &= ~INET_FRAG_FIRST_IN;
+		fq->last_in |= INET_FRAG_EVICTED;
+		hlist_del(&fq->list);
+		hlist_add_head(&fq->list, &expired);
+		++evicted;
+	}
+
+	spin_unlock(&hb->chain_lock);
+
+	hlist_for_each_entry_safe(fq, n, &expired, list)
+		f->frag_expire((unsigned long) fq);
+
+	return evicted;
+}
+
+static void inet_frag_worker(struct work_struct *work)
+{
+	unsigned int budget = INETFRAGS_EVICT_BUCKETS;
+	unsigned int i, evicted = 0;
+	struct inet_frags *f;
+
+	f = container_of(work, struct inet_frags, frags_work);
+
+	BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
+
+	local_bh_disable();
+
+	for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+		evicted += inet_evict_bucket(f, &f->hash[i]);
+		i = (i + 1) & (INETFRAGS_HASHSZ - 1);
+		if (evicted > INETFRAGS_EVICT_MAX)
+			break;
+	}
+
+	f->next_bucket = i;
+
+	local_bh_enable();
+
+	if (f->rebuild && inet_frag_may_rebuild(f))
+		inet_frag_secret_rebuild(f);
+}
+
+static void inet_frag_schedule_worker(struct inet_frags *f)
+{
+	if (unlikely(!work_pending(&f->frags_work)))
+		schedule_work(&f->frags_work);
 }
 
 void inet_frags_init(struct inet_frags *f)
 {
 	int i;
 
+	INIT_WORK(&f->frags_work, inet_frag_worker);
+
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_bucket *hb = &f->hash[i];
 
 		spin_lock_init(&hb->chain_lock);
 		INIT_HLIST_HEAD(&hb->chain);
 	}
-	rwlock_init(&f->lock);
 
-	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
-			(unsigned long)f);
-	f->secret_timer.expires = jiffies + f->secret_interval;
-	add_timer(&f->secret_timer);
+	seqlock_init(&f->rnd_seqlock);
+	f->last_rebuild_jiffies = 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_init_net(struct netns_frags *nf)
 {
-	nf->nqueues = 0;
 	init_frag_mem_limit(nf);
-	INIT_LIST_HEAD(&nf->lru_list);
-	spin_lock_init(&nf->lru_lock);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
 
 void inet_frags_fini(struct inet_frags *f)
 {
-	del_timer(&f->secret_timer);
+	cancel_work_sync(&f->frags_work);
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 {
-	nf->low_thresh = 0;
+	unsigned int seq;
+	int i;
 
+	nf->low_thresh = 0;
 	local_bh_disable();
-	inet_frag_evictor(nf, f, true);
+
+evict_again:
+	seq = read_seqbegin(&f->rnd_seqlock);
+
+	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
+		inet_evict_bucket(f, &f->hash[i]);
+
+	if (read_seqretry(&f->rnd_seqlock, seq))
+		goto evict_again;
+
 	local_bh_enable();
 
 	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
 {
 	struct inet_frag_bucket *hb;
-	unsigned int hash;
+	unsigned int seq, hash;
+
+restart:
+	seq = read_seqbegin(&f->rnd_seqlock);
 
-	read_lock(&f->lock);
-	hash = f->hashfn(fq);
+	hash = inet_frag_hashfn(f, fq);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
+	if (read_seqretry(&f->rnd_seqlock, seq)) {
+		spin_unlock(&hb->chain_lock);
+		goto restart;
+	}
+
+	return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	struct inet_frag_bucket *hb;
+
+	hb = get_frag_bucket_locked(fq, f);
 	hlist_del(&fq->list);
 	spin_unlock(&hb->chain_lock);
-
-	read_unlock(&f->lock);
-	inet_frag_lru_del(fq);
 }
 
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -165,8 +305,7 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
 	kfree_skb(skb);
 }
 
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
-		       int *work)
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
 {
 	struct sk_buff *fp;
 	struct netns_frags *nf;
@@ -186,86 +325,30 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 		fp = xp;
 	}
 	sum = sum_truesize + f->qsize;
-	if (work)
-		*work -= sum;
 	sub_frag_mem_limit(q, sum);
 
 	if (f->destructor)
 		f->destructor(q);
 	kfree(q);
-
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
-{
-	struct inet_frag_queue *q;
-	int work, evicted = 0;
-
-	if (!force) {
-		if (frag_mem_limit(nf) <= nf->high_thresh)
-			return 0;
-	}
-
-	work = frag_mem_limit(nf) - nf->low_thresh;
-	while (work > 0 || force) {
-		spin_lock(&nf->lru_lock);
-
-		if (list_empty(&nf->lru_list)) {
-			spin_unlock(&nf->lru_lock);
-			break;
-		}
-
-		q = list_first_entry(&nf->lru_list,
-				struct inet_frag_queue, lru_list);
-		atomic_inc(&q->refcnt);
-		/* Remove q from list to avoid several CPUs grabbing it */
-		list_del_init(&q->lru_list);
-
-		spin_unlock(&nf->lru_lock);
-
-		spin_lock(&q->lock);
-		if (!(q->last_in & INET_FRAG_COMPLETE))
-			inet_frag_kill(q, f);
-		spin_unlock(&q->lock);
-
-		if (atomic_dec_and_test(&q->refcnt))
-			inet_frag_destroy(q, f, &work);
-		evicted++;
-	}
-
-	return evicted;
-}
-EXPORT_SYMBOL(inet_frag_evictor);
-
 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
		struct inet_frag_queue *qp_in, struct inet_frags *f,
		void *arg)
 {
-	struct inet_frag_bucket *hb;
+	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
 	struct inet_frag_queue *qp;
-	unsigned int hash;
-
-	read_lock(&f->lock); /* Protects against hash rebuild */
-	/*
-	 * While we stayed w/o the lock other CPU could update
-	 * the rnd seed, so we need to re-calculate the hash
-	 * chain. Fortunatelly the qp_in can be used to get one.
-	 */
-	hash = f->hashfn(qp_in);
-	hb = &f->hash[hash];
-	spin_lock(&hb->chain_lock);
 
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * released the hash bucket lock.
+	 * such entry could have been created on other cpu before
+	 * we acquired hash bucket lock.
	 */
 	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			qp_in->last_in |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
@@ -278,9 +361,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &hb->chain);
-	inet_frag_lru_add(nf, qp);
+
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	return qp;
 }
@@ -290,6 +372,11 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 {
 	struct inet_frag_queue *q;
 
+	if (frag_mem_limit(nf) > nf->high_thresh) {
+		inet_frag_schedule_worker(f);
+		return NULL;
+	}
+
 	q = kzalloc(f->qsize, GFP_ATOMIC);
 	if (q == NULL)
 		return NULL;
@@ -301,7 +388,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	INIT_LIST_HEAD(&q->lru_list);
 
 	return q;
 }
@@ -320,12 +406,15 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
		struct inet_frags *f, void *key, unsigned int hash)
-	__releases(&f->lock)
 {
 	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
 	int depth = 0;
 
+	if (frag_mem_limit(nf) > nf->low_thresh)
+		inet_frag_schedule_worker(f);
+
+	hash &= (INETFRAGS_HASHSZ - 1);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
@@ -333,18 +422,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			return q;
 		}
 		depth++;
 	}
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	if (depth <= INETFRAGS_MAXDEPTH)
 		return inet_frag_create(nf, f, key);
-	else
-		return ERR_PTR(-ENOBUFS);
+
+	if (inet_frag_may_rebuild(f)) {
+		if (!f->rebuild)
+			f->rebuild = true;
+		inet_frag_schedule_worker(f);
+	}
+
+	return ERR_PTR(-ENOBUFS);
 }
 EXPORT_SYMBOL(inet_frag_find);
 
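Note on the inet_fragment.c rework above: the global rwlock and secret timer are replaced by a seqlock, so lookups compute the hash, take the per-bucket lock, and retry if a secret rebuild raced with them, while eviction moves to a budgeted workqueue. A standalone sketch of the seqlock read-retry idiom follows; the sequence counter is hand-rolled for illustration, whereas the kernel uses <linux/seqlock.h>:

/* Sketch of the retry pattern in get_frag_bucket_locked(): writers bump
 * the sequence around a rehash, readers retry if they observed a race. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;		/* even = stable, odd = write in progress */
static unsigned int secret;	/* the value writers may change (like f->rnd) */

static unsigned int read_begin(void) { return atomic_load(&seq); }
static int read_retry(unsigned int s)
{
	return (s & 1) || atomic_load(&seq) != s;
}

static void write_secret(unsigned int v)
{
	atomic_fetch_add(&seq, 1);	/* odd: rehash in progress */
	secret = v;
	atomic_fetch_add(&seq, 1);	/* even again: readers may proceed */
}

int main(void)
{
	unsigned int s, hash;

	do {
		s = read_begin();
		hash = secret * 2654435761u;	/* stand-in for f->hashfn() */
		/* a concurrent write_secret() here would force a retry */
	} while (read_retry(s));

	printf("stable hash: %u\n", hash);
	write_secret(42);
	return 0;
}

The payoff is that readers never block writers and pay only a re-read when a rebuild actually happened, which is rare by design (INETFRAGS_MIN_REBUILD_INTERVAL).
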
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ed32313e307c..634fc31aa243 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -86,11 +86,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
 
 static struct inet_frags ip4_frags;
 
-int ip_frag_nqueues(struct net *net)
-{
-	return net->ipv4.frags.nqueues;
-}
-
 int ip_frag_mem(struct net *net)
 {
 	return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +104,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 	net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
 	return jhash_3words((__force u32)id << 16 | prot,
			    (__force u32)saddr, (__force u32)daddr,
-			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
+			    ip4_frags.rnd);
 }
 
-static unsigned int ip4_hashfn(struct inet_frag_queue *q)
+static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
 {
-	struct ipq *ipq;
+	const struct ipq *ipq;
 
 	ipq = container_of(q, struct ipq, q);
 	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
+static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
 {
-	struct ipq *qp;
-	struct ip4_create_arg *arg = a;
+	const struct ipq *qp;
+	const struct ip4_create_arg *arg = a;
 
 	qp = container_of(q, struct ipq, q);
 	return qp->id == arg->iph->id &&
@@ -133,14 +128,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
	       qp->user == arg->user;
 }
 
-static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
 	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
					       frags);
 	struct net *net = container_of(ipv4, struct net, ipv4);
 
-	struct ip4_create_arg *arg = a;
+	const struct ip4_create_arg *arg = a;
 
 	qp->protocol = arg->iph->protocol;
 	qp->id = arg->iph->id;
@@ -177,18 +172,6 @@ static void ipq_kill(struct ipq *ipq)
 	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
-/* Memory limiting on fragments.  Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- */
-static void ip_evictor(struct net *net)
-{
-	int evicted;
-
-	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
-	if (evicted)
-		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
 /*
  * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
  */
@@ -207,7 +190,8 @@ static void ip_expire(unsigned long arg)
 
 	ipq_kill(qp);
 
-	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+	if (!(qp->q.last_in & INET_FRAG_EVICTED))
+		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 
 	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
@@ -260,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 	arg.iph = iph;
 	arg.user = user;
 
-	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -505,7 +488,6 @@ found:
 	}
 
 	skb_dst_drop(skb);
-	inet_frag_lru_move(&qp->q);
 	return -EINPROGRESS;
 
 err:
@@ -655,9 +637,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
-	/* Start by cleaning up the memory. */
-	ip_evictor(net);
-
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
 		int ret;
@@ -721,14 +700,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
		.data		= &init_net.ipv4.frags.high_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &init_net.ipv4.frags.low_thresh
	},
	{
		.procname	= "ipfrag_low_thresh",
		.data		= &init_net.ipv4.frags.low_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &init_net.ipv4.frags.high_thresh
	},
	{
		.procname	= "ipfrag_time",
@@ -740,10 +722,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
	{ }
 };
 
+/* secret interval has been deprecated */
+static int ip4_frags_secret_interval_unused;
 static struct ctl_table ip4_frags_ctl_table[] = {
	{
		.procname	= "ipfrag_secret_interval",
-		.data		= &ip4_frags.secret_interval,
+		.data		= &ip4_frags_secret_interval_unused,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
@@ -771,7 +755,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
			goto err_alloc;
 
		table[0].data = &net->ipv4.frags.high_thresh;
+		table[0].extra1 = &net->ipv4.frags.low_thresh;
+		table[0].extra2 = &init_net.ipv4.frags.high_thresh;
		table[1].data = &net->ipv4.frags.low_thresh;
+		table[1].extra2 = &net->ipv4.frags.high_thresh;
		table[2].data = &net->ipv4.frags.timeout;
 
		/* Don't export sysctls to unprivileged users */
@@ -873,6 +860,5 @@ void __init ipfrag_init(void)
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
-	ip4_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip4_frags);
 }
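
Note on the sysctl change above: switching the handlers to proc_dointvec_minmax couples the two thresholds, so ipfrag_low_thresh can never be raised above ipfrag_high_thresh and high_thresh can never drop below low_thresh. A standalone sketch of the resulting clamp rule, illustrative only and not the kernel's sysctl plumbing:

/* Sketch of the coupled min/max rule enforced via extra1/extra2. */
#include <stdio.h>

static int high_thresh = 4 * 1024 * 1024;
static int low_thresh  = 3 * 1024 * 1024;

static int set_low_thresh(int val)
{
	if (val < 0 || val > high_thresh)
		return -1;		/* rejected, like -EINVAL from the handler */
	low_thresh = val;
	return 0;
}

static int set_high_thresh(int val)
{
	if (val < low_thresh)
		return -1;
	high_thresh = val;
	return 0;
}

int main(void)
{
	printf("%d\n", set_low_thresh(8 * 1024 * 1024));	/* -1: above high */
	printf("%d\n", set_high_thresh(8 * 1024 * 1024));	/* 0 */
	printf("%d\n", set_low_thresh(8 * 1024 * 1024));	/* 0: now legal */
	return 0;
}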
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 5e7aecea05cd..ad382499bace 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
			optptr++;
			continue;
		}
+		if (unlikely(l < 2)) {
+			pp_ptr = optptr;
+			goto error;
+		}
		optlen = optptr[1];
		if (optlen < 2 || optlen > l) {
			pp_ptr = optptr;
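
Note on the ip_options.c fix above: with only one byte of option space left, reading optptr[1] would run past the buffer, so the parser must confirm two bytes remain before touching the length octet. A standalone sketch of the same TLV bounds check, with a hypothetical option layout for illustration:

/* Sketch of the "l < 2" guard in a minimal type-length-value parser. */
#include <stdio.h>

static int parse_options(const unsigned char *opt, int l)
{
	while (l > 0) {
		if (opt[0] == 0)		/* like IPOPT_END: stop parsing */
			break;
		if (opt[0] == 1) {		/* like IPOPT_NOOP: single byte */
			opt++;
			l--;
			continue;
		}
		if (l < 2)			/* type byte present, length octet is not */
			return -1;		/* would read past the buffer otherwise */
		int optlen = opt[1];
		if (optlen < 2 || optlen > l)
			return -1;
		opt += optlen;
		l -= optlen;
	}
	return 0;
}

int main(void)
{
	unsigned char bad[] = { 0x44 };		/* lone type byte, no length octet */
	printf("%d\n", parse_options(bad, sizeof(bad)));	/* -1, not an overread */
	return 0;
}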
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8d3b6b0e9857..b16556836d66 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -962,10 +962,6 @@ alloc_new_skb:
					       sk->sk_allocation);
			if (unlikely(skb == NULL))
				err = -ENOBUFS;
-			else
-				/* only the initial fragment is
-				   time stamped */
-				cork->tx_flags = 0;
		}
		if (skb == NULL)
			goto error;
@@ -976,7 +972,10 @@ alloc_new_skb:
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
+
+			/* only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
+			cork->tx_flags = 0;
 
			/*
			 *	Find where to start putting bytes.
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64741b938632..5cb830c78990 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1319,7 +1319,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
		if (sk->sk_type != SOCK_STREAM)
			return -ENOPROTOOPT;
 
-		msg.msg_control = optval;
+		msg.msg_control = (__force void *) optval;
		msg.msg_controllen = len;
		msg.msg_flags = flags;
 
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 097b3e7c1e8f..dd8c8c765799 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
 {
 	struct dst_entry *old_dst;
 
-	if (dst) {
-		if (dst->flags & DST_NOCACHE)
-			dst = NULL;
-		else
-			dst_clone(dst);
-	}
+	dst_clone(dst);
 	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
 	dst_release(old_dst);
 }
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
 
 	rcu_read_lock();
 	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+		dst = NULL;
 	if (dst) {
 		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-			rcu_read_unlock();
 			tunnel_dst_reset(t);
-			return NULL;
+			dst_release(dst);
+			dst = NULL;
 		}
-		dst_hold(dst);
 	}
 	rcu_read_unlock();
 	return (struct rtable *)dst;
@@ -173,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
 		if (remote != t->parms.iph.daddr ||
+		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;
 
@@ -189,10 +186,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 	head = &itn->tunnels[hash];
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
-		if ((local != t->parms.iph.saddr &&
-		     (local != t->parms.iph.daddr ||
-		      !ipv4_is_multicast(local))) ||
-		    !(t->dev->flags & IFF_UP))
+		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+			continue;
+
+		if (!(t->dev->flags & IFF_UP))
			continue;
 
		if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -209,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 
 	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
+		    t->parms.iph.saddr != 0 ||
+		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;
 
@@ -305,7 +305,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
 	}
 
 	ASSERT_RTNL();
-	dev = alloc_netdev(ops->priv_size, name, ops->setup);
+	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
 	if (!dev) {
 		err = -ENOMEM;
 		goto failed;
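
Note on the ip_tunnel.c dst-cache fix above: tunnel_rtable_get() now takes its reference with atomic_inc_not_zero(), so a cached entry whose refcount already dropped to zero is treated as gone instead of being resurrected after the fact. A standalone sketch of the inc-unless-zero idiom using C11 atomics, illustrative only:

/* Sketch of the "take a reference only if the object is still live" pattern. */
#include <stdatomic.h>
#include <stdio.h>

struct obj {
	atomic_int refcnt;
};

/* Returns 1 and takes a reference iff refcnt was nonzero. */
static int get_unless_zero(struct obj *o)
{
	int c = atomic_load(&o->refcnt);

	while (c != 0) {
		/* CAS retries if another thread changed refcnt meanwhile */
		if (atomic_compare_exchange_weak(&o->refcnt, &c, c + 1))
			return 1;
	}
	return 0;	/* object already on its way to being freed */
}

int main(void)
{
	struct obj live = { 1 }, dying = { 0 };

	printf("%d %d\n", get_unless_zero(&live), get_unless_zero(&dying));	/* 1 0 */
	return 0;
}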
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b3e86ea7b71b..5bbef4fdcb43 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -143,8 +143,6 @@ __be32 ic_servaddr = NONE;	/* Boot server IP address */
 __be32 root_server_addr = NONE;	/* Address of NFS server */
 u8 root_server_path[256] = { 0, };	/* Path to mount as root */
 
-__be32 ic_dev_xid;		/* Device under configuration */
-
 /* vendor class identifier */
 static char vendor_class_identifier[253] __initdata;
 
@@ -654,6 +652,7 @@ static struct packet_type bootp_packet_type __initdata = {
	.func =	ic_bootp_recv,
 };
 
+static __be32 ic_dev_xid;	/* Device under configuration */
 
 /*
  *  Initialize DHCP/BOOTP extension fields in the request.
@@ -1218,10 +1217,10 @@ static int __init ic_dynamic(void)
 	get_random_bytes(&timeout, sizeof(timeout));
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
 	for (;;) {
+#ifdef IPCONFIG_BOOTP
		/* Track the device we are configuring */
		ic_dev_xid = d->xid;
 
-#ifdef IPCONFIG_BOOTP
		if (do_bootp && (d->able & IC_BOOTP))
			ic_bootp_send_if(d, jiffies - start_jiffies);
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 65bcaa789043..c8034587859d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -500,7 +500,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 	else
		sprintf(name, "pimreg%u", mrt->id);
 
-	dev = alloc_netdev(0, name, reg_vif_setup);
+	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 
 	if (dev == NULL)
		return NULL;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a26ce035e3fa..fb173126f03d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
+config NF_LOG_ARP
+	tristate "ARP packet logging"
+	default m if NETFILTER_ADVANCED=n
+	select NF_LOG_COMMON
+
+config NF_LOG_IPV4
+	tristate "IPv4 packet logging"
+	default m if NETFILTER_ADVANCED=n
+	select NF_LOG_COMMON
+
 config NF_TABLES_IPV4
 	depends on NF_TABLES
 	tristate "IPv4 nf_tables support"
@@ -159,25 +169,6 @@ config IP_NF_TARGET_SYNPROXY
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_ULOG
-	tristate "ULOG target support (obsolete)"
-	default m if NETFILTER_ADVANCED=n
-	---help---
-
-	  This option enables the old IPv4-only "ipt_ULOG" implementation
-	  which has been obsoleted by the new "nfnetlink_log" code (see
-	  CONFIG_NETFILTER_NETLINK_LOG).
-
-	  This option adds a `ULOG' target, which allows you to create rules in
-	  any iptables table.  The packet is passed to a userspace logging
-	  daemon using netlink multicast sockets; unlike the LOG target
-	  which can only be viewed through syslog.
-
-	  The appropriate userspace logging daemon (ulogd) may be obtained from
-	  <http://www.netfilter.org/projects/ulogd/index.html>
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets: nf_conntrack
 config NF_NAT_IPV4
 	tristate "IPv4 NAT"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 90b82405331e..245db9df3337 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -19,6 +19,10 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
 # defrag
 obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
 
+# logging
+obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
+obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
+
 # NAT helpers (nf_conntrack)
 obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
 obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c deleted file mode 100644 index 9cb993cd224b..000000000000 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ /dev/null | |||
@@ -1,498 +0,0 @@ | |||
1 | /* | ||
2 | * netfilter module for userspace packet logging daemons | ||
3 | * | ||
4 | * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> | ||
5 | * (C) 1999-2001 Paul `Rusty' Russell | ||
6 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * (C) 2005-2007 Patrick McHardy <kaber@trash.net> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | * | ||
13 | * This module accepts two parameters: | ||
14 | * | ||
15 | * nlbufsiz: | ||
16 | * The parameter specifies how big the buffer for each netlink multicast | ||
17 | * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will | ||
18 | * get accumulated in the kernel until they are sent to userspace. It is | ||
19 | * NOT possible to allocate more than 128kB, and it is strongly discouraged, | ||
20 | * because atomically allocating 128kB inside the network rx softirq is not | ||
21 | * reliable. Please also keep in mind that this buffer size is allocated for | ||
22 | * each nlgroup you are using, so the total kernel memory usage increases | ||
23 | * by that factor. | ||
24 | * | ||
25 | * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since | ||
26 | * nlbufsiz is used with alloc_skb, which adds another | ||
27 | * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead. | ||
28 | * | ||
29 | * flushtimeout: | ||
30 | * Specify, after how many hundredths of a second the queue should be | ||
31 | * flushed even if it is not full yet. | ||
32 | */ | ||
33 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/socket.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/skbuff.h> | ||
39 | #include <linux/kernel.h> | ||
40 | #include <linux/timer.h> | ||
41 | #include <net/netlink.h> | ||
42 | #include <linux/netdevice.h> | ||
43 | #include <linux/mm.h> | ||
44 | #include <linux/moduleparam.h> | ||
45 | #include <linux/netfilter.h> | ||
46 | #include <linux/netfilter/x_tables.h> | ||
47 | #include <linux/netfilter_ipv4/ipt_ULOG.h> | ||
48 | #include <net/netfilter/nf_log.h> | ||
49 | #include <net/netns/generic.h> | ||
50 | #include <net/sock.h> | ||
51 | #include <linux/bitops.h> | ||
52 | #include <asm/unaligned.h> | ||
53 | |||
54 | MODULE_LICENSE("GPL"); | ||
55 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
56 | MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG"); | ||
57 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); | ||
58 | |||
59 | #define ULOG_NL_EVENT 111 /* Harald's favorite number */ | ||
60 | #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ | ||
61 | |||
62 | static unsigned int nlbufsiz = NLMSG_GOODSIZE; | ||
63 | module_param(nlbufsiz, uint, 0400); | ||
64 | MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); | ||
65 | |||
66 | static unsigned int flushtimeout = 10; | ||
67 | module_param(flushtimeout, uint, 0600); | ||
68 | MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); | ||
69 | |||
70 | static bool nflog = true; | ||
71 | module_param(nflog, bool, 0400); | ||
72 | MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); | ||
73 | |||
74 | /* global data structures */ | ||
75 | |||
76 | typedef struct { | ||
77 | unsigned int qlen; /* number of nlmsgs' in the skb */ | ||
78 | struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ | ||
79 | struct sk_buff *skb; /* the pre-allocated skb */ | ||
80 | struct timer_list timer; /* the timer function */ | ||
81 | } ulog_buff_t; | ||
82 | |||
83 | static int ulog_net_id __read_mostly; | ||
84 | struct ulog_net { | ||
85 | unsigned int nlgroup[ULOG_MAXNLGROUPS]; | ||
86 | ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; | ||
87 | struct sock *nflognl; | ||
88 | spinlock_t lock; | ||
89 | }; | ||
90 | |||
91 | static struct ulog_net *ulog_pernet(struct net *net) | ||
92 | { | ||
93 | return net_generic(net, ulog_net_id); | ||
94 | } | ||
95 | |||
96 | /* send one ulog_buff_t to userspace */ | ||
97 | static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) | ||
98 | { | ||
99 | ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; | ||
100 | |||
101 | pr_debug("ulog_send: timer is deleting\n"); | ||
102 | del_timer(&ub->timer); | ||
103 | |||
104 | if (!ub->skb) { | ||
105 | pr_debug("ulog_send: nothing to send\n"); | ||
106 | return; | ||
107 | } | ||
108 | |||
109 | /* last nlmsg needs NLMSG_DONE */ | ||
110 | if (ub->qlen > 1) | ||
111 | ub->lastnlh->nlmsg_type = NLMSG_DONE; | ||
112 | |||
113 | NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; | ||
114 | pr_debug("throwing %d packets to netlink group %u\n", | ||
115 | ub->qlen, nlgroupnum + 1); | ||
116 | netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, | ||
117 | GFP_ATOMIC); | ||
118 | |||
119 | ub->qlen = 0; | ||
120 | ub->skb = NULL; | ||
121 | ub->lastnlh = NULL; | ||
122 | } | ||
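For context on the receive side: a userspace consumer binds a NETLINK_NFLOG socket to the matching multicast group and walks the multipart messages that ulog_send() broadcasts. A minimal sketch, assuming CAP_NET_ADMIN and leaving ulog_packet_msg_t payload parsing out:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    int main(void)
    {
            struct sockaddr_nl addr = {
                    .nl_family = AF_NETLINK,
                    .nl_groups = 1,          /* bitmask: ULOG nlgroup 1 */
            };
            char buf[65536];
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NFLOG);

            if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                    return 1;

            for (;;) {
                    int len = recv(fd, buf, sizeof(buf), 0);
                    struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

                    for (; len > 0 && NLMSG_OK(nlh, len);
                         nlh = NLMSG_NEXT(nlh, len))
                            printf("nlmsg type=%u len=%u\n",
                                   nlh->nlmsg_type, nlh->nlmsg_len);
            }
    }

Paired with a rule such as "iptables -A INPUT -j ULOG --ulog-nlgroup 1 --ulog-qthreshold 10", each recv() then returns one batch of up to qthreshold packets.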
123 | |||
124 | |||
125 | /* timer function to flush queue in flushtimeout time */ | ||
126 | static void ulog_timer(unsigned long data) | ||
127 | { | ||
128 | unsigned int groupnum = *((unsigned int *)data); | ||
129 | struct ulog_net *ulog = container_of((void *)data, | ||
130 | struct ulog_net, | ||
131 | nlgroup[groupnum]); | ||
132 | pr_debug("timer function called, calling ulog_send\n"); | ||
133 | |||
134 | /* lock to protect against somebody modifying our structure | ||
135 | * from ipt_ulog_packet at the same time */ | ||
136 | spin_lock_bh(&ulog->lock); | ||
137 | ulog_send(ulog, groupnum); | ||
138 | spin_unlock_bh(&ulog->lock); | ||
139 | } | ||
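A note on the trick above: the timer callback only receives the address of one nlgroup[] slot, but because every slot was initialized to hold its own index, that single pointer yields both the group number and, via container_of(), the enclosing per-net structure. A standalone sketch of the pattern (the demo types are hypothetical; the variable array index inside offsetof() is a GCC extension, just as in the kernel code):

    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct demo {
            unsigned int slot[4];   /* slot[i] == i, set at init time */
    };

    static struct demo *demo_from_slot(unsigned int *p)
    {
            unsigned int i = *p;    /* each slot stores its own index */

            return container_of(p, struct demo, slot[i]);
    }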
140 | |||
141 | static struct sk_buff *ulog_alloc_skb(unsigned int size) | ||
142 | { | ||
143 | struct sk_buff *skb; | ||
144 | unsigned int n; | ||
145 | |||
146 | /* alloc skb which should be big enough for a whole | ||
147 | * multipart message. WARNING: has to be <= 131000 | ||
148 | * due to slab allocator restrictions */ | ||
149 | |||
150 | n = max(size, nlbufsiz); | ||
151 | skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); | ||
152 | if (!skb) { | ||
153 | if (n > size) { | ||
154 | /* try to allocate only as much as we need for | ||
155 | * current packet */ | ||
156 | |||
157 | skb = alloc_skb(size, GFP_ATOMIC); | ||
158 | if (!skb) | ||
159 | pr_debug("cannot even allocate %ub\n", size); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | return skb; | ||
164 | } | ||
165 | |||
166 | static void ipt_ulog_packet(struct net *net, | ||
167 | unsigned int hooknum, | ||
168 | const struct sk_buff *skb, | ||
169 | const struct net_device *in, | ||
170 | const struct net_device *out, | ||
171 | const struct ipt_ulog_info *loginfo, | ||
172 | const char *prefix) | ||
173 | { | ||
174 | ulog_buff_t *ub; | ||
175 | ulog_packet_msg_t *pm; | ||
176 | size_t size, copy_len; | ||
177 | struct nlmsghdr *nlh; | ||
178 | struct timeval tv; | ||
179 | struct ulog_net *ulog = ulog_pernet(net); | ||
180 | |||
181 | /* ffs == find first set bit, needed because userspace passes the | ||
182 | * group number already shifted into a bitmask, but we need the | ||
183 | * unshifted index. ffs() returns [1..32], we need [0..31] */ | ||
184 | unsigned int groupnum = ffs(loginfo->nl_group) - 1; | ||
185 | |||
186 | /* calculate the size of the skb needed */ | ||
187 | if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len) | ||
188 | copy_len = skb->len; | ||
189 | else | ||
190 | copy_len = loginfo->copy_range; | ||
191 | |||
192 | size = nlmsg_total_size(sizeof(*pm) + copy_len); | ||
193 | |||
194 | ub = &ulog->ulog_buffers[groupnum]; | ||
195 | |||
196 | spin_lock_bh(&ulog->lock); | ||
197 | |||
198 | if (!ub->skb) { | ||
199 | if (!(ub->skb = ulog_alloc_skb(size))) | ||
200 | goto alloc_failure; | ||
201 | } else if (ub->qlen >= loginfo->qthreshold || | ||
202 | size > skb_tailroom(ub->skb)) { | ||
203 | /* either the queue length is too high or there is not | ||
204 | * enough room left in the nlskb; send it to userspace. */ | ||
205 | |||
206 | ulog_send(ulog, groupnum); | ||
207 | |||
208 | if (!(ub->skb = ulog_alloc_skb(size))) | ||
209 | goto alloc_failure; | ||
210 | } | ||
211 | |||
212 | pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); | ||
213 | |||
214 | nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, | ||
215 | sizeof(*pm)+copy_len, 0); | ||
216 | if (!nlh) { | ||
217 | pr_debug("error during nlmsg_put\n"); | ||
218 | goto out_unlock; | ||
219 | } | ||
220 | ub->qlen++; | ||
221 | |||
222 | pm = nlmsg_data(nlh); | ||
223 | memset(pm, 0, sizeof(*pm)); | ||
224 | |||
225 | /* We might not have a timestamp; get one */ | ||
226 | if (skb->tstamp.tv64 == 0) | ||
227 | __net_timestamp((struct sk_buff *)skb); | ||
228 | |||
229 | /* copy hook, prefix, timestamp, payload, etc. */ | ||
230 | pm->data_len = copy_len; | ||
231 | tv = ktime_to_timeval(skb->tstamp); | ||
232 | put_unaligned(tv.tv_sec, &pm->timestamp_sec); | ||
233 | put_unaligned(tv.tv_usec, &pm->timestamp_usec); | ||
234 | put_unaligned(skb->mark, &pm->mark); | ||
235 | pm->hook = hooknum; | ||
236 | if (prefix != NULL) { | ||
237 | strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1); | ||
238 | pm->prefix[sizeof(pm->prefix) - 1] = '\0'; | ||
239 | } | ||
240 | else if (loginfo->prefix[0] != '\0') | ||
241 | strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); | ||
242 | |||
243 | if (in && in->hard_header_len > 0 && | ||
244 | skb->mac_header != skb->network_header && | ||
245 | in->hard_header_len <= ULOG_MAC_LEN) { | ||
246 | memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); | ||
247 | pm->mac_len = in->hard_header_len; | ||
248 | } else | ||
249 | pm->mac_len = 0; | ||
250 | |||
251 | if (in) | ||
252 | strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); | ||
253 | |||
254 | if (out) | ||
255 | strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); | ||
256 | |||
257 | /* copy_len <= skb->len, so can't fail. */ | ||
258 | if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) | ||
259 | BUG(); | ||
260 | |||
261 | /* check if we are building multi-part messages */ | ||
262 | if (ub->qlen > 1) | ||
263 | ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; | ||
264 | |||
265 | ub->lastnlh = nlh; | ||
266 | |||
267 | /* if timer isn't already running, start it */ | ||
268 | if (!timer_pending(&ub->timer)) { | ||
269 | ub->timer.expires = jiffies + flushtimeout * HZ / 100; | ||
270 | add_timer(&ub->timer); | ||
271 | } | ||
272 | |||
273 | /* if threshold is reached, send message to userspace */ | ||
274 | if (ub->qlen >= loginfo->qthreshold) { | ||
275 | if (loginfo->qthreshold > 1) | ||
276 | nlh->nlmsg_type = NLMSG_DONE; | ||
277 | ulog_send(ulog, groupnum); | ||
278 | } | ||
279 | out_unlock: | ||
280 | spin_unlock_bh(&ulog->lock); | ||
281 | |||
282 | return; | ||
283 | |||
284 | alloc_failure: | ||
285 | pr_debug("Error building netlink message\n"); | ||
286 | spin_unlock_bh(&ulog->lock); | ||
287 | } | ||
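A worked example of the ffs() mapping used at the top of ipt_ulog_packet(), seen from the userspace side (illustrative values, using the <strings.h> ffs()):

    #include <stdio.h>
    #include <strings.h>

    int main(void)
    {
            unsigned int mask = 1u << 4;        /* --ulog-nlgroup 5: bit 4 */
            unsigned int idx  = ffs(mask) - 1;  /* 4: ulog_buffers[] index */

            printf("buffer %u, netlink broadcast group %u\n", idx, idx + 1);
            return 0;
    }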
288 | |||
289 | static unsigned int | ||
290 | ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
291 | { | ||
292 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
293 | |||
294 | ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out, | ||
295 | par->targinfo, NULL); | ||
296 | return XT_CONTINUE; | ||
297 | } | ||
298 | |||
299 | static void ipt_logfn(struct net *net, | ||
300 | u_int8_t pf, | ||
301 | unsigned int hooknum, | ||
302 | const struct sk_buff *skb, | ||
303 | const struct net_device *in, | ||
304 | const struct net_device *out, | ||
305 | const struct nf_loginfo *li, | ||
306 | const char *prefix) | ||
307 | { | ||
308 | struct ipt_ulog_info loginfo; | ||
309 | |||
310 | if (!li || li->type != NF_LOG_TYPE_ULOG) { | ||
311 | loginfo.nl_group = ULOG_DEFAULT_NLGROUP; | ||
312 | loginfo.copy_range = 0; | ||
313 | loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; | ||
314 | loginfo.prefix[0] = '\0'; | ||
315 | } else { | ||
316 | loginfo.nl_group = li->u.ulog.group; | ||
317 | loginfo.copy_range = li->u.ulog.copy_len; | ||
318 | loginfo.qthreshold = li->u.ulog.qthreshold; | ||
319 | strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); | ||
320 | } | ||
321 | |||
322 | ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); | ||
323 | } | ||
324 | |||
325 | static int ulog_tg_check(const struct xt_tgchk_param *par) | ||
326 | { | ||
327 | const struct ipt_ulog_info *loginfo = par->targinfo; | ||
328 | |||
329 | if (!par->net->xt.ulog_warn_deprecated) { | ||
330 | pr_info("ULOG is deprecated and it will be removed soon, " | ||
331 | "use NFLOG instead\n"); | ||
332 | par->net->xt.ulog_warn_deprecated = true; | ||
333 | } | ||
334 | |||
335 | if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { | ||
336 | pr_debug("prefix not null-terminated\n"); | ||
337 | return -EINVAL; | ||
338 | } | ||
339 | if (loginfo->qthreshold > ULOG_MAX_QLEN) { | ||
340 | pr_debug("queue threshold %Zu > MAX_QLEN\n", | ||
341 | loginfo->qthreshold); | ||
342 | return -EINVAL; | ||
343 | } | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | #ifdef CONFIG_COMPAT | ||
348 | struct compat_ipt_ulog_info { | ||
349 | compat_uint_t nl_group; | ||
350 | compat_size_t copy_range; | ||
351 | compat_size_t qthreshold; | ||
352 | char prefix[ULOG_PREFIX_LEN]; | ||
353 | }; | ||
354 | |||
355 | static void ulog_tg_compat_from_user(void *dst, const void *src) | ||
356 | { | ||
357 | const struct compat_ipt_ulog_info *cl = src; | ||
358 | struct ipt_ulog_info l = { | ||
359 | .nl_group = cl->nl_group, | ||
360 | .copy_range = cl->copy_range, | ||
361 | .qthreshold = cl->qthreshold, | ||
362 | }; | ||
363 | |||
364 | memcpy(l.prefix, cl->prefix, sizeof(l.prefix)); | ||
365 | memcpy(dst, &l, sizeof(l)); | ||
366 | } | ||
367 | |||
368 | static int ulog_tg_compat_to_user(void __user *dst, const void *src) | ||
369 | { | ||
370 | const struct ipt_ulog_info *l = src; | ||
371 | struct compat_ipt_ulog_info cl = { | ||
372 | .nl_group = l->nl_group, | ||
373 | .copy_range = l->copy_range, | ||
374 | .qthreshold = l->qthreshold, | ||
375 | }; | ||
376 | |||
377 | memcpy(cl.prefix, l->prefix, sizeof(cl.prefix)); | ||
378 | return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0; | ||
379 | } | ||
380 | #endif /* CONFIG_COMPAT */ | ||
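The compat handlers above exist because the size_t members shrink for 32-bit userspace running on a 64-bit kernel, so the two structs are not layout-compatible. A rough comparison (the byte counts are the editor's arithmetic for x86-64 vs. i386, not taken from the source):

    /* struct ipt_ulog_info (64-bit):  4 (nl_group) + 4 (pad) + 8 + 8 + 32 = 56
     * struct compat_ipt_ulog_info:    4 (nl_group) + 4 + 4 + 32           = 44
     * hence the field-by-field conversion instead of a plain memcpy() */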
381 | |||
382 | static struct xt_target ulog_tg_reg __read_mostly = { | ||
383 | .name = "ULOG", | ||
384 | .family = NFPROTO_IPV4, | ||
385 | .target = ulog_tg, | ||
386 | .targetsize = sizeof(struct ipt_ulog_info), | ||
387 | .checkentry = ulog_tg_check, | ||
388 | #ifdef CONFIG_COMPAT | ||
389 | .compatsize = sizeof(struct compat_ipt_ulog_info), | ||
390 | .compat_from_user = ulog_tg_compat_from_user, | ||
391 | .compat_to_user = ulog_tg_compat_to_user, | ||
392 | #endif | ||
393 | .me = THIS_MODULE, | ||
394 | }; | ||
395 | |||
396 | static struct nf_logger ipt_ulog_logger __read_mostly = { | ||
397 | .name = "ipt_ULOG", | ||
398 | .logfn = ipt_logfn, | ||
399 | .me = THIS_MODULE, | ||
400 | }; | ||
401 | |||
402 | static int __net_init ulog_tg_net_init(struct net *net) | ||
403 | { | ||
404 | int i; | ||
405 | struct ulog_net *ulog = ulog_pernet(net); | ||
406 | struct netlink_kernel_cfg cfg = { | ||
407 | .groups = ULOG_MAXNLGROUPS, | ||
408 | }; | ||
409 | |||
410 | spin_lock_init(&ulog->lock); | ||
411 | /* initialize ulog_buffers */ | ||
412 | for (i = 0; i < ULOG_MAXNLGROUPS; i++) { | ||
413 | ulog->nlgroup[i] = i; | ||
414 | setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, | ||
415 | (unsigned long)&ulog->nlgroup[i]); | ||
416 | } | ||
417 | |||
418 | ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); | ||
419 | if (!ulog->nflognl) | ||
420 | return -ENOMEM; | ||
421 | |||
422 | if (nflog) | ||
423 | nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); | ||
424 | |||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | static void __net_exit ulog_tg_net_exit(struct net *net) | ||
429 | { | ||
430 | ulog_buff_t *ub; | ||
431 | int i; | ||
432 | struct ulog_net *ulog = ulog_pernet(net); | ||
433 | |||
434 | if (nflog) | ||
435 | nf_log_unset(net, &ipt_ulog_logger); | ||
436 | |||
437 | netlink_kernel_release(ulog->nflognl); | ||
438 | |||
439 | /* remove pending timers and free allocated skb's */ | ||
440 | for (i = 0; i < ULOG_MAXNLGROUPS; i++) { | ||
441 | ub = &ulog->ulog_buffers[i]; | ||
442 | pr_debug("timer is deleting\n"); | ||
443 | del_timer(&ub->timer); | ||
444 | |||
445 | if (ub->skb) { | ||
446 | kfree_skb(ub->skb); | ||
447 | ub->skb = NULL; | ||
448 | } | ||
449 | } | ||
450 | } | ||
451 | |||
452 | static struct pernet_operations ulog_tg_net_ops = { | ||
453 | .init = ulog_tg_net_init, | ||
454 | .exit = ulog_tg_net_exit, | ||
455 | .id = &ulog_net_id, | ||
456 | .size = sizeof(struct ulog_net), | ||
457 | }; | ||
458 | |||
459 | static int __init ulog_tg_init(void) | ||
460 | { | ||
461 | int ret; | ||
462 | pr_debug("init module\n"); | ||
463 | |||
464 | if (nlbufsiz > 128*1024) { | ||
465 | pr_warn("Netlink buffer has to be <= 128kB\n"); | ||
466 | return -EINVAL; | ||
467 | } | ||
468 | |||
469 | ret = register_pernet_subsys(&ulog_tg_net_ops); | ||
470 | if (ret) | ||
471 | goto out_pernet; | ||
472 | |||
473 | ret = xt_register_target(&ulog_tg_reg); | ||
474 | if (ret < 0) | ||
475 | goto out_target; | ||
476 | |||
477 | if (nflog) | ||
478 | nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); | ||
479 | |||
480 | return 0; | ||
481 | |||
482 | out_target: | ||
483 | unregister_pernet_subsys(&ulog_tg_net_ops); | ||
484 | out_pernet: | ||
485 | return ret; | ||
486 | } | ||
487 | |||
488 | static void __exit ulog_tg_exit(void) | ||
489 | { | ||
490 | pr_debug("cleanup_module\n"); | ||
491 | if (nflog) | ||
492 | nf_log_unregister(&ipt_ulog_logger); | ||
493 | xt_unregister_target(&ulog_tg_reg); | ||
494 | unregister_pernet_subsys(&ulog_tg_net_ops); | ||
495 | } | ||
496 | |||
497 | module_init(ulog_tg_init); | ||
498 | module_exit(ulog_tg_exit); | ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8127dc802865..4ce44c4bc57b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -314,7 +314,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
314 | return -ENOENT; | 314 | return -ENOENT; |
315 | } | 315 | } |
316 | 316 | ||
317 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 317 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
318 | 318 | ||
319 | #include <linux/netfilter/nfnetlink.h> | 319 | #include <linux/netfilter/nfnetlink.h> |
320 | #include <linux/netfilter/nfnetlink_conntrack.h> | 320 | #include <linux/netfilter/nfnetlink_conntrack.h> |
@@ -388,7 +388,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | |||
388 | .invert_tuple = ipv4_invert_tuple, | 388 | .invert_tuple = ipv4_invert_tuple, |
389 | .print_tuple = ipv4_print_tuple, | 389 | .print_tuple = ipv4_print_tuple, |
390 | .get_l4proto = ipv4_get_l4proto, | 390 | .get_l4proto = ipv4_get_l4proto, |
391 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 391 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
392 | .tuple_to_nlattr = ipv4_tuple_to_nlattr, | 392 | .tuple_to_nlattr = ipv4_tuple_to_nlattr, |
393 | .nlattr_tuple_size = ipv4_nlattr_tuple_size, | 393 | .nlattr_tuple_size = ipv4_nlattr_tuple_size, |
394 | .nlattr_to_tuple = ipv4_nlattr_to_tuple, | 394 | .nlattr_to_tuple = ipv4_nlattr_to_tuple, |
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index a338dad41b7d..b91b2641adda 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -226,7 +226,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, | |||
226 | return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); | 226 | return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); |
227 | } | 227 | } |
228 | 228 | ||
229 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 229 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
230 | 230 | ||
231 | #include <linux/netfilter/nfnetlink.h> | 231 | #include <linux/netfilter/nfnetlink.h> |
232 | #include <linux/netfilter/nfnetlink_conntrack.h> | 232 | #include <linux/netfilter/nfnetlink_conntrack.h> |
@@ -408,7 +408,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = | |||
408 | .error = icmp_error, | 408 | .error = icmp_error, |
409 | .destroy = NULL, | 409 | .destroy = NULL, |
410 | .me = NULL, | 410 | .me = NULL, |
411 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 411 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
412 | .tuple_to_nlattr = icmp_tuple_to_nlattr, | 412 | .tuple_to_nlattr = icmp_tuple_to_nlattr, |
413 | .nlattr_tuple_size = icmp_nlattr_tuple_size, | 413 | .nlattr_tuple_size = icmp_nlattr_tuple_size, |
414 | .nlattr_to_tuple = icmp_nlattr_to_tuple, | 414 | .nlattr_to_tuple = icmp_nlattr_to_tuple, |
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index b8f6381c7d0b..76bd1aef257f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/netfilter_bridge.h> | 17 | #include <linux/netfilter_bridge.h> |
18 | #include <linux/netfilter_ipv4.h> | 18 | #include <linux/netfilter_ipv4.h> |
19 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | 19 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> |
20 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 20 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
21 | #include <net/netfilter/nf_conntrack.h> | 21 | #include <net/netfilter/nf_conntrack.h> |
22 | #endif | 22 | #endif |
23 | #include <net/netfilter/nf_conntrack_zones.h> | 23 | #include <net/netfilter/nf_conntrack_zones.h> |
@@ -45,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, | |||
45 | { | 45 | { |
46 | u16 zone = NF_CT_DEFAULT_ZONE; | 46 | u16 zone = NF_CT_DEFAULT_ZONE; |
47 | 47 | ||
48 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 48 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
49 | if (skb->nfct) | 49 | if (skb->nfct) |
50 | zone = nf_ct_zone((struct nf_conn *)skb->nfct); | 50 | zone = nf_ct_zone((struct nf_conn *)skb->nfct); |
51 | #endif | 51 | #endif |
@@ -74,8 +74,8 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, | |||
74 | inet->nodefrag) | 74 | inet->nodefrag) |
75 | return NF_ACCEPT; | 75 | return NF_ACCEPT; |
76 | 76 | ||
77 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 77 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
78 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) | 78 | #if !IS_ENABLED(CONFIG_NF_NAT) |
79 | /* Previously seen (loopback)? Ignore. Do this before | 79 | /* Previously seen (loopback)? Ignore. Do this before |
80 | fragment check. */ | 80 | fragment check. */ |
81 | if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) | 81 | if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) |
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c new file mode 100644 index 000000000000..ccfc78db12ee --- /dev/null +++ b/net/ipv4/netfilter/nf_log_arp.c | |||
@@ -0,0 +1,149 @@ | |||
1 | /* | ||
2 | * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> | ||
3 | * | ||
4 | * Based on code from ebt_log from: | ||
5 | * | ||
6 | * Bart De Schuymer <bdschuym@pandora.be> | ||
7 | * Harald Welte <laforge@netfilter.org> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/spinlock.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/if_arp.h> | ||
18 | #include <linux/ip.h> | ||
19 | #include <net/route.h> | ||
20 | |||
21 | #include <linux/netfilter.h> | ||
22 | #include <linux/netfilter/xt_LOG.h> | ||
23 | #include <net/netfilter/nf_log.h> | ||
24 | |||
25 | static struct nf_loginfo default_loginfo = { | ||
26 | .type = NF_LOG_TYPE_LOG, | ||
27 | .u = { | ||
28 | .log = { | ||
29 | .level = 5, | ||
30 | .logflags = NF_LOG_MASK, | ||
31 | }, | ||
32 | }, | ||
33 | }; | ||
34 | |||
35 | struct arppayload { | ||
36 | unsigned char mac_src[ETH_ALEN]; | ||
37 | unsigned char ip_src[4]; | ||
38 | unsigned char mac_dst[ETH_ALEN]; | ||
39 | unsigned char ip_dst[4]; | ||
40 | }; | ||
41 | |||
42 | static void dump_arp_packet(struct nf_log_buf *m, | ||
43 | const struct nf_loginfo *info, | ||
44 | const struct sk_buff *skb, unsigned int nhoff) | ||
45 | { | ||
46 | const struct arphdr *ah; | ||
47 | struct arphdr _arph; | ||
48 | const struct arppayload *ap; | ||
49 | struct arppayload _arpp; | ||
50 | |||
51 | ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); | ||
52 | if (ah == NULL) { | ||
53 | nf_log_buf_add(m, "TRUNCATED"); | ||
54 | return; | ||
55 | } | ||
56 | nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", | ||
57 | ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); | ||
58 | |||
59 | /* If it's for Ethernet and the lengths are OK, then log the ARP | ||
60 | * payload. | ||
61 | */ | ||
62 | if (ah->ar_hrd != htons(1) || | ||
63 | ah->ar_hln != ETH_ALEN || | ||
64 | ah->ar_pln != sizeof(__be32)) | ||
65 | return; | ||
66 | |||
67 | ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); | ||
68 | if (ap == NULL) { | ||
69 | nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]", | ||
70 | skb->len - sizeof(_arph)); | ||
71 | return; | ||
72 | } | ||
73 | nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", | ||
74 | ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); | ||
75 | } | ||
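Stitching the two nf_log_buf_add() format strings together, a fully logged ARP reply would come out roughly as follows (the addresses are illustrative):

    ARP HTYPE=1 PTYPE=0x0800 OPCODE=2 MACSRC=00:11:22:33:44:55 IPSRC=192.168.1.1 MACDST=66:77:88:99:aa:bb IPDST=192.168.1.2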
76 | |||
77 | void nf_log_arp_packet(struct net *net, u_int8_t pf, | ||
78 | unsigned int hooknum, const struct sk_buff *skb, | ||
79 | const struct net_device *in, | ||
80 | const struct net_device *out, | ||
81 | const struct nf_loginfo *loginfo, | ||
82 | const char *prefix) | ||
83 | { | ||
84 | struct nf_log_buf *m; | ||
85 | |||
86 | /* FIXME: Disabled from containers until syslog ns is supported */ | ||
87 | if (!net_eq(net, &init_net)) | ||
88 | return; | ||
89 | |||
90 | m = nf_log_buf_open(); | ||
91 | |||
92 | if (!loginfo) | ||
93 | loginfo = &default_loginfo; | ||
94 | |||
95 | nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, | ||
96 | prefix); | ||
97 | dump_arp_packet(m, loginfo, skb, 0); | ||
98 | |||
99 | nf_log_buf_close(m); | ||
100 | } | ||
101 | |||
102 | static struct nf_logger nf_arp_logger __read_mostly = { | ||
103 | .name = "nf_log_arp", | ||
104 | .type = NF_LOG_TYPE_LOG, | ||
105 | .logfn = nf_log_arp_packet, | ||
106 | .me = THIS_MODULE, | ||
107 | }; | ||
108 | |||
109 | static int __net_init nf_log_arp_net_init(struct net *net) | ||
110 | { | ||
111 | nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static void __net_exit nf_log_arp_net_exit(struct net *net) | ||
116 | { | ||
117 | nf_log_unset(net, &nf_arp_logger); | ||
118 | } | ||
119 | |||
120 | static struct pernet_operations nf_log_arp_net_ops = { | ||
121 | .init = nf_log_arp_net_init, | ||
122 | .exit = nf_log_arp_net_exit, | ||
123 | }; | ||
124 | |||
125 | static int __init nf_log_arp_init(void) | ||
126 | { | ||
127 | int ret; | ||
128 | |||
129 | ret = register_pernet_subsys(&nf_log_arp_net_ops); | ||
130 | if (ret < 0) | ||
131 | return ret; | ||
132 | |||
133 | nf_log_register(NFPROTO_ARP, &nf_arp_logger); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static void __exit nf_log_arp_exit(void) | ||
138 | { | ||
139 | unregister_pernet_subsys(&nf_log_arp_net_ops); | ||
140 | nf_log_unregister(&nf_arp_logger); | ||
141 | } | ||
142 | |||
143 | module_init(nf_log_arp_init); | ||
144 | module_exit(nf_log_arp_exit); | ||
145 | |||
146 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); | ||
147 | MODULE_DESCRIPTION("Netfilter ARP packet logging"); | ||
148 | MODULE_LICENSE("GPL"); | ||
149 | MODULE_ALIAS_NF_LOGGER(3, 0); | ||
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c new file mode 100644 index 000000000000..078bdca1b607 --- /dev/null +++ b/net/ipv4/netfilter/nf_log_ipv4.c | |||
@@ -0,0 +1,385 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include <linux/skbuff.h> | ||
12 | #include <linux/if_arp.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <net/ipv6.h> | ||
15 | #include <net/icmp.h> | ||
16 | #include <net/udp.h> | ||
17 | #include <net/tcp.h> | ||
18 | #include <net/route.h> | ||
19 | |||
20 | #include <linux/netfilter.h> | ||
21 | #include <linux/netfilter/xt_LOG.h> | ||
22 | #include <net/netfilter/nf_log.h> | ||
23 | |||
24 | static struct nf_loginfo default_loginfo = { | ||
25 | .type = NF_LOG_TYPE_LOG, | ||
26 | .u = { | ||
27 | .log = { | ||
28 | .level = 5, | ||
29 | .logflags = NF_LOG_MASK, | ||
30 | }, | ||
31 | }, | ||
32 | }; | ||
33 | |||
34 | /* One level of recursion won't kill us */ | ||
35 | static void dump_ipv4_packet(struct nf_log_buf *m, | ||
36 | const struct nf_loginfo *info, | ||
37 | const struct sk_buff *skb, unsigned int iphoff) | ||
38 | { | ||
39 | struct iphdr _iph; | ||
40 | const struct iphdr *ih; | ||
41 | unsigned int logflags; | ||
42 | |||
43 | if (info->type == NF_LOG_TYPE_LOG) | ||
44 | logflags = info->u.log.logflags; | ||
45 | else | ||
46 | logflags = NF_LOG_MASK; | ||
47 | |||
48 | ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); | ||
49 | if (ih == NULL) { | ||
50 | nf_log_buf_add(m, "TRUNCATED"); | ||
51 | return; | ||
52 | } | ||
53 | |||
54 | /* Important fields: | ||
55 | * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ | ||
56 | /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ | ||
57 | nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); | ||
58 | |||
59 | /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ | ||
60 | nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", | ||
61 | ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, | ||
62 | ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); | ||
63 | |||
64 | /* Max length: 6 "CE DF MF " */ | ||
65 | if (ntohs(ih->frag_off) & IP_CE) | ||
66 | nf_log_buf_add(m, "CE "); | ||
67 | if (ntohs(ih->frag_off) & IP_DF) | ||
68 | nf_log_buf_add(m, "DF "); | ||
69 | if (ntohs(ih->frag_off) & IP_MF) | ||
70 | nf_log_buf_add(m, "MF "); | ||
71 | |||
72 | /* Max length: 11 "FRAG:65535 " */ | ||
73 | if (ntohs(ih->frag_off) & IP_OFFSET) | ||
74 | nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); | ||
75 | |||
76 | if ((logflags & XT_LOG_IPOPT) && | ||
77 | ih->ihl * 4 > sizeof(struct iphdr)) { | ||
78 | const unsigned char *op; | ||
79 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; | ||
80 | unsigned int i, optsize; | ||
81 | |||
82 | optsize = ih->ihl * 4 - sizeof(struct iphdr); | ||
83 | op = skb_header_pointer(skb, iphoff+sizeof(_iph), | ||
84 | optsize, _opt); | ||
85 | if (op == NULL) { | ||
86 | nf_log_buf_add(m, "TRUNCATED"); | ||
87 | return; | ||
88 | } | ||
89 | |||
90 | /* Max length: 127 "OPT (" 15*4*2chars ") " */ | ||
91 | nf_log_buf_add(m, "OPT ("); | ||
92 | for (i = 0; i < optsize; i++) | ||
93 | nf_log_buf_add(m, "%02X", op[i]); | ||
94 | nf_log_buf_add(m, ") "); | ||
95 | } | ||
96 | |||
97 | switch (ih->protocol) { | ||
98 | case IPPROTO_TCP: | ||
99 | if (nf_log_dump_tcp_header(m, skb, ih->protocol, | ||
100 | ntohs(ih->frag_off) & IP_OFFSET, | ||
101 | iphoff+ih->ihl*4, logflags)) | ||
102 | return; | ||
103 | break; | ||
104 | case IPPROTO_UDP: | ||
105 | case IPPROTO_UDPLITE: | ||
106 | if (nf_log_dump_udp_header(m, skb, ih->protocol, | ||
107 | ntohs(ih->frag_off) & IP_OFFSET, | ||
108 | iphoff+ih->ihl*4)) | ||
109 | return; | ||
110 | break; | ||
111 | case IPPROTO_ICMP: { | ||
112 | struct icmphdr _icmph; | ||
113 | const struct icmphdr *ich; | ||
114 | static const size_t required_len[NR_ICMP_TYPES+1] | ||
115 | = { [ICMP_ECHOREPLY] = 4, | ||
116 | [ICMP_DEST_UNREACH] | ||
117 | = 8 + sizeof(struct iphdr), | ||
118 | [ICMP_SOURCE_QUENCH] | ||
119 | = 8 + sizeof(struct iphdr), | ||
120 | [ICMP_REDIRECT] | ||
121 | = 8 + sizeof(struct iphdr), | ||
122 | [ICMP_ECHO] = 4, | ||
123 | [ICMP_TIME_EXCEEDED] | ||
124 | = 8 + sizeof(struct iphdr), | ||
125 | [ICMP_PARAMETERPROB] | ||
126 | = 8 + sizeof(struct iphdr), | ||
127 | [ICMP_TIMESTAMP] = 20, | ||
128 | [ICMP_TIMESTAMPREPLY] = 20, | ||
129 | [ICMP_ADDRESS] = 12, | ||
130 | [ICMP_ADDRESSREPLY] = 12 }; | ||
131 | |||
132 | /* Max length: 11 "PROTO=ICMP " */ | ||
133 | nf_log_buf_add(m, "PROTO=ICMP "); | ||
134 | |||
135 | if (ntohs(ih->frag_off) & IP_OFFSET) | ||
136 | break; | ||
137 | |||
138 | /* Max length: 25 "INCOMPLETE [65535 bytes] " */ | ||
139 | ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, | ||
140 | sizeof(_icmph), &_icmph); | ||
141 | if (ich == NULL) { | ||
142 | nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", | ||
143 | skb->len - iphoff - ih->ihl*4); | ||
144 | break; | ||
145 | } | ||
146 | |||
147 | /* Max length: 18 "TYPE=255 CODE=255 " */ | ||
148 | nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); | ||
149 | |||
150 | /* Max length: 25 "INCOMPLETE [65535 bytes] " */ | ||
151 | if (ich->type <= NR_ICMP_TYPES && | ||
152 | required_len[ich->type] && | ||
153 | skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { | ||
154 | nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", | ||
155 | skb->len - iphoff - ih->ihl*4); | ||
156 | break; | ||
157 | } | ||
158 | |||
159 | switch (ich->type) { | ||
160 | case ICMP_ECHOREPLY: | ||
161 | case ICMP_ECHO: | ||
162 | /* Max length: 19 "ID=65535 SEQ=65535 " */ | ||
163 | nf_log_buf_add(m, "ID=%u SEQ=%u ", | ||
164 | ntohs(ich->un.echo.id), | ||
165 | ntohs(ich->un.echo.sequence)); | ||
166 | break; | ||
167 | |||
168 | case ICMP_PARAMETERPROB: | ||
169 | /* Max length: 14 "PARAMETER=255 " */ | ||
170 | nf_log_buf_add(m, "PARAMETER=%u ", | ||
171 | ntohl(ich->un.gateway) >> 24); | ||
172 | break; | ||
173 | case ICMP_REDIRECT: | ||
174 | /* Max length: 24 "GATEWAY=255.255.255.255 " */ | ||
175 | nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); | ||
176 | /* Fall through */ | ||
177 | case ICMP_DEST_UNREACH: | ||
178 | case ICMP_SOURCE_QUENCH: | ||
179 | case ICMP_TIME_EXCEEDED: | ||
180 | /* Max length: 3+maxlen */ | ||
181 | if (!iphoff) { /* Only recurse once. */ | ||
182 | nf_log_buf_add(m, "["); | ||
183 | dump_ipv4_packet(m, info, skb, | ||
184 | iphoff + ih->ihl*4+sizeof(_icmph)); | ||
185 | nf_log_buf_add(m, "] "); | ||
186 | } | ||
187 | |||
188 | /* Max length: 10 "MTU=65535 " */ | ||
189 | if (ich->type == ICMP_DEST_UNREACH && | ||
190 | ich->code == ICMP_FRAG_NEEDED) { | ||
191 | nf_log_buf_add(m, "MTU=%u ", | ||
192 | ntohs(ich->un.frag.mtu)); | ||
193 | } | ||
194 | } | ||
195 | break; | ||
196 | } | ||
197 | /* Max Length */ | ||
198 | case IPPROTO_AH: { | ||
199 | struct ip_auth_hdr _ahdr; | ||
200 | const struct ip_auth_hdr *ah; | ||
201 | |||
202 | if (ntohs(ih->frag_off) & IP_OFFSET) | ||
203 | break; | ||
204 | |||
205 | /* Max length: 9 "PROTO=AH " */ | ||
206 | nf_log_buf_add(m, "PROTO=AH "); | ||
207 | |||
208 | /* Max length: 25 "INCOMPLETE [65535 bytes] " */ | ||
209 | ah = skb_header_pointer(skb, iphoff+ih->ihl*4, | ||
210 | sizeof(_ahdr), &_ahdr); | ||
211 | if (ah == NULL) { | ||
212 | nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", | ||
213 | skb->len - iphoff - ih->ihl*4); | ||
214 | break; | ||
215 | } | ||
216 | |||
217 | /* Length: 15 "SPI=0xF1234567 " */ | ||
218 | nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); | ||
219 | break; | ||
220 | } | ||
221 | case IPPROTO_ESP: { | ||
222 | struct ip_esp_hdr _esph; | ||
223 | const struct ip_esp_hdr *eh; | ||
224 | |||
225 | /* Max length: 10 "PROTO=ESP " */ | ||
226 | nf_log_buf_add(m, "PROTO=ESP "); | ||
227 | |||
228 | if (ntohs(ih->frag_off) & IP_OFFSET) | ||
229 | break; | ||
230 | |||
231 | /* Max length: 25 "INCOMPLETE [65535 bytes] " */ | ||
232 | eh = skb_header_pointer(skb, iphoff+ih->ihl*4, | ||
233 | sizeof(_esph), &_esph); | ||
234 | if (eh == NULL) { | ||
235 | nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", | ||
236 | skb->len - iphoff - ih->ihl*4); | ||
237 | break; | ||
238 | } | ||
239 | |||
240 | /* Length: 15 "SPI=0xF1234567 " */ | ||
241 | nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); | ||
242 | break; | ||
243 | } | ||
244 | /* Max length: 10 "PROTO 255 " */ | ||
245 | default: | ||
246 | nf_log_buf_add(m, "PROTO=%u ", ih->protocol); | ||
247 | } | ||
248 | |||
249 | /* Max length: 15 "UID=4294967295 " */ | ||
250 | if ((logflags & XT_LOG_UID) && !iphoff) | ||
251 | nf_log_dump_sk_uid_gid(m, skb->sk); | ||
252 | |||
253 | /* Max length: 16 "MARK=0xFFFFFFFF " */ | ||
254 | if (!iphoff && skb->mark) | ||
255 | nf_log_buf_add(m, "MARK=0x%x ", skb->mark); | ||
256 | |||
257 | /* Proto Max log string length */ | ||
258 | /* IP: 40+46+6+11+127 = 230 */ | ||
259 | /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ | ||
260 | /* UDP: 10+max(25,20) = 35 */ | ||
261 | /* UDPLITE: 14+max(25,20) = 39 */ | ||
262 | /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ | ||
263 | /* ESP: 10+max(25)+15 = 50 */ | ||
264 | /* AH: 9+max(25)+15 = 49 */ | ||
265 | /* unknown: 10 */ | ||
266 | |||
267 | /* (ICMP allows recursion one level deep) */ | ||
268 | /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ | ||
269 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ | ||
270 | } | ||
271 | |||
272 | static void dump_ipv4_mac_header(struct nf_log_buf *m, | ||
273 | const struct nf_loginfo *info, | ||
274 | const struct sk_buff *skb) | ||
275 | { | ||
276 | struct net_device *dev = skb->dev; | ||
277 | unsigned int logflags = 0; | ||
278 | |||
279 | if (info->type == NF_LOG_TYPE_LOG) | ||
280 | logflags = info->u.log.logflags; | ||
281 | |||
282 | if (!(logflags & XT_LOG_MACDECODE)) | ||
283 | goto fallback; | ||
284 | |||
285 | switch (dev->type) { | ||
286 | case ARPHRD_ETHER: | ||
287 | nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", | ||
288 | eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, | ||
289 | ntohs(eth_hdr(skb)->h_proto)); | ||
290 | return; | ||
291 | default: | ||
292 | break; | ||
293 | } | ||
294 | |||
295 | fallback: | ||
296 | nf_log_buf_add(m, "MAC="); | ||
297 | if (dev->hard_header_len && | ||
298 | skb->mac_header != skb->network_header) { | ||
299 | const unsigned char *p = skb_mac_header(skb); | ||
300 | unsigned int i; | ||
301 | |||
302 | nf_log_buf_add(m, "%02x", *p++); | ||
303 | for (i = 1; i < dev->hard_header_len; i++, p++) | ||
304 | nf_log_buf_add(m, ":%02x", *p); | ||
305 | } | ||
306 | nf_log_buf_add(m, " "); | ||
307 | } | ||
308 | |||
309 | static void nf_log_ip_packet(struct net *net, u_int8_t pf, | ||
310 | unsigned int hooknum, const struct sk_buff *skb, | ||
311 | const struct net_device *in, | ||
312 | const struct net_device *out, | ||
313 | const struct nf_loginfo *loginfo, | ||
314 | const char *prefix) | ||
315 | { | ||
316 | struct nf_log_buf *m; | ||
317 | |||
318 | /* FIXME: Disabled from containers until syslog ns is supported */ | ||
319 | if (!net_eq(net, &init_net)) | ||
320 | return; | ||
321 | |||
322 | m = nf_log_buf_open(); | ||
323 | |||
324 | if (!loginfo) | ||
325 | loginfo = &default_loginfo; | ||
326 | |||
327 | nf_log_dump_packet_common(m, pf, hooknum, skb, in, | ||
328 | out, loginfo, prefix); | ||
329 | |||
330 | if (in != NULL) | ||
331 | dump_ipv4_mac_header(m, loginfo, skb); | ||
332 | |||
333 | dump_ipv4_packet(m, loginfo, skb, 0); | ||
334 | |||
335 | nf_log_buf_close(m); | ||
336 | } | ||
337 | |||
338 | static struct nf_logger nf_ip_logger __read_mostly = { | ||
339 | .name = "nf_log_ipv4", | ||
340 | .type = NF_LOG_TYPE_LOG, | ||
341 | .logfn = nf_log_ip_packet, | ||
342 | .me = THIS_MODULE, | ||
343 | }; | ||
344 | |||
345 | static int __net_init nf_log_ipv4_net_init(struct net *net) | ||
346 | { | ||
347 | nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | static void __net_exit nf_log_ipv4_net_exit(struct net *net) | ||
352 | { | ||
353 | nf_log_unset(net, &nf_ip_logger); | ||
354 | } | ||
355 | |||
356 | static struct pernet_operations nf_log_ipv4_net_ops = { | ||
357 | .init = nf_log_ipv4_net_init, | ||
358 | .exit = nf_log_ipv4_net_exit, | ||
359 | }; | ||
360 | |||
361 | static int __init nf_log_ipv4_init(void) | ||
362 | { | ||
363 | int ret; | ||
364 | |||
365 | ret = register_pernet_subsys(&nf_log_ipv4_net_ops); | ||
366 | if (ret < 0) | ||
367 | return ret; | ||
368 | |||
369 | nf_log_register(NFPROTO_IPV4, &nf_ip_logger); | ||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | static void __exit nf_log_ipv4_exit(void) | ||
374 | { | ||
375 | unregister_pernet_subsys(&nf_log_ipv4_net_ops); | ||
376 | nf_log_unregister(&nf_ip_logger); | ||
377 | } | ||
378 | |||
379 | module_init(nf_log_ipv4_init); | ||
380 | module_exit(nf_log_ipv4_exit); | ||
381 | |||
382 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | ||
383 | MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); | ||
384 | MODULE_LICENSE("GPL"); | ||
385 | MODULE_ALIAS_NF_LOGGER(AF_INET, 0); | ||
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index d8b2e14efddc..14f5ccd06337 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | |||
@@ -154,6 +154,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, | |||
154 | htons(oldlen), htons(datalen), 1); | 154 | htons(oldlen), htons(datalen), 1); |
155 | } | 155 | } |
156 | 156 | ||
157 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | ||
157 | static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], | 158 | static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], |
158 | struct nf_nat_range *range) | 159 | struct nf_nat_range *range) |
159 | { | 160 | { |
@@ -169,6 +170,7 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], | |||
169 | 170 | ||
170 | return 0; | 171 | return 0; |
171 | } | 172 | } |
173 | #endif | ||
172 | 174 | ||
173 | static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { | 175 | static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { |
174 | .l3proto = NFPROTO_IPV4, | 176 | .l3proto = NFPROTO_IPV4, |
@@ -177,7 +179,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { | |||
177 | .manip_pkt = nf_nat_ipv4_manip_pkt, | 179 | .manip_pkt = nf_nat_ipv4_manip_pkt, |
178 | .csum_update = nf_nat_ipv4_csum_update, | 180 | .csum_update = nf_nat_ipv4_csum_update, |
179 | .csum_recalc = nf_nat_ipv4_csum_recalc, | 181 | .csum_recalc = nf_nat_ipv4_csum_recalc, |
182 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | ||
180 | .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, | 183 | .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, |
184 | #endif | ||
181 | #ifdef CONFIG_XFRM | 185 | #ifdef CONFIG_XFRM |
182 | .decode_session = nf_nat_ipv4_decode_session, | 186 | .decode_session = nf_nat_ipv4_decode_session, |
183 | #endif | 187 | #endif |
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 690d890111bb..9414923f1e15 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
@@ -124,7 +124,7 @@ static const struct nf_nat_l4proto gre = { | |||
124 | .manip_pkt = gre_manip_pkt, | 124 | .manip_pkt = gre_manip_pkt, |
125 | .in_range = nf_nat_l4proto_in_range, | 125 | .in_range = nf_nat_l4proto_in_range, |
126 | .unique_tuple = gre_unique_tuple, | 126 | .unique_tuple = gre_unique_tuple, |
127 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 127 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
128 | .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, | 128 | .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, |
129 | #endif | 129 | #endif |
130 | }; | 130 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index eb303471bcf6..4557b4ab8342 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
@@ -77,7 +77,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmp = { | |||
77 | .manip_pkt = icmp_manip_pkt, | 77 | .manip_pkt = icmp_manip_pkt, |
78 | .in_range = icmp_in_range, | 78 | .in_range = icmp_in_range, |
79 | .unique_tuple = icmp_unique_tuple, | 79 | .unique_tuple = icmp_unique_tuple, |
80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 80 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
81 | .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, | 81 | .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, |
82 | #endif | 82 | #endif |
83 | }; | 83 | }; |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ae0af9386f7c..8e3eb39f84e7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -52,6 +52,7 @@ | |||
52 | static int sockstat_seq_show(struct seq_file *seq, void *v) | 52 | static int sockstat_seq_show(struct seq_file *seq, void *v) |
53 | { | 53 | { |
54 | struct net *net = seq->private; | 54 | struct net *net = seq->private; |
55 | unsigned int frag_mem; | ||
55 | int orphans, sockets; | 56 | int orphans, sockets; |
56 | 57 | ||
57 | local_bh_disable(); | 58 | local_bh_disable(); |
@@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
71 | sock_prot_inuse_get(net, &udplite_prot)); | 72 | sock_prot_inuse_get(net, &udplite_prot)); |
72 | seq_printf(seq, "RAW: inuse %d\n", | 73 | seq_printf(seq, "RAW: inuse %d\n", |
73 | sock_prot_inuse_get(net, &raw_prot)); | 74 | sock_prot_inuse_get(net, &raw_prot)); |
74 | seq_printf(seq, "FRAG: inuse %d memory %d\n", | 75 | frag_mem = ip_frag_mem(net); |
75 | ip_frag_nqueues(net), ip_frag_mem(net)); | 76 | seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); |
76 | return 0; | 77 | return 0; |
77 | } | 78 | } |
78 | 79 | ||
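With per-queue counting gone from the fragment code, the "inuse" column degrades to a boolean; the double negation in the new seq_printf() line is the usual C idiom for collapsing a count to 0/1:

    unsigned int frag_mem = 12345;          /* bytes charged to fragments */
    unsigned int inuse = !!frag_mem;        /* 1 if nonzero, else 0 */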
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c65160565e1..739db3100c23 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <linux/in_route.h> | 58 | #include <linux/in_route.h> |
59 | #include <linux/route.h> | 59 | #include <linux/route.h> |
60 | #include <linux/skbuff.h> | 60 | #include <linux/skbuff.h> |
61 | #include <linux/igmp.h> | ||
61 | #include <net/net_namespace.h> | 62 | #include <net/net_namespace.h> |
62 | #include <net/dst.h> | 63 | #include <net/dst.h> |
63 | #include <net/sock.h> | 64 | #include <net/sock.h> |
@@ -174,7 +175,9 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) | |||
174 | 175 | ||
175 | while (sk) { | 176 | while (sk) { |
176 | delivered = 1; | 177 | delivered = 1; |
177 | if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { | 178 | if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && |
179 | ip_mc_sf_allow(sk, iph->daddr, iph->saddr, | ||
180 | skb->dev->ifindex)) { | ||
178 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 181 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
179 | 182 | ||
180 | /* Not releasing hash table! */ | 183 | /* Not releasing hash table! */ |
@@ -365,6 +368,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, | |||
365 | 368 | ||
366 | skb->ip_summed = CHECKSUM_NONE; | 369 | skb->ip_summed = CHECKSUM_NONE; |
367 | 370 | ||
371 | sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); | ||
372 | |||
368 | skb->transport_header = skb->network_header; | 373 | skb->transport_header = skb->network_header; |
369 | err = -EFAULT; | 374 | err = -EFAULT; |
370 | if (memcpy_fromiovecend((void *)iph, from, 0, length)) | 375 | if (memcpy_fromiovecend((void *)iph, from, 0, length)) |
@@ -606,6 +611,8 @@ back_from_confirm: | |||
606 | &rt, msg->msg_flags); | 611 | &rt, msg->msg_flags); |
607 | 612 | ||
608 | else { | 613 | else { |
614 | sock_tx_timestamp(sk, &ipc.tx_flags); | ||
615 | |||
609 | if (!ipc.addr) | 616 | if (!ipc.addr) |
610 | ipc.addr = fl4.daddr; | 617 | ipc.addr = fl4.daddr; |
611 | lock_sock(sk); | 618 | lock_sock(sk); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 082239ffe34a..190199851c9a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -457,8 +457,31 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, | |||
457 | return neigh_create(&arp_tbl, pkey, dev); | 457 | return neigh_create(&arp_tbl, pkey, dev); |
458 | } | 458 | } |
459 | 459 | ||
460 | atomic_t *ip_idents __read_mostly; | 460 | #define IP_IDENTS_SZ 2048u |
461 | EXPORT_SYMBOL(ip_idents); | 461 | struct ip_ident_bucket { |
462 | atomic_t id; | ||
463 | u32 stamp32; | ||
464 | }; | ||
465 | |||
466 | static struct ip_ident_bucket *ip_idents __read_mostly; | ||
467 | |||
468 | /* In order to protect privacy, we add a perturbation to identifiers | ||
469 | * if one generator is seldom used. This makes it hard for an attacker | ||
470 | * to infer how many packets were sent between two points in time. | ||
471 | */ | ||
472 | u32 ip_idents_reserve(u32 hash, int segs) | ||
473 | { | ||
474 | struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; | ||
475 | u32 old = ACCESS_ONCE(bucket->stamp32); | ||
476 | u32 now = (u32)jiffies; | ||
477 | u32 delta = 0; | ||
478 | |||
479 | if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) | ||
480 | delta = prandom_u32_max(now - old); | ||
481 | |||
482 | return atomic_add_return(segs + delta, &bucket->id) - segs; | ||
483 | } | ||
484 | EXPORT_SYMBOL(ip_idents_reserve); | ||
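A userspace toy model of the perturbation (editor's sketch: assumes now > stamp, and ignores wraparound, concurrency, and the atomic/cmpxchg details of the real function):

    #include <stdlib.h>

    struct bucket { unsigned int id, stamp; };

    /* burn up to (now - stamp) extra ids when the bucket sat idle, so
     * the visible id delta no longer counts the packets actually sent */
    static unsigned int reserve(struct bucket *b, unsigned int now, int segs)
    {
            unsigned int delta = 0;

            if (b->stamp != now) {
                    delta = (unsigned int)rand() % (now - b->stamp);
                    b->stamp = now;
            }
            b->id += segs + delta;
            return b->id - segs;
    }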
462 | 485 | ||
463 | void __ip_select_ident(struct iphdr *iph, int segs) | 486 | void __ip_select_ident(struct iphdr *iph, int segs) |
464 | { | 487 | { |
@@ -467,7 +490,10 @@ void __ip_select_ident(struct iphdr *iph, int segs) | |||
467 | 490 | ||
468 | net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); | 491 | net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); |
469 | 492 | ||
470 | hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); | 493 | hash = jhash_3words((__force u32)iph->daddr, |
494 | (__force u32)iph->saddr, | ||
495 | iph->protocol, | ||
496 | ip_idents_hashrnd); | ||
471 | id = ip_idents_reserve(hash, segs); | 497 | id = ip_idents_reserve(hash, segs); |
472 | iph->id = htons(id); | 498 | iph->id = htons(id); |
473 | } | 499 | } |
@@ -1010,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) | |||
1010 | const struct iphdr *iph = (const struct iphdr *) skb->data; | 1036 | const struct iphdr *iph = (const struct iphdr *) skb->data; |
1011 | struct flowi4 fl4; | 1037 | struct flowi4 fl4; |
1012 | struct rtable *rt; | 1038 | struct rtable *rt; |
1013 | struct dst_entry *dst; | 1039 | struct dst_entry *odst = NULL; |
1014 | bool new = false; | 1040 | bool new = false; |
1015 | 1041 | ||
1016 | bh_lock_sock(sk); | 1042 | bh_lock_sock(sk); |
@@ -1018,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) | |||
1018 | if (!ip_sk_accept_pmtu(sk)) | 1044 | if (!ip_sk_accept_pmtu(sk)) |
1019 | goto out; | 1045 | goto out; |
1020 | 1046 | ||
1021 | rt = (struct rtable *) __sk_dst_get(sk); | 1047 | odst = sk_dst_get(sk); |
1022 | 1048 | ||
1023 | if (sock_owned_by_user(sk) || !rt) { | 1049 | if (sock_owned_by_user(sk) || !odst) { |
1024 | __ipv4_sk_update_pmtu(skb, sk, mtu); | 1050 | __ipv4_sk_update_pmtu(skb, sk, mtu); |
1025 | goto out; | 1051 | goto out; |
1026 | } | 1052 | } |
1027 | 1053 | ||
1028 | __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); | 1054 | __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); |
1029 | 1055 | ||
1030 | if (!__sk_dst_check(sk, 0)) { | 1056 | rt = (struct rtable *)odst; |
1057 | if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { | ||
1031 | rt = ip_route_output_flow(sock_net(sk), &fl4, sk); | 1058 | rt = ip_route_output_flow(sock_net(sk), &fl4, sk); |
1032 | if (IS_ERR(rt)) | 1059 | if (IS_ERR(rt)) |
1033 | goto out; | 1060 | goto out; |
@@ -1037,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) | |||
1037 | 1064 | ||
1038 | __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); | 1065 | __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); |
1039 | 1066 | ||
1040 | dst = dst_check(&rt->dst, 0); | 1067 | if (!dst_check(&rt->dst, 0)) { |
1041 | if (!dst) { | ||
1042 | if (new) | 1068 | if (new) |
1043 | dst_release(&rt->dst); | 1069 | dst_release(&rt->dst); |
1044 | 1070 | ||
@@ -1050,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) | |||
1050 | } | 1076 | } |
1051 | 1077 | ||
1052 | if (new) | 1078 | if (new) |
1053 | __sk_dst_set(sk, &rt->dst); | 1079 | sk_dst_set(sk, &rt->dst); |
1054 | 1080 | ||
1055 | out: | 1081 | out: |
1056 | bh_unlock_sock(sk); | 1082 | bh_unlock_sock(sk); |
1083 | dst_release(odst); | ||
1057 | } | 1084 | } |
1058 | EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); | 1085 | EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); |
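The change above swaps __sk_dst_get() (a borrowed pointer, only valid under the socket lock) for sk_dst_get(), which takes a reference that every exit path must drop; that is what the new dst_release(odst) after the out: label is for. The shape of the pattern in isolation:

    struct dst_entry *odst = sk_dst_get(sk);   /* +1 ref, may be NULL */

    /* ... inspect odst, possibly install a fresh route ... */

    dst_release(odst);                         /* NULL-safe, -1 ref */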
1059 | 1086 | ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c86624b36a62..c0c75688896e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -170,7 +170,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, | |||
170 | } | 170 | } |
171 | EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); | 171 | EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); |
172 | 172 | ||
173 | __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) | 173 | __u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, |
174 | __u16 *mssp) | ||
174 | { | 175 | { |
175 | const struct iphdr *iph = ip_hdr(skb); | 176 | const struct iphdr *iph = ip_hdr(skb); |
176 | const struct tcphdr *th = tcp_hdr(skb); | 177 | const struct tcphdr *th = tcp_hdr(skb); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb1dde37e678..9d2118e5fbc7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1108 | if (unlikely(tp->repair)) { | 1108 | if (unlikely(tp->repair)) { |
1109 | if (tp->repair_queue == TCP_RECV_QUEUE) { | 1109 | if (tp->repair_queue == TCP_RECV_QUEUE) { |
1110 | copied = tcp_send_rcvq(sk, msg, size); | 1110 | copied = tcp_send_rcvq(sk, msg, size); |
1111 | goto out; | 1111 | goto out_nopush; |
1112 | } | 1112 | } |
1113 | 1113 | ||
1114 | err = -EINVAL; | 1114 | err = -EINVAL; |
@@ -1282,6 +1282,7 @@ wait_for_memory: | |||
1282 | out: | 1282 | out: |
1283 | if (copied) | 1283 | if (copied) |
1284 | tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); | 1284 | tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); |
1285 | out_nopush: | ||
1285 | release_sock(sk); | 1286 | release_sock(sk); |
1286 | return copied + copied_syn; | 1287 | return copied + copied_syn; |
1287 | 1288 | ||
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 62e48cf84e60..9771563ab564 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c | |||
@@ -131,7 +131,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, | |||
131 | struct dst_entry *dst, | 131 | struct dst_entry *dst, |
132 | struct request_sock *req) | 132 | struct request_sock *req) |
133 | { | 133 | { |
134 | struct tcp_sock *tp = tcp_sk(sk); | 134 | struct tcp_sock *tp; |
135 | struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; | 135 | struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; |
136 | struct sock *child; | 136 | struct sock *child; |
137 | 137 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 40661fc1e233..7832d941dbcd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -1106,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | |||
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | /* D-SACK for already forgotten data... Do dumb counting. */ | 1108 | /* D-SACK for already forgotten data... Do dumb counting. */ |
1109 | if (dup_sack && tp->undo_marker && tp->undo_retrans && | 1109 | if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 && |
1110 | !after(end_seq_0, prior_snd_una) && | 1110 | !after(end_seq_0, prior_snd_una) && |
1111 | after(end_seq_0, tp->undo_marker)) | 1111 | after(end_seq_0, tp->undo_marker)) |
1112 | tp->undo_retrans--; | 1112 | tp->undo_retrans--; |
@@ -1162,7 +1162,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1162 | unsigned int new_len = (pkt_len / mss) * mss; | 1162 | unsigned int new_len = (pkt_len / mss) * mss; |
1163 | if (!in_sack && new_len < pkt_len) { | 1163 | if (!in_sack && new_len < pkt_len) { |
1164 | new_len += mss; | 1164 | new_len += mss; |
1165 | if (new_len > skb->len) | 1165 | if (new_len >= skb->len) |
1166 | return 0; | 1166 | return 0; |
1167 | } | 1167 | } |
1168 | pkt_len = new_len; | 1168 | pkt_len = new_len; |
@@ -1187,7 +1187,7 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1187 | 1187 | ||
1188 | /* Account D-SACK for retransmitted packet. */ | 1188 | /* Account D-SACK for retransmitted packet. */ |
1189 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1189 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
1190 | if (tp->undo_marker && tp->undo_retrans && | 1190 | if (tp->undo_marker && tp->undo_retrans > 0 && |
1191 | after(end_seq, tp->undo_marker)) | 1191 | after(end_seq, tp->undo_marker)) |
1192 | tp->undo_retrans--; | 1192 | tp->undo_retrans--; |
1193 | if (sacked & TCPCB_SACKED_ACKED) | 1193 | if (sacked & TCPCB_SACKED_ACKED) |
@@ -1893,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp) | |||
1893 | tp->lost_out = 0; | 1893 | tp->lost_out = 0; |
1894 | 1894 | ||
1895 | tp->undo_marker = 0; | 1895 | tp->undo_marker = 0; |
1896 | tp->undo_retrans = 0; | 1896 | tp->undo_retrans = -1; |
1897 | } | 1897 | } |
1898 | 1898 | ||
1899 | void tcp_clear_retrans(struct tcp_sock *tp) | 1899 | void tcp_clear_retrans(struct tcp_sock *tp) |
@@ -2475,7 +2475,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) | |||
2475 | * losses and/or application stalls), do not perform any further cwnd | 2475 | * losses and/or application stalls), do not perform any further cwnd |
2476 | * reductions, but instead slow start up to ssthresh. | 2476 | * reductions, but instead slow start up to ssthresh. |
2477 | */ | 2477 | */ |
2478 | static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) | 2478 | static void tcp_init_cwnd_reduction(struct sock *sk) |
2479 | { | 2479 | { |
2480 | struct tcp_sock *tp = tcp_sk(sk); | 2480 | struct tcp_sock *tp = tcp_sk(sk); |
2481 | 2481 | ||
@@ -2485,8 +2485,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) | |||
2485 | tp->prior_cwnd = tp->snd_cwnd; | 2485 | tp->prior_cwnd = tp->snd_cwnd; |
2486 | tp->prr_delivered = 0; | 2486 | tp->prr_delivered = 0; |
2487 | tp->prr_out = 0; | 2487 | tp->prr_out = 0; |
2488 | if (set_ssthresh) | 2488 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); |
2489 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | ||
2490 | TCP_ECN_queue_cwr(tp); | 2489 | TCP_ECN_queue_cwr(tp); |
2491 | } | 2490 | } |
2492 | 2491 | ||
@@ -2528,14 +2527,14 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) | |||
2528 | } | 2527 | } |
2529 | 2528 | ||
2530 | /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ | 2529 | /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ |
2531 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) | 2530 | void tcp_enter_cwr(struct sock *sk) |
2532 | { | 2531 | { |
2533 | struct tcp_sock *tp = tcp_sk(sk); | 2532 | struct tcp_sock *tp = tcp_sk(sk); |
2534 | 2533 | ||
2535 | tp->prior_ssthresh = 0; | 2534 | tp->prior_ssthresh = 0; |
2536 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 2535 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
2537 | tp->undo_marker = 0; | 2536 | tp->undo_marker = 0; |
2538 | tcp_init_cwnd_reduction(sk, set_ssthresh); | 2537 | tcp_init_cwnd_reduction(sk); |
2539 | tcp_set_ca_state(sk, TCP_CA_CWR); | 2538 | tcp_set_ca_state(sk, TCP_CA_CWR); |
2540 | } | 2539 | } |
2541 | } | 2540 | } |
@@ -2564,7 +2563,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) | |||
2564 | tp->retrans_stamp = 0; | 2563 | tp->retrans_stamp = 0; |
2565 | 2564 | ||
2566 | if (flag & FLAG_ECE) | 2565 | if (flag & FLAG_ECE) |
2567 | tcp_enter_cwr(sk, 1); | 2566 | tcp_enter_cwr(sk); |
2568 | 2567 | ||
2569 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { | 2568 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { |
2570 | tcp_try_keep_open(sk); | 2569 | tcp_try_keep_open(sk); |
@@ -2665,12 +2664,12 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
2665 | 2664 | ||
2666 | tp->prior_ssthresh = 0; | 2665 | tp->prior_ssthresh = 0; |
2667 | tp->undo_marker = tp->snd_una; | 2666 | tp->undo_marker = tp->snd_una; |
2668 | tp->undo_retrans = tp->retrans_out; | 2667 | tp->undo_retrans = tp->retrans_out ? : -1; |
2669 | 2668 | ||
2670 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 2669 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
2671 | if (!ece_ack) | 2670 | if (!ece_ack) |
2672 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | 2671 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
2673 | tcp_init_cwnd_reduction(sk, true); | 2672 | tcp_init_cwnd_reduction(sk); |
2674 | } | 2673 | } |
2675 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 2674 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
2676 | } | 2675 | } |
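
The new initializer `tp->retrans_out ? : -1` uses the GNU "elvis" extension: with the middle operand omitted, the conditional evaluates to the first operand when it is non-zero and to the third otherwise, with the first operand evaluated only once. Here it seeds undo_retrans with the current count of retransmitted packets, or with the -1 "none yet" sentinel when nothing is in flight as a retransmission. Spelled out:

/* a ? : b  is GNU C for  a ? a : b  (a evaluated once) */
tp->undo_retrans = tp->retrans_out ? tp->retrans_out : -1;
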
@@ -3346,7 +3345,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | |||
3346 | tp->tlp_high_seq = 0; | 3345 | tp->tlp_high_seq = 0; |
3347 | /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ | 3346 | /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ |
3348 | if (!(flag & FLAG_DSACKING_ACK)) { | 3347 | if (!(flag & FLAG_DSACKING_ACK)) { |
3349 | tcp_init_cwnd_reduction(sk, true); | 3348 | tcp_init_cwnd_reduction(sk); |
3350 | tcp_set_ca_state(sk, TCP_CA_CWR); | 3349 | tcp_set_ca_state(sk, TCP_CA_CWR); |
3351 | tcp_end_cwnd_reduction(sk); | 3350 | tcp_end_cwnd_reduction(sk); |
3352 | tcp_try_keep_open(sk); | 3351 | tcp_try_keep_open(sk); |
@@ -5877,3 +5876,153 @@ discard: | |||
5877 | return 0; | 5876 | return 0; |
5878 | } | 5877 | } |
5879 | EXPORT_SYMBOL(tcp_rcv_state_process); | 5878 | EXPORT_SYMBOL(tcp_rcv_state_process); |
5879 | |||
5880 | static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) | ||
5881 | { | ||
5882 | struct inet_request_sock *ireq = inet_rsk(req); | ||
5883 | |||
5884 | if (family == AF_INET) | ||
5885 | LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), | ||
5886 | &ireq->ir_rmt_addr, port); | ||
5887 | #if IS_ENABLED(CONFIG_IPV6) | ||
5888 | else if (family == AF_INET6) | ||
5889 | LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"), | ||
5890 | &ireq->ir_v6_rmt_addr, port); | ||
5891 | #endif | ||
5892 | } | ||
5893 | |||
5894 | int tcp_conn_request(struct request_sock_ops *rsk_ops, | ||
5895 | const struct tcp_request_sock_ops *af_ops, | ||
5896 | struct sock *sk, struct sk_buff *skb) | ||
5897 | { | ||
5898 | struct tcp_options_received tmp_opt; | ||
5899 | struct request_sock *req; | ||
5900 | struct tcp_sock *tp = tcp_sk(sk); | ||
5901 | struct dst_entry *dst = NULL; | ||
5902 | __u32 isn = TCP_SKB_CB(skb)->when; | ||
5903 | bool want_cookie = false, fastopen; | ||
5904 | struct flowi fl; | ||
5905 | struct tcp_fastopen_cookie foc = { .len = -1 }; | ||
5906 | int err; | ||
5907 | |||
5908 | |||
5909 | /* TW buckets are converted to open requests without | ||
5910 | * limitations; they conserve resources, and the peer is | ||
5911 | * evidently a real one. | ||
5912 | */ | ||
5913 | if ((sysctl_tcp_syncookies == 2 || | ||
5914 | inet_csk_reqsk_queue_is_full(sk)) && !isn) { | ||
5915 | want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); | ||
5916 | if (!want_cookie) | ||
5917 | goto drop; | ||
5918 | } | ||
5919 | |||
5920 | |||
5921 | /* Accept backlog is full. If we have already queued enough | ||
5922 | * of warm entries in syn queue, drop request. It is better than | ||
5923 | * clogging syn queue with openreqs with exponentially increasing | ||
5924 | * timeout. | ||
5925 | */ | ||
5926 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { | ||
5927 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | ||
5928 | goto drop; | ||
5929 | } | ||
5930 | |||
5931 | req = inet_reqsk_alloc(rsk_ops); | ||
5932 | if (!req) | ||
5933 | goto drop; | ||
5934 | |||
5935 | tcp_rsk(req)->af_specific = af_ops; | ||
5936 | |||
5937 | tcp_clear_options(&tmp_opt); | ||
5938 | tmp_opt.mss_clamp = af_ops->mss_clamp; | ||
5939 | tmp_opt.user_mss = tp->rx_opt.user_mss; | ||
5940 | tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); | ||
5941 | |||
5942 | if (want_cookie && !tmp_opt.saw_tstamp) | ||
5943 | tcp_clear_options(&tmp_opt); | ||
5944 | |||
5945 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | ||
5946 | tcp_openreq_init(req, &tmp_opt, skb, sk); | ||
5947 | |||
5948 | af_ops->init_req(req, sk, skb); | ||
5949 | |||
5950 | if (security_inet_conn_request(sk, skb, req)) | ||
5951 | goto drop_and_free; | ||
5952 | |||
5953 | if (!want_cookie || tmp_opt.tstamp_ok) | ||
5954 | TCP_ECN_create_request(req, skb, sock_net(sk)); | ||
5955 | |||
5956 | if (want_cookie) { | ||
5957 | isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); | ||
5958 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
5959 | } else if (!isn) { | ||
5960 | /* VJ's idea. We save last timestamp seen | ||
5961 | * from the destination in peer table, when entering | ||
5962 | * state TIME-WAIT, and check against it before | ||
5963 | * accepting new connection request. | ||
5964 | * | ||
5965 | * If "isn" is not zero, this request hit alive | ||
5966 | * timewait bucket, so that all the necessary checks | ||
5967 | * are made in the function processing timewait state. | ||
5968 | */ | ||
5969 | if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { | ||
5970 | bool strict; | ||
5971 | |||
5972 | dst = af_ops->route_req(sk, &fl, req, &strict); | ||
5973 | if (dst && strict && | ||
5974 | !tcp_peer_is_proven(req, dst, true)) { | ||
5975 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | ||
5976 | goto drop_and_release; | ||
5977 | } | ||
5978 | } | ||
5979 | /* Kill the following clause, if you dislike this way. */ | ||
5980 | else if (!sysctl_tcp_syncookies && | ||
5981 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < | ||
5982 | (sysctl_max_syn_backlog >> 2)) && | ||
5983 | !tcp_peer_is_proven(req, dst, false)) { | ||
5984 | /* Without syncookies last quarter of | ||
5985 | * backlog is filled with destinations, | ||
5986 | * proven to be alive. | ||
5987 | * It means that we continue to communicate | ||
5988 | * to destinations, already remembered | ||
5989 | * to the moment of synflood. | ||
5990 | */ | ||
5991 | pr_drop_req(req, ntohs(tcp_hdr(skb)->source), | ||
5992 | rsk_ops->family); | ||
5993 | goto drop_and_release; | ||
5994 | } | ||
5995 | |||
5996 | isn = af_ops->init_seq(skb); | ||
5997 | } | ||
5998 | if (!dst) { | ||
5999 | dst = af_ops->route_req(sk, &fl, req, NULL); | ||
6000 | if (!dst) | ||
6001 | goto drop_and_free; | ||
6002 | } | ||
6003 | |||
6004 | tcp_rsk(req)->snt_isn = isn; | ||
6005 | tcp_openreq_init_rwin(req, sk, dst); | ||
6006 | fastopen = !want_cookie && | ||
6007 | tcp_try_fastopen(sk, skb, req, &foc, dst); | ||
6008 | err = af_ops->send_synack(sk, dst, &fl, req, | ||
6009 | skb_get_queue_mapping(skb), &foc); | ||
6010 | if (!fastopen) { | ||
6011 | if (err || want_cookie) | ||
6012 | goto drop_and_free; | ||
6013 | |||
6014 | tcp_rsk(req)->listener = NULL; | ||
6015 | af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | ||
6016 | } | ||
6017 | |||
6018 | return 0; | ||
6019 | |||
6020 | drop_and_release: | ||
6021 | dst_release(dst); | ||
6022 | drop_and_free: | ||
6023 | reqsk_free(req); | ||
6024 | drop: | ||
6025 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | ||
6026 | return 0; | ||
6027 | } | ||
6028 | EXPORT_SYMBOL(tcp_conn_request); | ||
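
tcp_conn_request() lifts the SYN handling previously duplicated per address family (tcp_v4_conn_request() below, and its IPv6 twin) into one generic routine; everything family-specific is reached through the request_sock_ops and tcp_request_sock_ops tables. A sketch of the hooks a family must supply, abridged to the members visible in this patch (the real struct tcp_request_sock_ops in include/net/tcp.h carries these and the MD5 hooks):

/* Hooks consumed by tcp_conn_request(); names as used above. */
struct tcp_request_sock_ops {
	u16 mss_clamp;				/* family default MSS clamp */
	/* md5_lookup / calc_md5_hash under CONFIG_TCP_MD5SIG ... */
	void (*init_req)(struct request_sock *req, struct sock *sk,
			 struct sk_buff *skb);	/* fill family addrs/options */
#ifdef CONFIG_SYN_COOKIES
	__u32 (*cookie_init_seq)(struct sock *sk, const struct sk_buff *skb,
				 __u16 *mss);
#endif
	struct dst_entry *(*route_req)(struct sock *sk, struct flowi *fl,
				       const struct request_sock *req,
				       bool *strict);
	__u32 (*init_seq)(const struct sk_buff *skb);
	int (*send_synack)(struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   u16 queue_mapping, struct tcp_fastopen_cookie *foc);
	void (*queue_hash_add)(struct sock *sk, struct request_sock *req,
			       unsigned long timeout);
};
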
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 180336d47df6..1edc739b9da5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, | |||
99 | struct inet_hashinfo tcp_hashinfo; | 99 | struct inet_hashinfo tcp_hashinfo; |
100 | EXPORT_SYMBOL(tcp_hashinfo); | 100 | EXPORT_SYMBOL(tcp_hashinfo); |
101 | 101 | ||
102 | static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) | 102 | static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) |
103 | { | 103 | { |
104 | return secure_tcp_sequence_number(ip_hdr(skb)->daddr, | 104 | return secure_tcp_sequence_number(ip_hdr(skb)->daddr, |
105 | ip_hdr(skb)->saddr, | 105 | ip_hdr(skb)->saddr, |
@@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
208 | inet->inet_dport = usin->sin_port; | 208 | inet->inet_dport = usin->sin_port; |
209 | inet->inet_daddr = daddr; | 209 | inet->inet_daddr = daddr; |
210 | 210 | ||
211 | inet_set_txhash(sk); | ||
212 | |||
211 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 213 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
212 | if (inet_opt) | 214 | if (inet_opt) |
213 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; | 215 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
@@ -814,6 +816,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
814 | * socket. | 816 | * socket. |
815 | */ | 817 | */ |
816 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | 818 | static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, |
819 | struct flowi *fl, | ||
817 | struct request_sock *req, | 820 | struct request_sock *req, |
818 | u16 queue_mapping, | 821 | u16 queue_mapping, |
819 | struct tcp_fastopen_cookie *foc) | 822 | struct tcp_fastopen_cookie *foc) |
@@ -837,24 +840,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
837 | ireq->ir_rmt_addr, | 840 | ireq->ir_rmt_addr, |
838 | ireq->opt); | 841 | ireq->opt); |
839 | err = net_xmit_eval(err); | 842 | err = net_xmit_eval(err); |
840 | if (!tcp_rsk(req)->snt_synack && !err) | ||
841 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
842 | } | 843 | } |
843 | 844 | ||
844 | return err; | 845 | return err; |
845 | } | 846 | } |
846 | 847 | ||
847 | static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) | ||
848 | { | ||
849 | int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); | ||
850 | |||
851 | if (!res) { | ||
852 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); | ||
853 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); | ||
854 | } | ||
855 | return res; | ||
856 | } | ||
857 | |||
858 | /* | 848 | /* |
859 | * IPv4 request_sock destructor. | 849 | * IPv4 request_sock destructor. |
860 | */ | 850 | */ |
@@ -1237,160 +1227,68 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | |||
1237 | 1227 | ||
1238 | #endif | 1228 | #endif |
1239 | 1229 | ||
1230 | static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, | ||
1231 | struct sk_buff *skb) | ||
1232 | { | ||
1233 | struct inet_request_sock *ireq = inet_rsk(req); | ||
1234 | |||
1235 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; | ||
1236 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; | ||
1237 | ireq->no_srccheck = inet_sk(sk)->transparent; | ||
1238 | ireq->opt = tcp_v4_save_options(skb); | ||
1239 | } | ||
1240 | |||
1241 | static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, | ||
1242 | const struct request_sock *req, | ||
1243 | bool *strict) | ||
1244 | { | ||
1245 | struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); | ||
1246 | |||
1247 | if (strict) { | ||
1248 | if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) | ||
1249 | *strict = true; | ||
1250 | else | ||
1251 | *strict = false; | ||
1252 | } | ||
1253 | |||
1254 | return dst; | ||
1255 | } | ||
1256 | |||
1240 | struct request_sock_ops tcp_request_sock_ops __read_mostly = { | 1257 | struct request_sock_ops tcp_request_sock_ops __read_mostly = { |
1241 | .family = PF_INET, | 1258 | .family = PF_INET, |
1242 | .obj_size = sizeof(struct tcp_request_sock), | 1259 | .obj_size = sizeof(struct tcp_request_sock), |
1243 | .rtx_syn_ack = tcp_v4_rtx_synack, | 1260 | .rtx_syn_ack = tcp_rtx_synack, |
1244 | .send_ack = tcp_v4_reqsk_send_ack, | 1261 | .send_ack = tcp_v4_reqsk_send_ack, |
1245 | .destructor = tcp_v4_reqsk_destructor, | 1262 | .destructor = tcp_v4_reqsk_destructor, |
1246 | .send_reset = tcp_v4_send_reset, | 1263 | .send_reset = tcp_v4_send_reset, |
1247 | .syn_ack_timeout = tcp_syn_ack_timeout, | 1264 | .syn_ack_timeout = tcp_syn_ack_timeout, |
1248 | }; | 1265 | }; |
1249 | 1266 | ||
1250 | #ifdef CONFIG_TCP_MD5SIG | ||
1251 | static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { | 1267 | static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { |
1268 | .mss_clamp = TCP_MSS_DEFAULT, | ||
1269 | #ifdef CONFIG_TCP_MD5SIG | ||
1252 | .md5_lookup = tcp_v4_reqsk_md5_lookup, | 1270 | .md5_lookup = tcp_v4_reqsk_md5_lookup, |
1253 | .calc_md5_hash = tcp_v4_md5_hash_skb, | 1271 | .calc_md5_hash = tcp_v4_md5_hash_skb, |
1254 | }; | ||
1255 | #endif | 1272 | #endif |
1273 | .init_req = tcp_v4_init_req, | ||
1274 | #ifdef CONFIG_SYN_COOKIES | ||
1275 | .cookie_init_seq = cookie_v4_init_sequence, | ||
1276 | #endif | ||
1277 | .route_req = tcp_v4_route_req, | ||
1278 | .init_seq = tcp_v4_init_sequence, | ||
1279 | .send_synack = tcp_v4_send_synack, | ||
1280 | .queue_hash_add = inet_csk_reqsk_queue_hash_add, | ||
1281 | }; | ||
1256 | 1282 | ||
1257 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 1283 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
1258 | { | 1284 | { |
1259 | struct tcp_options_received tmp_opt; | ||
1260 | struct request_sock *req; | ||
1261 | struct inet_request_sock *ireq; | ||
1262 | struct tcp_sock *tp = tcp_sk(sk); | ||
1263 | struct dst_entry *dst = NULL; | ||
1264 | __be32 saddr = ip_hdr(skb)->saddr; | ||
1265 | __be32 daddr = ip_hdr(skb)->daddr; | ||
1266 | __u32 isn = TCP_SKB_CB(skb)->when; | ||
1267 | bool want_cookie = false, fastopen; | ||
1268 | struct flowi4 fl4; | ||
1269 | struct tcp_fastopen_cookie foc = { .len = -1 }; | ||
1270 | int err; | ||
1271 | |||
1272 | /* Never answer to SYNs sent to broadcast or multicast */ | 1285 | /* Never answer to SYNs sent to broadcast or multicast */ |

1273 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | 1286 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
1274 | goto drop; | 1287 | goto drop; |
1275 | 1288 | ||
1276 | /* TW buckets are converted to open requests without | 1289 | return tcp_conn_request(&tcp_request_sock_ops, |
1277 | * limitations; they conserve resources, and the peer is | 1290 | &tcp_request_sock_ipv4_ops, sk, skb); |
1278 | * evidently a real one. | ||
1279 | */ | ||
1280 | if ((sysctl_tcp_syncookies == 2 || | ||
1281 | inet_csk_reqsk_queue_is_full(sk)) && !isn) { | ||
1282 | want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); | ||
1283 | if (!want_cookie) | ||
1284 | goto drop; | ||
1285 | } | ||
1286 | |||
1287 | /* Accept backlog is full. If we have already queued enough | ||
1288 | * of warm entries in syn queue, drop request. It is better than | ||
1289 | * clogging syn queue with openreqs with exponentially increasing | ||
1290 | * timeout. | ||
1291 | */ | ||
1292 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { | ||
1293 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | ||
1294 | goto drop; | ||
1295 | } | ||
1296 | |||
1297 | req = inet_reqsk_alloc(&tcp_request_sock_ops); | ||
1298 | if (!req) | ||
1299 | goto drop; | ||
1300 | |||
1301 | #ifdef CONFIG_TCP_MD5SIG | ||
1302 | tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; | ||
1303 | #endif | ||
1304 | |||
1305 | tcp_clear_options(&tmp_opt); | ||
1306 | tmp_opt.mss_clamp = TCP_MSS_DEFAULT; | ||
1307 | tmp_opt.user_mss = tp->rx_opt.user_mss; | ||
1308 | tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); | ||
1309 | |||
1310 | if (want_cookie && !tmp_opt.saw_tstamp) | ||
1311 | tcp_clear_options(&tmp_opt); | ||
1312 | |||
1313 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | ||
1314 | tcp_openreq_init(req, &tmp_opt, skb, sk); | ||
1315 | |||
1316 | ireq = inet_rsk(req); | ||
1317 | ireq->ir_loc_addr = daddr; | ||
1318 | ireq->ir_rmt_addr = saddr; | ||
1319 | ireq->no_srccheck = inet_sk(sk)->transparent; | ||
1320 | ireq->opt = tcp_v4_save_options(skb); | ||
1321 | 1291 | ||
1322 | if (security_inet_conn_request(sk, skb, req)) | ||
1323 | goto drop_and_free; | ||
1324 | |||
1325 | if (!want_cookie || tmp_opt.tstamp_ok) | ||
1326 | TCP_ECN_create_request(req, skb, sock_net(sk)); | ||
1327 | |||
1328 | if (want_cookie) { | ||
1329 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | ||
1330 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
1331 | } else if (!isn) { | ||
1332 | /* VJ's idea. We save last timestamp seen | ||
1333 | * from the destination in peer table, when entering | ||
1334 | * state TIME-WAIT, and check against it before | ||
1335 | * accepting new connection request. | ||
1336 | * | ||
1337 | * If "isn" is not zero, this request hit alive | ||
1338 | * timewait bucket, so that all the necessary checks | ||
1339 | * are made in the function processing timewait state. | ||
1340 | */ | ||
1341 | if (tmp_opt.saw_tstamp && | ||
1342 | tcp_death_row.sysctl_tw_recycle && | ||
1343 | (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && | ||
1344 | fl4.daddr == saddr) { | ||
1345 | if (!tcp_peer_is_proven(req, dst, true)) { | ||
1346 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | ||
1347 | goto drop_and_release; | ||
1348 | } | ||
1349 | } | ||
1350 | /* Kill the following clause, if you dislike this way. */ | ||
1351 | else if (!sysctl_tcp_syncookies && | ||
1352 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < | ||
1353 | (sysctl_max_syn_backlog >> 2)) && | ||
1354 | !tcp_peer_is_proven(req, dst, false)) { | ||
1355 | /* Without syncookies last quarter of | ||
1356 | * backlog is filled with destinations, | ||
1357 | * proven to be alive. | ||
1358 | * It means that we continue to communicate | ||
1359 | * to destinations, already remembered | ||
1360 | * to the moment of synflood. | ||
1361 | */ | ||
1362 | LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), | ||
1363 | &saddr, ntohs(tcp_hdr(skb)->source)); | ||
1364 | goto drop_and_release; | ||
1365 | } | ||
1366 | |||
1367 | isn = tcp_v4_init_sequence(skb); | ||
1368 | } | ||
1369 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) | ||
1370 | goto drop_and_free; | ||
1371 | |||
1372 | tcp_rsk(req)->snt_isn = isn; | ||
1373 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1374 | tcp_openreq_init_rwin(req, sk, dst); | ||
1375 | fastopen = !want_cookie && | ||
1376 | tcp_try_fastopen(sk, skb, req, &foc, dst); | ||
1377 | err = tcp_v4_send_synack(sk, dst, req, | ||
1378 | skb_get_queue_mapping(skb), &foc); | ||
1379 | if (!fastopen) { | ||
1380 | if (err || want_cookie) | ||
1381 | goto drop_and_free; | ||
1382 | |||
1383 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1384 | tcp_rsk(req)->listener = NULL; | ||
1385 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | ||
1386 | } | ||
1387 | |||
1388 | return 0; | ||
1389 | |||
1390 | drop_and_release: | ||
1391 | dst_release(dst); | ||
1392 | drop_and_free: | ||
1393 | reqsk_free(req); | ||
1394 | drop: | 1292 | drop: |
1395 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 1293 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
1396 | return 0; | 1294 | return 0; |
@@ -1438,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1438 | newinet->mc_ttl = ip_hdr(skb)->ttl; | 1336 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
1439 | newinet->rcv_tos = ip_hdr(skb)->tos; | 1337 | newinet->rcv_tos = ip_hdr(skb)->tos; |
1440 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1338 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1339 | inet_set_txhash(newsk); | ||
1441 | if (inet_opt) | 1340 | if (inet_opt) |
1442 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; | 1341 | inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
1443 | newinet->inet_id = newtp->write_seq ^ jiffies; | 1342 | newinet->inet_id = newtp->write_seq ^ jiffies; |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e68e0d4af6c9..1649988bd1b6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
298 | tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; | 298 | tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; |
299 | tw->tw_tclass = np->tclass; | 299 | tw->tw_tclass = np->tclass; |
300 | tw->tw_flowlabel = np->flow_label >> 12; | 300 | tw->tw_flowlabel = np->flow_label >> 12; |
301 | tw->tw_ipv6only = np->ipv6only; | 301 | tw->tw_ipv6only = sk->sk_ipv6only; |
302 | } | 302 | } |
303 | #endif | 303 | #endif |
304 | 304 | ||
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4e86c59ec7f7..55046ecd083e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c | |||
@@ -309,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) | |||
309 | 309 | ||
310 | th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, | 310 | th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, |
311 | iph->daddr, 0); | 311 | iph->daddr, 0); |
312 | skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; | 312 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; |
313 | 313 | ||
314 | return tcp_gro_complete(skb); | 314 | return tcp_gro_complete(skb); |
315 | } | 315 | } |
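
In tcp4_gro_complete(), plain assignment of SKB_GSO_TCPV4 would wipe any gso_type bits already recorded on the aggregated skb; switching to `|=` presumably keeps flags that an outer (tunnel) handler has already set intact. Illustrative effect, where the UDP-tunnel flag is an assumption about the caller:

/* before: clobbers whatever was there */
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

/* after: TCPv4 is added to, e.g., a tunnel flag set earlier */
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
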
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d92bce0ea24e..8fcfc91964ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
916 | skb_orphan(skb); | 916 | skb_orphan(skb); |
917 | skb->sk = sk; | 917 | skb->sk = sk; |
918 | skb->destructor = tcp_wfree; | 918 | skb->destructor = tcp_wfree; |
919 | skb_set_hash_from_sk(skb, sk); | ||
919 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); | 920 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); |
920 | 921 | ||
921 | /* Build TCP header and checksum it. */ | 922 | /* Build TCP header and checksum it. */ |
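
tcp_transmit_skb() now stamps each outgoing skb with the connection's transmit hash. Both tcp_v4_connect() and tcp_v4_syn_recv_sock() gain inet_set_txhash() calls in this patch, so sk->sk_txhash is expected to hold a flow hash computed once at connect/accept time; copying it here gives every segment of the flow a stable hash for steering without re-dissecting headers on each transmit. A hedged sketch of what skb_set_hash_from_sk() is assumed to do:

/* Assumed helper (sketch): propagate the socket's precomputed flow hash. */
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
{
	if (sk->sk_txhash) {
		skb->l4_hash = 1;		/* mark it as an L4 (connection) hash */
		skb->hash = sk->sk_txhash;
	}
}
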
@@ -978,7 +979,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
978 | if (likely(err <= 0)) | 979 | if (likely(err <= 0)) |
979 | return err; | 980 | return err; |
980 | 981 | ||
981 | tcp_enter_cwr(sk, 1); | 982 | tcp_enter_cwr(sk); |
982 | 983 | ||
983 | return net_xmit_eval(err); | 984 | return net_xmit_eval(err); |
984 | } | 985 | } |
@@ -2525,8 +2526,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2525 | if (!tp->retrans_stamp) | 2526 | if (!tp->retrans_stamp) |
2526 | tp->retrans_stamp = TCP_SKB_CB(skb)->when; | 2527 | tp->retrans_stamp = TCP_SKB_CB(skb)->when; |
2527 | 2528 | ||
2528 | tp->undo_retrans += tcp_skb_pcount(skb); | ||
2529 | |||
2530 | /* snd_nxt is stored to detect loss of retransmitted segment, | 2529 | /* snd_nxt is stored to detect loss of retransmitted segment, |
2531 | * see tcp_input.c tcp_sacktag_write_queue(). | 2530 | * see tcp_input.c tcp_sacktag_write_queue(). |
2532 | */ | 2531 | */ |
@@ -2534,6 +2533,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2534 | } else if (err != -EBUSY) { | 2533 | } else if (err != -EBUSY) { |
2535 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); | 2534 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); |
2536 | } | 2535 | } |
2536 | |||
2537 | if (tp->undo_retrans < 0) | ||
2538 | tp->undo_retrans = 0; | ||
2539 | tp->undo_retrans += tcp_skb_pcount(skb); | ||
2537 | return err; | 2540 | return err; |
2538 | } | 2541 | } |
2539 | 2542 | ||
@@ -3299,3 +3302,18 @@ void tcp_send_probe0(struct sock *sk) | |||
3299 | TCP_RTO_MAX); | 3302 | TCP_RTO_MAX); |
3300 | } | 3303 | } |
3301 | } | 3304 | } |
3305 | |||
3306 | int tcp_rtx_synack(struct sock *sk, struct request_sock *req) | ||
3307 | { | ||
3308 | const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; | ||
3309 | struct flowi fl; | ||
3310 | int res; | ||
3311 | |||
3312 | res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); | ||
3313 | if (!res) { | ||
3314 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); | ||
3315 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); | ||
3316 | } | ||
3317 | return res; | ||
3318 | } | ||
3319 | EXPORT_SYMBOL(tcp_rtx_synack); | ||
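
tcp_rtx_synack() replaces the per-family SYN-ACK retransmit wrappers (tcp_v4_rtx_synack() is deleted above): the request's af_specific ops supply send_synack, and passing a NULL dst with a fresh flowi leaves routing to the family handler. The dispatch, stripped to its core (MIB counters omitted; that a NULL dst makes the family send_synack re-route the request is an assumption about its internals):

static int rtx_synack_core(struct sock *sk, struct request_sock *req)
{
	struct flowi fl;

	/* req->rsk_ops->rtx_syn_ack points here via the ops table above */
	return tcp_rsk(req)->af_specific->send_synack(sk, NULL, &fl,
						      req, 0, NULL);
}
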
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d92f94b7e402..f57c0e4c2326 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -594,27 +594,6 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, | |||
594 | return true; | 594 | return true; |
595 | } | 595 | } |
596 | 596 | ||
597 | static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, | ||
598 | __be16 loc_port, __be32 loc_addr, | ||
599 | __be16 rmt_port, __be32 rmt_addr, | ||
600 | int dif) | ||
601 | { | ||
602 | struct hlist_nulls_node *node; | ||
603 | struct sock *s = sk; | ||
604 | unsigned short hnum = ntohs(loc_port); | ||
605 | |||
606 | sk_nulls_for_each_from(s, node) { | ||
607 | if (__udp_is_mcast_sock(net, s, | ||
608 | loc_port, loc_addr, | ||
609 | rmt_port, rmt_addr, | ||
610 | dif, hnum)) | ||
611 | goto found; | ||
612 | } | ||
613 | s = NULL; | ||
614 | found: | ||
615 | return s; | ||
616 | } | ||
617 | |||
618 | /* | 597 | /* |
619 | * This routine is called by the ICMP module when it gets some | 598 | * This routine is called by the ICMP module when it gets some |
620 | * sort of error condition. If err < 0 then the socket should | 599 | * sort of error condition. If err < 0 then the socket should |
@@ -1588,8 +1567,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1588 | goto csum_error; | 1567 | goto csum_error; |
1589 | 1568 | ||
1590 | 1569 | ||
1591 | if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) | 1570 | if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { |
1571 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, | ||
1572 | is_udplite); | ||
1592 | goto drop; | 1573 | goto drop; |
1574 | } | ||
1593 | 1575 | ||
1594 | rc = 0; | 1576 | rc = 0; |
1595 | 1577 | ||
@@ -1637,6 +1619,8 @@ static void flush_stack(struct sock **stack, unsigned int count, | |||
1637 | 1619 | ||
1638 | if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) | 1620 | if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) |
1639 | skb1 = NULL; | 1621 | skb1 = NULL; |
1622 | |||
1623 | sock_put(sk); | ||
1640 | } | 1624 | } |
1641 | if (unlikely(skb1)) | 1625 | if (unlikely(skb1)) |
1642 | kfree_skb(skb1); | 1626 | kfree_skb(skb1); |
@@ -1665,41 +1649,50 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1665 | struct udp_table *udptable) | 1649 | struct udp_table *udptable) |
1666 | { | 1650 | { |
1667 | struct sock *sk, *stack[256 / sizeof(struct sock *)]; | 1651 | struct sock *sk, *stack[256 / sizeof(struct sock *)]; |
1668 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); | 1652 | struct hlist_nulls_node *node; |
1669 | int dif; | 1653 | unsigned short hnum = ntohs(uh->dest); |
1670 | unsigned int i, count = 0; | 1654 | struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); |
1655 | int dif = skb->dev->ifindex; | ||
1656 | unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); | ||
1657 | unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); | ||
1658 | |||
1659 | if (use_hash2) { | ||
1660 | hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & | ||
1661 | udp_table.mask; | ||
1662 | hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask; | ||
1663 | start_lookup: | ||
1664 | hslot = &udp_table.hash2[hash2]; | ||
1665 | offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); | ||
1666 | } | ||
1671 | 1667 | ||
1672 | spin_lock(&hslot->lock); | 1668 | spin_lock(&hslot->lock); |
1673 | sk = sk_nulls_head(&hslot->head); | 1669 | sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { |
1674 | dif = skb->dev->ifindex; | 1670 | if (__udp_is_mcast_sock(net, sk, |
1675 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 1671 | uh->dest, daddr, |
1676 | while (sk) { | 1672 | uh->source, saddr, |
1677 | stack[count++] = sk; | 1673 | dif, hnum)) { |
1678 | sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, | 1674 | if (unlikely(count == ARRAY_SIZE(stack))) { |
1679 | daddr, uh->source, saddr, dif); | 1675 | flush_stack(stack, count, skb, ~0); |
1680 | if (unlikely(count == ARRAY_SIZE(stack))) { | 1676 | count = 0; |
1681 | if (!sk) | 1677 | } |
1682 | break; | 1678 | stack[count++] = sk; |
1683 | flush_stack(stack, count, skb, ~0); | 1679 | sock_hold(sk); |
1684 | count = 0; | ||
1685 | } | 1680 | } |
1686 | } | 1681 | } |
1687 | /* | ||
1688 | * before releasing chain lock, we must take a reference on sockets | ||
1689 | */ | ||
1690 | for (i = 0; i < count; i++) | ||
1691 | sock_hold(stack[i]); | ||
1692 | 1682 | ||
1693 | spin_unlock(&hslot->lock); | 1683 | spin_unlock(&hslot->lock); |
1694 | 1684 | ||
1685 | /* Also lookup *:port if we are using hash2 and haven't done so yet. */ | ||
1686 | if (use_hash2 && hash2 != hash2_any) { | ||
1687 | hash2 = hash2_any; | ||
1688 | goto start_lookup; | ||
1689 | } | ||
1690 | |||
1695 | /* | 1691 | /* |
1696 | * do the slow work with no lock held | 1692 | * do the slow work with no lock held |
1697 | */ | 1693 | */ |
1698 | if (count) { | 1694 | if (count) { |
1699 | flush_stack(stack, count, skb, count - 1); | 1695 | flush_stack(stack, count, skb, count - 1); |
1700 | |||
1701 | for (i = 0; i < count; i++) | ||
1702 | sock_put(stack[i]); | ||
1703 | } else { | 1696 | } else { |
1704 | kfree_skb(skb); | 1697 | kfree_skb(skb); |
1705 | } | 1698 | } |
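
Two things change in __udp4_lib_mcast_deliver(). First, reference counting moves into the collection loop: sock_hold() is taken as each match is stacked and dropped by flush_stack() (per the earlier hunk), which stays correct now that collection can span two separately locked walks. Second, when a primary port slot is long (more than 10 sockets, the heuristic in the patch), the lookup switches to the secondary per-(address, port) hash and walks it twice: once for the exact destination address and once for the INADDR_ANY wildcard. Control-flow sketch of the two-pass walk, with names from the patch:

if (use_hash2) {
	hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & udp_table.mask;
	hash2     = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask;
start_lookup:
	hslot = &udp_table.hash2[hash2];
}

/* ... collect matching sockets from hslot under its lock ... */

if (use_hash2 && hash2 != hash2_any) {
	hash2 = hash2_any;	/* second pass: the *:port wildcard chain */
	goto start_lookup;
}
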
@@ -2523,79 +2516,3 @@ void __init udp_init(void) | |||
2523 | sysctl_udp_rmem_min = SK_MEM_QUANTUM; | 2516 | sysctl_udp_rmem_min = SK_MEM_QUANTUM; |
2524 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; | 2517 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; |
2525 | } | 2518 | } |
2526 | |||
2527 | struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | ||
2528 | netdev_features_t features) | ||
2529 | { | ||
2530 | struct sk_buff *segs = ERR_PTR(-EINVAL); | ||
2531 | u16 mac_offset = skb->mac_header; | ||
2532 | int mac_len = skb->mac_len; | ||
2533 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); | ||
2534 | __be16 protocol = skb->protocol; | ||
2535 | netdev_features_t enc_features; | ||
2536 | int udp_offset, outer_hlen; | ||
2537 | unsigned int oldlen; | ||
2538 | bool need_csum; | ||
2539 | |||
2540 | oldlen = (u16)~skb->len; | ||
2541 | |||
2542 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) | ||
2543 | goto out; | ||
2544 | |||
2545 | skb->encapsulation = 0; | ||
2546 | __skb_pull(skb, tnl_hlen); | ||
2547 | skb_reset_mac_header(skb); | ||
2548 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | ||
2549 | skb->mac_len = skb_inner_network_offset(skb); | ||
2550 | skb->protocol = htons(ETH_P_TEB); | ||
2551 | |||
2552 | need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); | ||
2553 | if (need_csum) | ||
2554 | skb->encap_hdr_csum = 1; | ||
2555 | |||
2556 | /* segment inner packet. */ | ||
2557 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); | ||
2558 | segs = skb_mac_gso_segment(skb, enc_features); | ||
2559 | if (!segs || IS_ERR(segs)) { | ||
2560 | skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, | ||
2561 | mac_len); | ||
2562 | goto out; | ||
2563 | } | ||
2564 | |||
2565 | outer_hlen = skb_tnl_header_len(skb); | ||
2566 | udp_offset = outer_hlen - tnl_hlen; | ||
2567 | skb = segs; | ||
2568 | do { | ||
2569 | struct udphdr *uh; | ||
2570 | int len; | ||
2571 | |||
2572 | skb_reset_inner_headers(skb); | ||
2573 | skb->encapsulation = 1; | ||
2574 | |||
2575 | skb->mac_len = mac_len; | ||
2576 | |||
2577 | skb_push(skb, outer_hlen); | ||
2578 | skb_reset_mac_header(skb); | ||
2579 | skb_set_network_header(skb, mac_len); | ||
2580 | skb_set_transport_header(skb, udp_offset); | ||
2581 | len = skb->len - udp_offset; | ||
2582 | uh = udp_hdr(skb); | ||
2583 | uh->len = htons(len); | ||
2584 | |||
2585 | if (need_csum) { | ||
2586 | __be32 delta = htonl(oldlen + len); | ||
2587 | |||
2588 | uh->check = ~csum_fold((__force __wsum) | ||
2589 | ((__force u32)uh->check + | ||
2590 | (__force u32)delta)); | ||
2591 | uh->check = gso_make_checksum(skb, ~uh->check); | ||
2592 | |||
2593 | if (uh->check == 0) | ||
2594 | uh->check = CSUM_MANGLED_0; | ||
2595 | } | ||
2596 | |||
2597 | skb->protocol = protocol; | ||
2598 | } while ((skb = skb->next)); | ||
2599 | out: | ||
2600 | return segs; | ||
2601 | } | ||
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 546d2d439dda..59035bc3008d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c | |||
@@ -47,6 +47,82 @@ static int udp4_ufo_send_check(struct sk_buff *skb) | |||
47 | return 0; | 47 | return 0; |
48 | } | 48 | } |
49 | 49 | ||
50 | struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, | ||
51 | netdev_features_t features) | ||
52 | { | ||
53 | struct sk_buff *segs = ERR_PTR(-EINVAL); | ||
54 | u16 mac_offset = skb->mac_header; | ||
55 | int mac_len = skb->mac_len; | ||
56 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); | ||
57 | __be16 protocol = skb->protocol; | ||
58 | netdev_features_t enc_features; | ||
59 | int udp_offset, outer_hlen; | ||
60 | unsigned int oldlen; | ||
61 | bool need_csum; | ||
62 | |||
63 | oldlen = (u16)~skb->len; | ||
64 | |||
65 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) | ||
66 | goto out; | ||
67 | |||
68 | skb->encapsulation = 0; | ||
69 | __skb_pull(skb, tnl_hlen); | ||
70 | skb_reset_mac_header(skb); | ||
71 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | ||
72 | skb->mac_len = skb_inner_network_offset(skb); | ||
73 | skb->protocol = htons(ETH_P_TEB); | ||
74 | |||
75 | need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); | ||
76 | if (need_csum) | ||
77 | skb->encap_hdr_csum = 1; | ||
78 | |||
79 | /* segment inner packet. */ | ||
80 | enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); | ||
81 | segs = skb_mac_gso_segment(skb, enc_features); | ||
82 | if (IS_ERR_OR_NULL(segs)) { | ||
83 | skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, | ||
84 | mac_len); | ||
85 | goto out; | ||
86 | } | ||
87 | |||
88 | outer_hlen = skb_tnl_header_len(skb); | ||
89 | udp_offset = outer_hlen - tnl_hlen; | ||
90 | skb = segs; | ||
91 | do { | ||
92 | struct udphdr *uh; | ||
93 | int len; | ||
94 | |||
95 | skb_reset_inner_headers(skb); | ||
96 | skb->encapsulation = 1; | ||
97 | |||
98 | skb->mac_len = mac_len; | ||
99 | |||
100 | skb_push(skb, outer_hlen); | ||
101 | skb_reset_mac_header(skb); | ||
102 | skb_set_network_header(skb, mac_len); | ||
103 | skb_set_transport_header(skb, udp_offset); | ||
104 | len = skb->len - udp_offset; | ||
105 | uh = udp_hdr(skb); | ||
106 | uh->len = htons(len); | ||
107 | |||
108 | if (need_csum) { | ||
109 | __be32 delta = htonl(oldlen + len); | ||
110 | |||
111 | uh->check = ~csum_fold((__force __wsum) | ||
112 | ((__force u32)uh->check + | ||
113 | (__force u32)delta)); | ||
114 | uh->check = gso_make_checksum(skb, ~uh->check); | ||
115 | |||
116 | if (uh->check == 0) | ||
117 | uh->check = CSUM_MANGLED_0; | ||
118 | } | ||
119 | |||
120 | skb->protocol = protocol; | ||
121 | } while ((skb = skb->next)); | ||
122 | out: | ||
123 | return segs; | ||
124 | } | ||
125 | |||
50 | static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, | 126 | static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, |
51 | netdev_features_t features) | 127 | netdev_features_t features) |
52 | { | 128 | { |
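
skb_udp_tunnel_segment() moves verbatim from udp.c into udp_offload.c, keeping the UDP GSO helpers together; the only textual change in the body is the error check, where the open-coded test is replaced by the standard helper:

/* udp.c (removed) */
if (!segs || IS_ERR(segs)) { ... }

/* udp_offload.c (added): same test, canonical spelling */
if (IS_ERR_OR_NULL(segs)) { ... }
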
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c new file mode 100644 index 000000000000..61ec1a65207e --- /dev/null +++ b/net/ipv4/udp_tunnel.c | |||
@@ -0,0 +1,100 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/socket.h> | ||
4 | #include <linux/udp.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <net/udp.h> | ||
8 | #include <net/udp_tunnel.h> | ||
9 | #include <net/net_namespace.h> | ||
10 | |||
11 | int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, | ||
12 | struct socket **sockp) | ||
13 | { | ||
14 | int err = -EINVAL; | ||
15 | struct socket *sock = NULL; | ||
16 | |||
17 | #if IS_ENABLED(CONFIG_IPV6) | ||
18 | if (cfg->family == AF_INET6) { | ||
19 | struct sockaddr_in6 udp6_addr; | ||
20 | |||
21 | err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); | ||
22 | if (err < 0) | ||
23 | goto error; | ||
24 | |||
25 | sk_change_net(sock->sk, net); | ||
26 | |||
27 | udp6_addr.sin6_family = AF_INET6; | ||
28 | memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, | ||
29 | sizeof(udp6_addr.sin6_addr)); | ||
30 | udp6_addr.sin6_port = cfg->local_udp_port; | ||
31 | err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, | ||
32 | sizeof(udp6_addr)); | ||
33 | if (err < 0) | ||
34 | goto error; | ||
35 | |||
36 | if (cfg->peer_udp_port) { | ||
37 | udp6_addr.sin6_family = AF_INET6; | ||
38 | memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, | ||
39 | sizeof(udp6_addr.sin6_addr)); | ||
40 | udp6_addr.sin6_port = cfg->peer_udp_port; | ||
41 | err = kernel_connect(sock, | ||
42 | (struct sockaddr *)&udp6_addr, | ||
43 | sizeof(udp6_addr), 0); | ||
44 | } | ||
45 | if (err < 0) | ||
46 | goto error; | ||
47 | |||
48 | udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums); | ||
49 | udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums); | ||
50 | } else | ||
51 | #endif | ||
52 | if (cfg->family == AF_INET) { | ||
53 | struct sockaddr_in udp_addr; | ||
54 | |||
55 | err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); | ||
56 | if (err < 0) | ||
57 | goto error; | ||
58 | |||
59 | sk_change_net(sock->sk, net); | ||
60 | |||
61 | udp_addr.sin_family = AF_INET; | ||
62 | udp_addr.sin_addr = cfg->local_ip; | ||
63 | udp_addr.sin_port = cfg->local_udp_port; | ||
64 | err = kernel_bind(sock, (struct sockaddr *)&udp_addr, | ||
65 | sizeof(udp_addr)); | ||
66 | if (err < 0) | ||
67 | goto error; | ||
68 | |||
69 | if (cfg->peer_udp_port) { | ||
70 | udp_addr.sin_family = AF_INET; | ||
71 | udp_addr.sin_addr = cfg->peer_ip; | ||
72 | udp_addr.sin_port = cfg->peer_udp_port; | ||
73 | err = kernel_connect(sock, | ||
74 | (struct sockaddr *)&udp_addr, | ||
75 | sizeof(udp_addr), 0); | ||
76 | if (err < 0) | ||
77 | goto error; | ||
78 | } | ||
79 | |||
80 | sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; | ||
81 | } else { | ||
82 | return -EPFNOSUPPORT; | ||
83 | } | ||
84 | |||
85 | |||
86 | *sockp = sock; | ||
87 | |||
88 | return 0; | ||
89 | |||
90 | error: | ||
91 | if (sock) { | ||
92 | kernel_sock_shutdown(sock, SHUT_RDWR); | ||
93 | sk_release_kernel(sock->sk); | ||
94 | } | ||
95 | *sockp = NULL; | ||
96 | return err; | ||
97 | } | ||
98 | EXPORT_SYMBOL(udp_sock_create); | ||
99 | |||
100 | MODULE_LICENSE("GPL"); | ||
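
The new udp_tunnel module gives tunnel implementations a single call that creates, binds, and optionally connects a kernel UDP socket in a given namespace, tearing everything down itself on failure. A hedged usage sketch; the caller and port number are illustrative, while the struct udp_port_cfg field names follow the code above:

#include <net/udp_tunnel.h>

/* Hypothetical tunnel driver opening its encapsulation socket. */
static int my_tunnel_open(struct net *net, struct socket **sockp)
{
	struct udp_port_cfg cfg = {
		.family            = AF_INET,
		.local_ip.s_addr   = htonl(INADDR_ANY),
		.local_udp_port    = htons(4789),	/* illustrative port */
		.use_udp_checksums = true,
	};

	/* Binds (and would connect, had peer_udp_port been set); on
	 * error *sockp is NULL and the socket is already released. */
	return udp_sock_create(net, &cfg, sockp);
}
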