diff options
Diffstat (limited to 'net/ipv4')
63 files changed, 2021 insertions, 4121 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe4582ca969a..766c59658563 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -212,6 +212,26 @@ int inet_listen(struct socket *sock, int backlog) | |||
212 | * we can only allow the backlog to be adjusted. | 212 | * we can only allow the backlog to be adjusted. |
213 | */ | 213 | */ |
214 | if (old_state != TCP_LISTEN) { | 214 | if (old_state != TCP_LISTEN) { |
215 | /* Check special setups for testing purpose to enable TFO w/o | ||
216 | * requiring TCP_FASTOPEN sockopt. | ||
217 | * Note that only TCP sockets (SOCK_STREAM) will reach here. | ||
218 | * Also fastopenq may already been allocated because this | ||
219 | * socket was in TCP_LISTEN state previously but was | ||
220 | * shutdown() (rather than close()). | ||
221 | */ | ||
222 | if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && | ||
223 | inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { | ||
224 | if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) | ||
225 | err = fastopen_init_queue(sk, backlog); | ||
226 | else if ((sysctl_tcp_fastopen & | ||
227 | TFO_SERVER_WO_SOCKOPT2) != 0) | ||
228 | err = fastopen_init_queue(sk, | ||
229 | ((uint)sysctl_tcp_fastopen) >> 16); | ||
230 | else | ||
231 | err = 0; | ||
232 | if (err) | ||
233 | goto out; | ||
234 | } | ||
215 | err = inet_csk_listen_start(sk, backlog); | 235 | err = inet_csk_listen_start(sk, backlog); |
216 | if (err) | 236 | if (err) |
217 | goto out; | 237 | goto out; |
@@ -701,7 +721,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) | |||
701 | 721 | ||
702 | sock_rps_record_flow(sk2); | 722 | sock_rps_record_flow(sk2); |
703 | WARN_ON(!((1 << sk2->sk_state) & | 723 | WARN_ON(!((1 << sk2->sk_state) & |
704 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); | 724 | (TCPF_ESTABLISHED | TCPF_SYN_RECV | |
725 | TCPF_CLOSE_WAIT | TCPF_CLOSE))); | ||
705 | 726 | ||
706 | sock_graft(sk2, newsock); | 727 | sock_graft(sk2, newsock); |
707 | 728 | ||
@@ -1364,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1364 | if (*(u8 *)iph != 0x45) | 1385 | if (*(u8 *)iph != 0x45) |
1365 | goto out_unlock; | 1386 | goto out_unlock; |
1366 | 1387 | ||
1367 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) | 1388 | if (unlikely(ip_fast_csum((u8 *)iph, 5))) |
1368 | goto out_unlock; | 1389 | goto out_unlock; |
1369 | 1390 | ||
1370 | id = ntohl(*(__be32 *)&iph->id); | 1391 | id = ntohl(*(__be32 *)&iph->id); |
@@ -1380,7 +1401,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1380 | iph2 = ip_hdr(p); | 1401 | iph2 = ip_hdr(p); |
1381 | 1402 | ||
1382 | if ((iph->protocol ^ iph2->protocol) | | 1403 | if ((iph->protocol ^ iph2->protocol) | |
1383 | (iph->tos ^ iph2->tos) | | ||
1384 | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | | 1404 | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | |
1385 | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { | 1405 | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { |
1386 | NAPI_GRO_CB(p)->same_flow = 0; | 1406 | NAPI_GRO_CB(p)->same_flow = 0; |
@@ -1390,6 +1410,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1390 | /* All fields must match except length and checksum. */ | 1410 | /* All fields must match except length and checksum. */ |
1391 | NAPI_GRO_CB(p)->flush |= | 1411 | NAPI_GRO_CB(p)->flush |= |
1392 | (iph->ttl ^ iph2->ttl) | | 1412 | (iph->ttl ^ iph2->ttl) | |
1413 | (iph->tos ^ iph2->tos) | | ||
1393 | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); | 1414 | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); |
1394 | 1415 | ||
1395 | NAPI_GRO_CB(p)->flush |= flush; | 1416 | NAPI_GRO_CB(p)->flush |= flush; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e12fad773852..2a6abc163ed2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -94,25 +94,22 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { | |||
94 | [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, | 94 | [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, |
95 | }; | 95 | }; |
96 | 96 | ||
97 | /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE | 97 | #define IN4_ADDR_HSIZE_SHIFT 8 |
98 | * value. So if you change this define, make appropriate changes to | 98 | #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) |
99 | * inet_addr_hash as well. | 99 | |
100 | */ | ||
101 | #define IN4_ADDR_HSIZE 256 | ||
102 | static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; | 100 | static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; |
103 | static DEFINE_SPINLOCK(inet_addr_hash_lock); | 101 | static DEFINE_SPINLOCK(inet_addr_hash_lock); |
104 | 102 | ||
105 | static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) | 103 | static u32 inet_addr_hash(struct net *net, __be32 addr) |
106 | { | 104 | { |
107 | u32 val = (__force u32) addr ^ hash_ptr(net, 8); | 105 | u32 val = (__force u32) addr ^ net_hash_mix(net); |
108 | 106 | ||
109 | return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & | 107 | return hash_32(val, IN4_ADDR_HSIZE_SHIFT); |
110 | (IN4_ADDR_HSIZE - 1)); | ||
111 | } | 108 | } |
112 | 109 | ||
113 | static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) | 110 | static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) |
114 | { | 111 | { |
115 | unsigned int hash = inet_addr_hash(net, ifa->ifa_local); | 112 | u32 hash = inet_addr_hash(net, ifa->ifa_local); |
116 | 113 | ||
117 | spin_lock(&inet_addr_hash_lock); | 114 | spin_lock(&inet_addr_hash_lock); |
118 | hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); | 115 | hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); |
@@ -136,18 +133,18 @@ static void inet_hash_remove(struct in_ifaddr *ifa) | |||
136 | */ | 133 | */ |
137 | struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | 134 | struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) |
138 | { | 135 | { |
139 | unsigned int hash = inet_addr_hash(net, addr); | 136 | u32 hash = inet_addr_hash(net, addr); |
140 | struct net_device *result = NULL; | 137 | struct net_device *result = NULL; |
141 | struct in_ifaddr *ifa; | 138 | struct in_ifaddr *ifa; |
142 | struct hlist_node *node; | 139 | struct hlist_node *node; |
143 | 140 | ||
144 | rcu_read_lock(); | 141 | rcu_read_lock(); |
145 | hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { | 142 | hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { |
146 | struct net_device *dev = ifa->ifa_dev->dev; | ||
147 | |||
148 | if (!net_eq(dev_net(dev), net)) | ||
149 | continue; | ||
150 | if (ifa->ifa_local == addr) { | 143 | if (ifa->ifa_local == addr) { |
144 | struct net_device *dev = ifa->ifa_dev->dev; | ||
145 | |||
146 | if (!net_eq(dev_net(dev), net)) | ||
147 | continue; | ||
151 | result = dev; | 148 | result = dev; |
152 | break; | 149 | break; |
153 | } | 150 | } |
@@ -182,10 +179,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
182 | static void devinet_sysctl_register(struct in_device *idev); | 179 | static void devinet_sysctl_register(struct in_device *idev); |
183 | static void devinet_sysctl_unregister(struct in_device *idev); | 180 | static void devinet_sysctl_unregister(struct in_device *idev); |
184 | #else | 181 | #else |
185 | static inline void devinet_sysctl_register(struct in_device *idev) | 182 | static void devinet_sysctl_register(struct in_device *idev) |
186 | { | 183 | { |
187 | } | 184 | } |
188 | static inline void devinet_sysctl_unregister(struct in_device *idev) | 185 | static void devinet_sysctl_unregister(struct in_device *idev) |
189 | { | 186 | { |
190 | } | 187 | } |
191 | #endif | 188 | #endif |
@@ -205,7 +202,7 @@ static void inet_rcu_free_ifa(struct rcu_head *head) | |||
205 | kfree(ifa); | 202 | kfree(ifa); |
206 | } | 203 | } |
207 | 204 | ||
208 | static inline void inet_free_ifa(struct in_ifaddr *ifa) | 205 | static void inet_free_ifa(struct in_ifaddr *ifa) |
209 | { | 206 | { |
210 | call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); | 207 | call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); |
211 | } | 208 | } |
@@ -314,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) | |||
314 | } | 311 | } |
315 | 312 | ||
316 | static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | 313 | static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, |
317 | int destroy, struct nlmsghdr *nlh, u32 pid) | 314 | int destroy, struct nlmsghdr *nlh, u32 portid) |
318 | { | 315 | { |
319 | struct in_ifaddr *promote = NULL; | 316 | struct in_ifaddr *promote = NULL; |
320 | struct in_ifaddr *ifa, *ifa1 = *ifap; | 317 | struct in_ifaddr *ifa, *ifa1 = *ifap; |
@@ -348,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
348 | inet_hash_remove(ifa); | 345 | inet_hash_remove(ifa); |
349 | *ifap1 = ifa->ifa_next; | 346 | *ifap1 = ifa->ifa_next; |
350 | 347 | ||
351 | rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); | 348 | rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); |
352 | blocking_notifier_call_chain(&inetaddr_chain, | 349 | blocking_notifier_call_chain(&inetaddr_chain, |
353 | NETDEV_DOWN, ifa); | 350 | NETDEV_DOWN, ifa); |
354 | inet_free_ifa(ifa); | 351 | inet_free_ifa(ifa); |
@@ -385,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
385 | is valid, it will try to restore deleted routes... Grr. | 382 | is valid, it will try to restore deleted routes... Grr. |
386 | So that, this order is correct. | 383 | So that, this order is correct. |
387 | */ | 384 | */ |
388 | rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); | 385 | rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); |
389 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); | 386 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); |
390 | 387 | ||
391 | if (promote) { | 388 | if (promote) { |
@@ -398,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
398 | } | 395 | } |
399 | 396 | ||
400 | promote->ifa_flags &= ~IFA_F_SECONDARY; | 397 | promote->ifa_flags &= ~IFA_F_SECONDARY; |
401 | rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); | 398 | rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); |
402 | blocking_notifier_call_chain(&inetaddr_chain, | 399 | blocking_notifier_call_chain(&inetaddr_chain, |
403 | NETDEV_UP, promote); | 400 | NETDEV_UP, promote); |
404 | for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { | 401 | for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { |
@@ -420,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
420 | } | 417 | } |
421 | 418 | ||
422 | static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, | 419 | static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, |
423 | u32 pid) | 420 | u32 portid) |
424 | { | 421 | { |
425 | struct in_device *in_dev = ifa->ifa_dev; | 422 | struct in_device *in_dev = ifa->ifa_dev; |
426 | struct in_ifaddr *ifa1, **ifap, **last_primary; | 423 | struct in_ifaddr *ifa1, **ifap, **last_primary; |
@@ -467,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, | |||
467 | /* Send message first, then call notifier. | 464 | /* Send message first, then call notifier. |
468 | Notifier will trigger FIB update, so that | 465 | Notifier will trigger FIB update, so that |
469 | listeners of netlink will know about new ifaddr */ | 466 | listeners of netlink will know about new ifaddr */ |
470 | rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); | 467 | rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); |
471 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); | 468 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); |
472 | 469 | ||
473 | return 0; | 470 | return 0; |
@@ -566,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg | |||
566 | !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) | 563 | !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) |
567 | continue; | 564 | continue; |
568 | 565 | ||
569 | __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); | 566 | __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); |
570 | return 0; | 567 | return 0; |
571 | } | 568 | } |
572 | 569 | ||
@@ -652,14 +649,14 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg | |||
652 | if (IS_ERR(ifa)) | 649 | if (IS_ERR(ifa)) |
653 | return PTR_ERR(ifa); | 650 | return PTR_ERR(ifa); |
654 | 651 | ||
655 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); | 652 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); |
656 | } | 653 | } |
657 | 654 | ||
658 | /* | 655 | /* |
659 | * Determine a default network mask, based on the IP address. | 656 | * Determine a default network mask, based on the IP address. |
660 | */ | 657 | */ |
661 | 658 | ||
662 | static inline int inet_abc_len(__be32 addr) | 659 | static int inet_abc_len(__be32 addr) |
663 | { | 660 | { |
664 | int rc = -1; /* Something else, probably a multicast. */ | 661 | int rc = -1; /* Something else, probably a multicast. */ |
665 | 662 | ||
@@ -1124,7 +1121,7 @@ skip: | |||
1124 | } | 1121 | } |
1125 | } | 1122 | } |
1126 | 1123 | ||
1127 | static inline bool inetdev_valid_mtu(unsigned int mtu) | 1124 | static bool inetdev_valid_mtu(unsigned int mtu) |
1128 | { | 1125 | { |
1129 | return mtu >= 68; | 1126 | return mtu >= 68; |
1130 | } | 1127 | } |
@@ -1239,7 +1236,7 @@ static struct notifier_block ip_netdev_notifier = { | |||
1239 | .notifier_call = inetdev_event, | 1236 | .notifier_call = inetdev_event, |
1240 | }; | 1237 | }; |
1241 | 1238 | ||
1242 | static inline size_t inet_nlmsg_size(void) | 1239 | static size_t inet_nlmsg_size(void) |
1243 | { | 1240 | { |
1244 | return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) | 1241 | return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) |
1245 | + nla_total_size(4) /* IFA_ADDRESS */ | 1242 | + nla_total_size(4) /* IFA_ADDRESS */ |
@@ -1249,12 +1246,12 @@ static inline size_t inet_nlmsg_size(void) | |||
1249 | } | 1246 | } |
1250 | 1247 | ||
1251 | static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, | 1248 | static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, |
1252 | u32 pid, u32 seq, int event, unsigned int flags) | 1249 | u32 portid, u32 seq, int event, unsigned int flags) |
1253 | { | 1250 | { |
1254 | struct ifaddrmsg *ifm; | 1251 | struct ifaddrmsg *ifm; |
1255 | struct nlmsghdr *nlh; | 1252 | struct nlmsghdr *nlh; |
1256 | 1253 | ||
1257 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); | 1254 | nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); |
1258 | if (nlh == NULL) | 1255 | if (nlh == NULL) |
1259 | return -EMSGSIZE; | 1256 | return -EMSGSIZE; |
1260 | 1257 | ||
@@ -1316,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
1316 | if (ip_idx < s_ip_idx) | 1313 | if (ip_idx < s_ip_idx) |
1317 | continue; | 1314 | continue; |
1318 | if (inet_fill_ifaddr(skb, ifa, | 1315 | if (inet_fill_ifaddr(skb, ifa, |
1319 | NETLINK_CB(cb->skb).pid, | 1316 | NETLINK_CB(cb->skb).portid, |
1320 | cb->nlh->nlmsg_seq, | 1317 | cb->nlh->nlmsg_seq, |
1321 | RTM_NEWADDR, NLM_F_MULTI) <= 0) { | 1318 | RTM_NEWADDR, NLM_F_MULTI) <= 0) { |
1322 | rcu_read_unlock(); | 1319 | rcu_read_unlock(); |
@@ -1338,7 +1335,7 @@ done: | |||
1338 | } | 1335 | } |
1339 | 1336 | ||
1340 | static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, | 1337 | static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, |
1341 | u32 pid) | 1338 | u32 portid) |
1342 | { | 1339 | { |
1343 | struct sk_buff *skb; | 1340 | struct sk_buff *skb; |
1344 | u32 seq = nlh ? nlh->nlmsg_seq : 0; | 1341 | u32 seq = nlh ? nlh->nlmsg_seq : 0; |
@@ -1350,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, | |||
1350 | if (skb == NULL) | 1347 | if (skb == NULL) |
1351 | goto errout; | 1348 | goto errout; |
1352 | 1349 | ||
1353 | err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); | 1350 | err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); |
1354 | if (err < 0) { | 1351 | if (err < 0) { |
1355 | /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ | 1352 | /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ |
1356 | WARN_ON(err == -EMSGSIZE); | 1353 | WARN_ON(err == -EMSGSIZE); |
1357 | kfree_skb(skb); | 1354 | kfree_skb(skb); |
1358 | goto errout; | 1355 | goto errout; |
1359 | } | 1356 | } |
1360 | rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); | 1357 | rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); |
1361 | return; | 1358 | return; |
1362 | errout: | 1359 | errout: |
1363 | if (err < 0) | 1360 | if (err < 0) |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8e2b475da9fa..68c93d1bb03a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) | |||
218 | scope = RT_SCOPE_UNIVERSE; | 218 | scope = RT_SCOPE_UNIVERSE; |
219 | if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { | 219 | if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { |
220 | fl4.flowi4_oif = 0; | 220 | fl4.flowi4_oif = 0; |
221 | fl4.flowi4_iif = net->loopback_dev->ifindex; | 221 | fl4.flowi4_iif = LOOPBACK_IFINDEX; |
222 | fl4.daddr = ip_hdr(skb)->saddr; | 222 | fl4.daddr = ip_hdr(skb)->saddr; |
223 | fl4.saddr = 0; | 223 | fl4.saddr = 0; |
224 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); | 224 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); |
@@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, | |||
557 | cfg->fc_flags = rtm->rtm_flags; | 557 | cfg->fc_flags = rtm->rtm_flags; |
558 | cfg->fc_nlflags = nlh->nlmsg_flags; | 558 | cfg->fc_nlflags = nlh->nlmsg_flags; |
559 | 559 | ||
560 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; | 560 | cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; |
561 | cfg->fc_nlinfo.nlh = nlh; | 561 | cfg->fc_nlinfo.nlh = nlh; |
562 | cfg->fc_nlinfo.nl_net = net; | 562 | cfg->fc_nlinfo.nl_net = net; |
563 | 563 | ||
@@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb) | |||
955 | struct fib_result_nl *frn; | 955 | struct fib_result_nl *frn; |
956 | struct nlmsghdr *nlh; | 956 | struct nlmsghdr *nlh; |
957 | struct fib_table *tb; | 957 | struct fib_table *tb; |
958 | u32 pid; | 958 | u32 portid; |
959 | 959 | ||
960 | net = sock_net(skb->sk); | 960 | net = sock_net(skb->sk); |
961 | nlh = nlmsg_hdr(skb); | 961 | nlh = nlmsg_hdr(skb); |
@@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb) | |||
973 | 973 | ||
974 | nl_fib_lookup(frn, tb); | 974 | nl_fib_lookup(frn, tb); |
975 | 975 | ||
976 | pid = NETLINK_CB(skb).pid; /* pid of sending process */ | 976 | portid = NETLINK_CB(skb).portid; /* pid of sending process */ |
977 | NETLINK_CB(skb).pid = 0; /* from kernel */ | 977 | NETLINK_CB(skb).portid = 0; /* from kernel */ |
978 | NETLINK_CB(skb).dst_group = 0; /* unicast */ | 978 | NETLINK_CB(skb).dst_group = 0; /* unicast */ |
979 | netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); | 979 | netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); |
980 | } | 980 | } |
981 | 981 | ||
982 | static int __net_init nl_fib_lookup_init(struct net *net) | 982 | static int __net_init nl_fib_lookup_init(struct net *net) |
@@ -986,7 +986,7 @@ static int __net_init nl_fib_lookup_init(struct net *net) | |||
986 | .input = nl_fib_input, | 986 | .input = nl_fib_input, |
987 | }; | 987 | }; |
988 | 988 | ||
989 | sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, THIS_MODULE, &cfg); | 989 | sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); |
990 | if (sk == NULL) | 990 | if (sk == NULL) |
991 | return -EAFNOSUPPORT; | 991 | return -EAFNOSUPPORT; |
992 | net->ipv4.fibnl = sk; | 992 | net->ipv4.fibnl = sk; |
@@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
1041 | static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) | 1041 | static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) |
1042 | { | 1042 | { |
1043 | struct net_device *dev = ptr; | 1043 | struct net_device *dev = ptr; |
1044 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1044 | struct in_device *in_dev; |
1045 | struct net *net = dev_net(dev); | 1045 | struct net *net = dev_net(dev); |
1046 | 1046 | ||
1047 | if (event == NETDEV_UNREGISTER) { | 1047 | if (event == NETDEV_UNREGISTER) { |
@@ -1050,8 +1050,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
1050 | return NOTIFY_DONE; | 1050 | return NOTIFY_DONE; |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | if (!in_dev) | 1053 | in_dev = __in_dev_get_rtnl(dev); |
1054 | return NOTIFY_DONE; | ||
1055 | 1054 | ||
1056 | switch (event) { | 1055 | switch (event) { |
1057 | case NETDEV_UP: | 1056 | case NETDEV_UP: |
@@ -1062,16 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
1062 | fib_sync_up(dev); | 1061 | fib_sync_up(dev); |
1063 | #endif | 1062 | #endif |
1064 | atomic_inc(&net->ipv4.dev_addr_genid); | 1063 | atomic_inc(&net->ipv4.dev_addr_genid); |
1065 | rt_cache_flush(dev_net(dev)); | 1064 | rt_cache_flush(net); |
1066 | break; | 1065 | break; |
1067 | case NETDEV_DOWN: | 1066 | case NETDEV_DOWN: |
1068 | fib_disable_ip(dev, 0); | 1067 | fib_disable_ip(dev, 0); |
1069 | break; | 1068 | break; |
1070 | case NETDEV_CHANGEMTU: | 1069 | case NETDEV_CHANGEMTU: |
1071 | case NETDEV_CHANGE: | 1070 | case NETDEV_CHANGE: |
1072 | rt_cache_flush(dev_net(dev)); | 1071 | rt_cache_flush(net); |
1073 | break; | ||
1074 | case NETDEV_UNREGISTER_BATCH: | ||
1075 | break; | 1072 | break; |
1076 | } | 1073 | } |
1077 | return NOTIFY_DONE; | 1074 | return NOTIFY_DONE; |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da80dc14cc76..3509065e409a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -391,7 +391,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, | |||
391 | if (skb == NULL) | 391 | if (skb == NULL) |
392 | goto errout; | 392 | goto errout; |
393 | 393 | ||
394 | err = fib_dump_info(skb, info->pid, seq, event, tb_id, | 394 | err = fib_dump_info(skb, info->portid, seq, event, tb_id, |
395 | fa->fa_type, key, dst_len, | 395 | fa->fa_type, key, dst_len, |
396 | fa->fa_tos, fa->fa_info, nlm_flags); | 396 | fa->fa_tos, fa->fa_info, nlm_flags); |
397 | if (err < 0) { | 397 | if (err < 0) { |
@@ -400,7 +400,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, | |||
400 | kfree_skb(skb); | 400 | kfree_skb(skb); |
401 | goto errout; | 401 | goto errout; |
402 | } | 402 | } |
403 | rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, | 403 | rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, |
404 | info->nlh, GFP_KERNEL); | 404 | info->nlh, GFP_KERNEL); |
405 | return; | 405 | return; |
406 | errout: | 406 | errout: |
@@ -989,14 +989,14 @@ failure: | |||
989 | return ERR_PTR(err); | 989 | return ERR_PTR(err); |
990 | } | 990 | } |
991 | 991 | ||
992 | int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 992 | int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, |
993 | u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, | 993 | u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, |
994 | struct fib_info *fi, unsigned int flags) | 994 | struct fib_info *fi, unsigned int flags) |
995 | { | 995 | { |
996 | struct nlmsghdr *nlh; | 996 | struct nlmsghdr *nlh; |
997 | struct rtmsg *rtm; | 997 | struct rtmsg *rtm; |
998 | 998 | ||
999 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); | 999 | nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); |
1000 | if (nlh == NULL) | 1000 | if (nlh == NULL) |
1001 | return -EMSGSIZE; | 1001 | return -EMSGSIZE; |
1002 | 1002 | ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d1b93595b4a7..31d771ca9a70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1550,7 +1550,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, | |||
1550 | * state.directly. | 1550 | * state.directly. |
1551 | */ | 1551 | */ |
1552 | if (pref_mismatch) { | 1552 | if (pref_mismatch) { |
1553 | int mp = KEYLENGTH - fls(pref_mismatch); | 1553 | /* fls(x) = __fls(x) + 1 */ |
1554 | int mp = KEYLENGTH - __fls(pref_mismatch) - 1; | ||
1554 | 1555 | ||
1555 | if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) | 1556 | if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) |
1556 | goto backtrace; | 1557 | goto backtrace; |
@@ -1655,7 +1656,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1655 | if (!l) | 1656 | if (!l) |
1656 | return -ESRCH; | 1657 | return -ESRCH; |
1657 | 1658 | ||
1658 | fa_head = get_fa_head(l, plen); | 1659 | li = find_leaf_info(l, plen); |
1660 | |||
1661 | if (!li) | ||
1662 | return -ESRCH; | ||
1663 | |||
1664 | fa_head = &li->falh; | ||
1659 | fa = fib_find_alias(fa_head, tos, 0); | 1665 | fa = fib_find_alias(fa_head, tos, 0); |
1660 | 1666 | ||
1661 | if (!fa) | 1667 | if (!fa) |
@@ -1691,9 +1697,6 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1691 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, | 1697 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, |
1692 | &cfg->fc_nlinfo, 0); | 1698 | &cfg->fc_nlinfo, 0); |
1693 | 1699 | ||
1694 | l = fib_find_node(t, key); | ||
1695 | li = find_leaf_info(l, plen); | ||
1696 | |||
1697 | list_del_rcu(&fa->fa_list); | 1700 | list_del_rcu(&fa->fa_list); |
1698 | 1701 | ||
1699 | if (!plen) | 1702 | if (!plen) |
@@ -1870,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | |||
1870 | continue; | 1873 | continue; |
1871 | } | 1874 | } |
1872 | 1875 | ||
1873 | if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, | 1876 | if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, |
1874 | cb->nlh->nlmsg_seq, | 1877 | cb->nlh->nlmsg_seq, |
1875 | RTM_NEWROUTE, | 1878 | RTM_NEWROUTE, |
1876 | tb->tb_id, | 1879 | tb->tb_id, |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6699f23e6f55..736ab70fd179 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -815,14 +815,15 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) | |||
815 | return 1; | 815 | return 1; |
816 | } | 816 | } |
817 | 817 | ||
818 | static void igmp_heard_report(struct in_device *in_dev, __be32 group) | 818 | /* return true if packet was dropped */ |
819 | static bool igmp_heard_report(struct in_device *in_dev, __be32 group) | ||
819 | { | 820 | { |
820 | struct ip_mc_list *im; | 821 | struct ip_mc_list *im; |
821 | 822 | ||
822 | /* Timers are only set for non-local groups */ | 823 | /* Timers are only set for non-local groups */ |
823 | 824 | ||
824 | if (group == IGMP_ALL_HOSTS) | 825 | if (group == IGMP_ALL_HOSTS) |
825 | return; | 826 | return false; |
826 | 827 | ||
827 | rcu_read_lock(); | 828 | rcu_read_lock(); |
828 | for_each_pmc_rcu(in_dev, im) { | 829 | for_each_pmc_rcu(in_dev, im) { |
@@ -832,9 +833,11 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) | |||
832 | } | 833 | } |
833 | } | 834 | } |
834 | rcu_read_unlock(); | 835 | rcu_read_unlock(); |
836 | return false; | ||
835 | } | 837 | } |
836 | 838 | ||
837 | static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | 839 | /* return true if packet was dropped */ |
840 | static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | ||
838 | int len) | 841 | int len) |
839 | { | 842 | { |
840 | struct igmphdr *ih = igmp_hdr(skb); | 843 | struct igmphdr *ih = igmp_hdr(skb); |
@@ -866,7 +869,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
866 | /* clear deleted report items */ | 869 | /* clear deleted report items */ |
867 | igmpv3_clear_delrec(in_dev); | 870 | igmpv3_clear_delrec(in_dev); |
868 | } else if (len < 12) { | 871 | } else if (len < 12) { |
869 | return; /* ignore bogus packet; freed by caller */ | 872 | return true; /* ignore bogus packet; freed by caller */ |
870 | } else if (IGMP_V1_SEEN(in_dev)) { | 873 | } else if (IGMP_V1_SEEN(in_dev)) { |
871 | /* This is a v3 query with v1 queriers present */ | 874 | /* This is a v3 query with v1 queriers present */ |
872 | max_delay = IGMP_Query_Response_Interval; | 875 | max_delay = IGMP_Query_Response_Interval; |
@@ -883,13 +886,13 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
883 | max_delay = 1; /* can't mod w/ 0 */ | 886 | max_delay = 1; /* can't mod w/ 0 */ |
884 | } else { /* v3 */ | 887 | } else { /* v3 */ |
885 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) | 888 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) |
886 | return; | 889 | return true; |
887 | 890 | ||
888 | ih3 = igmpv3_query_hdr(skb); | 891 | ih3 = igmpv3_query_hdr(skb); |
889 | if (ih3->nsrcs) { | 892 | if (ih3->nsrcs) { |
890 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) | 893 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) |
891 | + ntohs(ih3->nsrcs)*sizeof(__be32))) | 894 | + ntohs(ih3->nsrcs)*sizeof(__be32))) |
892 | return; | 895 | return true; |
893 | ih3 = igmpv3_query_hdr(skb); | 896 | ih3 = igmpv3_query_hdr(skb); |
894 | } | 897 | } |
895 | 898 | ||
@@ -901,9 +904,9 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
901 | in_dev->mr_qrv = ih3->qrv; | 904 | in_dev->mr_qrv = ih3->qrv; |
902 | if (!group) { /* general query */ | 905 | if (!group) { /* general query */ |
903 | if (ih3->nsrcs) | 906 | if (ih3->nsrcs) |
904 | return; /* no sources allowed */ | 907 | return false; /* no sources allowed */ |
905 | igmp_gq_start_timer(in_dev); | 908 | igmp_gq_start_timer(in_dev); |
906 | return; | 909 | return false; |
907 | } | 910 | } |
908 | /* mark sources to include, if group & source-specific */ | 911 | /* mark sources to include, if group & source-specific */ |
909 | mark = ih3->nsrcs != 0; | 912 | mark = ih3->nsrcs != 0; |
@@ -939,6 +942,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
939 | igmp_mod_timer(im, max_delay); | 942 | igmp_mod_timer(im, max_delay); |
940 | } | 943 | } |
941 | rcu_read_unlock(); | 944 | rcu_read_unlock(); |
945 | return false; | ||
942 | } | 946 | } |
943 | 947 | ||
944 | /* called in rcu_read_lock() section */ | 948 | /* called in rcu_read_lock() section */ |
@@ -948,6 +952,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
948 | struct igmphdr *ih; | 952 | struct igmphdr *ih; |
949 | struct in_device *in_dev = __in_dev_get_rcu(skb->dev); | 953 | struct in_device *in_dev = __in_dev_get_rcu(skb->dev); |
950 | int len = skb->len; | 954 | int len = skb->len; |
955 | bool dropped = true; | ||
951 | 956 | ||
952 | if (in_dev == NULL) | 957 | if (in_dev == NULL) |
953 | goto drop; | 958 | goto drop; |
@@ -969,7 +974,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
969 | ih = igmp_hdr(skb); | 974 | ih = igmp_hdr(skb); |
970 | switch (ih->type) { | 975 | switch (ih->type) { |
971 | case IGMP_HOST_MEMBERSHIP_QUERY: | 976 | case IGMP_HOST_MEMBERSHIP_QUERY: |
972 | igmp_heard_query(in_dev, skb, len); | 977 | dropped = igmp_heard_query(in_dev, skb, len); |
973 | break; | 978 | break; |
974 | case IGMP_HOST_MEMBERSHIP_REPORT: | 979 | case IGMP_HOST_MEMBERSHIP_REPORT: |
975 | case IGMPV2_HOST_MEMBERSHIP_REPORT: | 980 | case IGMPV2_HOST_MEMBERSHIP_REPORT: |
@@ -979,7 +984,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
979 | /* don't rely on MC router hearing unicast reports */ | 984 | /* don't rely on MC router hearing unicast reports */ |
980 | if (skb->pkt_type == PACKET_MULTICAST || | 985 | if (skb->pkt_type == PACKET_MULTICAST || |
981 | skb->pkt_type == PACKET_BROADCAST) | 986 | skb->pkt_type == PACKET_BROADCAST) |
982 | igmp_heard_report(in_dev, ih->group); | 987 | dropped = igmp_heard_report(in_dev, ih->group); |
983 | break; | 988 | break; |
984 | case IGMP_PIM: | 989 | case IGMP_PIM: |
985 | #ifdef CONFIG_IP_PIMSM_V1 | 990 | #ifdef CONFIG_IP_PIMSM_V1 |
@@ -997,7 +1002,10 @@ int igmp_rcv(struct sk_buff *skb) | |||
997 | } | 1002 | } |
998 | 1003 | ||
999 | drop: | 1004 | drop: |
1000 | kfree_skb(skb); | 1005 | if (dropped) |
1006 | kfree_skb(skb); | ||
1007 | else | ||
1008 | consume_skb(skb); | ||
1001 | return 0; | 1009 | return 0; |
1002 | } | 1010 | } |
1003 | 1011 | ||
@@ -1896,6 +1904,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) | |||
1896 | rtnl_unlock(); | 1904 | rtnl_unlock(); |
1897 | return ret; | 1905 | return ret; |
1898 | } | 1906 | } |
1907 | EXPORT_SYMBOL(ip_mc_leave_group); | ||
1899 | 1908 | ||
1900 | int ip_mc_source(int add, int omode, struct sock *sk, struct | 1909 | int ip_mc_source(int add, int omode, struct sock *sk, struct |
1901 | ip_mreq_source *mreqs, int ifindex) | 1910 | ip_mreq_source *mreqs, int ifindex) |
@@ -2435,6 +2444,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) | |||
2435 | struct ip_mc_list *im = (struct ip_mc_list *)v; | 2444 | struct ip_mc_list *im = (struct ip_mc_list *)v; |
2436 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); | 2445 | struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); |
2437 | char *querier; | 2446 | char *querier; |
2447 | long delta; | ||
2448 | |||
2438 | #ifdef CONFIG_IP_MULTICAST | 2449 | #ifdef CONFIG_IP_MULTICAST |
2439 | querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : | 2450 | querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : |
2440 | IGMP_V2_SEEN(state->in_dev) ? "V2" : | 2451 | IGMP_V2_SEEN(state->in_dev) ? "V2" : |
@@ -2448,11 +2459,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) | |||
2448 | state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); | 2459 | state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); |
2449 | } | 2460 | } |
2450 | 2461 | ||
2462 | delta = im->timer.expires - jiffies; | ||
2451 | seq_printf(seq, | 2463 | seq_printf(seq, |
2452 | "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", | 2464 | "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", |
2453 | im->multiaddr, im->users, | 2465 | im->multiaddr, im->users, |
2454 | im->tm_running, im->tm_running ? | 2466 | im->tm_running, |
2455 | jiffies_to_clock_t(im->timer.expires-jiffies) : 0, | 2467 | im->tm_running ? jiffies_delta_to_clock_t(delta) : 0, |
2456 | im->reporter); | 2468 | im->reporter); |
2457 | } | 2469 | } |
2458 | return 0; | 2470 | return 0; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7f75f21d7b83..f0c5b9c1a957 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) | |||
283 | struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | 283 | struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) |
284 | { | 284 | { |
285 | struct inet_connection_sock *icsk = inet_csk(sk); | 285 | struct inet_connection_sock *icsk = inet_csk(sk); |
286 | struct request_sock_queue *queue = &icsk->icsk_accept_queue; | ||
286 | struct sock *newsk; | 287 | struct sock *newsk; |
288 | struct request_sock *req; | ||
287 | int error; | 289 | int error; |
288 | 290 | ||
289 | lock_sock(sk); | 291 | lock_sock(sk); |
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | |||
296 | goto out_err; | 298 | goto out_err; |
297 | 299 | ||
298 | /* Find already established connection */ | 300 | /* Find already established connection */ |
299 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { | 301 | if (reqsk_queue_empty(queue)) { |
300 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | 302 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); |
301 | 303 | ||
302 | /* If this is a non blocking socket don't sleep */ | 304 | /* If this is a non blocking socket don't sleep */ |
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | |||
308 | if (error) | 310 | if (error) |
309 | goto out_err; | 311 | goto out_err; |
310 | } | 312 | } |
311 | 313 | req = reqsk_queue_remove(queue); | |
312 | newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); | 314 | newsk = req->sk; |
313 | WARN_ON(newsk->sk_state == TCP_SYN_RECV); | 315 | |
316 | sk_acceptq_removed(sk); | ||
317 | if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { | ||
318 | spin_lock_bh(&queue->fastopenq->lock); | ||
319 | if (tcp_rsk(req)->listener) { | ||
320 | /* We are still waiting for the final ACK from 3WHS | ||
321 | * so can't free req now. Instead, we set req->sk to | ||
322 | * NULL to signify that the child socket is taken | ||
323 | * so reqsk_fastopen_remove() will free the req | ||
324 | * when 3WHS finishes (or is aborted). | ||
325 | */ | ||
326 | req->sk = NULL; | ||
327 | req = NULL; | ||
328 | } | ||
329 | spin_unlock_bh(&queue->fastopenq->lock); | ||
330 | } | ||
314 | out: | 331 | out: |
315 | release_sock(sk); | 332 | release_sock(sk); |
333 | if (req) | ||
334 | __reqsk_free(req); | ||
316 | return newsk; | 335 | return newsk; |
317 | out_err: | 336 | out_err: |
318 | newsk = NULL; | 337 | newsk = NULL; |
338 | req = NULL; | ||
319 | *err = error; | 339 | *err = error; |
320 | goto out; | 340 | goto out; |
321 | } | 341 | } |
@@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); | |||
720 | void inet_csk_listen_stop(struct sock *sk) | 740 | void inet_csk_listen_stop(struct sock *sk) |
721 | { | 741 | { |
722 | struct inet_connection_sock *icsk = inet_csk(sk); | 742 | struct inet_connection_sock *icsk = inet_csk(sk); |
743 | struct request_sock_queue *queue = &icsk->icsk_accept_queue; | ||
723 | struct request_sock *acc_req; | 744 | struct request_sock *acc_req; |
724 | struct request_sock *req; | 745 | struct request_sock *req; |
725 | 746 | ||
726 | inet_csk_delete_keepalive_timer(sk); | 747 | inet_csk_delete_keepalive_timer(sk); |
727 | 748 | ||
728 | /* make all the listen_opt local to us */ | 749 | /* make all the listen_opt local to us */ |
729 | acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); | 750 | acc_req = reqsk_queue_yank_acceptq(queue); |
730 | 751 | ||
731 | /* Following specs, it would be better either to send FIN | 752 | /* Following specs, it would be better either to send FIN |
732 | * (and enter FIN-WAIT-1, it is normal close) | 753 | * (and enter FIN-WAIT-1, it is normal close) |
@@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk) | |||
736 | * To be honest, we are not able to make either | 757 | * To be honest, we are not able to make either |
737 | * of the variants now. --ANK | 758 | * of the variants now. --ANK |
738 | */ | 759 | */ |
739 | reqsk_queue_destroy(&icsk->icsk_accept_queue); | 760 | reqsk_queue_destroy(queue); |
740 | 761 | ||
741 | while ((req = acc_req) != NULL) { | 762 | while ((req = acc_req) != NULL) { |
742 | struct sock *child = req->sk; | 763 | struct sock *child = req->sk; |
@@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk) | |||
754 | 775 | ||
755 | percpu_counter_inc(sk->sk_prot->orphan_count); | 776 | percpu_counter_inc(sk->sk_prot->orphan_count); |
756 | 777 | ||
778 | if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { | ||
779 | BUG_ON(tcp_sk(child)->fastopen_rsk != req); | ||
780 | BUG_ON(sk != tcp_rsk(req)->listener); | ||
781 | |||
782 | /* Paranoid, to prevent race condition if | ||
783 | * an inbound pkt destined for child is | ||
784 | * blocked by sock lock in tcp_v4_rcv(). | ||
785 | * Also to satisfy an assertion in | ||
786 | * tcp_v4_destroy_sock(). | ||
787 | */ | ||
788 | tcp_sk(child)->fastopen_rsk = NULL; | ||
789 | sock_put(sk); | ||
790 | } | ||
757 | inet_csk_destroy_sock(child); | 791 | inet_csk_destroy_sock(child); |
758 | 792 | ||
759 | bh_unlock_sock(child); | 793 | bh_unlock_sock(child); |
@@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk) | |||
763 | sk_acceptq_removed(sk); | 797 | sk_acceptq_removed(sk); |
764 | __reqsk_free(req); | 798 | __reqsk_free(req); |
765 | } | 799 | } |
800 | if (queue->fastopenq != NULL) { | ||
801 | /* Free all the reqs queued in rskq_rst_head. */ | ||
802 | spin_lock_bh(&queue->fastopenq->lock); | ||
803 | acc_req = queue->fastopenq->rskq_rst_head; | ||
804 | queue->fastopenq->rskq_rst_head = NULL; | ||
805 | spin_unlock_bh(&queue->fastopenq->lock); | ||
806 | while ((req = acc_req) != NULL) { | ||
807 | acc_req = req->dl_next; | ||
808 | __reqsk_free(req); | ||
809 | } | ||
810 | } | ||
766 | WARN_ON(sk->sk_ack_backlog); | 811 | WARN_ON(sk->sk_ack_backlog); |
767 | } | 812 | } |
768 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); | 813 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8bc005b1435f..535584c00f91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -70,7 +70,7 @@ static inline void inet_diag_unlock_handler( | |||
70 | int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | 70 | int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, |
71 | struct sk_buff *skb, struct inet_diag_req_v2 *req, | 71 | struct sk_buff *skb, struct inet_diag_req_v2 *req, |
72 | struct user_namespace *user_ns, | 72 | struct user_namespace *user_ns, |
73 | u32 pid, u32 seq, u16 nlmsg_flags, | 73 | u32 portid, u32 seq, u16 nlmsg_flags, |
74 | const struct nlmsghdr *unlh) | 74 | const struct nlmsghdr *unlh) |
75 | { | 75 | { |
76 | const struct inet_sock *inet = inet_sk(sk); | 76 | const struct inet_sock *inet = inet_sk(sk); |
@@ -84,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
84 | handler = inet_diag_table[req->sdiag_protocol]; | 84 | handler = inet_diag_table[req->sdiag_protocol]; |
85 | BUG_ON(handler == NULL); | 85 | BUG_ON(handler == NULL); |
86 | 86 | ||
87 | nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), | 87 | nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), |
88 | nlmsg_flags); | 88 | nlmsg_flags); |
89 | if (!nlh) | 89 | if (!nlh) |
90 | return -EMSGSIZE; | 90 | return -EMSGSIZE; |
@@ -201,23 +201,23 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); | |||
201 | static int inet_csk_diag_fill(struct sock *sk, | 201 | static int inet_csk_diag_fill(struct sock *sk, |
202 | struct sk_buff *skb, struct inet_diag_req_v2 *req, | 202 | struct sk_buff *skb, struct inet_diag_req_v2 *req, |
203 | struct user_namespace *user_ns, | 203 | struct user_namespace *user_ns, |
204 | u32 pid, u32 seq, u16 nlmsg_flags, | 204 | u32 portid, u32 seq, u16 nlmsg_flags, |
205 | const struct nlmsghdr *unlh) | 205 | const struct nlmsghdr *unlh) |
206 | { | 206 | { |
207 | return inet_sk_diag_fill(sk, inet_csk(sk), | 207 | return inet_sk_diag_fill(sk, inet_csk(sk), |
208 | skb, req, user_ns, pid, seq, nlmsg_flags, unlh); | 208 | skb, req, user_ns, portid, seq, nlmsg_flags, unlh); |
209 | } | 209 | } |
210 | 210 | ||
211 | static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | 211 | static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, |
212 | struct sk_buff *skb, struct inet_diag_req_v2 *req, | 212 | struct sk_buff *skb, struct inet_diag_req_v2 *req, |
213 | u32 pid, u32 seq, u16 nlmsg_flags, | 213 | u32 portid, u32 seq, u16 nlmsg_flags, |
214 | const struct nlmsghdr *unlh) | 214 | const struct nlmsghdr *unlh) |
215 | { | 215 | { |
216 | long tmo; | 216 | long tmo; |
217 | struct inet_diag_msg *r; | 217 | struct inet_diag_msg *r; |
218 | struct nlmsghdr *nlh; | 218 | struct nlmsghdr *nlh; |
219 | 219 | ||
220 | nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), | 220 | nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), |
221 | nlmsg_flags); | 221 | nlmsg_flags); |
222 | if (!nlh) | 222 | if (!nlh) |
223 | return -EMSGSIZE; | 223 | return -EMSGSIZE; |
@@ -260,14 +260,14 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
260 | static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, | 260 | static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, |
261 | struct inet_diag_req_v2 *r, | 261 | struct inet_diag_req_v2 *r, |
262 | struct user_namespace *user_ns, | 262 | struct user_namespace *user_ns, |
263 | u32 pid, u32 seq, u16 nlmsg_flags, | 263 | u32 portid, u32 seq, u16 nlmsg_flags, |
264 | const struct nlmsghdr *unlh) | 264 | const struct nlmsghdr *unlh) |
265 | { | 265 | { |
266 | if (sk->sk_state == TCP_TIME_WAIT) | 266 | if (sk->sk_state == TCP_TIME_WAIT) |
267 | return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, | 267 | return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, |
268 | skb, r, pid, seq, nlmsg_flags, | 268 | skb, r, portid, seq, nlmsg_flags, |
269 | unlh); | 269 | unlh); |
270 | return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); | 270 | return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); |
271 | } | 271 | } |
272 | 272 | ||
273 | int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, | 273 | int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, |
@@ -316,14 +316,14 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s | |||
316 | 316 | ||
317 | err = sk_diag_fill(sk, rep, req, | 317 | err = sk_diag_fill(sk, rep, req, |
318 | sk_user_ns(NETLINK_CB(in_skb).ssk), | 318 | sk_user_ns(NETLINK_CB(in_skb).ssk), |
319 | NETLINK_CB(in_skb).pid, | 319 | NETLINK_CB(in_skb).portid, |
320 | nlh->nlmsg_seq, 0, nlh); | 320 | nlh->nlmsg_seq, 0, nlh); |
321 | if (err < 0) { | 321 | if (err < 0) { |
322 | WARN_ON(err == -EMSGSIZE); | 322 | WARN_ON(err == -EMSGSIZE); |
323 | nlmsg_free(rep); | 323 | nlmsg_free(rep); |
324 | goto out; | 324 | goto out; |
325 | } | 325 | } |
326 | err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, | 326 | err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, |
327 | MSG_DONTWAIT); | 327 | MSG_DONTWAIT); |
328 | if (err > 0) | 328 | if (err > 0) |
329 | err = 0; | 329 | err = 0; |
@@ -557,7 +557,7 @@ static int inet_csk_diag_dump(struct sock *sk, | |||
557 | 557 | ||
558 | return inet_csk_diag_fill(sk, skb, r, | 558 | return inet_csk_diag_fill(sk, skb, r, |
559 | sk_user_ns(NETLINK_CB(cb->skb).ssk), | 559 | sk_user_ns(NETLINK_CB(cb->skb).ssk), |
560 | NETLINK_CB(cb->skb).pid, | 560 | NETLINK_CB(cb->skb).portid, |
561 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 561 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
562 | } | 562 | } |
563 | 563 | ||
@@ -592,14 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, | |||
592 | } | 592 | } |
593 | 593 | ||
594 | return inet_twsk_diag_fill(tw, skb, r, | 594 | return inet_twsk_diag_fill(tw, skb, r, |
595 | NETLINK_CB(cb->skb).pid, | 595 | NETLINK_CB(cb->skb).portid, |
596 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 596 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
597 | } | 597 | } |
598 | 598 | ||
599 | static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | 599 | static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, |
600 | struct request_sock *req, | 600 | struct request_sock *req, |
601 | struct user_namespace *user_ns, | 601 | struct user_namespace *user_ns, |
602 | u32 pid, u32 seq, | 602 | u32 portid, u32 seq, |
603 | const struct nlmsghdr *unlh) | 603 | const struct nlmsghdr *unlh) |
604 | { | 604 | { |
605 | const struct inet_request_sock *ireq = inet_rsk(req); | 605 | const struct inet_request_sock *ireq = inet_rsk(req); |
@@ -608,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
608 | struct nlmsghdr *nlh; | 608 | struct nlmsghdr *nlh; |
609 | long tmo; | 609 | long tmo; |
610 | 610 | ||
611 | nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), | 611 | nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), |
612 | NLM_F_MULTI); | 612 | NLM_F_MULTI); |
613 | if (!nlh) | 613 | if (!nlh) |
614 | return -EMSGSIZE; | 614 | return -EMSGSIZE; |
@@ -711,7 +711,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
711 | 711 | ||
712 | err = inet_diag_fill_req(skb, sk, req, | 712 | err = inet_diag_fill_req(skb, sk, req, |
713 | sk_user_ns(NETLINK_CB(cb->skb).ssk), | 713 | sk_user_ns(NETLINK_CB(cb->skb).ssk), |
714 | NETLINK_CB(cb->skb).pid, | 714 | NETLINK_CB(cb->skb).portid, |
715 | cb->nlh->nlmsg_seq, cb->nlh); | 715 | cb->nlh->nlmsg_seq, cb->nlh); |
716 | if (err < 0) { | 716 | if (err < 0) { |
717 | cb->args[3] = j + 1; | 717 | cb->args[3] = j + 1; |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 85190e69297b..4750d2b74d79 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -89,7 +89,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) | |||
89 | nf->low_thresh = 0; | 89 | nf->low_thresh = 0; |
90 | 90 | ||
91 | local_bh_disable(); | 91 | local_bh_disable(); |
92 | inet_frag_evictor(nf, f); | 92 | inet_frag_evictor(nf, f, true); |
93 | local_bh_enable(); | 93 | local_bh_enable(); |
94 | } | 94 | } |
95 | EXPORT_SYMBOL(inet_frags_exit_net); | 95 | EXPORT_SYMBOL(inet_frags_exit_net); |
@@ -158,11 +158,16 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, | |||
158 | } | 158 | } |
159 | EXPORT_SYMBOL(inet_frag_destroy); | 159 | EXPORT_SYMBOL(inet_frag_destroy); |
160 | 160 | ||
161 | int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f) | 161 | int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) |
162 | { | 162 | { |
163 | struct inet_frag_queue *q; | 163 | struct inet_frag_queue *q; |
164 | int work, evicted = 0; | 164 | int work, evicted = 0; |
165 | 165 | ||
166 | if (!force) { | ||
167 | if (atomic_read(&nf->mem) <= nf->high_thresh) | ||
168 | return 0; | ||
169 | } | ||
170 | |||
166 | work = atomic_read(&nf->mem) - nf->low_thresh; | 171 | work = atomic_read(&nf->mem) - nf->low_thresh; |
167 | while (work > 0) { | 172 | while (work > 0) { |
168 | read_lock(&f->lock); | 173 | read_lock(&f->lock); |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409c..448e68546827 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -219,7 +219,7 @@ static void ip_evictor(struct net *net) | |||
219 | { | 219 | { |
220 | int evicted; | 220 | int evicted; |
221 | 221 | ||
222 | evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); | 222 | evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); |
223 | if (evicted) | 223 | if (evicted) |
224 | IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); | 224 | IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); |
225 | } | 225 | } |
@@ -523,6 +523,10 @@ found: | |||
523 | if (offset == 0) | 523 | if (offset == 0) |
524 | qp->q.last_in |= INET_FRAG_FIRST_IN; | 524 | qp->q.last_in |= INET_FRAG_FIRST_IN; |
525 | 525 | ||
526 | if (ip_hdr(skb)->frag_off & htons(IP_DF) && | ||
527 | skb->len + ihl > qp->q.max_size) | ||
528 | qp->q.max_size = skb->len + ihl; | ||
529 | |||
526 | if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | 530 | if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && |
527 | qp->q.meat == qp->q.len) | 531 | qp->q.meat == qp->q.len) |
528 | return ip_frag_reasm(qp, prev, dev); | 532 | return ip_frag_reasm(qp, prev, dev); |
@@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
646 | head->next = NULL; | 650 | head->next = NULL; |
647 | head->dev = dev; | 651 | head->dev = dev; |
648 | head->tstamp = qp->q.stamp; | 652 | head->tstamp = qp->q.stamp; |
653 | IPCB(head)->frag_max_size = qp->q.max_size; | ||
649 | 654 | ||
650 | iph = ip_hdr(head); | 655 | iph = ip_hdr(head); |
651 | iph->frag_off = 0; | 656 | /* max_size != 0 implies at least one fragment had IP_DF set */ |
657 | iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; | ||
652 | iph->tot_len = htons(len); | 658 | iph->tot_len = htons(len); |
653 | iph->tos |= ecn; | 659 | iph->tos |= ecn; |
654 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 660 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
@@ -678,8 +684,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) | |||
678 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); | 684 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); |
679 | 685 | ||
680 | /* Start by cleaning up the memory. */ | 686 | /* Start by cleaning up the memory. */ |
681 | if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) | 687 | ip_evictor(net); |
682 | ip_evictor(net); | ||
683 | 688 | ||
684 | /* Lookup (or create) queue header */ | 689 | /* Lookup (or create) queue header */ |
685 | if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { | 690 | if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b062a98574f2..7240f8e2dd45 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -120,6 +120,10 @@ | |||
120 | Alexey Kuznetsov. | 120 | Alexey Kuznetsov. |
121 | */ | 121 | */ |
122 | 122 | ||
123 | static bool log_ecn_error = true; | ||
124 | module_param(log_ecn_error, bool, 0644); | ||
125 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | ||
126 | |||
123 | static struct rtnl_link_ops ipgre_link_ops __read_mostly; | 127 | static struct rtnl_link_ops ipgre_link_ops __read_mostly; |
124 | static int ipgre_tunnel_init(struct net_device *dev); | 128 | static int ipgre_tunnel_init(struct net_device *dev); |
125 | static void ipgre_tunnel_setup(struct net_device *dev); | 129 | static void ipgre_tunnel_setup(struct net_device *dev); |
@@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, | |||
204 | tot->rx_crc_errors = dev->stats.rx_crc_errors; | 208 | tot->rx_crc_errors = dev->stats.rx_crc_errors; |
205 | tot->rx_fifo_errors = dev->stats.rx_fifo_errors; | 209 | tot->rx_fifo_errors = dev->stats.rx_fifo_errors; |
206 | tot->rx_length_errors = dev->stats.rx_length_errors; | 210 | tot->rx_length_errors = dev->stats.rx_length_errors; |
211 | tot->rx_frame_errors = dev->stats.rx_frame_errors; | ||
207 | tot->rx_errors = dev->stats.rx_errors; | 212 | tot->rx_errors = dev->stats.rx_errors; |
213 | |||
208 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; | 214 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; |
209 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; | 215 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; |
210 | tot->tx_dropped = dev->stats.tx_dropped; | 216 | tot->tx_dropped = dev->stats.tx_dropped; |
@@ -214,11 +220,25 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, | |||
214 | return tot; | 220 | return tot; |
215 | } | 221 | } |
216 | 222 | ||
223 | /* Does key in tunnel parameters match packet */ | ||
224 | static bool ipgre_key_match(const struct ip_tunnel_parm *p, | ||
225 | __be16 flags, __be32 key) | ||
226 | { | ||
227 | if (p->i_flags & GRE_KEY) { | ||
228 | if (flags & GRE_KEY) | ||
229 | return key == p->i_key; | ||
230 | else | ||
231 | return false; /* key expected, none present */ | ||
232 | } else | ||
233 | return !(flags & GRE_KEY); | ||
234 | } | ||
235 | |||
217 | /* Given src, dst and key, find appropriate for input tunnel. */ | 236 | /* Given src, dst and key, find appropriate for input tunnel. */ |
218 | 237 | ||
219 | static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, | 238 | static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, |
220 | __be32 remote, __be32 local, | 239 | __be32 remote, __be32 local, |
221 | __be32 key, __be16 gre_proto) | 240 | __be16 flags, __be32 key, |
241 | __be16 gre_proto) | ||
222 | { | 242 | { |
223 | struct net *net = dev_net(dev); | 243 | struct net *net = dev_net(dev); |
224 | int link = dev->ifindex; | 244 | int link = dev->ifindex; |
@@ -233,10 +253,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, | |||
233 | for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { | 253 | for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { |
234 | if (local != t->parms.iph.saddr || | 254 | if (local != t->parms.iph.saddr || |
235 | remote != t->parms.iph.daddr || | 255 | remote != t->parms.iph.daddr || |
236 | key != t->parms.i_key || | ||
237 | !(t->dev->flags & IFF_UP)) | 256 | !(t->dev->flags & IFF_UP)) |
238 | continue; | 257 | continue; |
239 | 258 | ||
259 | if (!ipgre_key_match(&t->parms, flags, key)) | ||
260 | continue; | ||
261 | |||
240 | if (t->dev->type != ARPHRD_IPGRE && | 262 | if (t->dev->type != ARPHRD_IPGRE && |
241 | t->dev->type != dev_type) | 263 | t->dev->type != dev_type) |
242 | continue; | 264 | continue; |
@@ -257,10 +279,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, | |||
257 | 279 | ||
258 | for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { | 280 | for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { |
259 | if (remote != t->parms.iph.daddr || | 281 | if (remote != t->parms.iph.daddr || |
260 | key != t->parms.i_key || | ||
261 | !(t->dev->flags & IFF_UP)) | 282 | !(t->dev->flags & IFF_UP)) |
262 | continue; | 283 | continue; |
263 | 284 | ||
285 | if (!ipgre_key_match(&t->parms, flags, key)) | ||
286 | continue; | ||
287 | |||
264 | if (t->dev->type != ARPHRD_IPGRE && | 288 | if (t->dev->type != ARPHRD_IPGRE && |
265 | t->dev->type != dev_type) | 289 | t->dev->type != dev_type) |
266 | continue; | 290 | continue; |
@@ -283,10 +307,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, | |||
283 | if ((local != t->parms.iph.saddr && | 307 | if ((local != t->parms.iph.saddr && |
284 | (local != t->parms.iph.daddr || | 308 | (local != t->parms.iph.daddr || |
285 | !ipv4_is_multicast(local))) || | 309 | !ipv4_is_multicast(local))) || |
286 | key != t->parms.i_key || | ||
287 | !(t->dev->flags & IFF_UP)) | 310 | !(t->dev->flags & IFF_UP)) |
288 | continue; | 311 | continue; |
289 | 312 | ||
313 | if (!ipgre_key_match(&t->parms, flags, key)) | ||
314 | continue; | ||
315 | |||
290 | if (t->dev->type != ARPHRD_IPGRE && | 316 | if (t->dev->type != ARPHRD_IPGRE && |
291 | t->dev->type != dev_type) | 317 | t->dev->type != dev_type) |
292 | continue; | 318 | continue; |
@@ -489,6 +515,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
489 | const int code = icmp_hdr(skb)->code; | 515 | const int code = icmp_hdr(skb)->code; |
490 | struct ip_tunnel *t; | 516 | struct ip_tunnel *t; |
491 | __be16 flags; | 517 | __be16 flags; |
518 | __be32 key = 0; | ||
492 | 519 | ||
493 | flags = p[0]; | 520 | flags = p[0]; |
494 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { | 521 | if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { |
@@ -505,6 +532,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
505 | if (skb_headlen(skb) < grehlen) | 532 | if (skb_headlen(skb) < grehlen) |
506 | return; | 533 | return; |
507 | 534 | ||
535 | if (flags & GRE_KEY) | ||
536 | key = *(((__be32 *)p) + (grehlen / 4) - 1); | ||
537 | |||
508 | switch (type) { | 538 | switch (type) { |
509 | default: | 539 | default: |
510 | case ICMP_PARAMETERPROB: | 540 | case ICMP_PARAMETERPROB: |
@@ -533,49 +563,34 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
533 | break; | 563 | break; |
534 | } | 564 | } |
535 | 565 | ||
536 | rcu_read_lock(); | ||
537 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, | 566 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, |
538 | flags & GRE_KEY ? | 567 | flags, key, p[1]); |
539 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, | 568 | |
540 | p[1]); | ||
541 | if (t == NULL) | 569 | if (t == NULL) |
542 | goto out; | 570 | return; |
543 | 571 | ||
544 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | 572 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { |
545 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, | 573 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, |
546 | t->parms.link, 0, IPPROTO_GRE, 0); | 574 | t->parms.link, 0, IPPROTO_GRE, 0); |
547 | goto out; | 575 | return; |
548 | } | 576 | } |
549 | if (type == ICMP_REDIRECT) { | 577 | if (type == ICMP_REDIRECT) { |
550 | ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, | 578 | ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, |
551 | IPPROTO_GRE, 0); | 579 | IPPROTO_GRE, 0); |
552 | goto out; | 580 | return; |
553 | } | 581 | } |
554 | if (t->parms.iph.daddr == 0 || | 582 | if (t->parms.iph.daddr == 0 || |
555 | ipv4_is_multicast(t->parms.iph.daddr)) | 583 | ipv4_is_multicast(t->parms.iph.daddr)) |
556 | goto out; | 584 | return; |
557 | 585 | ||
558 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 586 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
559 | goto out; | 587 | return; |
560 | 588 | ||
561 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) | 589 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
562 | t->err_count++; | 590 | t->err_count++; |
563 | else | 591 | else |
564 | t->err_count = 1; | 592 | t->err_count = 1; |
565 | t->err_time = jiffies; | 593 | t->err_time = jiffies; |
566 | out: | ||
567 | rcu_read_unlock(); | ||
568 | } | ||
569 | |||
570 | static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) | ||
571 | { | ||
572 | if (INET_ECN_is_ce(iph->tos)) { | ||
573 | if (skb->protocol == htons(ETH_P_IP)) { | ||
574 | IP_ECN_set_ce(ip_hdr(skb)); | ||
575 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | ||
576 | IP6_ECN_set_ce(ipv6_hdr(skb)); | ||
577 | } | ||
578 | } | ||
579 | } | 594 | } |
580 | 595 | ||
581 | static inline u8 | 596 | static inline u8 |
@@ -600,9 +615,10 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
600 | struct ip_tunnel *tunnel; | 615 | struct ip_tunnel *tunnel; |
601 | int offset = 4; | 616 | int offset = 4; |
602 | __be16 gre_proto; | 617 | __be16 gre_proto; |
618 | int err; | ||
603 | 619 | ||
604 | if (!pskb_may_pull(skb, 16)) | 620 | if (!pskb_may_pull(skb, 16)) |
605 | goto drop_nolock; | 621 | goto drop; |
606 | 622 | ||
607 | iph = ip_hdr(skb); | 623 | iph = ip_hdr(skb); |
608 | h = skb->data; | 624 | h = skb->data; |
@@ -613,7 +629,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
613 | - We do not support routing headers. | 629 | - We do not support routing headers. |
614 | */ | 630 | */ |
615 | if (flags&(GRE_VERSION|GRE_ROUTING)) | 631 | if (flags&(GRE_VERSION|GRE_ROUTING)) |
616 | goto drop_nolock; | 632 | goto drop; |
617 | 633 | ||
618 | if (flags&GRE_CSUM) { | 634 | if (flags&GRE_CSUM) { |
619 | switch (skb->ip_summed) { | 635 | switch (skb->ip_summed) { |
@@ -641,10 +657,10 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
641 | 657 | ||
642 | gre_proto = *(__be16 *)(h + 2); | 658 | gre_proto = *(__be16 *)(h + 2); |
643 | 659 | ||
644 | rcu_read_lock(); | 660 | tunnel = ipgre_tunnel_lookup(skb->dev, |
645 | if ((tunnel = ipgre_tunnel_lookup(skb->dev, | 661 | iph->saddr, iph->daddr, flags, key, |
646 | iph->saddr, iph->daddr, key, | 662 | gre_proto); |
647 | gre_proto))) { | 663 | if (tunnel) { |
648 | struct pcpu_tstats *tstats; | 664 | struct pcpu_tstats *tstats; |
649 | 665 | ||
650 | secpath_reset(skb); | 666 | secpath_reset(skb); |
@@ -703,27 +719,33 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
703 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | 719 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); |
704 | } | 720 | } |
705 | 721 | ||
722 | __skb_tunnel_rx(skb, tunnel->dev); | ||
723 | |||
724 | skb_reset_network_header(skb); | ||
725 | err = IP_ECN_decapsulate(iph, skb); | ||
726 | if (unlikely(err)) { | ||
727 | if (log_ecn_error) | ||
728 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | ||
729 | &iph->saddr, iph->tos); | ||
730 | if (err > 1) { | ||
731 | ++tunnel->dev->stats.rx_frame_errors; | ||
732 | ++tunnel->dev->stats.rx_errors; | ||
733 | goto drop; | ||
734 | } | ||
735 | } | ||
736 | |||
706 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 737 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
707 | u64_stats_update_begin(&tstats->syncp); | 738 | u64_stats_update_begin(&tstats->syncp); |
708 | tstats->rx_packets++; | 739 | tstats->rx_packets++; |
709 | tstats->rx_bytes += skb->len; | 740 | tstats->rx_bytes += skb->len; |
710 | u64_stats_update_end(&tstats->syncp); | 741 | u64_stats_update_end(&tstats->syncp); |
711 | 742 | ||
712 | __skb_tunnel_rx(skb, tunnel->dev); | 743 | gro_cells_receive(&tunnel->gro_cells, skb); |
713 | |||
714 | skb_reset_network_header(skb); | ||
715 | ipgre_ecn_decapsulate(iph, skb); | ||
716 | |||
717 | netif_rx(skb); | ||
718 | |||
719 | rcu_read_unlock(); | ||
720 | return 0; | 744 | return 0; |
721 | } | 745 | } |
722 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 746 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
723 | 747 | ||
724 | drop: | 748 | drop: |
725 | rcu_read_unlock(); | ||
726 | drop_nolock: | ||
727 | kfree_skb(skb); | 749 | kfree_skb(skb); |
728 | return 0; | 750 | return 0; |
729 | } | 751 | } |
@@ -745,6 +767,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
745 | __be32 dst; | 767 | __be32 dst; |
746 | int mtu; | 768 | int mtu; |
747 | 769 | ||
770 | if (skb->ip_summed == CHECKSUM_PARTIAL && | ||
771 | skb_checksum_help(skb)) | ||
772 | goto tx_error; | ||
773 | |||
748 | if (dev->type == ARPHRD_ETHER) | 774 | if (dev->type == ARPHRD_ETHER) |
749 | IPCB(skb)->flags = 0; | 775 | IPCB(skb)->flags = 0; |
750 | 776 | ||
@@ -1292,10 +1318,18 @@ static const struct net_device_ops ipgre_netdev_ops = { | |||
1292 | 1318 | ||
1293 | static void ipgre_dev_free(struct net_device *dev) | 1319 | static void ipgre_dev_free(struct net_device *dev) |
1294 | { | 1320 | { |
1321 | struct ip_tunnel *tunnel = netdev_priv(dev); | ||
1322 | |||
1323 | gro_cells_destroy(&tunnel->gro_cells); | ||
1295 | free_percpu(dev->tstats); | 1324 | free_percpu(dev->tstats); |
1296 | free_netdev(dev); | 1325 | free_netdev(dev); |
1297 | } | 1326 | } |
1298 | 1327 | ||
1328 | #define GRE_FEATURES (NETIF_F_SG | \ | ||
1329 | NETIF_F_FRAGLIST | \ | ||
1330 | NETIF_F_HIGHDMA | \ | ||
1331 | NETIF_F_HW_CSUM) | ||
1332 | |||
1299 | static void ipgre_tunnel_setup(struct net_device *dev) | 1333 | static void ipgre_tunnel_setup(struct net_device *dev) |
1300 | { | 1334 | { |
1301 | dev->netdev_ops = &ipgre_netdev_ops; | 1335 | dev->netdev_ops = &ipgre_netdev_ops; |
@@ -1309,12 +1343,16 @@ static void ipgre_tunnel_setup(struct net_device *dev) | |||
1309 | dev->addr_len = 4; | 1343 | dev->addr_len = 4; |
1310 | dev->features |= NETIF_F_NETNS_LOCAL; | 1344 | dev->features |= NETIF_F_NETNS_LOCAL; |
1311 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; | 1345 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
1346 | |||
1347 | dev->features |= GRE_FEATURES; | ||
1348 | dev->hw_features |= GRE_FEATURES; | ||
1312 | } | 1349 | } |
1313 | 1350 | ||
1314 | static int ipgre_tunnel_init(struct net_device *dev) | 1351 | static int ipgre_tunnel_init(struct net_device *dev) |
1315 | { | 1352 | { |
1316 | struct ip_tunnel *tunnel; | 1353 | struct ip_tunnel *tunnel; |
1317 | struct iphdr *iph; | 1354 | struct iphdr *iph; |
1355 | int err; | ||
1318 | 1356 | ||
1319 | tunnel = netdev_priv(dev); | 1357 | tunnel = netdev_priv(dev); |
1320 | iph = &tunnel->parms.iph; | 1358 | iph = &tunnel->parms.iph; |
@@ -1341,6 +1379,12 @@ static int ipgre_tunnel_init(struct net_device *dev) | |||
1341 | if (!dev->tstats) | 1379 | if (!dev->tstats) |
1342 | return -ENOMEM; | 1380 | return -ENOMEM; |
1343 | 1381 | ||
1382 | err = gro_cells_init(&tunnel->gro_cells, dev); | ||
1383 | if (err) { | ||
1384 | free_percpu(dev->tstats); | ||
1385 | return err; | ||
1386 | } | ||
1387 | |||
1344 | return 0; | 1388 | return 0; |
1345 | } | 1389 | } |
1346 | 1390 | ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c196d749daf2..24a29a39e9a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
467 | 467 | ||
468 | iph = ip_hdr(skb); | 468 | iph = ip_hdr(skb); |
469 | 469 | ||
470 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 470 | if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || |
471 | (IPCB(skb)->frag_max_size && | ||
472 | IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { | ||
471 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 473 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
472 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 474 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
473 | htonl(ip_skb_dst_mtu(skb))); | 475 | htonl(ip_skb_dst_mtu(skb))); |
@@ -791,6 +793,7 @@ static int __ip_append_data(struct sock *sk, | |||
791 | struct flowi4 *fl4, | 793 | struct flowi4 *fl4, |
792 | struct sk_buff_head *queue, | 794 | struct sk_buff_head *queue, |
793 | struct inet_cork *cork, | 795 | struct inet_cork *cork, |
796 | struct page_frag *pfrag, | ||
794 | int getfrag(void *from, char *to, int offset, | 797 | int getfrag(void *from, char *to, int offset, |
795 | int len, int odd, struct sk_buff *skb), | 798 | int len, int odd, struct sk_buff *skb), |
796 | void *from, int length, int transhdrlen, | 799 | void *from, int length, int transhdrlen, |
@@ -985,47 +988,30 @@ alloc_new_skb: | |||
985 | } | 988 | } |
986 | } else { | 989 | } else { |
987 | int i = skb_shinfo(skb)->nr_frags; | 990 | int i = skb_shinfo(skb)->nr_frags; |
988 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; | ||
989 | struct page *page = cork->page; | ||
990 | int off = cork->off; | ||
991 | unsigned int left; | ||
992 | |||
993 | if (page && (left = PAGE_SIZE - off) > 0) { | ||
994 | if (copy >= left) | ||
995 | copy = left; | ||
996 | if (page != skb_frag_page(frag)) { | ||
997 | if (i == MAX_SKB_FRAGS) { | ||
998 | err = -EMSGSIZE; | ||
999 | goto error; | ||
1000 | } | ||
1001 | skb_fill_page_desc(skb, i, page, off, 0); | ||
1002 | skb_frag_ref(skb, i); | ||
1003 | frag = &skb_shinfo(skb)->frags[i]; | ||
1004 | } | ||
1005 | } else if (i < MAX_SKB_FRAGS) { | ||
1006 | if (copy > PAGE_SIZE) | ||
1007 | copy = PAGE_SIZE; | ||
1008 | page = alloc_pages(sk->sk_allocation, 0); | ||
1009 | if (page == NULL) { | ||
1010 | err = -ENOMEM; | ||
1011 | goto error; | ||
1012 | } | ||
1013 | cork->page = page; | ||
1014 | cork->off = 0; | ||
1015 | 991 | ||
1016 | skb_fill_page_desc(skb, i, page, 0, 0); | 992 | err = -ENOMEM; |
1017 | frag = &skb_shinfo(skb)->frags[i]; | 993 | if (!sk_page_frag_refill(sk, pfrag)) |
1018 | } else { | ||
1019 | err = -EMSGSIZE; | ||
1020 | goto error; | ||
1021 | } | ||
1022 | if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), | ||
1023 | offset, copy, skb->len, skb) < 0) { | ||
1024 | err = -EFAULT; | ||
1025 | goto error; | 994 | goto error; |
995 | |||
996 | if (!skb_can_coalesce(skb, i, pfrag->page, | ||
997 | pfrag->offset)) { | ||
998 | err = -EMSGSIZE; | ||
999 | if (i == MAX_SKB_FRAGS) | ||
1000 | goto error; | ||
1001 | |||
1002 | __skb_fill_page_desc(skb, i, pfrag->page, | ||
1003 | pfrag->offset, 0); | ||
1004 | skb_shinfo(skb)->nr_frags = ++i; | ||
1005 | get_page(pfrag->page); | ||
1026 | } | 1006 | } |
1027 | cork->off += copy; | 1007 | copy = min_t(int, copy, pfrag->size - pfrag->offset); |
1028 | skb_frag_size_add(frag, copy); | 1008 | if (getfrag(from, |
1009 | page_address(pfrag->page) + pfrag->offset, | ||
1010 | offset, copy, skb->len, skb) < 0) | ||
1011 | goto error_efault; | ||
1012 | |||
1013 | pfrag->offset += copy; | ||
1014 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | ||
1029 | skb->len += copy; | 1015 | skb->len += copy; |
1030 | skb->data_len += copy; | 1016 | skb->data_len += copy; |
1031 | skb->truesize += copy; | 1017 | skb->truesize += copy; |
@@ -1037,6 +1023,8 @@ alloc_new_skb: | |||
1037 | 1023 | ||
1038 | return 0; | 1024 | return 0; |
1039 | 1025 | ||
1026 | error_efault: | ||
1027 | err = -EFAULT; | ||
1040 | error: | 1028 | error: |
1041 | cork->length -= length; | 1029 | cork->length -= length; |
1042 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1030 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
@@ -1077,8 +1065,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | |||
1077 | cork->dst = &rt->dst; | 1065 | cork->dst = &rt->dst; |
1078 | cork->length = 0; | 1066 | cork->length = 0; |
1079 | cork->tx_flags = ipc->tx_flags; | 1067 | cork->tx_flags = ipc->tx_flags; |
1080 | cork->page = NULL; | ||
1081 | cork->off = 0; | ||
1082 | 1068 | ||
1083 | return 0; | 1069 | return 0; |
1084 | } | 1070 | } |
@@ -1115,7 +1101,8 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, | |||
1115 | transhdrlen = 0; | 1101 | transhdrlen = 0; |
1116 | } | 1102 | } |
1117 | 1103 | ||
1118 | return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, | 1104 | return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, |
1105 | sk_page_frag(sk), getfrag, | ||
1119 | from, length, transhdrlen, flags); | 1106 | from, length, transhdrlen, flags); |
1120 | } | 1107 | } |
1121 | 1108 | ||
@@ -1437,7 +1424,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, | |||
1437 | if (err) | 1424 | if (err) |
1438 | return ERR_PTR(err); | 1425 | return ERR_PTR(err); |
1439 | 1426 | ||
1440 | err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, | 1427 | err = __ip_append_data(sk, fl4, &queue, &cork, |
1428 | &current->task_frag, getfrag, | ||
1441 | from, length, transhdrlen, flags); | 1429 | from, length, transhdrlen, flags); |
1442 | if (err) { | 1430 | if (err) { |
1443 | __ip_flush_pending_frames(sk, &queue, &cork); | 1431 | __ip_flush_pending_frames(sk, &queue, &cork); |
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3511ffba7bd4..978bca4818ae 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c | |||
@@ -304,7 +304,6 @@ static int vti_err(struct sk_buff *skb, u32 info) | |||
304 | 304 | ||
305 | err = -ENOENT; | 305 | err = -ENOENT; |
306 | 306 | ||
307 | rcu_read_lock(); | ||
308 | t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 307 | t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); |
309 | if (t == NULL) | 308 | if (t == NULL) |
310 | goto out; | 309 | goto out; |
@@ -326,7 +325,6 @@ static int vti_err(struct sk_buff *skb, u32 info) | |||
326 | t->err_count = 1; | 325 | t->err_count = 1; |
327 | t->err_time = jiffies; | 326 | t->err_time = jiffies; |
328 | out: | 327 | out: |
329 | rcu_read_unlock(); | ||
330 | return err; | 328 | return err; |
331 | } | 329 | } |
332 | 330 | ||
@@ -336,7 +334,6 @@ static int vti_rcv(struct sk_buff *skb) | |||
336 | struct ip_tunnel *tunnel; | 334 | struct ip_tunnel *tunnel; |
337 | const struct iphdr *iph = ip_hdr(skb); | 335 | const struct iphdr *iph = ip_hdr(skb); |
338 | 336 | ||
339 | rcu_read_lock(); | ||
340 | tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); | 337 | tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); |
341 | if (tunnel != NULL) { | 338 | if (tunnel != NULL) { |
342 | struct pcpu_tstats *tstats; | 339 | struct pcpu_tstats *tstats; |
@@ -348,10 +345,8 @@ static int vti_rcv(struct sk_buff *skb) | |||
348 | u64_stats_update_end(&tstats->syncp); | 345 | u64_stats_update_end(&tstats->syncp); |
349 | 346 | ||
350 | skb->dev = tunnel->dev; | 347 | skb->dev = tunnel->dev; |
351 | rcu_read_unlock(); | ||
352 | return 1; | 348 | return 1; |
353 | } | 349 | } |
354 | rcu_read_unlock(); | ||
355 | 350 | ||
356 | return -1; | 351 | return -1; |
357 | } | 352 | } |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 67e8a6b086ea..798358b10717 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -583,6 +583,17 @@ static void __init ic_rarp_send_if(struct ic_device *d) | |||
583 | #endif | 583 | #endif |
584 | 584 | ||
585 | /* | 585 | /* |
586 | * Predefine Nameservers | ||
587 | */ | ||
588 | static inline void __init ic_nameservers_predef(void) | ||
589 | { | ||
590 | int i; | ||
591 | |||
592 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) | ||
593 | ic_nameservers[i] = NONE; | ||
594 | } | ||
595 | |||
596 | /* | ||
586 | * DHCP/BOOTP support. | 597 | * DHCP/BOOTP support. |
587 | */ | 598 | */ |
588 | 599 | ||
@@ -747,10 +758,7 @@ static void __init ic_bootp_init_ext(u8 *e) | |||
747 | */ | 758 | */ |
748 | static inline void __init ic_bootp_init(void) | 759 | static inline void __init ic_bootp_init(void) |
749 | { | 760 | { |
750 | int i; | 761 | ic_nameservers_predef(); |
751 | |||
752 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) | ||
753 | ic_nameservers[i] = NONE; | ||
754 | 762 | ||
755 | dev_add_pack(&bootp_packet_type); | 763 | dev_add_pack(&bootp_packet_type); |
756 | } | 764 | } |
@@ -1379,6 +1387,7 @@ static int __init ip_auto_config(void) | |||
1379 | int retries = CONF_OPEN_RETRIES; | 1387 | int retries = CONF_OPEN_RETRIES; |
1380 | #endif | 1388 | #endif |
1381 | int err; | 1389 | int err; |
1390 | unsigned int i; | ||
1382 | 1391 | ||
1383 | #ifdef CONFIG_PROC_FS | 1392 | #ifdef CONFIG_PROC_FS |
1384 | proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); | 1393 | proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); |
@@ -1499,7 +1508,15 @@ static int __init ip_auto_config(void) | |||
1499 | &ic_servaddr, &root_server_addr, root_server_path); | 1508 | &ic_servaddr, &root_server_addr, root_server_path); |
1500 | if (ic_dev_mtu) | 1509 | if (ic_dev_mtu) |
1501 | pr_cont(", mtu=%d", ic_dev_mtu); | 1510 | pr_cont(", mtu=%d", ic_dev_mtu); |
1502 | pr_cont("\n"); | 1511 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) |
1512 | if (ic_nameservers[i] != NONE) { | ||
1513 | pr_info(" nameserver%u=%pI4", | ||
1514 | i, &ic_nameservers[i]); | ||
1515 | break; | ||
1516 | } | ||
1517 | for (i++; i < CONF_NAMESERVERS_MAX; i++) | ||
1518 | if (ic_nameservers[i] != NONE) | ||
1519 | pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); | ||
1503 | #endif /* !SILENT */ | 1520 | #endif /* !SILENT */ |
1504 | 1521 | ||
1505 | return 0; | 1522 | return 0; |
@@ -1570,6 +1587,8 @@ static int __init ip_auto_config_setup(char *addrs) | |||
1570 | return 1; | 1587 | return 1; |
1571 | } | 1588 | } |
1572 | 1589 | ||
1590 | ic_nameservers_predef(); | ||
1591 | |||
1573 | /* Parse string for static IP assignment. */ | 1592 | /* Parse string for static IP assignment. */ |
1574 | ip = addrs; | 1593 | ip = addrs; |
1575 | while (ip && *ip) { | 1594 | while (ip && *ip) { |
@@ -1613,6 +1632,20 @@ static int __init ip_auto_config_setup(char *addrs) | |||
1613 | ic_enable = 0; | 1632 | ic_enable = 0; |
1614 | } | 1633 | } |
1615 | break; | 1634 | break; |
1635 | case 7: | ||
1636 | if (CONF_NAMESERVERS_MAX >= 1) { | ||
1637 | ic_nameservers[0] = in_aton(ip); | ||
1638 | if (ic_nameservers[0] == ANY) | ||
1639 | ic_nameservers[0] = NONE; | ||
1640 | } | ||
1641 | break; | ||
1642 | case 8: | ||
1643 | if (CONF_NAMESERVERS_MAX >= 2) { | ||
1644 | ic_nameservers[1] = in_aton(ip); | ||
1645 | if (ic_nameservers[1] == ANY) | ||
1646 | ic_nameservers[1] = NONE; | ||
1647 | } | ||
1648 | break; | ||
1616 | } | 1649 | } |
1617 | } | 1650 | } |
1618 | ip = cp; | 1651 | ip = cp; |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 99af1f0cc658..e15b45297c09 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -120,6 +120,10 @@ | |||
120 | #define HASH_SIZE 16 | 120 | #define HASH_SIZE 16 |
121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
122 | 122 | ||
123 | static bool log_ecn_error = true; | ||
124 | module_param(log_ecn_error, bool, 0644); | ||
125 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | ||
126 | |||
123 | static int ipip_net_id __read_mostly; | 127 | static int ipip_net_id __read_mostly; |
124 | struct ipip_net { | 128 | struct ipip_net { |
125 | struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; | 129 | struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; |
@@ -365,8 +369,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
365 | } | 369 | } |
366 | 370 | ||
367 | err = -ENOENT; | 371 | err = -ENOENT; |
368 | |||
369 | rcu_read_lock(); | ||
370 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 372 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); |
371 | if (t == NULL) | 373 | if (t == NULL) |
372 | goto out; | 374 | goto out; |
@@ -398,34 +400,22 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
398 | t->err_count = 1; | 400 | t->err_count = 1; |
399 | t->err_time = jiffies; | 401 | t->err_time = jiffies; |
400 | out: | 402 | out: |
401 | rcu_read_unlock(); | ||
402 | return err; | ||
403 | } | ||
404 | |||
405 | static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, | ||
406 | struct sk_buff *skb) | ||
407 | { | ||
408 | struct iphdr *inner_iph = ip_hdr(skb); | ||
409 | 403 | ||
410 | if (INET_ECN_is_ce(outer_iph->tos)) | 404 | return err; |
411 | IP_ECN_set_ce(inner_iph); | ||
412 | } | 405 | } |
413 | 406 | ||
414 | static int ipip_rcv(struct sk_buff *skb) | 407 | static int ipip_rcv(struct sk_buff *skb) |
415 | { | 408 | { |
416 | struct ip_tunnel *tunnel; | 409 | struct ip_tunnel *tunnel; |
417 | const struct iphdr *iph = ip_hdr(skb); | 410 | const struct iphdr *iph = ip_hdr(skb); |
411 | int err; | ||
418 | 412 | ||
419 | rcu_read_lock(); | ||
420 | tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); | 413 | tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); |
421 | if (tunnel != NULL) { | 414 | if (tunnel != NULL) { |
422 | struct pcpu_tstats *tstats; | 415 | struct pcpu_tstats *tstats; |
423 | 416 | ||
424 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 417 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
425 | rcu_read_unlock(); | 418 | goto drop; |
426 | kfree_skb(skb); | ||
427 | return 0; | ||
428 | } | ||
429 | 419 | ||
430 | secpath_reset(skb); | 420 | secpath_reset(skb); |
431 | 421 | ||
@@ -434,24 +424,35 @@ static int ipip_rcv(struct sk_buff *skb) | |||
434 | skb->protocol = htons(ETH_P_IP); | 424 | skb->protocol = htons(ETH_P_IP); |
435 | skb->pkt_type = PACKET_HOST; | 425 | skb->pkt_type = PACKET_HOST; |
436 | 426 | ||
427 | __skb_tunnel_rx(skb, tunnel->dev); | ||
428 | |||
429 | err = IP_ECN_decapsulate(iph, skb); | ||
430 | if (unlikely(err)) { | ||
431 | if (log_ecn_error) | ||
432 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | ||
433 | &iph->saddr, iph->tos); | ||
434 | if (err > 1) { | ||
435 | ++tunnel->dev->stats.rx_frame_errors; | ||
436 | ++tunnel->dev->stats.rx_errors; | ||
437 | goto drop; | ||
438 | } | ||
439 | } | ||
440 | |||
437 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 441 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
438 | u64_stats_update_begin(&tstats->syncp); | 442 | u64_stats_update_begin(&tstats->syncp); |
439 | tstats->rx_packets++; | 443 | tstats->rx_packets++; |
440 | tstats->rx_bytes += skb->len; | 444 | tstats->rx_bytes += skb->len; |
441 | u64_stats_update_end(&tstats->syncp); | 445 | u64_stats_update_end(&tstats->syncp); |
442 | 446 | ||
443 | __skb_tunnel_rx(skb, tunnel->dev); | ||
444 | |||
445 | ipip_ecn_decapsulate(iph, skb); | ||
446 | |||
447 | netif_rx(skb); | 447 | netif_rx(skb); |
448 | |||
449 | rcu_read_unlock(); | ||
450 | return 0; | 448 | return 0; |
451 | } | 449 | } |
452 | rcu_read_unlock(); | ||
453 | 450 | ||
454 | return -1; | 451 | return -1; |
452 | |||
453 | drop: | ||
454 | kfree_skb(skb); | ||
455 | return 0; | ||
455 | } | 456 | } |
456 | 457 | ||
457 | /* | 458 | /* |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ebdf06f938bf..1daa95c2a0ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -626,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) | |||
626 | e->error = -ETIMEDOUT; | 626 | e->error = -ETIMEDOUT; |
627 | memset(&e->msg, 0, sizeof(e->msg)); | 627 | memset(&e->msg, 0, sizeof(e->msg)); |
628 | 628 | ||
629 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); | 629 | rtnl_unicast(skb, net, NETLINK_CB(skb).portid); |
630 | } else { | 630 | } else { |
631 | kfree_skb(skb); | 631 | kfree_skb(skb); |
632 | } | 632 | } |
@@ -870,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, | |||
870 | memset(&e->msg, 0, sizeof(e->msg)); | 870 | memset(&e->msg, 0, sizeof(e->msg)); |
871 | } | 871 | } |
872 | 872 | ||
873 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); | 873 | rtnl_unicast(skb, net, NETLINK_CB(skb).portid); |
874 | } else { | 874 | } else { |
875 | ip_mr_forward(net, mrt, skb, c, 0); | 875 | ip_mr_forward(net, mrt, skb, c, 0); |
876 | } | 876 | } |
@@ -1808,7 +1808,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) | |||
1808 | .flowi4_oif = (rt_is_output_route(rt) ? | 1808 | .flowi4_oif = (rt_is_output_route(rt) ? |
1809 | skb->dev->ifindex : 0), | 1809 | skb->dev->ifindex : 0), |
1810 | .flowi4_iif = (rt_is_output_route(rt) ? | 1810 | .flowi4_iif = (rt_is_output_route(rt) ? |
1811 | net->loopback_dev->ifindex : | 1811 | LOOPBACK_IFINDEX : |
1812 | skb->dev->ifindex), | 1812 | skb->dev->ifindex), |
1813 | .flowi4_mark = skb->mark, | 1813 | .flowi4_mark = skb->mark, |
1814 | }; | 1814 | }; |
@@ -2117,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, | |||
2117 | } | 2117 | } |
2118 | 2118 | ||
2119 | static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | 2119 | static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
2120 | u32 pid, u32 seq, struct mfc_cache *c) | 2120 | u32 portid, u32 seq, struct mfc_cache *c) |
2121 | { | 2121 | { |
2122 | struct nlmsghdr *nlh; | 2122 | struct nlmsghdr *nlh; |
2123 | struct rtmsg *rtm; | 2123 | struct rtmsg *rtm; |
2124 | 2124 | ||
2125 | nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); | 2125 | nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); |
2126 | if (nlh == NULL) | 2126 | if (nlh == NULL) |
2127 | return -EMSGSIZE; | 2127 | return -EMSGSIZE; |
2128 | 2128 | ||
@@ -2176,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) | |||
2176 | if (e < s_e) | 2176 | if (e < s_e) |
2177 | goto next_entry; | 2177 | goto next_entry; |
2178 | if (ipmr_fill_mroute(mrt, skb, | 2178 | if (ipmr_fill_mroute(mrt, skb, |
2179 | NETLINK_CB(cb->skb).pid, | 2179 | NETLINK_CB(cb->skb).portid, |
2180 | cb->nlh->nlmsg_seq, | 2180 | cb->nlh->nlmsg_seq, |
2181 | mfc) < 0) | 2181 | mfc) < 0) |
2182 | goto done; | 2182 | goto done; |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ed1b36783192..4c0cf63dd92e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -72,43 +72,6 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) | |||
72 | } | 72 | } |
73 | EXPORT_SYMBOL(ip_route_me_harder); | 73 | EXPORT_SYMBOL(ip_route_me_harder); |
74 | 74 | ||
75 | #ifdef CONFIG_XFRM | ||
76 | int ip_xfrm_me_harder(struct sk_buff *skb) | ||
77 | { | ||
78 | struct flowi fl; | ||
79 | unsigned int hh_len; | ||
80 | struct dst_entry *dst; | ||
81 | |||
82 | if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) | ||
83 | return 0; | ||
84 | if (xfrm_decode_session(skb, &fl, AF_INET) < 0) | ||
85 | return -1; | ||
86 | |||
87 | dst = skb_dst(skb); | ||
88 | if (dst->xfrm) | ||
89 | dst = ((struct xfrm_dst *)dst)->route; | ||
90 | dst_hold(dst); | ||
91 | |||
92 | dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); | ||
93 | if (IS_ERR(dst)) | ||
94 | return -1; | ||
95 | |||
96 | skb_dst_drop(skb); | ||
97 | skb_dst_set(skb, dst); | ||
98 | |||
99 | /* Change in oif may mean change in hh_len. */ | ||
100 | hh_len = skb_dst(skb)->dev->hard_header_len; | ||
101 | if (skb_headroom(skb) < hh_len && | ||
102 | pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) | ||
103 | return -1; | ||
104 | return 0; | ||
105 | } | ||
106 | EXPORT_SYMBOL(ip_xfrm_me_harder); | ||
107 | #endif | ||
108 | |||
109 | void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); | ||
110 | EXPORT_SYMBOL(ip_nat_decode_session); | ||
111 | |||
112 | /* | 75 | /* |
113 | * Extra routing may needed on local out, as the QUEUE target never | 76 | * Extra routing may needed on local out, as the QUEUE target never |
114 | * returns control to the table. | 77 | * returns control to the table. |
@@ -225,12 +188,12 @@ static const struct nf_afinfo nf_ip_afinfo = { | |||
225 | .route_key_size = sizeof(struct ip_rt_info), | 188 | .route_key_size = sizeof(struct ip_rt_info), |
226 | }; | 189 | }; |
227 | 190 | ||
228 | static int ipv4_netfilter_init(void) | 191 | static int __init ipv4_netfilter_init(void) |
229 | { | 192 | { |
230 | return nf_register_afinfo(&nf_ip_afinfo); | 193 | return nf_register_afinfo(&nf_ip_afinfo); |
231 | } | 194 | } |
232 | 195 | ||
233 | static void ipv4_netfilter_fini(void) | 196 | static void __exit ipv4_netfilter_fini(void) |
234 | { | 197 | { |
235 | nf_unregister_afinfo(&nf_ip_afinfo); | 198 | nf_unregister_afinfo(&nf_ip_afinfo); |
236 | } | 199 | } |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fcc543cd987a..d8d6f2a5bf12 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -143,25 +143,22 @@ config IP_NF_TARGET_ULOG | |||
143 | To compile it as a module, choose M here. If unsure, say N. | 143 | To compile it as a module, choose M here. If unsure, say N. |
144 | 144 | ||
145 | # NAT + specific targets: nf_conntrack | 145 | # NAT + specific targets: nf_conntrack |
146 | config NF_NAT | 146 | config NF_NAT_IPV4 |
147 | tristate "Full NAT" | 147 | tristate "IPv4 NAT" |
148 | depends on NF_CONNTRACK_IPV4 | 148 | depends on NF_CONNTRACK_IPV4 |
149 | default m if NETFILTER_ADVANCED=n | 149 | default m if NETFILTER_ADVANCED=n |
150 | select NF_NAT | ||
150 | help | 151 | help |
151 | The Full NAT option allows masquerading, port forwarding and other | 152 | The IPv4 NAT option allows masquerading, port forwarding and other |
152 | forms of full Network Address Port Translation. It is controlled by | 153 | forms of full Network Address Port Translation. It is controlled by |
153 | the `nat' table in iptables: see the man page for iptables(8). | 154 | the `nat' table in iptables: see the man page for iptables(8). |
154 | 155 | ||
155 | To compile it as a module, choose M here. If unsure, say N. | 156 | To compile it as a module, choose M here. If unsure, say N. |
156 | 157 | ||
157 | config NF_NAT_NEEDED | 158 | if NF_NAT_IPV4 |
158 | bool | ||
159 | depends on NF_NAT | ||
160 | default y | ||
161 | 159 | ||
162 | config IP_NF_TARGET_MASQUERADE | 160 | config IP_NF_TARGET_MASQUERADE |
163 | tristate "MASQUERADE target support" | 161 | tristate "MASQUERADE target support" |
164 | depends on NF_NAT | ||
165 | default m if NETFILTER_ADVANCED=n | 162 | default m if NETFILTER_ADVANCED=n |
166 | help | 163 | help |
167 | Masquerading is a special case of NAT: all outgoing connections are | 164 | Masquerading is a special case of NAT: all outgoing connections are |
@@ -174,30 +171,27 @@ config IP_NF_TARGET_MASQUERADE | |||
174 | 171 | ||
175 | config IP_NF_TARGET_NETMAP | 172 | config IP_NF_TARGET_NETMAP |
176 | tristate "NETMAP target support" | 173 | tristate "NETMAP target support" |
177 | depends on NF_NAT | ||
178 | depends on NETFILTER_ADVANCED | 174 | depends on NETFILTER_ADVANCED |
179 | help | 175 | select NETFILTER_XT_TARGET_NETMAP |
180 | NETMAP is an implementation of static 1:1 NAT mapping of network | 176 | ---help--- |
181 | addresses. It maps the network address part, while keeping the host | 177 | This is a backwards-compat option for the user's convenience |
182 | address part intact. | 178 | (e.g. when running oldconfig). It selects |
183 | 179 | CONFIG_NETFILTER_XT_TARGET_NETMAP. | |
184 | To compile it as a module, choose M here. If unsure, say N. | ||
185 | 180 | ||
186 | config IP_NF_TARGET_REDIRECT | 181 | config IP_NF_TARGET_REDIRECT |
187 | tristate "REDIRECT target support" | 182 | tristate "REDIRECT target support" |
188 | depends on NF_NAT | ||
189 | depends on NETFILTER_ADVANCED | 183 | depends on NETFILTER_ADVANCED |
190 | help | 184 | select NETFILTER_XT_TARGET_REDIRECT |
191 | REDIRECT is a special case of NAT: all incoming connections are | 185 | ---help--- |
192 | mapped onto the incoming interface's address, causing the packets to | 186 | This is a backwards-compat option for the user's convenience |
193 | come to the local machine instead of passing through. This is | 187 | (e.g. when running oldconfig). It selects |
194 | useful for transparent proxies. | 188 | CONFIG_NETFILTER_XT_TARGET_REDIRECT. |
195 | 189 | ||
196 | To compile it as a module, choose M here. If unsure, say N. | 190 | endif |
197 | 191 | ||
198 | config NF_NAT_SNMP_BASIC | 192 | config NF_NAT_SNMP_BASIC |
199 | tristate "Basic SNMP-ALG support" | 193 | tristate "Basic SNMP-ALG support" |
200 | depends on NF_CONNTRACK_SNMP && NF_NAT | 194 | depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4 |
201 | depends on NETFILTER_ADVANCED | 195 | depends on NETFILTER_ADVANCED |
202 | default NF_NAT && NF_CONNTRACK_SNMP | 196 | default NF_NAT && NF_CONNTRACK_SNMP |
203 | ---help--- | 197 | ---help--- |
@@ -219,61 +213,21 @@ config NF_NAT_SNMP_BASIC | |||
219 | # <expr> '&&' <expr> (6) | 213 | # <expr> '&&' <expr> (6) |
220 | # | 214 | # |
221 | # (6) Returns the result of min(/expr/, /expr/). | 215 | # (6) Returns the result of min(/expr/, /expr/). |
222 | config NF_NAT_PROTO_DCCP | ||
223 | tristate | ||
224 | depends on NF_NAT && NF_CT_PROTO_DCCP | ||
225 | default NF_NAT && NF_CT_PROTO_DCCP | ||
226 | 216 | ||
227 | config NF_NAT_PROTO_GRE | 217 | config NF_NAT_PROTO_GRE |
228 | tristate | 218 | tristate |
229 | depends on NF_NAT && NF_CT_PROTO_GRE | 219 | depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE |
230 | |||
231 | config NF_NAT_PROTO_UDPLITE | ||
232 | tristate | ||
233 | depends on NF_NAT && NF_CT_PROTO_UDPLITE | ||
234 | default NF_NAT && NF_CT_PROTO_UDPLITE | ||
235 | |||
236 | config NF_NAT_PROTO_SCTP | ||
237 | tristate | ||
238 | default NF_NAT && NF_CT_PROTO_SCTP | ||
239 | depends on NF_NAT && NF_CT_PROTO_SCTP | ||
240 | select LIBCRC32C | ||
241 | |||
242 | config NF_NAT_FTP | ||
243 | tristate | ||
244 | depends on NF_CONNTRACK && NF_NAT | ||
245 | default NF_NAT && NF_CONNTRACK_FTP | ||
246 | |||
247 | config NF_NAT_IRC | ||
248 | tristate | ||
249 | depends on NF_CONNTRACK && NF_NAT | ||
250 | default NF_NAT && NF_CONNTRACK_IRC | ||
251 | |||
252 | config NF_NAT_TFTP | ||
253 | tristate | ||
254 | depends on NF_CONNTRACK && NF_NAT | ||
255 | default NF_NAT && NF_CONNTRACK_TFTP | ||
256 | |||
257 | config NF_NAT_AMANDA | ||
258 | tristate | ||
259 | depends on NF_CONNTRACK && NF_NAT | ||
260 | default NF_NAT && NF_CONNTRACK_AMANDA | ||
261 | 220 | ||
262 | config NF_NAT_PPTP | 221 | config NF_NAT_PPTP |
263 | tristate | 222 | tristate |
264 | depends on NF_CONNTRACK && NF_NAT | 223 | depends on NF_CONNTRACK && NF_NAT_IPV4 |
265 | default NF_NAT && NF_CONNTRACK_PPTP | 224 | default NF_NAT_IPV4 && NF_CONNTRACK_PPTP |
266 | select NF_NAT_PROTO_GRE | 225 | select NF_NAT_PROTO_GRE |
267 | 226 | ||
268 | config NF_NAT_H323 | 227 | config NF_NAT_H323 |
269 | tristate | 228 | tristate |
270 | depends on NF_CONNTRACK && NF_NAT | 229 | depends on NF_CONNTRACK && NF_NAT_IPV4 |
271 | default NF_NAT && NF_CONNTRACK_H323 | 230 | default NF_NAT_IPV4 && NF_CONNTRACK_H323 |
272 | |||
273 | config NF_NAT_SIP | ||
274 | tristate | ||
275 | depends on NF_CONNTRACK && NF_NAT | ||
276 | default NF_NAT && NF_CONNTRACK_SIP | ||
277 | 231 | ||
278 | # mangle + specific targets | 232 | # mangle + specific targets |
279 | config IP_NF_MANGLE | 233 | config IP_NF_MANGLE |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c20674dc9452..007b128eecc9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -10,32 +10,22 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o | |||
10 | endif | 10 | endif |
11 | endif | 11 | endif |
12 | 12 | ||
13 | nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o | ||
14 | iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o | ||
15 | |||
16 | # connection tracking | 13 | # connection tracking |
17 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o | 14 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o |
18 | 15 | ||
19 | obj-$(CONFIG_NF_NAT) += nf_nat.o | 16 | nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o |
17 | obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o | ||
20 | 18 | ||
21 | # defrag | 19 | # defrag |
22 | obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o | 20 | obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o |
23 | 21 | ||
24 | # NAT helpers (nf_conntrack) | 22 | # NAT helpers (nf_conntrack) |
25 | obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o | ||
26 | obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o | ||
27 | obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o | 23 | obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o |
28 | obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o | ||
29 | obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o | 24 | obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o |
30 | obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o | ||
31 | obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o | 25 | obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o |
32 | obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o | ||
33 | 26 | ||
34 | # NAT protocols (nf_nat) | 27 | # NAT protocols (nf_nat) |
35 | obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o | ||
36 | obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o | 28 | obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o |
37 | obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o | ||
38 | obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o | ||
39 | 29 | ||
40 | # generic IP tables | 30 | # generic IP tables |
41 | obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o | 31 | obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o |
@@ -43,7 +33,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o | |||
43 | # the three instances of ip_tables | 33 | # the three instances of ip_tables |
44 | obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o | 34 | obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o |
45 | obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o | 35 | obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o |
46 | obj-$(CONFIG_NF_NAT) += iptable_nat.o | 36 | obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o |
47 | obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o | 37 | obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o |
48 | obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o | 38 | obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o |
49 | 39 | ||
@@ -55,8 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o | |||
55 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o | 45 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o |
56 | obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o | 46 | obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o |
57 | obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o | 47 | obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o |
58 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o | ||
59 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o | ||
60 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o | 48 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o |
61 | obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o | 49 | obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o |
62 | 50 | ||
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index cbb6a1a6f6f7..5d5d4d1be9c2 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -19,9 +19,9 @@ | |||
19 | #include <net/ip.h> | 19 | #include <net/ip.h> |
20 | #include <net/checksum.h> | 20 | #include <net/checksum.h> |
21 | #include <net/route.h> | 21 | #include <net/route.h> |
22 | #include <net/netfilter/nf_nat_rule.h> | ||
23 | #include <linux/netfilter_ipv4.h> | 22 | #include <linux/netfilter_ipv4.h> |
24 | #include <linux/netfilter/x_tables.h> | 23 | #include <linux/netfilter/x_tables.h> |
24 | #include <net/netfilter/nf_nat.h> | ||
25 | 25 | ||
26 | MODULE_LICENSE("GPL"); | 26 | MODULE_LICENSE("GPL"); |
27 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 27 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
@@ -49,7 +49,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
49 | struct nf_conn *ct; | 49 | struct nf_conn *ct; |
50 | struct nf_conn_nat *nat; | 50 | struct nf_conn_nat *nat; |
51 | enum ip_conntrack_info ctinfo; | 51 | enum ip_conntrack_info ctinfo; |
52 | struct nf_nat_ipv4_range newrange; | 52 | struct nf_nat_range newrange; |
53 | const struct nf_nat_ipv4_multi_range_compat *mr; | 53 | const struct nf_nat_ipv4_multi_range_compat *mr; |
54 | const struct rtable *rt; | 54 | const struct rtable *rt; |
55 | __be32 newsrc, nh; | 55 | __be32 newsrc, nh; |
@@ -80,10 +80,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
80 | nat->masq_index = par->out->ifindex; | 80 | nat->masq_index = par->out->ifindex; |
81 | 81 | ||
82 | /* Transfer from original range. */ | 82 | /* Transfer from original range. */ |
83 | newrange = ((struct nf_nat_ipv4_range) | 83 | memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); |
84 | { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, | 84 | memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); |
85 | newsrc, newsrc, | 85 | newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; |
86 | mr->range[0].min, mr->range[0].max }); | 86 | newrange.min_addr.ip = newsrc; |
87 | newrange.max_addr.ip = newsrc; | ||
88 | newrange.min_proto = mr->range[0].min; | ||
89 | newrange.max_proto = mr->range[0].max; | ||
87 | 90 | ||
88 | /* Hand modified range to generic setup. */ | 91 | /* Hand modified range to generic setup. */ |
89 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); | 92 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); |
@@ -96,7 +99,8 @@ device_cmp(struct nf_conn *i, void *ifindex) | |||
96 | 99 | ||
97 | if (!nat) | 100 | if (!nat) |
98 | return 0; | 101 | return 0; |
99 | 102 | if (nf_ct_l3num(i) != NFPROTO_IPV4) | |
103 | return 0; | ||
100 | return nat->masq_index == (int)(long)ifindex; | 104 | return nat->masq_index == (int)(long)ifindex; |
101 | } | 105 | } |
102 | 106 | ||
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c deleted file mode 100644 index b5bfbbabf70d..000000000000 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ /dev/null | |||
@@ -1,98 +0,0 @@ | |||
1 | /* NETMAP - static NAT mapping of IP network addresses (1:1). | ||
2 | * The mapping can be applied to source (POSTROUTING), | ||
3 | * destination (PREROUTING), or both (with separate rules). | ||
4 | */ | ||
5 | |||
6 | /* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/netdevice.h> | ||
16 | #include <linux/netfilter.h> | ||
17 | #include <linux/netfilter_ipv4.h> | ||
18 | #include <linux/netfilter/x_tables.h> | ||
19 | #include <net/netfilter/nf_nat_rule.h> | ||
20 | |||
21 | MODULE_LICENSE("GPL"); | ||
22 | MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); | ||
23 | MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); | ||
24 | |||
25 | static int netmap_tg_check(const struct xt_tgchk_param *par) | ||
26 | { | ||
27 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
28 | |||
29 | if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { | ||
30 | pr_debug("bad MAP_IPS.\n"); | ||
31 | return -EINVAL; | ||
32 | } | ||
33 | if (mr->rangesize != 1) { | ||
34 | pr_debug("bad rangesize %u.\n", mr->rangesize); | ||
35 | return -EINVAL; | ||
36 | } | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static unsigned int | ||
41 | netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
42 | { | ||
43 | struct nf_conn *ct; | ||
44 | enum ip_conntrack_info ctinfo; | ||
45 | __be32 new_ip, netmask; | ||
46 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
47 | struct nf_nat_ipv4_range newrange; | ||
48 | |||
49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | ||
50 | par->hooknum == NF_INET_POST_ROUTING || | ||
51 | par->hooknum == NF_INET_LOCAL_OUT || | ||
52 | par->hooknum == NF_INET_LOCAL_IN); | ||
53 | ct = nf_ct_get(skb, &ctinfo); | ||
54 | |||
55 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); | ||
56 | |||
57 | if (par->hooknum == NF_INET_PRE_ROUTING || | ||
58 | par->hooknum == NF_INET_LOCAL_OUT) | ||
59 | new_ip = ip_hdr(skb)->daddr & ~netmask; | ||
60 | else | ||
61 | new_ip = ip_hdr(skb)->saddr & ~netmask; | ||
62 | new_ip |= mr->range[0].min_ip & netmask; | ||
63 | |||
64 | newrange = ((struct nf_nat_ipv4_range) | ||
65 | { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, | ||
66 | new_ip, new_ip, | ||
67 | mr->range[0].min, mr->range[0].max }); | ||
68 | |||
69 | /* Hand modified range to generic setup. */ | ||
70 | return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); | ||
71 | } | ||
72 | |||
73 | static struct xt_target netmap_tg_reg __read_mostly = { | ||
74 | .name = "NETMAP", | ||
75 | .family = NFPROTO_IPV4, | ||
76 | .target = netmap_tg, | ||
77 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), | ||
78 | .table = "nat", | ||
79 | .hooks = (1 << NF_INET_PRE_ROUTING) | | ||
80 | (1 << NF_INET_POST_ROUTING) | | ||
81 | (1 << NF_INET_LOCAL_OUT) | | ||
82 | (1 << NF_INET_LOCAL_IN), | ||
83 | .checkentry = netmap_tg_check, | ||
84 | .me = THIS_MODULE | ||
85 | }; | ||
86 | |||
87 | static int __init netmap_tg_init(void) | ||
88 | { | ||
89 | return xt_register_target(&netmap_tg_reg); | ||
90 | } | ||
91 | |||
92 | static void __exit netmap_tg_exit(void) | ||
93 | { | ||
94 | xt_unregister_target(&netmap_tg_reg); | ||
95 | } | ||
96 | |||
97 | module_init(netmap_tg_init); | ||
98 | module_exit(netmap_tg_exit); | ||
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c deleted file mode 100644 index 7c0103a5203e..000000000000 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ /dev/null | |||
@@ -1,110 +0,0 @@ | |||
1 | /* Redirect. Simple mapping which alters dst to a local IP address. */ | ||
2 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
3 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
10 | #include <linux/types.h> | ||
11 | #include <linux/ip.h> | ||
12 | #include <linux/timer.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/netfilter.h> | ||
15 | #include <linux/netdevice.h> | ||
16 | #include <linux/if.h> | ||
17 | #include <linux/inetdevice.h> | ||
18 | #include <net/protocol.h> | ||
19 | #include <net/checksum.h> | ||
20 | #include <linux/netfilter_ipv4.h> | ||
21 | #include <linux/netfilter/x_tables.h> | ||
22 | #include <net/netfilter/nf_nat_rule.h> | ||
23 | |||
24 | MODULE_LICENSE("GPL"); | ||
25 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | ||
26 | MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); | ||
27 | |||
28 | /* FIXME: Take multiple ranges --RR */ | ||
29 | static int redirect_tg_check(const struct xt_tgchk_param *par) | ||
30 | { | ||
31 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
32 | |||
33 | if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { | ||
34 | pr_debug("bad MAP_IPS.\n"); | ||
35 | return -EINVAL; | ||
36 | } | ||
37 | if (mr->rangesize != 1) { | ||
38 | pr_debug("bad rangesize %u.\n", mr->rangesize); | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static unsigned int | ||
45 | redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
46 | { | ||
47 | struct nf_conn *ct; | ||
48 | enum ip_conntrack_info ctinfo; | ||
49 | __be32 newdst; | ||
50 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
51 | struct nf_nat_ipv4_range newrange; | ||
52 | |||
53 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | ||
54 | par->hooknum == NF_INET_LOCAL_OUT); | ||
55 | |||
56 | ct = nf_ct_get(skb, &ctinfo); | ||
57 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | ||
58 | |||
59 | /* Local packets: make them go to loopback */ | ||
60 | if (par->hooknum == NF_INET_LOCAL_OUT) | ||
61 | newdst = htonl(0x7F000001); | ||
62 | else { | ||
63 | struct in_device *indev; | ||
64 | struct in_ifaddr *ifa; | ||
65 | |||
66 | newdst = 0; | ||
67 | |||
68 | rcu_read_lock(); | ||
69 | indev = __in_dev_get_rcu(skb->dev); | ||
70 | if (indev && (ifa = indev->ifa_list)) | ||
71 | newdst = ifa->ifa_local; | ||
72 | rcu_read_unlock(); | ||
73 | |||
74 | if (!newdst) | ||
75 | return NF_DROP; | ||
76 | } | ||
77 | |||
78 | /* Transfer from original range. */ | ||
79 | newrange = ((struct nf_nat_ipv4_range) | ||
80 | { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, | ||
81 | newdst, newdst, | ||
82 | mr->range[0].min, mr->range[0].max }); | ||
83 | |||
84 | /* Hand modified range to generic setup. */ | ||
85 | return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); | ||
86 | } | ||
87 | |||
88 | static struct xt_target redirect_tg_reg __read_mostly = { | ||
89 | .name = "REDIRECT", | ||
90 | .family = NFPROTO_IPV4, | ||
91 | .target = redirect_tg, | ||
92 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), | ||
93 | .table = "nat", | ||
94 | .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), | ||
95 | .checkentry = redirect_tg_check, | ||
96 | .me = THIS_MODULE, | ||
97 | }; | ||
98 | |||
99 | static int __init redirect_tg_init(void) | ||
100 | { | ||
101 | return xt_register_target(&redirect_tg_reg); | ||
102 | } | ||
103 | |||
104 | static void __exit redirect_tg_exit(void) | ||
105 | { | ||
106 | xt_unregister_target(&redirect_tg_reg); | ||
107 | } | ||
108 | |||
109 | module_init(redirect_tg_init); | ||
110 | module_exit(redirect_tg_exit); | ||
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1109f7f6c254..b5ef3cba2250 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -396,8 +396,7 @@ static int __init ulog_tg_init(void) | |||
396 | for (i = 0; i < ULOG_MAXNLGROUPS; i++) | 396 | for (i = 0; i < ULOG_MAXNLGROUPS; i++) |
397 | setup_timer(&ulog_buffers[i].timer, ulog_timer, i); | 397 | setup_timer(&ulog_buffers[i].timer, ulog_timer, i); |
398 | 398 | ||
399 | nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, | 399 | nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); |
400 | THIS_MODULE, &cfg); | ||
401 | if (!nflognl) | 400 | if (!nflognl) |
402 | return -ENOMEM; | 401 | return -ENOMEM; |
403 | 402 | ||
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 31371be8174b..c30130062cd6 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c | |||
@@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
85 | return ipv4_is_local_multicast(iph->daddr) ^ invert; | 85 | return ipv4_is_local_multicast(iph->daddr) ^ invert; |
86 | flow.flowi4_iif = 0; | 86 | flow.flowi4_iif = 0; |
87 | } else { | 87 | } else { |
88 | flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; | 88 | flow.flowi4_iif = LOOPBACK_IFINDEX; |
89 | } | 89 | } |
90 | 90 | ||
91 | flow.daddr = iph->saddr; | 91 | flow.daddr = iph->saddr; |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 851acec852d2..6b3da5cf54e9 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net) | |||
69 | net->ipv4.iptable_filter = | 69 | net->ipv4.iptable_filter = |
70 | ipt_register_table(net, &packet_filter, repl); | 70 | ipt_register_table(net, &packet_filter, repl); |
71 | kfree(repl); | 71 | kfree(repl); |
72 | if (IS_ERR(net->ipv4.iptable_filter)) | 72 | return PTR_RET(net->ipv4.iptable_filter); |
73 | return PTR_ERR(net->ipv4.iptable_filter); | ||
74 | return 0; | ||
75 | } | 73 | } |
76 | 74 | ||
77 | static void __net_exit iptable_filter_net_exit(struct net *net) | 75 | static void __net_exit iptable_filter_net_exit(struct net *net) |
@@ -96,14 +94,10 @@ static int __init iptable_filter_init(void) | |||
96 | filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); | 94 | filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); |
97 | if (IS_ERR(filter_ops)) { | 95 | if (IS_ERR(filter_ops)) { |
98 | ret = PTR_ERR(filter_ops); | 96 | ret = PTR_ERR(filter_ops); |
99 | goto cleanup_table; | 97 | unregister_pernet_subsys(&iptable_filter_net_ops); |
100 | } | 98 | } |
101 | 99 | ||
102 | return ret; | 100 | return ret; |
103 | |||
104 | cleanup_table: | ||
105 | unregister_pernet_subsys(&iptable_filter_net_ops); | ||
106 | return ret; | ||
107 | } | 101 | } |
108 | 102 | ||
109 | static void __exit iptable_filter_fini(void) | 103 | static void __exit iptable_filter_fini(void) |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index aef5d1fbe77d..85d88f206447 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) | |||
104 | net->ipv4.iptable_mangle = | 104 | net->ipv4.iptable_mangle = |
105 | ipt_register_table(net, &packet_mangler, repl); | 105 | ipt_register_table(net, &packet_mangler, repl); |
106 | kfree(repl); | 106 | kfree(repl); |
107 | if (IS_ERR(net->ipv4.iptable_mangle)) | 107 | return PTR_RET(net->ipv4.iptable_mangle); |
108 | return PTR_ERR(net->ipv4.iptable_mangle); | ||
109 | return 0; | ||
110 | } | 108 | } |
111 | 109 | ||
112 | static void __net_exit iptable_mangle_net_exit(struct net *net) | 110 | static void __net_exit iptable_mangle_net_exit(struct net *net) |
@@ -131,14 +129,10 @@ static int __init iptable_mangle_init(void) | |||
131 | mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); | 129 | mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); |
132 | if (IS_ERR(mangle_ops)) { | 130 | if (IS_ERR(mangle_ops)) { |
133 | ret = PTR_ERR(mangle_ops); | 131 | ret = PTR_ERR(mangle_ops); |
134 | goto cleanup_table; | 132 | unregister_pernet_subsys(&iptable_mangle_net_ops); |
135 | } | 133 | } |
136 | 134 | ||
137 | return ret; | 135 | return ret; |
138 | |||
139 | cleanup_table: | ||
140 | unregister_pernet_subsys(&iptable_mangle_net_ops); | ||
141 | return ret; | ||
142 | } | 136 | } |
143 | 137 | ||
144 | static void __exit iptable_mangle_fini(void) | 138 | static void __exit iptable_mangle_fini(void) |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/iptable_nat.c index 3828a4229822..9e0ffaf1d942 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/iptable_nat.c | |||
@@ -1,84 +1,71 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | 2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> |
3 | * (C) 2011 Patrick McHardy <kaber@trash.net> | ||
3 | * | 4 | * |
4 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
7 | */ | 8 | */ |
8 | #include <linux/types.h> | 9 | |
9 | #include <linux/icmp.h> | 10 | #include <linux/module.h> |
10 | #include <linux/gfp.h> | ||
11 | #include <linux/ip.h> | ||
12 | #include <linux/netfilter.h> | 11 | #include <linux/netfilter.h> |
13 | #include <linux/netfilter_ipv4.h> | 12 | #include <linux/netfilter_ipv4.h> |
14 | #include <linux/module.h> | 13 | #include <linux/netfilter_ipv4/ip_tables.h> |
15 | #include <linux/skbuff.h> | 14 | #include <linux/ip.h> |
16 | #include <linux/proc_fs.h> | ||
17 | #include <net/ip.h> | 15 | #include <net/ip.h> |
18 | #include <net/checksum.h> | ||
19 | #include <linux/spinlock.h> | ||
20 | 16 | ||
21 | #include <net/netfilter/nf_conntrack.h> | ||
22 | #include <net/netfilter/nf_conntrack_core.h> | ||
23 | #include <net/netfilter/nf_conntrack_extend.h> | ||
24 | #include <net/netfilter/nf_nat.h> | 17 | #include <net/netfilter/nf_nat.h> |
25 | #include <net/netfilter/nf_nat_rule.h> | ||
26 | #include <net/netfilter/nf_nat_protocol.h> | ||
27 | #include <net/netfilter/nf_nat_core.h> | 18 | #include <net/netfilter/nf_nat_core.h> |
28 | #include <net/netfilter/nf_nat_helper.h> | 19 | #include <net/netfilter/nf_nat_l3proto.h> |
29 | #include <linux/netfilter_ipv4/ip_tables.h> | 20 | |
21 | static const struct xt_table nf_nat_ipv4_table = { | ||
22 | .name = "nat", | ||
23 | .valid_hooks = (1 << NF_INET_PRE_ROUTING) | | ||
24 | (1 << NF_INET_POST_ROUTING) | | ||
25 | (1 << NF_INET_LOCAL_OUT) | | ||
26 | (1 << NF_INET_LOCAL_IN), | ||
27 | .me = THIS_MODULE, | ||
28 | .af = NFPROTO_IPV4, | ||
29 | }; | ||
30 | 30 | ||
31 | #ifdef CONFIG_XFRM | 31 | static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) |
32 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | ||
33 | { | 32 | { |
34 | struct flowi4 *fl4 = &fl->u.ip4; | 33 | /* Force range to this IP; let proto decide mapping for |
35 | const struct nf_conn *ct; | 34 | * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). |
36 | const struct nf_conntrack_tuple *t; | 35 | */ |
37 | enum ip_conntrack_info ctinfo; | 36 | struct nf_nat_range range; |
38 | enum ip_conntrack_dir dir; | 37 | |
39 | unsigned long statusbit; | 38 | range.flags = 0; |
40 | 39 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, | |
41 | ct = nf_ct_get(skb, &ctinfo); | 40 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? |
42 | if (ct == NULL) | 41 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : |
43 | return; | 42 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); |
44 | dir = CTINFO2DIR(ctinfo); | 43 | |
45 | t = &ct->tuplehash[dir].tuple; | 44 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); |
46 | 45 | } | |
47 | if (dir == IP_CT_DIR_ORIGINAL) | ||
48 | statusbit = IPS_DST_NAT; | ||
49 | else | ||
50 | statusbit = IPS_SRC_NAT; | ||
51 | |||
52 | if (ct->status & statusbit) { | ||
53 | fl4->daddr = t->dst.u3.ip; | ||
54 | if (t->dst.protonum == IPPROTO_TCP || | ||
55 | t->dst.protonum == IPPROTO_UDP || | ||
56 | t->dst.protonum == IPPROTO_UDPLITE || | ||
57 | t->dst.protonum == IPPROTO_DCCP || | ||
58 | t->dst.protonum == IPPROTO_SCTP) | ||
59 | fl4->fl4_dport = t->dst.u.tcp.port; | ||
60 | } | ||
61 | 46 | ||
62 | statusbit ^= IPS_NAT_MASK; | 47 | static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, |
48 | const struct net_device *in, | ||
49 | const struct net_device *out, | ||
50 | struct nf_conn *ct) | ||
51 | { | ||
52 | struct net *net = nf_ct_net(ct); | ||
53 | unsigned int ret; | ||
63 | 54 | ||
64 | if (ct->status & statusbit) { | 55 | ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); |
65 | fl4->saddr = t->src.u3.ip; | 56 | if (ret == NF_ACCEPT) { |
66 | if (t->dst.protonum == IPPROTO_TCP || | 57 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) |
67 | t->dst.protonum == IPPROTO_UDP || | 58 | ret = alloc_null_binding(ct, hooknum); |
68 | t->dst.protonum == IPPROTO_UDPLITE || | ||
69 | t->dst.protonum == IPPROTO_DCCP || | ||
70 | t->dst.protonum == IPPROTO_SCTP) | ||
71 | fl4->fl4_sport = t->src.u.tcp.port; | ||
72 | } | 59 | } |
60 | return ret; | ||
73 | } | 61 | } |
74 | #endif | ||
75 | 62 | ||
76 | static unsigned int | 63 | static unsigned int |
77 | nf_nat_fn(unsigned int hooknum, | 64 | nf_nat_ipv4_fn(unsigned int hooknum, |
78 | struct sk_buff *skb, | 65 | struct sk_buff *skb, |
79 | const struct net_device *in, | 66 | const struct net_device *in, |
80 | const struct net_device *out, | 67 | const struct net_device *out, |
81 | int (*okfn)(struct sk_buff *)) | 68 | int (*okfn)(struct sk_buff *)) |
82 | { | 69 | { |
83 | struct nf_conn *ct; | 70 | struct nf_conn *ct; |
84 | enum ip_conntrack_info ctinfo; | 71 | enum ip_conntrack_info ctinfo; |
@@ -87,14 +74,16 @@ nf_nat_fn(unsigned int hooknum, | |||
87 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); | 74 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); |
88 | 75 | ||
89 | /* We never see fragments: conntrack defrags on pre-routing | 76 | /* We never see fragments: conntrack defrags on pre-routing |
90 | and local-out, and nf_nat_out protects post-routing. */ | 77 | * and local-out, and nf_nat_out protects post-routing. |
78 | */ | ||
91 | NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); | 79 | NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); |
92 | 80 | ||
93 | ct = nf_ct_get(skb, &ctinfo); | 81 | ct = nf_ct_get(skb, &ctinfo); |
94 | /* Can't track? It's not due to stress, or conntrack would | 82 | /* Can't track? It's not due to stress, or conntrack would |
95 | have dropped it. Hence it's the user's responsibilty to | 83 | * have dropped it. Hence it's the user's responsibilty to |
96 | packet filter it out, or implement conntrack/NAT for that | 84 | * packet filter it out, or implement conntrack/NAT for that |
97 | protocol. 8) --RR */ | 85 | * protocol. 8) --RR |
86 | */ | ||
98 | if (!ct) | 87 | if (!ct) |
99 | return NF_ACCEPT; | 88 | return NF_ACCEPT; |
100 | 89 | ||
@@ -118,17 +107,17 @@ nf_nat_fn(unsigned int hooknum, | |||
118 | case IP_CT_RELATED: | 107 | case IP_CT_RELATED: |
119 | case IP_CT_RELATED_REPLY: | 108 | case IP_CT_RELATED_REPLY: |
120 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { | 109 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { |
121 | if (!nf_nat_icmp_reply_translation(ct, ctinfo, | 110 | if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, |
122 | hooknum, skb)) | 111 | hooknum)) |
123 | return NF_DROP; | 112 | return NF_DROP; |
124 | else | 113 | else |
125 | return NF_ACCEPT; | 114 | return NF_ACCEPT; |
126 | } | 115 | } |
127 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ | 116 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ |
128 | case IP_CT_NEW: | 117 | case IP_CT_NEW: |
129 | |||
130 | /* Seen it before? This can happen for loopback, retrans, | 118 | /* Seen it before? This can happen for loopback, retrans, |
131 | or local packets.. */ | 119 | * or local packets. |
120 | */ | ||
132 | if (!nf_nat_initialized(ct, maniptype)) { | 121 | if (!nf_nat_initialized(ct, maniptype)) { |
133 | unsigned int ret; | 122 | unsigned int ret; |
134 | 123 | ||
@@ -151,16 +140,16 @@ nf_nat_fn(unsigned int hooknum, | |||
151 | } | 140 | } |
152 | 141 | ||
153 | static unsigned int | 142 | static unsigned int |
154 | nf_nat_in(unsigned int hooknum, | 143 | nf_nat_ipv4_in(unsigned int hooknum, |
155 | struct sk_buff *skb, | 144 | struct sk_buff *skb, |
156 | const struct net_device *in, | 145 | const struct net_device *in, |
157 | const struct net_device *out, | 146 | const struct net_device *out, |
158 | int (*okfn)(struct sk_buff *)) | 147 | int (*okfn)(struct sk_buff *)) |
159 | { | 148 | { |
160 | unsigned int ret; | 149 | unsigned int ret; |
161 | __be32 daddr = ip_hdr(skb)->daddr; | 150 | __be32 daddr = ip_hdr(skb)->daddr; |
162 | 151 | ||
163 | ret = nf_nat_fn(hooknum, skb, in, out, okfn); | 152 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); |
164 | if (ret != NF_DROP && ret != NF_STOLEN && | 153 | if (ret != NF_DROP && ret != NF_STOLEN && |
165 | daddr != ip_hdr(skb)->daddr) | 154 | daddr != ip_hdr(skb)->daddr) |
166 | skb_dst_drop(skb); | 155 | skb_dst_drop(skb); |
@@ -169,11 +158,11 @@ nf_nat_in(unsigned int hooknum, | |||
169 | } | 158 | } |
170 | 159 | ||
171 | static unsigned int | 160 | static unsigned int |
172 | nf_nat_out(unsigned int hooknum, | 161 | nf_nat_ipv4_out(unsigned int hooknum, |
173 | struct sk_buff *skb, | 162 | struct sk_buff *skb, |
174 | const struct net_device *in, | 163 | const struct net_device *in, |
175 | const struct net_device *out, | 164 | const struct net_device *out, |
176 | int (*okfn)(struct sk_buff *)) | 165 | int (*okfn)(struct sk_buff *)) |
177 | { | 166 | { |
178 | #ifdef CONFIG_XFRM | 167 | #ifdef CONFIG_XFRM |
179 | const struct nf_conn *ct; | 168 | const struct nf_conn *ct; |
@@ -186,29 +175,30 @@ nf_nat_out(unsigned int hooknum, | |||
186 | ip_hdrlen(skb) < sizeof(struct iphdr)) | 175 | ip_hdrlen(skb) < sizeof(struct iphdr)) |
187 | return NF_ACCEPT; | 176 | return NF_ACCEPT; |
188 | 177 | ||
189 | ret = nf_nat_fn(hooknum, skb, in, out, okfn); | 178 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); |
190 | #ifdef CONFIG_XFRM | 179 | #ifdef CONFIG_XFRM |
191 | if (ret != NF_DROP && ret != NF_STOLEN && | 180 | if (ret != NF_DROP && ret != NF_STOLEN && |
181 | !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && | ||
192 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 182 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
193 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 183 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
194 | 184 | ||
195 | if ((ct->tuplehash[dir].tuple.src.u3.ip != | 185 | if ((ct->tuplehash[dir].tuple.src.u3.ip != |
196 | ct->tuplehash[!dir].tuple.dst.u3.ip) || | 186 | ct->tuplehash[!dir].tuple.dst.u3.ip) || |
197 | (ct->tuplehash[dir].tuple.src.u.all != | 187 | (ct->tuplehash[dir].tuple.src.u.all != |
198 | ct->tuplehash[!dir].tuple.dst.u.all) | 188 | ct->tuplehash[!dir].tuple.dst.u.all)) |
199 | ) | 189 | if (nf_xfrm_me_harder(skb, AF_INET) < 0) |
200 | return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; | 190 | ret = NF_DROP; |
201 | } | 191 | } |
202 | #endif | 192 | #endif |
203 | return ret; | 193 | return ret; |
204 | } | 194 | } |
205 | 195 | ||
206 | static unsigned int | 196 | static unsigned int |
207 | nf_nat_local_fn(unsigned int hooknum, | 197 | nf_nat_ipv4_local_fn(unsigned int hooknum, |
208 | struct sk_buff *skb, | 198 | struct sk_buff *skb, |
209 | const struct net_device *in, | 199 | const struct net_device *in, |
210 | const struct net_device *out, | 200 | const struct net_device *out, |
211 | int (*okfn)(struct sk_buff *)) | 201 | int (*okfn)(struct sk_buff *)) |
212 | { | 202 | { |
213 | const struct nf_conn *ct; | 203 | const struct nf_conn *ct; |
214 | enum ip_conntrack_info ctinfo; | 204 | enum ip_conntrack_info ctinfo; |
@@ -219,7 +209,7 @@ nf_nat_local_fn(unsigned int hooknum, | |||
219 | ip_hdrlen(skb) < sizeof(struct iphdr)) | 209 | ip_hdrlen(skb) < sizeof(struct iphdr)) |
220 | return NF_ACCEPT; | 210 | return NF_ACCEPT; |
221 | 211 | ||
222 | ret = nf_nat_fn(hooknum, skb, in, out, okfn); | 212 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); |
223 | if (ret != NF_DROP && ret != NF_STOLEN && | 213 | if (ret != NF_DROP && ret != NF_STOLEN && |
224 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 214 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
225 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 215 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
@@ -230,21 +220,20 @@ nf_nat_local_fn(unsigned int hooknum, | |||
230 | ret = NF_DROP; | 220 | ret = NF_DROP; |
231 | } | 221 | } |
232 | #ifdef CONFIG_XFRM | 222 | #ifdef CONFIG_XFRM |
233 | else if (ct->tuplehash[dir].tuple.dst.u.all != | 223 | else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && |
224 | ct->tuplehash[dir].tuple.dst.u.all != | ||
234 | ct->tuplehash[!dir].tuple.src.u.all) | 225 | ct->tuplehash[!dir].tuple.src.u.all) |
235 | if (ip_xfrm_me_harder(skb)) | 226 | if (nf_xfrm_me_harder(skb, AF_INET) < 0) |
236 | ret = NF_DROP; | 227 | ret = NF_DROP; |
237 | #endif | 228 | #endif |
238 | } | 229 | } |
239 | return ret; | 230 | return ret; |
240 | } | 231 | } |
241 | 232 | ||
242 | /* We must be after connection tracking and before packet filtering. */ | 233 | static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { |
243 | |||
244 | static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | ||
245 | /* Before packet filtering, change destination */ | 234 | /* Before packet filtering, change destination */ |
246 | { | 235 | { |
247 | .hook = nf_nat_in, | 236 | .hook = nf_nat_ipv4_in, |
248 | .owner = THIS_MODULE, | 237 | .owner = THIS_MODULE, |
249 | .pf = NFPROTO_IPV4, | 238 | .pf = NFPROTO_IPV4, |
250 | .hooknum = NF_INET_PRE_ROUTING, | 239 | .hooknum = NF_INET_PRE_ROUTING, |
@@ -252,7 +241,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
252 | }, | 241 | }, |
253 | /* After packet filtering, change source */ | 242 | /* After packet filtering, change source */ |
254 | { | 243 | { |
255 | .hook = nf_nat_out, | 244 | .hook = nf_nat_ipv4_out, |
256 | .owner = THIS_MODULE, | 245 | .owner = THIS_MODULE, |
257 | .pf = NFPROTO_IPV4, | 246 | .pf = NFPROTO_IPV4, |
258 | .hooknum = NF_INET_POST_ROUTING, | 247 | .hooknum = NF_INET_POST_ROUTING, |
@@ -260,7 +249,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
260 | }, | 249 | }, |
261 | /* Before packet filtering, change destination */ | 250 | /* Before packet filtering, change destination */ |
262 | { | 251 | { |
263 | .hook = nf_nat_local_fn, | 252 | .hook = nf_nat_ipv4_local_fn, |
264 | .owner = THIS_MODULE, | 253 | .owner = THIS_MODULE, |
265 | .pf = NFPROTO_IPV4, | 254 | .pf = NFPROTO_IPV4, |
266 | .hooknum = NF_INET_LOCAL_OUT, | 255 | .hooknum = NF_INET_LOCAL_OUT, |
@@ -268,7 +257,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
268 | }, | 257 | }, |
269 | /* After packet filtering, change source */ | 258 | /* After packet filtering, change source */ |
270 | { | 259 | { |
271 | .hook = nf_nat_fn, | 260 | .hook = nf_nat_ipv4_fn, |
272 | .owner = THIS_MODULE, | 261 | .owner = THIS_MODULE, |
273 | .pf = NFPROTO_IPV4, | 262 | .pf = NFPROTO_IPV4, |
274 | .hooknum = NF_INET_LOCAL_IN, | 263 | .hooknum = NF_INET_LOCAL_IN, |
@@ -276,51 +265,56 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
276 | }, | 265 | }, |
277 | }; | 266 | }; |
278 | 267 | ||
279 | static int __init nf_nat_standalone_init(void) | 268 | static int __net_init iptable_nat_net_init(struct net *net) |
280 | { | 269 | { |
281 | int ret = 0; | 270 | struct ipt_replace *repl; |
271 | |||
272 | repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); | ||
273 | if (repl == NULL) | ||
274 | return -ENOMEM; | ||
275 | net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); | ||
276 | kfree(repl); | ||
277 | if (IS_ERR(net->ipv4.nat_table)) | ||
278 | return PTR_ERR(net->ipv4.nat_table); | ||
279 | return 0; | ||
280 | } | ||
282 | 281 | ||
283 | need_ipv4_conntrack(); | 282 | static void __net_exit iptable_nat_net_exit(struct net *net) |
283 | { | ||
284 | ipt_unregister_table(net, net->ipv4.nat_table); | ||
285 | } | ||
284 | 286 | ||
285 | #ifdef CONFIG_XFRM | 287 | static struct pernet_operations iptable_nat_net_ops = { |
286 | BUG_ON(ip_nat_decode_session != NULL); | 288 | .init = iptable_nat_net_init, |
287 | RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); | 289 | .exit = iptable_nat_net_exit, |
288 | #endif | 290 | }; |
289 | ret = nf_nat_rule_init(); | ||
290 | if (ret < 0) { | ||
291 | pr_err("nf_nat_init: can't setup rules.\n"); | ||
292 | goto cleanup_decode_session; | ||
293 | } | ||
294 | ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); | ||
295 | if (ret < 0) { | ||
296 | pr_err("nf_nat_init: can't register hooks.\n"); | ||
297 | goto cleanup_rule_init; | ||
298 | } | ||
299 | return ret; | ||
300 | 291 | ||
301 | cleanup_rule_init: | 292 | static int __init iptable_nat_init(void) |
302 | nf_nat_rule_cleanup(); | 293 | { |
303 | cleanup_decode_session: | 294 | int err; |
304 | #ifdef CONFIG_XFRM | 295 | |
305 | RCU_INIT_POINTER(ip_nat_decode_session, NULL); | 296 | err = register_pernet_subsys(&iptable_nat_net_ops); |
306 | synchronize_net(); | 297 | if (err < 0) |
307 | #endif | 298 | goto err1; |
308 | return ret; | 299 | |
300 | err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); | ||
301 | if (err < 0) | ||
302 | goto err2; | ||
303 | return 0; | ||
304 | |||
305 | err2: | ||
306 | unregister_pernet_subsys(&iptable_nat_net_ops); | ||
307 | err1: | ||
308 | return err; | ||
309 | } | 309 | } |
310 | 310 | ||
311 | static void __exit nf_nat_standalone_fini(void) | 311 | static void __exit iptable_nat_exit(void) |
312 | { | 312 | { |
313 | nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); | 313 | nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); |
314 | nf_nat_rule_cleanup(); | 314 | unregister_pernet_subsys(&iptable_nat_net_ops); |
315 | #ifdef CONFIG_XFRM | ||
316 | RCU_INIT_POINTER(ip_nat_decode_session, NULL); | ||
317 | synchronize_net(); | ||
318 | #endif | ||
319 | /* Conntrack caches are unregistered in nf_conntrack_cleanup */ | ||
320 | } | 315 | } |
321 | 316 | ||
322 | module_init(nf_nat_standalone_init); | 317 | module_init(iptable_nat_init); |
323 | module_exit(nf_nat_standalone_fini); | 318 | module_exit(iptable_nat_exit); |
324 | 319 | ||
325 | MODULE_LICENSE("GPL"); | 320 | MODULE_LICENSE("GPL"); |
326 | MODULE_ALIAS("ip_nat"); | ||
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 07fb710cd722..03d9696d3c6e 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net) | |||
48 | net->ipv4.iptable_raw = | 48 | net->ipv4.iptable_raw = |
49 | ipt_register_table(net, &packet_raw, repl); | 49 | ipt_register_table(net, &packet_raw, repl); |
50 | kfree(repl); | 50 | kfree(repl); |
51 | if (IS_ERR(net->ipv4.iptable_raw)) | 51 | return PTR_RET(net->ipv4.iptable_raw); |
52 | return PTR_ERR(net->ipv4.iptable_raw); | ||
53 | return 0; | ||
54 | } | 52 | } |
55 | 53 | ||
56 | static void __net_exit iptable_raw_net_exit(struct net *net) | 54 | static void __net_exit iptable_raw_net_exit(struct net *net) |
@@ -75,14 +73,10 @@ static int __init iptable_raw_init(void) | |||
75 | rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); | 73 | rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); |
76 | if (IS_ERR(rawtable_ops)) { | 74 | if (IS_ERR(rawtable_ops)) { |
77 | ret = PTR_ERR(rawtable_ops); | 75 | ret = PTR_ERR(rawtable_ops); |
78 | goto cleanup_table; | 76 | unregister_pernet_subsys(&iptable_raw_net_ops); |
79 | } | 77 | } |
80 | 78 | ||
81 | return ret; | 79 | return ret; |
82 | |||
83 | cleanup_table: | ||
84 | unregister_pernet_subsys(&iptable_raw_net_ops); | ||
85 | return ret; | ||
86 | } | 80 | } |
87 | 81 | ||
88 | static void __exit iptable_raw_fini(void) | 82 | static void __exit iptable_raw_fini(void) |
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index be45bdc4c602..b283d8e2601a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c | |||
@@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net) | |||
66 | net->ipv4.iptable_security = | 66 | net->ipv4.iptable_security = |
67 | ipt_register_table(net, &security_table, repl); | 67 | ipt_register_table(net, &security_table, repl); |
68 | kfree(repl); | 68 | kfree(repl); |
69 | if (IS_ERR(net->ipv4.iptable_security)) | 69 | return PTR_RET(net->ipv4.iptable_security); |
70 | return PTR_ERR(net->ipv4.iptable_security); | ||
71 | |||
72 | return 0; | ||
73 | } | 70 | } |
74 | 71 | ||
75 | static void __net_exit iptable_security_net_exit(struct net *net) | 72 | static void __net_exit iptable_security_net_exit(struct net *net) |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index e7ff2dcab6ce..fcdd0c2406e6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -29,11 +29,6 @@ | |||
29 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | 29 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> |
30 | #include <net/netfilter/nf_log.h> | 30 | #include <net/netfilter/nf_log.h> |
31 | 31 | ||
32 | int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, | ||
33 | struct nf_conn *ct, | ||
34 | enum ip_conntrack_info ctinfo); | ||
35 | EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); | ||
36 | |||
37 | static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | 32 | static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, |
38 | struct nf_conntrack_tuple *tuple) | 33 | struct nf_conntrack_tuple *tuple) |
39 | { | 34 | { |
@@ -149,7 +144,8 @@ static unsigned int ipv4_confirm(unsigned int hooknum, | |||
149 | typeof(nf_nat_seq_adjust_hook) seq_adjust; | 144 | typeof(nf_nat_seq_adjust_hook) seq_adjust; |
150 | 145 | ||
151 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); | 146 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); |
152 | if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { | 147 | if (!seq_adjust || |
148 | !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { | ||
153 | NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); | 149 | NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); |
154 | return NF_DROP; | 150 | return NF_DROP; |
155 | } | 151 | } |
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c deleted file mode 100644 index 3c04d24e2976..000000000000 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ /dev/null | |||
@@ -1,85 +0,0 @@ | |||
1 | /* Amanda extension for TCP NAT alteration. | ||
2 | * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> | ||
3 | * based on a copy of HW's ip_nat_irc.c as well as other modules | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/udp.h> | ||
15 | |||
16 | #include <net/netfilter/nf_conntrack_helper.h> | ||
17 | #include <net/netfilter/nf_conntrack_expect.h> | ||
18 | #include <net/netfilter/nf_nat_helper.h> | ||
19 | #include <net/netfilter/nf_nat_rule.h> | ||
20 | #include <linux/netfilter/nf_conntrack_amanda.h> | ||
21 | |||
22 | MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); | ||
23 | MODULE_DESCRIPTION("Amanda NAT helper"); | ||
24 | MODULE_LICENSE("GPL"); | ||
25 | MODULE_ALIAS("ip_nat_amanda"); | ||
26 | |||
27 | static unsigned int help(struct sk_buff *skb, | ||
28 | enum ip_conntrack_info ctinfo, | ||
29 | unsigned int matchoff, | ||
30 | unsigned int matchlen, | ||
31 | struct nf_conntrack_expect *exp) | ||
32 | { | ||
33 | char buffer[sizeof("65535")]; | ||
34 | u_int16_t port; | ||
35 | unsigned int ret; | ||
36 | |||
37 | /* Connection comes from client. */ | ||
38 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
39 | exp->dir = IP_CT_DIR_ORIGINAL; | ||
40 | |||
41 | /* When you see the packet, we need to NAT it the same as the | ||
42 | * this one (ie. same IP: it will be TCP and master is UDP). */ | ||
43 | exp->expectfn = nf_nat_follow_master; | ||
44 | |||
45 | /* Try to get same port: if not, try to change it. */ | ||
46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | ||
47 | int res; | ||
48 | |||
49 | exp->tuple.dst.u.tcp.port = htons(port); | ||
50 | res = nf_ct_expect_related(exp); | ||
51 | if (res == 0) | ||
52 | break; | ||
53 | else if (res != -EBUSY) { | ||
54 | port = 0; | ||
55 | break; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | if (port == 0) | ||
60 | return NF_DROP; | ||
61 | |||
62 | sprintf(buffer, "%u", port); | ||
63 | ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, | ||
64 | matchoff, matchlen, | ||
65 | buffer, strlen(buffer)); | ||
66 | if (ret != NF_ACCEPT) | ||
67 | nf_ct_unexpect_related(exp); | ||
68 | return ret; | ||
69 | } | ||
70 | |||
71 | static void __exit nf_nat_amanda_fini(void) | ||
72 | { | ||
73 | RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); | ||
74 | synchronize_rcu(); | ||
75 | } | ||
76 | |||
77 | static int __init nf_nat_amanda_init(void) | ||
78 | { | ||
79 | BUG_ON(nf_nat_amanda_hook != NULL); | ||
80 | RCU_INIT_POINTER(nf_nat_amanda_hook, help); | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | module_init(nf_nat_amanda_init); | ||
85 | module_exit(nf_nat_amanda_fini); | ||
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c deleted file mode 100644 index 44b082fd48ab..000000000000 --- a/net/ipv4/netfilter/nf_nat_core.c +++ /dev/null | |||
@@ -1,763 +0,0 @@ | |||
1 | /* NAT for netfilter; shared with compatibility layer. */ | ||
2 | |||
3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
4 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/timer.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <linux/gfp.h> | ||
16 | #include <net/checksum.h> | ||
17 | #include <net/icmp.h> | ||
18 | #include <net/ip.h> | ||
19 | #include <net/tcp.h> /* For tcp_prot in getorigdst */ | ||
20 | #include <linux/icmp.h> | ||
21 | #include <linux/udp.h> | ||
22 | #include <linux/jhash.h> | ||
23 | |||
24 | #include <linux/netfilter_ipv4.h> | ||
25 | #include <net/netfilter/nf_conntrack.h> | ||
26 | #include <net/netfilter/nf_conntrack_core.h> | ||
27 | #include <net/netfilter/nf_nat.h> | ||
28 | #include <net/netfilter/nf_nat_protocol.h> | ||
29 | #include <net/netfilter/nf_nat_core.h> | ||
30 | #include <net/netfilter/nf_nat_helper.h> | ||
31 | #include <net/netfilter/nf_conntrack_helper.h> | ||
32 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
33 | #include <net/netfilter/nf_conntrack_zones.h> | ||
34 | |||
35 | static DEFINE_SPINLOCK(nf_nat_lock); | ||
36 | |||
37 | static struct nf_conntrack_l3proto *l3proto __read_mostly; | ||
38 | |||
39 | #define MAX_IP_NAT_PROTO 256 | ||
40 | static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] | ||
41 | __read_mostly; | ||
42 | |||
43 | static inline const struct nf_nat_protocol * | ||
44 | __nf_nat_proto_find(u_int8_t protonum) | ||
45 | { | ||
46 | return rcu_dereference(nf_nat_protos[protonum]); | ||
47 | } | ||
48 | |||
49 | /* We keep an extra hash for each conntrack, for fast searching. */ | ||
50 | static inline unsigned int | ||
51 | hash_by_src(const struct net *net, u16 zone, | ||
52 | const struct nf_conntrack_tuple *tuple) | ||
53 | { | ||
54 | unsigned int hash; | ||
55 | |||
56 | /* Original src, to ensure we map it consistently if poss. */ | ||
57 | hash = jhash_3words((__force u32)tuple->src.u3.ip, | ||
58 | (__force u32)tuple->src.u.all ^ zone, | ||
59 | tuple->dst.protonum, nf_conntrack_hash_rnd); | ||
60 | return ((u64)hash * net->ipv4.nat_htable_size) >> 32; | ||
61 | } | ||
62 | |||
63 | /* Is this tuple already taken? (not by us) */ | ||
64 | int | ||
65 | nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, | ||
66 | const struct nf_conn *ignored_conntrack) | ||
67 | { | ||
68 | /* Conntrack tracking doesn't keep track of outgoing tuples; only | ||
69 | incoming ones. NAT means they don't have a fixed mapping, | ||
70 | so we invert the tuple and look for the incoming reply. | ||
71 | |||
72 | We could keep a separate hash if this proves too slow. */ | ||
73 | struct nf_conntrack_tuple reply; | ||
74 | |||
75 | nf_ct_invert_tuplepr(&reply, tuple); | ||
76 | return nf_conntrack_tuple_taken(&reply, ignored_conntrack); | ||
77 | } | ||
78 | EXPORT_SYMBOL(nf_nat_used_tuple); | ||
79 | |||
80 | /* If we source map this tuple so reply looks like reply_tuple, will | ||
81 | * that meet the constraints of range. */ | ||
82 | static int | ||
83 | in_range(const struct nf_conntrack_tuple *tuple, | ||
84 | const struct nf_nat_ipv4_range *range) | ||
85 | { | ||
86 | const struct nf_nat_protocol *proto; | ||
87 | int ret = 0; | ||
88 | |||
89 | /* If we are supposed to map IPs, then we must be in the | ||
90 | range specified, otherwise let this drag us onto a new src IP. */ | ||
91 | if (range->flags & NF_NAT_RANGE_MAP_IPS) { | ||
92 | if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || | ||
93 | ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | rcu_read_lock(); | ||
98 | proto = __nf_nat_proto_find(tuple->dst.protonum); | ||
99 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || | ||
100 | proto->in_range(tuple, NF_NAT_MANIP_SRC, | ||
101 | &range->min, &range->max)) | ||
102 | ret = 1; | ||
103 | rcu_read_unlock(); | ||
104 | |||
105 | return ret; | ||
106 | } | ||
107 | |||
108 | static inline int | ||
109 | same_src(const struct nf_conn *ct, | ||
110 | const struct nf_conntrack_tuple *tuple) | ||
111 | { | ||
112 | const struct nf_conntrack_tuple *t; | ||
113 | |||
114 | t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
115 | return (t->dst.protonum == tuple->dst.protonum && | ||
116 | t->src.u3.ip == tuple->src.u3.ip && | ||
117 | t->src.u.all == tuple->src.u.all); | ||
118 | } | ||
119 | |||
120 | /* Only called for SRC manip */ | ||
121 | static int | ||
122 | find_appropriate_src(struct net *net, u16 zone, | ||
123 | const struct nf_conntrack_tuple *tuple, | ||
124 | struct nf_conntrack_tuple *result, | ||
125 | const struct nf_nat_ipv4_range *range) | ||
126 | { | ||
127 | unsigned int h = hash_by_src(net, zone, tuple); | ||
128 | const struct nf_conn_nat *nat; | ||
129 | const struct nf_conn *ct; | ||
130 | const struct hlist_node *n; | ||
131 | |||
132 | rcu_read_lock(); | ||
133 | hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { | ||
134 | ct = nat->ct; | ||
135 | if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { | ||
136 | /* Copy source part from reply tuple. */ | ||
137 | nf_ct_invert_tuplepr(result, | ||
138 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
139 | result->dst = tuple->dst; | ||
140 | |||
141 | if (in_range(result, range)) { | ||
142 | rcu_read_unlock(); | ||
143 | return 1; | ||
144 | } | ||
145 | } | ||
146 | } | ||
147 | rcu_read_unlock(); | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | /* For [FUTURE] fragmentation handling, we want the least-used | ||
152 | src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus | ||
153 | if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports | ||
154 | 1-65535, we don't do pro-rata allocation based on ports; we choose | ||
155 | the ip with the lowest src-ip/dst-ip/proto usage. | ||
156 | */ | ||
157 | static void | ||
158 | find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, | ||
159 | const struct nf_nat_ipv4_range *range, | ||
160 | const struct nf_conn *ct, | ||
161 | enum nf_nat_manip_type maniptype) | ||
162 | { | ||
163 | __be32 *var_ipp; | ||
164 | /* Host order */ | ||
165 | u_int32_t minip, maxip, j; | ||
166 | |||
167 | /* No IP mapping? Do nothing. */ | ||
168 | if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) | ||
169 | return; | ||
170 | |||
171 | if (maniptype == NF_NAT_MANIP_SRC) | ||
172 | var_ipp = &tuple->src.u3.ip; | ||
173 | else | ||
174 | var_ipp = &tuple->dst.u3.ip; | ||
175 | |||
176 | /* Fast path: only one choice. */ | ||
177 | if (range->min_ip == range->max_ip) { | ||
178 | *var_ipp = range->min_ip; | ||
179 | return; | ||
180 | } | ||
181 | |||
182 | /* Hashing source and destination IPs gives a fairly even | ||
183 | * spread in practice (if there are a small number of IPs | ||
184 | * involved, there usually aren't that many connections | ||
185 | * anyway). The consistency means that servers see the same | ||
186 | * client coming from the same IP (some Internet Banking sites | ||
187 | * like this), even across reboots. */ | ||
188 | minip = ntohl(range->min_ip); | ||
189 | maxip = ntohl(range->max_ip); | ||
190 | j = jhash_2words((__force u32)tuple->src.u3.ip, | ||
191 | range->flags & NF_NAT_RANGE_PERSISTENT ? | ||
192 | 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); | ||
193 | j = ((u64)j * (maxip - minip + 1)) >> 32; | ||
194 | *var_ipp = htonl(minip + j); | ||
195 | } | ||
196 | |||
197 | /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, | ||
198 | * we change the source to map into the range. For NF_INET_PRE_ROUTING | ||
199 | * and NF_INET_LOCAL_OUT, we change the destination to map into the | ||
200 | * range. It might not be possible to get a unique tuple, but we try. | ||
201 | * At worst (or if we race), we will end up with a final duplicate in | ||
202 | * __ip_conntrack_confirm and drop the packet. */ | ||
203 | static void | ||
204 | get_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
205 | const struct nf_conntrack_tuple *orig_tuple, | ||
206 | const struct nf_nat_ipv4_range *range, | ||
207 | struct nf_conn *ct, | ||
208 | enum nf_nat_manip_type maniptype) | ||
209 | { | ||
210 | struct net *net = nf_ct_net(ct); | ||
211 | const struct nf_nat_protocol *proto; | ||
212 | u16 zone = nf_ct_zone(ct); | ||
213 | |||
214 | /* 1) If this srcip/proto/src-proto-part is currently mapped, | ||
215 | and that same mapping gives a unique tuple within the given | ||
216 | range, use that. | ||
217 | |||
218 | This is only required for source (ie. NAT/masq) mappings. | ||
219 | So far, we don't do local source mappings, so multiple | ||
220 | manips not an issue. */ | ||
221 | if (maniptype == NF_NAT_MANIP_SRC && | ||
222 | !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { | ||
223 | /* try the original tuple first */ | ||
224 | if (in_range(orig_tuple, range)) { | ||
225 | if (!nf_nat_used_tuple(orig_tuple, ct)) { | ||
226 | *tuple = *orig_tuple; | ||
227 | return; | ||
228 | } | ||
229 | } else if (find_appropriate_src(net, zone, orig_tuple, tuple, | ||
230 | range)) { | ||
231 | pr_debug("get_unique_tuple: Found current src map\n"); | ||
232 | if (!nf_nat_used_tuple(tuple, ct)) | ||
233 | return; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | /* 2) Select the least-used IP/proto combination in the given | ||
238 | range. */ | ||
239 | *tuple = *orig_tuple; | ||
240 | find_best_ips_proto(zone, tuple, range, ct, maniptype); | ||
241 | |||
242 | /* 3) The per-protocol part of the manip is made to map into | ||
243 | the range to make a unique tuple. */ | ||
244 | |||
245 | rcu_read_lock(); | ||
246 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); | ||
247 | |||
248 | /* Only bother mapping if it's not already in range and unique */ | ||
249 | if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { | ||
250 | if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { | ||
251 | if (proto->in_range(tuple, maniptype, &range->min, | ||
252 | &range->max) && | ||
253 | (range->min.all == range->max.all || | ||
254 | !nf_nat_used_tuple(tuple, ct))) | ||
255 | goto out; | ||
256 | } else if (!nf_nat_used_tuple(tuple, ct)) { | ||
257 | goto out; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | /* Last change: get protocol to try to obtain unique tuple. */ | ||
262 | proto->unique_tuple(tuple, range, maniptype, ct); | ||
263 | out: | ||
264 | rcu_read_unlock(); | ||
265 | } | ||
266 | |||
267 | unsigned int | ||
268 | nf_nat_setup_info(struct nf_conn *ct, | ||
269 | const struct nf_nat_ipv4_range *range, | ||
270 | enum nf_nat_manip_type maniptype) | ||
271 | { | ||
272 | struct net *net = nf_ct_net(ct); | ||
273 | struct nf_conntrack_tuple curr_tuple, new_tuple; | ||
274 | struct nf_conn_nat *nat; | ||
275 | |||
276 | /* nat helper or nfctnetlink also setup binding */ | ||
277 | nat = nfct_nat(ct); | ||
278 | if (!nat) { | ||
279 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); | ||
280 | if (nat == NULL) { | ||
281 | pr_debug("failed to add NAT extension\n"); | ||
282 | return NF_ACCEPT; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || | ||
287 | maniptype == NF_NAT_MANIP_DST); | ||
288 | BUG_ON(nf_nat_initialized(ct, maniptype)); | ||
289 | |||
290 | /* What we've got will look like inverse of reply. Normally | ||
291 | this is what is in the conntrack, except for prior | ||
292 | manipulations (future optimization: if num_manips == 0, | ||
293 | orig_tp = | ||
294 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ | ||
295 | nf_ct_invert_tuplepr(&curr_tuple, | ||
296 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
297 | |||
298 | get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); | ||
299 | |||
300 | if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { | ||
301 | struct nf_conntrack_tuple reply; | ||
302 | |||
303 | /* Alter conntrack table so will recognize replies. */ | ||
304 | nf_ct_invert_tuplepr(&reply, &new_tuple); | ||
305 | nf_conntrack_alter_reply(ct, &reply); | ||
306 | |||
307 | /* Non-atomic: we own this at the moment. */ | ||
308 | if (maniptype == NF_NAT_MANIP_SRC) | ||
309 | ct->status |= IPS_SRC_NAT; | ||
310 | else | ||
311 | ct->status |= IPS_DST_NAT; | ||
312 | } | ||
313 | |||
314 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
315 | unsigned int srchash; | ||
316 | |||
317 | srchash = hash_by_src(net, nf_ct_zone(ct), | ||
318 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
319 | spin_lock_bh(&nf_nat_lock); | ||
320 | /* nf_conntrack_alter_reply might re-allocate extension area */ | ||
321 | nat = nfct_nat(ct); | ||
322 | nat->ct = ct; | ||
323 | hlist_add_head_rcu(&nat->bysource, | ||
324 | &net->ipv4.nat_bysource[srchash]); | ||
325 | spin_unlock_bh(&nf_nat_lock); | ||
326 | } | ||
327 | |||
328 | /* It's done. */ | ||
329 | if (maniptype == NF_NAT_MANIP_DST) | ||
330 | ct->status |= IPS_DST_NAT_DONE; | ||
331 | else | ||
332 | ct->status |= IPS_SRC_NAT_DONE; | ||
333 | |||
334 | return NF_ACCEPT; | ||
335 | } | ||
336 | EXPORT_SYMBOL(nf_nat_setup_info); | ||
337 | |||
338 | /* Returns true if succeeded. */ | ||
339 | static bool | ||
340 | manip_pkt(u_int16_t proto, | ||
341 | struct sk_buff *skb, | ||
342 | unsigned int iphdroff, | ||
343 | const struct nf_conntrack_tuple *target, | ||
344 | enum nf_nat_manip_type maniptype) | ||
345 | { | ||
346 | struct iphdr *iph; | ||
347 | const struct nf_nat_protocol *p; | ||
348 | |||
349 | if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) | ||
350 | return false; | ||
351 | |||
352 | iph = (void *)skb->data + iphdroff; | ||
353 | |||
354 | /* Manipulate protcol part. */ | ||
355 | |||
356 | /* rcu_read_lock()ed by nf_hook_slow */ | ||
357 | p = __nf_nat_proto_find(proto); | ||
358 | if (!p->manip_pkt(skb, iphdroff, target, maniptype)) | ||
359 | return false; | ||
360 | |||
361 | iph = (void *)skb->data + iphdroff; | ||
362 | |||
363 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
364 | csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); | ||
365 | iph->saddr = target->src.u3.ip; | ||
366 | } else { | ||
367 | csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); | ||
368 | iph->daddr = target->dst.u3.ip; | ||
369 | } | ||
370 | return true; | ||
371 | } | ||
372 | |||
373 | /* Do packet manipulations according to nf_nat_setup_info. */ | ||
374 | unsigned int nf_nat_packet(struct nf_conn *ct, | ||
375 | enum ip_conntrack_info ctinfo, | ||
376 | unsigned int hooknum, | ||
377 | struct sk_buff *skb) | ||
378 | { | ||
379 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
380 | unsigned long statusbit; | ||
381 | enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); | ||
382 | |||
383 | if (mtype == NF_NAT_MANIP_SRC) | ||
384 | statusbit = IPS_SRC_NAT; | ||
385 | else | ||
386 | statusbit = IPS_DST_NAT; | ||
387 | |||
388 | /* Invert if this is reply dir. */ | ||
389 | if (dir == IP_CT_DIR_REPLY) | ||
390 | statusbit ^= IPS_NAT_MASK; | ||
391 | |||
392 | /* Non-atomic: these bits don't change. */ | ||
393 | if (ct->status & statusbit) { | ||
394 | struct nf_conntrack_tuple target; | ||
395 | |||
396 | /* We are aiming to look like inverse of other direction. */ | ||
397 | nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); | ||
398 | |||
399 | if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) | ||
400 | return NF_DROP; | ||
401 | } | ||
402 | return NF_ACCEPT; | ||
403 | } | ||
404 | EXPORT_SYMBOL_GPL(nf_nat_packet); | ||
405 | |||
406 | /* Dir is direction ICMP is coming from (opposite to packet it contains) */ | ||
407 | int nf_nat_icmp_reply_translation(struct nf_conn *ct, | ||
408 | enum ip_conntrack_info ctinfo, | ||
409 | unsigned int hooknum, | ||
410 | struct sk_buff *skb) | ||
411 | { | ||
412 | struct { | ||
413 | struct icmphdr icmp; | ||
414 | struct iphdr ip; | ||
415 | } *inside; | ||
416 | struct nf_conntrack_tuple target; | ||
417 | int hdrlen = ip_hdrlen(skb); | ||
418 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
419 | unsigned long statusbit; | ||
420 | enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); | ||
421 | |||
422 | if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) | ||
423 | return 0; | ||
424 | |||
425 | inside = (void *)skb->data + hdrlen; | ||
426 | |||
427 | /* We're actually going to mangle it beyond trivial checksum | ||
428 | adjustment, so make sure the current checksum is correct. */ | ||
429 | if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) | ||
430 | return 0; | ||
431 | |||
432 | /* Must be RELATED */ | ||
433 | NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || | ||
434 | skb->nfctinfo == IP_CT_RELATED_REPLY); | ||
435 | |||
436 | /* Redirects on non-null nats must be dropped, else they'll | ||
437 | start talking to each other without our translation, and be | ||
438 | confused... --RR */ | ||
439 | if (inside->icmp.type == ICMP_REDIRECT) { | ||
440 | /* If NAT isn't finished, assume it and drop. */ | ||
441 | if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) | ||
442 | return 0; | ||
443 | |||
444 | if (ct->status & IPS_NAT_MASK) | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | if (manip == NF_NAT_MANIP_SRC) | ||
449 | statusbit = IPS_SRC_NAT; | ||
450 | else | ||
451 | statusbit = IPS_DST_NAT; | ||
452 | |||
453 | /* Invert if this is reply dir. */ | ||
454 | if (dir == IP_CT_DIR_REPLY) | ||
455 | statusbit ^= IPS_NAT_MASK; | ||
456 | |||
457 | if (!(ct->status & statusbit)) | ||
458 | return 1; | ||
459 | |||
460 | pr_debug("icmp_reply_translation: translating error %p manip %u " | ||
461 | "dir %s\n", skb, manip, | ||
462 | dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); | ||
463 | |||
464 | /* Change inner back to look like incoming packet. We do the | ||
465 | opposite manip on this hook to normal, because it might not | ||
466 | pass all hooks (locally-generated ICMP). Consider incoming | ||
467 | packet: PREROUTING (DST manip), routing produces ICMP, goes | ||
468 | through POSTROUTING (which must correct the DST manip). */ | ||
469 | if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), | ||
470 | &ct->tuplehash[!dir].tuple, !manip)) | ||
471 | return 0; | ||
472 | |||
473 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | ||
474 | /* Reloading "inside" here since manip_pkt inner. */ | ||
475 | inside = (void *)skb->data + hdrlen; | ||
476 | inside->icmp.checksum = 0; | ||
477 | inside->icmp.checksum = | ||
478 | csum_fold(skb_checksum(skb, hdrlen, | ||
479 | skb->len - hdrlen, 0)); | ||
480 | } | ||
481 | |||
482 | /* Change outer to look the reply to an incoming packet | ||
483 | * (proto 0 means don't invert per-proto part). */ | ||
484 | nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); | ||
485 | if (!manip_pkt(0, skb, 0, &target, manip)) | ||
486 | return 0; | ||
487 | |||
488 | return 1; | ||
489 | } | ||
490 | EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); | ||
491 | |||
492 | /* Protocol registration. */ | ||
493 | int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | ||
494 | { | ||
495 | int ret = 0; | ||
496 | |||
497 | spin_lock_bh(&nf_nat_lock); | ||
498 | if (rcu_dereference_protected( | ||
499 | nf_nat_protos[proto->protonum], | ||
500 | lockdep_is_held(&nf_nat_lock) | ||
501 | ) != &nf_nat_unknown_protocol) { | ||
502 | ret = -EBUSY; | ||
503 | goto out; | ||
504 | } | ||
505 | RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); | ||
506 | out: | ||
507 | spin_unlock_bh(&nf_nat_lock); | ||
508 | return ret; | ||
509 | } | ||
510 | EXPORT_SYMBOL(nf_nat_protocol_register); | ||
511 | |||
512 | /* No one stores the protocol anywhere; simply delete it. */ | ||
513 | void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) | ||
514 | { | ||
515 | spin_lock_bh(&nf_nat_lock); | ||
516 | RCU_INIT_POINTER(nf_nat_protos[proto->protonum], | ||
517 | &nf_nat_unknown_protocol); | ||
518 | spin_unlock_bh(&nf_nat_lock); | ||
519 | synchronize_rcu(); | ||
520 | } | ||
521 | EXPORT_SYMBOL(nf_nat_protocol_unregister); | ||
522 | |||
523 | /* No one using conntrack by the time this called. */ | ||
524 | static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | ||
525 | { | ||
526 | struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); | ||
527 | |||
528 | if (nat == NULL || nat->ct == NULL) | ||
529 | return; | ||
530 | |||
531 | NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); | ||
532 | |||
533 | spin_lock_bh(&nf_nat_lock); | ||
534 | hlist_del_rcu(&nat->bysource); | ||
535 | spin_unlock_bh(&nf_nat_lock); | ||
536 | } | ||
537 | |||
538 | static void nf_nat_move_storage(void *new, void *old) | ||
539 | { | ||
540 | struct nf_conn_nat *new_nat = new; | ||
541 | struct nf_conn_nat *old_nat = old; | ||
542 | struct nf_conn *ct = old_nat->ct; | ||
543 | |||
544 | if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) | ||
545 | return; | ||
546 | |||
547 | spin_lock_bh(&nf_nat_lock); | ||
548 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); | ||
549 | spin_unlock_bh(&nf_nat_lock); | ||
550 | } | ||
551 | |||
552 | static struct nf_ct_ext_type nat_extend __read_mostly = { | ||
553 | .len = sizeof(struct nf_conn_nat), | ||
554 | .align = __alignof__(struct nf_conn_nat), | ||
555 | .destroy = nf_nat_cleanup_conntrack, | ||
556 | .move = nf_nat_move_storage, | ||
557 | .id = NF_CT_EXT_NAT, | ||
558 | .flags = NF_CT_EXT_F_PREALLOC, | ||
559 | }; | ||
560 | |||
561 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
562 | |||
563 | #include <linux/netfilter/nfnetlink.h> | ||
564 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
565 | |||
566 | static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { | ||
567 | [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, | ||
568 | [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, | ||
569 | }; | ||
570 | |||
571 | static int nfnetlink_parse_nat_proto(struct nlattr *attr, | ||
572 | const struct nf_conn *ct, | ||
573 | struct nf_nat_ipv4_range *range) | ||
574 | { | ||
575 | struct nlattr *tb[CTA_PROTONAT_MAX+1]; | ||
576 | const struct nf_nat_protocol *npt; | ||
577 | int err; | ||
578 | |||
579 | err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); | ||
580 | if (err < 0) | ||
581 | return err; | ||
582 | |||
583 | rcu_read_lock(); | ||
584 | npt = __nf_nat_proto_find(nf_ct_protonum(ct)); | ||
585 | if (npt->nlattr_to_range) | ||
586 | err = npt->nlattr_to_range(tb, range); | ||
587 | rcu_read_unlock(); | ||
588 | return err; | ||
589 | } | ||
590 | |||
591 | static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { | ||
592 | [CTA_NAT_MINIP] = { .type = NLA_U32 }, | ||
593 | [CTA_NAT_MAXIP] = { .type = NLA_U32 }, | ||
594 | [CTA_NAT_PROTO] = { .type = NLA_NESTED }, | ||
595 | }; | ||
596 | |||
597 | static int | ||
598 | nfnetlink_parse_nat(const struct nlattr *nat, | ||
599 | const struct nf_conn *ct, struct nf_nat_ipv4_range *range) | ||
600 | { | ||
601 | struct nlattr *tb[CTA_NAT_MAX+1]; | ||
602 | int err; | ||
603 | |||
604 | memset(range, 0, sizeof(*range)); | ||
605 | |||
606 | err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); | ||
607 | if (err < 0) | ||
608 | return err; | ||
609 | |||
610 | if (tb[CTA_NAT_MINIP]) | ||
611 | range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); | ||
612 | |||
613 | if (!tb[CTA_NAT_MAXIP]) | ||
614 | range->max_ip = range->min_ip; | ||
615 | else | ||
616 | range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); | ||
617 | |||
618 | if (range->min_ip) | ||
619 | range->flags |= NF_NAT_RANGE_MAP_IPS; | ||
620 | |||
621 | if (!tb[CTA_NAT_PROTO]) | ||
622 | return 0; | ||
623 | |||
624 | err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); | ||
625 | if (err < 0) | ||
626 | return err; | ||
627 | |||
628 | return 0; | ||
629 | } | ||
630 | |||
631 | static int | ||
632 | nfnetlink_parse_nat_setup(struct nf_conn *ct, | ||
633 | enum nf_nat_manip_type manip, | ||
634 | const struct nlattr *attr) | ||
635 | { | ||
636 | struct nf_nat_ipv4_range range; | ||
637 | |||
638 | if (nfnetlink_parse_nat(attr, ct, &range) < 0) | ||
639 | return -EINVAL; | ||
640 | if (nf_nat_initialized(ct, manip)) | ||
641 | return -EEXIST; | ||
642 | |||
643 | return nf_nat_setup_info(ct, &range, manip); | ||
644 | } | ||
645 | #else | ||
646 | static int | ||
647 | nfnetlink_parse_nat_setup(struct nf_conn *ct, | ||
648 | enum nf_nat_manip_type manip, | ||
649 | const struct nlattr *attr) | ||
650 | { | ||
651 | return -EOPNOTSUPP; | ||
652 | } | ||
653 | #endif | ||
654 | |||
655 | static int __net_init nf_nat_net_init(struct net *net) | ||
656 | { | ||
657 | /* Leave them the same for the moment. */ | ||
658 | net->ipv4.nat_htable_size = net->ct.htable_size; | ||
659 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); | ||
660 | if (!net->ipv4.nat_bysource) | ||
661 | return -ENOMEM; | ||
662 | return 0; | ||
663 | } | ||
664 | |||
665 | /* Clear NAT section of all conntracks, in case we're loaded again. */ | ||
666 | static int clean_nat(struct nf_conn *i, void *data) | ||
667 | { | ||
668 | struct nf_conn_nat *nat = nfct_nat(i); | ||
669 | |||
670 | if (!nat) | ||
671 | return 0; | ||
672 | memset(nat, 0, sizeof(*nat)); | ||
673 | i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); | ||
674 | return 0; | ||
675 | } | ||
676 | |||
677 | static void __net_exit nf_nat_net_exit(struct net *net) | ||
678 | { | ||
679 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | ||
680 | synchronize_rcu(); | ||
681 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); | ||
682 | } | ||
683 | |||
684 | static struct pernet_operations nf_nat_net_ops = { | ||
685 | .init = nf_nat_net_init, | ||
686 | .exit = nf_nat_net_exit, | ||
687 | }; | ||
688 | |||
689 | static struct nf_ct_helper_expectfn follow_master_nat = { | ||
690 | .name = "nat-follow-master", | ||
691 | .expectfn = nf_nat_follow_master, | ||
692 | }; | ||
693 | |||
694 | static struct nfq_ct_nat_hook nfq_ct_nat = { | ||
695 | .seq_adjust = nf_nat_tcp_seq_adjust, | ||
696 | }; | ||
697 | |||
698 | static int __init nf_nat_init(void) | ||
699 | { | ||
700 | size_t i; | ||
701 | int ret; | ||
702 | |||
703 | need_ipv4_conntrack(); | ||
704 | |||
705 | ret = nf_ct_extend_register(&nat_extend); | ||
706 | if (ret < 0) { | ||
707 | printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); | ||
708 | return ret; | ||
709 | } | ||
710 | |||
711 | ret = register_pernet_subsys(&nf_nat_net_ops); | ||
712 | if (ret < 0) | ||
713 | goto cleanup_extend; | ||
714 | |||
715 | /* Sew in builtin protocols. */ | ||
716 | spin_lock_bh(&nf_nat_lock); | ||
717 | for (i = 0; i < MAX_IP_NAT_PROTO; i++) | ||
718 | RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); | ||
719 | RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); | ||
720 | RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); | ||
721 | RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); | ||
722 | spin_unlock_bh(&nf_nat_lock); | ||
723 | |||
724 | /* Initialize fake conntrack so that NAT will skip it */ | ||
725 | nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); | ||
726 | |||
727 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); | ||
728 | |||
729 | nf_ct_helper_expectfn_register(&follow_master_nat); | ||
730 | |||
731 | BUG_ON(nf_nat_seq_adjust_hook != NULL); | ||
732 | RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); | ||
733 | BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); | ||
734 | RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, | ||
735 | nfnetlink_parse_nat_setup); | ||
736 | BUG_ON(nf_ct_nat_offset != NULL); | ||
737 | RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); | ||
738 | RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); | ||
739 | return 0; | ||
740 | |||
741 | cleanup_extend: | ||
742 | nf_ct_extend_unregister(&nat_extend); | ||
743 | return ret; | ||
744 | } | ||
745 | |||
746 | static void __exit nf_nat_cleanup(void) | ||
747 | { | ||
748 | unregister_pernet_subsys(&nf_nat_net_ops); | ||
749 | nf_ct_l3proto_put(l3proto); | ||
750 | nf_ct_extend_unregister(&nat_extend); | ||
751 | nf_ct_helper_expectfn_unregister(&follow_master_nat); | ||
752 | RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); | ||
753 | RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); | ||
754 | RCU_INIT_POINTER(nf_ct_nat_offset, NULL); | ||
755 | RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); | ||
756 | synchronize_net(); | ||
757 | } | ||
758 | |||
759 | MODULE_LICENSE("GPL"); | ||
760 | MODULE_ALIAS("nf-nat-ipv4"); | ||
761 | |||
762 | module_init(nf_nat_init); | ||
763 | module_exit(nf_nat_cleanup); | ||
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c deleted file mode 100644 index e462a957d080..000000000000 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ /dev/null | |||
@@ -1,137 +0,0 @@ | |||
1 | /* FTP extension for TCP NAT alteration. */ | ||
2 | |||
3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
4 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/netfilter_ipv4.h> | ||
16 | #include <net/netfilter/nf_nat.h> | ||
17 | #include <net/netfilter/nf_nat_helper.h> | ||
18 | #include <net/netfilter/nf_nat_rule.h> | ||
19 | #include <net/netfilter/nf_conntrack_helper.h> | ||
20 | #include <net/netfilter/nf_conntrack_expect.h> | ||
21 | #include <linux/netfilter/nf_conntrack_ftp.h> | ||
22 | |||
23 | MODULE_LICENSE("GPL"); | ||
24 | MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | ||
25 | MODULE_DESCRIPTION("ftp NAT helper"); | ||
26 | MODULE_ALIAS("ip_nat_ftp"); | ||
27 | |||
28 | /* FIXME: Time out? --RR */ | ||
29 | |||
30 | static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, | ||
31 | char *buffer, size_t buflen, | ||
32 | __be32 addr, u16 port) | ||
33 | { | ||
34 | switch (type) { | ||
35 | case NF_CT_FTP_PORT: | ||
36 | case NF_CT_FTP_PASV: | ||
37 | return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", | ||
38 | ((unsigned char *)&addr)[0], | ||
39 | ((unsigned char *)&addr)[1], | ||
40 | ((unsigned char *)&addr)[2], | ||
41 | ((unsigned char *)&addr)[3], | ||
42 | port >> 8, | ||
43 | port & 0xFF); | ||
44 | case NF_CT_FTP_EPRT: | ||
45 | return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); | ||
46 | case NF_CT_FTP_EPSV: | ||
47 | return snprintf(buffer, buflen, "|||%u|", port); | ||
48 | } | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | /* So, this packet has hit the connection tracking matching code. | ||
54 | Mangle it, and change the expectation to match the new version. */ | ||
55 | static unsigned int nf_nat_ftp(struct sk_buff *skb, | ||
56 | enum ip_conntrack_info ctinfo, | ||
57 | enum nf_ct_ftp_type type, | ||
58 | unsigned int matchoff, | ||
59 | unsigned int matchlen, | ||
60 | struct nf_conntrack_expect *exp) | ||
61 | { | ||
62 | __be32 newip; | ||
63 | u_int16_t port; | ||
64 | int dir = CTINFO2DIR(ctinfo); | ||
65 | struct nf_conn *ct = exp->master; | ||
66 | char buffer[sizeof("|1|255.255.255.255|65535|")]; | ||
67 | unsigned int buflen; | ||
68 | |||
69 | pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); | ||
70 | |||
71 | /* Connection will come from wherever this packet goes, hence !dir */ | ||
72 | newip = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
73 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
74 | exp->dir = !dir; | ||
75 | |||
76 | /* When you see the packet, we need to NAT it the same as the | ||
77 | * this one. */ | ||
78 | exp->expectfn = nf_nat_follow_master; | ||
79 | |||
80 | /* Try to get same port: if not, try to change it. */ | ||
81 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | ||
82 | int ret; | ||
83 | |||
84 | exp->tuple.dst.u.tcp.port = htons(port); | ||
85 | ret = nf_ct_expect_related(exp); | ||
86 | if (ret == 0) | ||
87 | break; | ||
88 | else if (ret != -EBUSY) { | ||
89 | port = 0; | ||
90 | break; | ||
91 | } | ||
92 | } | ||
93 | |||
94 | if (port == 0) | ||
95 | return NF_DROP; | ||
96 | |||
97 | buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); | ||
98 | if (!buflen) | ||
99 | goto out; | ||
100 | |||
101 | pr_debug("calling nf_nat_mangle_tcp_packet\n"); | ||
102 | |||
103 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, | ||
104 | matchlen, buffer, buflen)) | ||
105 | goto out; | ||
106 | |||
107 | return NF_ACCEPT; | ||
108 | |||
109 | out: | ||
110 | nf_ct_unexpect_related(exp); | ||
111 | return NF_DROP; | ||
112 | } | ||
113 | |||
114 | static void __exit nf_nat_ftp_fini(void) | ||
115 | { | ||
116 | RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); | ||
117 | synchronize_rcu(); | ||
118 | } | ||
119 | |||
120 | static int __init nf_nat_ftp_init(void) | ||
121 | { | ||
122 | BUG_ON(nf_nat_ftp_hook != NULL); | ||
123 | RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ | ||
128 | static int warn_set(const char *val, struct kernel_param *kp) | ||
129 | { | ||
130 | printk(KERN_INFO KBUILD_MODNAME | ||
131 | ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); | ||
132 | return 0; | ||
133 | } | ||
134 | module_param_call(ports, warn_set, NULL, NULL, 0); | ||
135 | |||
136 | module_init(nf_nat_ftp_init); | ||
137 | module_exit(nf_nat_ftp_fini); | ||
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c6784a18c1c4..9c3db10b22d3 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -15,13 +15,12 @@ | |||
15 | 15 | ||
16 | #include <net/netfilter/nf_nat.h> | 16 | #include <net/netfilter/nf_nat.h> |
17 | #include <net/netfilter/nf_nat_helper.h> | 17 | #include <net/netfilter/nf_nat_helper.h> |
18 | #include <net/netfilter/nf_nat_rule.h> | ||
19 | #include <net/netfilter/nf_conntrack_helper.h> | 18 | #include <net/netfilter/nf_conntrack_helper.h> |
20 | #include <net/netfilter/nf_conntrack_expect.h> | 19 | #include <net/netfilter/nf_conntrack_expect.h> |
21 | #include <linux/netfilter/nf_conntrack_h323.h> | 20 | #include <linux/netfilter/nf_conntrack_h323.h> |
22 | 21 | ||
23 | /****************************************************************************/ | 22 | /****************************************************************************/ |
24 | static int set_addr(struct sk_buff *skb, | 23 | static int set_addr(struct sk_buff *skb, unsigned int protoff, |
25 | unsigned char **data, int dataoff, | 24 | unsigned char **data, int dataoff, |
26 | unsigned int addroff, __be32 ip, __be16 port) | 25 | unsigned int addroff, __be32 ip, __be16 port) |
27 | { | 26 | { |
@@ -40,7 +39,7 @@ static int set_addr(struct sk_buff *skb, | |||
40 | 39 | ||
41 | if (ip_hdr(skb)->protocol == IPPROTO_TCP) { | 40 | if (ip_hdr(skb)->protocol == IPPROTO_TCP) { |
42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 41 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, |
43 | addroff, sizeof(buf), | 42 | protoff, addroff, sizeof(buf), |
44 | (char *) &buf, sizeof(buf))) { | 43 | (char *) &buf, sizeof(buf))) { |
45 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); | 44 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); |
46 | return -1; | 45 | return -1; |
@@ -54,7 +53,7 @@ static int set_addr(struct sk_buff *skb, | |||
54 | *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; | 53 | *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; |
55 | } else { | 54 | } else { |
56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 55 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, |
57 | addroff, sizeof(buf), | 56 | protoff, addroff, sizeof(buf), |
58 | (char *) &buf, sizeof(buf))) { | 57 | (char *) &buf, sizeof(buf))) { |
59 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); | 58 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); |
60 | return -1; | 59 | return -1; |
@@ -69,22 +68,22 @@ static int set_addr(struct sk_buff *skb, | |||
69 | } | 68 | } |
70 | 69 | ||
71 | /****************************************************************************/ | 70 | /****************************************************************************/ |
72 | static int set_h225_addr(struct sk_buff *skb, | 71 | static int set_h225_addr(struct sk_buff *skb, unsigned int protoff, |
73 | unsigned char **data, int dataoff, | 72 | unsigned char **data, int dataoff, |
74 | TransportAddress *taddr, | 73 | TransportAddress *taddr, |
75 | union nf_inet_addr *addr, __be16 port) | 74 | union nf_inet_addr *addr, __be16 port) |
76 | { | 75 | { |
77 | return set_addr(skb, data, dataoff, taddr->ipAddress.ip, | 76 | return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip, |
78 | addr->ip, port); | 77 | addr->ip, port); |
79 | } | 78 | } |
80 | 79 | ||
81 | /****************************************************************************/ | 80 | /****************************************************************************/ |
82 | static int set_h245_addr(struct sk_buff *skb, | 81 | static int set_h245_addr(struct sk_buff *skb, unsigned protoff, |
83 | unsigned char **data, int dataoff, | 82 | unsigned char **data, int dataoff, |
84 | H245_TransportAddress *taddr, | 83 | H245_TransportAddress *taddr, |
85 | union nf_inet_addr *addr, __be16 port) | 84 | union nf_inet_addr *addr, __be16 port) |
86 | { | 85 | { |
87 | return set_addr(skb, data, dataoff, | 86 | return set_addr(skb, protoff, data, dataoff, |
88 | taddr->unicastAddress.iPAddress.network, | 87 | taddr->unicastAddress.iPAddress.network, |
89 | addr->ip, port); | 88 | addr->ip, port); |
90 | } | 89 | } |
@@ -92,7 +91,7 @@ static int set_h245_addr(struct sk_buff *skb, | |||
92 | /****************************************************************************/ | 91 | /****************************************************************************/ |
93 | static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | 92 | static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, |
94 | enum ip_conntrack_info ctinfo, | 93 | enum ip_conntrack_info ctinfo, |
95 | unsigned char **data, | 94 | unsigned int protoff, unsigned char **data, |
96 | TransportAddress *taddr, int count) | 95 | TransportAddress *taddr, int count) |
97 | { | 96 | { |
98 | const struct nf_ct_h323_master *info = nfct_help_data(ct); | 97 | const struct nf_ct_h323_master *info = nfct_help_data(ct); |
@@ -118,7 +117,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
118 | &addr.ip, port, | 117 | &addr.ip, port, |
119 | &ct->tuplehash[!dir].tuple.dst.u3.ip, | 118 | &ct->tuplehash[!dir].tuple.dst.u3.ip, |
120 | info->sig_port[!dir]); | 119 | info->sig_port[!dir]); |
121 | return set_h225_addr(skb, data, 0, &taddr[i], | 120 | return set_h225_addr(skb, protoff, data, 0, |
121 | &taddr[i], | ||
122 | &ct->tuplehash[!dir]. | 122 | &ct->tuplehash[!dir]. |
123 | tuple.dst.u3, | 123 | tuple.dst.u3, |
124 | info->sig_port[!dir]); | 124 | info->sig_port[!dir]); |
@@ -129,7 +129,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
129 | &addr.ip, port, | 129 | &addr.ip, port, |
130 | &ct->tuplehash[!dir].tuple.src.u3.ip, | 130 | &ct->tuplehash[!dir].tuple.src.u3.ip, |
131 | info->sig_port[!dir]); | 131 | info->sig_port[!dir]); |
132 | return set_h225_addr(skb, data, 0, &taddr[i], | 132 | return set_h225_addr(skb, protoff, data, 0, |
133 | &taddr[i], | ||
133 | &ct->tuplehash[!dir]. | 134 | &ct->tuplehash[!dir]. |
134 | tuple.src.u3, | 135 | tuple.src.u3, |
135 | info->sig_port[!dir]); | 136 | info->sig_port[!dir]); |
@@ -143,7 +144,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
143 | /****************************************************************************/ | 144 | /****************************************************************************/ |
144 | static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, | 145 | static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, |
145 | enum ip_conntrack_info ctinfo, | 146 | enum ip_conntrack_info ctinfo, |
146 | unsigned char **data, | 147 | unsigned int protoff, unsigned char **data, |
147 | TransportAddress *taddr, int count) | 148 | TransportAddress *taddr, int count) |
148 | { | 149 | { |
149 | int dir = CTINFO2DIR(ctinfo); | 150 | int dir = CTINFO2DIR(ctinfo); |
@@ -159,7 +160,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
159 | &addr.ip, ntohs(port), | 160 | &addr.ip, ntohs(port), |
160 | &ct->tuplehash[!dir].tuple.dst.u3.ip, | 161 | &ct->tuplehash[!dir].tuple.dst.u3.ip, |
161 | ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); | 162 | ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); |
162 | return set_h225_addr(skb, data, 0, &taddr[i], | 163 | return set_h225_addr(skb, protoff, data, 0, &taddr[i], |
163 | &ct->tuplehash[!dir].tuple.dst.u3, | 164 | &ct->tuplehash[!dir].tuple.dst.u3, |
164 | ct->tuplehash[!dir].tuple. | 165 | ct->tuplehash[!dir].tuple. |
165 | dst.u.udp.port); | 166 | dst.u.udp.port); |
@@ -172,7 +173,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
172 | /****************************************************************************/ | 173 | /****************************************************************************/ |
173 | static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | 174 | static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, |
174 | enum ip_conntrack_info ctinfo, | 175 | enum ip_conntrack_info ctinfo, |
175 | unsigned char **data, int dataoff, | 176 | unsigned int protoff, unsigned char **data, int dataoff, |
176 | H245_TransportAddress *taddr, | 177 | H245_TransportAddress *taddr, |
177 | __be16 port, __be16 rtp_port, | 178 | __be16 port, __be16 rtp_port, |
178 | struct nf_conntrack_expect *rtp_exp, | 179 | struct nf_conntrack_expect *rtp_exp, |
@@ -244,7 +245,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
244 | } | 245 | } |
245 | 246 | ||
246 | /* Modify signal */ | 247 | /* Modify signal */ |
247 | if (set_h245_addr(skb, data, dataoff, taddr, | 248 | if (set_h245_addr(skb, protoff, data, dataoff, taddr, |
248 | &ct->tuplehash[!dir].tuple.dst.u3, | 249 | &ct->tuplehash[!dir].tuple.dst.u3, |
249 | htons((port & htons(1)) ? nated_port + 1 : | 250 | htons((port & htons(1)) ? nated_port + 1 : |
250 | nated_port)) == 0) { | 251 | nated_port)) == 0) { |
@@ -275,7 +276,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
275 | /****************************************************************************/ | 276 | /****************************************************************************/ |
276 | static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | 277 | static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, |
277 | enum ip_conntrack_info ctinfo, | 278 | enum ip_conntrack_info ctinfo, |
278 | unsigned char **data, int dataoff, | 279 | unsigned int protoff, unsigned char **data, int dataoff, |
279 | H245_TransportAddress *taddr, __be16 port, | 280 | H245_TransportAddress *taddr, __be16 port, |
280 | struct nf_conntrack_expect *exp) | 281 | struct nf_conntrack_expect *exp) |
281 | { | 282 | { |
@@ -307,7 +308,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
307 | } | 308 | } |
308 | 309 | ||
309 | /* Modify signal */ | 310 | /* Modify signal */ |
310 | if (set_h245_addr(skb, data, dataoff, taddr, | 311 | if (set_h245_addr(skb, protoff, data, dataoff, taddr, |
311 | &ct->tuplehash[!dir].tuple.dst.u3, | 312 | &ct->tuplehash[!dir].tuple.dst.u3, |
312 | htons(nated_port)) < 0) { | 313 | htons(nated_port)) < 0) { |
313 | nf_ct_unexpect_related(exp); | 314 | nf_ct_unexpect_related(exp); |
@@ -326,7 +327,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
326 | /****************************************************************************/ | 327 | /****************************************************************************/ |
327 | static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | 328 | static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, |
328 | enum ip_conntrack_info ctinfo, | 329 | enum ip_conntrack_info ctinfo, |
329 | unsigned char **data, int dataoff, | 330 | unsigned int protoff, unsigned char **data, int dataoff, |
330 | TransportAddress *taddr, __be16 port, | 331 | TransportAddress *taddr, __be16 port, |
331 | struct nf_conntrack_expect *exp) | 332 | struct nf_conntrack_expect *exp) |
332 | { | 333 | { |
@@ -363,7 +364,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
363 | } | 364 | } |
364 | 365 | ||
365 | /* Modify signal */ | 366 | /* Modify signal */ |
366 | if (set_h225_addr(skb, data, dataoff, taddr, | 367 | if (set_h225_addr(skb, protoff, data, dataoff, taddr, |
367 | &ct->tuplehash[!dir].tuple.dst.u3, | 368 | &ct->tuplehash[!dir].tuple.dst.u3, |
368 | htons(nated_port)) == 0) { | 369 | htons(nated_port)) == 0) { |
369 | /* Save ports */ | 370 | /* Save ports */ |
@@ -390,7 +391,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
390 | static void ip_nat_q931_expect(struct nf_conn *new, | 391 | static void ip_nat_q931_expect(struct nf_conn *new, |
391 | struct nf_conntrack_expect *this) | 392 | struct nf_conntrack_expect *this) |
392 | { | 393 | { |
393 | struct nf_nat_ipv4_range range; | 394 | struct nf_nat_range range; |
394 | 395 | ||
395 | if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ | 396 | if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ |
396 | nf_nat_follow_master(new, this); | 397 | nf_nat_follow_master(new, this); |
@@ -402,21 +403,23 @@ static void ip_nat_q931_expect(struct nf_conn *new, | |||
402 | 403 | ||
403 | /* Change src to where master sends to */ | 404 | /* Change src to where master sends to */ |
404 | range.flags = NF_NAT_RANGE_MAP_IPS; | 405 | range.flags = NF_NAT_RANGE_MAP_IPS; |
405 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; | 406 | range.min_addr = range.max_addr = |
407 | new->tuplehash[!this->dir].tuple.src.u3; | ||
406 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); | 408 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); |
407 | 409 | ||
408 | /* For DST manip, map port here to where it's expected. */ | 410 | /* For DST manip, map port here to where it's expected. */ |
409 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); | 411 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); |
410 | range.min = range.max = this->saved_proto; | 412 | range.min_proto = range.max_proto = this->saved_proto; |
411 | range.min_ip = range.max_ip = | 413 | range.min_addr = range.max_addr = |
412 | new->master->tuplehash[!this->dir].tuple.src.u3.ip; | 414 | new->master->tuplehash[!this->dir].tuple.src.u3; |
413 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); | 415 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); |
414 | } | 416 | } |
415 | 417 | ||
416 | /****************************************************************************/ | 418 | /****************************************************************************/ |
417 | static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | 419 | static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, |
418 | enum ip_conntrack_info ctinfo, | 420 | enum ip_conntrack_info ctinfo, |
419 | unsigned char **data, TransportAddress *taddr, int idx, | 421 | unsigned int protoff, unsigned char **data, |
422 | TransportAddress *taddr, int idx, | ||
420 | __be16 port, struct nf_conntrack_expect *exp) | 423 | __be16 port, struct nf_conntrack_expect *exp) |
421 | { | 424 | { |
422 | struct nf_ct_h323_master *info = nfct_help_data(ct); | 425 | struct nf_ct_h323_master *info = nfct_help_data(ct); |
@@ -453,7 +456,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
453 | } | 456 | } |
454 | 457 | ||
455 | /* Modify signal */ | 458 | /* Modify signal */ |
456 | if (set_h225_addr(skb, data, 0, &taddr[idx], | 459 | if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], |
457 | &ct->tuplehash[!dir].tuple.dst.u3, | 460 | &ct->tuplehash[!dir].tuple.dst.u3, |
458 | htons(nated_port)) == 0) { | 461 | htons(nated_port)) == 0) { |
459 | /* Save ports */ | 462 | /* Save ports */ |
@@ -464,7 +467,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
464 | if (idx > 0 && | 467 | if (idx > 0 && |
465 | get_h225_addr(ct, *data, &taddr[0], &addr, &port) && | 468 | get_h225_addr(ct, *data, &taddr[0], &addr, &port) && |
466 | (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { | 469 | (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { |
467 | set_h225_addr(skb, data, 0, &taddr[0], | 470 | set_h225_addr(skb, protoff, data, 0, &taddr[0], |
468 | &ct->tuplehash[!dir].tuple.dst.u3, | 471 | &ct->tuplehash[!dir].tuple.dst.u3, |
469 | info->sig_port[!dir]); | 472 | info->sig_port[!dir]); |
470 | } | 473 | } |
@@ -487,26 +490,28 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
487 | static void ip_nat_callforwarding_expect(struct nf_conn *new, | 490 | static void ip_nat_callforwarding_expect(struct nf_conn *new, |
488 | struct nf_conntrack_expect *this) | 491 | struct nf_conntrack_expect *this) |
489 | { | 492 | { |
490 | struct nf_nat_ipv4_range range; | 493 | struct nf_nat_range range; |
491 | 494 | ||
492 | /* This must be a fresh one. */ | 495 | /* This must be a fresh one. */ |
493 | BUG_ON(new->status & IPS_NAT_DONE_MASK); | 496 | BUG_ON(new->status & IPS_NAT_DONE_MASK); |
494 | 497 | ||
495 | /* Change src to where master sends to */ | 498 | /* Change src to where master sends to */ |
496 | range.flags = NF_NAT_RANGE_MAP_IPS; | 499 | range.flags = NF_NAT_RANGE_MAP_IPS; |
497 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; | 500 | range.min_addr = range.max_addr = |
501 | new->tuplehash[!this->dir].tuple.src.u3; | ||
498 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); | 502 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); |
499 | 503 | ||
500 | /* For DST manip, map port here to where it's expected. */ | 504 | /* For DST manip, map port here to where it's expected. */ |
501 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); | 505 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); |
502 | range.min = range.max = this->saved_proto; | 506 | range.min_proto = range.max_proto = this->saved_proto; |
503 | range.min_ip = range.max_ip = this->saved_ip; | 507 | range.min_addr = range.max_addr = this->saved_addr; |
504 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); | 508 | nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); |
505 | } | 509 | } |
506 | 510 | ||
507 | /****************************************************************************/ | 511 | /****************************************************************************/ |
508 | static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | 512 | static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, |
509 | enum ip_conntrack_info ctinfo, | 513 | enum ip_conntrack_info ctinfo, |
514 | unsigned int protoff, | ||
510 | unsigned char **data, int dataoff, | 515 | unsigned char **data, int dataoff, |
511 | TransportAddress *taddr, __be16 port, | 516 | TransportAddress *taddr, __be16 port, |
512 | struct nf_conntrack_expect *exp) | 517 | struct nf_conntrack_expect *exp) |
@@ -515,7 +520,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
515 | u_int16_t nated_port; | 520 | u_int16_t nated_port; |
516 | 521 | ||
517 | /* Set expectations for NAT */ | 522 | /* Set expectations for NAT */ |
518 | exp->saved_ip = exp->tuple.dst.u3.ip; | 523 | exp->saved_addr = exp->tuple.dst.u3; |
519 | exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; | 524 | exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; |
520 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | 525 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; |
521 | exp->expectfn = ip_nat_callforwarding_expect; | 526 | exp->expectfn = ip_nat_callforwarding_expect; |
@@ -541,7 +546,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
541 | } | 546 | } |
542 | 547 | ||
543 | /* Modify signal */ | 548 | /* Modify signal */ |
544 | if (!set_h225_addr(skb, data, dataoff, taddr, | 549 | if (!set_h225_addr(skb, protoff, data, dataoff, taddr, |
545 | &ct->tuplehash[!dir].tuple.dst.u3, | 550 | &ct->tuplehash[!dir].tuple.dst.u3, |
546 | htons(nated_port)) == 0) { | 551 | htons(nated_port)) == 0) { |
547 | nf_ct_unexpect_related(exp); | 552 | nf_ct_unexpect_related(exp); |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c deleted file mode 100644 index 2e59ad0b90ca..000000000000 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ /dev/null | |||
@@ -1,458 +0,0 @@ | |||
1 | /* ip_nat_helper.c - generic support functions for NAT helpers | ||
2 | * | ||
3 | * (C) 2000-2002 Harald Welte <laforge@netfilter.org> | ||
4 | * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/gfp.h> | ||
12 | #include <linux/kmod.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/timer.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <linux/tcp.h> | ||
17 | #include <linux/udp.h> | ||
18 | #include <net/checksum.h> | ||
19 | #include <net/tcp.h> | ||
20 | #include <net/route.h> | ||
21 | |||
22 | #include <linux/netfilter_ipv4.h> | ||
23 | #include <net/netfilter/nf_conntrack.h> | ||
24 | #include <net/netfilter/nf_conntrack_helper.h> | ||
25 | #include <net/netfilter/nf_conntrack_ecache.h> | ||
26 | #include <net/netfilter/nf_conntrack_expect.h> | ||
27 | #include <net/netfilter/nf_nat.h> | ||
28 | #include <net/netfilter/nf_nat_protocol.h> | ||
29 | #include <net/netfilter/nf_nat_core.h> | ||
30 | #include <net/netfilter/nf_nat_helper.h> | ||
31 | |||
32 | #define DUMP_OFFSET(x) \ | ||
33 | pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ | ||
34 | x->offset_before, x->offset_after, x->correction_pos); | ||
35 | |||
36 | static DEFINE_SPINLOCK(nf_nat_seqofs_lock); | ||
37 | |||
38 | /* Setup TCP sequence correction given this change at this sequence */ | ||
39 | static inline void | ||
40 | adjust_tcp_sequence(u32 seq, | ||
41 | int sizediff, | ||
42 | struct nf_conn *ct, | ||
43 | enum ip_conntrack_info ctinfo) | ||
44 | { | ||
45 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
46 | struct nf_conn_nat *nat = nfct_nat(ct); | ||
47 | struct nf_nat_seq *this_way = &nat->seq[dir]; | ||
48 | |||
49 | pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", | ||
50 | seq, sizediff); | ||
51 | |||
52 | pr_debug("adjust_tcp_sequence: Seq_offset before: "); | ||
53 | DUMP_OFFSET(this_way); | ||
54 | |||
55 | spin_lock_bh(&nf_nat_seqofs_lock); | ||
56 | |||
57 | /* SYN adjust. If it's uninitialized, or this is after last | ||
58 | * correction, record it: we don't handle more than one | ||
59 | * adjustment in the window, but do deal with common case of a | ||
60 | * retransmit */ | ||
61 | if (this_way->offset_before == this_way->offset_after || | ||
62 | before(this_way->correction_pos, seq)) { | ||
63 | this_way->correction_pos = seq; | ||
64 | this_way->offset_before = this_way->offset_after; | ||
65 | this_way->offset_after += sizediff; | ||
66 | } | ||
67 | spin_unlock_bh(&nf_nat_seqofs_lock); | ||
68 | |||
69 | pr_debug("adjust_tcp_sequence: Seq_offset after: "); | ||
70 | DUMP_OFFSET(this_way); | ||
71 | } | ||
72 | |||
73 | /* Get the offset value, for conntrack */ | ||
74 | s16 nf_nat_get_offset(const struct nf_conn *ct, | ||
75 | enum ip_conntrack_dir dir, | ||
76 | u32 seq) | ||
77 | { | ||
78 | struct nf_conn_nat *nat = nfct_nat(ct); | ||
79 | struct nf_nat_seq *this_way; | ||
80 | s16 offset; | ||
81 | |||
82 | if (!nat) | ||
83 | return 0; | ||
84 | |||
85 | this_way = &nat->seq[dir]; | ||
86 | spin_lock_bh(&nf_nat_seqofs_lock); | ||
87 | offset = after(seq, this_way->correction_pos) | ||
88 | ? this_way->offset_after : this_way->offset_before; | ||
89 | spin_unlock_bh(&nf_nat_seqofs_lock); | ||
90 | |||
91 | return offset; | ||
92 | } | ||
93 | EXPORT_SYMBOL_GPL(nf_nat_get_offset); | ||
94 | |||
95 | /* Frobs data inside this packet, which is linear. */ | ||
96 | static void mangle_contents(struct sk_buff *skb, | ||
97 | unsigned int dataoff, | ||
98 | unsigned int match_offset, | ||
99 | unsigned int match_len, | ||
100 | const char *rep_buffer, | ||
101 | unsigned int rep_len) | ||
102 | { | ||
103 | unsigned char *data; | ||
104 | |||
105 | BUG_ON(skb_is_nonlinear(skb)); | ||
106 | data = skb_network_header(skb) + dataoff; | ||
107 | |||
108 | /* move post-replacement */ | ||
109 | memmove(data + match_offset + rep_len, | ||
110 | data + match_offset + match_len, | ||
111 | skb->tail - (skb->network_header + dataoff + | ||
112 | match_offset + match_len)); | ||
113 | |||
114 | /* insert data from buffer */ | ||
115 | memcpy(data + match_offset, rep_buffer, rep_len); | ||
116 | |||
117 | /* update skb info */ | ||
118 | if (rep_len > match_len) { | ||
119 | pr_debug("nf_nat_mangle_packet: Extending packet by " | ||
120 | "%u from %u bytes\n", rep_len - match_len, skb->len); | ||
121 | skb_put(skb, rep_len - match_len); | ||
122 | } else { | ||
123 | pr_debug("nf_nat_mangle_packet: Shrinking packet from " | ||
124 | "%u from %u bytes\n", match_len - rep_len, skb->len); | ||
125 | __skb_trim(skb, skb->len + rep_len - match_len); | ||
126 | } | ||
127 | |||
128 | /* fix IP hdr checksum information */ | ||
129 | ip_hdr(skb)->tot_len = htons(skb->len); | ||
130 | ip_send_check(ip_hdr(skb)); | ||
131 | } | ||
132 | |||
133 | /* Unusual, but possible case. */ | ||
134 | static int enlarge_skb(struct sk_buff *skb, unsigned int extra) | ||
135 | { | ||
136 | if (skb->len + extra > 65535) | ||
137 | return 0; | ||
138 | |||
139 | if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) | ||
140 | return 0; | ||
141 | |||
142 | return 1; | ||
143 | } | ||
144 | |||
145 | void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, | ||
146 | __be32 seq, s16 off) | ||
147 | { | ||
148 | if (!off) | ||
149 | return; | ||
150 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); | ||
151 | adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); | ||
152 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); | ||
153 | } | ||
154 | EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); | ||
155 | |||
156 | void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, | ||
157 | u32 ctinfo, int off) | ||
158 | { | ||
159 | const struct tcphdr *th; | ||
160 | |||
161 | if (nf_ct_protonum(ct) != IPPROTO_TCP) | ||
162 | return; | ||
163 | |||
164 | th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); | ||
165 | nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); | ||
166 | } | ||
167 | EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); | ||
168 | |||
169 | static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, | ||
170 | int datalen, __sum16 *check, int oldlen) | ||
171 | { | ||
172 | struct rtable *rt = skb_rtable(skb); | ||
173 | |||
174 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | ||
175 | if (!(rt->rt_flags & RTCF_LOCAL) && | ||
176 | (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { | ||
177 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
178 | skb->csum_start = skb_headroom(skb) + | ||
179 | skb_network_offset(skb) + | ||
180 | iph->ihl * 4; | ||
181 | skb->csum_offset = (void *)check - data; | ||
182 | *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
183 | datalen, iph->protocol, 0); | ||
184 | } else { | ||
185 | *check = 0; | ||
186 | *check = csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
187 | datalen, iph->protocol, | ||
188 | csum_partial(data, datalen, | ||
189 | 0)); | ||
190 | if (iph->protocol == IPPROTO_UDP && !*check) | ||
191 | *check = CSUM_MANGLED_0; | ||
192 | } | ||
193 | } else | ||
194 | inet_proto_csum_replace2(check, skb, | ||
195 | htons(oldlen), htons(datalen), 1); | ||
196 | } | ||
197 | |||
198 | /* Generic function for mangling variable-length address changes inside | ||
199 | * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX | ||
200 | * command in FTP). | ||
201 | * | ||
202 | * Takes care about all the nasty sequence number changes, checksumming, | ||
203 | * skb enlargement, ... | ||
204 | * | ||
205 | * */ | ||
206 | int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, | ||
207 | struct nf_conn *ct, | ||
208 | enum ip_conntrack_info ctinfo, | ||
209 | unsigned int match_offset, | ||
210 | unsigned int match_len, | ||
211 | const char *rep_buffer, | ||
212 | unsigned int rep_len, bool adjust) | ||
213 | { | ||
214 | struct iphdr *iph; | ||
215 | struct tcphdr *tcph; | ||
216 | int oldlen, datalen; | ||
217 | |||
218 | if (!skb_make_writable(skb, skb->len)) | ||
219 | return 0; | ||
220 | |||
221 | if (rep_len > match_len && | ||
222 | rep_len - match_len > skb_tailroom(skb) && | ||
223 | !enlarge_skb(skb, rep_len - match_len)) | ||
224 | return 0; | ||
225 | |||
226 | SKB_LINEAR_ASSERT(skb); | ||
227 | |||
228 | iph = ip_hdr(skb); | ||
229 | tcph = (void *)iph + iph->ihl*4; | ||
230 | |||
231 | oldlen = skb->len - iph->ihl*4; | ||
232 | mangle_contents(skb, iph->ihl*4 + tcph->doff*4, | ||
233 | match_offset, match_len, rep_buffer, rep_len); | ||
234 | |||
235 | datalen = skb->len - iph->ihl*4; | ||
236 | nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen); | ||
237 | |||
238 | if (adjust && rep_len != match_len) | ||
239 | nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, | ||
240 | (int)rep_len - (int)match_len); | ||
241 | |||
242 | return 1; | ||
243 | } | ||
244 | EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); | ||
245 | |||
246 | /* Generic function for mangling variable-length address changes inside | ||
247 | * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX | ||
248 | * command in the Amanda protocol) | ||
249 | * | ||
250 | * Takes care about all the nasty sequence number changes, checksumming, | ||
251 | * skb enlargement, ... | ||
252 | * | ||
253 | * XXX - This function could be merged with nf_nat_mangle_tcp_packet which | ||
254 | * should be fairly easy to do. | ||
255 | */ | ||
256 | int | ||
257 | nf_nat_mangle_udp_packet(struct sk_buff *skb, | ||
258 | struct nf_conn *ct, | ||
259 | enum ip_conntrack_info ctinfo, | ||
260 | unsigned int match_offset, | ||
261 | unsigned int match_len, | ||
262 | const char *rep_buffer, | ||
263 | unsigned int rep_len) | ||
264 | { | ||
265 | struct iphdr *iph; | ||
266 | struct udphdr *udph; | ||
267 | int datalen, oldlen; | ||
268 | |||
269 | if (!skb_make_writable(skb, skb->len)) | ||
270 | return 0; | ||
271 | |||
272 | if (rep_len > match_len && | ||
273 | rep_len - match_len > skb_tailroom(skb) && | ||
274 | !enlarge_skb(skb, rep_len - match_len)) | ||
275 | return 0; | ||
276 | |||
277 | iph = ip_hdr(skb); | ||
278 | udph = (void *)iph + iph->ihl*4; | ||
279 | |||
280 | oldlen = skb->len - iph->ihl*4; | ||
281 | mangle_contents(skb, iph->ihl*4 + sizeof(*udph), | ||
282 | match_offset, match_len, rep_buffer, rep_len); | ||
283 | |||
284 | /* update the length of the UDP packet */ | ||
285 | datalen = skb->len - iph->ihl*4; | ||
286 | udph->len = htons(datalen); | ||
287 | |||
288 | /* fix udp checksum if udp checksum was previously calculated */ | ||
289 | if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) | ||
290 | return 1; | ||
291 | |||
292 | nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen); | ||
293 | |||
294 | return 1; | ||
295 | } | ||
296 | EXPORT_SYMBOL(nf_nat_mangle_udp_packet); | ||
297 | |||
298 | /* Adjust one found SACK option including checksum correction */ | ||
299 | static void | ||
300 | sack_adjust(struct sk_buff *skb, | ||
301 | struct tcphdr *tcph, | ||
302 | unsigned int sackoff, | ||
303 | unsigned int sackend, | ||
304 | struct nf_nat_seq *natseq) | ||
305 | { | ||
306 | while (sackoff < sackend) { | ||
307 | struct tcp_sack_block_wire *sack; | ||
308 | __be32 new_start_seq, new_end_seq; | ||
309 | |||
310 | sack = (void *)skb->data + sackoff; | ||
311 | if (after(ntohl(sack->start_seq) - natseq->offset_before, | ||
312 | natseq->correction_pos)) | ||
313 | new_start_seq = htonl(ntohl(sack->start_seq) | ||
314 | - natseq->offset_after); | ||
315 | else | ||
316 | new_start_seq = htonl(ntohl(sack->start_seq) | ||
317 | - natseq->offset_before); | ||
318 | |||
319 | if (after(ntohl(sack->end_seq) - natseq->offset_before, | ||
320 | natseq->correction_pos)) | ||
321 | new_end_seq = htonl(ntohl(sack->end_seq) | ||
322 | - natseq->offset_after); | ||
323 | else | ||
324 | new_end_seq = htonl(ntohl(sack->end_seq) | ||
325 | - natseq->offset_before); | ||
326 | |||
327 | pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", | ||
328 | ntohl(sack->start_seq), new_start_seq, | ||
329 | ntohl(sack->end_seq), new_end_seq); | ||
330 | |||
331 | inet_proto_csum_replace4(&tcph->check, skb, | ||
332 | sack->start_seq, new_start_seq, 0); | ||
333 | inet_proto_csum_replace4(&tcph->check, skb, | ||
334 | sack->end_seq, new_end_seq, 0); | ||
335 | sack->start_seq = new_start_seq; | ||
336 | sack->end_seq = new_end_seq; | ||
337 | sackoff += sizeof(*sack); | ||
338 | } | ||
339 | } | ||
340 | |||
341 | /* TCP SACK sequence number adjustment */ | ||
342 | static inline unsigned int | ||
343 | nf_nat_sack_adjust(struct sk_buff *skb, | ||
344 | struct tcphdr *tcph, | ||
345 | struct nf_conn *ct, | ||
346 | enum ip_conntrack_info ctinfo) | ||
347 | { | ||
348 | unsigned int dir, optoff, optend; | ||
349 | struct nf_conn_nat *nat = nfct_nat(ct); | ||
350 | |||
351 | optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); | ||
352 | optend = ip_hdrlen(skb) + tcph->doff * 4; | ||
353 | |||
354 | if (!skb_make_writable(skb, optend)) | ||
355 | return 0; | ||
356 | |||
357 | dir = CTINFO2DIR(ctinfo); | ||
358 | |||
359 | while (optoff < optend) { | ||
360 | /* Usually: option, length. */ | ||
361 | unsigned char *op = skb->data + optoff; | ||
362 | |||
363 | switch (op[0]) { | ||
364 | case TCPOPT_EOL: | ||
365 | return 1; | ||
366 | case TCPOPT_NOP: | ||
367 | optoff++; | ||
368 | continue; | ||
369 | default: | ||
370 | /* no partial options */ | ||
371 | if (optoff + 1 == optend || | ||
372 | optoff + op[1] > optend || | ||
373 | op[1] < 2) | ||
374 | return 0; | ||
375 | if (op[0] == TCPOPT_SACK && | ||
376 | op[1] >= 2+TCPOLEN_SACK_PERBLOCK && | ||
377 | ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) | ||
378 | sack_adjust(skb, tcph, optoff+2, | ||
379 | optoff+op[1], &nat->seq[!dir]); | ||
380 | optoff += op[1]; | ||
381 | } | ||
382 | } | ||
383 | return 1; | ||
384 | } | ||
385 | |||
386 | /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ | ||
387 | int | ||
388 | nf_nat_seq_adjust(struct sk_buff *skb, | ||
389 | struct nf_conn *ct, | ||
390 | enum ip_conntrack_info ctinfo) | ||
391 | { | ||
392 | struct tcphdr *tcph; | ||
393 | int dir; | ||
394 | __be32 newseq, newack; | ||
395 | s16 seqoff, ackoff; | ||
396 | struct nf_conn_nat *nat = nfct_nat(ct); | ||
397 | struct nf_nat_seq *this_way, *other_way; | ||
398 | |||
399 | dir = CTINFO2DIR(ctinfo); | ||
400 | |||
401 | this_way = &nat->seq[dir]; | ||
402 | other_way = &nat->seq[!dir]; | ||
403 | |||
404 | if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) | ||
405 | return 0; | ||
406 | |||
407 | tcph = (void *)skb->data + ip_hdrlen(skb); | ||
408 | if (after(ntohl(tcph->seq), this_way->correction_pos)) | ||
409 | seqoff = this_way->offset_after; | ||
410 | else | ||
411 | seqoff = this_way->offset_before; | ||
412 | |||
413 | if (after(ntohl(tcph->ack_seq) - other_way->offset_before, | ||
414 | other_way->correction_pos)) | ||
415 | ackoff = other_way->offset_after; | ||
416 | else | ||
417 | ackoff = other_way->offset_before; | ||
418 | |||
419 | newseq = htonl(ntohl(tcph->seq) + seqoff); | ||
420 | newack = htonl(ntohl(tcph->ack_seq) - ackoff); | ||
421 | |||
422 | inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); | ||
423 | inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); | ||
424 | |||
425 | pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", | ||
426 | ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), | ||
427 | ntohl(newack)); | ||
428 | |||
429 | tcph->seq = newseq; | ||
430 | tcph->ack_seq = newack; | ||
431 | |||
432 | return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); | ||
433 | } | ||
434 | |||
435 | /* Setup NAT on this expected conntrack so it follows master. */ | ||
436 | /* If we fail to get a free NAT slot, we'll get dropped on confirm */ | ||
437 | void nf_nat_follow_master(struct nf_conn *ct, | ||
438 | struct nf_conntrack_expect *exp) | ||
439 | { | ||
440 | struct nf_nat_ipv4_range range; | ||
441 | |||
442 | /* This must be a fresh one. */ | ||
443 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | ||
444 | |||
445 | /* Change src to where master sends to */ | ||
446 | range.flags = NF_NAT_RANGE_MAP_IPS; | ||
447 | range.min_ip = range.max_ip | ||
448 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | ||
449 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); | ||
450 | |||
451 | /* For DST manip, map port here to where it's expected. */ | ||
452 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); | ||
453 | range.min = range.max = exp->saved_proto; | ||
454 | range.min_ip = range.max_ip | ||
455 | = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; | ||
456 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); | ||
457 | } | ||
458 | EXPORT_SYMBOL(nf_nat_follow_master); | ||
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c deleted file mode 100644 index 979ae165f4ef..000000000000 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ /dev/null | |||
@@ -1,99 +0,0 @@ | |||
1 | /* IRC extension for TCP NAT alteration. | ||
2 | * | ||
3 | * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> | ||
4 | * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation | ||
5 | * based on a copy of RR's ip_nat_ftp.c | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/moduleparam.h> | ||
15 | #include <linux/tcp.h> | ||
16 | #include <linux/kernel.h> | ||
17 | |||
18 | #include <net/netfilter/nf_nat.h> | ||
19 | #include <net/netfilter/nf_nat_helper.h> | ||
20 | #include <net/netfilter/nf_nat_rule.h> | ||
21 | #include <net/netfilter/nf_conntrack_helper.h> | ||
22 | #include <net/netfilter/nf_conntrack_expect.h> | ||
23 | #include <linux/netfilter/nf_conntrack_irc.h> | ||
24 | |||
25 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
26 | MODULE_DESCRIPTION("IRC (DCC) NAT helper"); | ||
27 | MODULE_LICENSE("GPL"); | ||
28 | MODULE_ALIAS("ip_nat_irc"); | ||
29 | |||
30 | static unsigned int help(struct sk_buff *skb, | ||
31 | enum ip_conntrack_info ctinfo, | ||
32 | unsigned int matchoff, | ||
33 | unsigned int matchlen, | ||
34 | struct nf_conntrack_expect *exp) | ||
35 | { | ||
36 | char buffer[sizeof("4294967296 65635")]; | ||
37 | u_int32_t ip; | ||
38 | u_int16_t port; | ||
39 | unsigned int ret; | ||
40 | |||
41 | /* Reply comes from server. */ | ||
42 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
43 | exp->dir = IP_CT_DIR_REPLY; | ||
44 | exp->expectfn = nf_nat_follow_master; | ||
45 | |||
46 | /* Try to get same port: if not, try to change it. */ | ||
47 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | ||
48 | int ret; | ||
49 | |||
50 | exp->tuple.dst.u.tcp.port = htons(port); | ||
51 | ret = nf_ct_expect_related(exp); | ||
52 | if (ret == 0) | ||
53 | break; | ||
54 | else if (ret != -EBUSY) { | ||
55 | port = 0; | ||
56 | break; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | if (port == 0) | ||
61 | return NF_DROP; | ||
62 | |||
63 | ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); | ||
64 | sprintf(buffer, "%u %u", ip, port); | ||
65 | pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", | ||
66 | buffer, &ip, port); | ||
67 | |||
68 | ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, | ||
69 | matchoff, matchlen, buffer, | ||
70 | strlen(buffer)); | ||
71 | if (ret != NF_ACCEPT) | ||
72 | nf_ct_unexpect_related(exp); | ||
73 | return ret; | ||
74 | } | ||
75 | |||
76 | static void __exit nf_nat_irc_fini(void) | ||
77 | { | ||
78 | RCU_INIT_POINTER(nf_nat_irc_hook, NULL); | ||
79 | synchronize_rcu(); | ||
80 | } | ||
81 | |||
82 | static int __init nf_nat_irc_init(void) | ||
83 | { | ||
84 | BUG_ON(nf_nat_irc_hook != NULL); | ||
85 | RCU_INIT_POINTER(nf_nat_irc_hook, help); | ||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ | ||
90 | static int warn_set(const char *val, struct kernel_param *kp) | ||
91 | { | ||
92 | printk(KERN_INFO KBUILD_MODNAME | ||
93 | ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); | ||
94 | return 0; | ||
95 | } | ||
96 | module_param_call(ports, warn_set, NULL, NULL, 0); | ||
97 | |||
98 | module_init(nf_nat_irc_init); | ||
99 | module_exit(nf_nat_irc_fini); | ||
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c new file mode 100644 index 000000000000..d8b2e14efddc --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | |||
@@ -0,0 +1,281 @@ | |||
1 | /* | ||
2 | * (C) 1999-2001 Paul `Rusty' Russell | ||
3 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
4 | * (C) 2011 Patrick McHardy <kaber@trash.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/types.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/icmp.h> | ||
16 | #include <linux/netfilter.h> | ||
17 | #include <linux/netfilter_ipv4.h> | ||
18 | #include <net/secure_seq.h> | ||
19 | #include <net/checksum.h> | ||
20 | #include <net/route.h> | ||
21 | #include <net/ip.h> | ||
22 | |||
23 | #include <net/netfilter/nf_conntrack_core.h> | ||
24 | #include <net/netfilter/nf_conntrack.h> | ||
25 | #include <net/netfilter/nf_nat_core.h> | ||
26 | #include <net/netfilter/nf_nat_l3proto.h> | ||
27 | #include <net/netfilter/nf_nat_l4proto.h> | ||
28 | |||
29 | static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; | ||
30 | |||
31 | #ifdef CONFIG_XFRM | ||
32 | static void nf_nat_ipv4_decode_session(struct sk_buff *skb, | ||
33 | const struct nf_conn *ct, | ||
34 | enum ip_conntrack_dir dir, | ||
35 | unsigned long statusbit, | ||
36 | struct flowi *fl) | ||
37 | { | ||
38 | const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; | ||
39 | struct flowi4 *fl4 = &fl->u.ip4; | ||
40 | |||
41 | if (ct->status & statusbit) { | ||
42 | fl4->daddr = t->dst.u3.ip; | ||
43 | if (t->dst.protonum == IPPROTO_TCP || | ||
44 | t->dst.protonum == IPPROTO_UDP || | ||
45 | t->dst.protonum == IPPROTO_UDPLITE || | ||
46 | t->dst.protonum == IPPROTO_DCCP || | ||
47 | t->dst.protonum == IPPROTO_SCTP) | ||
48 | fl4->fl4_dport = t->dst.u.all; | ||
49 | } | ||
50 | |||
51 | statusbit ^= IPS_NAT_MASK; | ||
52 | |||
53 | if (ct->status & statusbit) { | ||
54 | fl4->saddr = t->src.u3.ip; | ||
55 | if (t->dst.protonum == IPPROTO_TCP || | ||
56 | t->dst.protonum == IPPROTO_UDP || | ||
57 | t->dst.protonum == IPPROTO_UDPLITE || | ||
58 | t->dst.protonum == IPPROTO_DCCP || | ||
59 | t->dst.protonum == IPPROTO_SCTP) | ||
60 | fl4->fl4_sport = t->src.u.all; | ||
61 | } | ||
62 | } | ||
63 | #endif /* CONFIG_XFRM */ | ||
64 | |||
65 | static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, | ||
66 | const struct nf_nat_range *range) | ||
67 | { | ||
68 | return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && | ||
69 | ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); | ||
70 | } | ||
71 | |||
72 | static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, | ||
73 | __be16 dport) | ||
74 | { | ||
75 | return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); | ||
76 | } | ||
77 | |||
78 | static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, | ||
79 | unsigned int iphdroff, | ||
80 | const struct nf_nat_l4proto *l4proto, | ||
81 | const struct nf_conntrack_tuple *target, | ||
82 | enum nf_nat_manip_type maniptype) | ||
83 | { | ||
84 | struct iphdr *iph; | ||
85 | unsigned int hdroff; | ||
86 | |||
87 | if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) | ||
88 | return false; | ||
89 | |||
90 | iph = (void *)skb->data + iphdroff; | ||
91 | hdroff = iphdroff + iph->ihl * 4; | ||
92 | |||
93 | if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, | ||
94 | target, maniptype)) | ||
95 | return false; | ||
96 | iph = (void *)skb->data + iphdroff; | ||
97 | |||
98 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
99 | csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); | ||
100 | iph->saddr = target->src.u3.ip; | ||
101 | } else { | ||
102 | csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); | ||
103 | iph->daddr = target->dst.u3.ip; | ||
104 | } | ||
105 | return true; | ||
106 | } | ||
107 | |||
108 | static void nf_nat_ipv4_csum_update(struct sk_buff *skb, | ||
109 | unsigned int iphdroff, __sum16 *check, | ||
110 | const struct nf_conntrack_tuple *t, | ||
111 | enum nf_nat_manip_type maniptype) | ||
112 | { | ||
113 | struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
114 | __be32 oldip, newip; | ||
115 | |||
116 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
117 | oldip = iph->saddr; | ||
118 | newip = t->src.u3.ip; | ||
119 | } else { | ||
120 | oldip = iph->daddr; | ||
121 | newip = t->dst.u3.ip; | ||
122 | } | ||
123 | inet_proto_csum_replace4(check, skb, oldip, newip, 1); | ||
124 | } | ||
125 | |||
126 | static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, | ||
127 | u8 proto, void *data, __sum16 *check, | ||
128 | int datalen, int oldlen) | ||
129 | { | ||
130 | const struct iphdr *iph = ip_hdr(skb); | ||
131 | struct rtable *rt = skb_rtable(skb); | ||
132 | |||
133 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | ||
134 | if (!(rt->rt_flags & RTCF_LOCAL) && | ||
135 | (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { | ||
136 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
137 | skb->csum_start = skb_headroom(skb) + | ||
138 | skb_network_offset(skb) + | ||
139 | ip_hdrlen(skb); | ||
140 | skb->csum_offset = (void *)check - data; | ||
141 | *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
142 | datalen, proto, 0); | ||
143 | } else { | ||
144 | *check = 0; | ||
145 | *check = csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
146 | datalen, proto, | ||
147 | csum_partial(data, datalen, | ||
148 | 0)); | ||
149 | if (proto == IPPROTO_UDP && !*check) | ||
150 | *check = CSUM_MANGLED_0; | ||
151 | } | ||
152 | } else | ||
153 | inet_proto_csum_replace2(check, skb, | ||
154 | htons(oldlen), htons(datalen), 1); | ||
155 | } | ||
156 | |||
157 | static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], | ||
158 | struct nf_nat_range *range) | ||
159 | { | ||
160 | if (tb[CTA_NAT_V4_MINIP]) { | ||
161 | range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); | ||
162 | range->flags |= NF_NAT_RANGE_MAP_IPS; | ||
163 | } | ||
164 | |||
165 | if (tb[CTA_NAT_V4_MAXIP]) | ||
166 | range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); | ||
167 | else | ||
168 | range->max_addr.ip = range->min_addr.ip; | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { | ||
174 | .l3proto = NFPROTO_IPV4, | ||
175 | .in_range = nf_nat_ipv4_in_range, | ||
176 | .secure_port = nf_nat_ipv4_secure_port, | ||
177 | .manip_pkt = nf_nat_ipv4_manip_pkt, | ||
178 | .csum_update = nf_nat_ipv4_csum_update, | ||
179 | .csum_recalc = nf_nat_ipv4_csum_recalc, | ||
180 | .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, | ||
181 | #ifdef CONFIG_XFRM | ||
182 | .decode_session = nf_nat_ipv4_decode_session, | ||
183 | #endif | ||
184 | }; | ||
185 | |||
186 | int nf_nat_icmp_reply_translation(struct sk_buff *skb, | ||
187 | struct nf_conn *ct, | ||
188 | enum ip_conntrack_info ctinfo, | ||
189 | unsigned int hooknum) | ||
190 | { | ||
191 | struct { | ||
192 | struct icmphdr icmp; | ||
193 | struct iphdr ip; | ||
194 | } *inside; | ||
195 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
196 | enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); | ||
197 | unsigned int hdrlen = ip_hdrlen(skb); | ||
198 | const struct nf_nat_l4proto *l4proto; | ||
199 | struct nf_conntrack_tuple target; | ||
200 | unsigned long statusbit; | ||
201 | |||
202 | NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); | ||
203 | |||
204 | if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) | ||
205 | return 0; | ||
206 | if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) | ||
207 | return 0; | ||
208 | |||
209 | inside = (void *)skb->data + hdrlen; | ||
210 | if (inside->icmp.type == ICMP_REDIRECT) { | ||
211 | if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) | ||
212 | return 0; | ||
213 | if (ct->status & IPS_NAT_MASK) | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | if (manip == NF_NAT_MANIP_SRC) | ||
218 | statusbit = IPS_SRC_NAT; | ||
219 | else | ||
220 | statusbit = IPS_DST_NAT; | ||
221 | |||
222 | /* Invert if this is reply direction */ | ||
223 | if (dir == IP_CT_DIR_REPLY) | ||
224 | statusbit ^= IPS_NAT_MASK; | ||
225 | |||
226 | if (!(ct->status & statusbit)) | ||
227 | return 1; | ||
228 | |||
229 | l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); | ||
230 | if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), | ||
231 | l4proto, &ct->tuplehash[!dir].tuple, !manip)) | ||
232 | return 0; | ||
233 | |||
234 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | ||
235 | /* Reloading "inside" here since manip_pkt may reallocate */ | ||
236 | inside = (void *)skb->data + hdrlen; | ||
237 | inside->icmp.checksum = 0; | ||
238 | inside->icmp.checksum = | ||
239 | csum_fold(skb_checksum(skb, hdrlen, | ||
240 | skb->len - hdrlen, 0)); | ||
241 | } | ||
242 | |||
243 | /* Change outer to look like the reply to an incoming packet */ | ||
244 | nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); | ||
245 | l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); | ||
246 | if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) | ||
247 | return 0; | ||
248 | |||
249 | return 1; | ||
250 | } | ||
251 | EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); | ||
252 | |||
253 | static int __init nf_nat_l3proto_ipv4_init(void) | ||
254 | { | ||
255 | int err; | ||
256 | |||
257 | err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); | ||
258 | if (err < 0) | ||
259 | goto err1; | ||
260 | err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); | ||
261 | if (err < 0) | ||
262 | goto err2; | ||
263 | return err; | ||
264 | |||
265 | err2: | ||
266 | nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); | ||
267 | err1: | ||
268 | return err; | ||
269 | } | ||
270 | |||
271 | static void __exit nf_nat_l3proto_ipv4_exit(void) | ||
272 | { | ||
273 | nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); | ||
274 | nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); | ||
275 | } | ||
276 | |||
277 | MODULE_LICENSE("GPL"); | ||
278 | MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); | ||
279 | |||
280 | module_init(nf_nat_l3proto_ipv4_init); | ||
281 | module_exit(nf_nat_l3proto_ipv4_exit); | ||
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 388140881ebe..a06d7d74817d 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -22,7 +22,6 @@ | |||
22 | 22 | ||
23 | #include <net/netfilter/nf_nat.h> | 23 | #include <net/netfilter/nf_nat.h> |
24 | #include <net/netfilter/nf_nat_helper.h> | 24 | #include <net/netfilter/nf_nat_helper.h> |
25 | #include <net/netfilter/nf_nat_rule.h> | ||
26 | #include <net/netfilter/nf_conntrack_helper.h> | 25 | #include <net/netfilter/nf_conntrack_helper.h> |
27 | #include <net/netfilter/nf_conntrack_expect.h> | 26 | #include <net/netfilter/nf_conntrack_expect.h> |
28 | #include <net/netfilter/nf_conntrack_zones.h> | 27 | #include <net/netfilter/nf_conntrack_zones.h> |
@@ -47,7 +46,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
47 | struct nf_conntrack_tuple t; | 46 | struct nf_conntrack_tuple t; |
48 | const struct nf_ct_pptp_master *ct_pptp_info; | 47 | const struct nf_ct_pptp_master *ct_pptp_info; |
49 | const struct nf_nat_pptp *nat_pptp_info; | 48 | const struct nf_nat_pptp *nat_pptp_info; |
50 | struct nf_nat_ipv4_range range; | 49 | struct nf_nat_range range; |
51 | 50 | ||
52 | ct_pptp_info = nfct_help_data(master); | 51 | ct_pptp_info = nfct_help_data(master); |
53 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; | 52 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; |
@@ -89,21 +88,21 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
89 | 88 | ||
90 | /* Change src to where master sends to */ | 89 | /* Change src to where master sends to */ |
91 | range.flags = NF_NAT_RANGE_MAP_IPS; | 90 | range.flags = NF_NAT_RANGE_MAP_IPS; |
92 | range.min_ip = range.max_ip | 91 | range.min_addr = range.max_addr |
93 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | 92 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3; |
94 | if (exp->dir == IP_CT_DIR_ORIGINAL) { | 93 | if (exp->dir == IP_CT_DIR_ORIGINAL) { |
95 | range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | 94 | range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
96 | range.min = range.max = exp->saved_proto; | 95 | range.min_proto = range.max_proto = exp->saved_proto; |
97 | } | 96 | } |
98 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); | 97 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); |
99 | 98 | ||
100 | /* For DST manip, map port here to where it's expected. */ | 99 | /* For DST manip, map port here to where it's expected. */ |
101 | range.flags = NF_NAT_RANGE_MAP_IPS; | 100 | range.flags = NF_NAT_RANGE_MAP_IPS; |
102 | range.min_ip = range.max_ip | 101 | range.min_addr = range.max_addr |
103 | = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; | 102 | = ct->master->tuplehash[!exp->dir].tuple.src.u3; |
104 | if (exp->dir == IP_CT_DIR_REPLY) { | 103 | if (exp->dir == IP_CT_DIR_REPLY) { |
105 | range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | 104 | range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; |
106 | range.min = range.max = exp->saved_proto; | 105 | range.min_proto = range.max_proto = exp->saved_proto; |
107 | } | 106 | } |
108 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); | 107 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); |
109 | } | 108 | } |
@@ -113,6 +112,7 @@ static int | |||
113 | pptp_outbound_pkt(struct sk_buff *skb, | 112 | pptp_outbound_pkt(struct sk_buff *skb, |
114 | struct nf_conn *ct, | 113 | struct nf_conn *ct, |
115 | enum ip_conntrack_info ctinfo, | 114 | enum ip_conntrack_info ctinfo, |
115 | unsigned int protoff, | ||
116 | struct PptpControlHeader *ctlh, | 116 | struct PptpControlHeader *ctlh, |
117 | union pptp_ctrl_union *pptpReq) | 117 | union pptp_ctrl_union *pptpReq) |
118 | 118 | ||
@@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff *skb, | |||
175 | ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); | 175 | ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); |
176 | 176 | ||
177 | /* mangle packet */ | 177 | /* mangle packet */ |
178 | if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 178 | if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, |
179 | cid_off + sizeof(struct pptp_pkt_hdr) + | 179 | cid_off + sizeof(struct pptp_pkt_hdr) + |
180 | sizeof(struct PptpControlHeader), | 180 | sizeof(struct PptpControlHeader), |
181 | sizeof(new_callid), (char *)&new_callid, | 181 | sizeof(new_callid), (char *)&new_callid, |
@@ -216,6 +216,7 @@ static int | |||
216 | pptp_inbound_pkt(struct sk_buff *skb, | 216 | pptp_inbound_pkt(struct sk_buff *skb, |
217 | struct nf_conn *ct, | 217 | struct nf_conn *ct, |
218 | enum ip_conntrack_info ctinfo, | 218 | enum ip_conntrack_info ctinfo, |
219 | unsigned int protoff, | ||
219 | struct PptpControlHeader *ctlh, | 220 | struct PptpControlHeader *ctlh, |
220 | union pptp_ctrl_union *pptpReq) | 221 | union pptp_ctrl_union *pptpReq) |
221 | { | 222 | { |
@@ -268,7 +269,7 @@ pptp_inbound_pkt(struct sk_buff *skb, | |||
268 | pr_debug("altering peer call id from 0x%04x to 0x%04x\n", | 269 | pr_debug("altering peer call id from 0x%04x to 0x%04x\n", |
269 | ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); | 270 | ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); |
270 | 271 | ||
271 | if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 272 | if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, |
272 | pcid_off + sizeof(struct pptp_pkt_hdr) + | 273 | pcid_off + sizeof(struct pptp_pkt_hdr) + |
273 | sizeof(struct PptpControlHeader), | 274 | sizeof(struct PptpControlHeader), |
274 | sizeof(new_pcid), (char *)&new_pcid, | 275 | sizeof(new_pcid), (char *)&new_pcid, |
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c deleted file mode 100644 index 9993bc93e102..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ /dev/null | |||
@@ -1,114 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * (C) 2008 Patrick McHardy <kaber@trash.net> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/random.h> | ||
12 | #include <linux/ip.h> | ||
13 | |||
14 | #include <linux/netfilter.h> | ||
15 | #include <linux/export.h> | ||
16 | #include <net/secure_seq.h> | ||
17 | #include <net/netfilter/nf_nat.h> | ||
18 | #include <net/netfilter/nf_nat_core.h> | ||
19 | #include <net/netfilter/nf_nat_rule.h> | ||
20 | #include <net/netfilter/nf_nat_protocol.h> | ||
21 | |||
22 | bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, | ||
23 | enum nf_nat_manip_type maniptype, | ||
24 | const union nf_conntrack_man_proto *min, | ||
25 | const union nf_conntrack_man_proto *max) | ||
26 | { | ||
27 | __be16 port; | ||
28 | |||
29 | if (maniptype == NF_NAT_MANIP_SRC) | ||
30 | port = tuple->src.u.all; | ||
31 | else | ||
32 | port = tuple->dst.u.all; | ||
33 | |||
34 | return ntohs(port) >= ntohs(min->all) && | ||
35 | ntohs(port) <= ntohs(max->all); | ||
36 | } | ||
37 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); | ||
38 | |||
39 | void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
40 | const struct nf_nat_ipv4_range *range, | ||
41 | enum nf_nat_manip_type maniptype, | ||
42 | const struct nf_conn *ct, | ||
43 | u_int16_t *rover) | ||
44 | { | ||
45 | unsigned int range_size, min, i; | ||
46 | __be16 *portptr; | ||
47 | u_int16_t off; | ||
48 | |||
49 | if (maniptype == NF_NAT_MANIP_SRC) | ||
50 | portptr = &tuple->src.u.all; | ||
51 | else | ||
52 | portptr = &tuple->dst.u.all; | ||
53 | |||
54 | /* If no range specified... */ | ||
55 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { | ||
56 | /* If it's dst rewrite, can't change port */ | ||
57 | if (maniptype == NF_NAT_MANIP_DST) | ||
58 | return; | ||
59 | |||
60 | if (ntohs(*portptr) < 1024) { | ||
61 | /* Loose convention: >> 512 is credential passing */ | ||
62 | if (ntohs(*portptr) < 512) { | ||
63 | min = 1; | ||
64 | range_size = 511 - min + 1; | ||
65 | } else { | ||
66 | min = 600; | ||
67 | range_size = 1023 - min + 1; | ||
68 | } | ||
69 | } else { | ||
70 | min = 1024; | ||
71 | range_size = 65535 - 1024 + 1; | ||
72 | } | ||
73 | } else { | ||
74 | min = ntohs(range->min.all); | ||
75 | range_size = ntohs(range->max.all) - min + 1; | ||
76 | } | ||
77 | |||
78 | if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) | ||
79 | off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, | ||
80 | maniptype == NF_NAT_MANIP_SRC | ||
81 | ? tuple->dst.u.all | ||
82 | : tuple->src.u.all); | ||
83 | else | ||
84 | off = *rover; | ||
85 | |||
86 | for (i = 0; ; ++off) { | ||
87 | *portptr = htons(min + off % range_size); | ||
88 | if (++i != range_size && nf_nat_used_tuple(tuple, ct)) | ||
89 | continue; | ||
90 | if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) | ||
91 | *rover = off; | ||
92 | return; | ||
93 | } | ||
94 | return; | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); | ||
97 | |||
98 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
99 | int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], | ||
100 | struct nf_nat_ipv4_range *range) | ||
101 | { | ||
102 | if (tb[CTA_PROTONAT_PORT_MIN]) { | ||
103 | range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); | ||
104 | range->max.all = range->min.tcp.port; | ||
105 | range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | ||
106 | } | ||
107 | if (tb[CTA_PROTONAT_PORT_MAX]) { | ||
108 | range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); | ||
109 | range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | ||
110 | } | ||
111 | return 0; | ||
112 | } | ||
113 | EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); | ||
114 | #endif | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c deleted file mode 100644 index 3f67138d187c..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ /dev/null | |||
@@ -1,106 +0,0 @@ | |||
1 | /* | ||
2 | * DCCP NAT protocol helper | ||
3 | * | ||
4 | * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <linux/dccp.h> | ||
18 | |||
19 | #include <net/netfilter/nf_conntrack.h> | ||
20 | #include <net/netfilter/nf_nat.h> | ||
21 | #include <net/netfilter/nf_nat_protocol.h> | ||
22 | |||
23 | static u_int16_t dccp_port_rover; | ||
24 | |||
25 | static void | ||
26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
27 | const struct nf_nat_ipv4_range *range, | ||
28 | enum nf_nat_manip_type maniptype, | ||
29 | const struct nf_conn *ct) | ||
30 | { | ||
31 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | ||
32 | &dccp_port_rover); | ||
33 | } | ||
34 | |||
35 | static bool | ||
36 | dccp_manip_pkt(struct sk_buff *skb, | ||
37 | unsigned int iphdroff, | ||
38 | const struct nf_conntrack_tuple *tuple, | ||
39 | enum nf_nat_manip_type maniptype) | ||
40 | { | ||
41 | const struct iphdr *iph = (const void *)(skb->data + iphdroff); | ||
42 | struct dccp_hdr *hdr; | ||
43 | unsigned int hdroff = iphdroff + iph->ihl * 4; | ||
44 | __be32 oldip, newip; | ||
45 | __be16 *portptr, oldport, newport; | ||
46 | int hdrsize = 8; /* DCCP connection tracking guarantees this much */ | ||
47 | |||
48 | if (skb->len >= hdroff + sizeof(struct dccp_hdr)) | ||
49 | hdrsize = sizeof(struct dccp_hdr); | ||
50 | |||
51 | if (!skb_make_writable(skb, hdroff + hdrsize)) | ||
52 | return false; | ||
53 | |||
54 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
55 | hdr = (struct dccp_hdr *)(skb->data + hdroff); | ||
56 | |||
57 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
58 | oldip = iph->saddr; | ||
59 | newip = tuple->src.u3.ip; | ||
60 | newport = tuple->src.u.dccp.port; | ||
61 | portptr = &hdr->dccph_sport; | ||
62 | } else { | ||
63 | oldip = iph->daddr; | ||
64 | newip = tuple->dst.u3.ip; | ||
65 | newport = tuple->dst.u.dccp.port; | ||
66 | portptr = &hdr->dccph_dport; | ||
67 | } | ||
68 | |||
69 | oldport = *portptr; | ||
70 | *portptr = newport; | ||
71 | |||
72 | if (hdrsize < sizeof(*hdr)) | ||
73 | return true; | ||
74 | |||
75 | inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1); | ||
76 | inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, | ||
77 | 0); | ||
78 | return true; | ||
79 | } | ||
80 | |||
81 | static const struct nf_nat_protocol nf_nat_protocol_dccp = { | ||
82 | .protonum = IPPROTO_DCCP, | ||
83 | .manip_pkt = dccp_manip_pkt, | ||
84 | .in_range = nf_nat_proto_in_range, | ||
85 | .unique_tuple = dccp_unique_tuple, | ||
86 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
87 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
88 | #endif | ||
89 | }; | ||
90 | |||
91 | static int __init nf_nat_proto_dccp_init(void) | ||
92 | { | ||
93 | return nf_nat_protocol_register(&nf_nat_protocol_dccp); | ||
94 | } | ||
95 | |||
96 | static void __exit nf_nat_proto_dccp_fini(void) | ||
97 | { | ||
98 | nf_nat_protocol_unregister(&nf_nat_protocol_dccp); | ||
99 | } | ||
100 | |||
101 | module_init(nf_nat_proto_dccp_init); | ||
102 | module_exit(nf_nat_proto_dccp_fini); | ||
103 | |||
104 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
105 | MODULE_DESCRIPTION("DCCP NAT protocol helper"); | ||
106 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 46ba0b9ab985..ea44f02563b5 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
@@ -28,8 +28,7 @@ | |||
28 | #include <linux/ip.h> | 28 | #include <linux/ip.h> |
29 | 29 | ||
30 | #include <net/netfilter/nf_nat.h> | 30 | #include <net/netfilter/nf_nat.h> |
31 | #include <net/netfilter/nf_nat_rule.h> | 31 | #include <net/netfilter/nf_nat_l4proto.h> |
32 | #include <net/netfilter/nf_nat_protocol.h> | ||
33 | #include <linux/netfilter/nf_conntrack_proto_gre.h> | 32 | #include <linux/netfilter/nf_conntrack_proto_gre.h> |
34 | 33 | ||
35 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
@@ -38,8 +37,9 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | |||
38 | 37 | ||
39 | /* generate unique tuple ... */ | 38 | /* generate unique tuple ... */ |
40 | static void | 39 | static void |
41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, | 40 | gre_unique_tuple(const struct nf_nat_l3proto *l3proto, |
42 | const struct nf_nat_ipv4_range *range, | 41 | struct nf_conntrack_tuple *tuple, |
42 | const struct nf_nat_range *range, | ||
43 | enum nf_nat_manip_type maniptype, | 43 | enum nf_nat_manip_type maniptype, |
44 | const struct nf_conn *ct) | 44 | const struct nf_conn *ct) |
45 | { | 45 | { |
@@ -62,8 +62,8 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
62 | min = 1; | 62 | min = 1; |
63 | range_size = 0xffff; | 63 | range_size = 0xffff; |
64 | } else { | 64 | } else { |
65 | min = ntohs(range->min.gre.key); | 65 | min = ntohs(range->min_proto.gre.key); |
66 | range_size = ntohs(range->max.gre.key) - min + 1; | 66 | range_size = ntohs(range->max_proto.gre.key) - min + 1; |
67 | } | 67 | } |
68 | 68 | ||
69 | pr_debug("min = %u, range_size = %u\n", min, range_size); | 69 | pr_debug("min = %u, range_size = %u\n", min, range_size); |
@@ -80,14 +80,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
80 | 80 | ||
81 | /* manipulate a GRE packet according to maniptype */ | 81 | /* manipulate a GRE packet according to maniptype */ |
82 | static bool | 82 | static bool |
83 | gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | 83 | gre_manip_pkt(struct sk_buff *skb, |
84 | const struct nf_nat_l3proto *l3proto, | ||
85 | unsigned int iphdroff, unsigned int hdroff, | ||
84 | const struct nf_conntrack_tuple *tuple, | 86 | const struct nf_conntrack_tuple *tuple, |
85 | enum nf_nat_manip_type maniptype) | 87 | enum nf_nat_manip_type maniptype) |
86 | { | 88 | { |
87 | const struct gre_hdr *greh; | 89 | const struct gre_hdr *greh; |
88 | struct gre_hdr_pptp *pgreh; | 90 | struct gre_hdr_pptp *pgreh; |
89 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
90 | unsigned int hdroff = iphdroff + iph->ihl * 4; | ||
91 | 91 | ||
92 | /* pgreh includes two optional 32bit fields which are not required | 92 | /* pgreh includes two optional 32bit fields which are not required |
93 | * to be there. That's where the magic '8' comes from */ | 93 | * to be there. That's where the magic '8' comes from */ |
@@ -117,24 +117,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
117 | return true; | 117 | return true; |
118 | } | 118 | } |
119 | 119 | ||
120 | static const struct nf_nat_protocol gre = { | 120 | static const struct nf_nat_l4proto gre = { |
121 | .protonum = IPPROTO_GRE, | 121 | .l4proto = IPPROTO_GRE, |
122 | .manip_pkt = gre_manip_pkt, | 122 | .manip_pkt = gre_manip_pkt, |
123 | .in_range = nf_nat_proto_in_range, | 123 | .in_range = nf_nat_l4proto_in_range, |
124 | .unique_tuple = gre_unique_tuple, | 124 | .unique_tuple = gre_unique_tuple, |
125 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 125 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
126 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 126 | .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, |
127 | #endif | 127 | #endif |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static int __init nf_nat_proto_gre_init(void) | 130 | static int __init nf_nat_proto_gre_init(void) |
131 | { | 131 | { |
132 | return nf_nat_protocol_register(&gre); | 132 | return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); |
133 | } | 133 | } |
134 | 134 | ||
135 | static void __exit nf_nat_proto_gre_fini(void) | 135 | static void __exit nf_nat_proto_gre_fini(void) |
136 | { | 136 | { |
137 | nf_nat_protocol_unregister(&gre); | 137 | nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); |
138 | } | 138 | } |
139 | 139 | ||
140 | module_init(nf_nat_proto_gre_init); | 140 | module_init(nf_nat_proto_gre_init); |
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index b35172851bae..eb303471bcf6 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
@@ -15,8 +15,7 @@ | |||
15 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> |
16 | #include <net/netfilter/nf_nat.h> | 16 | #include <net/netfilter/nf_nat.h> |
17 | #include <net/netfilter/nf_nat_core.h> | 17 | #include <net/netfilter/nf_nat_core.h> |
18 | #include <net/netfilter/nf_nat_rule.h> | 18 | #include <net/netfilter/nf_nat_l4proto.h> |
19 | #include <net/netfilter/nf_nat_protocol.h> | ||
20 | 19 | ||
21 | static bool | 20 | static bool |
22 | icmp_in_range(const struct nf_conntrack_tuple *tuple, | 21 | icmp_in_range(const struct nf_conntrack_tuple *tuple, |
@@ -29,8 +28,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, | |||
29 | } | 28 | } |
30 | 29 | ||
31 | static void | 30 | static void |
32 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | 31 | icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, |
33 | const struct nf_nat_ipv4_range *range, | 32 | struct nf_conntrack_tuple *tuple, |
33 | const struct nf_nat_range *range, | ||
34 | enum nf_nat_manip_type maniptype, | 34 | enum nf_nat_manip_type maniptype, |
35 | const struct nf_conn *ct) | 35 | const struct nf_conn *ct) |
36 | { | 36 | { |
@@ -38,13 +38,14 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
38 | unsigned int range_size; | 38 | unsigned int range_size; |
39 | unsigned int i; | 39 | unsigned int i; |
40 | 40 | ||
41 | range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; | 41 | range_size = ntohs(range->max_proto.icmp.id) - |
42 | ntohs(range->min_proto.icmp.id) + 1; | ||
42 | /* If no range specified... */ | 43 | /* If no range specified... */ |
43 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) | 44 | if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) |
44 | range_size = 0xFFFF; | 45 | range_size = 0xFFFF; |
45 | 46 | ||
46 | for (i = 0; ; ++id) { | 47 | for (i = 0; ; ++id) { |
47 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + | 48 | tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + |
48 | (id % range_size)); | 49 | (id % range_size)); |
49 | if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) | 50 | if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) |
50 | return; | 51 | return; |
@@ -54,13 +55,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
54 | 55 | ||
55 | static bool | 56 | static bool |
56 | icmp_manip_pkt(struct sk_buff *skb, | 57 | icmp_manip_pkt(struct sk_buff *skb, |
57 | unsigned int iphdroff, | 58 | const struct nf_nat_l3proto *l3proto, |
59 | unsigned int iphdroff, unsigned int hdroff, | ||
58 | const struct nf_conntrack_tuple *tuple, | 60 | const struct nf_conntrack_tuple *tuple, |
59 | enum nf_nat_manip_type maniptype) | 61 | enum nf_nat_manip_type maniptype) |
60 | { | 62 | { |
61 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
62 | struct icmphdr *hdr; | 63 | struct icmphdr *hdr; |
63 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
64 | 64 | ||
65 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | 65 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) |
66 | return false; | 66 | return false; |
@@ -72,12 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb, | |||
72 | return true; | 72 | return true; |
73 | } | 73 | } |
74 | 74 | ||
75 | const struct nf_nat_protocol nf_nat_protocol_icmp = { | 75 | const struct nf_nat_l4proto nf_nat_l4proto_icmp = { |
76 | .protonum = IPPROTO_ICMP, | 76 | .l4proto = IPPROTO_ICMP, |
77 | .manip_pkt = icmp_manip_pkt, | 77 | .manip_pkt = icmp_manip_pkt, |
78 | .in_range = icmp_in_range, | 78 | .in_range = icmp_in_range, |
79 | .unique_tuple = icmp_unique_tuple, | 79 | .unique_tuple = icmp_unique_tuple, |
80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
81 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | 81 | .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, |
82 | #endif | 82 | #endif |
83 | }; | 83 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c deleted file mode 100644 index 3cce9b6c1c29..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ /dev/null | |||
@@ -1,96 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/ip.h> | ||
12 | #include <linux/sctp.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <net/sctp/checksum.h> | ||
15 | |||
16 | #include <net/netfilter/nf_nat_protocol.h> | ||
17 | |||
18 | static u_int16_t nf_sctp_port_rover; | ||
19 | |||
20 | static void | ||
21 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
22 | const struct nf_nat_ipv4_range *range, | ||
23 | enum nf_nat_manip_type maniptype, | ||
24 | const struct nf_conn *ct) | ||
25 | { | ||
26 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | ||
27 | &nf_sctp_port_rover); | ||
28 | } | ||
29 | |||
30 | static bool | ||
31 | sctp_manip_pkt(struct sk_buff *skb, | ||
32 | unsigned int iphdroff, | ||
33 | const struct nf_conntrack_tuple *tuple, | ||
34 | enum nf_nat_manip_type maniptype) | ||
35 | { | ||
36 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
37 | struct sk_buff *frag; | ||
38 | sctp_sctphdr_t *hdr; | ||
39 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
40 | __be32 oldip, newip; | ||
41 | __be32 crc32; | ||
42 | |||
43 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | ||
44 | return false; | ||
45 | |||
46 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
47 | hdr = (struct sctphdr *)(skb->data + hdroff); | ||
48 | |||
49 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
50 | /* Get rid of src ip and src pt */ | ||
51 | oldip = iph->saddr; | ||
52 | newip = tuple->src.u3.ip; | ||
53 | hdr->source = tuple->src.u.sctp.port; | ||
54 | } else { | ||
55 | /* Get rid of dst ip and dst pt */ | ||
56 | oldip = iph->daddr; | ||
57 | newip = tuple->dst.u3.ip; | ||
58 | hdr->dest = tuple->dst.u.sctp.port; | ||
59 | } | ||
60 | |||
61 | crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); | ||
62 | skb_walk_frags(skb, frag) | ||
63 | crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), | ||
64 | crc32); | ||
65 | crc32 = sctp_end_cksum(crc32); | ||
66 | hdr->checksum = crc32; | ||
67 | |||
68 | return true; | ||
69 | } | ||
70 | |||
71 | static const struct nf_nat_protocol nf_nat_protocol_sctp = { | ||
72 | .protonum = IPPROTO_SCTP, | ||
73 | .manip_pkt = sctp_manip_pkt, | ||
74 | .in_range = nf_nat_proto_in_range, | ||
75 | .unique_tuple = sctp_unique_tuple, | ||
76 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
77 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
78 | #endif | ||
79 | }; | ||
80 | |||
81 | static int __init nf_nat_proto_sctp_init(void) | ||
82 | { | ||
83 | return nf_nat_protocol_register(&nf_nat_protocol_sctp); | ||
84 | } | ||
85 | |||
86 | static void __exit nf_nat_proto_sctp_exit(void) | ||
87 | { | ||
88 | nf_nat_protocol_unregister(&nf_nat_protocol_sctp); | ||
89 | } | ||
90 | |||
91 | module_init(nf_nat_proto_sctp_init); | ||
92 | module_exit(nf_nat_proto_sctp_exit); | ||
93 | |||
94 | MODULE_LICENSE("GPL"); | ||
95 | MODULE_DESCRIPTION("SCTP NAT protocol helper"); | ||
96 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c deleted file mode 100644 index 9fb4b4e72bbf..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ /dev/null | |||
@@ -1,91 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/export.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <linux/tcp.h> | ||
14 | |||
15 | #include <linux/netfilter.h> | ||
16 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
17 | #include <net/netfilter/nf_nat.h> | ||
18 | #include <net/netfilter/nf_nat_rule.h> | ||
19 | #include <net/netfilter/nf_nat_protocol.h> | ||
20 | #include <net/netfilter/nf_nat_core.h> | ||
21 | |||
22 | static u_int16_t tcp_port_rover; | ||
23 | |||
24 | static void | ||
25 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
26 | const struct nf_nat_ipv4_range *range, | ||
27 | enum nf_nat_manip_type maniptype, | ||
28 | const struct nf_conn *ct) | ||
29 | { | ||
30 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); | ||
31 | } | ||
32 | |||
33 | static bool | ||
34 | tcp_manip_pkt(struct sk_buff *skb, | ||
35 | unsigned int iphdroff, | ||
36 | const struct nf_conntrack_tuple *tuple, | ||
37 | enum nf_nat_manip_type maniptype) | ||
38 | { | ||
39 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
40 | struct tcphdr *hdr; | ||
41 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
42 | __be32 oldip, newip; | ||
43 | __be16 *portptr, newport, oldport; | ||
44 | int hdrsize = 8; /* TCP connection tracking guarantees this much */ | ||
45 | |||
46 | /* this could be a inner header returned in icmp packet; in such | ||
47 | cases we cannot update the checksum field since it is outside of | ||
48 | the 8 bytes of transport layer headers we are guaranteed */ | ||
49 | if (skb->len >= hdroff + sizeof(struct tcphdr)) | ||
50 | hdrsize = sizeof(struct tcphdr); | ||
51 | |||
52 | if (!skb_make_writable(skb, hdroff + hdrsize)) | ||
53 | return false; | ||
54 | |||
55 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
56 | hdr = (struct tcphdr *)(skb->data + hdroff); | ||
57 | |||
58 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
59 | /* Get rid of src ip and src pt */ | ||
60 | oldip = iph->saddr; | ||
61 | newip = tuple->src.u3.ip; | ||
62 | newport = tuple->src.u.tcp.port; | ||
63 | portptr = &hdr->source; | ||
64 | } else { | ||
65 | /* Get rid of dst ip and dst pt */ | ||
66 | oldip = iph->daddr; | ||
67 | newip = tuple->dst.u3.ip; | ||
68 | newport = tuple->dst.u.tcp.port; | ||
69 | portptr = &hdr->dest; | ||
70 | } | ||
71 | |||
72 | oldport = *portptr; | ||
73 | *portptr = newport; | ||
74 | |||
75 | if (hdrsize < sizeof(*hdr)) | ||
76 | return true; | ||
77 | |||
78 | inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); | ||
79 | inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); | ||
80 | return true; | ||
81 | } | ||
82 | |||
83 | const struct nf_nat_protocol nf_nat_protocol_tcp = { | ||
84 | .protonum = IPPROTO_TCP, | ||
85 | .manip_pkt = tcp_manip_pkt, | ||
86 | .in_range = nf_nat_proto_in_range, | ||
87 | .unique_tuple = tcp_unique_tuple, | ||
88 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
89 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
90 | #endif | ||
91 | }; | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c deleted file mode 100644 index 9883336e628f..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ /dev/null | |||
@@ -1,82 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/export.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <linux/udp.h> | ||
14 | |||
15 | #include <linux/netfilter.h> | ||
16 | #include <net/netfilter/nf_nat.h> | ||
17 | #include <net/netfilter/nf_nat_core.h> | ||
18 | #include <net/netfilter/nf_nat_rule.h> | ||
19 | #include <net/netfilter/nf_nat_protocol.h> | ||
20 | |||
21 | static u_int16_t udp_port_rover; | ||
22 | |||
23 | static void | ||
24 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
25 | const struct nf_nat_ipv4_range *range, | ||
26 | enum nf_nat_manip_type maniptype, | ||
27 | const struct nf_conn *ct) | ||
28 | { | ||
29 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); | ||
30 | } | ||
31 | |||
32 | static bool | ||
33 | udp_manip_pkt(struct sk_buff *skb, | ||
34 | unsigned int iphdroff, | ||
35 | const struct nf_conntrack_tuple *tuple, | ||
36 | enum nf_nat_manip_type maniptype) | ||
37 | { | ||
38 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
39 | struct udphdr *hdr; | ||
40 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
41 | __be32 oldip, newip; | ||
42 | __be16 *portptr, newport; | ||
43 | |||
44 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | ||
45 | return false; | ||
46 | |||
47 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
48 | hdr = (struct udphdr *)(skb->data + hdroff); | ||
49 | |||
50 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
51 | /* Get rid of src ip and src pt */ | ||
52 | oldip = iph->saddr; | ||
53 | newip = tuple->src.u3.ip; | ||
54 | newport = tuple->src.u.udp.port; | ||
55 | portptr = &hdr->source; | ||
56 | } else { | ||
57 | /* Get rid of dst ip and dst pt */ | ||
58 | oldip = iph->daddr; | ||
59 | newip = tuple->dst.u3.ip; | ||
60 | newport = tuple->dst.u.udp.port; | ||
61 | portptr = &hdr->dest; | ||
62 | } | ||
63 | if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { | ||
64 | inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); | ||
65 | inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, | ||
66 | 0); | ||
67 | if (!hdr->check) | ||
68 | hdr->check = CSUM_MANGLED_0; | ||
69 | } | ||
70 | *portptr = newport; | ||
71 | return true; | ||
72 | } | ||
73 | |||
74 | const struct nf_nat_protocol nf_nat_protocol_udp = { | ||
75 | .protonum = IPPROTO_UDP, | ||
76 | .manip_pkt = udp_manip_pkt, | ||
77 | .in_range = nf_nat_proto_in_range, | ||
78 | .unique_tuple = udp_unique_tuple, | ||
79 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
80 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
81 | #endif | ||
82 | }; | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c deleted file mode 100644 index d24d10a7beb2..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ /dev/null | |||
@@ -1,98 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * (C) 2008 Patrick McHardy <kaber@trash.net> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <linux/udp.h> | ||
14 | |||
15 | #include <linux/netfilter.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <net/netfilter/nf_nat.h> | ||
18 | #include <net/netfilter/nf_nat_protocol.h> | ||
19 | |||
20 | static u_int16_t udplite_port_rover; | ||
21 | |||
22 | static void | ||
23 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
24 | const struct nf_nat_ipv4_range *range, | ||
25 | enum nf_nat_manip_type maniptype, | ||
26 | const struct nf_conn *ct) | ||
27 | { | ||
28 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | ||
29 | &udplite_port_rover); | ||
30 | } | ||
31 | |||
32 | static bool | ||
33 | udplite_manip_pkt(struct sk_buff *skb, | ||
34 | unsigned int iphdroff, | ||
35 | const struct nf_conntrack_tuple *tuple, | ||
36 | enum nf_nat_manip_type maniptype) | ||
37 | { | ||
38 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
39 | struct udphdr *hdr; | ||
40 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
41 | __be32 oldip, newip; | ||
42 | __be16 *portptr, newport; | ||
43 | |||
44 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | ||
45 | return false; | ||
46 | |||
47 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
48 | hdr = (struct udphdr *)(skb->data + hdroff); | ||
49 | |||
50 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
51 | /* Get rid of src ip and src pt */ | ||
52 | oldip = iph->saddr; | ||
53 | newip = tuple->src.u3.ip; | ||
54 | newport = tuple->src.u.udp.port; | ||
55 | portptr = &hdr->source; | ||
56 | } else { | ||
57 | /* Get rid of dst ip and dst pt */ | ||
58 | oldip = iph->daddr; | ||
59 | newip = tuple->dst.u3.ip; | ||
60 | newport = tuple->dst.u.udp.port; | ||
61 | portptr = &hdr->dest; | ||
62 | } | ||
63 | |||
64 | inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); | ||
65 | inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); | ||
66 | if (!hdr->check) | ||
67 | hdr->check = CSUM_MANGLED_0; | ||
68 | |||
69 | *portptr = newport; | ||
70 | return true; | ||
71 | } | ||
72 | |||
73 | static const struct nf_nat_protocol nf_nat_protocol_udplite = { | ||
74 | .protonum = IPPROTO_UDPLITE, | ||
75 | .manip_pkt = udplite_manip_pkt, | ||
76 | .in_range = nf_nat_proto_in_range, | ||
77 | .unique_tuple = udplite_unique_tuple, | ||
78 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
79 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
80 | #endif | ||
81 | }; | ||
82 | |||
83 | static int __init nf_nat_proto_udplite_init(void) | ||
84 | { | ||
85 | return nf_nat_protocol_register(&nf_nat_protocol_udplite); | ||
86 | } | ||
87 | |||
88 | static void __exit nf_nat_proto_udplite_fini(void) | ||
89 | { | ||
90 | nf_nat_protocol_unregister(&nf_nat_protocol_udplite); | ||
91 | } | ||
92 | |||
93 | module_init(nf_nat_proto_udplite_init); | ||
94 | module_exit(nf_nat_proto_udplite_fini); | ||
95 | |||
96 | MODULE_LICENSE("GPL"); | ||
97 | MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); | ||
98 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c deleted file mode 100644 index e0afe8112b1c..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* The "unknown" protocol. This is what is used for protocols we | ||
2 | * don't understand. It's returned by ip_ct_find_proto(). | ||
3 | */ | ||
4 | |||
5 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
6 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/init.h> | ||
15 | |||
16 | #include <linux/netfilter.h> | ||
17 | #include <net/netfilter/nf_nat.h> | ||
18 | #include <net/netfilter/nf_nat_rule.h> | ||
19 | #include <net/netfilter/nf_nat_protocol.h> | ||
20 | |||
21 | static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, | ||
22 | enum nf_nat_manip_type manip_type, | ||
23 | const union nf_conntrack_man_proto *min, | ||
24 | const union nf_conntrack_man_proto *max) | ||
25 | { | ||
26 | return true; | ||
27 | } | ||
28 | |||
29 | static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
30 | const struct nf_nat_ipv4_range *range, | ||
31 | enum nf_nat_manip_type maniptype, | ||
32 | const struct nf_conn *ct) | ||
33 | { | ||
34 | /* Sorry: we can't help you; if it's not unique, we can't frob | ||
35 | anything. */ | ||
36 | return; | ||
37 | } | ||
38 | |||
39 | static bool | ||
40 | unknown_manip_pkt(struct sk_buff *skb, | ||
41 | unsigned int iphdroff, | ||
42 | const struct nf_conntrack_tuple *tuple, | ||
43 | enum nf_nat_manip_type maniptype) | ||
44 | { | ||
45 | return true; | ||
46 | } | ||
47 | |||
48 | const struct nf_nat_protocol nf_nat_unknown_protocol = { | ||
49 | .manip_pkt = unknown_manip_pkt, | ||
50 | .in_range = unknown_in_range, | ||
51 | .unique_tuple = unknown_unique_tuple, | ||
52 | }; | ||
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c deleted file mode 100644 index d2a9dc314e0e..000000000000 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ /dev/null | |||
@@ -1,214 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* Everything about the rules for NAT. */ | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <linux/netfilter.h> | ||
14 | #include <linux/netfilter_ipv4.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kmod.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/proc_fs.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <net/checksum.h> | ||
21 | #include <net/route.h> | ||
22 | #include <linux/bitops.h> | ||
23 | |||
24 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
25 | #include <net/netfilter/nf_nat.h> | ||
26 | #include <net/netfilter/nf_nat_core.h> | ||
27 | #include <net/netfilter/nf_nat_rule.h> | ||
28 | |||
/* Bitmask of the four netfilter hooks used as valid_hooks of the NAT table. */
#define NAT_VALID_HOOKS	((1 << NF_INET_PRE_ROUTING) | \
			 (1 << NF_INET_LOCAL_IN) | \
			 (1 << NF_INET_LOCAL_OUT) | \
			 (1 << NF_INET_POST_ROUTING))
33 | |||
34 | static const struct xt_table nat_table = { | ||
35 | .name = "nat", | ||
36 | .valid_hooks = NAT_VALID_HOOKS, | ||
37 | .me = THIS_MODULE, | ||
38 | .af = NFPROTO_IPV4, | ||
39 | }; | ||
40 | |||
41 | /* Source NAT */ | ||
42 | static unsigned int | ||
43 | ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) | ||
44 | { | ||
45 | struct nf_conn *ct; | ||
46 | enum ip_conntrack_info ctinfo; | ||
47 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
48 | |||
49 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || | ||
50 | par->hooknum == NF_INET_LOCAL_IN); | ||
51 | |||
52 | ct = nf_ct_get(skb, &ctinfo); | ||
53 | |||
54 | /* Connection must be valid and new. */ | ||
55 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || | ||
56 | ctinfo == IP_CT_RELATED_REPLY)); | ||
57 | NF_CT_ASSERT(par->out != NULL); | ||
58 | |||
59 | return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); | ||
60 | } | ||
61 | |||
62 | static unsigned int | ||
63 | ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) | ||
64 | { | ||
65 | struct nf_conn *ct; | ||
66 | enum ip_conntrack_info ctinfo; | ||
67 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
68 | |||
69 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | ||
70 | par->hooknum == NF_INET_LOCAL_OUT); | ||
71 | |||
72 | ct = nf_ct_get(skb, &ctinfo); | ||
73 | |||
74 | /* Connection must be valid and new. */ | ||
75 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | ||
76 | |||
77 | return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); | ||
78 | } | ||
79 | |||
80 | static int ipt_snat_checkentry(const struct xt_tgchk_param *par) | ||
81 | { | ||
82 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
83 | |||
84 | /* Must be a valid range */ | ||
85 | if (mr->rangesize != 1) { | ||
86 | pr_info("SNAT: multiple ranges no longer supported\n"); | ||
87 | return -EINVAL; | ||
88 | } | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) | ||
93 | { | ||
94 | const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; | ||
95 | |||
96 | /* Must be a valid range */ | ||
97 | if (mr->rangesize != 1) { | ||
98 | pr_info("DNAT: multiple ranges no longer supported\n"); | ||
99 | return -EINVAL; | ||
100 | } | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static unsigned int | ||
105 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | ||
106 | { | ||
107 | /* Force range to this IP; let proto decide mapping for | ||
108 | per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). | ||
109 | */ | ||
110 | struct nf_nat_ipv4_range range; | ||
111 | |||
112 | range.flags = 0; | ||
113 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, | ||
114 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? | ||
115 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : | ||
116 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); | ||
117 | |||
118 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); | ||
119 | } | ||
120 | |||
121 | int nf_nat_rule_find(struct sk_buff *skb, | ||
122 | unsigned int hooknum, | ||
123 | const struct net_device *in, | ||
124 | const struct net_device *out, | ||
125 | struct nf_conn *ct) | ||
126 | { | ||
127 | struct net *net = nf_ct_net(ct); | ||
128 | int ret; | ||
129 | |||
130 | ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); | ||
131 | |||
132 | if (ret == NF_ACCEPT) { | ||
133 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) | ||
134 | /* NUL mapping */ | ||
135 | ret = alloc_null_binding(ct, hooknum); | ||
136 | } | ||
137 | return ret; | ||
138 | } | ||
139 | |||
140 | static struct xt_target ipt_snat_reg __read_mostly = { | ||
141 | .name = "SNAT", | ||
142 | .target = ipt_snat_target, | ||
143 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), | ||
144 | .table = "nat", | ||
145 | .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), | ||
146 | .checkentry = ipt_snat_checkentry, | ||
147 | .family = AF_INET, | ||
148 | }; | ||
149 | |||
150 | static struct xt_target ipt_dnat_reg __read_mostly = { | ||
151 | .name = "DNAT", | ||
152 | .target = ipt_dnat_target, | ||
153 | .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), | ||
154 | .table = "nat", | ||
155 | .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), | ||
156 | .checkentry = ipt_dnat_checkentry, | ||
157 | .family = AF_INET, | ||
158 | }; | ||
159 | |||
160 | static int __net_init nf_nat_rule_net_init(struct net *net) | ||
161 | { | ||
162 | struct ipt_replace *repl; | ||
163 | |||
164 | repl = ipt_alloc_initial_table(&nat_table); | ||
165 | if (repl == NULL) | ||
166 | return -ENOMEM; | ||
167 | net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); | ||
168 | kfree(repl); | ||
169 | if (IS_ERR(net->ipv4.nat_table)) | ||
170 | return PTR_ERR(net->ipv4.nat_table); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | static void __net_exit nf_nat_rule_net_exit(struct net *net) | ||
175 | { | ||
176 | ipt_unregister_table(net, net->ipv4.nat_table); | ||
177 | } | ||
178 | |||
179 | static struct pernet_operations nf_nat_rule_net_ops = { | ||
180 | .init = nf_nat_rule_net_init, | ||
181 | .exit = nf_nat_rule_net_exit, | ||
182 | }; | ||
183 | |||
184 | int __init nf_nat_rule_init(void) | ||
185 | { | ||
186 | int ret; | ||
187 | |||
188 | ret = register_pernet_subsys(&nf_nat_rule_net_ops); | ||
189 | if (ret != 0) | ||
190 | goto out; | ||
191 | ret = xt_register_target(&ipt_snat_reg); | ||
192 | if (ret != 0) | ||
193 | goto unregister_table; | ||
194 | |||
195 | ret = xt_register_target(&ipt_dnat_reg); | ||
196 | if (ret != 0) | ||
197 | goto unregister_snat; | ||
198 | |||
199 | return ret; | ||
200 | |||
201 | unregister_snat: | ||
202 | xt_unregister_target(&ipt_snat_reg); | ||
203 | unregister_table: | ||
204 | unregister_pernet_subsys(&nf_nat_rule_net_ops); | ||
205 | out: | ||
206 | return ret; | ||
207 | } | ||
208 | |||
209 | void nf_nat_rule_cleanup(void) | ||
210 | { | ||
211 | xt_unregister_target(&ipt_dnat_reg); | ||
212 | xt_unregister_target(&ipt_snat_reg); | ||
213 | unregister_pernet_subsys(&nf_nat_rule_net_ops); | ||
214 | } | ||
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c deleted file mode 100644 index 9c87cde28ff8..000000000000 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ /dev/null | |||
@@ -1,572 +0,0 @@ | |||
1 | /* SIP extension for NAT alteration. | ||
2 | * | ||
3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> | ||
4 | * based on RR's ip_nat_ftp.c and other modules. | ||
5 | * (C) 2007 United Security Providers | ||
6 | * (C) 2007, 2008 Patrick McHardy <kaber@trash.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <net/ip.h> | ||
17 | #include <linux/udp.h> | ||
18 | #include <linux/tcp.h> | ||
19 | |||
20 | #include <net/netfilter/nf_nat.h> | ||
21 | #include <net/netfilter/nf_nat_helper.h> | ||
22 | #include <net/netfilter/nf_nat_rule.h> | ||
23 | #include <net/netfilter/nf_conntrack_helper.h> | ||
24 | #include <net/netfilter/nf_conntrack_expect.h> | ||
25 | #include <linux/netfilter/nf_conntrack_sip.h> | ||
26 | |||
27 | MODULE_LICENSE("GPL"); | ||
28 | MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); | ||
29 | MODULE_DESCRIPTION("SIP NAT helper"); | ||
30 | MODULE_ALIAS("ip_nat_sip"); | ||
31 | |||
32 | |||
33 | static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, | ||
34 | const char **dptr, unsigned int *datalen, | ||
35 | unsigned int matchoff, unsigned int matchlen, | ||
36 | const char *buffer, unsigned int buflen) | ||
37 | { | ||
38 | enum ip_conntrack_info ctinfo; | ||
39 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
40 | struct tcphdr *th; | ||
41 | unsigned int baseoff; | ||
42 | |||
43 | if (nf_ct_protonum(ct) == IPPROTO_TCP) { | ||
44 | th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); | ||
45 | baseoff = ip_hdrlen(skb) + th->doff * 4; | ||
46 | matchoff += dataoff - baseoff; | ||
47 | |||
48 | if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | ||
49 | matchoff, matchlen, | ||
50 | buffer, buflen, false)) | ||
51 | return 0; | ||
52 | } else { | ||
53 | baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); | ||
54 | matchoff += dataoff - baseoff; | ||
55 | |||
56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | ||
57 | matchoff, matchlen, | ||
58 | buffer, buflen)) | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | /* Reload data pointer and adjust datalen value */ | ||
63 | *dptr = skb->data + dataoff; | ||
64 | *datalen += buflen - matchlen; | ||
65 | return 1; | ||
66 | } | ||
67 | |||
68 | static int map_addr(struct sk_buff *skb, unsigned int dataoff, | ||
69 | const char **dptr, unsigned int *datalen, | ||
70 | unsigned int matchoff, unsigned int matchlen, | ||
71 | union nf_inet_addr *addr, __be16 port) | ||
72 | { | ||
73 | enum ip_conntrack_info ctinfo; | ||
74 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
75 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
76 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
77 | unsigned int buflen; | ||
78 | __be32 newaddr; | ||
79 | __be16 newport; | ||
80 | |||
81 | if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip && | ||
82 | ct->tuplehash[dir].tuple.src.u.udp.port == port) { | ||
83 | newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
84 | newport = ct->tuplehash[!dir].tuple.dst.u.udp.port; | ||
85 | } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && | ||
86 | ct->tuplehash[dir].tuple.dst.u.udp.port == port) { | ||
87 | newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; | ||
88 | newport = ct->tuplehash[!dir].tuple.src.u.udp.port; | ||
89 | } else | ||
90 | return 1; | ||
91 | |||
92 | if (newaddr == addr->ip && newport == port) | ||
93 | return 1; | ||
94 | |||
95 | buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); | ||
96 | |||
97 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, | ||
98 | buffer, buflen); | ||
99 | } | ||
100 | |||
101 | static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, | ||
102 | const char **dptr, unsigned int *datalen, | ||
103 | enum sip_header_types type) | ||
104 | { | ||
105 | enum ip_conntrack_info ctinfo; | ||
106 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
107 | unsigned int matchlen, matchoff; | ||
108 | union nf_inet_addr addr; | ||
109 | __be16 port; | ||
110 | |||
111 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, | ||
112 | &matchoff, &matchlen, &addr, &port) <= 0) | ||
113 | return 1; | ||
114 | return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, | ||
115 | &addr, port); | ||
116 | } | ||
117 | |||
/*
 * Translate the addresses embedded in a SIP message so that they match the
 * NAT mapping of the connection the packet belongs to.
 *
 * Handled in order: the request URI (requests only), the topmost Via header
 * with its maddr=, received= and rport= parameters, all Contact headers,
 * and finally the From and To headers.
 *
 * @skb:     packet carrying the SIP message
 * @dataoff: offset of the SIP payload inside skb->data
 * @dptr:    in/out pointer to the SIP payload (reloaded after each mangle)
 * @datalen: in/out payload length (updated after each mangle)
 *
 * Returns NF_DROP as soon as any rewrite fails, NF_ACCEPT otherwise.
 */
static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
			       const char **dptr, unsigned int *datalen)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned int coff, matchoff, matchlen;
	enum sip_header_types hdr;
	union nf_inet_addr addr;
	__be16 port;
	int request, in_header;

	/* Basic rules: requests and responses. */
	/* Anything not starting with "SIP/2.0" is treated as a request. */
	if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
		if (ct_sip_parse_request(ct, *dptr, *datalen,
					 &matchoff, &matchlen,
					 &addr, &port) > 0 &&
		    !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
			      &addr, port))
			return NF_DROP;
		request = 1;
	} else
		request = 0;

	/* The Via header type to look for depends on the transport. */
	if (nf_ct_protonum(ct) == IPPROTO_TCP)
		hdr = SIP_HDR_VIA_TCP;
	else
		hdr = SIP_HDR_VIA_UDP;

	/* Translate topmost Via header and parameters */
	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
				    hdr, NULL, &matchoff, &matchlen,
				    &addr, &port) > 0) {
		unsigned int olen, matchend, poff, plen, buflen, n;
		char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];

		/* We're only interested in headers related to this
		 * connection */
		if (request) {
			if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
			    port != ct->tuplehash[dir].tuple.src.u.udp.port)
				goto next;
		} else {
			if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
			    port != ct->tuplehash[dir].tuple.dst.u.udp.port)
				goto next;
		}

		olen = *datalen;
		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
			      &addr, port))
			return NF_DROP;

		/* End of the Via address, shifted by however much map_addr()
		 * changed the payload length. */
		matchend = matchoff + matchlen + *datalen - olen;

		/* The maddr= parameter (RFC 3261) specifies where to send
		 * the reply. */
		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
					       "maddr=", &poff, &plen,
					       &addr, true) > 0 &&
		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
			buflen = sprintf(buffer, "%pI4",
					 &ct->tuplehash[!dir].tuple.dst.u3.ip);
			if (!mangle_packet(skb, dataoff, dptr, datalen,
					   poff, plen, buffer, buflen))
				return NF_DROP;
		}

		/* The received= parameter (RFC 3261) contains the address
		 * from which the server received the request. */
		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
					       "received=", &poff, &plen,
					       &addr, false) > 0 &&
		    addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
			buflen = sprintf(buffer, "%pI4",
					 &ct->tuplehash[!dir].tuple.src.u3.ip);
			if (!mangle_packet(skb, dataoff, dptr, datalen,
					   poff, plen, buffer, buflen))
				return NF_DROP;
		}

		/* The rport= parameter (RFC 3581) contains the port number
		 * from which the server received the request. */
		if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
						 "rport=", &poff, &plen,
						 &n) > 0 &&
		    htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
		    htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
			__be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
			buflen = sprintf(buffer, "%u", ntohs(p));
			if (!mangle_packet(skb, dataoff, dptr, datalen,
					   poff, plen, buffer, buflen))
				return NF_DROP;
		}
	}

next:
	/* Translate Contact headers */
	/* coff and in_header carry the parser position between iterations. */
	coff = 0;
	in_header = 0;
	while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
				       SIP_HDR_CONTACT, &in_header,
				       &matchoff, &matchlen,
				       &addr, &port) > 0) {
		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
			      &addr, port))
			return NF_DROP;
	}

	/* Finally translate the From and To header addresses. */
	if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
	    !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
		return NF_DROP;

	return NF_ACCEPT;
}
235 | |||
236 | static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) | ||
237 | { | ||
238 | enum ip_conntrack_info ctinfo; | ||
239 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
240 | const struct tcphdr *th; | ||
241 | |||
242 | if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) | ||
243 | return; | ||
244 | |||
245 | th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); | ||
246 | nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); | ||
247 | } | ||
248 | |||
249 | /* Handles expected signalling connections and media streams */ | ||
250 | static void ip_nat_sip_expected(struct nf_conn *ct, | ||
251 | struct nf_conntrack_expect *exp) | ||
252 | { | ||
253 | struct nf_nat_ipv4_range range; | ||
254 | |||
255 | /* This must be a fresh one. */ | ||
256 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | ||
257 | |||
258 | /* For DST manip, map port here to where it's expected. */ | ||
259 | range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); | ||
260 | range.min = range.max = exp->saved_proto; | ||
261 | range.min_ip = range.max_ip = exp->saved_ip; | ||
262 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); | ||
263 | |||
264 | /* Change src to where master sends to, but only if the connection | ||
265 | * actually came from the same source. */ | ||
266 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == | ||
267 | ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { | ||
268 | range.flags = NF_NAT_RANGE_MAP_IPS; | ||
269 | range.min_ip = range.max_ip | ||
270 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | ||
271 | nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); | ||
272 | } | ||
273 | } | ||
274 | |||
275 | static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, | ||
276 | const char **dptr, unsigned int *datalen, | ||
277 | struct nf_conntrack_expect *exp, | ||
278 | unsigned int matchoff, | ||
279 | unsigned int matchlen) | ||
280 | { | ||
281 | enum ip_conntrack_info ctinfo; | ||
282 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
283 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
284 | __be32 newip; | ||
285 | u_int16_t port; | ||
286 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
287 | unsigned int buflen; | ||
288 | |||
289 | /* Connection will come from reply */ | ||
290 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) | ||
291 | newip = exp->tuple.dst.u3.ip; | ||
292 | else | ||
293 | newip = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
294 | |||
295 | /* If the signalling port matches the connection's source port in the | ||
296 | * original direction, try to use the destination port in the opposite | ||
297 | * direction. */ | ||
298 | if (exp->tuple.dst.u.udp.port == | ||
299 | ct->tuplehash[dir].tuple.src.u.udp.port) | ||
300 | port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); | ||
301 | else | ||
302 | port = ntohs(exp->tuple.dst.u.udp.port); | ||
303 | |||
304 | exp->saved_ip = exp->tuple.dst.u3.ip; | ||
305 | exp->tuple.dst.u3.ip = newip; | ||
306 | exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; | ||
307 | exp->dir = !dir; | ||
308 | exp->expectfn = ip_nat_sip_expected; | ||
309 | |||
310 | for (; port != 0; port++) { | ||
311 | int ret; | ||
312 | |||
313 | exp->tuple.dst.u.udp.port = htons(port); | ||
314 | ret = nf_ct_expect_related(exp); | ||
315 | if (ret == 0) | ||
316 | break; | ||
317 | else if (ret != -EBUSY) { | ||
318 | port = 0; | ||
319 | break; | ||
320 | } | ||
321 | } | ||
322 | |||
323 | if (port == 0) | ||
324 | return NF_DROP; | ||
325 | |||
326 | if (exp->tuple.dst.u3.ip != exp->saved_ip || | ||
327 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { | ||
328 | buflen = sprintf(buffer, "%pI4:%u", &newip, port); | ||
329 | if (!mangle_packet(skb, dataoff, dptr, datalen, | ||
330 | matchoff, matchlen, buffer, buflen)) | ||
331 | goto err; | ||
332 | } | ||
333 | return NF_ACCEPT; | ||
334 | |||
335 | err: | ||
336 | nf_ct_unexpect_related(exp); | ||
337 | return NF_DROP; | ||
338 | } | ||
339 | |||
340 | static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, | ||
341 | const char **dptr, unsigned int *datalen) | ||
342 | { | ||
343 | enum ip_conntrack_info ctinfo; | ||
344 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
345 | unsigned int matchoff, matchlen; | ||
346 | char buffer[sizeof("65536")]; | ||
347 | int buflen, c_len; | ||
348 | |||
349 | /* Get actual SDP length */ | ||
350 | if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, | ||
351 | SDP_HDR_VERSION, SDP_HDR_UNSPEC, | ||
352 | &matchoff, &matchlen) <= 0) | ||
353 | return 0; | ||
354 | c_len = *datalen - matchoff + strlen("v="); | ||
355 | |||
356 | /* Now, update SDP length */ | ||
357 | if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH, | ||
358 | &matchoff, &matchlen) <= 0) | ||
359 | return 0; | ||
360 | |||
361 | buflen = sprintf(buffer, "%u", c_len); | ||
362 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, | ||
363 | buffer, buflen); | ||
364 | } | ||
365 | |||
366 | static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, | ||
367 | const char **dptr, unsigned int *datalen, | ||
368 | unsigned int sdpoff, | ||
369 | enum sdp_header_types type, | ||
370 | enum sdp_header_types term, | ||
371 | char *buffer, int buflen) | ||
372 | { | ||
373 | enum ip_conntrack_info ctinfo; | ||
374 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
375 | unsigned int matchlen, matchoff; | ||
376 | |||
377 | if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, | ||
378 | &matchoff, &matchlen) <= 0) | ||
379 | return -ENOENT; | ||
380 | return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, | ||
381 | buffer, buflen) ? 0 : -EINVAL; | ||
382 | } | ||
383 | |||
384 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, | ||
385 | const char **dptr, unsigned int *datalen, | ||
386 | unsigned int sdpoff, | ||
387 | enum sdp_header_types type, | ||
388 | enum sdp_header_types term, | ||
389 | const union nf_inet_addr *addr) | ||
390 | { | ||
391 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | ||
392 | unsigned int buflen; | ||
393 | |||
394 | buflen = sprintf(buffer, "%pI4", &addr->ip); | ||
395 | if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, | ||
396 | buffer, buflen)) | ||
397 | return 0; | ||
398 | |||
399 | return mangle_content_len(skb, dataoff, dptr, datalen); | ||
400 | } | ||
401 | |||
402 | static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, | ||
403 | const char **dptr, unsigned int *datalen, | ||
404 | unsigned int matchoff, | ||
405 | unsigned int matchlen, | ||
406 | u_int16_t port) | ||
407 | { | ||
408 | char buffer[sizeof("nnnnn")]; | ||
409 | unsigned int buflen; | ||
410 | |||
411 | buflen = sprintf(buffer, "%u", port); | ||
412 | if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, | ||
413 | buffer, buflen)) | ||
414 | return 0; | ||
415 | |||
416 | return mangle_content_len(skb, dataoff, dptr, datalen); | ||
417 | } | ||
418 | |||
419 | static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, | ||
420 | const char **dptr, unsigned int *datalen, | ||
421 | unsigned int sdpoff, | ||
422 | const union nf_inet_addr *addr) | ||
423 | { | ||
424 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | ||
425 | unsigned int buflen; | ||
426 | |||
427 | /* Mangle session description owner and contact addresses */ | ||
428 | buflen = sprintf(buffer, "%pI4", &addr->ip); | ||
429 | if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, | ||
430 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, | ||
431 | buffer, buflen)) | ||
432 | return 0; | ||
433 | |||
434 | switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, | ||
435 | SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, | ||
436 | buffer, buflen)) { | ||
437 | case 0: | ||
438 | /* | ||
439 | * RFC 2327: | ||
440 | * | ||
441 | * Session description | ||
442 | * | ||
443 | * c=* (connection information - not required if included in all media) | ||
444 | */ | ||
445 | case -ENOENT: | ||
446 | break; | ||
447 | default: | ||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | return mangle_content_len(skb, dataoff, dptr, datalen); | ||
452 | } | ||
453 | |||
454 | /* So, this packet has hit the connection tracking matching code. | ||
455 | Mangle it, and change the expectation to match the new version. */ | ||
456 | static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, | ||
457 | const char **dptr, unsigned int *datalen, | ||
458 | struct nf_conntrack_expect *rtp_exp, | ||
459 | struct nf_conntrack_expect *rtcp_exp, | ||
460 | unsigned int mediaoff, | ||
461 | unsigned int medialen, | ||
462 | union nf_inet_addr *rtp_addr) | ||
463 | { | ||
464 | enum ip_conntrack_info ctinfo; | ||
465 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
466 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
467 | u_int16_t port; | ||
468 | |||
469 | /* Connection will come from reply */ | ||
470 | if (ct->tuplehash[dir].tuple.src.u3.ip == | ||
471 | ct->tuplehash[!dir].tuple.dst.u3.ip) | ||
472 | rtp_addr->ip = rtp_exp->tuple.dst.u3.ip; | ||
473 | else | ||
474 | rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
475 | |||
476 | rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; | ||
477 | rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; | ||
478 | rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; | ||
479 | rtp_exp->dir = !dir; | ||
480 | rtp_exp->expectfn = ip_nat_sip_expected; | ||
481 | |||
482 | rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; | ||
483 | rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; | ||
484 | rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; | ||
485 | rtcp_exp->dir = !dir; | ||
486 | rtcp_exp->expectfn = ip_nat_sip_expected; | ||
487 | |||
488 | /* Try to get same pair of ports: if not, try to change them. */ | ||
489 | for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); | ||
490 | port != 0; port += 2) { | ||
491 | int ret; | ||
492 | |||
493 | rtp_exp->tuple.dst.u.udp.port = htons(port); | ||
494 | ret = nf_ct_expect_related(rtp_exp); | ||
495 | if (ret == -EBUSY) | ||
496 | continue; | ||
497 | else if (ret < 0) { | ||
498 | port = 0; | ||
499 | break; | ||
500 | } | ||
501 | rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); | ||
502 | ret = nf_ct_expect_related(rtcp_exp); | ||
503 | if (ret == 0) | ||
504 | break; | ||
505 | else if (ret == -EBUSY) { | ||
506 | nf_ct_unexpect_related(rtp_exp); | ||
507 | continue; | ||
508 | } else if (ret < 0) { | ||
509 | nf_ct_unexpect_related(rtp_exp); | ||
510 | port = 0; | ||
511 | break; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | if (port == 0) | ||
516 | goto err1; | ||
517 | |||
518 | /* Update media port. */ | ||
519 | if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && | ||
520 | !ip_nat_sdp_port(skb, dataoff, dptr, datalen, | ||
521 | mediaoff, medialen, port)) | ||
522 | goto err2; | ||
523 | |||
524 | return NF_ACCEPT; | ||
525 | |||
526 | err2: | ||
527 | nf_ct_unexpect_related(rtp_exp); | ||
528 | nf_ct_unexpect_related(rtcp_exp); | ||
529 | err1: | ||
530 | return NF_DROP; | ||
531 | } | ||
532 | |||
533 | static struct nf_ct_helper_expectfn sip_nat = { | ||
534 | .name = "sip", | ||
535 | .expectfn = ip_nat_sip_expected, | ||
536 | }; | ||
537 | |||
538 | static void __exit nf_nat_sip_fini(void) | ||
539 | { | ||
540 | RCU_INIT_POINTER(nf_nat_sip_hook, NULL); | ||
541 | RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); | ||
542 | RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); | ||
543 | RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); | ||
544 | RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); | ||
545 | RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); | ||
546 | RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); | ||
547 | nf_ct_helper_expectfn_unregister(&sip_nat); | ||
548 | synchronize_rcu(); | ||
549 | } | ||
550 | |||
551 | static int __init nf_nat_sip_init(void) | ||
552 | { | ||
553 | BUG_ON(nf_nat_sip_hook != NULL); | ||
554 | BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); | ||
555 | BUG_ON(nf_nat_sip_expect_hook != NULL); | ||
556 | BUG_ON(nf_nat_sdp_addr_hook != NULL); | ||
557 | BUG_ON(nf_nat_sdp_port_hook != NULL); | ||
558 | BUG_ON(nf_nat_sdp_session_hook != NULL); | ||
559 | BUG_ON(nf_nat_sdp_media_hook != NULL); | ||
560 | RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip); | ||
561 | RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); | ||
562 | RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect); | ||
563 | RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); | ||
564 | RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); | ||
565 | RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); | ||
566 | RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); | ||
567 | nf_ct_helper_expectfn_register(&sip_nat); | ||
568 | return 0; | ||
569 | } | ||
570 | |||
571 | module_init(nf_nat_sip_init); | ||
572 | module_exit(nf_nat_sip_fini); | ||
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c deleted file mode 100644 index 9dbb8d284f99..000000000000 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License version 2 as | ||
5 | * published by the Free Software Foundation. | ||
6 | */ | ||
7 | |||
8 | #include <linux/module.h> | ||
9 | #include <linux/udp.h> | ||
10 | |||
11 | #include <net/netfilter/nf_conntrack_helper.h> | ||
12 | #include <net/netfilter/nf_conntrack_expect.h> | ||
13 | #include <net/netfilter/nf_nat_helper.h> | ||
14 | #include <net/netfilter/nf_nat_rule.h> | ||
15 | #include <linux/netfilter/nf_conntrack_tftp.h> | ||
16 | |||
17 | MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); | ||
18 | MODULE_DESCRIPTION("TFTP NAT helper"); | ||
19 | MODULE_LICENSE("GPL"); | ||
20 | MODULE_ALIAS("ip_nat_tftp"); | ||
21 | |||
22 | static unsigned int help(struct sk_buff *skb, | ||
23 | enum ip_conntrack_info ctinfo, | ||
24 | struct nf_conntrack_expect *exp) | ||
25 | { | ||
26 | const struct nf_conn *ct = exp->master; | ||
27 | |||
28 | exp->saved_proto.udp.port | ||
29 | = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; | ||
30 | exp->dir = IP_CT_DIR_REPLY; | ||
31 | exp->expectfn = nf_nat_follow_master; | ||
32 | if (nf_ct_expect_related(exp) != 0) | ||
33 | return NF_DROP; | ||
34 | return NF_ACCEPT; | ||
35 | } | ||
36 | |||
37 | static void __exit nf_nat_tftp_fini(void) | ||
38 | { | ||
39 | RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); | ||
40 | synchronize_rcu(); | ||
41 | } | ||
42 | |||
43 | static int __init nf_nat_tftp_init(void) | ||
44 | { | ||
45 | BUG_ON(nf_nat_tftp_hook != NULL); | ||
46 | RCU_INIT_POINTER(nf_nat_tftp_hook, help); | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | module_init(nf_nat_tftp_init); | ||
51 | module_exit(nf_nat_tftp_fini); | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 957acd12250b..8de53e1ddd54 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
263 | SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), | 263 | SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), |
264 | SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), | 264 | SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), |
265 | SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), | 265 | SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), |
266 | SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), | ||
267 | SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), | ||
268 | SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), | ||
269 | SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), | ||
266 | SNMP_MIB_SENTINEL | 270 | SNMP_MIB_SENTINEL |
267 | }; | 271 | }; |
268 | 272 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fd9af60397b5..ff622069fcef 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1111,10 +1111,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1111 | const struct rtable *rt = (const struct rtable *) dst; | 1111 | const struct rtable *rt = (const struct rtable *) dst; |
1112 | unsigned int mtu = rt->rt_pmtu; | 1112 | unsigned int mtu = rt->rt_pmtu; |
1113 | 1113 | ||
1114 | if (mtu && time_after_eq(jiffies, rt->dst.expires)) | 1114 | if (!mtu || time_after_eq(jiffies, rt->dst.expires)) |
1115 | mtu = 0; | ||
1116 | |||
1117 | if (!mtu) | ||
1118 | mtu = dst_metric_raw(dst, RTAX_MTU); | 1115 | mtu = dst_metric_raw(dst, RTAX_MTU); |
1119 | 1116 | ||
1120 | if (mtu && rt_is_output_route(rt)) | 1117 | if (mtu && rt_is_output_route(rt)) |
@@ -1566,11 +1563,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1566 | if (ipv4_is_zeronet(daddr)) | 1563 | if (ipv4_is_zeronet(daddr)) |
1567 | goto martian_destination; | 1564 | goto martian_destination; |
1568 | 1565 | ||
1569 | if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) { | 1566 | /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), |
1570 | if (ipv4_is_loopback(daddr)) | 1567 | * and call it once if daddr or/and saddr are loopback addresses |
1568 | */ | ||
1569 | if (ipv4_is_loopback(daddr)) { | ||
1570 | if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) | ||
1571 | goto martian_destination; | 1571 | goto martian_destination; |
1572 | 1572 | } else if (ipv4_is_loopback(saddr)) { | |
1573 | if (ipv4_is_loopback(saddr)) | 1573 | if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) |
1574 | goto martian_source; | 1574 | goto martian_source; |
1575 | } | 1575 | } |
1576 | 1576 | ||
@@ -1595,7 +1595,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1595 | 1595 | ||
1596 | if (res.type == RTN_LOCAL) { | 1596 | if (res.type == RTN_LOCAL) { |
1597 | err = fib_validate_source(skb, saddr, daddr, tos, | 1597 | err = fib_validate_source(skb, saddr, daddr, tos, |
1598 | net->loopback_dev->ifindex, | 1598 | LOOPBACK_IFINDEX, |
1599 | dev, in_dev, &itag); | 1599 | dev, in_dev, &itag); |
1600 | if (err < 0) | 1600 | if (err < 0) |
1601 | goto martian_source_keep_err; | 1601 | goto martian_source_keep_err; |
@@ -1871,7 +1871,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) | |||
1871 | 1871 | ||
1872 | orig_oif = fl4->flowi4_oif; | 1872 | orig_oif = fl4->flowi4_oif; |
1873 | 1873 | ||
1874 | fl4->flowi4_iif = net->loopback_dev->ifindex; | 1874 | fl4->flowi4_iif = LOOPBACK_IFINDEX; |
1875 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; | 1875 | fl4->flowi4_tos = tos & IPTOS_RT_MASK; |
1876 | fl4->flowi4_scope = ((tos & RTO_ONLINK) ? | 1876 | fl4->flowi4_scope = ((tos & RTO_ONLINK) ? |
1877 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | 1877 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); |
@@ -1960,7 +1960,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) | |||
1960 | if (!fl4->daddr) | 1960 | if (!fl4->daddr) |
1961 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); | 1961 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); |
1962 | dev_out = net->loopback_dev; | 1962 | dev_out = net->loopback_dev; |
1963 | fl4->flowi4_oif = net->loopback_dev->ifindex; | 1963 | fl4->flowi4_oif = LOOPBACK_IFINDEX; |
1964 | res.type = RTN_LOCAL; | 1964 | res.type = RTN_LOCAL; |
1965 | flags |= RTCF_LOCAL; | 1965 | flags |= RTCF_LOCAL; |
1966 | goto make_route; | 1966 | goto make_route; |
@@ -2131,7 +2131,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, | |||
2131 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2131 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2132 | 2132 | ||
2133 | static int rt_fill_info(struct net *net, __be32 dst, __be32 src, | 2133 | static int rt_fill_info(struct net *net, __be32 dst, __be32 src, |
2134 | struct flowi4 *fl4, struct sk_buff *skb, u32 pid, | 2134 | struct flowi4 *fl4, struct sk_buff *skb, u32 portid, |
2135 | u32 seq, int event, int nowait, unsigned int flags) | 2135 | u32 seq, int event, int nowait, unsigned int flags) |
2136 | { | 2136 | { |
2137 | struct rtable *rt = skb_rtable(skb); | 2137 | struct rtable *rt = skb_rtable(skb); |
@@ -2141,7 +2141,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, | |||
2141 | u32 error; | 2141 | u32 error; |
2142 | u32 metrics[RTAX_MAX]; | 2142 | u32 metrics[RTAX_MAX]; |
2143 | 2143 | ||
2144 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); | 2144 | nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); |
2145 | if (nlh == NULL) | 2145 | if (nlh == NULL) |
2146 | return -EMSGSIZE; | 2146 | return -EMSGSIZE; |
2147 | 2147 | ||
@@ -2301,12 +2301,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void | |||
2301 | rt->rt_flags |= RTCF_NOTIFY; | 2301 | rt->rt_flags |= RTCF_NOTIFY; |
2302 | 2302 | ||
2303 | err = rt_fill_info(net, dst, src, &fl4, skb, | 2303 | err = rt_fill_info(net, dst, src, &fl4, skb, |
2304 | NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 2304 | NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, |
2305 | RTM_NEWROUTE, 0, 0); | 2305 | RTM_NEWROUTE, 0, 0); |
2306 | if (err <= 0) | 2306 | if (err <= 0) |
2307 | goto errout_free; | 2307 | goto errout_free; |
2308 | 2308 | ||
2309 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); | 2309 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); |
2310 | errout: | 2310 | errout: |
2311 | return err; | 2311 | return err; |
2312 | 2312 | ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650e1528e1e6..ba48e799b031 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
319 | ireq->tstamp_ok = tcp_opt.saw_tstamp; | 319 | ireq->tstamp_ok = tcp_opt.saw_tstamp; |
320 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; | 320 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; |
321 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; | 321 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; |
322 | treq->listener = NULL; | ||
322 | 323 | ||
323 | /* We throwed the options of the initial SYN away, so we hope | 324 | /* We throwed the options of the initial SYN away, so we hope |
324 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 325 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e78c79b5586..9205e492dc9d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
232 | return 0; | 232 | return 0; |
233 | } | 233 | } |
234 | 234 | ||
235 | int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, | ||
236 | size_t *lenp, loff_t *ppos) | ||
237 | { | ||
238 | ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; | ||
239 | struct tcp_fastopen_context *ctxt; | ||
240 | int ret; | ||
241 | u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ | ||
242 | |||
243 | tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); | ||
244 | if (!tbl.data) | ||
245 | return -ENOMEM; | ||
246 | |||
247 | rcu_read_lock(); | ||
248 | ctxt = rcu_dereference(tcp_fastopen_ctx); | ||
249 | if (ctxt) | ||
250 | memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); | ||
251 | rcu_read_unlock(); | ||
252 | |||
253 | snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", | ||
254 | user_key[0], user_key[1], user_key[2], user_key[3]); | ||
255 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); | ||
256 | |||
257 | if (write && ret == 0) { | ||
258 | if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, | ||
259 | user_key + 2, user_key + 3) != 4) { | ||
260 | ret = -EINVAL; | ||
261 | goto bad_key; | ||
262 | } | ||
263 | tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); | ||
264 | } | ||
265 | |||
266 | bad_key: | ||
267 | pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", | ||
268 | user_key[0], user_key[1], user_key[2], user_key[3], | ||
269 | (char *)tbl.data, ret); | ||
270 | kfree(tbl.data); | ||
271 | return ret; | ||
272 | } | ||
273 | |||
235 | static struct ctl_table ipv4_table[] = { | 274 | static struct ctl_table ipv4_table[] = { |
236 | { | 275 | { |
237 | .procname = "tcp_timestamps", | 276 | .procname = "tcp_timestamps", |
@@ -386,6 +425,12 @@ static struct ctl_table ipv4_table[] = { | |||
386 | .proc_handler = proc_dointvec, | 425 | .proc_handler = proc_dointvec, |
387 | }, | 426 | }, |
388 | { | 427 | { |
428 | .procname = "tcp_fastopen_key", | ||
429 | .mode = 0600, | ||
430 | .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), | ||
431 | .proc_handler = proc_tcp_fastopen_key, | ||
432 | }, | ||
433 | { | ||
389 | .procname = "tcp_tw_recycle", | 434 | .procname = "tcp_tw_recycle", |
390 | .data = &tcp_death_row.sysctl_tw_recycle, | 435 | .data = &tcp_death_row.sysctl_tw_recycle, |
391 | .maxlen = sizeof(int), | 436 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5f6419341821..f32c02e2a543 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
486 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 486 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
487 | mask |= POLLIN | POLLRDNORM | POLLRDHUP; | 487 | mask |= POLLIN | POLLRDNORM | POLLRDHUP; |
488 | 488 | ||
489 | /* Connected? */ | 489 | /* Connected or passive Fast Open socket? */ |
490 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 490 | if (sk->sk_state != TCP_SYN_SENT && |
491 | (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { | ||
491 | int target = sock_rcvlowat(sk, 0, INT_MAX); | 492 | int target = sock_rcvlowat(sk, 0, INT_MAX); |
492 | 493 | ||
493 | if (tp->urg_seq == tp->copied_seq && | 494 | if (tp->urg_seq == tp->copied_seq && |
@@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
840 | ssize_t copied; | 841 | ssize_t copied; |
841 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 842 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
842 | 843 | ||
843 | /* Wait for a connection to finish. */ | 844 | /* Wait for a connection to finish. One exception is TCP Fast Open |
844 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) | 845 | * (passive side) where data is allowed to be sent before a connection |
846 | * is fully established. | ||
847 | */ | ||
848 | if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && | ||
849 | !tcp_passive_fastopen(sk)) { | ||
845 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 850 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
846 | goto out_err; | 851 | goto out_err; |
852 | } | ||
847 | 853 | ||
848 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 854 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
849 | 855 | ||
@@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1042 | 1048 | ||
1043 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 1049 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
1044 | 1050 | ||
1045 | /* Wait for a connection to finish. */ | 1051 | /* Wait for a connection to finish. One exception is TCP Fast Open |
1046 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) | 1052 | * (passive side) where data is allowed to be sent before a connection |
1053 | * is fully established. | ||
1054 | */ | ||
1055 | if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && | ||
1056 | !tcp_passive_fastopen(sk)) { | ||
1047 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 1057 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
1048 | goto do_error; | 1058 | goto do_error; |
1059 | } | ||
1049 | 1060 | ||
1050 | if (unlikely(tp->repair)) { | 1061 | if (unlikely(tp->repair)) { |
1051 | if (tp->repair_queue == TCP_RECV_QUEUE) { | 1062 | if (tp->repair_queue == TCP_RECV_QUEUE) { |
@@ -1139,78 +1150,43 @@ new_segment: | |||
1139 | if (err) | 1150 | if (err) |
1140 | goto do_fault; | 1151 | goto do_fault; |
1141 | } else { | 1152 | } else { |
1142 | bool merge = false; | 1153 | bool merge = true; |
1143 | int i = skb_shinfo(skb)->nr_frags; | 1154 | int i = skb_shinfo(skb)->nr_frags; |
1144 | struct page *page = sk->sk_sndmsg_page; | 1155 | struct page_frag *pfrag = sk_page_frag(sk); |
1145 | int off; | 1156 | |
1146 | 1157 | if (!sk_page_frag_refill(sk, pfrag)) | |
1147 | if (page && page_count(page) == 1) | 1158 | goto wait_for_memory; |
1148 | sk->sk_sndmsg_off = 0; | 1159 | |
1149 | 1160 | if (!skb_can_coalesce(skb, i, pfrag->page, | |
1150 | off = sk->sk_sndmsg_off; | 1161 | pfrag->offset)) { |
1151 | 1162 | if (i == MAX_SKB_FRAGS || !sg) { | |
1152 | if (skb_can_coalesce(skb, i, page, off) && | 1163 | tcp_mark_push(tp, skb); |
1153 | off != PAGE_SIZE) { | 1164 | goto new_segment; |
1154 | /* We can extend the last page | ||
1155 | * fragment. */ | ||
1156 | merge = true; | ||
1157 | } else if (i == MAX_SKB_FRAGS || !sg) { | ||
1158 | /* Need to add new fragment and cannot | ||
1159 | * do this because interface is non-SG, | ||
1160 | * or because all the page slots are | ||
1161 | * busy. */ | ||
1162 | tcp_mark_push(tp, skb); | ||
1163 | goto new_segment; | ||
1164 | } else if (page) { | ||
1165 | if (off == PAGE_SIZE) { | ||
1166 | put_page(page); | ||
1167 | sk->sk_sndmsg_page = page = NULL; | ||
1168 | off = 0; | ||
1169 | } | 1165 | } |
1170 | } else | 1166 | merge = false; |
1171 | off = 0; | 1167 | } |
1172 | 1168 | ||
1173 | if (copy > PAGE_SIZE - off) | 1169 | copy = min_t(int, copy, pfrag->size - pfrag->offset); |
1174 | copy = PAGE_SIZE - off; | ||
1175 | 1170 | ||
1176 | if (!sk_wmem_schedule(sk, copy)) | 1171 | if (!sk_wmem_schedule(sk, copy)) |
1177 | goto wait_for_memory; | 1172 | goto wait_for_memory; |
1178 | 1173 | ||
1179 | if (!page) { | ||
1180 | /* Allocate new cache page. */ | ||
1181 | if (!(page = sk_stream_alloc_page(sk))) | ||
1182 | goto wait_for_memory; | ||
1183 | } | ||
1184 | |||
1185 | /* Time to copy data. We are close to | ||
1186 | * the end! */ | ||
1187 | err = skb_copy_to_page_nocache(sk, from, skb, | 1174 | err = skb_copy_to_page_nocache(sk, from, skb, |
1188 | page, off, copy); | 1175 | pfrag->page, |
1189 | if (err) { | 1176 | pfrag->offset, |
1190 | /* If this page was new, give it to the | 1177 | copy); |
1191 | * socket so it does not get leaked. | 1178 | if (err) |
1192 | */ | ||
1193 | if (!sk->sk_sndmsg_page) { | ||
1194 | sk->sk_sndmsg_page = page; | ||
1195 | sk->sk_sndmsg_off = 0; | ||
1196 | } | ||
1197 | goto do_error; | 1179 | goto do_error; |
1198 | } | ||
1199 | 1180 | ||
1200 | /* Update the skb. */ | 1181 | /* Update the skb. */ |
1201 | if (merge) { | 1182 | if (merge) { |
1202 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | 1183 | skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); |
1203 | } else { | 1184 | } else { |
1204 | skb_fill_page_desc(skb, i, page, off, copy); | 1185 | skb_fill_page_desc(skb, i, pfrag->page, |
1205 | if (sk->sk_sndmsg_page) { | 1186 | pfrag->offset, copy); |
1206 | get_page(page); | 1187 | get_page(pfrag->page); |
1207 | } else if (off + copy < PAGE_SIZE) { | ||
1208 | get_page(page); | ||
1209 | sk->sk_sndmsg_page = page; | ||
1210 | } | ||
1211 | } | 1188 | } |
1212 | 1189 | pfrag->offset += copy; | |
1213 | sk->sk_sndmsg_off = off + copy; | ||
1214 | } | 1190 | } |
1215 | 1191 | ||
1216 | if (!copied) | 1192 | if (!copied) |
@@ -2150,6 +2126,10 @@ void tcp_close(struct sock *sk, long timeout) | |||
2150 | * they look as CLOSING or LAST_ACK for Linux) | 2126 | * they look as CLOSING or LAST_ACK for Linux) |
2151 | * Probably, I missed some more holelets. | 2127 | * Probably, I missed some more holelets. |
2152 | * --ANK | 2128 | * --ANK |
2129 | * XXX (TFO) - To start off we don't support SYN+ACK+FIN | ||
2130 | * in a single packet! (May consider it later but will | ||
2131 | * probably need API support or TCP_CORK SYN-ACK until | ||
2132 | * data is written and socket is closed.) | ||
2153 | */ | 2133 | */ |
2154 | tcp_send_fin(sk); | 2134 | tcp_send_fin(sk); |
2155 | } | 2135 | } |
@@ -2221,8 +2201,16 @@ adjudge_to_death: | |||
2221 | } | 2201 | } |
2222 | } | 2202 | } |
2223 | 2203 | ||
2224 | if (sk->sk_state == TCP_CLOSE) | 2204 | if (sk->sk_state == TCP_CLOSE) { |
2205 | struct request_sock *req = tcp_sk(sk)->fastopen_rsk; | ||
2206 | /* We could get here with a non-NULL req if the socket is | ||
2207 | * aborted (e.g., closed with unread data) before 3WHS | ||
2208 | * finishes. | ||
2209 | */ | ||
2210 | if (req != NULL) | ||
2211 | reqsk_fastopen_remove(sk, req, false); | ||
2225 | inet_csk_destroy_sock(sk); | 2212 | inet_csk_destroy_sock(sk); |
2213 | } | ||
2226 | /* Otherwise, socket is reprieved until protocol close. */ | 2214 | /* Otherwise, socket is reprieved until protocol close. */ |
2227 | 2215 | ||
2228 | out: | 2216 | out: |
@@ -2308,6 +2296,13 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2308 | } | 2296 | } |
2309 | EXPORT_SYMBOL(tcp_disconnect); | 2297 | EXPORT_SYMBOL(tcp_disconnect); |
2310 | 2298 | ||
2299 | void tcp_sock_destruct(struct sock *sk) | ||
2300 | { | ||
2301 | inet_sock_destruct(sk); | ||
2302 | |||
2303 | kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); | ||
2304 | } | ||
2305 | |||
2311 | static inline bool tcp_can_repair_sock(const struct sock *sk) | 2306 | static inline bool tcp_can_repair_sock(const struct sock *sk) |
2312 | { | 2307 | { |
2313 | return capable(CAP_NET_ADMIN) && | 2308 | return capable(CAP_NET_ADMIN) && |
@@ -2701,6 +2696,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2701 | else | 2696 | else |
2702 | icsk->icsk_user_timeout = msecs_to_jiffies(val); | 2697 | icsk->icsk_user_timeout = msecs_to_jiffies(val); |
2703 | break; | 2698 | break; |
2699 | |||
2700 | case TCP_FASTOPEN: | ||
2701 | if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | | ||
2702 | TCPF_LISTEN))) | ||
2703 | err = fastopen_init_queue(sk, val); | ||
2704 | else | ||
2705 | err = -EINVAL; | ||
2706 | break; | ||
2704 | default: | 2707 | default: |
2705 | err = -ENOPROTOOPT; | 2708 | err = -ENOPROTOOPT; |
2706 | break; | 2709 | break; |
@@ -3514,11 +3517,15 @@ EXPORT_SYMBOL(tcp_cookie_generator); | |||
3514 | 3517 | ||
3515 | void tcp_done(struct sock *sk) | 3518 | void tcp_done(struct sock *sk) |
3516 | { | 3519 | { |
3520 | struct request_sock *req = tcp_sk(sk)->fastopen_rsk; | ||
3521 | |||
3517 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | 3522 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) |
3518 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); | 3523 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); |
3519 | 3524 | ||
3520 | tcp_set_state(sk, TCP_CLOSE); | 3525 | tcp_set_state(sk, TCP_CLOSE); |
3521 | tcp_clear_xmit_timers(sk); | 3526 | tcp_clear_xmit_timers(sk); |
3527 | if (req != NULL) | ||
3528 | reqsk_fastopen_remove(sk, req, false); | ||
3522 | 3529 | ||
3523 | sk->sk_shutdown = SHUTDOWN_MASK; | 3530 | sk->sk_shutdown = SHUTDOWN_MASK; |
3524 | 3531 | ||
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index a7f729c409d7..8f7ef0ad80e5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c | |||
@@ -1,10 +1,91 @@ | |||
1 | #include <linux/err.h> | ||
1 | #include <linux/init.h> | 2 | #include <linux/init.h> |
2 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
4 | #include <linux/list.h> | ||
5 | #include <linux/tcp.h> | ||
6 | #include <linux/rcupdate.h> | ||
7 | #include <linux/rculist.h> | ||
8 | #include <net/inetpeer.h> | ||
9 | #include <net/tcp.h> | ||
3 | 10 | ||
4 | int sysctl_tcp_fastopen; | 11 | int sysctl_tcp_fastopen __read_mostly; |
12 | |||
13 | struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; | ||
14 | |||
15 | static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); | ||
16 | |||
17 | static void tcp_fastopen_ctx_free(struct rcu_head *head) | ||
18 | { | ||
19 | struct tcp_fastopen_context *ctx = | ||
20 | container_of(head, struct tcp_fastopen_context, rcu); | ||
21 | crypto_free_cipher(ctx->tfm); | ||
22 | kfree(ctx); | ||
23 | } | ||
24 | |||
25 | int tcp_fastopen_reset_cipher(void *key, unsigned int len) | ||
26 | { | ||
27 | int err; | ||
28 | struct tcp_fastopen_context *ctx, *octx; | ||
29 | |||
30 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
31 | if (!ctx) | ||
32 | return -ENOMEM; | ||
33 | ctx->tfm = crypto_alloc_cipher("aes", 0, 0); | ||
34 | |||
35 | if (IS_ERR(ctx->tfm)) { | ||
36 | err = PTR_ERR(ctx->tfm); | ||
37 | error: kfree(ctx); | ||
38 | pr_err("TCP: TFO aes cipher alloc error: %d\n", err); | ||
39 | return err; | ||
40 | } | ||
41 | err = crypto_cipher_setkey(ctx->tfm, key, len); | ||
42 | if (err) { | ||
43 | pr_err("TCP: TFO cipher key error: %d\n", err); | ||
44 | crypto_free_cipher(ctx->tfm); | ||
45 | goto error; | ||
46 | } | ||
47 | memcpy(ctx->key, key, len); | ||
48 | |||
49 | spin_lock(&tcp_fastopen_ctx_lock); | ||
50 | |||
51 | octx = rcu_dereference_protected(tcp_fastopen_ctx, | ||
52 | lockdep_is_held(&tcp_fastopen_ctx_lock)); | ||
53 | rcu_assign_pointer(tcp_fastopen_ctx, ctx); | ||
54 | spin_unlock(&tcp_fastopen_ctx_lock); | ||
55 | |||
56 | if (octx) | ||
57 | call_rcu(&octx->rcu, tcp_fastopen_ctx_free); | ||
58 | return err; | ||
59 | } | ||
60 | |||
61 | /* Computes the fastopen cookie for the peer. | ||
62 | * The peer address is a 128 bits long (pad with zeros for IPv4). | ||
63 | * | ||
64 | * The caller must check foc->len to determine if a valid cookie | ||
65 | * has been generated successfully. | ||
66 | */ | ||
67 | void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) | ||
68 | { | ||
69 | __be32 peer_addr[4] = { addr, 0, 0, 0 }; | ||
70 | struct tcp_fastopen_context *ctx; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | ctx = rcu_dereference(tcp_fastopen_ctx); | ||
74 | if (ctx) { | ||
75 | crypto_cipher_encrypt_one(ctx->tfm, | ||
76 | foc->val, | ||
77 | (__u8 *)peer_addr); | ||
78 | foc->len = TCP_FASTOPEN_COOKIE_SIZE; | ||
79 | } | ||
80 | rcu_read_unlock(); | ||
81 | } | ||
5 | 82 | ||
6 | static int __init tcp_fastopen_init(void) | 83 | static int __init tcp_fastopen_init(void) |
7 | { | 84 | { |
85 | __u8 key[TCP_FASTOPEN_KEY_LENGTH]; | ||
86 | |||
87 | get_random_bytes(key, sizeof(key)); | ||
88 | tcp_fastopen_reset_cipher(key, sizeof(key)); | ||
8 | return 0; | 89 | return 0; |
9 | } | 90 | } |
10 | 91 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d377f4854cb8..432c36649db3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s | |||
237 | tcp_enter_quickack_mode((struct sock *)tp); | 237 | tcp_enter_quickack_mode((struct sock *)tp); |
238 | break; | 238 | break; |
239 | case INET_ECN_CE: | 239 | case INET_ECN_CE: |
240 | tp->ecn_flags |= TCP_ECN_DEMAND_CWR; | 240 | if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { |
241 | /* Better not delay acks, sender can have a very low cwnd */ | ||
242 | tcp_enter_quickack_mode((struct sock *)tp); | ||
243 | tp->ecn_flags |= TCP_ECN_DEMAND_CWR; | ||
244 | } | ||
241 | /* fallinto */ | 245 | /* fallinto */ |
242 | default: | 246 | default: |
243 | tp->ecn_flags |= TCP_ECN_SEEN; | 247 | tp->ecn_flags |= TCP_ECN_SEEN; |
@@ -374,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) | |||
374 | /* 4. Try to fixup all. It is made immediately after connection enters | 378 | /* 4. Try to fixup all. It is made immediately after connection enters |
375 | * established state. | 379 | * established state. |
376 | */ | 380 | */ |
377 | static void tcp_init_buffer_space(struct sock *sk) | 381 | void tcp_init_buffer_space(struct sock *sk) |
378 | { | 382 | { |
379 | struct tcp_sock *tp = tcp_sk(sk); | 383 | struct tcp_sock *tp = tcp_sk(sk); |
380 | int maxwin; | 384 | int maxwin; |
@@ -739,29 +743,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) | |||
739 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); | 743 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); |
740 | } | 744 | } |
741 | 745 | ||
742 | /* Set slow start threshold and cwnd not falling to slow start */ | ||
743 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) | ||
744 | { | ||
745 | struct tcp_sock *tp = tcp_sk(sk); | ||
746 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
747 | |||
748 | tp->prior_ssthresh = 0; | ||
749 | tp->bytes_acked = 0; | ||
750 | if (icsk->icsk_ca_state < TCP_CA_CWR) { | ||
751 | tp->undo_marker = 0; | ||
752 | if (set_ssthresh) | ||
753 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
754 | tp->snd_cwnd = min(tp->snd_cwnd, | ||
755 | tcp_packets_in_flight(tp) + 1U); | ||
756 | tp->snd_cwnd_cnt = 0; | ||
757 | tp->high_seq = tp->snd_nxt; | ||
758 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
759 | TCP_ECN_queue_cwr(tp); | ||
760 | |||
761 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
762 | } | ||
763 | } | ||
764 | |||
765 | /* | 746 | /* |
766 | * Packet counting of FACK is based on in-order assumptions, therefore TCP | 747 | * Packet counting of FACK is based on in-order assumptions, therefore TCP |
767 | * disables it when reordering is detected | 748 | * disables it when reordering is detected |
@@ -2489,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) | |||
2489 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2470 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2490 | } | 2471 | } |
2491 | 2472 | ||
2492 | /* Lower bound on congestion window is slow start threshold | ||
2493 | * unless congestion avoidance choice decides to overide it. | ||
2494 | */ | ||
2495 | static inline u32 tcp_cwnd_min(const struct sock *sk) | ||
2496 | { | ||
2497 | const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; | ||
2498 | |||
2499 | return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh; | ||
2500 | } | ||
2501 | |||
2502 | /* Decrease cwnd each second ack. */ | ||
2503 | static void tcp_cwnd_down(struct sock *sk, int flag) | ||
2504 | { | ||
2505 | struct tcp_sock *tp = tcp_sk(sk); | ||
2506 | int decr = tp->snd_cwnd_cnt + 1; | ||
2507 | |||
2508 | if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) || | ||
2509 | (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) { | ||
2510 | tp->snd_cwnd_cnt = decr & 1; | ||
2511 | decr >>= 1; | ||
2512 | |||
2513 | if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) | ||
2514 | tp->snd_cwnd -= decr; | ||
2515 | |||
2516 | tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); | ||
2517 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
2518 | } | ||
2519 | } | ||
2520 | |||
2521 | /* Nothing was retransmitted or returned timestamp is less | 2473 | /* Nothing was retransmitted or returned timestamp is less |
2522 | * than timestamp of the first retransmission. | 2474 | * than timestamp of the first retransmission. |
2523 | */ | 2475 | */ |
@@ -2719,24 +2671,80 @@ static bool tcp_try_undo_loss(struct sock *sk) | |||
2719 | return false; | 2671 | return false; |
2720 | } | 2672 | } |
2721 | 2673 | ||
2722 | static inline void tcp_complete_cwr(struct sock *sk) | 2674 | /* The cwnd reduction in CWR and Recovery use the PRR algorithm |
2675 | * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/ | ||
2676 | * It computes the number of packets to send (sndcnt) based on packets newly | ||
2677 | * delivered: | ||
2678 | * 1) If the packets in flight is larger than ssthresh, PRR spreads the | ||
2679 | * cwnd reductions across a full RTT. | ||
2680 | * 2) If packets in flight is lower than ssthresh (such as due to excess | ||
2681 | * losses and/or application stalls), do not perform any further cwnd | ||
2682 | * reductions, but instead slow start up to ssthresh. | ||
2683 | */ | ||
2684 | static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) | ||
2723 | { | 2685 | { |
2724 | struct tcp_sock *tp = tcp_sk(sk); | 2686 | struct tcp_sock *tp = tcp_sk(sk); |
2725 | 2687 | ||
2726 | /* Do not moderate cwnd if it's already undone in cwr or recovery. */ | 2688 | tp->high_seq = tp->snd_nxt; |
2727 | if (tp->undo_marker) { | 2689 | tp->bytes_acked = 0; |
2728 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { | 2690 | tp->snd_cwnd_cnt = 0; |
2729 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 2691 | tp->prior_cwnd = tp->snd_cwnd; |
2730 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2692 | tp->prr_delivered = 0; |
2731 | } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { | 2693 | tp->prr_out = 0; |
2732 | /* PRR algorithm. */ | 2694 | if (set_ssthresh) |
2733 | tp->snd_cwnd = tp->snd_ssthresh; | 2695 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); |
2734 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2696 | TCP_ECN_queue_cwr(tp); |
2735 | } | 2697 | } |
2698 | |||
2699 | static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, | ||
2700 | int fast_rexmit) | ||
2701 | { | ||
2702 | struct tcp_sock *tp = tcp_sk(sk); | ||
2703 | int sndcnt = 0; | ||
2704 | int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); | ||
2705 | |||
2706 | tp->prr_delivered += newly_acked_sacked; | ||
2707 | if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { | ||
2708 | u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + | ||
2709 | tp->prior_cwnd - 1; | ||
2710 | sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; | ||
2711 | } else { | ||
2712 | sndcnt = min_t(int, delta, | ||
2713 | max_t(int, tp->prr_delivered - tp->prr_out, | ||
2714 | newly_acked_sacked) + 1); | ||
2715 | } | ||
2716 | |||
2717 | sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); | ||
2718 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; | ||
2719 | } | ||
2720 | |||
2721 | static inline void tcp_end_cwnd_reduction(struct sock *sk) | ||
2722 | { | ||
2723 | struct tcp_sock *tp = tcp_sk(sk); | ||
2724 | |||
2725 | /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ | ||
2726 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || | ||
2727 | (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { | ||
2728 | tp->snd_cwnd = tp->snd_ssthresh; | ||
2729 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
2736 | } | 2730 | } |
2737 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); | 2731 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
2738 | } | 2732 | } |
2739 | 2733 | ||
2734 | /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ | ||
2735 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) | ||
2736 | { | ||
2737 | struct tcp_sock *tp = tcp_sk(sk); | ||
2738 | |||
2739 | tp->prior_ssthresh = 0; | ||
2740 | tp->bytes_acked = 0; | ||
2741 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
2742 | tp->undo_marker = 0; | ||
2743 | tcp_init_cwnd_reduction(sk, set_ssthresh); | ||
2744 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
2745 | } | ||
2746 | } | ||
2747 | |||
2740 | static void tcp_try_keep_open(struct sock *sk) | 2748 | static void tcp_try_keep_open(struct sock *sk) |
2741 | { | 2749 | { |
2742 | struct tcp_sock *tp = tcp_sk(sk); | 2750 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2751,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk) | |||
2751 | } | 2759 | } |
2752 | } | 2760 | } |
2753 | 2761 | ||
2754 | static void tcp_try_to_open(struct sock *sk, int flag) | 2762 | static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) |
2755 | { | 2763 | { |
2756 | struct tcp_sock *tp = tcp_sk(sk); | 2764 | struct tcp_sock *tp = tcp_sk(sk); |
2757 | 2765 | ||
@@ -2768,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) | |||
2768 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) | 2776 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) |
2769 | tcp_moderate_cwnd(tp); | 2777 | tcp_moderate_cwnd(tp); |
2770 | } else { | 2778 | } else { |
2771 | tcp_cwnd_down(sk, flag); | 2779 | tcp_cwnd_reduction(sk, newly_acked_sacked, 0); |
2772 | } | 2780 | } |
2773 | } | 2781 | } |
2774 | 2782 | ||
@@ -2850,38 +2858,6 @@ void tcp_simple_retransmit(struct sock *sk) | |||
2850 | } | 2858 | } |
2851 | EXPORT_SYMBOL(tcp_simple_retransmit); | 2859 | EXPORT_SYMBOL(tcp_simple_retransmit); |
2852 | 2860 | ||
2853 | /* This function implements the PRR algorithm, specifcally the PRR-SSRB | ||
2854 | * (proportional rate reduction with slow start reduction bound) as described in | ||
2855 | * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. | ||
2856 | * It computes the number of packets to send (sndcnt) based on packets newly | ||
2857 | * delivered: | ||
2858 | * 1) If the packets in flight is larger than ssthresh, PRR spreads the | ||
2859 | * cwnd reductions across a full RTT. | ||
2860 | * 2) If packets in flight is lower than ssthresh (such as due to excess | ||
2861 | * losses and/or application stalls), do not perform any further cwnd | ||
2862 | * reductions, but instead slow start up to ssthresh. | ||
2863 | */ | ||
2864 | static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, | ||
2865 | int fast_rexmit, int flag) | ||
2866 | { | ||
2867 | struct tcp_sock *tp = tcp_sk(sk); | ||
2868 | int sndcnt = 0; | ||
2869 | int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); | ||
2870 | |||
2871 | if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { | ||
2872 | u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + | ||
2873 | tp->prior_cwnd - 1; | ||
2874 | sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; | ||
2875 | } else { | ||
2876 | sndcnt = min_t(int, delta, | ||
2877 | max_t(int, tp->prr_delivered - tp->prr_out, | ||
2878 | newly_acked_sacked) + 1); | ||
2879 | } | ||
2880 | |||
2881 | sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); | ||
2882 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; | ||
2883 | } | ||
2884 | |||
2885 | static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | 2861 | static void tcp_enter_recovery(struct sock *sk, bool ece_ack) |
2886 | { | 2862 | { |
2887 | struct tcp_sock *tp = tcp_sk(sk); | 2863 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2894,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
2894 | 2870 | ||
2895 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 2871 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
2896 | 2872 | ||
2897 | tp->high_seq = tp->snd_nxt; | ||
2898 | tp->prior_ssthresh = 0; | 2873 | tp->prior_ssthresh = 0; |
2899 | tp->undo_marker = tp->snd_una; | 2874 | tp->undo_marker = tp->snd_una; |
2900 | tp->undo_retrans = tp->retrans_out; | 2875 | tp->undo_retrans = tp->retrans_out; |
@@ -2902,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
2902 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 2877 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
2903 | if (!ece_ack) | 2878 | if (!ece_ack) |
2904 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | 2879 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
2905 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | 2880 | tcp_init_cwnd_reduction(sk, true); |
2906 | TCP_ECN_queue_cwr(tp); | ||
2907 | } | 2881 | } |
2908 | |||
2909 | tp->bytes_acked = 0; | ||
2910 | tp->snd_cwnd_cnt = 0; | ||
2911 | tp->prior_cwnd = tp->snd_cwnd; | ||
2912 | tp->prr_delivered = 0; | ||
2913 | tp->prr_out = 0; | ||
2914 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 2882 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
2915 | } | 2883 | } |
2916 | 2884 | ||
@@ -2970,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2970 | /* CWR is to be held something *above* high_seq | 2938 | /* CWR is to be held something *above* high_seq |
2971 | * is ACKed for CWR bit to reach receiver. */ | 2939 | * is ACKed for CWR bit to reach receiver. */ |
2972 | if (tp->snd_una != tp->high_seq) { | 2940 | if (tp->snd_una != tp->high_seq) { |
2973 | tcp_complete_cwr(sk); | 2941 | tcp_end_cwnd_reduction(sk); |
2974 | tcp_set_ca_state(sk, TCP_CA_Open); | 2942 | tcp_set_ca_state(sk, TCP_CA_Open); |
2975 | } | 2943 | } |
2976 | break; | 2944 | break; |
@@ -2980,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
2980 | tcp_reset_reno_sack(tp); | 2948 | tcp_reset_reno_sack(tp); |
2981 | if (tcp_try_undo_recovery(sk)) | 2949 | if (tcp_try_undo_recovery(sk)) |
2982 | return; | 2950 | return; |
2983 | tcp_complete_cwr(sk); | 2951 | tcp_end_cwnd_reduction(sk); |
2984 | break; | 2952 | break; |
2985 | } | 2953 | } |
2986 | } | 2954 | } |
@@ -3021,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3021 | tcp_try_undo_dsack(sk); | 2989 | tcp_try_undo_dsack(sk); |
3022 | 2990 | ||
3023 | if (!tcp_time_to_recover(sk, flag)) { | 2991 | if (!tcp_time_to_recover(sk, flag)) { |
3024 | tcp_try_to_open(sk, flag); | 2992 | tcp_try_to_open(sk, flag, newly_acked_sacked); |
3025 | return; | 2993 | return; |
3026 | } | 2994 | } |
3027 | 2995 | ||
@@ -3043,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3043 | 3011 | ||
3044 | if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) | 3012 | if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) |
3045 | tcp_update_scoreboard(sk, fast_rexmit); | 3013 | tcp_update_scoreboard(sk, fast_rexmit); |
3046 | tp->prr_delivered += newly_acked_sacked; | 3014 | tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); |
3047 | tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); | ||
3048 | tcp_xmit_retransmit_queue(sk); | 3015 | tcp_xmit_retransmit_queue(sk); |
3049 | } | 3016 | } |
3050 | 3017 | ||
@@ -3123,6 +3090,12 @@ void tcp_rearm_rto(struct sock *sk) | |||
3123 | { | 3090 | { |
3124 | struct tcp_sock *tp = tcp_sk(sk); | 3091 | struct tcp_sock *tp = tcp_sk(sk); |
3125 | 3092 | ||
3093 | /* If the retrans timer is currently being used by Fast Open | ||
3094 | * for SYN-ACK retrans purpose, stay put. | ||
3095 | */ | ||
3096 | if (tp->fastopen_rsk) | ||
3097 | return; | ||
3098 | |||
3126 | if (!tp->packets_out) { | 3099 | if (!tp->packets_out) { |
3127 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3100 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3128 | } else { | 3101 | } else { |
@@ -3384,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) | |||
3384 | { | 3357 | { |
3385 | const struct tcp_sock *tp = tcp_sk(sk); | 3358 | const struct tcp_sock *tp = tcp_sk(sk); |
3386 | return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && | 3359 | return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && |
3387 | !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); | 3360 | !tcp_in_cwnd_reduction(sk); |
3388 | } | 3361 | } |
3389 | 3362 | ||
3390 | /* Check that window update is acceptable. | 3363 | /* Check that window update is acceptable. |
@@ -3452,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) | |||
3452 | } | 3425 | } |
3453 | 3426 | ||
3454 | /* A conservative spurious RTO response algorithm: reduce cwnd using | 3427 | /* A conservative spurious RTO response algorithm: reduce cwnd using |
3455 | * rate halving and continue in congestion avoidance. | 3428 | * PRR and continue in congestion avoidance. |
3456 | */ | 3429 | */ |
3457 | static void tcp_ratehalving_spur_to_response(struct sock *sk) | 3430 | static void tcp_cwr_spur_to_response(struct sock *sk) |
3458 | { | 3431 | { |
3459 | tcp_enter_cwr(sk, 0); | 3432 | tcp_enter_cwr(sk, 0); |
3460 | } | 3433 | } |
@@ -3462,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk) | |||
3462 | static void tcp_undo_spur_to_response(struct sock *sk, int flag) | 3435 | static void tcp_undo_spur_to_response(struct sock *sk, int flag) |
3463 | { | 3436 | { |
3464 | if (flag & FLAG_ECE) | 3437 | if (flag & FLAG_ECE) |
3465 | tcp_ratehalving_spur_to_response(sk); | 3438 | tcp_cwr_spur_to_response(sk); |
3466 | else | 3439 | else |
3467 | tcp_undo_cwr(sk, true); | 3440 | tcp_undo_cwr(sk, true); |
3468 | } | 3441 | } |
@@ -3569,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag) | |||
3569 | tcp_conservative_spur_to_response(tp); | 3542 | tcp_conservative_spur_to_response(tp); |
3570 | break; | 3543 | break; |
3571 | default: | 3544 | default: |
3572 | tcp_ratehalving_spur_to_response(sk); | 3545 | tcp_cwr_spur_to_response(sk); |
3573 | break; | 3546 | break; |
3574 | } | 3547 | } |
3575 | tp->frto_counter = 0; | 3548 | tp->frto_counter = 0; |
@@ -4034,7 +4007,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) | |||
4034 | } | 4007 | } |
4035 | 4008 | ||
4036 | /* When we get a reset we do this. */ | 4009 | /* When we get a reset we do this. */ |
4037 | static void tcp_reset(struct sock *sk) | 4010 | void tcp_reset(struct sock *sk) |
4038 | { | 4011 | { |
4039 | /* We want the right error as BSD sees it (and indeed as we do). */ | 4012 | /* We want the right error as BSD sees it (and indeed as we do). */ |
4040 | switch (sk->sk_state) { | 4013 | switch (sk->sk_state) { |
@@ -5740,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5740 | 5713 | ||
5741 | TCP_ECN_rcv_synack(tp, th); | 5714 | TCP_ECN_rcv_synack(tp, th); |
5742 | 5715 | ||
5743 | tp->snd_wl1 = TCP_SKB_CB(skb)->seq; | 5716 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5744 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 5717 | tcp_ack(sk, skb, FLAG_SLOWPATH); |
5745 | 5718 | ||
5746 | /* Ok.. it's good. Set up sequence numbers and | 5719 | /* Ok.. it's good. Set up sequence numbers and |
@@ -5753,7 +5726,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5753 | * never scaled. | 5726 | * never scaled. |
5754 | */ | 5727 | */ |
5755 | tp->snd_wnd = ntohs(th->window); | 5728 | tp->snd_wnd = ntohs(th->window); |
5756 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | ||
5757 | 5729 | ||
5758 | if (!tp->rx_opt.wscale_ok) { | 5730 | if (!tp->rx_opt.wscale_ok) { |
5759 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; | 5731 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; |
@@ -5891,7 +5863,9 @@ discard: | |||
5891 | tcp_send_synack(sk); | 5863 | tcp_send_synack(sk); |
5892 | #if 0 | 5864 | #if 0 |
5893 | /* Note, we could accept data and URG from this segment. | 5865 | /* Note, we could accept data and URG from this segment. |
5894 | * There are no obstacles to make this. | 5866 | * There are no obstacles to make this (except that we must |
5867 | * either change tcp_recvmsg() to prevent it from returning data | ||
5868 | * before 3WHS completes per RFC793, or employ TCP Fast Open). | ||
5895 | * | 5869 | * |
5896 | * However, if we ignore data in ACKless segments sometimes, | 5870 | * However, if we ignore data in ACKless segments sometimes, |
5897 | * we have no reasons to accept it sometimes. | 5871 | * we have no reasons to accept it sometimes. |
@@ -5931,6 +5905,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5931 | { | 5905 | { |
5932 | struct tcp_sock *tp = tcp_sk(sk); | 5906 | struct tcp_sock *tp = tcp_sk(sk); |
5933 | struct inet_connection_sock *icsk = inet_csk(sk); | 5907 | struct inet_connection_sock *icsk = inet_csk(sk); |
5908 | struct request_sock *req; | ||
5934 | int queued = 0; | 5909 | int queued = 0; |
5935 | 5910 | ||
5936 | tp->rx_opt.saw_tstamp = 0; | 5911 | tp->rx_opt.saw_tstamp = 0; |
@@ -5986,6 +5961,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5986 | return 0; | 5961 | return 0; |
5987 | } | 5962 | } |
5988 | 5963 | ||
5964 | req = tp->fastopen_rsk; | ||
5965 | if (req != NULL) { | ||
5966 | BUG_ON(sk->sk_state != TCP_SYN_RECV && | ||
5967 | sk->sk_state != TCP_FIN_WAIT1); | ||
5968 | |||
5969 | if (tcp_check_req(sk, skb, req, NULL, true) == NULL) | ||
5970 | goto discard; | ||
5971 | } | ||
5989 | if (!tcp_validate_incoming(sk, skb, th, 0)) | 5972 | if (!tcp_validate_incoming(sk, skb, th, 0)) |
5990 | return 0; | 5973 | return 0; |
5991 | 5974 | ||
@@ -5996,7 +5979,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5996 | switch (sk->sk_state) { | 5979 | switch (sk->sk_state) { |
5997 | case TCP_SYN_RECV: | 5980 | case TCP_SYN_RECV: |
5998 | if (acceptable) { | 5981 | if (acceptable) { |
5999 | tp->copied_seq = tp->rcv_nxt; | 5982 | /* Once we leave TCP_SYN_RECV, we no longer |
5983 | * need req so release it. | ||
5984 | */ | ||
5985 | if (req) { | ||
5986 | tcp_synack_rtt_meas(sk, req); | ||
5987 | tp->total_retrans = req->retrans; | ||
5988 | |||
5989 | reqsk_fastopen_remove(sk, req, false); | ||
5990 | } else { | ||
5991 | /* Make sure socket is routed, for | ||
5992 | * correct metrics. | ||
5993 | */ | ||
5994 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5995 | tcp_init_congestion_control(sk); | ||
5996 | |||
5997 | tcp_mtup_init(sk); | ||
5998 | tcp_init_buffer_space(sk); | ||
5999 | tp->copied_seq = tp->rcv_nxt; | ||
6000 | } | ||
6000 | smp_mb(); | 6001 | smp_mb(); |
6001 | tcp_set_state(sk, TCP_ESTABLISHED); | 6002 | tcp_set_state(sk, TCP_ESTABLISHED); |
6002 | sk->sk_state_change(sk); | 6003 | sk->sk_state_change(sk); |
@@ -6018,23 +6019,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6018 | if (tp->rx_opt.tstamp_ok) | 6019 | if (tp->rx_opt.tstamp_ok) |
6019 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 6020 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
6020 | 6021 | ||
6021 | /* Make sure socket is routed, for | 6022 | if (req) { |
6022 | * correct metrics. | 6023 | /* Re-arm the timer because data may |
6023 | */ | 6024 | * have been sent out. This is similar |
6024 | icsk->icsk_af_ops->rebuild_header(sk); | 6025 | * to the regular data transmission case |
6025 | 6026 | * when new data has just been ack'ed. | |
6026 | tcp_init_metrics(sk); | 6027 | * |
6027 | 6028 | * (TFO) - we could try to be more | |
6028 | tcp_init_congestion_control(sk); | 6029 | * aggressive and retransmit any data |
6030 | * sooner based on when they were sent | ||
6031 | * out. | ||
6032 | */ | ||
6033 | tcp_rearm_rto(sk); | ||
6034 | } else | ||
6035 | tcp_init_metrics(sk); | ||
6029 | 6036 | ||
6030 | /* Prevent spurious tcp_cwnd_restart() on | 6037 | /* Prevent spurious tcp_cwnd_restart() on |
6031 | * first data packet. | 6038 | * first data packet. |
6032 | */ | 6039 | */ |
6033 | tp->lsndtime = tcp_time_stamp; | 6040 | tp->lsndtime = tcp_time_stamp; |
6034 | 6041 | ||
6035 | tcp_mtup_init(sk); | ||
6036 | tcp_initialize_rcv_mss(sk); | 6042 | tcp_initialize_rcv_mss(sk); |
6037 | tcp_init_buffer_space(sk); | ||
6038 | tcp_fast_path_on(tp); | 6043 | tcp_fast_path_on(tp); |
6039 | } else { | 6044 | } else { |
6040 | return 1; | 6045 | return 1; |
@@ -6042,6 +6047,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6042 | break; | 6047 | break; |
6043 | 6048 | ||
6044 | case TCP_FIN_WAIT1: | 6049 | case TCP_FIN_WAIT1: |
6050 | /* If we enter the TCP_FIN_WAIT1 state and we are a | ||
6051 | * Fast Open socket and this is the first acceptable | ||
6052 | * ACK we have received, this would have acknowledged | ||
6053 | * our SYNACK so stop the SYNACK timer. | ||
6054 | */ | ||
6055 | if (acceptable && req != NULL) { | ||
6056 | /* We no longer need the request sock. */ | ||
6057 | reqsk_fastopen_remove(sk, req, false); | ||
6058 | tcp_rearm_rto(sk); | ||
6059 | } | ||
6045 | if (tp->snd_una == tp->write_seq) { | 6060 | if (tp->snd_una == tp->write_seq) { |
6046 | struct dst_entry *dst; | 6061 | struct dst_entry *dst; |
6047 | 6062 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index be23a0b7b89e..75735c9a6a9d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
352 | const int code = icmp_hdr(icmp_skb)->code; | 352 | const int code = icmp_hdr(icmp_skb)->code; |
353 | struct sock *sk; | 353 | struct sock *sk; |
354 | struct sk_buff *skb; | 354 | struct sk_buff *skb; |
355 | struct request_sock *req; | ||
355 | __u32 seq; | 356 | __u32 seq; |
356 | __u32 remaining; | 357 | __u32 remaining; |
357 | int err; | 358 | int err; |
@@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
394 | 395 | ||
395 | icsk = inet_csk(sk); | 396 | icsk = inet_csk(sk); |
396 | tp = tcp_sk(sk); | 397 | tp = tcp_sk(sk); |
398 | req = tp->fastopen_rsk; | ||
397 | seq = ntohl(th->seq); | 399 | seq = ntohl(th->seq); |
398 | if (sk->sk_state != TCP_LISTEN && | 400 | if (sk->sk_state != TCP_LISTEN && |
399 | !between(seq, tp->snd_una, tp->snd_nxt)) { | 401 | !between(seq, tp->snd_una, tp->snd_nxt) && |
402 | (req == NULL || seq != tcp_rsk(req)->snt_isn)) { | ||
403 | /* For a Fast Open socket, allow seq to be snt_isn. */ | ||
400 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); | 404 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); |
401 | goto out; | 405 | goto out; |
402 | } | 406 | } |
@@ -435,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
435 | !icsk->icsk_backoff) | 439 | !icsk->icsk_backoff) |
436 | break; | 440 | break; |
437 | 441 | ||
442 | /* XXX (TFO) - revisit the following logic for TFO */ | ||
443 | |||
438 | if (sock_owned_by_user(sk)) | 444 | if (sock_owned_by_user(sk)) |
439 | break; | 445 | break; |
440 | 446 | ||
@@ -466,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
466 | goto out; | 472 | goto out; |
467 | } | 473 | } |
468 | 474 | ||
475 | /* XXX (TFO) - if it's a TFO socket and has been accepted, rather | ||
476 | * than following the TCP_SYN_RECV case and closing the socket, | ||
477 | * we ignore the ICMP error and keep trying like a fully established | ||
478 | * socket. Is this the right thing to do? | ||
479 | */ | ||
480 | if (req && req->sk == NULL) | ||
481 | goto out; | ||
482 | |||
469 | switch (sk->sk_state) { | 483 | switch (sk->sk_state) { |
470 | struct request_sock *req, **prev; | 484 | struct request_sock *req, **prev; |
471 | case TCP_LISTEN: | 485 | case TCP_LISTEN: |
@@ -498,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
498 | 512 | ||
499 | case TCP_SYN_SENT: | 513 | case TCP_SYN_SENT: |
500 | case TCP_SYN_RECV: /* Cannot happen. | 514 | case TCP_SYN_RECV: /* Cannot happen. |
501 | It can f.e. if SYNs crossed. | 515 | It can f.e. if SYNs crossed, |
516 | or Fast Open. | ||
502 | */ | 517 | */ |
503 | if (!sock_owned_by_user(sk)) { | 518 | if (!sock_owned_by_user(sk)) { |
504 | sk->sk_err = err; | 519 | sk->sk_err = err; |
@@ -809,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | |||
809 | static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | 824 | static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, |
810 | struct request_sock *req) | 825 | struct request_sock *req) |
811 | { | 826 | { |
812 | tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, | 827 | /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV |
813 | tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, | 828 | * sk->sk_state == TCP_SYN_RECV -> for Fast Open. |
829 | */ | ||
830 | tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? | ||
831 | tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, | ||
832 | tcp_rsk(req)->rcv_nxt, req->rcv_wnd, | ||
814 | req->ts_recent, | 833 | req->ts_recent, |
815 | 0, | 834 | 0, |
816 | tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, | 835 | tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, |
@@ -839,7 +858,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
839 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) | 858 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) |
840 | return -1; | 859 | return -1; |
841 | 860 | ||
842 | skb = tcp_make_synack(sk, dst, req, rvp); | 861 | skb = tcp_make_synack(sk, dst, req, rvp, NULL); |
843 | 862 | ||
844 | if (skb) { | 863 | if (skb) { |
845 | __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); | 864 | __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); |
@@ -849,6 +868,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
849 | ireq->rmt_addr, | 868 | ireq->rmt_addr, |
850 | ireq->opt); | 869 | ireq->opt); |
851 | err = net_xmit_eval(err); | 870 | err = net_xmit_eval(err); |
871 | if (!tcp_rsk(req)->snt_synack && !err) | ||
872 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
852 | } | 873 | } |
853 | 874 | ||
854 | return err; | 875 | return err; |
@@ -904,8 +925,7 @@ EXPORT_SYMBOL(tcp_syn_flood_action); | |||
904 | /* | 925 | /* |
905 | * Save and compile IPv4 options into the request_sock if needed. | 926 | * Save and compile IPv4 options into the request_sock if needed. |
906 | */ | 927 | */ |
907 | static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, | 928 | static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) |
908 | struct sk_buff *skb) | ||
909 | { | 929 | { |
910 | const struct ip_options *opt = &(IPCB(skb)->opt); | 930 | const struct ip_options *opt = &(IPCB(skb)->opt); |
911 | struct ip_options_rcu *dopt = NULL; | 931 | struct ip_options_rcu *dopt = NULL; |
@@ -1272,6 +1292,182 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { | |||
1272 | }; | 1292 | }; |
1273 | #endif | 1293 | #endif |
1274 | 1294 | ||
1295 | static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, | ||
1296 | struct request_sock *req, | ||
1297 | struct tcp_fastopen_cookie *foc, | ||
1298 | struct tcp_fastopen_cookie *valid_foc) | ||
1299 | { | ||
1300 | bool skip_cookie = false; | ||
1301 | struct fastopen_queue *fastopenq; | ||
1302 | |||
1303 | if (likely(!fastopen_cookie_present(foc))) { | ||
1304 | /* See include/net/tcp.h for the meaning of these knobs */ | ||
1305 | if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || | ||
1306 | ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && | ||
1307 | (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) | ||
1308 | skip_cookie = true; /* no cookie to validate */ | ||
1309 | else | ||
1310 | return false; | ||
1311 | } | ||
1312 | fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; | ||
1313 | /* A FO option is present; bump the counter. */ | ||
1314 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); | ||
1315 | |||
1316 | /* Make sure the listener has enabled fastopen, and we don't | ||
1317 | * exceed the max # of pending TFO requests allowed before trying | ||
1318 | * to validate the cookie in order to avoid burning CPU cycles | ||
1319 | * unnecessarily. | ||
1320 | * | ||
1321 | * XXX (TFO) - The implication of checking the max_qlen before | ||
1322 | * processing a cookie request is that clients can't differentiate | ||
1323 | * between qlen overflow causing Fast Open to be disabled | ||
1324 | * temporarily vs a server not supporting Fast Open at all. | ||
1325 | */ | ||
1326 | if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || | ||
1327 | fastopenq == NULL || fastopenq->max_qlen == 0) | ||
1328 | return false; | ||
1329 | |||
1330 | if (fastopenq->qlen >= fastopenq->max_qlen) { | ||
1331 | struct request_sock *req1; | ||
1332 | spin_lock(&fastopenq->lock); | ||
1333 | req1 = fastopenq->rskq_rst_head; | ||
1334 | if ((req1 == NULL) || time_after(req1->expires, jiffies)) { | ||
1335 | spin_unlock(&fastopenq->lock); | ||
1336 | NET_INC_STATS_BH(sock_net(sk), | ||
1337 | LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); | ||
1338 | /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ | ||
1339 | foc->len = -1; | ||
1340 | return false; | ||
1341 | } | ||
1342 | fastopenq->rskq_rst_head = req1->dl_next; | ||
1343 | fastopenq->qlen--; | ||
1344 | spin_unlock(&fastopenq->lock); | ||
1345 | reqsk_free(req1); | ||
1346 | } | ||
1347 | if (skip_cookie) { | ||
1348 | tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
1349 | return true; | ||
1350 | } | ||
1351 | if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { | ||
1352 | if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { | ||
1353 | tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); | ||
1354 | if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || | ||
1355 | memcmp(&foc->val[0], &valid_foc->val[0], | ||
1356 | TCP_FASTOPEN_COOKIE_SIZE) != 0) | ||
1357 | return false; | ||
1358 | valid_foc->len = -1; | ||
1359 | } | ||
1360 | /* Acknowledge the data received from the peer. */ | ||
1361 | tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
1362 | return true; | ||
1363 | } else if (foc->len == 0) { /* Client requesting a cookie */ | ||
1364 | tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); | ||
1365 | NET_INC_STATS_BH(sock_net(sk), | ||
1366 | LINUX_MIB_TCPFASTOPENCOOKIEREQD); | ||
1367 | } else { | ||
1368 | /* Client sent a cookie with wrong size. Treat it | ||
1369 | * the same as invalid and return a valid one. | ||
1370 | */ | ||
1371 | tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); | ||
1372 | } | ||
1373 | return false; | ||
1374 | } | ||
1375 | |||
1376 | static int tcp_v4_conn_req_fastopen(struct sock *sk, | ||
1377 | struct sk_buff *skb, | ||
1378 | struct sk_buff *skb_synack, | ||
1379 | struct request_sock *req, | ||
1380 | struct request_values *rvp) | ||
1381 | { | ||
1382 | struct tcp_sock *tp = tcp_sk(sk); | ||
1383 | struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; | ||
1384 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
1385 | struct sock *child; | ||
1386 | int err; | ||
1387 | |||
1388 | req->retrans = 0; | ||
1389 | req->sk = NULL; | ||
1390 | |||
1391 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); | ||
1392 | if (child == NULL) { | ||
1393 | NET_INC_STATS_BH(sock_net(sk), | ||
1394 | LINUX_MIB_TCPFASTOPENPASSIVEFAIL); | ||
1395 | kfree_skb(skb_synack); | ||
1396 | return -1; | ||
1397 | } | ||
1398 | err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, | ||
1399 | ireq->rmt_addr, ireq->opt); | ||
1400 | err = net_xmit_eval(err); | ||
1401 | if (!err) | ||
1402 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1403 | /* XXX (TFO) - is it ok to ignore error and continue? */ | ||
1404 | |||
1405 | spin_lock(&queue->fastopenq->lock); | ||
1406 | queue->fastopenq->qlen++; | ||
1407 | spin_unlock(&queue->fastopenq->lock); | ||
1408 | |||
1409 | /* Initialize the child socket. Have to fix some values to take | ||
1410 | * into account the child is a Fast Open socket and is created | ||
1411 | * only out of the bits carried in the SYN packet. | ||
1412 | */ | ||
1413 | tp = tcp_sk(child); | ||
1414 | |||
1415 | tp->fastopen_rsk = req; | ||
1416 | /* Do a hold on the listener sk so that if the listener is being | ||
1417 | * closed, the child that has been accepted can live on and still | ||
1418 | * access listen_lock. | ||
1419 | */ | ||
1420 | sock_hold(sk); | ||
1421 | tcp_rsk(req)->listener = sk; | ||
1422 | |||
1423 | /* RFC1323: The window in SYN & SYN/ACK segments is never | ||
1424 | * scaled. So correct it appropriately. | ||
1425 | */ | ||
1426 | tp->snd_wnd = ntohs(tcp_hdr(skb)->window); | ||
1427 | |||
1428 | /* Activate the retrans timer so that SYNACK can be retransmitted. | ||
1429 | * The request socket is not added to the SYN table of the parent | ||
1430 | * because it's been added to the accept queue directly. | ||
1431 | */ | ||
1432 | inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, | ||
1433 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); | ||
1434 | |||
1435 | /* Add the child socket directly into the accept queue */ | ||
1436 | inet_csk_reqsk_queue_add(sk, req, child); | ||
1437 | |||
1438 | /* Now finish processing the fastopen child socket. */ | ||
1439 | inet_csk(child)->icsk_af_ops->rebuild_header(child); | ||
1440 | tcp_init_congestion_control(child); | ||
1441 | tcp_mtup_init(child); | ||
1442 | tcp_init_buffer_space(child); | ||
1443 | tcp_init_metrics(child); | ||
1444 | |||
1445 | /* Queue the data carried in the SYN packet. We need to first | ||
1446 | * bump skb's refcnt because the caller will attempt to free it. | ||
1447 | * | ||
1448 | * XXX (TFO) - we honor a zero-payload TFO request for now. | ||
1449 | * (Any reason not to?) | ||
1450 | */ | ||
1451 | if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { | ||
1452 | /* Don't queue the skb if there is no payload in SYN. | ||
1453 | * XXX (TFO) - How about SYN+FIN? | ||
1454 | */ | ||
1455 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
1456 | } else { | ||
1457 | skb = skb_get(skb); | ||
1458 | skb_dst_drop(skb); | ||
1459 | __skb_pull(skb, tcp_hdr(skb)->doff * 4); | ||
1460 | skb_set_owner_r(skb, child); | ||
1461 | __skb_queue_tail(&child->sk_receive_queue, skb); | ||
1462 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
1463 | } | ||
1464 | sk->sk_data_ready(sk, 0); | ||
1465 | bh_unlock_sock(child); | ||
1466 | sock_put(child); | ||
1467 | WARN_ON(req->sk == NULL); | ||
1468 | return 0; | ||
1469 | } | ||
1470 | |||
1275 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 1471 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
1276 | { | 1472 | { |
1277 | struct tcp_extend_values tmp_ext; | 1473 | struct tcp_extend_values tmp_ext; |
@@ -1285,6 +1481,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1285 | __be32 daddr = ip_hdr(skb)->daddr; | 1481 | __be32 daddr = ip_hdr(skb)->daddr; |
1286 | __u32 isn = TCP_SKB_CB(skb)->when; | 1482 | __u32 isn = TCP_SKB_CB(skb)->when; |
1287 | bool want_cookie = false; | 1483 | bool want_cookie = false; |
1484 | struct flowi4 fl4; | ||
1485 | struct tcp_fastopen_cookie foc = { .len = -1 }; | ||
1486 | struct tcp_fastopen_cookie valid_foc = { .len = -1 }; | ||
1487 | struct sk_buff *skb_synack; | ||
1488 | int do_fastopen; | ||
1288 | 1489 | ||
1289 | /* Never answer to SYNs send to broadcast or multicast */ | 1490 | /* Never answer to SYNs send to broadcast or multicast */ |
1290 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | 1491 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
@@ -1319,7 +1520,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1319 | tcp_clear_options(&tmp_opt); | 1520 | tcp_clear_options(&tmp_opt); |
1320 | tmp_opt.mss_clamp = TCP_MSS_DEFAULT; | 1521 | tmp_opt.mss_clamp = TCP_MSS_DEFAULT; |
1321 | tmp_opt.user_mss = tp->rx_opt.user_mss; | 1522 | tmp_opt.user_mss = tp->rx_opt.user_mss; |
1322 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); | 1523 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0, |
1524 | want_cookie ? NULL : &foc); | ||
1323 | 1525 | ||
1324 | if (tmp_opt.cookie_plus > 0 && | 1526 | if (tmp_opt.cookie_plus > 0 && |
1325 | tmp_opt.saw_tstamp && | 1527 | tmp_opt.saw_tstamp && |
@@ -1365,7 +1567,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1365 | ireq->loc_addr = daddr; | 1567 | ireq->loc_addr = daddr; |
1366 | ireq->rmt_addr = saddr; | 1568 | ireq->rmt_addr = saddr; |
1367 | ireq->no_srccheck = inet_sk(sk)->transparent; | 1569 | ireq->no_srccheck = inet_sk(sk)->transparent; |
1368 | ireq->opt = tcp_v4_save_options(sk, skb); | 1570 | ireq->opt = tcp_v4_save_options(skb); |
1369 | 1571 | ||
1370 | if (security_inet_conn_request(sk, skb, req)) | 1572 | if (security_inet_conn_request(sk, skb, req)) |
1371 | goto drop_and_free; | 1573 | goto drop_and_free; |
@@ -1377,8 +1579,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1377 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1579 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
1378 | req->cookie_ts = tmp_opt.tstamp_ok; | 1580 | req->cookie_ts = tmp_opt.tstamp_ok; |
1379 | } else if (!isn) { | 1581 | } else if (!isn) { |
1380 | struct flowi4 fl4; | ||
1381 | |||
1382 | /* VJ's idea. We save last timestamp seen | 1582 | /* VJ's idea. We save last timestamp seen |
1383 | * from the destination in peer table, when entering | 1583 | * from the destination in peer table, when entering |
1384 | * state TIME-WAIT, and check against it before | 1584 | * state TIME-WAIT, and check against it before |
@@ -1417,16 +1617,54 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1417 | isn = tcp_v4_init_sequence(skb); | 1617 | isn = tcp_v4_init_sequence(skb); |
1418 | } | 1618 | } |
1419 | tcp_rsk(req)->snt_isn = isn; | 1619 | tcp_rsk(req)->snt_isn = isn; |
1420 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1421 | 1620 | ||
1422 | if (tcp_v4_send_synack(sk, dst, req, | 1621 | if (dst == NULL) { |
1423 | (struct request_values *)&tmp_ext, | 1622 | dst = inet_csk_route_req(sk, &fl4, req); |
1424 | skb_get_queue_mapping(skb), | 1623 | if (dst == NULL) |
1425 | want_cookie) || | 1624 | goto drop_and_free; |
1426 | want_cookie) | 1625 | } |
1626 | do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); | ||
1627 | |||
1628 | /* We don't call tcp_v4_send_synack() directly because we need | ||
1629 | * to make sure a child socket can be created successfully before | ||
1630 | * sending back synack! | ||
1631 | * | ||
1632 | * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() | ||
1633 | * (or better yet, call tcp_send_synack() in the child context | ||
1634 | * directly, but will have to fix bunch of other code first) | ||
1635 | * after syn_recv_sock() except one will need to first fix the | ||
1636 | * latter to remove its dependency on the current implementation | ||
1637 | * of tcp_v4_send_synack()->tcp_select_initial_window(). | ||
1638 | */ | ||
1639 | skb_synack = tcp_make_synack(sk, dst, req, | ||
1640 | (struct request_values *)&tmp_ext, | ||
1641 | fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); | ||
1642 | |||
1643 | if (skb_synack) { | ||
1644 | __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr); | ||
1645 | skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); | ||
1646 | } else | ||
1647 | goto drop_and_free; | ||
1648 | |||
1649 | if (likely(!do_fastopen)) { | ||
1650 | int err; | ||
1651 | err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, | ||
1652 | ireq->rmt_addr, ireq->opt); | ||
1653 | err = net_xmit_eval(err); | ||
1654 | if (err || want_cookie) | ||
1655 | goto drop_and_free; | ||
1656 | |||
1657 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1658 | tcp_rsk(req)->listener = NULL; | ||
1659 | /* Add the request_sock to the SYN table */ | ||
1660 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | ||
1661 | if (fastopen_cookie_present(&foc) && foc.len != 0) | ||
1662 | NET_INC_STATS_BH(sock_net(sk), | ||
1663 | LINUX_MIB_TCPFASTOPENPASSIVEFAIL); | ||
1664 | } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, | ||
1665 | (struct request_values *)&tmp_ext)) | ||
1427 | goto drop_and_free; | 1666 | goto drop_and_free; |
1428 | 1667 | ||
1429 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | ||
1430 | return 0; | 1668 | return 0; |
1431 | 1669 | ||
1432 | drop_and_release: | 1670 | drop_and_release: |
@@ -1500,9 +1738,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1500 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | 1738 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; |
1501 | 1739 | ||
1502 | tcp_initialize_rcv_mss(newsk); | 1740 | tcp_initialize_rcv_mss(newsk); |
1503 | if (tcp_rsk(req)->snt_synack) | 1741 | tcp_synack_rtt_meas(newsk, req); |
1504 | tcp_valid_rtt_meas(newsk, | ||
1505 | tcp_time_stamp - tcp_rsk(req)->snt_synack); | ||
1506 | newtp->total_retrans = req->retrans; | 1742 | newtp->total_retrans = req->retrans; |
1507 | 1743 | ||
1508 | #ifdef CONFIG_TCP_MD5SIG | 1744 | #ifdef CONFIG_TCP_MD5SIG |
@@ -1554,7 +1790,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1554 | struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, | 1790 | struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, |
1555 | iph->saddr, iph->daddr); | 1791 | iph->saddr, iph->daddr); |
1556 | if (req) | 1792 | if (req) |
1557 | return tcp_check_req(sk, skb, req, prev); | 1793 | return tcp_check_req(sk, skb, req, prev, false); |
1558 | 1794 | ||
1559 | nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, | 1795 | nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, |
1560 | th->source, iph->daddr, th->dest, inet_iif(skb)); | 1796 | th->source, iph->daddr, th->dest, inet_iif(skb)); |
@@ -1963,20 +2199,13 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
1963 | if (inet_csk(sk)->icsk_bind_hash) | 2199 | if (inet_csk(sk)->icsk_bind_hash) |
1964 | inet_put_port(sk); | 2200 | inet_put_port(sk); |
1965 | 2201 | ||
1966 | /* | ||
1967 | * If sendmsg cached page exists, toss it. | ||
1968 | */ | ||
1969 | if (sk->sk_sndmsg_page) { | ||
1970 | __free_page(sk->sk_sndmsg_page); | ||
1971 | sk->sk_sndmsg_page = NULL; | ||
1972 | } | ||
1973 | |||
1974 | /* TCP Cookie Transactions */ | 2202 | /* TCP Cookie Transactions */ |
1975 | if (tp->cookie_values != NULL) { | 2203 | if (tp->cookie_values != NULL) { |
1976 | kref_put(&tp->cookie_values->kref, | 2204 | kref_put(&tp->cookie_values->kref, |
1977 | tcp_cookie_values_release); | 2205 | tcp_cookie_values_release); |
1978 | tp->cookie_values = NULL; | 2206 | tp->cookie_values = NULL; |
1979 | } | 2207 | } |
2208 | BUG_ON(tp->fastopen_rsk != NULL); | ||
1980 | 2209 | ||
1981 | /* If socket is aborted during connect operation */ | 2210 | /* If socket is aborted during connect operation */ |
1982 | tcp_free_fastopen_req(tp); | 2211 | tcp_free_fastopen_req(tp); |
@@ -2396,7 +2625,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, | |||
2396 | struct seq_file *f, int i, kuid_t uid, int *len) | 2625 | struct seq_file *f, int i, kuid_t uid, int *len) |
2397 | { | 2626 | { |
2398 | const struct inet_request_sock *ireq = inet_rsk(req); | 2627 | const struct inet_request_sock *ireq = inet_rsk(req); |
2399 | int ttd = req->expires - jiffies; | 2628 | long delta = req->expires - jiffies; |
2400 | 2629 | ||
2401 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" | 2630 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" |
2402 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", | 2631 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", |
@@ -2408,7 +2637,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, | |||
2408 | TCP_SYN_RECV, | 2637 | TCP_SYN_RECV, |
2409 | 0, 0, /* could print option size, but that is af dependent. */ | 2638 | 0, 0, /* could print option size, but that is af dependent. */ |
2410 | 1, /* timers active (only the expire timer) */ | 2639 | 1, /* timers active (only the expire timer) */ |
2411 | jiffies_to_clock_t(ttd), | 2640 | jiffies_delta_to_clock_t(delta), |
2412 | req->retrans, | 2641 | req->retrans, |
2413 | from_kuid_munged(seq_user_ns(f), uid), | 2642 | from_kuid_munged(seq_user_ns(f), uid), |
2414 | 0, /* non standard timer */ | 2643 | 0, /* non standard timer */ |
@@ -2425,6 +2654,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) | |||
2425 | const struct tcp_sock *tp = tcp_sk(sk); | 2654 | const struct tcp_sock *tp = tcp_sk(sk); |
2426 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2655 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2427 | const struct inet_sock *inet = inet_sk(sk); | 2656 | const struct inet_sock *inet = inet_sk(sk); |
2657 | struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; | ||
2428 | __be32 dest = inet->inet_daddr; | 2658 | __be32 dest = inet->inet_daddr; |
2429 | __be32 src = inet->inet_rcv_saddr; | 2659 | __be32 src = inet->inet_rcv_saddr; |
2430 | __u16 destp = ntohs(inet->inet_dport); | 2660 | __u16 destp = ntohs(inet->inet_dport); |
@@ -2459,7 +2689,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) | |||
2459 | tp->write_seq - tp->snd_una, | 2689 | tp->write_seq - tp->snd_una, |
2460 | rx_queue, | 2690 | rx_queue, |
2461 | timer_active, | 2691 | timer_active, |
2462 | jiffies_to_clock_t(timer_expires - jiffies), | 2692 | jiffies_delta_to_clock_t(timer_expires - jiffies), |
2463 | icsk->icsk_retransmits, | 2693 | icsk->icsk_retransmits, |
2464 | from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), | 2694 | from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), |
2465 | icsk->icsk_probes_out, | 2695 | icsk->icsk_probes_out, |
@@ -2469,7 +2699,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) | |||
2469 | jiffies_to_clock_t(icsk->icsk_ack.ato), | 2699 | jiffies_to_clock_t(icsk->icsk_ack.ato), |
2470 | (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, | 2700 | (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, |
2471 | tp->snd_cwnd, | 2701 | tp->snd_cwnd, |
2472 | tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh, | 2702 | sk->sk_state == TCP_LISTEN ? |
2703 | (fastopenq ? fastopenq->max_qlen : 0) : | ||
2704 | (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh), | ||
2473 | len); | 2705 | len); |
2474 | } | 2706 | } |
2475 | 2707 | ||
@@ -2478,10 +2710,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, | |||
2478 | { | 2710 | { |
2479 | __be32 dest, src; | 2711 | __be32 dest, src; |
2480 | __u16 destp, srcp; | 2712 | __u16 destp, srcp; |
2481 | int ttd = tw->tw_ttd - jiffies; | 2713 | long delta = tw->tw_ttd - jiffies; |
2482 | |||
2483 | if (ttd < 0) | ||
2484 | ttd = 0; | ||
2485 | 2714 | ||
2486 | dest = tw->tw_daddr; | 2715 | dest = tw->tw_daddr; |
2487 | src = tw->tw_rcv_saddr; | 2716 | src = tw->tw_rcv_saddr; |
@@ -2491,7 +2720,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, | |||
2491 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" | 2720 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" |
2492 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", | 2721 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", |
2493 | i, src, srcp, dest, destp, tw->tw_substate, 0, 0, | 2722 | i, src, srcp, dest, destp, tw->tw_substate, 0, 0, |
2494 | 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, | 2723 | 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, |
2495 | atomic_read(&tw->tw_refcnt), tw, len); | 2724 | atomic_read(&tw->tw_refcnt), tw, len); |
2496 | } | 2725 | } |
2497 | 2726 | ||
@@ -2574,6 +2803,8 @@ void tcp4_proc_exit(void) | |||
2574 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | 2803 | struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) |
2575 | { | 2804 | { |
2576 | const struct iphdr *iph = skb_gro_network_header(skb); | 2805 | const struct iphdr *iph = skb_gro_network_header(skb); |
2806 | __wsum wsum; | ||
2807 | __sum16 sum; | ||
2577 | 2808 | ||
2578 | switch (skb->ip_summed) { | 2809 | switch (skb->ip_summed) { |
2579 | case CHECKSUM_COMPLETE: | 2810 | case CHECKSUM_COMPLETE: |
@@ -2582,11 +2813,22 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2582 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 2813 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
2583 | break; | 2814 | break; |
2584 | } | 2815 | } |
2585 | 2816 | flush: | |
2586 | /* fall through */ | ||
2587 | case CHECKSUM_NONE: | ||
2588 | NAPI_GRO_CB(skb)->flush = 1; | 2817 | NAPI_GRO_CB(skb)->flush = 1; |
2589 | return NULL; | 2818 | return NULL; |
2819 | |||
2820 | case CHECKSUM_NONE: | ||
2821 | wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
2822 | skb_gro_len(skb), IPPROTO_TCP, 0); | ||
2823 | sum = csum_fold(skb_checksum(skb, | ||
2824 | skb_gro_offset(skb), | ||
2825 | skb_gro_len(skb), | ||
2826 | wsum)); | ||
2827 | if (sum) | ||
2828 | goto flush; | ||
2829 | |||
2830 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
2831 | break; | ||
2590 | } | 2832 | } |
2591 | 2833 | ||
2592 | return tcp_gro_receive(head, skb); | 2834 | return tcp_gro_receive(head, skb); |
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0abe67bb4d3a..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/tcp.h> | 9 | #include <linux/tcp.h> |
10 | #include <linux/hash.h> | 10 | #include <linux/hash.h> |
11 | #include <linux/tcp_metrics.h> | ||
11 | 12 | ||
12 | #include <net/inet_connection_sock.h> | 13 | #include <net/inet_connection_sock.h> |
13 | #include <net/net_namespace.h> | 14 | #include <net/net_namespace.h> |
@@ -17,20 +18,10 @@ | |||
17 | #include <net/ipv6.h> | 18 | #include <net/ipv6.h> |
18 | #include <net/dst.h> | 19 | #include <net/dst.h> |
19 | #include <net/tcp.h> | 20 | #include <net/tcp.h> |
21 | #include <net/genetlink.h> | ||
20 | 22 | ||
21 | int sysctl_tcp_nometrics_save __read_mostly; | 23 | int sysctl_tcp_nometrics_save __read_mostly; |
22 | 24 | ||
23 | enum tcp_metric_index { | ||
24 | TCP_METRIC_RTT, | ||
25 | TCP_METRIC_RTTVAR, | ||
26 | TCP_METRIC_SSTHRESH, | ||
27 | TCP_METRIC_CWND, | ||
28 | TCP_METRIC_REORDERING, | ||
29 | |||
30 | /* Always last. */ | ||
31 | TCP_METRIC_MAX, | ||
32 | }; | ||
33 | |||
34 | struct tcp_fastopen_metrics { | 25 | struct tcp_fastopen_metrics { |
35 | u16 mss; | 26 | u16 mss; |
36 | u16 syn_loss:10; /* Recurring Fast Open SYN losses */ | 27 | u16 syn_loss:10; /* Recurring Fast Open SYN losses */ |
@@ -45,8 +36,10 @@ struct tcp_metrics_block { | |||
45 | u32 tcpm_ts; | 36 | u32 tcpm_ts; |
46 | u32 tcpm_ts_stamp; | 37 | u32 tcpm_ts_stamp; |
47 | u32 tcpm_lock; | 38 | u32 tcpm_lock; |
48 | u32 tcpm_vals[TCP_METRIC_MAX]; | 39 | u32 tcpm_vals[TCP_METRIC_MAX + 1]; |
49 | struct tcp_fastopen_metrics tcpm_fastopen; | 40 | struct tcp_fastopen_metrics tcpm_fastopen; |
41 | |||
42 | struct rcu_head rcu_head; | ||
50 | }; | 43 | }; |
51 | 44 | ||
52 | static bool tcp_metric_locked(struct tcp_metrics_block *tm, | 45 | static bool tcp_metric_locked(struct tcp_metrics_block *tm, |
@@ -690,6 +683,325 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, | |||
690 | rcu_read_unlock(); | 683 | rcu_read_unlock(); |
691 | } | 684 | } |
692 | 685 | ||
686 | static struct genl_family tcp_metrics_nl_family = { | ||
687 | .id = GENL_ID_GENERATE, | ||
688 | .hdrsize = 0, | ||
689 | .name = TCP_METRICS_GENL_NAME, | ||
690 | .version = TCP_METRICS_GENL_VERSION, | ||
691 | .maxattr = TCP_METRICS_ATTR_MAX, | ||
692 | .netnsok = true, | ||
693 | }; | ||
694 | |||
695 | static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { | ||
696 | [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, | ||
697 | [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, | ||
698 | .len = sizeof(struct in6_addr), }, | ||
699 | /* Following attributes are not received for GET/DEL, | ||
700 | * we keep them for reference | ||
701 | */ | ||
702 | #if 0 | ||
703 | [TCP_METRICS_ATTR_AGE] = { .type = NLA_MSECS, }, | ||
704 | [TCP_METRICS_ATTR_TW_TSVAL] = { .type = NLA_U32, }, | ||
705 | [TCP_METRICS_ATTR_TW_TS_STAMP] = { .type = NLA_S32, }, | ||
706 | [TCP_METRICS_ATTR_VALS] = { .type = NLA_NESTED, }, | ||
707 | [TCP_METRICS_ATTR_FOPEN_MSS] = { .type = NLA_U16, }, | ||
708 | [TCP_METRICS_ATTR_FOPEN_SYN_DROPS] = { .type = NLA_U16, }, | ||
709 | [TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS] = { .type = NLA_MSECS, }, | ||
710 | [TCP_METRICS_ATTR_FOPEN_COOKIE] = { .type = NLA_BINARY, | ||
711 | .len = TCP_FASTOPEN_COOKIE_MAX, }, | ||
712 | #endif | ||
713 | }; | ||
714 | |||
715 | /* Add attributes, caller cancels its header on failure */ | ||
716 | static int tcp_metrics_fill_info(struct sk_buff *msg, | ||
717 | struct tcp_metrics_block *tm) | ||
718 | { | ||
719 | struct nlattr *nest; | ||
720 | int i; | ||
721 | |||
722 | switch (tm->tcpm_addr.family) { | ||
723 | case AF_INET: | ||
724 | if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, | ||
725 | tm->tcpm_addr.addr.a4) < 0) | ||
726 | goto nla_put_failure; | ||
727 | break; | ||
728 | case AF_INET6: | ||
729 | if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, | ||
730 | tm->tcpm_addr.addr.a6) < 0) | ||
731 | goto nla_put_failure; | ||
732 | break; | ||
733 | default: | ||
734 | return -EAFNOSUPPORT; | ||
735 | } | ||
736 | |||
737 | if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, | ||
738 | jiffies - tm->tcpm_stamp) < 0) | ||
739 | goto nla_put_failure; | ||
740 | if (tm->tcpm_ts_stamp) { | ||
741 | if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, | ||
742 | (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) | ||
743 | goto nla_put_failure; | ||
744 | if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, | ||
745 | tm->tcpm_ts) < 0) | ||
746 | goto nla_put_failure; | ||
747 | } | ||
748 | |||
749 | { | ||
750 | int n = 0; | ||
751 | |||
752 | nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); | ||
753 | if (!nest) | ||
754 | goto nla_put_failure; | ||
755 | for (i = 0; i < TCP_METRIC_MAX + 1; i++) { | ||
756 | if (!tm->tcpm_vals[i]) | ||
757 | continue; | ||
758 | if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) | ||
759 | goto nla_put_failure; | ||
760 | n++; | ||
761 | } | ||
762 | if (n) | ||
763 | nla_nest_end(msg, nest); | ||
764 | else | ||
765 | nla_nest_cancel(msg, nest); | ||
766 | } | ||
767 | |||
768 | { | ||
769 | struct tcp_fastopen_metrics tfom_copy[1], *tfom; | ||
770 | unsigned int seq; | ||
771 | |||
772 | do { | ||
773 | seq = read_seqbegin(&fastopen_seqlock); | ||
774 | tfom_copy[0] = tm->tcpm_fastopen; | ||
775 | } while (read_seqretry(&fastopen_seqlock, seq)); | ||
776 | |||
777 | tfom = tfom_copy; | ||
778 | if (tfom->mss && | ||
779 | nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS, | ||
780 | tfom->mss) < 0) | ||
781 | goto nla_put_failure; | ||
782 | if (tfom->syn_loss && | ||
783 | (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS, | ||
784 | tfom->syn_loss) < 0 || | ||
785 | nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, | ||
786 | jiffies - tfom->last_syn_loss) < 0)) | ||
787 | goto nla_put_failure; | ||
788 | if (tfom->cookie.len > 0 && | ||
789 | nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE, | ||
790 | tfom->cookie.len, tfom->cookie.val) < 0) | ||
791 | goto nla_put_failure; | ||
792 | } | ||
793 | |||
794 | return 0; | ||
795 | |||
796 | nla_put_failure: | ||
797 | return -EMSGSIZE; | ||
798 | } | ||
799 | |||
800 | static int tcp_metrics_dump_info(struct sk_buff *skb, | ||
801 | struct netlink_callback *cb, | ||
802 | struct tcp_metrics_block *tm) | ||
803 | { | ||
804 | void *hdr; | ||
805 | |||
806 | hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, | ||
807 | &tcp_metrics_nl_family, NLM_F_MULTI, | ||
808 | TCP_METRICS_CMD_GET); | ||
809 | if (!hdr) | ||
810 | return -EMSGSIZE; | ||
811 | |||
812 | if (tcp_metrics_fill_info(skb, tm) < 0) | ||
813 | goto nla_put_failure; | ||
814 | |||
815 | return genlmsg_end(skb, hdr); | ||
816 | |||
817 | nla_put_failure: | ||
818 | genlmsg_cancel(skb, hdr); | ||
819 | return -EMSGSIZE; | ||
820 | } | ||
821 | |||
822 | static int tcp_metrics_nl_dump(struct sk_buff *skb, | ||
823 | struct netlink_callback *cb) | ||
824 | { | ||
825 | struct net *net = sock_net(skb->sk); | ||
826 | unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; | ||
827 | unsigned int row, s_row = cb->args[0]; | ||
828 | int s_col = cb->args[1], col = s_col; | ||
829 | |||
830 | for (row = s_row; row < max_rows; row++, s_col = 0) { | ||
831 | struct tcp_metrics_block *tm; | ||
832 | struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; | ||
833 | |||
834 | rcu_read_lock(); | ||
835 | for (col = 0, tm = rcu_dereference(hb->chain); tm; | ||
836 | tm = rcu_dereference(tm->tcpm_next), col++) { | ||
837 | if (col < s_col) | ||
838 | continue; | ||
839 | if (tcp_metrics_dump_info(skb, cb, tm) < 0) { | ||
840 | rcu_read_unlock(); | ||
841 | goto done; | ||
842 | } | ||
843 | } | ||
844 | rcu_read_unlock(); | ||
845 | } | ||
846 | |||
847 | done: | ||
848 | cb->args[0] = row; | ||
849 | cb->args[1] = col; | ||
850 | return skb->len; | ||
851 | } | ||
852 | |||
853 | static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, | ||
854 | unsigned int *hash, int optional) | ||
855 | { | ||
856 | struct nlattr *a; | ||
857 | |||
858 | a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; | ||
859 | if (a) { | ||
860 | addr->family = AF_INET; | ||
861 | addr->addr.a4 = nla_get_be32(a); | ||
862 | *hash = (__force unsigned int) addr->addr.a4; | ||
863 | return 0; | ||
864 | } | ||
865 | a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; | ||
866 | if (a) { | ||
867 | if (nla_len(a) != sizeof(struct in6_addr)) /* payload must be one full IPv6 address; the memcpy below reads sizeof(addr->addr.a6) bytes */ | ||
868 | return -EINVAL; | ||
869 | addr->family = AF_INET6; | ||
870 | memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); | ||
871 | *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); | ||
872 | return 0; | ||
873 | } | ||
874 | return optional ? 1 : -EAFNOSUPPORT; | ||
875 | } | ||
876 | |||
877 | static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) | ||
878 | { | ||
879 | struct tcp_metrics_block *tm; | ||
880 | struct inetpeer_addr addr; | ||
881 | unsigned int hash; | ||
882 | struct sk_buff *msg; | ||
883 | struct net *net = genl_info_net(info); | ||
884 | void *reply; | ||
885 | int ret; | ||
886 | |||
887 | ret = parse_nl_addr(info, &addr, &hash, 0); | ||
888 | if (ret < 0) | ||
889 | return ret; | ||
890 | |||
891 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
892 | if (!msg) | ||
893 | return -ENOMEM; | ||
894 | |||
895 | reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0, | ||
896 | info->genlhdr->cmd); | ||
897 | if (!reply) | ||
898 | goto nla_put_failure; | ||
899 | |||
900 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | ||
901 | ret = -ESRCH; | ||
902 | rcu_read_lock(); | ||
903 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | ||
904 | tm = rcu_dereference(tm->tcpm_next)) { | ||
905 | if (addr_same(&tm->tcpm_addr, &addr)) { | ||
906 | ret = tcp_metrics_fill_info(msg, tm); | ||
907 | break; | ||
908 | } | ||
909 | } | ||
910 | rcu_read_unlock(); | ||
911 | if (ret < 0) | ||
912 | goto out_free; | ||
913 | |||
914 | genlmsg_end(msg, reply); | ||
915 | return genlmsg_reply(msg, info); | ||
916 | |||
917 | nla_put_failure: | ||
918 | ret = -EMSGSIZE; | ||
919 | |||
920 | out_free: | ||
921 | nlmsg_free(msg); | ||
922 | return ret; | ||
923 | } | ||
924 | |||
925 | #define deref_locked_genl(p) \ | ||
926 | rcu_dereference_protected(p, lockdep_genl_is_held() && \ | ||
927 | lockdep_is_held(&tcp_metrics_lock)) | ||
928 | |||
929 | #define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) | ||
930 | |||
931 | static int tcp_metrics_flush_all(struct net *net) | ||
932 | { | ||
933 | unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; | ||
934 | struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; | ||
935 | struct tcp_metrics_block *tm; | ||
936 | unsigned int row; | ||
937 | |||
938 | for (row = 0; row < max_rows; row++, hb++) { | ||
939 | spin_lock_bh(&tcp_metrics_lock); | ||
940 | tm = deref_locked_genl(hb->chain); | ||
941 | if (tm) | ||
942 | hb->chain = NULL; | ||
943 | spin_unlock_bh(&tcp_metrics_lock); | ||
944 | while (tm) { | ||
945 | struct tcp_metrics_block *next; | ||
946 | |||
947 | next = deref_genl(tm->tcpm_next); | ||
948 | kfree_rcu(tm, rcu_head); | ||
949 | tm = next; | ||
950 | } | ||
951 | } | ||
952 | return 0; | ||
953 | } | ||
954 | |||
955 | static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) | ||
956 | { | ||
957 | struct tcpm_hash_bucket *hb; | ||
958 | struct tcp_metrics_block *tm; | ||
959 | struct tcp_metrics_block __rcu **pp; | ||
960 | struct inetpeer_addr addr; | ||
961 | unsigned int hash; | ||
962 | struct net *net = genl_info_net(info); | ||
963 | int ret; | ||
964 | |||
965 | ret = parse_nl_addr(info, &addr, &hash, 1); | ||
966 | if (ret < 0) | ||
967 | return ret; | ||
968 | if (ret > 0) | ||
969 | return tcp_metrics_flush_all(net); | ||
970 | |||
971 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | ||
972 | hb = net->ipv4.tcp_metrics_hash + hash; | ||
973 | pp = &hb->chain; | ||
974 | spin_lock_bh(&tcp_metrics_lock); | ||
975 | for (tm = deref_locked_genl(*pp); tm; | ||
976 | pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { | ||
977 | if (addr_same(&tm->tcpm_addr, &addr)) { | ||
978 | *pp = tm->tcpm_next; | ||
979 | break; | ||
980 | } | ||
981 | } | ||
982 | spin_unlock_bh(&tcp_metrics_lock); | ||
983 | if (!tm) | ||
984 | return -ESRCH; | ||
985 | kfree_rcu(tm, rcu_head); | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | static struct genl_ops tcp_metrics_nl_ops[] = { | ||
990 | { | ||
991 | .cmd = TCP_METRICS_CMD_GET, | ||
992 | .doit = tcp_metrics_nl_cmd_get, | ||
993 | .dumpit = tcp_metrics_nl_dump, | ||
994 | .policy = tcp_metrics_nl_policy, | ||
995 | .flags = GENL_ADMIN_PERM, | ||
996 | }, | ||
997 | { | ||
998 | .cmd = TCP_METRICS_CMD_DEL, | ||
999 | .doit = tcp_metrics_nl_cmd_del, | ||
1000 | .policy = tcp_metrics_nl_policy, | ||
1001 | .flags = GENL_ADMIN_PERM, | ||
1002 | }, | ||
1003 | }; | ||
1004 | |||
693 | static unsigned int tcpmhash_entries; | 1005 | static unsigned int tcpmhash_entries; |
694 | static int __init set_tcpmhash_entries(char *str) | 1006 | static int __init set_tcpmhash_entries(char *str) |
695 | { | 1007 | { |
@@ -753,5 +1065,21 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = { | |||
753 | 1065 | ||
754 | void __init tcp_metrics_init(void) | 1066 | void __init tcp_metrics_init(void) |
755 | { | 1067 | { |
756 | register_pernet_subsys(&tcp_net_metrics_ops); | 1068 | int ret; |
1069 | |||
1070 | ret = register_pernet_subsys(&tcp_net_metrics_ops); | ||
1071 | if (ret < 0) | ||
1072 | goto cleanup; | ||
1073 | ret = genl_register_family_with_ops(&tcp_metrics_nl_family, | ||
1074 | tcp_metrics_nl_ops, | ||
1075 | ARRAY_SIZE(tcp_metrics_nl_ops)); | ||
1076 | if (ret < 0) | ||
1077 | goto cleanup_subsys; | ||
1078 | return; | ||
1079 | |||
1080 | cleanup_subsys: | ||
1081 | unregister_pernet_subsys(&tcp_net_metrics_ops); | ||
1082 | |||
1083 | cleanup: | ||
1084 | return; | ||
757 | } | 1085 | } |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6ff7f10dce9d..27536ba16c9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -85,6 +85,8 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | |||
85 | * spinlock it. I do not want! Well, probability of misbehaviour | 85 | * spinlock it. I do not want! Well, probability of misbehaviour |
86 | * is ridiculously low and, seems, we could use some mb() tricks | 86 | * is ridiculously low and, seems, we could use some mb() tricks |
87 | * to avoid misread sequence numbers, states etc. --ANK | 87 | * to avoid misread sequence numbers, states etc. --ANK |
88 | * | ||
89 | * We don't need to initialize tmp_opt.sack_ok as we don't use the results | ||
88 | */ | 90 | */ |
89 | enum tcp_tw_status | 91 | enum tcp_tw_status |
90 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | 92 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, |
@@ -507,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
507 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; | 509 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; |
508 | newtp->rx_opt.mss_clamp = req->mss; | 510 | newtp->rx_opt.mss_clamp = req->mss; |
509 | TCP_ECN_openreq_child(newtp, req); | 511 | TCP_ECN_openreq_child(newtp, req); |
512 | newtp->fastopen_rsk = NULL; | ||
510 | 513 | ||
511 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); | 514 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); |
512 | } | 515 | } |
@@ -515,13 +518,20 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
515 | EXPORT_SYMBOL(tcp_create_openreq_child); | 518 | EXPORT_SYMBOL(tcp_create_openreq_child); |
516 | 519 | ||
517 | /* | 520 | /* |
518 | * Process an incoming packet for SYN_RECV sockets represented | 521 | * Process an incoming packet for SYN_RECV sockets represented as a |
519 | * as a request_sock. | 522 | * request_sock. Normally sk is the listener socket but for TFO it |
523 | * points to the child socket. | ||
524 | * | ||
525 | * XXX (TFO) - The current impl contains a special check for ack | ||
526 | * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? | ||
527 | * | ||
528 | * We don't need to initialize tmp_opt.sack_ok as we don't use the results | ||
520 | */ | 529 | */ |
521 | 530 | ||
522 | struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | 531 | struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, |
523 | struct request_sock *req, | 532 | struct request_sock *req, |
524 | struct request_sock **prev) | 533 | struct request_sock **prev, |
534 | bool fastopen) | ||
525 | { | 535 | { |
526 | struct tcp_options_received tmp_opt; | 536 | struct tcp_options_received tmp_opt; |
527 | const u8 *hash_location; | 537 | const u8 *hash_location; |
@@ -530,6 +540,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
530 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 540 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
531 | bool paws_reject = false; | 541 | bool paws_reject = false; |
532 | 542 | ||
543 | BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); | ||
544 | |||
533 | tmp_opt.saw_tstamp = 0; | 545 | tmp_opt.saw_tstamp = 0; |
534 | if (th->doff > (sizeof(struct tcphdr)>>2)) { | 546 | if (th->doff > (sizeof(struct tcphdr)>>2)) { |
535 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); | 547 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); |
@@ -565,6 +577,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
565 | * | 577 | * |
566 | * Enforce "SYN-ACK" according to figure 8, figure 6 | 578 | * Enforce "SYN-ACK" according to figure 8, figure 6 |
567 | * of RFC793, fixed by RFC1122. | 579 | * of RFC793, fixed by RFC1122. |
580 | * | ||
581 | * Note that even if there is new data in the SYN packet | ||
582 | * it will be thrown away too. | ||
568 | */ | 583 | */ |
569 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | 584 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); |
570 | return NULL; | 585 | return NULL; |
@@ -622,9 +637,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
622 | * sent (the segment carries an unacceptable ACK) ... | 637 | * sent (the segment carries an unacceptable ACK) ... |
623 | * a reset is sent." | 638 | * a reset is sent." |
624 | * | 639 | * |
625 | * Invalid ACK: reset will be sent by listening socket | 640 | * Invalid ACK: reset will be sent by listening socket. |
641 | * Note that the ACK validity check for a Fast Open socket is done | ||
642 | * elsewhere and is checked directly against the child socket rather | ||
643 | * than req because user data may have been sent out. | ||
626 | */ | 644 | */ |
627 | if ((flg & TCP_FLAG_ACK) && | 645 | if ((flg & TCP_FLAG_ACK) && !fastopen && |
628 | (TCP_SKB_CB(skb)->ack_seq != | 646 | (TCP_SKB_CB(skb)->ack_seq != |
629 | tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) | 647 | tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) |
630 | return sk; | 648 | return sk; |
@@ -637,7 +655,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
637 | /* RFC793: "first check sequence number". */ | 655 | /* RFC793: "first check sequence number". */ |
638 | 656 | ||
639 | if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, | 657 | if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, |
640 | tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { | 658 | tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { |
641 | /* Out of window: send ACK and drop. */ | 659 | /* Out of window: send ACK and drop. */ |
642 | if (!(flg & TCP_FLAG_RST)) | 660 | if (!(flg & TCP_FLAG_RST)) |
643 | req->rsk_ops->send_ack(sk, skb, req); | 661 | req->rsk_ops->send_ack(sk, skb, req); |
@@ -648,7 +666,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
648 | 666 | ||
649 | /* In sequence, PAWS is OK. */ | 667 | /* In sequence, PAWS is OK. */ |
650 | 668 | ||
651 | if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) | 669 | if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) |
652 | req->ts_recent = tmp_opt.rcv_tsval; | 670 | req->ts_recent = tmp_opt.rcv_tsval; |
653 | 671 | ||
654 | if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { | 672 | if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { |
@@ -667,10 +685,25 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
667 | 685 | ||
668 | /* ACK sequence verified above, just make sure ACK is | 686 | /* ACK sequence verified above, just make sure ACK is |
669 | * set. If ACK not set, just silently drop the packet. | 687 | * set. If ACK not set, just silently drop the packet. |
688 | * | ||
689 | * XXX (TFO) - if we ever allow "data after SYN", the | ||
690 | * following check needs to be removed. | ||
670 | */ | 691 | */ |
671 | if (!(flg & TCP_FLAG_ACK)) | 692 | if (!(flg & TCP_FLAG_ACK)) |
672 | return NULL; | 693 | return NULL; |
673 | 694 | ||
695 | /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ | ||
696 | if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) | ||
697 | tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; | ||
698 | else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ | ||
699 | tcp_rsk(req)->snt_synack = 0; | ||
700 | |||
701 | /* For Fast Open no more processing is needed (sk is the | ||
702 | * child socket). | ||
703 | */ | ||
704 | if (fastopen) | ||
705 | return sk; | ||
706 | |||
674 | /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ | 707 | /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ |
675 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && | 708 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && |
676 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | 709 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { |
@@ -678,10 +711,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
678 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); | 711 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); |
679 | return NULL; | 712 | return NULL; |
680 | } | 713 | } |
681 | if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) | ||
682 | tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; | ||
683 | else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ | ||
684 | tcp_rsk(req)->snt_synack = 0; | ||
685 | 714 | ||
686 | /* OK, ACK is valid, create big socket and | 715 | /* OK, ACK is valid, create big socket and |
687 | * feed this segment to it. It will repeat all | 716 | * feed this segment to it. It will repeat all |
@@ -706,11 +735,21 @@ listen_overflow: | |||
706 | } | 735 | } |
707 | 736 | ||
708 | embryonic_reset: | 737 | embryonic_reset: |
709 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); | 738 | if (!(flg & TCP_FLAG_RST)) { |
710 | if (!(flg & TCP_FLAG_RST)) | 739 | /* Received a bad SYN pkt - for TFO we try not to reset |
740 | * the local connection unless it's really necessary to | ||
741 | * avoid becoming vulnerable to outside attack aiming at | ||
742 | * resetting legit local connections. | ||
743 | */ | ||
711 | req->rsk_ops->send_reset(sk, skb); | 744 | req->rsk_ops->send_reset(sk, skb); |
712 | 745 | } else if (fastopen) { /* received a valid RST pkt */ | |
713 | inet_csk_reqsk_queue_drop(sk, req, prev); | 746 | reqsk_fastopen_remove(sk, req, true); |
747 | tcp_reset(sk); | ||
748 | } | ||
749 | if (!fastopen) { | ||
750 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
751 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); | ||
752 | } | ||
714 | return NULL; | 753 | return NULL; |
715 | } | 754 | } |
716 | EXPORT_SYMBOL(tcp_check_req); | 755 | EXPORT_SYMBOL(tcp_check_req); |
@@ -719,6 +758,12 @@ EXPORT_SYMBOL(tcp_check_req); | |||
719 | * Queue segment on the new socket if the new socket is active, | 758 | * Queue segment on the new socket if the new socket is active, |
720 | * otherwise we just shortcircuit this and continue with | 759 | * otherwise we just shortcircuit this and continue with |
721 | * the new socket. | 760 | * the new socket. |
761 | * | ||
762 | * For the vast majority of cases child->sk_state will be TCP_SYN_RECV | ||
763 | * when entering. But other states are possible due to a race condition | ||
764 | * where after __inet_lookup_established() fails but before the listener | ||
765 | * lock is obtained, other packets cause the same connection to | ||
766 | * be created. | ||
722 | */ | 767 | */ |
723 | 768 | ||
724 | int tcp_child_process(struct sock *parent, struct sock *child, | 769 | int tcp_child_process(struct sock *parent, struct sock *child, |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d04632673a9e..cfe6ffe1c177 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
702 | unsigned int mss, struct sk_buff *skb, | 702 | unsigned int mss, struct sk_buff *skb, |
703 | struct tcp_out_options *opts, | 703 | struct tcp_out_options *opts, |
704 | struct tcp_md5sig_key **md5, | 704 | struct tcp_md5sig_key **md5, |
705 | struct tcp_extend_values *xvp) | 705 | struct tcp_extend_values *xvp, |
706 | struct tcp_fastopen_cookie *foc) | ||
706 | { | 707 | { |
707 | struct inet_request_sock *ireq = inet_rsk(req); | 708 | struct inet_request_sock *ireq = inet_rsk(req); |
708 | unsigned int remaining = MAX_TCP_OPTION_SPACE; | 709 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
@@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk, | |||
747 | if (unlikely(!ireq->tstamp_ok)) | 748 | if (unlikely(!ireq->tstamp_ok)) |
748 | remaining -= TCPOLEN_SACKPERM_ALIGNED; | 749 | remaining -= TCPOLEN_SACKPERM_ALIGNED; |
749 | } | 750 | } |
750 | 751 | if (foc != NULL) { | |
752 | u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; | ||
753 | need = (need + 3) & ~3U; /* Align to 32 bits */ | ||
754 | if (remaining >= need) { | ||
755 | opts->options |= OPTION_FAST_OPEN_COOKIE; | ||
756 | opts->fastopen_cookie = foc; | ||
757 | remaining -= need; | ||
758 | } | ||
759 | } | ||
751 | /* Similar rationale to tcp_syn_options() applies here, too. | 760 | /* Similar rationale to tcp_syn_options() applies here, too. |
752 | * If the <SYN> options fit, the same options should fit now! | 761 | * If the <SYN> options fit, the same options should fit now! |
753 | */ | 762 | */ |
@@ -2028,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
2028 | if (push_one) | 2037 | if (push_one) |
2029 | break; | 2038 | break; |
2030 | } | 2039 | } |
2031 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) | ||
2032 | tp->prr_out += sent_pkts; | ||
2033 | 2040 | ||
2034 | if (likely(sent_pkts)) { | 2041 | if (likely(sent_pkts)) { |
2042 | if (tcp_in_cwnd_reduction(sk)) | ||
2043 | tp->prr_out += sent_pkts; | ||
2035 | tcp_cwnd_validate(sk); | 2044 | tcp_cwnd_validate(sk); |
2036 | return false; | 2045 | return false; |
2037 | } | 2046 | } |
@@ -2533,7 +2542,7 @@ begin_fwd: | |||
2533 | } | 2542 | } |
2534 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 2543 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
2535 | 2544 | ||
2536 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) | 2545 | if (tcp_in_cwnd_reduction(sk)) |
2537 | tp->prr_out += tcp_skb_pcount(skb); | 2546 | tp->prr_out += tcp_skb_pcount(skb); |
2538 | 2547 | ||
2539 | if (skb == tcp_write_queue_head(sk)) | 2548 | if (skb == tcp_write_queue_head(sk)) |
@@ -2658,7 +2667,8 @@ int tcp_send_synack(struct sock *sk) | |||
2658 | */ | 2667 | */ |
2659 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2668 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2660 | struct request_sock *req, | 2669 | struct request_sock *req, |
2661 | struct request_values *rvp) | 2670 | struct request_values *rvp, |
2671 | struct tcp_fastopen_cookie *foc) | ||
2662 | { | 2672 | { |
2663 | struct tcp_out_options opts; | 2673 | struct tcp_out_options opts; |
2664 | struct tcp_extend_values *xvp = tcp_xv(rvp); | 2674 | struct tcp_extend_values *xvp = tcp_xv(rvp); |
@@ -2718,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2718 | #endif | 2728 | #endif |
2719 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2729 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2720 | tcp_header_size = tcp_synack_options(sk, req, mss, | 2730 | tcp_header_size = tcp_synack_options(sk, req, mss, |
2721 | skb, &opts, &md5, xvp) | 2731 | skb, &opts, &md5, xvp, foc) |
2722 | + sizeof(*th); | 2732 | + sizeof(*th); |
2723 | 2733 | ||
2724 | skb_push(skb, tcp_header_size); | 2734 | skb_push(skb, tcp_header_size); |
@@ -2772,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2772 | } | 2782 | } |
2773 | 2783 | ||
2774 | th->seq = htonl(TCP_SKB_CB(skb)->seq); | 2784 | th->seq = htonl(TCP_SKB_CB(skb)->seq); |
2775 | th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); | 2785 | /* XXX data is queued and acked as is. No buffer/window check */ |
2786 | th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); | ||
2776 | 2787 | ||
2777 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ | 2788 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
2778 | th->window = htons(min(req->rcv_wnd, 65535U)); | 2789 | th->window = htons(min(req->rcv_wnd, 65535U)); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b774a03bd1dc..fc04711e80c8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -305,6 +305,35 @@ static void tcp_probe_timer(struct sock *sk) | |||
305 | } | 305 | } |
306 | 306 | ||
307 | /* | 307 | /* |
308 | * Timer for Fast Open socket to retransmit SYNACK. Note that the | ||
309 | * sk here is the child socket, not the parent (listener) socket. | ||
310 | */ | ||
311 | static void tcp_fastopen_synack_timer(struct sock *sk) | ||
312 | { | ||
313 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
314 | int max_retries = icsk->icsk_syn_retries ? : | ||
315 | sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ | ||
316 | struct request_sock *req; | ||
317 | |||
318 | req = tcp_sk(sk)->fastopen_rsk; | ||
319 | req->rsk_ops->syn_ack_timeout(sk, req); | ||
320 | |||
321 | if (req->retrans >= max_retries) { | ||
322 | tcp_write_err(sk); | ||
323 | return; | ||
324 | } | ||
325 | /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error | ||
326 | * returned from rtx_syn_ack() to make it more persistent like | ||
327 | * regular retransmit because if the child socket has been accepted | ||
328 | * it's not good to give up too easily. | ||
329 | */ | ||
330 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | ||
331 | req->retrans++; | ||
332 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
333 | TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); | ||
334 | } | ||
335 | |||
336 | /* | ||
308 | * The TCP retransmit timer. | 337 | * The TCP retransmit timer. |
309 | */ | 338 | */ |
310 | 339 | ||
@@ -317,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk) | |||
317 | tcp_resume_early_retransmit(sk); | 346 | tcp_resume_early_retransmit(sk); |
318 | return; | 347 | return; |
319 | } | 348 | } |
320 | 349 | if (tp->fastopen_rsk) { | |
350 | BUG_ON(sk->sk_state != TCP_SYN_RECV && | ||
351 | sk->sk_state != TCP_FIN_WAIT1); | ||
352 | tcp_fastopen_synack_timer(sk); | ||
353 | /* Before we receive ACK to our SYN-ACK don't retransmit | ||
354 | * anything else (e.g., data or FIN segments). | ||
355 | */ | ||
356 | return; | ||
357 | } | ||
321 | if (!tp->packets_out) | 358 | if (!tp->packets_out) |
322 | goto out; | 359 | goto out; |
323 | 360 | ||
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d2f336ea82ca..505b30ad9182 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c | |||
@@ -26,7 +26,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, | |||
26 | 26 | ||
27 | return inet_sk_diag_fill(sk, NULL, skb, req, | 27 | return inet_sk_diag_fill(sk, NULL, skb, req, |
28 | sk_user_ns(NETLINK_CB(cb->skb).ssk), | 28 | sk_user_ns(NETLINK_CB(cb->skb).ssk), |
29 | NETLINK_CB(cb->skb).pid, | 29 | NETLINK_CB(cb->skb).portid, |
30 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 30 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
31 | } | 31 | } |
32 | 32 | ||
@@ -72,14 +72,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, | |||
72 | 72 | ||
73 | err = inet_sk_diag_fill(sk, NULL, rep, req, | 73 | err = inet_sk_diag_fill(sk, NULL, rep, req, |
74 | sk_user_ns(NETLINK_CB(in_skb).ssk), | 74 | sk_user_ns(NETLINK_CB(in_skb).ssk), |
75 | NETLINK_CB(in_skb).pid, | 75 | NETLINK_CB(in_skb).portid, |
76 | nlh->nlmsg_seq, 0, nlh); | 76 | nlh->nlmsg_seq, 0, nlh); |
77 | if (err < 0) { | 77 | if (err < 0) { |
78 | WARN_ON(err == -EMSGSIZE); | 78 | WARN_ON(err == -EMSGSIZE); |
79 | kfree_skb(rep); | 79 | kfree_skb(rep); |
80 | goto out; | 80 | goto out; |
81 | } | 81 | } |
82 | err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, | 82 | err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, |
83 | MSG_DONTWAIT); | 83 | MSG_DONTWAIT); |
84 | if (err > 0) | 84 | if (err > 0) |
85 | err = 0; | 85 | err = 0; |