diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-04-27 12:26:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-04-27 12:26:46 -0400 |
commit | 15c54033964a943de7b0763efd3bd0ede7326395 (patch) | |
tree | 840b292612d1b5396d5bab5bde537a9013db3ceb /net/ipv4 | |
parent | ad5da3cf39a5b11a198929be1f2644e17ecd767e (diff) | |
parent | 912a41a4ab935ce8c4308428ec13fc7f8b1f18f4 (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (448 commits)
[IPV4] nl_fib_lookup: Initialise res.r before fib_res_put(&res)
[IPV6]: Fix thinko in ipv6_rthdr_rcv() changes.
[IPV4]: Add multipath cached to feature-removal-schedule.txt
[WIRELESS] cfg80211: Clarify locking comment.
[WIRELESS] cfg80211: Fix locking in wiphy_new.
[WEXT] net_device: Don't include wext bits if not required.
[WEXT]: Misc code cleanups.
[WEXT]: Reduce inline abuse.
[WEXT]: Move EXPORT_SYMBOL statements where they belong.
[WEXT]: Cleanup early ioctl call path.
[WEXT]: Remove options.
[WEXT]: Remove dead debug code.
[WEXT]: Clean up how wext is called.
[WEXT]: Move to net/wireless
[AFS]: Eliminate cmpxchg() usage in vlocation code.
[RXRPC]: Fix pointers passed to bitops.
[RXRPC]: Remove bogus atomic_* overrides.
[AFS]: Fix u64 printing in debug logging.
[AFS]: Add "directory write" support.
[AFS]: Implement the CB.InitCallBackState3 operation.
...
Diffstat (limited to 'net/ipv4')
145 files changed, 3565 insertions, 19436 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9e8ef509c51d..e62aee0ec4c5 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -574,6 +574,33 @@ config TCP_CONG_VENO | |||
574 | loss packets. | 574 | loss packets. |
575 | See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf | 575 | See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf |
576 | 576 | ||
577 | config TCP_CONG_YEAH | ||
578 | tristate "YeAH TCP" | ||
579 | depends on EXPERIMENTAL | ||
580 | default n | ||
581 | ---help--- | ||
582 | YeAH-TCP is a sender-side high-speed enabled TCP congestion control | ||
583 | algorithm, which uses a mixed loss/delay approach to compute the | ||
584 | congestion window. Its design goals target high efficiency, | ||
585 | internal, RTT and Reno fairness, resilience to link loss while | ||
586 | keeping network elements load as low as possible. | ||
587 | |||
588 | For further details look here: | ||
589 | http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf | ||
590 | |||
591 | config TCP_CONG_ILLINOIS | ||
592 | tristate "TCP Illinois" | ||
593 | depends on EXPERIMENTAL | ||
594 | default n | ||
595 | ---help--- | ||
596 | TCP-Illinois is a sender-side modification of TCP Reno for | ||
597 | high speed long delay links. It uses round-trip-time to | ||
598 | adjust the alpha and beta parameters to achieve a higher average | ||
599 | throughput and maintain fairness. | ||
600 | |||
601 | For further details see: | ||
602 | http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html | ||
603 | |||
577 | choice | 604 | choice |
578 | prompt "Default TCP congestion control" | 605 | prompt "Default TCP congestion control" |
579 | default DEFAULT_CUBIC | 606 | default DEFAULT_CUBIC |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 7a068626feea..4ff6c151d7f3 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -49,6 +49,8 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o | |||
49 | obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o | 49 | obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o |
50 | obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | 50 | obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o |
51 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 51 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
52 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o | ||
53 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o | ||
52 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | 54 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o |
53 | 55 | ||
54 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 56 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cf358c84c440..16aae8ef5555 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -87,6 +87,7 @@ | |||
87 | #include <linux/init.h> | 87 | #include <linux/init.h> |
88 | #include <linux/poll.h> | 88 | #include <linux/poll.h> |
89 | #include <linux/netfilter_ipv4.h> | 89 | #include <linux/netfilter_ipv4.h> |
90 | #include <linux/random.h> | ||
90 | 91 | ||
91 | #include <asm/uaccess.h> | 92 | #include <asm/uaccess.h> |
92 | #include <asm/system.h> | 93 | #include <asm/system.h> |
@@ -217,6 +218,26 @@ out: | |||
217 | return err; | 218 | return err; |
218 | } | 219 | } |
219 | 220 | ||
221 | u32 inet_ehash_secret __read_mostly; | ||
222 | EXPORT_SYMBOL(inet_ehash_secret); | ||
223 | |||
224 | /* | ||
225 | * inet_ehash_secret must be set exactly once | ||
226 | * Instead of using a dedicated spinlock, we (ab)use inetsw_lock | ||
227 | */ | ||
228 | void build_ehash_secret(void) | ||
229 | { | ||
230 | u32 rnd; | ||
231 | do { | ||
232 | get_random_bytes(&rnd, sizeof(rnd)); | ||
233 | } while (rnd == 0); | ||
234 | spin_lock_bh(&inetsw_lock); | ||
235 | if (!inet_ehash_secret) | ||
236 | inet_ehash_secret = rnd; | ||
237 | spin_unlock_bh(&inetsw_lock); | ||
238 | } | ||
239 | EXPORT_SYMBOL(build_ehash_secret); | ||
240 | |||
220 | /* | 241 | /* |
221 | * Create an inet socket. | 242 | * Create an inet socket. |
222 | */ | 243 | */ |
@@ -233,6 +254,11 @@ static int inet_create(struct socket *sock, int protocol) | |||
233 | int try_loading_module = 0; | 254 | int try_loading_module = 0; |
234 | int err; | 255 | int err; |
235 | 256 | ||
257 | if (sock->type != SOCK_RAW && | ||
258 | sock->type != SOCK_DGRAM && | ||
259 | !inet_ehash_secret) | ||
260 | build_ehash_secret(); | ||
261 | |||
236 | sock->state = SS_UNCONNECTED; | 262 | sock->state = SS_UNCONNECTED; |
237 | 263 | ||
238 | /* Look for the requested type/protocol pair. */ | 264 | /* Look for the requested type/protocol pair. */ |
@@ -755,6 +781,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
755 | case SIOCGSTAMP: | 781 | case SIOCGSTAMP: |
756 | err = sock_get_timestamp(sk, (struct timeval __user *)arg); | 782 | err = sock_get_timestamp(sk, (struct timeval __user *)arg); |
757 | break; | 783 | break; |
784 | case SIOCGSTAMPNS: | ||
785 | err = sock_get_timestampns(sk, (struct timespec __user *)arg); | ||
786 | break; | ||
758 | case SIOCADDRT: | 787 | case SIOCADDRT: |
759 | case SIOCDELRT: | 788 | case SIOCDELRT: |
760 | case SIOCRTMSG: | 789 | case SIOCRTMSG: |
@@ -1109,7 +1138,7 @@ static int inet_gso_send_check(struct sk_buff *skb) | |||
1109 | if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) | 1138 | if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) |
1110 | goto out; | 1139 | goto out; |
1111 | 1140 | ||
1112 | iph = skb->nh.iph; | 1141 | iph = ip_hdr(skb); |
1113 | ihl = iph->ihl * 4; | 1142 | ihl = iph->ihl * 4; |
1114 | if (ihl < sizeof(*iph)) | 1143 | if (ihl < sizeof(*iph)) |
1115 | goto out; | 1144 | goto out; |
@@ -1117,8 +1146,9 @@ static int inet_gso_send_check(struct sk_buff *skb) | |||
1117 | if (unlikely(!pskb_may_pull(skb, ihl))) | 1146 | if (unlikely(!pskb_may_pull(skb, ihl))) |
1118 | goto out; | 1147 | goto out; |
1119 | 1148 | ||
1120 | skb->h.raw = __skb_pull(skb, ihl); | 1149 | __skb_pull(skb, ihl); |
1121 | iph = skb->nh.iph; | 1150 | skb_reset_transport_header(skb); |
1151 | iph = ip_hdr(skb); | ||
1122 | proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1152 | proto = iph->protocol & (MAX_INET_PROTOS - 1); |
1123 | err = -EPROTONOSUPPORT; | 1153 | err = -EPROTONOSUPPORT; |
1124 | 1154 | ||
@@ -1152,7 +1182,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) | |||
1152 | if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) | 1182 | if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) |
1153 | goto out; | 1183 | goto out; |
1154 | 1184 | ||
1155 | iph = skb->nh.iph; | 1185 | iph = ip_hdr(skb); |
1156 | ihl = iph->ihl * 4; | 1186 | ihl = iph->ihl * 4; |
1157 | if (ihl < sizeof(*iph)) | 1187 | if (ihl < sizeof(*iph)) |
1158 | goto out; | 1188 | goto out; |
@@ -1160,8 +1190,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) | |||
1160 | if (unlikely(!pskb_may_pull(skb, ihl))) | 1190 | if (unlikely(!pskb_may_pull(skb, ihl))) |
1161 | goto out; | 1191 | goto out; |
1162 | 1192 | ||
1163 | skb->h.raw = __skb_pull(skb, ihl); | 1193 | __skb_pull(skb, ihl); |
1164 | iph = skb->nh.iph; | 1194 | skb_reset_transport_header(skb); |
1195 | iph = ip_hdr(skb); | ||
1165 | id = ntohs(iph->id); | 1196 | id = ntohs(iph->id); |
1166 | proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1197 | proto = iph->protocol & (MAX_INET_PROTOS - 1); |
1167 | segs = ERR_PTR(-EPROTONOSUPPORT); | 1198 | segs = ERR_PTR(-EPROTONOSUPPORT); |
@@ -1177,17 +1208,57 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) | |||
1177 | 1208 | ||
1178 | skb = segs; | 1209 | skb = segs; |
1179 | do { | 1210 | do { |
1180 | iph = skb->nh.iph; | 1211 | iph = ip_hdr(skb); |
1181 | iph->id = htons(id++); | 1212 | iph->id = htons(id++); |
1182 | iph->tot_len = htons(skb->len - skb->mac_len); | 1213 | iph->tot_len = htons(skb->len - skb->mac_len); |
1183 | iph->check = 0; | 1214 | iph->check = 0; |
1184 | iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); | 1215 | iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); |
1185 | } while ((skb = skb->next)); | 1216 | } while ((skb = skb->next)); |
1186 | 1217 | ||
1187 | out: | 1218 | out: |
1188 | return segs; | 1219 | return segs; |
1189 | } | 1220 | } |
1190 | 1221 | ||
1222 | unsigned long snmp_fold_field(void *mib[], int offt) | ||
1223 | { | ||
1224 | unsigned long res = 0; | ||
1225 | int i; | ||
1226 | |||
1227 | for_each_possible_cpu(i) { | ||
1228 | res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); | ||
1229 | res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); | ||
1230 | } | ||
1231 | return res; | ||
1232 | } | ||
1233 | EXPORT_SYMBOL_GPL(snmp_fold_field); | ||
1234 | |||
1235 | int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) | ||
1236 | { | ||
1237 | BUG_ON(ptr == NULL); | ||
1238 | ptr[0] = __alloc_percpu(mibsize); | ||
1239 | if (!ptr[0]) | ||
1240 | goto err0; | ||
1241 | ptr[1] = __alloc_percpu(mibsize); | ||
1242 | if (!ptr[1]) | ||
1243 | goto err1; | ||
1244 | return 0; | ||
1245 | err1: | ||
1246 | free_percpu(ptr[0]); | ||
1247 | ptr[0] = NULL; | ||
1248 | err0: | ||
1249 | return -ENOMEM; | ||
1250 | } | ||
1251 | EXPORT_SYMBOL_GPL(snmp_mib_init); | ||
1252 | |||
1253 | void snmp_mib_free(void *ptr[2]) | ||
1254 | { | ||
1255 | BUG_ON(ptr == NULL); | ||
1256 | free_percpu(ptr[0]); | ||
1257 | free_percpu(ptr[1]); | ||
1258 | ptr[0] = ptr[1] = NULL; | ||
1259 | } | ||
1260 | EXPORT_SYMBOL_GPL(snmp_mib_free); | ||
1261 | |||
1191 | #ifdef CONFIG_IP_MULTICAST | 1262 | #ifdef CONFIG_IP_MULTICAST |
1192 | static struct net_protocol igmp_protocol = { | 1263 | static struct net_protocol igmp_protocol = { |
1193 | .handler = igmp_rcv, | 1264 | .handler = igmp_rcv, |
@@ -1214,28 +1285,47 @@ static struct net_protocol icmp_protocol = { | |||
1214 | 1285 | ||
1215 | static int __init init_ipv4_mibs(void) | 1286 | static int __init init_ipv4_mibs(void) |
1216 | { | 1287 | { |
1217 | net_statistics[0] = alloc_percpu(struct linux_mib); | 1288 | if (snmp_mib_init((void **)net_statistics, |
1218 | net_statistics[1] = alloc_percpu(struct linux_mib); | 1289 | sizeof(struct linux_mib), |
1219 | ip_statistics[0] = alloc_percpu(struct ipstats_mib); | 1290 | __alignof__(struct linux_mib)) < 0) |
1220 | ip_statistics[1] = alloc_percpu(struct ipstats_mib); | 1291 | goto err_net_mib; |
1221 | icmp_statistics[0] = alloc_percpu(struct icmp_mib); | 1292 | if (snmp_mib_init((void **)ip_statistics, |
1222 | icmp_statistics[1] = alloc_percpu(struct icmp_mib); | 1293 | sizeof(struct ipstats_mib), |
1223 | tcp_statistics[0] = alloc_percpu(struct tcp_mib); | 1294 | __alignof__(struct ipstats_mib)) < 0) |
1224 | tcp_statistics[1] = alloc_percpu(struct tcp_mib); | 1295 | goto err_ip_mib; |
1225 | udp_statistics[0] = alloc_percpu(struct udp_mib); | 1296 | if (snmp_mib_init((void **)icmp_statistics, |
1226 | udp_statistics[1] = alloc_percpu(struct udp_mib); | 1297 | sizeof(struct icmp_mib), |
1227 | udplite_statistics[0] = alloc_percpu(struct udp_mib); | 1298 | __alignof__(struct icmp_mib)) < 0) |
1228 | udplite_statistics[1] = alloc_percpu(struct udp_mib); | 1299 | goto err_icmp_mib; |
1229 | if (! | 1300 | if (snmp_mib_init((void **)tcp_statistics, |
1230 | (net_statistics[0] && net_statistics[1] && ip_statistics[0] | 1301 | sizeof(struct tcp_mib), |
1231 | && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] | 1302 | __alignof__(struct tcp_mib)) < 0) |
1232 | && udp_statistics[0] && udp_statistics[1] | 1303 | goto err_tcp_mib; |
1233 | && udplite_statistics[0] && udplite_statistics[1] ) ) | 1304 | if (snmp_mib_init((void **)udp_statistics, |
1234 | return -ENOMEM; | 1305 | sizeof(struct udp_mib), |
1235 | 1306 | __alignof__(struct udp_mib)) < 0) | |
1236 | (void) tcp_mib_init(); | 1307 | goto err_udp_mib; |
1308 | if (snmp_mib_init((void **)udplite_statistics, | ||
1309 | sizeof(struct udp_mib), | ||
1310 | __alignof__(struct udp_mib)) < 0) | ||
1311 | goto err_udplite_mib; | ||
1312 | |||
1313 | tcp_mib_init(); | ||
1237 | 1314 | ||
1238 | return 0; | 1315 | return 0; |
1316 | |||
1317 | err_udplite_mib: | ||
1318 | snmp_mib_free((void **)udp_statistics); | ||
1319 | err_udp_mib: | ||
1320 | snmp_mib_free((void **)tcp_statistics); | ||
1321 | err_tcp_mib: | ||
1322 | snmp_mib_free((void **)icmp_statistics); | ||
1323 | err_icmp_mib: | ||
1324 | snmp_mib_free((void **)ip_statistics); | ||
1325 | err_ip_mib: | ||
1326 | snmp_mib_free((void **)net_statistics); | ||
1327 | err_net_mib: | ||
1328 | return -ENOMEM; | ||
1239 | } | 1329 | } |
1240 | 1330 | ||
1241 | static int ipv4_proc_init(void); | 1331 | static int ipv4_proc_init(void); |
@@ -1336,7 +1426,7 @@ static int __init inet_init(void) | |||
1336 | * Initialise per-cpu ipv4 mibs | 1426 | * Initialise per-cpu ipv4 mibs |
1337 | */ | 1427 | */ |
1338 | 1428 | ||
1339 | if(init_ipv4_mibs()) | 1429 | if (init_ipv4_mibs()) |
1340 | printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; | 1430 | printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; |
1341 | 1431 | ||
1342 | ipv4_proc_init(); | 1432 | ipv4_proc_init(); |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 7194eb40b6d0..6da8ff597ad3 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -65,7 +65,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) | |||
65 | char buf[60]; | 65 | char buf[60]; |
66 | } tmp_iph; | 66 | } tmp_iph; |
67 | 67 | ||
68 | top_iph = skb->nh.iph; | 68 | top_iph = ip_hdr(skb); |
69 | iph = &tmp_iph.iph; | 69 | iph = &tmp_iph.iph; |
70 | 70 | ||
71 | iph->tos = top_iph->tos; | 71 | iph->tos = top_iph->tos; |
@@ -152,9 +152,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) | |||
152 | skb->ip_summed = CHECKSUM_NONE; | 152 | skb->ip_summed = CHECKSUM_NONE; |
153 | 153 | ||
154 | ah = (struct ip_auth_hdr*)skb->data; | 154 | ah = (struct ip_auth_hdr*)skb->data; |
155 | iph = skb->nh.iph; | 155 | iph = ip_hdr(skb); |
156 | 156 | ||
157 | ihl = skb->data - skb->nh.raw; | 157 | ihl = skb->data - skb_network_header(skb); |
158 | memcpy(work_buf, iph, ihl); | 158 | memcpy(work_buf, iph, ihl); |
159 | 159 | ||
160 | iph->ttl = 0; | 160 | iph->ttl = 0; |
@@ -181,7 +181,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) | |||
181 | } | 181 | } |
182 | } | 182 | } |
183 | ((struct iphdr*)work_buf)->protocol = ah->nexthdr; | 183 | ((struct iphdr*)work_buf)->protocol = ah->nexthdr; |
184 | skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl); | 184 | skb->network_header += ah_hlen; |
185 | memcpy(skb_network_header(skb), work_buf, ihl); | ||
186 | skb->transport_header = skb->network_header; | ||
185 | __skb_pull(skb, ah_hlen + ihl); | 187 | __skb_pull(skb, ah_hlen + ihl); |
186 | 188 | ||
187 | return 0; | 189 | return 0; |
@@ -196,8 +198,8 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
196 | struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); | 198 | struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); |
197 | struct xfrm_state *x; | 199 | struct xfrm_state *x; |
198 | 200 | ||
199 | if (skb->h.icmph->type != ICMP_DEST_UNREACH || | 201 | if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || |
200 | skb->h.icmph->code != ICMP_FRAG_NEEDED) | 202 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
201 | return; | 203 | return; |
202 | 204 | ||
203 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); | 205 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1a3488a83f49..7110779a0244 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -342,13 +342,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) | |||
342 | switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { | 342 | switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { |
343 | default: | 343 | default: |
344 | case 0: /* By default announce any local IP */ | 344 | case 0: /* By default announce any local IP */ |
345 | if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL) | 345 | if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL) |
346 | saddr = skb->nh.iph->saddr; | 346 | saddr = ip_hdr(skb)->saddr; |
347 | break; | 347 | break; |
348 | case 1: /* Restrict announcements of saddr in same subnet */ | 348 | case 1: /* Restrict announcements of saddr in same subnet */ |
349 | if (!skb) | 349 | if (!skb) |
350 | break; | 350 | break; |
351 | saddr = skb->nh.iph->saddr; | 351 | saddr = ip_hdr(skb)->saddr; |
352 | if (inet_addr_type(saddr) == RTN_LOCAL) { | 352 | if (inet_addr_type(saddr) == RTN_LOCAL) { |
353 | /* saddr should be known to target */ | 353 | /* saddr should be known to target */ |
354 | if (inet_addr_onlink(in_dev, target, saddr)) | 354 | if (inet_addr_onlink(in_dev, target, saddr)) |
@@ -578,7 +578,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
578 | return NULL; | 578 | return NULL; |
579 | 579 | ||
580 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 580 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
581 | skb->nh.raw = skb->data; | 581 | skb_reset_network_header(skb); |
582 | arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); | 582 | arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); |
583 | skb->dev = dev; | 583 | skb->dev = dev; |
584 | skb->protocol = htons(ETH_P_ARP); | 584 | skb->protocol = htons(ETH_P_ARP); |
@@ -721,7 +721,7 @@ static int arp_process(struct sk_buff *skb) | |||
721 | if (in_dev == NULL) | 721 | if (in_dev == NULL) |
722 | goto out; | 722 | goto out; |
723 | 723 | ||
724 | arp = skb->nh.arph; | 724 | arp = arp_hdr(skb); |
725 | 725 | ||
726 | switch (dev_type) { | 726 | switch (dev_type) { |
727 | default: | 727 | default: |
@@ -937,7 +937,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, | |||
937 | (2 * sizeof(u32))))) | 937 | (2 * sizeof(u32))))) |
938 | goto freeskb; | 938 | goto freeskb; |
939 | 939 | ||
940 | arp = skb->nh.arph; | 940 | arp = arp_hdr(skb); |
941 | if (arp->ar_hln != dev->addr_len || | 941 | if (arp->ar_hln != dev->addr_len || |
942 | dev->flags & IFF_NOARP || | 942 | dev->flags & IFF_NOARP || |
943 | skb->pkt_type == PACKET_OTHERHOST || | 943 | skb->pkt_type == PACKET_OTHERHOST || |
@@ -1178,7 +1178,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg) | |||
1178 | goto out; | 1178 | goto out; |
1179 | } | 1179 | } |
1180 | 1180 | ||
1181 | switch(cmd) { | 1181 | switch (cmd) { |
1182 | case SIOCDARP: | 1182 | case SIOCDARP: |
1183 | err = arp_req_delete(&r, dev); | 1183 | err = arp_req_delete(&r, dev); |
1184 | break; | 1184 | break; |
@@ -1360,7 +1360,7 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos) | |||
1360 | 1360 | ||
1361 | /* ------------------------------------------------------------------------ */ | 1361 | /* ------------------------------------------------------------------------ */ |
1362 | 1362 | ||
1363 | static struct seq_operations arp_seq_ops = { | 1363 | static const struct seq_operations arp_seq_ops = { |
1364 | .start = arp_seq_start, | 1364 | .start = arp_seq_start, |
1365 | .next = neigh_seq_next, | 1365 | .next = neigh_seq_next, |
1366 | .stop = neigh_seq_stop, | 1366 | .stop = neigh_seq_stop, |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2ce5b693a8bd..11a3404d65af 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -1174,7 +1174,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, | |||
1174 | u16 cat_low; | 1174 | u16 cat_low; |
1175 | u16 cat_high; | 1175 | u16 cat_high; |
1176 | 1176 | ||
1177 | for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) { | 1177 | for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) { |
1178 | cat_high = ntohs(*((__be16 *)&net_cat[net_iter])); | 1178 | cat_high = ntohs(*((__be16 *)&net_cat[net_iter])); |
1179 | if ((net_iter + 4) <= net_cat_len) | 1179 | if ((net_iter + 4) <= net_cat_len) |
1180 | cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2])); | 1180 | cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2])); |
@@ -1676,7 +1676,7 @@ validate_return: | |||
1676 | */ | 1676 | */ |
1677 | void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) | 1677 | void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) |
1678 | { | 1678 | { |
1679 | if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) | 1679 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) |
1680 | return; | 1680 | return; |
1681 | 1681 | ||
1682 | if (gateway) | 1682 | if (gateway) |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 98a00d0edc76..088888db8b3d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -48,7 +48,6 @@ | |||
48 | #include <linux/netdevice.h> | 48 | #include <linux/netdevice.h> |
49 | #include <linux/etherdevice.h> | 49 | #include <linux/etherdevice.h> |
50 | #include <linux/skbuff.h> | 50 | #include <linux/skbuff.h> |
51 | #include <linux/rtnetlink.h> | ||
52 | #include <linux/init.h> | 51 | #include <linux/init.h> |
53 | #include <linux/notifier.h> | 52 | #include <linux/notifier.h> |
54 | #include <linux/inetdevice.h> | 53 | #include <linux/inetdevice.h> |
@@ -62,7 +61,7 @@ | |||
62 | #include <net/ip.h> | 61 | #include <net/ip.h> |
63 | #include <net/route.h> | 62 | #include <net/route.h> |
64 | #include <net/ip_fib.h> | 63 | #include <net/ip_fib.h> |
65 | #include <net/netlink.h> | 64 | #include <net/rtnetlink.h> |
66 | 65 | ||
67 | struct ipv4_devconf ipv4_devconf = { | 66 | struct ipv4_devconf ipv4_devconf = { |
68 | .accept_redirects = 1, | 67 | .accept_redirects = 1, |
@@ -633,7 +632,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) | |||
633 | dev_load(ifr.ifr_name); | 632 | dev_load(ifr.ifr_name); |
634 | #endif | 633 | #endif |
635 | 634 | ||
636 | switch(cmd) { | 635 | switch (cmd) { |
637 | case SIOCGIFADDR: /* Get interface address */ | 636 | case SIOCGIFADDR: /* Get interface address */ |
638 | case SIOCGIFBRDADDR: /* Get the broadcast address */ | 637 | case SIOCGIFBRDADDR: /* Get the broadcast address */ |
639 | case SIOCGIFDSTADDR: /* Get the destination address */ | 638 | case SIOCGIFDSTADDR: /* Get the destination address */ |
@@ -708,7 +707,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) | |||
708 | if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) | 707 | if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) |
709 | goto done; | 708 | goto done; |
710 | 709 | ||
711 | switch(cmd) { | 710 | switch (cmd) { |
712 | case SIOCGIFADDR: /* Get interface address */ | 711 | case SIOCGIFADDR: /* Get interface address */ |
713 | sin->sin_addr.s_addr = ifa->ifa_local; | 712 | sin->sin_addr.s_addr = ifa->ifa_local; |
714 | goto rarok; | 713 | goto rarok; |
@@ -1183,17 +1182,13 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
1183 | int s_ip_idx, s_idx = cb->args[0]; | 1182 | int s_ip_idx, s_idx = cb->args[0]; |
1184 | 1183 | ||
1185 | s_ip_idx = ip_idx = cb->args[1]; | 1184 | s_ip_idx = ip_idx = cb->args[1]; |
1186 | read_lock(&dev_base_lock); | ||
1187 | for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { | 1185 | for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { |
1188 | if (idx < s_idx) | 1186 | if (idx < s_idx) |
1189 | continue; | 1187 | continue; |
1190 | if (idx > s_idx) | 1188 | if (idx > s_idx) |
1191 | s_ip_idx = 0; | 1189 | s_ip_idx = 0; |
1192 | rcu_read_lock(); | 1190 | if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) |
1193 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { | ||
1194 | rcu_read_unlock(); | ||
1195 | continue; | 1191 | continue; |
1196 | } | ||
1197 | 1192 | ||
1198 | for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; | 1193 | for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; |
1199 | ifa = ifa->ifa_next, ip_idx++) { | 1194 | ifa = ifa->ifa_next, ip_idx++) { |
@@ -1201,16 +1196,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
1201 | continue; | 1196 | continue; |
1202 | if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, | 1197 | if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, |
1203 | cb->nlh->nlmsg_seq, | 1198 | cb->nlh->nlmsg_seq, |
1204 | RTM_NEWADDR, NLM_F_MULTI) <= 0) { | 1199 | RTM_NEWADDR, NLM_F_MULTI) <= 0) |
1205 | rcu_read_unlock(); | ||
1206 | goto done; | 1200 | goto done; |
1207 | } | ||
1208 | } | 1201 | } |
1209 | rcu_read_unlock(); | ||
1210 | } | 1202 | } |
1211 | 1203 | ||
1212 | done: | 1204 | done: |
1213 | read_unlock(&dev_base_lock); | ||
1214 | cb->args[0] = idx; | 1205 | cb->args[0] = idx; |
1215 | cb->args[1] = ip_idx; | 1206 | cb->args[1] = ip_idx; |
1216 | 1207 | ||
@@ -1241,19 +1232,6 @@ errout: | |||
1241 | rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); | 1232 | rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); |
1242 | } | 1233 | } |
1243 | 1234 | ||
1244 | static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { | ||
1245 | [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, }, | ||
1246 | [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, }, | ||
1247 | [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, }, | ||
1248 | [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, }, | ||
1249 | [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, }, | ||
1250 | [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, | ||
1251 | .dumpit = inet_dump_fib, }, | ||
1252 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
1253 | [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, | ||
1254 | #endif | ||
1255 | }; | ||
1256 | |||
1257 | #ifdef CONFIG_SYSCTL | 1235 | #ifdef CONFIG_SYSCTL |
1258 | 1236 | ||
1259 | void inet_forward_change(void) | 1237 | void inet_forward_change(void) |
@@ -1636,7 +1614,10 @@ void __init devinet_init(void) | |||
1636 | { | 1614 | { |
1637 | register_gifconf(PF_INET, inet_gifconf); | 1615 | register_gifconf(PF_INET, inet_gifconf); |
1638 | register_netdevice_notifier(&ip_netdev_notifier); | 1616 | register_netdevice_notifier(&ip_netdev_notifier); |
1639 | rtnetlink_links[PF_INET] = inet_rtnetlink_table; | 1617 | |
1618 | rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); | ||
1619 | rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); | ||
1620 | rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); | ||
1640 | #ifdef CONFIG_SYSCTL | 1621 | #ifdef CONFIG_SYSCTL |
1641 | devinet_sysctl.sysctl_header = | 1622 | devinet_sysctl.sysctl_header = |
1642 | register_sysctl_table(devinet_sysctl.devinet_root_dir); | 1623 | register_sysctl_table(devinet_sysctl.devinet_root_dir); |
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 31041127eeb8..47c95e8ef045 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -21,13 +21,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) | |||
21 | struct blkcipher_desc desc; | 21 | struct blkcipher_desc desc; |
22 | struct esp_data *esp; | 22 | struct esp_data *esp; |
23 | struct sk_buff *trailer; | 23 | struct sk_buff *trailer; |
24 | u8 *tail; | ||
24 | int blksize; | 25 | int blksize; |
25 | int clen; | 26 | int clen; |
26 | int alen; | 27 | int alen; |
27 | int nfrags; | 28 | int nfrags; |
28 | 29 | ||
29 | /* Strip IP+ESP header. */ | 30 | /* Strip IP+ESP header. */ |
30 | __skb_pull(skb, skb->h.raw - skb->data); | 31 | __skb_pull(skb, skb_transport_offset(skb)); |
31 | /* Now skb is pure payload to encrypt */ | 32 | /* Now skb is pure payload to encrypt */ |
32 | 33 | ||
33 | err = -ENOMEM; | 34 | err = -ENOMEM; |
@@ -49,19 +50,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) | |||
49 | goto error; | 50 | goto error; |
50 | 51 | ||
51 | /* Fill padding... */ | 52 | /* Fill padding... */ |
53 | tail = skb_tail_pointer(trailer); | ||
52 | do { | 54 | do { |
53 | int i; | 55 | int i; |
54 | for (i=0; i<clen-skb->len - 2; i++) | 56 | for (i=0; i<clen-skb->len - 2; i++) |
55 | *(u8*)(trailer->tail + i) = i+1; | 57 | tail[i] = i + 1; |
56 | } while (0); | 58 | } while (0); |
57 | *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; | 59 | tail[clen - skb->len - 2] = (clen - skb->len) - 2; |
58 | pskb_put(skb, trailer, clen - skb->len); | 60 | pskb_put(skb, trailer, clen - skb->len); |
59 | 61 | ||
60 | __skb_push(skb, skb->data - skb->nh.raw); | 62 | __skb_push(skb, skb->data - skb_network_header(skb)); |
61 | top_iph = skb->nh.iph; | 63 | top_iph = ip_hdr(skb); |
62 | esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4); | 64 | esph = (struct ip_esp_hdr *)(skb_network_header(skb) + |
65 | top_iph->ihl * 4); | ||
63 | top_iph->tot_len = htons(skb->len + alen); | 66 | top_iph->tot_len = htons(skb->len + alen); |
64 | *(u8*)(trailer->tail - 1) = top_iph->protocol; | 67 | *(skb_tail_pointer(trailer) - 1) = top_iph->protocol; |
65 | 68 | ||
66 | /* this is non-NULL only with UDP Encapsulation */ | 69 | /* this is non-NULL only with UDP Encapsulation */ |
67 | if (x->encap) { | 70 | if (x->encap) { |
@@ -217,12 +220,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) | |||
217 | 220 | ||
218 | /* ... check padding bits here. Silly. :-) */ | 221 | /* ... check padding bits here. Silly. :-) */ |
219 | 222 | ||
220 | iph = skb->nh.iph; | 223 | iph = ip_hdr(skb); |
221 | ihl = iph->ihl * 4; | 224 | ihl = iph->ihl * 4; |
222 | 225 | ||
223 | if (x->encap) { | 226 | if (x->encap) { |
224 | struct xfrm_encap_tmpl *encap = x->encap; | 227 | struct xfrm_encap_tmpl *encap = x->encap; |
225 | struct udphdr *uh = (void *)(skb->nh.raw + ihl); | 228 | struct udphdr *uh = (void *)(skb_network_header(skb) + ihl); |
226 | 229 | ||
227 | /* | 230 | /* |
228 | * 1) if the NAT-T peer's IP or port changed then | 231 | * 1) if the NAT-T peer's IP or port changed then |
@@ -260,7 +263,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) | |||
260 | 263 | ||
261 | iph->protocol = nexthdr[1]; | 264 | iph->protocol = nexthdr[1]; |
262 | pskb_trim(skb, skb->len - alen - padlen - 2); | 265 | pskb_trim(skb, skb->len - alen - padlen - 2); |
263 | skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl; | 266 | __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); |
267 | skb_set_transport_header(skb, -ihl); | ||
264 | 268 | ||
265 | return 0; | 269 | return 0; |
266 | 270 | ||
@@ -268,32 +272,33 @@ out: | |||
268 | return -EINVAL; | 272 | return -EINVAL; |
269 | } | 273 | } |
270 | 274 | ||
271 | static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) | 275 | static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) |
272 | { | 276 | { |
273 | struct esp_data *esp = x->data; | 277 | struct esp_data *esp = x->data; |
274 | u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); | 278 | u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); |
275 | int enclen = 0; | 279 | u32 align = max_t(u32, blksize, esp->conf.padlen); |
280 | u32 rem; | ||
281 | |||
282 | mtu -= x->props.header_len + esp->auth.icv_trunc_len; | ||
283 | rem = mtu & (align - 1); | ||
284 | mtu &= ~(align - 1); | ||
276 | 285 | ||
277 | switch (x->props.mode) { | 286 | switch (x->props.mode) { |
278 | case XFRM_MODE_TUNNEL: | 287 | case XFRM_MODE_TUNNEL: |
279 | mtu = ALIGN(mtu +2, blksize); | ||
280 | break; | 288 | break; |
281 | default: | 289 | default: |
282 | case XFRM_MODE_TRANSPORT: | 290 | case XFRM_MODE_TRANSPORT: |
283 | /* The worst case */ | 291 | /* The worst case */ |
284 | mtu = ALIGN(mtu + 2, 4) + blksize - 4; | 292 | mtu -= blksize - 4; |
293 | mtu += min_t(u32, blksize - 4, rem); | ||
285 | break; | 294 | break; |
286 | case XFRM_MODE_BEET: | 295 | case XFRM_MODE_BEET: |
287 | /* The worst case. */ | 296 | /* The worst case. */ |
288 | enclen = IPV4_BEET_PHMAXLEN; | 297 | mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem); |
289 | mtu = ALIGN(mtu + enclen + 2, blksize); | ||
290 | break; | 298 | break; |
291 | } | 299 | } |
292 | 300 | ||
293 | if (esp->conf.padlen) | 301 | return mtu - 2; |
294 | mtu = ALIGN(mtu, esp->conf.padlen); | ||
295 | |||
296 | return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen; | ||
297 | } | 302 | } |
298 | 303 | ||
299 | static void esp4_err(struct sk_buff *skb, u32 info) | 304 | static void esp4_err(struct sk_buff *skb, u32 info) |
@@ -302,8 +307,8 @@ static void esp4_err(struct sk_buff *skb, u32 info) | |||
302 | struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); | 307 | struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); |
303 | struct xfrm_state *x; | 308 | struct xfrm_state *x; |
304 | 309 | ||
305 | if (skb->h.icmph->type != ICMP_DEST_UNREACH || | 310 | if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || |
306 | skb->h.icmph->code != ICMP_FRAG_NEEDED) | 311 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
307 | return; | 312 | return; |
308 | 313 | ||
309 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); | 314 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); |
@@ -336,6 +341,7 @@ static int esp_init_state(struct xfrm_state *x) | |||
336 | { | 341 | { |
337 | struct esp_data *esp = NULL; | 342 | struct esp_data *esp = NULL; |
338 | struct crypto_blkcipher *tfm; | 343 | struct crypto_blkcipher *tfm; |
344 | u32 align; | ||
339 | 345 | ||
340 | /* null auth and encryption can have zero length keys */ | 346 | /* null auth and encryption can have zero length keys */ |
341 | if (x->aalg) { | 347 | if (x->aalg) { |
@@ -402,6 +408,8 @@ static int esp_init_state(struct xfrm_state *x) | |||
402 | x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; | 408 | x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; |
403 | if (x->props.mode == XFRM_MODE_TUNNEL) | 409 | if (x->props.mode == XFRM_MODE_TUNNEL) |
404 | x->props.header_len += sizeof(struct iphdr); | 410 | x->props.header_len += sizeof(struct iphdr); |
411 | else if (x->props.mode == XFRM_MODE_BEET) | ||
412 | x->props.header_len += IPV4_BEET_PHMAXLEN; | ||
405 | if (x->encap) { | 413 | if (x->encap) { |
406 | struct xfrm_encap_tmpl *encap = x->encap; | 414 | struct xfrm_encap_tmpl *encap = x->encap; |
407 | 415 | ||
@@ -417,7 +425,10 @@ static int esp_init_state(struct xfrm_state *x) | |||
417 | } | 425 | } |
418 | } | 426 | } |
419 | x->data = esp; | 427 | x->data = esp; |
420 | x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len; | 428 | align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); |
429 | if (esp->conf.padlen) | ||
430 | align = max_t(u32, align, esp->conf.padlen); | ||
431 | x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len; | ||
421 | return 0; | 432 | return 0; |
422 | 433 | ||
423 | error: | 434 | error: |
@@ -434,7 +445,7 @@ static struct xfrm_type esp_type = | |||
434 | .proto = IPPROTO_ESP, | 445 | .proto = IPPROTO_ESP, |
435 | .init_state = esp_init_state, | 446 | .init_state = esp_init_state, |
436 | .destructor = esp_destroy, | 447 | .destructor = esp_destroy, |
437 | .get_max_size = esp4_get_max_size, | 448 | .get_mtu = esp4_get_mtu, |
438 | .input = esp_input, | 449 | .input = esp_input, |
439 | .output = esp_output | 450 | .output = esp_output |
440 | }; | 451 | }; |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cac06c43f004..837f2957fa83 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/if_addr.h> | 34 | #include <linux/if_addr.h> |
35 | #include <linux/if_arp.h> | 35 | #include <linux/if_arp.h> |
36 | #include <linux/skbuff.h> | 36 | #include <linux/skbuff.h> |
37 | #include <linux/netlink.h> | ||
38 | #include <linux/init.h> | 37 | #include <linux/init.h> |
39 | #include <linux/list.h> | 38 | #include <linux/list.h> |
40 | 39 | ||
@@ -46,6 +45,7 @@ | |||
46 | #include <net/icmp.h> | 45 | #include <net/icmp.h> |
47 | #include <net/arp.h> | 46 | #include <net/arp.h> |
48 | #include <net/ip_fib.h> | 47 | #include <net/ip_fib.h> |
48 | #include <net/rtnetlink.h> | ||
49 | 49 | ||
50 | #define FFprint(a...) printk(KERN_DEBUG a) | 50 | #define FFprint(a...) printk(KERN_DEBUG a) |
51 | 51 | ||
@@ -540,7 +540,7 @@ errout: | |||
540 | return err; | 540 | return err; |
541 | } | 541 | } |
542 | 542 | ||
543 | int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 543 | static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
544 | { | 544 | { |
545 | struct fib_config cfg; | 545 | struct fib_config cfg; |
546 | struct fib_table *tb; | 546 | struct fib_table *tb; |
@@ -561,7 +561,7 @@ errout: | |||
561 | return err; | 561 | return err; |
562 | } | 562 | } |
563 | 563 | ||
564 | int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 564 | static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
565 | { | 565 | { |
566 | struct fib_config cfg; | 566 | struct fib_config cfg; |
567 | struct fib_table *tb; | 567 | struct fib_table *tb; |
@@ -582,7 +582,7 @@ errout: | |||
582 | return err; | 582 | return err; |
583 | } | 583 | } |
584 | 584 | ||
585 | int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 585 | static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) |
586 | { | 586 | { |
587 | unsigned int h, s_h; | 587 | unsigned int h, s_h; |
588 | unsigned int e = 0, s_e; | 588 | unsigned int e = 0, s_e; |
@@ -777,6 +777,10 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) | |||
777 | .tos = frn->fl_tos, | 777 | .tos = frn->fl_tos, |
778 | .scope = frn->fl_scope } } }; | 778 | .scope = frn->fl_scope } } }; |
779 | 779 | ||
780 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
781 | res.r = NULL; | ||
782 | #endif | ||
783 | |||
780 | frn->err = -ENOENT; | 784 | frn->err = -ENOENT; |
781 | if (tb) { | 785 | if (tb) { |
782 | local_bh_disable(); | 786 | local_bh_disable(); |
@@ -807,7 +811,7 @@ static void nl_fib_input(struct sock *sk, int len) | |||
807 | if (skb == NULL) | 811 | if (skb == NULL) |
808 | return; | 812 | return; |
809 | 813 | ||
810 | nlh = (struct nlmsghdr *)skb->data; | 814 | nlh = nlmsg_hdr(skb); |
811 | if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || | 815 | if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || |
812 | nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { | 816 | nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { |
813 | kfree_skb(skb); | 817 | kfree_skb(skb); |
@@ -827,7 +831,8 @@ static void nl_fib_input(struct sock *sk, int len) | |||
827 | 831 | ||
828 | static void nl_fib_lookup_init(void) | 832 | static void nl_fib_lookup_init(void) |
829 | { | 833 | { |
830 | netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); | 834 | netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, |
835 | THIS_MODULE); | ||
831 | } | 836 | } |
832 | 837 | ||
833 | static void fib_disable_ip(struct net_device *dev, int force) | 838 | static void fib_disable_ip(struct net_device *dev, int force) |
@@ -925,6 +930,10 @@ void __init ip_fib_init(void) | |||
925 | register_netdevice_notifier(&fib_netdev_notifier); | 930 | register_netdevice_notifier(&fib_netdev_notifier); |
926 | register_inetaddr_notifier(&fib_inetaddr_notifier); | 931 | register_inetaddr_notifier(&fib_inetaddr_notifier); |
927 | nl_fib_lookup_init(); | 932 | nl_fib_lookup_init(); |
933 | |||
934 | rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); | ||
935 | rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); | ||
936 | rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); | ||
928 | } | 937 | } |
929 | 938 | ||
930 | EXPORT_SYMBOL(inet_addr_type); | 939 | EXPORT_SYMBOL(inet_addr_type); |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index a4949f957ab5..9cfecf1215c9 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -1027,7 +1027,7 @@ out: | |||
1027 | return 0; | 1027 | return 0; |
1028 | } | 1028 | } |
1029 | 1029 | ||
1030 | static struct seq_operations fib_seq_ops = { | 1030 | static const struct seq_operations fib_seq_ops = { |
1031 | .start = fib_seq_start, | 1031 | .start = fib_seq_start, |
1032 | .next = fib_seq_next, | 1032 | .next = fib_seq_next, |
1033 | .stop = fib_seq_stop, | 1033 | .stop = fib_seq_stop, |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index c660c074c76c..33083ad52e9f 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -274,11 +274,6 @@ nla_put_failure: | |||
274 | return -ENOBUFS; | 274 | return -ENOBUFS; |
275 | } | 275 | } |
276 | 276 | ||
277 | int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
278 | { | ||
279 | return fib_rules_dump(skb, cb, AF_INET); | ||
280 | } | ||
281 | |||
282 | static u32 fib4_rule_default_pref(void) | 277 | static u32 fib4_rule_default_pref(void) |
283 | { | 278 | { |
284 | struct list_head *pos; | 279 | struct list_head *pos; |
@@ -303,6 +298,11 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) | |||
303 | + nla_total_size(4); /* flow */ | 298 | + nla_total_size(4); /* flow */ |
304 | } | 299 | } |
305 | 300 | ||
301 | static void fib4_rule_flush_cache(void) | ||
302 | { | ||
303 | rt_cache_flush(-1); | ||
304 | } | ||
305 | |||
306 | static struct fib_rules_ops fib4_rules_ops = { | 306 | static struct fib_rules_ops fib4_rules_ops = { |
307 | .family = AF_INET, | 307 | .family = AF_INET, |
308 | .rule_size = sizeof(struct fib4_rule), | 308 | .rule_size = sizeof(struct fib4_rule), |
@@ -314,6 +314,7 @@ static struct fib_rules_ops fib4_rules_ops = { | |||
314 | .fill = fib4_rule_fill, | 314 | .fill = fib4_rule_fill, |
315 | .default_pref = fib4_rule_default_pref, | 315 | .default_pref = fib4_rule_default_pref, |
316 | .nlmsg_payload = fib4_rule_nlmsg_payload, | 316 | .nlmsg_payload = fib4_rule_nlmsg_payload, |
317 | .flush_cache = fib4_rule_flush_cache, | ||
317 | .nlgroup = RTNLGRP_IPV4_RULE, | 318 | .nlgroup = RTNLGRP_IPV4_RULE, |
318 | .policy = fib4_rule_policy, | 319 | .policy = fib4_rule_policy, |
319 | .rules_list = &fib4_rules, | 320 | .rules_list = &fib4_rules, |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3dad12ee76c3..406ea7050aed 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, | |||
927 | default: | 927 | default: |
928 | printk(KERN_DEBUG "impossible 102\n"); | 928 | printk(KERN_DEBUG "impossible 102\n"); |
929 | return -EINVAL; | 929 | return -EINVAL; |
930 | }; | 930 | } |
931 | } | 931 | } |
932 | return err; | 932 | return err; |
933 | } | 933 | } |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 214c34732e84..9be7da7c3a8f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * Patrick McHardy <kaber@trash.net> | 50 | * Patrick McHardy <kaber@trash.net> |
51 | */ | 51 | */ |
52 | 52 | ||
53 | #define VERSION "0.407" | 53 | #define VERSION "0.408" |
54 | 54 | ||
55 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
56 | #include <asm/system.h> | 56 | #include <asm/system.h> |
@@ -292,8 +292,8 @@ static inline void check_tnode(const struct tnode *tn) | |||
292 | 292 | ||
293 | static int halve_threshold = 25; | 293 | static int halve_threshold = 25; |
294 | static int inflate_threshold = 50; | 294 | static int inflate_threshold = 50; |
295 | static int halve_threshold_root = 15; | 295 | static int halve_threshold_root = 8; |
296 | static int inflate_threshold_root = 25; | 296 | static int inflate_threshold_root = 15; |
297 | 297 | ||
298 | 298 | ||
299 | static void __alias_free_mem(struct rcu_head *head) | 299 | static void __alias_free_mem(struct rcu_head *head) |
@@ -350,11 +350,10 @@ static void __tnode_free_rcu(struct rcu_head *head) | |||
350 | 350 | ||
351 | static inline void tnode_free(struct tnode *tn) | 351 | static inline void tnode_free(struct tnode *tn) |
352 | { | 352 | { |
353 | if(IS_LEAF(tn)) { | 353 | if (IS_LEAF(tn)) { |
354 | struct leaf *l = (struct leaf *) tn; | 354 | struct leaf *l = (struct leaf *) tn; |
355 | call_rcu_bh(&l->rcu, __leaf_free_rcu); | 355 | call_rcu_bh(&l->rcu, __leaf_free_rcu); |
356 | } | 356 | } else |
357 | else | ||
358 | call_rcu(&tn->rcu, __tnode_free_rcu); | 357 | call_rcu(&tn->rcu, __tnode_free_rcu); |
359 | } | 358 | } |
360 | 359 | ||
@@ -459,6 +458,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
459 | struct tnode *old_tn; | 458 | struct tnode *old_tn; |
460 | int inflate_threshold_use; | 459 | int inflate_threshold_use; |
461 | int halve_threshold_use; | 460 | int halve_threshold_use; |
461 | int max_resize; | ||
462 | 462 | ||
463 | if (!tn) | 463 | if (!tn) |
464 | return NULL; | 464 | return NULL; |
@@ -553,13 +553,14 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
553 | 553 | ||
554 | /* Keep root node larger */ | 554 | /* Keep root node larger */ |
555 | 555 | ||
556 | if(!tn->parent) | 556 | if (!tn->parent) |
557 | inflate_threshold_use = inflate_threshold_root; | 557 | inflate_threshold_use = inflate_threshold_root; |
558 | else | 558 | else |
559 | inflate_threshold_use = inflate_threshold; | 559 | inflate_threshold_use = inflate_threshold; |
560 | 560 | ||
561 | err = 0; | 561 | err = 0; |
562 | while ((tn->full_children > 0 && | 562 | max_resize = 10; |
563 | while ((tn->full_children > 0 && max_resize-- && | ||
563 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= | 564 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= |
564 | inflate_threshold_use * tnode_child_length(tn))) { | 565 | inflate_threshold_use * tnode_child_length(tn))) { |
565 | 566 | ||
@@ -574,6 +575,15 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
574 | } | 575 | } |
575 | } | 576 | } |
576 | 577 | ||
578 | if (max_resize < 0) { | ||
579 | if (!tn->parent) | ||
580 | printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n", | ||
581 | inflate_threshold_root, tn->bits); | ||
582 | else | ||
583 | printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n", | ||
584 | inflate_threshold, tn->bits); | ||
585 | } | ||
586 | |||
577 | check_tnode(tn); | 587 | check_tnode(tn); |
578 | 588 | ||
579 | /* | 589 | /* |
@@ -584,13 +594,14 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
584 | 594 | ||
585 | /* Keep root node larger */ | 595 | /* Keep root node larger */ |
586 | 596 | ||
587 | if(!tn->parent) | 597 | if (!tn->parent) |
588 | halve_threshold_use = halve_threshold_root; | 598 | halve_threshold_use = halve_threshold_root; |
589 | else | 599 | else |
590 | halve_threshold_use = halve_threshold; | 600 | halve_threshold_use = halve_threshold; |
591 | 601 | ||
592 | err = 0; | 602 | err = 0; |
593 | while (tn->bits > 1 && | 603 | max_resize = 10; |
604 | while (tn->bits > 1 && max_resize-- && | ||
594 | 100 * (tnode_child_length(tn) - tn->empty_children) < | 605 | 100 * (tnode_child_length(tn) - tn->empty_children) < |
595 | halve_threshold_use * tnode_child_length(tn)) { | 606 | halve_threshold_use * tnode_child_length(tn)) { |
596 | 607 | ||
@@ -605,6 +616,14 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
605 | } | 616 | } |
606 | } | 617 | } |
607 | 618 | ||
619 | if (max_resize < 0) { | ||
620 | if (!tn->parent) | ||
621 | printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n", | ||
622 | halve_threshold_root, tn->bits); | ||
623 | else | ||
624 | printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n", | ||
625 | halve_threshold, tn->bits); | ||
626 | } | ||
608 | 627 | ||
609 | /* Only one child remains */ | 628 | /* Only one child remains */ |
610 | if (tn->empty_children == tnode_child_length(tn) - 1) | 629 | if (tn->empty_children == tnode_child_length(tn) - 1) |
@@ -2039,12 +2058,12 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | |||
2039 | { | 2058 | { |
2040 | struct node *n ; | 2059 | struct node *n ; |
2041 | 2060 | ||
2042 | if(!t) | 2061 | if (!t) |
2043 | return NULL; | 2062 | return NULL; |
2044 | 2063 | ||
2045 | n = rcu_dereference(t->trie); | 2064 | n = rcu_dereference(t->trie); |
2046 | 2065 | ||
2047 | if(!iter) | 2066 | if (!iter) |
2048 | return NULL; | 2067 | return NULL; |
2049 | 2068 | ||
2050 | if (n) { | 2069 | if (n) { |
@@ -2084,7 +2103,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) | |||
2084 | int i; | 2103 | int i; |
2085 | 2104 | ||
2086 | s->tnodes++; | 2105 | s->tnodes++; |
2087 | if(tn->bits < MAX_STAT_DEPTH) | 2106 | if (tn->bits < MAX_STAT_DEPTH) |
2088 | s->nodesizes[tn->bits]++; | 2107 | s->nodesizes[tn->bits]++; |
2089 | 2108 | ||
2090 | for (i = 0; i < (1<<tn->bits); i++) | 2109 | for (i = 0; i < (1<<tn->bits); i++) |
@@ -2250,7 +2269,7 @@ static inline const char *rtn_scope(enum rt_scope_t s) | |||
2250 | { | 2269 | { |
2251 | static char buf[32]; | 2270 | static char buf[32]; |
2252 | 2271 | ||
2253 | switch(s) { | 2272 | switch (s) { |
2254 | case RT_SCOPE_UNIVERSE: return "universe"; | 2273 | case RT_SCOPE_UNIVERSE: return "universe"; |
2255 | case RT_SCOPE_SITE: return "site"; | 2274 | case RT_SCOPE_SITE: return "site"; |
2256 | case RT_SCOPE_LINK: return "link"; | 2275 | case RT_SCOPE_LINK: return "link"; |
@@ -2340,7 +2359,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2340 | return 0; | 2359 | return 0; |
2341 | } | 2360 | } |
2342 | 2361 | ||
2343 | static struct seq_operations fib_trie_seq_ops = { | 2362 | static const struct seq_operations fib_trie_seq_ops = { |
2344 | .start = fib_trie_seq_start, | 2363 | .start = fib_trie_seq_start, |
2345 | .next = fib_trie_seq_next, | 2364 | .next = fib_trie_seq_next, |
2346 | .stop = fib_trie_seq_stop, | 2365 | .stop = fib_trie_seq_stop, |
@@ -2461,7 +2480,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) | |||
2461 | return 0; | 2480 | return 0; |
2462 | } | 2481 | } |
2463 | 2482 | ||
2464 | static struct seq_operations fib_route_seq_ops = { | 2483 | static const struct seq_operations fib_route_seq_ops = { |
2465 | .start = fib_trie_seq_start, | 2484 | .start = fib_trie_seq_start, |
2466 | .next = fib_trie_seq_next, | 2485 | .next = fib_trie_seq_next, |
2467 | .stop = fib_trie_seq_stop, | 2486 | .stop = fib_trie_seq_stop, |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4b7a0d946a0d..d38cbba92a4d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -355,7 +355,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
355 | ipc, rt, MSG_DONTWAIT) < 0) | 355 | ipc, rt, MSG_DONTWAIT) < 0) |
356 | ip_flush_pending_frames(icmp_socket->sk); | 356 | ip_flush_pending_frames(icmp_socket->sk); |
357 | else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { | 357 | else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { |
358 | struct icmphdr *icmph = skb->h.icmph; | 358 | struct icmphdr *icmph = icmp_hdr(skb); |
359 | __wsum csum = 0; | 359 | __wsum csum = 0; |
360 | struct sk_buff *skb1; | 360 | struct sk_buff *skb1; |
361 | 361 | ||
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
392 | icmp_param->data.icmph.checksum = 0; | 392 | icmp_param->data.icmph.checksum = 0; |
393 | icmp_out_count(icmp_param->data.icmph.type); | 393 | icmp_out_count(icmp_param->data.icmph.type); |
394 | 394 | ||
395 | inet->tos = skb->nh.iph->tos; | 395 | inet->tos = ip_hdr(skb)->tos; |
396 | daddr = ipc.addr = rt->rt_src; | 396 | daddr = ipc.addr = rt->rt_src; |
397 | ipc.opt = NULL; | 397 | ipc.opt = NULL; |
398 | if (icmp_param->replyopts.optlen) { | 398 | if (icmp_param->replyopts.optlen) { |
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
404 | struct flowi fl = { .nl_u = { .ip4_u = | 404 | struct flowi fl = { .nl_u = { .ip4_u = |
405 | { .daddr = daddr, | 405 | { .daddr = daddr, |
406 | .saddr = rt->rt_spec_dst, | 406 | .saddr = rt->rt_spec_dst, |
407 | .tos = RT_TOS(skb->nh.iph->tos) } }, | 407 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, |
408 | .proto = IPPROTO_ICMP }; | 408 | .proto = IPPROTO_ICMP }; |
409 | security_skb_classify_flow(skb, &fl); | 409 | security_skb_classify_flow(skb, &fl); |
410 | if (ip_route_output_key(&rt, &fl)) | 410 | if (ip_route_output_key(&rt, &fl)) |
@@ -448,9 +448,10 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
448 | * Check this, icmp_send is called from the most obscure devices | 448 | * Check this, icmp_send is called from the most obscure devices |
449 | * sometimes. | 449 | * sometimes. |
450 | */ | 450 | */ |
451 | iph = skb_in->nh.iph; | 451 | iph = ip_hdr(skb_in); |
452 | 452 | ||
453 | if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail) | 453 | if ((u8 *)iph < skb_in->head || |
454 | (skb_in->network_header + sizeof(*iph)) > skb_in->tail) | ||
454 | goto out; | 455 | goto out; |
455 | 456 | ||
456 | /* | 457 | /* |
@@ -484,7 +485,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
484 | u8 _inner_type, *itp; | 485 | u8 _inner_type, *itp; |
485 | 486 | ||
486 | itp = skb_header_pointer(skb_in, | 487 | itp = skb_header_pointer(skb_in, |
487 | skb_in->nh.raw + | 488 | skb_network_header(skb_in) + |
488 | (iph->ihl << 2) + | 489 | (iph->ihl << 2) + |
489 | offsetof(struct icmphdr, | 490 | offsetof(struct icmphdr, |
490 | type) - | 491 | type) - |
@@ -536,7 +537,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
536 | icmp_param.data.icmph.un.gateway = info; | 537 | icmp_param.data.icmph.un.gateway = info; |
537 | icmp_param.data.icmph.checksum = 0; | 538 | icmp_param.data.icmph.checksum = 0; |
538 | icmp_param.skb = skb_in; | 539 | icmp_param.skb = skb_in; |
539 | icmp_param.offset = skb_in->nh.raw - skb_in->data; | 540 | icmp_param.offset = skb_network_offset(skb_in); |
540 | icmp_out_count(icmp_param.data.icmph.type); | 541 | icmp_out_count(icmp_param.data.icmph.type); |
541 | inet_sk(icmp_socket->sk)->tos = tos; | 542 | inet_sk(icmp_socket->sk)->tos = tos; |
542 | ipc.addr = iph->saddr; | 543 | ipc.addr = iph->saddr; |
@@ -613,7 +614,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
613 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) | 614 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) |
614 | goto out_err; | 615 | goto out_err; |
615 | 616 | ||
616 | icmph = skb->h.icmph; | 617 | icmph = icmp_hdr(skb); |
617 | iph = (struct iphdr *)skb->data; | 618 | iph = (struct iphdr *)skb->data; |
618 | 619 | ||
619 | if (iph->ihl < 5) /* Mangled header, drop. */ | 620 | if (iph->ihl < 5) /* Mangled header, drop. */ |
@@ -676,7 +677,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
676 | printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " | 677 | printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " |
677 | "type %u, code %u " | 678 | "type %u, code %u " |
678 | "error to a broadcast: %u.%u.%u.%u on %s\n", | 679 | "error to a broadcast: %u.%u.%u.%u on %s\n", |
679 | NIPQUAD(skb->nh.iph->saddr), | 680 | NIPQUAD(ip_hdr(skb)->saddr), |
680 | icmph->type, icmph->code, | 681 | icmph->type, icmph->code, |
681 | NIPQUAD(iph->daddr), | 682 | NIPQUAD(iph->daddr), |
682 | skb->dev->name); | 683 | skb->dev->name); |
@@ -743,7 +744,7 @@ static void icmp_redirect(struct sk_buff *skb) | |||
743 | 744 | ||
744 | iph = (struct iphdr *)skb->data; | 745 | iph = (struct iphdr *)skb->data; |
745 | 746 | ||
746 | switch (skb->h.icmph->code & 7) { | 747 | switch (icmp_hdr(skb)->code & 7) { |
747 | case ICMP_REDIR_NET: | 748 | case ICMP_REDIR_NET: |
748 | case ICMP_REDIR_NETTOS: | 749 | case ICMP_REDIR_NETTOS: |
749 | /* | 750 | /* |
@@ -751,8 +752,8 @@ static void icmp_redirect(struct sk_buff *skb) | |||
751 | */ | 752 | */ |
752 | case ICMP_REDIR_HOST: | 753 | case ICMP_REDIR_HOST: |
753 | case ICMP_REDIR_HOSTTOS: | 754 | case ICMP_REDIR_HOSTTOS: |
754 | ip_rt_redirect(skb->nh.iph->saddr, iph->daddr, | 755 | ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr, |
755 | skb->h.icmph->un.gateway, | 756 | icmp_hdr(skb)->un.gateway, |
756 | iph->saddr, skb->dev); | 757 | iph->saddr, skb->dev); |
757 | break; | 758 | break; |
758 | } | 759 | } |
@@ -780,7 +781,7 @@ static void icmp_echo(struct sk_buff *skb) | |||
780 | if (!sysctl_icmp_echo_ignore_all) { | 781 | if (!sysctl_icmp_echo_ignore_all) { |
781 | struct icmp_bxm icmp_param; | 782 | struct icmp_bxm icmp_param; |
782 | 783 | ||
783 | icmp_param.data.icmph = *skb->h.icmph; | 784 | icmp_param.data.icmph = *icmp_hdr(skb); |
784 | icmp_param.data.icmph.type = ICMP_ECHOREPLY; | 785 | icmp_param.data.icmph.type = ICMP_ECHOREPLY; |
785 | icmp_param.skb = skb; | 786 | icmp_param.skb = skb; |
786 | icmp_param.offset = 0; | 787 | icmp_param.offset = 0; |
@@ -816,7 +817,7 @@ static void icmp_timestamp(struct sk_buff *skb) | |||
816 | icmp_param.data.times[2] = icmp_param.data.times[1]; | 817 | icmp_param.data.times[2] = icmp_param.data.times[1]; |
817 | if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) | 818 | if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) |
818 | BUG(); | 819 | BUG(); |
819 | icmp_param.data.icmph = *skb->h.icmph; | 820 | icmp_param.data.icmph = *icmp_hdr(skb); |
820 | icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; | 821 | icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; |
821 | icmp_param.data.icmph.code = 0; | 822 | icmp_param.data.icmph.code = 0; |
822 | icmp_param.skb = skb; | 823 | icmp_param.skb = skb; |
@@ -943,7 +944,7 @@ int icmp_rcv(struct sk_buff *skb) | |||
943 | if (!pskb_pull(skb, sizeof(struct icmphdr))) | 944 | if (!pskb_pull(skb, sizeof(struct icmphdr))) |
944 | goto error; | 945 | goto error; |
945 | 946 | ||
946 | icmph = skb->h.icmph; | 947 | icmph = icmp_hdr(skb); |
947 | 948 | ||
948 | /* | 949 | /* |
949 | * 18 is the highest 'known' ICMP type. Anything else is a mystery | 950 | * 18 is the highest 'known' ICMP type. Anything else is a mystery |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8cedb2a2c9df..2506021c2935 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -314,7 +314,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
314 | 314 | ||
315 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 315 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
316 | 316 | ||
317 | skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); | 317 | skb_reset_network_header(skb); |
318 | pip = ip_hdr(skb); | ||
319 | skb_put(skb, sizeof(struct iphdr) + 4); | ||
318 | 320 | ||
319 | pip->version = 4; | 321 | pip->version = 4; |
320 | pip->ihl = (sizeof(struct iphdr)+4)>>2; | 322 | pip->ihl = (sizeof(struct iphdr)+4)>>2; |
@@ -331,8 +333,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
331 | ((u8*)&pip[1])[2] = 0; | 333 | ((u8*)&pip[1])[2] = 0; |
332 | ((u8*)&pip[1])[3] = 0; | 334 | ((u8*)&pip[1])[3] = 0; |
333 | 335 | ||
334 | pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig)); | 336 | skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; |
335 | skb->h.igmph = (struct igmphdr *)pig; | 337 | skb_put(skb, sizeof(*pig)); |
338 | pig = igmpv3_report_hdr(skb); | ||
336 | pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT; | 339 | pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT; |
337 | pig->resv1 = 0; | 340 | pig->resv1 = 0; |
338 | pig->csum = 0; | 341 | pig->csum = 0; |
@@ -343,16 +346,14 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
343 | 346 | ||
344 | static int igmpv3_sendpack(struct sk_buff *skb) | 347 | static int igmpv3_sendpack(struct sk_buff *skb) |
345 | { | 348 | { |
346 | struct iphdr *pip = skb->nh.iph; | 349 | struct iphdr *pip = ip_hdr(skb); |
347 | struct igmphdr *pig = skb->h.igmph; | 350 | struct igmphdr *pig = igmp_hdr(skb); |
348 | int iplen, igmplen; | 351 | const int iplen = skb->tail - skb->network_header; |
352 | const int igmplen = skb->tail - skb->transport_header; | ||
349 | 353 | ||
350 | iplen = skb->tail - (unsigned char *)skb->nh.iph; | ||
351 | pip->tot_len = htons(iplen); | 354 | pip->tot_len = htons(iplen); |
352 | ip_send_check(pip); | 355 | ip_send_check(pip); |
353 | 356 | pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); | |
354 | igmplen = skb->tail - (unsigned char *)skb->h.igmph; | ||
355 | pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen); | ||
356 | 357 | ||
357 | return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev, | 358 | return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev, |
358 | dst_output); | 359 | dst_output); |
@@ -379,7 +380,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, | |||
379 | pgr->grec_auxwords = 0; | 380 | pgr->grec_auxwords = 0; |
380 | pgr->grec_nsrcs = 0; | 381 | pgr->grec_nsrcs = 0; |
381 | pgr->grec_mca = pmc->multiaddr; | 382 | pgr->grec_mca = pmc->multiaddr; |
382 | pih = (struct igmpv3_report *)skb->h.igmph; | 383 | pih = igmpv3_report_hdr(skb); |
383 | pih->ngrec = htons(ntohs(pih->ngrec)+1); | 384 | pih->ngrec = htons(ntohs(pih->ngrec)+1); |
384 | *ppgr = pgr; | 385 | *ppgr = pgr; |
385 | return skb; | 386 | return skb; |
@@ -412,7 +413,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, | |||
412 | if (!*psf_list) | 413 | if (!*psf_list) |
413 | goto empty_source; | 414 | goto empty_source; |
414 | 415 | ||
415 | pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL; | 416 | pih = skb ? igmpv3_report_hdr(skb) : NULL; |
416 | 417 | ||
417 | /* EX and TO_EX get a fresh packet, if needed */ | 418 | /* EX and TO_EX get a fresh packet, if needed */ |
418 | if (truncate) { | 419 | if (truncate) { |
@@ -664,7 +665,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
664 | 665 | ||
665 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 666 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
666 | 667 | ||
667 | skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); | 668 | skb_reset_network_header(skb); |
669 | iph = ip_hdr(skb); | ||
670 | skb_put(skb, sizeof(struct iphdr) + 4); | ||
668 | 671 | ||
669 | iph->version = 4; | 672 | iph->version = 4; |
670 | iph->ihl = (sizeof(struct iphdr)+4)>>2; | 673 | iph->ihl = (sizeof(struct iphdr)+4)>>2; |
@@ -827,8 +830,8 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) | |||
827 | static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | 830 | static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, |
828 | int len) | 831 | int len) |
829 | { | 832 | { |
830 | struct igmphdr *ih = skb->h.igmph; | 833 | struct igmphdr *ih = igmp_hdr(skb); |
831 | struct igmpv3_query *ih3 = (struct igmpv3_query *)ih; | 834 | struct igmpv3_query *ih3 = igmpv3_query_hdr(skb); |
832 | struct ip_mc_list *im; | 835 | struct ip_mc_list *im; |
833 | __be32 group = ih->group; | 836 | __be32 group = ih->group; |
834 | int max_delay; | 837 | int max_delay; |
@@ -861,12 +864,12 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
861 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) | 864 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) |
862 | return; | 865 | return; |
863 | 866 | ||
864 | ih3 = (struct igmpv3_query *) skb->h.raw; | 867 | ih3 = igmpv3_query_hdr(skb); |
865 | if (ih3->nsrcs) { | 868 | if (ih3->nsrcs) { |
866 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) | 869 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) |
867 | + ntohs(ih3->nsrcs)*sizeof(__be32))) | 870 | + ntohs(ih3->nsrcs)*sizeof(__be32))) |
868 | return; | 871 | return; |
869 | ih3 = (struct igmpv3_query *) skb->h.raw; | 872 | ih3 = igmpv3_query_hdr(skb); |
870 | } | 873 | } |
871 | 874 | ||
872 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); | 875 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); |
@@ -943,7 +946,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
943 | goto drop; | 946 | goto drop; |
944 | } | 947 | } |
945 | 948 | ||
946 | ih = skb->h.igmph; | 949 | ih = igmp_hdr(skb); |
947 | switch (ih->type) { | 950 | switch (ih->type) { |
948 | case IGMP_HOST_MEMBERSHIP_QUERY: | 951 | case IGMP_HOST_MEMBERSHIP_QUERY: |
949 | igmp_heard_query(in_dev, skb, len); | 952 | igmp_heard_query(in_dev, skb, len); |
@@ -2397,7 +2400,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) | |||
2397 | return 0; | 2400 | return 0; |
2398 | } | 2401 | } |
2399 | 2402 | ||
2400 | static struct seq_operations igmp_mc_seq_ops = { | 2403 | static const struct seq_operations igmp_mc_seq_ops = { |
2401 | .start = igmp_mc_seq_start, | 2404 | .start = igmp_mc_seq_start, |
2402 | .next = igmp_mc_seq_next, | 2405 | .next = igmp_mc_seq_next, |
2403 | .stop = igmp_mc_seq_stop, | 2406 | .stop = igmp_mc_seq_stop, |
@@ -2571,7 +2574,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v) | |||
2571 | return 0; | 2574 | return 0; |
2572 | } | 2575 | } |
2573 | 2576 | ||
2574 | static struct seq_operations igmp_mcf_seq_ops = { | 2577 | static const struct seq_operations igmp_mcf_seq_ops = { |
2575 | .start = igmp_mcf_seq_start, | 2578 | .start = igmp_mcf_seq_start, |
2576 | .next = igmp_mcf_seq_next, | 2579 | .next = igmp_mcf_seq_next, |
2577 | .stop = igmp_mcf_seq_stop, | 2580 | .stop = igmp_mcf_seq_stop, |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5df71cd08da8..dbeacd8b0f90 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <net/inet_hashtables.h> | 27 | #include <net/inet_hashtables.h> |
28 | #include <net/inet_timewait_sock.h> | 28 | #include <net/inet_timewait_sock.h> |
29 | #include <net/inet6_hashtables.h> | 29 | #include <net/inet6_hashtables.h> |
30 | #include <net/netlink.h> | ||
30 | 31 | ||
31 | #include <linux/inet.h> | 32 | #include <linux/inet.h> |
32 | #include <linux/stddef.h> | 33 | #include <linux/stddef.h> |
@@ -60,7 +61,7 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
60 | struct nlmsghdr *nlh; | 61 | struct nlmsghdr *nlh; |
61 | void *info = NULL; | 62 | void *info = NULL; |
62 | struct inet_diag_meminfo *minfo = NULL; | 63 | struct inet_diag_meminfo *minfo = NULL; |
63 | unsigned char *b = skb->tail; | 64 | unsigned char *b = skb_tail_pointer(skb); |
64 | const struct inet_diag_handler *handler; | 65 | const struct inet_diag_handler *handler; |
65 | 66 | ||
66 | handler = inet_diag_table[unlh->nlmsg_type]; | 67 | handler = inet_diag_table[unlh->nlmsg_type]; |
@@ -147,12 +148,12 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
147 | icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) | 148 | icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) |
148 | icsk->icsk_ca_ops->get_info(sk, ext, skb); | 149 | icsk->icsk_ca_ops->get_info(sk, ext, skb); |
149 | 150 | ||
150 | nlh->nlmsg_len = skb->tail - b; | 151 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; |
151 | return skb->len; | 152 | return skb->len; |
152 | 153 | ||
153 | rtattr_failure: | 154 | rtattr_failure: |
154 | nlmsg_failure: | 155 | nlmsg_failure: |
155 | skb_trim(skb, b - skb->data); | 156 | nlmsg_trim(skb, b); |
156 | return -EMSGSIZE; | 157 | return -EMSGSIZE; |
157 | } | 158 | } |
158 | 159 | ||
@@ -163,7 +164,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
163 | { | 164 | { |
164 | long tmo; | 165 | long tmo; |
165 | struct inet_diag_msg *r; | 166 | struct inet_diag_msg *r; |
166 | const unsigned char *previous_tail = skb->tail; | 167 | const unsigned char *previous_tail = skb_tail_pointer(skb); |
167 | struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, | 168 | struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, |
168 | unlh->nlmsg_type, sizeof(*r)); | 169 | unlh->nlmsg_type, sizeof(*r)); |
169 | 170 | ||
@@ -205,10 +206,10 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
205 | &tw6->tw_v6_daddr); | 206 | &tw6->tw_v6_daddr); |
206 | } | 207 | } |
207 | #endif | 208 | #endif |
208 | nlh->nlmsg_len = skb->tail - previous_tail; | 209 | nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; |
209 | return skb->len; | 210 | return skb->len; |
210 | nlmsg_failure: | 211 | nlmsg_failure: |
211 | skb_trim(skb, previous_tail - skb->data); | 212 | nlmsg_trim(skb, previous_tail); |
212 | return -EMSGSIZE; | 213 | return -EMSGSIZE; |
213 | } | 214 | } |
214 | 215 | ||
@@ -535,7 +536,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
535 | { | 536 | { |
536 | const struct inet_request_sock *ireq = inet_rsk(req); | 537 | const struct inet_request_sock *ireq = inet_rsk(req); |
537 | struct inet_sock *inet = inet_sk(sk); | 538 | struct inet_sock *inet = inet_sk(sk); |
538 | unsigned char *b = skb->tail; | 539 | unsigned char *b = skb_tail_pointer(skb); |
539 | struct inet_diag_msg *r; | 540 | struct inet_diag_msg *r; |
540 | struct nlmsghdr *nlh; | 541 | struct nlmsghdr *nlh; |
541 | long tmo; | 542 | long tmo; |
@@ -574,12 +575,12 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
574 | &inet6_rsk(req)->rmt_addr); | 575 | &inet6_rsk(req)->rmt_addr); |
575 | } | 576 | } |
576 | #endif | 577 | #endif |
577 | nlh->nlmsg_len = skb->tail - b; | 578 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; |
578 | 579 | ||
579 | return skb->len; | 580 | return skb->len; |
580 | 581 | ||
581 | nlmsg_failure: | 582 | nlmsg_failure: |
582 | skb_trim(skb, b - skb->data); | 583 | nlmsg_trim(skb, b); |
583 | return -1; | 584 | return -1; |
584 | } | 585 | } |
585 | 586 | ||
@@ -805,68 +806,43 @@ done: | |||
805 | return skb->len; | 806 | return skb->len; |
806 | } | 807 | } |
807 | 808 | ||
808 | static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 809 | static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) |
809 | { | 810 | { |
810 | if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) | 811 | int hdrlen = sizeof(struct inet_diag_req); |
811 | return 0; | ||
812 | 812 | ||
813 | if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX) | 813 | if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || |
814 | goto err_inval; | 814 | nlmsg_len(nlh) < hdrlen) |
815 | return -EINVAL; | ||
815 | 816 | ||
816 | if (inet_diag_table[nlh->nlmsg_type] == NULL) | 817 | if (inet_diag_table[nlh->nlmsg_type] == NULL) |
817 | return -ENOENT; | 818 | return -ENOENT; |
818 | 819 | ||
819 | if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len) | 820 | if (nlh->nlmsg_flags & NLM_F_DUMP) { |
820 | goto err_inval; | 821 | if (nlmsg_attrlen(nlh, hdrlen)) { |
821 | 822 | struct nlattr *attr; | |
822 | if (nlh->nlmsg_flags&NLM_F_DUMP) { | 823 | |
823 | if (nlh->nlmsg_len > | 824 | attr = nlmsg_find_attr(nlh, hdrlen, |
824 | (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) { | 825 | INET_DIAG_REQ_BYTECODE); |
825 | struct rtattr *rta = (void *)(NLMSG_DATA(nlh) + | 826 | if (attr == NULL || |
826 | sizeof(struct inet_diag_req)); | 827 | nla_len(attr) < sizeof(struct inet_diag_bc_op) || |
827 | if (rta->rta_type != INET_DIAG_REQ_BYTECODE || | 828 | inet_diag_bc_audit(nla_data(attr), nla_len(attr))) |
828 | rta->rta_len < 8 || | 829 | return -EINVAL; |
829 | rta->rta_len > | ||
830 | (nlh->nlmsg_len - | ||
831 | NLMSG_SPACE(sizeof(struct inet_diag_req)))) | ||
832 | goto err_inval; | ||
833 | if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) | ||
834 | goto err_inval; | ||
835 | } | 830 | } |
831 | |||
836 | return netlink_dump_start(idiagnl, skb, nlh, | 832 | return netlink_dump_start(idiagnl, skb, nlh, |
837 | inet_diag_dump, NULL); | 833 | inet_diag_dump, NULL); |
838 | } else | ||
839 | return inet_diag_get_exact(skb, nlh); | ||
840 | |||
841 | err_inval: | ||
842 | return -EINVAL; | ||
843 | } | ||
844 | |||
845 | |||
846 | static inline void inet_diag_rcv_skb(struct sk_buff *skb) | ||
847 | { | ||
848 | if (skb->len >= NLMSG_SPACE(0)) { | ||
849 | int err; | ||
850 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; | ||
851 | |||
852 | if (nlh->nlmsg_len < sizeof(*nlh) || | ||
853 | skb->len < nlh->nlmsg_len) | ||
854 | return; | ||
855 | err = inet_diag_rcv_msg(skb, nlh); | ||
856 | if (err || nlh->nlmsg_flags & NLM_F_ACK) | ||
857 | netlink_ack(skb, nlh, err); | ||
858 | } | 834 | } |
835 | |||
836 | return inet_diag_get_exact(skb, nlh); | ||
859 | } | 837 | } |
860 | 838 | ||
861 | static void inet_diag_rcv(struct sock *sk, int len) | 839 | static void inet_diag_rcv(struct sock *sk, int len) |
862 | { | 840 | { |
863 | struct sk_buff *skb; | 841 | unsigned int qlen = 0; |
864 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | ||
865 | 842 | ||
866 | while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { | 843 | do { |
867 | inet_diag_rcv_skb(skb); | 844 | netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg); |
868 | kfree_skb(skb); | 845 | } while (qlen); |
869 | } | ||
870 | } | 846 | } |
871 | 847 | ||
872 | static DEFINE_SPINLOCK(inet_diag_register_lock); | 848 | static DEFINE_SPINLOCK(inet_diag_register_lock); |
@@ -917,7 +893,7 @@ static int __init inet_diag_init(void) | |||
917 | goto out; | 893 | goto out; |
918 | 894 | ||
919 | idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, | 895 | idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, |
920 | THIS_MODULE); | 896 | NULL, THIS_MODULE); |
921 | if (idiagnl == NULL) | 897 | if (idiagnl == NULL) |
922 | goto out_free_table; | 898 | goto out_free_table; |
923 | err = 0; | 899 | err = 0; |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index db3ef96bdfd9..2f44e6128068 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -87,10 +87,12 @@ static DEFINE_RWLOCK(peer_pool_lock); | |||
87 | 87 | ||
88 | static int peer_total; | 88 | static int peer_total; |
89 | /* Exported for sysctl_net_ipv4. */ | 89 | /* Exported for sysctl_net_ipv4. */ |
90 | int inet_peer_threshold = 65536 + 128; /* start to throw entries more | 90 | int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more |
91 | * aggressively at this stage */ | 91 | * aggressively at this stage */ |
92 | int inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ | 92 | int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ |
93 | int inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ | 93 | int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ |
94 | int inet_peer_gc_mintime __read_mostly = 10 * HZ; | ||
95 | int inet_peer_gc_maxtime __read_mostly = 120 * HZ; | ||
94 | 96 | ||
95 | static struct inet_peer *inet_peer_unused_head; | 97 | static struct inet_peer *inet_peer_unused_head; |
96 | static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; | 98 | static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; |
@@ -99,9 +101,6 @@ static DEFINE_SPINLOCK(inet_peer_unused_lock); | |||
99 | static void peer_check_expire(unsigned long dummy); | 101 | static void peer_check_expire(unsigned long dummy); |
100 | static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); | 102 | static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); |
101 | 103 | ||
102 | /* Exported for sysctl_net_ipv4. */ | ||
103 | int inet_peer_gc_mintime = 10 * HZ, | ||
104 | inet_peer_gc_maxtime = 120 * HZ; | ||
105 | 104 | ||
106 | /* Called from ip_output.c:ip_init */ | 105 | /* Called from ip_output.c:ip_init */ |
107 | void __init inet_initpeers(void) | 106 | void __init inet_initpeers(void) |
@@ -151,20 +150,27 @@ static void unlink_from_unused(struct inet_peer *p) | |||
151 | spin_unlock_bh(&inet_peer_unused_lock); | 150 | spin_unlock_bh(&inet_peer_unused_lock); |
152 | } | 151 | } |
153 | 152 | ||
154 | /* Called with local BH disabled and the pool lock held. */ | 153 | /* |
155 | #define lookup(daddr) \ | 154 | * Called with local BH disabled and the pool lock held. |
155 | * _stack is known to be NULL or not at compile time, | ||
156 | * so compiler will optimize the if (_stack) tests. | ||
157 | */ | ||
158 | #define lookup(_daddr,_stack) \ | ||
156 | ({ \ | 159 | ({ \ |
157 | struct inet_peer *u, **v; \ | 160 | struct inet_peer *u, **v; \ |
158 | stackptr = stack; \ | 161 | if (_stack) { \ |
159 | *stackptr++ = &peer_root; \ | 162 | stackptr = _stack; \ |
163 | *stackptr++ = &peer_root; \ | ||
164 | } \ | ||
160 | for (u = peer_root; u != peer_avl_empty; ) { \ | 165 | for (u = peer_root; u != peer_avl_empty; ) { \ |
161 | if (daddr == u->v4daddr) \ | 166 | if (_daddr == u->v4daddr) \ |
162 | break; \ | 167 | break; \ |
163 | if ((__force __u32)daddr < (__force __u32)u->v4daddr) \ | 168 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ |
164 | v = &u->avl_left; \ | 169 | v = &u->avl_left; \ |
165 | else \ | 170 | else \ |
166 | v = &u->avl_right; \ | 171 | v = &u->avl_right; \ |
167 | *stackptr++ = v; \ | 172 | if (_stack) \ |
173 | *stackptr++ = v; \ | ||
168 | u = *v; \ | 174 | u = *v; \ |
169 | } \ | 175 | } \ |
170 | u; \ | 176 | u; \ |
@@ -288,7 +294,7 @@ static void unlink_from_pool(struct inet_peer *p) | |||
288 | if (atomic_read(&p->refcnt) == 1) { | 294 | if (atomic_read(&p->refcnt) == 1) { |
289 | struct inet_peer **stack[PEER_MAXDEPTH]; | 295 | struct inet_peer **stack[PEER_MAXDEPTH]; |
290 | struct inet_peer ***stackptr, ***delp; | 296 | struct inet_peer ***stackptr, ***delp; |
291 | if (lookup(p->v4daddr) != p) | 297 | if (lookup(p->v4daddr, stack) != p) |
292 | BUG(); | 298 | BUG(); |
293 | delp = stackptr - 1; /* *delp[0] == p */ | 299 | delp = stackptr - 1; /* *delp[0] == p */ |
294 | if (p->avl_left == peer_avl_empty) { | 300 | if (p->avl_left == peer_avl_empty) { |
@@ -373,7 +379,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) | |||
373 | 379 | ||
374 | /* Look up for the address quickly. */ | 380 | /* Look up for the address quickly. */ |
375 | read_lock_bh(&peer_pool_lock); | 381 | read_lock_bh(&peer_pool_lock); |
376 | p = lookup(daddr); | 382 | p = lookup(daddr, NULL); |
377 | if (p != peer_avl_empty) | 383 | if (p != peer_avl_empty) |
378 | atomic_inc(&p->refcnt); | 384 | atomic_inc(&p->refcnt); |
379 | read_unlock_bh(&peer_pool_lock); | 385 | read_unlock_bh(&peer_pool_lock); |
@@ -400,7 +406,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) | |||
400 | 406 | ||
401 | write_lock_bh(&peer_pool_lock); | 407 | write_lock_bh(&peer_pool_lock); |
402 | /* Check if an entry has suddenly appeared. */ | 408 | /* Check if an entry has suddenly appeared. */ |
403 | p = lookup(daddr); | 409 | p = lookup(daddr, stack); |
404 | if (p != peer_avl_empty) | 410 | if (p != peer_avl_empty) |
405 | goto out_free; | 411 | goto out_free; |
406 | 412 | ||
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 369e721c4bab..9cb04df0054b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -67,14 +67,14 @@ int ip_forward(struct sk_buff *skb) | |||
67 | if (skb->pkt_type != PACKET_HOST) | 67 | if (skb->pkt_type != PACKET_HOST) |
68 | goto drop; | 68 | goto drop; |
69 | 69 | ||
70 | skb->ip_summed = CHECKSUM_NONE; | 70 | skb_forward_csum(skb); |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * According to the RFC, we must first decrease the TTL field. If | 73 | * According to the RFC, we must first decrease the TTL field. If |
74 | * that reaches zero, we must reply an ICMP control message telling | 74 | * that reaches zero, we must reply an ICMP control message telling |
75 | * that the packet's lifetime expired. | 75 | * that the packet's lifetime expired. |
76 | */ | 76 | */ |
77 | if (skb->nh.iph->ttl <= 1) | 77 | if (ip_hdr(skb)->ttl <= 1) |
78 | goto too_many_hops; | 78 | goto too_many_hops; |
79 | 79 | ||
80 | if (!xfrm4_route_forward(skb)) | 80 | if (!xfrm4_route_forward(skb)) |
@@ -85,10 +85,18 @@ int ip_forward(struct sk_buff *skb) | |||
85 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 85 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
86 | goto sr_failed; | 86 | goto sr_failed; |
87 | 87 | ||
88 | if (unlikely(skb->len > dst_mtu(&rt->u.dst) && | ||
89 | (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { | ||
90 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); | ||
91 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | ||
92 | htonl(dst_mtu(&rt->u.dst))); | ||
93 | goto drop; | ||
94 | } | ||
95 | |||
88 | /* We are about to mangle packet. Copy it! */ | 96 | /* We are about to mangle packet. Copy it! */ |
89 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) | 97 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) |
90 | goto drop; | 98 | goto drop; |
91 | iph = skb->nh.iph; | 99 | iph = ip_hdr(skb); |
92 | 100 | ||
93 | /* Decrease ttl after skb cow done */ | 101 | /* Decrease ttl after skb cow done */ |
94 | ip_decrease_ttl(iph); | 102 | ip_decrease_ttl(iph); |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6f055380373..0231bdcb2ab7 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -92,7 +92,7 @@ struct ipq { | |||
92 | spinlock_t lock; | 92 | spinlock_t lock; |
93 | atomic_t refcnt; | 93 | atomic_t refcnt; |
94 | struct timer_list timer; /* when will this queue expire? */ | 94 | struct timer_list timer; /* when will this queue expire? */ |
95 | struct timeval stamp; | 95 | ktime_t stamp; |
96 | int iif; | 96 | int iif; |
97 | unsigned int rid; | 97 | unsigned int rid; |
98 | struct inet_peer *peer; | 98 | struct inet_peer *peer; |
@@ -184,7 +184,7 @@ static __inline__ struct ipq *frag_alloc_queue(void) | |||
184 | { | 184 | { |
185 | struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); | 185 | struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); |
186 | 186 | ||
187 | if(!qp) | 187 | if (!qp) |
188 | return NULL; | 188 | return NULL; |
189 | atomic_add(sizeof(struct ipq), &ip_frag_mem); | 189 | atomic_add(sizeof(struct ipq), &ip_frag_mem); |
190 | return qp; | 190 | return qp; |
@@ -321,11 +321,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in) | |||
321 | * promoted read lock to write lock. | 321 | * promoted read lock to write lock. |
322 | */ | 322 | */ |
323 | hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { | 323 | hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { |
324 | if(qp->id == qp_in->id && | 324 | if (qp->id == qp_in->id && |
325 | qp->saddr == qp_in->saddr && | 325 | qp->saddr == qp_in->saddr && |
326 | qp->daddr == qp_in->daddr && | 326 | qp->daddr == qp_in->daddr && |
327 | qp->protocol == qp_in->protocol && | 327 | qp->protocol == qp_in->protocol && |
328 | qp->user == qp_in->user) { | 328 | qp->user == qp_in->user) { |
329 | atomic_inc(&qp->refcnt); | 329 | atomic_inc(&qp->refcnt); |
330 | write_unlock(&ipfrag_lock); | 330 | write_unlock(&ipfrag_lock); |
331 | qp_in->last_in |= COMPLETE; | 331 | qp_in->last_in |= COMPLETE; |
@@ -398,11 +398,11 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) | |||
398 | read_lock(&ipfrag_lock); | 398 | read_lock(&ipfrag_lock); |
399 | hash = ipqhashfn(id, saddr, daddr, protocol); | 399 | hash = ipqhashfn(id, saddr, daddr, protocol); |
400 | hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { | 400 | hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { |
401 | if(qp->id == id && | 401 | if (qp->id == id && |
402 | qp->saddr == saddr && | 402 | qp->saddr == saddr && |
403 | qp->daddr == daddr && | 403 | qp->daddr == daddr && |
404 | qp->protocol == protocol && | 404 | qp->protocol == protocol && |
405 | qp->user == user) { | 405 | qp->user == user) { |
406 | atomic_inc(&qp->refcnt); | 406 | atomic_inc(&qp->refcnt); |
407 | read_unlock(&ipfrag_lock); | 407 | read_unlock(&ipfrag_lock); |
408 | return qp; | 408 | return qp; |
@@ -479,11 +479,11 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
479 | goto err; | 479 | goto err; |
480 | } | 480 | } |
481 | 481 | ||
482 | offset = ntohs(skb->nh.iph->frag_off); | 482 | offset = ntohs(ip_hdr(skb)->frag_off); |
483 | flags = offset & ~IP_OFFSET; | 483 | flags = offset & ~IP_OFFSET; |
484 | offset &= IP_OFFSET; | 484 | offset &= IP_OFFSET; |
485 | offset <<= 3; /* offset is in 8-byte chunks */ | 485 | offset <<= 3; /* offset is in 8-byte chunks */ |
486 | ihl = skb->nh.iph->ihl * 4; | 486 | ihl = ip_hdrlen(skb); |
487 | 487 | ||
488 | /* Determine the position of this fragment. */ | 488 | /* Determine the position of this fragment. */ |
489 | end = offset + skb->len - ihl; | 489 | end = offset + skb->len - ihl; |
@@ -524,7 +524,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
524 | * this fragment, right? | 524 | * this fragment, right? |
525 | */ | 525 | */ |
526 | prev = NULL; | 526 | prev = NULL; |
527 | for(next = qp->fragments; next != NULL; next = next->next) { | 527 | for (next = qp->fragments; next != NULL; next = next->next) { |
528 | if (FRAG_CB(next)->offset >= offset) | 528 | if (FRAG_CB(next)->offset >= offset) |
529 | break; /* bingo! */ | 529 | break; /* bingo! */ |
530 | prev = next; | 530 | prev = next; |
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
592 | if (skb->dev) | 592 | if (skb->dev) |
593 | qp->iif = skb->dev->ifindex; | 593 | qp->iif = skb->dev->ifindex; |
594 | skb->dev = NULL; | 594 | skb->dev = NULL; |
595 | skb_get_timestamp(skb, &qp->stamp); | 595 | qp->stamp = skb->tstamp; |
596 | qp->meat += skb->len; | 596 | qp->meat += skb->len; |
597 | atomic_add(skb->truesize, &ip_frag_mem); | 597 | atomic_add(skb->truesize, &ip_frag_mem); |
598 | if (offset == 0) | 598 | if (offset == 0) |
@@ -624,10 +624,10 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) | |||
624 | BUG_TRAP(FRAG_CB(head)->offset == 0); | 624 | BUG_TRAP(FRAG_CB(head)->offset == 0); |
625 | 625 | ||
626 | /* Allocate a new buffer for the datagram. */ | 626 | /* Allocate a new buffer for the datagram. */ |
627 | ihlen = head->nh.iph->ihl*4; | 627 | ihlen = ip_hdrlen(head); |
628 | len = ihlen + qp->len; | 628 | len = ihlen + qp->len; |
629 | 629 | ||
630 | if(len > 65535) | 630 | if (len > 65535) |
631 | goto out_oversize; | 631 | goto out_oversize; |
632 | 632 | ||
633 | /* Head of list must not be cloned. */ | 633 | /* Head of list must not be cloned. */ |
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) | |||
658 | } | 658 | } |
659 | 659 | ||
660 | skb_shinfo(head)->frag_list = head->next; | 660 | skb_shinfo(head)->frag_list = head->next; |
661 | skb_push(head, head->data - head->nh.raw); | 661 | skb_push(head, head->data - skb_network_header(head)); |
662 | atomic_sub(head->truesize, &ip_frag_mem); | 662 | atomic_sub(head->truesize, &ip_frag_mem); |
663 | 663 | ||
664 | for (fp=head->next; fp; fp = fp->next) { | 664 | for (fp=head->next; fp; fp = fp->next) { |
@@ -674,9 +674,9 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) | |||
674 | 674 | ||
675 | head->next = NULL; | 675 | head->next = NULL; |
676 | head->dev = dev; | 676 | head->dev = dev; |
677 | skb_set_timestamp(head, &qp->stamp); | 677 | head->tstamp = qp->stamp; |
678 | 678 | ||
679 | iph = head->nh.iph; | 679 | iph = ip_hdr(head); |
680 | iph->frag_off = 0; | 680 | iph->frag_off = 0; |
681 | iph->tot_len = htons(len); | 681 | iph->tot_len = htons(len); |
682 | IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); | 682 | IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); |
@@ -700,7 +700,6 @@ out_fail: | |||
700 | /* Process an incoming IP datagram fragment. */ | 700 | /* Process an incoming IP datagram fragment. */ |
701 | struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) | 701 | struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) |
702 | { | 702 | { |
703 | struct iphdr *iph = skb->nh.iph; | ||
704 | struct ipq *qp; | 703 | struct ipq *qp; |
705 | struct net_device *dev; | 704 | struct net_device *dev; |
706 | 705 | ||
@@ -713,7 +712,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) | |||
713 | dev = skb->dev; | 712 | dev = skb->dev; |
714 | 713 | ||
715 | /* Lookup (or create) queue header */ | 714 | /* Lookup (or create) queue header */ |
716 | if ((qp = ip_find(iph, user)) != NULL) { | 715 | if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { |
717 | struct sk_buff *ret = NULL; | 716 | struct sk_buff *ret = NULL; |
718 | 717 | ||
719 | spin_lock(&qp->lock); | 718 | spin_lock(&qp->lock); |
@@ -734,7 +733,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) | |||
734 | return NULL; | 733 | return NULL; |
735 | } | 734 | } |
736 | 735 | ||
737 | void ipfrag_init(void) | 736 | void __init ipfrag_init(void) |
738 | { | 737 | { |
739 | ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ | 738 | ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ |
740 | (jiffies ^ (jiffies >> 6))); | 739 | (jiffies ^ (jiffies >> 6))); |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9151da642318..63282934725e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -191,11 +191,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3 | |||
191 | return NULL; | 191 | return NULL; |
192 | } | 192 | } |
193 | 193 | ||
194 | static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) | 194 | static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms) |
195 | { | 195 | { |
196 | __be32 remote = t->parms.iph.daddr; | 196 | __be32 remote = parms->iph.daddr; |
197 | __be32 local = t->parms.iph.saddr; | 197 | __be32 local = parms->iph.saddr; |
198 | __be32 key = t->parms.i_key; | 198 | __be32 key = parms->i_key; |
199 | unsigned h = HASH(key); | 199 | unsigned h = HASH(key); |
200 | int prio = 0; | 200 | int prio = 0; |
201 | 201 | ||
@@ -209,6 +209,11 @@ static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) | |||
209 | return &tunnels[prio][h]; | 209 | return &tunnels[prio][h]; |
210 | } | 210 | } |
211 | 211 | ||
212 | static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) | ||
213 | { | ||
214 | return __ipgre_bucket(&t->parms); | ||
215 | } | ||
216 | |||
212 | static void ipgre_tunnel_link(struct ip_tunnel *t) | 217 | static void ipgre_tunnel_link(struct ip_tunnel *t) |
213 | { | 218 | { |
214 | struct ip_tunnel **tp = ipgre_bucket(t); | 219 | struct ip_tunnel **tp = ipgre_bucket(t); |
@@ -240,17 +245,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int | |||
240 | __be32 key = parms->i_key; | 245 | __be32 key = parms->i_key; |
241 | struct ip_tunnel *t, **tp, *nt; | 246 | struct ip_tunnel *t, **tp, *nt; |
242 | struct net_device *dev; | 247 | struct net_device *dev; |
243 | unsigned h = HASH(key); | ||
244 | int prio = 0; | ||
245 | char name[IFNAMSIZ]; | 248 | char name[IFNAMSIZ]; |
246 | 249 | ||
247 | if (local) | 250 | for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) { |
248 | prio |= 1; | ||
249 | if (remote && !MULTICAST(remote)) { | ||
250 | prio |= 2; | ||
251 | h ^= HASH(remote); | ||
252 | } | ||
253 | for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { | ||
254 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { | 251 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { |
255 | if (key == t->parms.i_key) | 252 | if (key == t->parms.i_key) |
256 | return t; | 253 | return t; |
@@ -320,8 +317,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
320 | struct iphdr *iph = (struct iphdr*)skb->data; | 317 | struct iphdr *iph = (struct iphdr*)skb->data; |
321 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); | 318 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); |
322 | int grehlen = (iph->ihl<<2) + 4; | 319 | int grehlen = (iph->ihl<<2) + 4; |
323 | int type = skb->h.icmph->type; | 320 | const int type = icmp_hdr(skb)->type; |
324 | int code = skb->h.icmph->code; | 321 | const int code = icmp_hdr(skb)->code; |
325 | struct ip_tunnel *t; | 322 | struct ip_tunnel *t; |
326 | __be16 flags; | 323 | __be16 flags; |
327 | 324 | ||
@@ -388,8 +385,8 @@ out: | |||
388 | struct iphdr *iph = (struct iphdr*)dp; | 385 | struct iphdr *iph = (struct iphdr*)dp; |
389 | struct iphdr *eiph; | 386 | struct iphdr *eiph; |
390 | __be16 *p = (__be16*)(dp+(iph->ihl<<2)); | 387 | __be16 *p = (__be16*)(dp+(iph->ihl<<2)); |
391 | int type = skb->h.icmph->type; | 388 | const int type = icmp_hdr(skb)->type; |
392 | int code = skb->h.icmph->code; | 389 | const int code = icmp_hdr(skb)->code; |
393 | int rel_type = 0; | 390 | int rel_type = 0; |
394 | int rel_code = 0; | 391 | int rel_code = 0; |
395 | __be32 rel_info = 0; | 392 | __be32 rel_info = 0; |
@@ -422,7 +419,7 @@ out: | |||
422 | default: | 419 | default: |
423 | return; | 420 | return; |
424 | case ICMP_PARAMETERPROB: | 421 | case ICMP_PARAMETERPROB: |
425 | n = ntohl(skb->h.icmph->un.gateway) >> 24; | 422 | n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; |
426 | if (n < (iph->ihl<<2)) | 423 | if (n < (iph->ihl<<2)) |
427 | return; | 424 | return; |
428 | 425 | ||
@@ -442,7 +439,7 @@ out: | |||
442 | return; | 439 | return; |
443 | case ICMP_FRAG_NEEDED: | 440 | case ICMP_FRAG_NEEDED: |
444 | /* And it is the only really necessary thing :-) */ | 441 | /* And it is the only really necessary thing :-) */ |
445 | n = ntohs(skb->h.icmph->un.frag.mtu); | 442 | n = ntohs(icmp_hdr(skb)->un.frag.mtu); |
446 | if (n < grehlen+68) | 443 | if (n < grehlen+68) |
447 | return; | 444 | return; |
448 | n -= grehlen; | 445 | n -= grehlen; |
@@ -474,7 +471,7 @@ out: | |||
474 | dst_release(skb2->dst); | 471 | dst_release(skb2->dst); |
475 | skb2->dst = NULL; | 472 | skb2->dst = NULL; |
476 | skb_pull(skb2, skb->data - (u8*)eiph); | 473 | skb_pull(skb2, skb->data - (u8*)eiph); |
477 | skb2->nh.raw = skb2->data; | 474 | skb_reset_network_header(skb2); |
478 | 475 | ||
479 | /* Try to guess incoming interface */ | 476 | /* Try to guess incoming interface */ |
480 | memset(&fl, 0, sizeof(fl)); | 477 | memset(&fl, 0, sizeof(fl)); |
@@ -533,9 +530,9 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | |||
533 | { | 530 | { |
534 | if (INET_ECN_is_ce(iph->tos)) { | 531 | if (INET_ECN_is_ce(iph->tos)) { |
535 | if (skb->protocol == htons(ETH_P_IP)) { | 532 | if (skb->protocol == htons(ETH_P_IP)) { |
536 | IP_ECN_set_ce(skb->nh.iph); | 533 | IP_ECN_set_ce(ip_hdr(skb)); |
537 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | 534 | } else if (skb->protocol == htons(ETH_P_IPV6)) { |
538 | IP6_ECN_set_ce(skb->nh.ipv6h); | 535 | IP6_ECN_set_ce(ipv6_hdr(skb)); |
539 | } | 536 | } |
540 | } | 537 | } |
541 | } | 538 | } |
@@ -565,7 +562,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
565 | if (!pskb_may_pull(skb, 16)) | 562 | if (!pskb_may_pull(skb, 16)) |
566 | goto drop_nolock; | 563 | goto drop_nolock; |
567 | 564 | ||
568 | iph = skb->nh.iph; | 565 | iph = ip_hdr(skb); |
569 | h = skb->data; | 566 | h = skb->data; |
570 | flags = *(__be16*)h; | 567 | flags = *(__be16*)h; |
571 | 568 | ||
@@ -616,9 +613,10 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
616 | offset += 4; | 613 | offset += 4; |
617 | } | 614 | } |
618 | 615 | ||
619 | skb->mac.raw = skb->nh.raw; | 616 | skb_reset_mac_header(skb); |
620 | skb->nh.raw = __pskb_pull(skb, offset); | 617 | __pskb_pull(skb, offset); |
621 | skb_postpull_rcsum(skb, skb->h.raw, offset); | 618 | skb_reset_network_header(skb); |
619 | skb_postpull_rcsum(skb, skb_transport_header(skb), offset); | ||
622 | skb->pkt_type = PACKET_HOST; | 620 | skb->pkt_type = PACKET_HOST; |
623 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 621 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
624 | if (MULTICAST(iph->daddr)) { | 622 | if (MULTICAST(iph->daddr)) { |
@@ -669,7 +667,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
669 | { | 667 | { |
670 | struct ip_tunnel *tunnel = netdev_priv(dev); | 668 | struct ip_tunnel *tunnel = netdev_priv(dev); |
671 | struct net_device_stats *stats = &tunnel->stat; | 669 | struct net_device_stats *stats = &tunnel->stat; |
672 | struct iphdr *old_iph = skb->nh.iph; | 670 | struct iphdr *old_iph = ip_hdr(skb); |
673 | struct iphdr *tiph; | 671 | struct iphdr *tiph; |
674 | u8 tos; | 672 | u8 tos; |
675 | __be16 df; | 673 | __be16 df; |
@@ -720,7 +718,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
720 | addr_type = ipv6_addr_type(addr6); | 718 | addr_type = ipv6_addr_type(addr6); |
721 | 719 | ||
722 | if (addr_type == IPV6_ADDR_ANY) { | 720 | if (addr_type == IPV6_ADDR_ANY) { |
723 | addr6 = &skb->nh.ipv6h->daddr; | 721 | addr6 = &ipv6_hdr(skb)->daddr; |
724 | addr_type = ipv6_addr_type(addr6); | 722 | addr_type = ipv6_addr_type(addr6); |
725 | } | 723 | } |
726 | 724 | ||
@@ -824,11 +822,12 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
824 | skb_set_owner_w(new_skb, skb->sk); | 822 | skb_set_owner_w(new_skb, skb->sk); |
825 | dev_kfree_skb(skb); | 823 | dev_kfree_skb(skb); |
826 | skb = new_skb; | 824 | skb = new_skb; |
827 | old_iph = skb->nh.iph; | 825 | old_iph = ip_hdr(skb); |
828 | } | 826 | } |
829 | 827 | ||
830 | skb->h.raw = skb->nh.raw; | 828 | skb->transport_header = skb->network_header; |
831 | skb->nh.raw = skb_push(skb, gre_hlen); | 829 | skb_push(skb, gre_hlen); |
830 | skb_reset_network_header(skb); | ||
832 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 831 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
833 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 832 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
834 | IPSKB_REROUTED); | 833 | IPSKB_REROUTED); |
@@ -839,7 +838,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
839 | * Push down and install the IPIP header. | 838 | * Push down and install the IPIP header. |
840 | */ | 839 | */ |
841 | 840 | ||
842 | iph = skb->nh.iph; | 841 | iph = ip_hdr(skb); |
843 | iph->version = 4; | 842 | iph->version = 4; |
844 | iph->ihl = sizeof(struct iphdr) >> 2; | 843 | iph->ihl = sizeof(struct iphdr) >> 2; |
845 | iph->frag_off = df; | 844 | iph->frag_off = df; |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f38e97647ac0..324e7e0fdb2a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -158,7 +158,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; | |||
158 | int ip_call_ra_chain(struct sk_buff *skb) | 158 | int ip_call_ra_chain(struct sk_buff *skb) |
159 | { | 159 | { |
160 | struct ip_ra_chain *ra; | 160 | struct ip_ra_chain *ra; |
161 | u8 protocol = skb->nh.iph->protocol; | 161 | u8 protocol = ip_hdr(skb)->protocol; |
162 | struct sock *last = NULL; | 162 | struct sock *last = NULL; |
163 | 163 | ||
164 | read_lock(&ip_ra_lock); | 164 | read_lock(&ip_ra_lock); |
@@ -171,7 +171,7 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
171 | if (sk && inet_sk(sk)->num == protocol && | 171 | if (sk && inet_sk(sk)->num == protocol && |
172 | (!sk->sk_bound_dev_if || | 172 | (!sk->sk_bound_dev_if || |
173 | sk->sk_bound_dev_if == skb->dev->ifindex)) { | 173 | sk->sk_bound_dev_if == skb->dev->ifindex)) { |
174 | if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { | 174 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
175 | skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); | 175 | skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); |
176 | if (skb == NULL) { | 176 | if (skb == NULL) { |
177 | read_unlock(&ip_ra_lock); | 177 | read_unlock(&ip_ra_lock); |
@@ -198,17 +198,15 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
198 | 198 | ||
199 | static inline int ip_local_deliver_finish(struct sk_buff *skb) | 199 | static inline int ip_local_deliver_finish(struct sk_buff *skb) |
200 | { | 200 | { |
201 | int ihl = skb->nh.iph->ihl*4; | 201 | __skb_pull(skb, ip_hdrlen(skb)); |
202 | |||
203 | __skb_pull(skb, ihl); | ||
204 | 202 | ||
205 | /* Point into the IP datagram, just past the header. */ | 203 | /* Point into the IP datagram, just past the header. */ |
206 | skb->h.raw = skb->data; | 204 | skb_reset_transport_header(skb); |
207 | 205 | ||
208 | rcu_read_lock(); | 206 | rcu_read_lock(); |
209 | { | 207 | { |
210 | /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ | 208 | /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ |
211 | int protocol = skb->nh.iph->protocol; | 209 | int protocol = ip_hdr(skb)->protocol; |
212 | int hash; | 210 | int hash; |
213 | struct sock *raw_sk; | 211 | struct sock *raw_sk; |
214 | struct net_protocol *ipprot; | 212 | struct net_protocol *ipprot; |
@@ -220,7 +218,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) | |||
220 | /* If there maybe a raw socket we must check - if not we | 218 | /* If there maybe a raw socket we must check - if not we |
221 | * don't care less | 219 | * don't care less |
222 | */ | 220 | */ |
223 | if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash)) | 221 | if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash)) |
224 | raw_sk = NULL; | 222 | raw_sk = NULL; |
225 | 223 | ||
226 | if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { | 224 | if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { |
@@ -266,7 +264,7 @@ int ip_local_deliver(struct sk_buff *skb) | |||
266 | * Reassemble IP fragments. | 264 | * Reassemble IP fragments. |
267 | */ | 265 | */ |
268 | 266 | ||
269 | if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { | 267 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
270 | skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); | 268 | skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); |
271 | if (!skb) | 269 | if (!skb) |
272 | return 0; | 270 | return 0; |
@@ -294,7 +292,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) | |||
294 | goto drop; | 292 | goto drop; |
295 | } | 293 | } |
296 | 294 | ||
297 | iph = skb->nh.iph; | 295 | iph = ip_hdr(skb); |
298 | 296 | ||
299 | if (ip_options_compile(NULL, skb)) { | 297 | if (ip_options_compile(NULL, skb)) { |
300 | IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | 298 | IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); |
@@ -330,7 +328,7 @@ drop: | |||
330 | 328 | ||
331 | static inline int ip_rcv_finish(struct sk_buff *skb) | 329 | static inline int ip_rcv_finish(struct sk_buff *skb) |
332 | { | 330 | { |
333 | struct iphdr *iph = skb->nh.iph; | 331 | const struct iphdr *iph = ip_hdr(skb); |
334 | 332 | ||
335 | /* | 333 | /* |
336 | * Initialise the virtual path cache for the packet. It describes | 334 | * Initialise the virtual path cache for the packet. It describes |
@@ -391,7 +389,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, | |||
391 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) | 389 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) |
392 | goto inhdr_error; | 390 | goto inhdr_error; |
393 | 391 | ||
394 | iph = skb->nh.iph; | 392 | iph = ip_hdr(skb); |
395 | 393 | ||
396 | /* | 394 | /* |
397 | * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. | 395 | * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. |
@@ -410,7 +408,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, | |||
410 | if (!pskb_may_pull(skb, iph->ihl*4)) | 408 | if (!pskb_may_pull(skb, iph->ihl*4)) |
411 | goto inhdr_error; | 409 | goto inhdr_error; |
412 | 410 | ||
413 | iph = skb->nh.iph; | 411 | iph = ip_hdr(skb); |
414 | 412 | ||
415 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) | 413 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) |
416 | goto inhdr_error; | 414 | goto inhdr_error; |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f906a80d5a87..251346828cb4 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -40,7 +40,7 @@ | |||
40 | void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | 40 | void ip_options_build(struct sk_buff * skb, struct ip_options * opt, |
41 | __be32 daddr, struct rtable *rt, int is_frag) | 41 | __be32 daddr, struct rtable *rt, int is_frag) |
42 | { | 42 | { |
43 | unsigned char * iph = skb->nh.raw; | 43 | unsigned char *iph = skb_network_header(skb); |
44 | 44 | ||
45 | memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); | 45 | memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); |
46 | memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); | 46 | memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); |
@@ -104,13 +104,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
104 | return 0; | 104 | return 0; |
105 | } | 105 | } |
106 | 106 | ||
107 | sptr = skb->nh.raw; | 107 | sptr = skb_network_header(skb); |
108 | dptr = dopt->__data; | 108 | dptr = dopt->__data; |
109 | 109 | ||
110 | if (skb->dst) | 110 | if (skb->dst) |
111 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; | 111 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; |
112 | else | 112 | else |
113 | daddr = skb->nh.iph->daddr; | 113 | daddr = ip_hdr(skb)->daddr; |
114 | 114 | ||
115 | if (sopt->rr) { | 115 | if (sopt->rr) { |
116 | optlen = sptr[sopt->rr+1]; | 116 | optlen = sptr[sopt->rr+1]; |
@@ -180,7 +180,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
180 | /* | 180 | /* |
181 | * RFC1812 requires to fix illegal source routes. | 181 | * RFC1812 requires to fix illegal source routes. |
182 | */ | 182 | */ |
183 | if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0) | 183 | if (memcmp(&ip_hdr(skb)->saddr, |
184 | &start[soffset + 3], 4) == 0) | ||
184 | doffset -= 4; | 185 | doffset -= 4; |
185 | } | 186 | } |
186 | if (doffset > 3) { | 187 | if (doffset > 3) { |
@@ -217,7 +218,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
217 | 218 | ||
218 | void ip_options_fragment(struct sk_buff * skb) | 219 | void ip_options_fragment(struct sk_buff * skb) |
219 | { | 220 | { |
220 | unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr); | 221 | unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr); |
221 | struct ip_options * opt = &(IPCB(skb)->opt); | 222 | struct ip_options * opt = &(IPCB(skb)->opt); |
222 | int l = opt->optlen; | 223 | int l = opt->optlen; |
223 | int optlen; | 224 | int optlen; |
@@ -264,12 +265,13 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) | |||
264 | 265 | ||
265 | if (!opt) { | 266 | if (!opt) { |
266 | opt = &(IPCB(skb)->opt); | 267 | opt = &(IPCB(skb)->opt); |
267 | iph = skb->nh.raw; | 268 | iph = skb_network_header(skb); |
268 | opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); | 269 | opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); |
269 | optptr = iph + sizeof(struct iphdr); | 270 | optptr = iph + sizeof(struct iphdr); |
270 | opt->is_data = 0; | 271 | opt->is_data = 0; |
271 | } else { | 272 | } else { |
272 | optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]); | 273 | optptr = opt->is_data ? opt->__data : |
274 | (unsigned char *)&(ip_hdr(skb)[1]); | ||
273 | iph = optptr - sizeof(struct iphdr); | 275 | iph = optptr - sizeof(struct iphdr); |
274 | } | 276 | } |
275 | 277 | ||
@@ -563,7 +565,7 @@ void ip_forward_options(struct sk_buff *skb) | |||
563 | struct ip_options * opt = &(IPCB(skb)->opt); | 565 | struct ip_options * opt = &(IPCB(skb)->opt); |
564 | unsigned char * optptr; | 566 | unsigned char * optptr; |
565 | struct rtable *rt = (struct rtable*)skb->dst; | 567 | struct rtable *rt = (struct rtable*)skb->dst; |
566 | unsigned char *raw = skb->nh.raw; | 568 | unsigned char *raw = skb_network_header(skb); |
567 | 569 | ||
568 | if (opt->rr_needaddr) { | 570 | if (opt->rr_needaddr) { |
569 | optptr = (unsigned char *)raw + opt->rr; | 571 | optptr = (unsigned char *)raw + opt->rr; |
@@ -587,7 +589,7 @@ void ip_forward_options(struct sk_buff *skb) | |||
587 | if (srrptr + 3 <= srrspace) { | 589 | if (srrptr + 3 <= srrspace) { |
588 | opt->is_changed = 1; | 590 | opt->is_changed = 1; |
589 | ip_rt_get_source(&optptr[srrptr-1], rt); | 591 | ip_rt_get_source(&optptr[srrptr-1], rt); |
590 | skb->nh.iph->daddr = rt->rt_dst; | 592 | ip_hdr(skb)->daddr = rt->rt_dst; |
591 | optptr[2] = srrptr+4; | 593 | optptr[2] = srrptr+4; |
592 | } else if (net_ratelimit()) | 594 | } else if (net_ratelimit()) |
593 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); | 595 | printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); |
@@ -599,7 +601,7 @@ void ip_forward_options(struct sk_buff *skb) | |||
599 | } | 601 | } |
600 | if (opt->is_changed) { | 602 | if (opt->is_changed) { |
601 | opt->is_changed = 0; | 603 | opt->is_changed = 0; |
602 | ip_send_check(skb->nh.iph); | 604 | ip_send_check(ip_hdr(skb)); |
603 | } | 605 | } |
604 | } | 606 | } |
605 | 607 | ||
@@ -608,8 +610,8 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
608 | struct ip_options *opt = &(IPCB(skb)->opt); | 610 | struct ip_options *opt = &(IPCB(skb)->opt); |
609 | int srrspace, srrptr; | 611 | int srrspace, srrptr; |
610 | __be32 nexthop; | 612 | __be32 nexthop; |
611 | struct iphdr *iph = skb->nh.iph; | 613 | struct iphdr *iph = ip_hdr(skb); |
612 | unsigned char * optptr = skb->nh.raw + opt->srr; | 614 | unsigned char *optptr = skb_network_header(skb) + opt->srr; |
613 | struct rtable *rt = (struct rtable*)skb->dst; | 615 | struct rtable *rt = (struct rtable*)skb->dst; |
614 | struct rtable *rt2; | 616 | struct rtable *rt2; |
615 | int err; | 617 | int err; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d096332f6c6d..534650cad3a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph) | |||
95 | /* dev_loopback_xmit for use with netfilter. */ | 95 | /* dev_loopback_xmit for use with netfilter. */ |
96 | static int ip_dev_loopback_xmit(struct sk_buff *newskb) | 96 | static int ip_dev_loopback_xmit(struct sk_buff *newskb) |
97 | { | 97 | { |
98 | newskb->mac.raw = newskb->data; | 98 | skb_reset_mac_header(newskb); |
99 | __skb_pull(newskb, newskb->nh.raw - newskb->data); | 99 | __skb_pull(newskb, skb_network_offset(newskb)); |
100 | newskb->pkt_type = PACKET_LOOPBACK; | 100 | newskb->pkt_type = PACKET_LOOPBACK; |
101 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | 101 | newskb->ip_summed = CHECKSUM_UNNECESSARY; |
102 | BUG_TRAP(newskb->dst); | 102 | BUG_TRAP(newskb->dst); |
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
125 | struct iphdr *iph; | 125 | struct iphdr *iph; |
126 | 126 | ||
127 | /* Build the IP header. */ | 127 | /* Build the IP header. */ |
128 | if (opt) | 128 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); |
129 | iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen); | 129 | skb_reset_network_header(skb); |
130 | else | 130 | iph = ip_hdr(skb); |
131 | iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr)); | ||
132 | |||
133 | iph->version = 4; | 131 | iph->version = 4; |
134 | iph->ihl = 5; | 132 | iph->ihl = 5; |
135 | iph->tos = inet->tos; | 133 | iph->tos = inet->tos; |
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
143 | iph->protocol = sk->sk_protocol; | 141 | iph->protocol = sk->sk_protocol; |
144 | iph->tot_len = htons(skb->len); | 142 | iph->tot_len = htons(skb->len); |
145 | ip_select_ident(iph, &rt->u.dst, sk); | 143 | ip_select_ident(iph, &rt->u.dst, sk); |
146 | skb->nh.iph = iph; | ||
147 | 144 | ||
148 | if (opt && opt->optlen) { | 145 | if (opt && opt->optlen) { |
149 | iph->ihl += opt->optlen>>2; | 146 | iph->ihl += opt->optlen>>2; |
@@ -192,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
192 | return -EINVAL; | 189 | return -EINVAL; |
193 | } | 190 | } |
194 | 191 | ||
192 | static inline int ip_skb_dst_mtu(struct sk_buff *skb) | ||
193 | { | ||
194 | struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; | ||
195 | |||
196 | return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? | ||
197 | skb->dst->dev->mtu : dst_mtu(skb->dst); | ||
198 | } | ||
199 | |||
195 | static inline int ip_finish_output(struct sk_buff *skb) | 200 | static inline int ip_finish_output(struct sk_buff *skb) |
196 | { | 201 | { |
197 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) | 202 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) |
@@ -201,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) | |||
201 | return dst_output(skb); | 206 | return dst_output(skb); |
202 | } | 207 | } |
203 | #endif | 208 | #endif |
204 | if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) | 209 | if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) |
205 | return ip_fragment(skb, ip_finish_output2); | 210 | return ip_fragment(skb, ip_finish_output2); |
206 | else | 211 | else |
207 | return ip_finish_output2(skb); | 212 | return ip_finish_output2(skb); |
@@ -248,7 +253,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
248 | 253 | ||
249 | /* Multicasts with ttl 0 must not go beyond the host */ | 254 | /* Multicasts with ttl 0 must not go beyond the host */ |
250 | 255 | ||
251 | if (skb->nh.iph->ttl == 0) { | 256 | if (ip_hdr(skb)->ttl == 0) { |
252 | kfree_skb(skb); | 257 | kfree_skb(skb); |
253 | return 0; | 258 | return 0; |
254 | } | 259 | } |
@@ -333,7 +338,9 @@ packet_routed: | |||
333 | goto no_route; | 338 | goto no_route; |
334 | 339 | ||
335 | /* OK, we know where to send it, allocate and build IP header. */ | 340 | /* OK, we know where to send it, allocate and build IP header. */ |
336 | iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 341 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); |
342 | skb_reset_network_header(skb); | ||
343 | iph = ip_hdr(skb); | ||
337 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 344 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
338 | iph->tot_len = htons(skb->len); | 345 | iph->tot_len = htons(skb->len); |
339 | if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) | 346 | if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) |
@@ -344,7 +351,6 @@ packet_routed: | |||
344 | iph->protocol = sk->sk_protocol; | 351 | iph->protocol = sk->sk_protocol; |
345 | iph->saddr = rt->rt_src; | 352 | iph->saddr = rt->rt_src; |
346 | iph->daddr = rt->rt_dst; | 353 | iph->daddr = rt->rt_dst; |
347 | skb->nh.iph = iph; | ||
348 | /* Transport layer set skb->h.foo itself. */ | 354 | /* Transport layer set skb->h.foo itself. */ |
349 | 355 | ||
350 | if (opt && opt->optlen) { | 356 | if (opt && opt->optlen) { |
@@ -386,21 +392,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
386 | #ifdef CONFIG_NET_SCHED | 392 | #ifdef CONFIG_NET_SCHED |
387 | to->tc_index = from->tc_index; | 393 | to->tc_index = from->tc_index; |
388 | #endif | 394 | #endif |
389 | #ifdef CONFIG_NETFILTER | 395 | nf_copy(to, from); |
390 | /* Connection association is same as pre-frag packet */ | ||
391 | nf_conntrack_put(to->nfct); | ||
392 | to->nfct = from->nfct; | ||
393 | nf_conntrack_get(to->nfct); | ||
394 | to->nfctinfo = from->nfctinfo; | ||
395 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 396 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) |
396 | to->ipvs_property = from->ipvs_property; | 397 | to->ipvs_property = from->ipvs_property; |
397 | #endif | 398 | #endif |
398 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
399 | nf_bridge_put(to->nf_bridge); | ||
400 | to->nf_bridge = from->nf_bridge; | ||
401 | nf_bridge_get(to->nf_bridge); | ||
402 | #endif | ||
403 | #endif | ||
404 | skb_copy_secmark(to, from); | 399 | skb_copy_secmark(to, from); |
405 | } | 400 | } |
406 | 401 | ||
@@ -430,12 +425,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
430 | * Point into the IP datagram header. | 425 | * Point into the IP datagram header. |
431 | */ | 426 | */ |
432 | 427 | ||
433 | iph = skb->nh.iph; | 428 | iph = ip_hdr(skb); |
434 | 429 | ||
435 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 430 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { |
436 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); | 431 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); |
437 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 432 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
438 | htonl(dst_mtu(&rt->u.dst))); | 433 | htonl(ip_skb_dst_mtu(skb))); |
439 | kfree_skb(skb); | 434 | kfree_skb(skb); |
440 | return -EMSGSIZE; | 435 | return -EMSGSIZE; |
441 | } | 436 | } |
@@ -502,10 +497,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
502 | * before previous one went down. */ | 497 | * before previous one went down. */ |
503 | if (frag) { | 498 | if (frag) { |
504 | frag->ip_summed = CHECKSUM_NONE; | 499 | frag->ip_summed = CHECKSUM_NONE; |
505 | frag->h.raw = frag->data; | 500 | skb_reset_transport_header(frag); |
506 | frag->nh.raw = __skb_push(frag, hlen); | 501 | __skb_push(frag, hlen); |
507 | memcpy(frag->nh.raw, iph, hlen); | 502 | skb_reset_network_header(frag); |
508 | iph = frag->nh.iph; | 503 | memcpy(skb_network_header(frag), iph, hlen); |
504 | iph = ip_hdr(frag); | ||
509 | iph->tot_len = htons(frag->len); | 505 | iph->tot_len = htons(frag->len); |
510 | ip_copy_metadata(frag, skb); | 506 | ip_copy_metadata(frag, skb); |
511 | if (offset == 0) | 507 | if (offset == 0) |
@@ -566,7 +562,7 @@ slow_path: | |||
566 | * Keep copying data until we run out. | 562 | * Keep copying data until we run out. |
567 | */ | 563 | */ |
568 | 564 | ||
569 | while(left > 0) { | 565 | while (left > 0) { |
570 | len = left; | 566 | len = left; |
571 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | 567 | /* IF: it doesn't fit, use 'mtu' - the data space left */ |
572 | if (len > mtu) | 568 | if (len > mtu) |
@@ -593,8 +589,8 @@ slow_path: | |||
593 | ip_copy_metadata(skb2, skb); | 589 | ip_copy_metadata(skb2, skb); |
594 | skb_reserve(skb2, ll_rs); | 590 | skb_reserve(skb2, ll_rs); |
595 | skb_put(skb2, len + hlen); | 591 | skb_put(skb2, len + hlen); |
596 | skb2->nh.raw = skb2->data; | 592 | skb_reset_network_header(skb2); |
597 | skb2->h.raw = skb2->data + hlen; | 593 | skb2->transport_header = skb2->network_header + hlen; |
598 | 594 | ||
599 | /* | 595 | /* |
600 | * Charge the memory for the fragment to any owner | 596 | * Charge the memory for the fragment to any owner |
@@ -608,19 +604,19 @@ slow_path: | |||
608 | * Copy the packet header into the new buffer. | 604 | * Copy the packet header into the new buffer. |
609 | */ | 605 | */ |
610 | 606 | ||
611 | memcpy(skb2->nh.raw, skb->data, hlen); | 607 | skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); |
612 | 608 | ||
613 | /* | 609 | /* |
614 | * Copy a block of the IP datagram. | 610 | * Copy a block of the IP datagram. |
615 | */ | 611 | */ |
616 | if (skb_copy_bits(skb, ptr, skb2->h.raw, len)) | 612 | if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) |
617 | BUG(); | 613 | BUG(); |
618 | left -= len; | 614 | left -= len; |
619 | 615 | ||
620 | /* | 616 | /* |
621 | * Fill in the new header fields. | 617 | * Fill in the new header fields. |
622 | */ | 618 | */ |
623 | iph = skb2->nh.iph; | 619 | iph = ip_hdr(skb2); |
624 | iph->frag_off = htons((offset >> 3)); | 620 | iph->frag_off = htons((offset >> 3)); |
625 | 621 | ||
626 | /* ANK: dirty, but effective trick. Upgrade options only if | 622 | /* ANK: dirty, but effective trick. Upgrade options only if |
@@ -722,10 +718,10 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
722 | skb_put(skb,fragheaderlen + transhdrlen); | 718 | skb_put(skb,fragheaderlen + transhdrlen); |
723 | 719 | ||
724 | /* initialize network header pointer */ | 720 | /* initialize network header pointer */ |
725 | skb->nh.raw = skb->data; | 721 | skb_reset_network_header(skb); |
726 | 722 | ||
727 | /* initialize protocol header pointer */ | 723 | /* initialize protocol header pointer */ |
728 | skb->h.raw = skb->data + fragheaderlen; | 724 | skb->transport_header = skb->network_header + fragheaderlen; |
729 | 725 | ||
730 | skb->ip_summed = CHECKSUM_PARTIAL; | 726 | skb->ip_summed = CHECKSUM_PARTIAL; |
731 | skb->csum = 0; | 727 | skb->csum = 0; |
@@ -799,7 +795,9 @@ int ip_append_data(struct sock *sk, | |||
799 | inet->cork.addr = ipc->addr; | 795 | inet->cork.addr = ipc->addr; |
800 | } | 796 | } |
801 | dst_hold(&rt->u.dst); | 797 | dst_hold(&rt->u.dst); |
802 | inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); | 798 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
799 | rt->u.dst.dev->mtu : | ||
800 | dst_mtu(rt->u.dst.path); | ||
803 | inet->cork.rt = rt; | 801 | inet->cork.rt = rt; |
804 | inet->cork.length = 0; | 802 | inet->cork.length = 0; |
805 | sk->sk_sndmsg_page = NULL; | 803 | sk->sk_sndmsg_page = NULL; |
@@ -929,9 +927,10 @@ alloc_new_skb: | |||
929 | * Find where to start putting bytes. | 927 | * Find where to start putting bytes. |
930 | */ | 928 | */ |
931 | data = skb_put(skb, fraglen); | 929 | data = skb_put(skb, fraglen); |
932 | skb->nh.raw = data + exthdrlen; | 930 | skb_set_network_header(skb, exthdrlen); |
931 | skb->transport_header = (skb->network_header + | ||
932 | fragheaderlen); | ||
933 | data += fragheaderlen; | 933 | data += fragheaderlen; |
934 | skb->h.raw = data + exthdrlen; | ||
935 | 934 | ||
936 | if (fraggap) { | 935 | if (fraggap) { |
937 | skb->csum = skb_copy_and_csum_bits( | 936 | skb->csum = skb_copy_and_csum_bits( |
@@ -1100,8 +1099,6 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1100 | } | 1099 | } |
1101 | if (len <= 0) { | 1100 | if (len <= 0) { |
1102 | struct sk_buff *skb_prev; | 1101 | struct sk_buff *skb_prev; |
1103 | char *data; | ||
1104 | struct iphdr *iph; | ||
1105 | int alloclen; | 1102 | int alloclen; |
1106 | 1103 | ||
1107 | skb_prev = skb; | 1104 | skb_prev = skb; |
@@ -1124,15 +1121,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1124 | /* | 1121 | /* |
1125 | * Find where to start putting bytes. | 1122 | * Find where to start putting bytes. |
1126 | */ | 1123 | */ |
1127 | data = skb_put(skb, fragheaderlen + fraggap); | 1124 | skb_put(skb, fragheaderlen + fraggap); |
1128 | skb->nh.iph = iph = (struct iphdr *)data; | 1125 | skb_reset_network_header(skb); |
1129 | data += fragheaderlen; | 1126 | skb->transport_header = (skb->network_header + |
1130 | skb->h.raw = data; | 1127 | fragheaderlen); |
1131 | |||
1132 | if (fraggap) { | 1128 | if (fraggap) { |
1133 | skb->csum = skb_copy_and_csum_bits( | 1129 | skb->csum = skb_copy_and_csum_bits(skb_prev, |
1134 | skb_prev, maxfraglen, | 1130 | maxfraglen, |
1135 | data, fraggap, 0); | 1131 | skb_transport_header(skb), |
1132 | fraggap, 0); | ||
1136 | skb_prev->csum = csum_sub(skb_prev->csum, | 1133 | skb_prev->csum = csum_sub(skb_prev->csum, |
1137 | skb->csum); | 1134 | skb->csum); |
1138 | pskb_trim_unique(skb_prev, maxfraglen); | 1135 | pskb_trim_unique(skb_prev, maxfraglen); |
@@ -1198,10 +1195,10 @@ int ip_push_pending_frames(struct sock *sk) | |||
1198 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1195 | tail_skb = &(skb_shinfo(skb)->frag_list); |
1199 | 1196 | ||
1200 | /* move skb->data to ip header from ext header */ | 1197 | /* move skb->data to ip header from ext header */ |
1201 | if (skb->data < skb->nh.raw) | 1198 | if (skb->data < skb_network_header(skb)) |
1202 | __skb_pull(skb, skb->nh.raw - skb->data); | 1199 | __skb_pull(skb, skb_network_offset(skb)); |
1203 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1200 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { |
1204 | __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); | 1201 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
1205 | *tail_skb = tmp_skb; | 1202 | *tail_skb = tmp_skb; |
1206 | tail_skb = &(tmp_skb->next); | 1203 | tail_skb = &(tmp_skb->next); |
1207 | skb->len += tmp_skb->len; | 1204 | skb->len += tmp_skb->len; |
@@ -1216,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) | |||
1216 | * to fragment the frame generated here. No matter, what transforms | 1213 | * to fragment the frame generated here. No matter, what transforms |
1217 | * how transforms change size of the packet, it will come out. | 1214 | * how transforms change size of the packet, it will come out. |
1218 | */ | 1215 | */ |
1219 | if (inet->pmtudisc != IP_PMTUDISC_DO) | 1216 | if (inet->pmtudisc < IP_PMTUDISC_DO) |
1220 | skb->local_df = 1; | 1217 | skb->local_df = 1; |
1221 | 1218 | ||
1222 | /* DF bit is set when we want to see DF on outgoing frames. | 1219 | /* DF bit is set when we want to see DF on outgoing frames. |
1223 | * If local_df is set too, we still allow to fragment this frame | 1220 | * If local_df is set too, we still allow to fragment this frame |
1224 | * locally. */ | 1221 | * locally. */ |
1225 | if (inet->pmtudisc == IP_PMTUDISC_DO || | 1222 | if (inet->pmtudisc >= IP_PMTUDISC_DO || |
1226 | (skb->len <= dst_mtu(&rt->u.dst) && | 1223 | (skb->len <= dst_mtu(&rt->u.dst) && |
1227 | ip_dont_fragment(sk, &rt->u.dst))) | 1224 | ip_dont_fragment(sk, &rt->u.dst))) |
1228 | df = htons(IP_DF); | 1225 | df = htons(IP_DF); |
@@ -1352,11 +1349,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1352 | struct flowi fl = { .nl_u = { .ip4_u = | 1349 | struct flowi fl = { .nl_u = { .ip4_u = |
1353 | { .daddr = daddr, | 1350 | { .daddr = daddr, |
1354 | .saddr = rt->rt_spec_dst, | 1351 | .saddr = rt->rt_spec_dst, |
1355 | .tos = RT_TOS(skb->nh.iph->tos) } }, | 1352 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, |
1356 | /* Not quite clean, but right. */ | 1353 | /* Not quite clean, but right. */ |
1357 | .uli_u = { .ports = | 1354 | .uli_u = { .ports = |
1358 | { .sport = skb->h.th->dest, | 1355 | { .sport = tcp_hdr(skb)->dest, |
1359 | .dport = skb->h.th->source } }, | 1356 | .dport = tcp_hdr(skb)->source } }, |
1360 | .proto = sk->sk_protocol }; | 1357 | .proto = sk->sk_protocol }; |
1361 | security_skb_classify_flow(skb, &fl); | 1358 | security_skb_classify_flow(skb, &fl); |
1362 | if (ip_route_output_key(&rt, &fl)) | 1359 | if (ip_route_output_key(&rt, &fl)) |
@@ -1370,14 +1367,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1370 | with locally disabled BH and that sk cannot be already spinlocked. | 1367 | with locally disabled BH and that sk cannot be already spinlocked. |
1371 | */ | 1368 | */ |
1372 | bh_lock_sock(sk); | 1369 | bh_lock_sock(sk); |
1373 | inet->tos = skb->nh.iph->tos; | 1370 | inet->tos = ip_hdr(skb)->tos; |
1374 | sk->sk_priority = skb->priority; | 1371 | sk->sk_priority = skb->priority; |
1375 | sk->sk_protocol = skb->nh.iph->protocol; | 1372 | sk->sk_protocol = ip_hdr(skb)->protocol; |
1376 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, | 1373 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, |
1377 | &ipc, rt, MSG_DONTWAIT); | 1374 | &ipc, rt, MSG_DONTWAIT); |
1378 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 1375 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
1379 | if (arg->csumoffset >= 0) | 1376 | if (arg->csumoffset >= 0) |
1380 | *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum)); | 1377 | *((__sum16 *)skb_transport_header(skb) + |
1378 | arg->csumoffset) = csum_fold(csum_add(skb->csum, | ||
1379 | arg->csum)); | ||
1381 | skb->ip_summed = CHECKSUM_NONE; | 1380 | skb->ip_summed = CHECKSUM_NONE; |
1382 | ip_push_pending_frames(sk); | 1381 | ip_push_pending_frames(sk); |
1383 | } | 1382 | } |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 23048d9f3584..4d544573f48a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -59,7 +59,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) | |||
59 | struct in_pktinfo info; | 59 | struct in_pktinfo info; |
60 | struct rtable *rt = (struct rtable *)skb->dst; | 60 | struct rtable *rt = (struct rtable *)skb->dst; |
61 | 61 | ||
62 | info.ipi_addr.s_addr = skb->nh.iph->daddr; | 62 | info.ipi_addr.s_addr = ip_hdr(skb)->daddr; |
63 | if (rt) { | 63 | if (rt) { |
64 | info.ipi_ifindex = rt->rt_iif; | 64 | info.ipi_ifindex = rt->rt_iif; |
65 | info.ipi_spec_dst.s_addr = rt->rt_spec_dst; | 65 | info.ipi_spec_dst.s_addr = rt->rt_spec_dst; |
@@ -73,13 +73,13 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) | |||
73 | 73 | ||
74 | static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) | 74 | static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) |
75 | { | 75 | { |
76 | int ttl = skb->nh.iph->ttl; | 76 | int ttl = ip_hdr(skb)->ttl; |
77 | put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); | 77 | put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); |
78 | } | 78 | } |
79 | 79 | ||
80 | static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) | 80 | static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) |
81 | { | 81 | { |
82 | put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos); | 82 | put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos); |
83 | } | 83 | } |
84 | 84 | ||
85 | static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) | 85 | static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) |
@@ -87,7 +87,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) | |||
87 | if (IPCB(skb)->opt.optlen == 0) | 87 | if (IPCB(skb)->opt.optlen == 0) |
88 | return; | 88 | return; |
89 | 89 | ||
90 | put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1); | 90 | put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, |
91 | ip_hdr(skb) + 1); | ||
91 | } | 92 | } |
92 | 93 | ||
93 | 94 | ||
@@ -268,18 +269,21 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, | |||
268 | serr = SKB_EXT_ERR(skb); | 269 | serr = SKB_EXT_ERR(skb); |
269 | serr->ee.ee_errno = err; | 270 | serr->ee.ee_errno = err; |
270 | serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; | 271 | serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; |
271 | serr->ee.ee_type = skb->h.icmph->type; | 272 | serr->ee.ee_type = icmp_hdr(skb)->type; |
272 | serr->ee.ee_code = skb->h.icmph->code; | 273 | serr->ee.ee_code = icmp_hdr(skb)->code; |
273 | serr->ee.ee_pad = 0; | 274 | serr->ee.ee_pad = 0; |
274 | serr->ee.ee_info = info; | 275 | serr->ee.ee_info = info; |
275 | serr->ee.ee_data = 0; | 276 | serr->ee.ee_data = 0; |
276 | serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw; | 277 | serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) - |
278 | skb_network_header(skb); | ||
277 | serr->port = port; | 279 | serr->port = port; |
278 | 280 | ||
279 | skb->h.raw = payload; | 281 | if (skb_pull(skb, payload - skb->data) != NULL) { |
280 | if (!skb_pull(skb, payload - skb->data) || | 282 | skb_reset_transport_header(skb); |
281 | sock_queue_err_skb(sk, skb)) | 283 | if (sock_queue_err_skb(sk, skb) == 0) |
282 | kfree_skb(skb); | 284 | return; |
285 | } | ||
286 | kfree_skb(skb); | ||
283 | } | 287 | } |
284 | 288 | ||
285 | void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) | 289 | void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) |
@@ -296,8 +300,9 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf | |||
296 | if (!skb) | 300 | if (!skb) |
297 | return; | 301 | return; |
298 | 302 | ||
299 | iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr)); | 303 | skb_put(skb, sizeof(struct iphdr)); |
300 | skb->nh.iph = iph; | 304 | skb_reset_network_header(skb); |
305 | iph = ip_hdr(skb); | ||
301 | iph->daddr = daddr; | 306 | iph->daddr = daddr; |
302 | 307 | ||
303 | serr = SKB_EXT_ERR(skb); | 308 | serr = SKB_EXT_ERR(skb); |
@@ -308,11 +313,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf | |||
308 | serr->ee.ee_pad = 0; | 313 | serr->ee.ee_pad = 0; |
309 | serr->ee.ee_info = info; | 314 | serr->ee.ee_info = info; |
310 | serr->ee.ee_data = 0; | 315 | serr->ee.ee_data = 0; |
311 | serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw; | 316 | serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); |
312 | serr->port = port; | 317 | serr->port = port; |
313 | 318 | ||
314 | skb->h.raw = skb->tail; | 319 | __skb_pull(skb, skb_tail_pointer(skb) - skb->data); |
315 | __skb_pull(skb, skb->tail - skb->data); | 320 | skb_reset_transport_header(skb); |
316 | 321 | ||
317 | if (sock_queue_err_skb(sk, skb)) | 322 | if (sock_queue_err_skb(sk, skb)) |
318 | kfree_skb(skb); | 323 | kfree_skb(skb); |
@@ -354,7 +359,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) | |||
354 | sin = (struct sockaddr_in *)msg->msg_name; | 359 | sin = (struct sockaddr_in *)msg->msg_name; |
355 | if (sin) { | 360 | if (sin) { |
356 | sin->sin_family = AF_INET; | 361 | sin->sin_family = AF_INET; |
357 | sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset); | 362 | sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + |
363 | serr->addr_offset); | ||
358 | sin->sin_port = serr->port; | 364 | sin->sin_port = serr->port; |
359 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); | 365 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); |
360 | } | 366 | } |
@@ -366,7 +372,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) | |||
366 | struct inet_sock *inet = inet_sk(sk); | 372 | struct inet_sock *inet = inet_sk(sk); |
367 | 373 | ||
368 | sin->sin_family = AF_INET; | 374 | sin->sin_family = AF_INET; |
369 | sin->sin_addr.s_addr = skb->nh.iph->saddr; | 375 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
370 | sin->sin_port = 0; | 376 | sin->sin_port = 0; |
371 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); | 377 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); |
372 | if (inet->cmsg_flags) | 378 | if (inet->cmsg_flags) |
@@ -403,20 +409,20 @@ out: | |||
403 | */ | 409 | */ |
404 | 410 | ||
405 | static int do_ip_setsockopt(struct sock *sk, int level, | 411 | static int do_ip_setsockopt(struct sock *sk, int level, |
406 | int optname, char __user *optval, int optlen) | 412 | int optname, char __user *optval, int optlen) |
407 | { | 413 | { |
408 | struct inet_sock *inet = inet_sk(sk); | 414 | struct inet_sock *inet = inet_sk(sk); |
409 | int val=0,err; | 415 | int val=0,err; |
410 | 416 | ||
411 | if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | | 417 | if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | |
412 | (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | | 418 | (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | |
413 | (1<<IP_RETOPTS) | (1<<IP_TOS) | | 419 | (1<<IP_RETOPTS) | (1<<IP_TOS) | |
414 | (1<<IP_TTL) | (1<<IP_HDRINCL) | | 420 | (1<<IP_TTL) | (1<<IP_HDRINCL) | |
415 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | | 421 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | |
416 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | | 422 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | |
417 | (1<<IP_PASSSEC))) || | 423 | (1<<IP_PASSSEC))) || |
418 | optname == IP_MULTICAST_TTL || | 424 | optname == IP_MULTICAST_TTL || |
419 | optname == IP_MULTICAST_LOOP) { | 425 | optname == IP_MULTICAST_LOOP) { |
420 | if (optlen >= sizeof(int)) { | 426 | if (optlen >= sizeof(int)) { |
421 | if (get_user(val, (int __user *) optval)) | 427 | if (get_user(val, (int __user *) optval)) |
422 | return -EFAULT; | 428 | return -EFAULT; |
@@ -440,444 +446,444 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
440 | lock_sock(sk); | 446 | lock_sock(sk); |
441 | 447 | ||
442 | switch (optname) { | 448 | switch (optname) { |
443 | case IP_OPTIONS: | 449 | case IP_OPTIONS: |
444 | { | 450 | { |
445 | struct ip_options * opt = NULL; | 451 | struct ip_options * opt = NULL; |
446 | if (optlen > 40 || optlen < 0) | 452 | if (optlen > 40 || optlen < 0) |
447 | goto e_inval; | 453 | goto e_inval; |
448 | err = ip_options_get_from_user(&opt, optval, optlen); | 454 | err = ip_options_get_from_user(&opt, optval, optlen); |
449 | if (err) | 455 | if (err) |
450 | break; | 456 | break; |
451 | if (inet->is_icsk) { | 457 | if (inet->is_icsk) { |
452 | struct inet_connection_sock *icsk = inet_csk(sk); | 458 | struct inet_connection_sock *icsk = inet_csk(sk); |
453 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 459 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
454 | if (sk->sk_family == PF_INET || | 460 | if (sk->sk_family == PF_INET || |
455 | (!((1 << sk->sk_state) & | 461 | (!((1 << sk->sk_state) & |
456 | (TCPF_LISTEN | TCPF_CLOSE)) && | 462 | (TCPF_LISTEN | TCPF_CLOSE)) && |
457 | inet->daddr != LOOPBACK4_IPV6)) { | 463 | inet->daddr != LOOPBACK4_IPV6)) { |
458 | #endif | 464 | #endif |
459 | if (inet->opt) | 465 | if (inet->opt) |
460 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; | 466 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; |
461 | if (opt) | 467 | if (opt) |
462 | icsk->icsk_ext_hdr_len += opt->optlen; | 468 | icsk->icsk_ext_hdr_len += opt->optlen; |
463 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); | 469 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); |
464 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 470 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
465 | } | ||
466 | #endif | ||
467 | } | 471 | } |
468 | opt = xchg(&inet->opt, opt); | 472 | #endif |
469 | kfree(opt); | ||
470 | break; | ||
471 | } | 473 | } |
472 | case IP_PKTINFO: | 474 | opt = xchg(&inet->opt, opt); |
473 | if (val) | 475 | kfree(opt); |
474 | inet->cmsg_flags |= IP_CMSG_PKTINFO; | 476 | break; |
475 | else | 477 | } |
476 | inet->cmsg_flags &= ~IP_CMSG_PKTINFO; | 478 | case IP_PKTINFO: |
477 | break; | 479 | if (val) |
478 | case IP_RECVTTL: | 480 | inet->cmsg_flags |= IP_CMSG_PKTINFO; |
479 | if (val) | 481 | else |
480 | inet->cmsg_flags |= IP_CMSG_TTL; | 482 | inet->cmsg_flags &= ~IP_CMSG_PKTINFO; |
481 | else | 483 | break; |
482 | inet->cmsg_flags &= ~IP_CMSG_TTL; | 484 | case IP_RECVTTL: |
483 | break; | 485 | if (val) |
484 | case IP_RECVTOS: | 486 | inet->cmsg_flags |= IP_CMSG_TTL; |
485 | if (val) | 487 | else |
486 | inet->cmsg_flags |= IP_CMSG_TOS; | 488 | inet->cmsg_flags &= ~IP_CMSG_TTL; |
487 | else | 489 | break; |
488 | inet->cmsg_flags &= ~IP_CMSG_TOS; | 490 | case IP_RECVTOS: |
489 | break; | 491 | if (val) |
490 | case IP_RECVOPTS: | 492 | inet->cmsg_flags |= IP_CMSG_TOS; |
491 | if (val) | 493 | else |
492 | inet->cmsg_flags |= IP_CMSG_RECVOPTS; | 494 | inet->cmsg_flags &= ~IP_CMSG_TOS; |
493 | else | 495 | break; |
494 | inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; | 496 | case IP_RECVOPTS: |
495 | break; | 497 | if (val) |
496 | case IP_RETOPTS: | 498 | inet->cmsg_flags |= IP_CMSG_RECVOPTS; |
497 | if (val) | 499 | else |
498 | inet->cmsg_flags |= IP_CMSG_RETOPTS; | 500 | inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; |
499 | else | 501 | break; |
500 | inet->cmsg_flags &= ~IP_CMSG_RETOPTS; | 502 | case IP_RETOPTS: |
503 | if (val) | ||
504 | inet->cmsg_flags |= IP_CMSG_RETOPTS; | ||
505 | else | ||
506 | inet->cmsg_flags &= ~IP_CMSG_RETOPTS; | ||
507 | break; | ||
508 | case IP_PASSSEC: | ||
509 | if (val) | ||
510 | inet->cmsg_flags |= IP_CMSG_PASSSEC; | ||
511 | else | ||
512 | inet->cmsg_flags &= ~IP_CMSG_PASSSEC; | ||
513 | break; | ||
514 | case IP_TOS: /* This sets both TOS and Precedence */ | ||
515 | if (sk->sk_type == SOCK_STREAM) { | ||
516 | val &= ~3; | ||
517 | val |= inet->tos & 3; | ||
518 | } | ||
519 | if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && | ||
520 | !capable(CAP_NET_ADMIN)) { | ||
521 | err = -EPERM; | ||
501 | break; | 522 | break; |
502 | case IP_PASSSEC: | 523 | } |
503 | if (val) | 524 | if (inet->tos != val) { |
504 | inet->cmsg_flags |= IP_CMSG_PASSSEC; | 525 | inet->tos = val; |
505 | else | 526 | sk->sk_priority = rt_tos2priority(val); |
506 | inet->cmsg_flags &= ~IP_CMSG_PASSSEC; | 527 | sk_dst_reset(sk); |
528 | } | ||
529 | break; | ||
530 | case IP_TTL: | ||
531 | if (optlen<1) | ||
532 | goto e_inval; | ||
533 | if (val != -1 && (val < 1 || val>255)) | ||
534 | goto e_inval; | ||
535 | inet->uc_ttl = val; | ||
536 | break; | ||
537 | case IP_HDRINCL: | ||
538 | if (sk->sk_type != SOCK_RAW) { | ||
539 | err = -ENOPROTOOPT; | ||
507 | break; | 540 | break; |
508 | case IP_TOS: /* This sets both TOS and Precedence */ | 541 | } |
509 | if (sk->sk_type == SOCK_STREAM) { | 542 | inet->hdrincl = val ? 1 : 0; |
510 | val &= ~3; | 543 | break; |
511 | val |= inet->tos & 3; | 544 | case IP_MTU_DISCOVER: |
512 | } | 545 | if (val<0 || val>3) |
513 | if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && | 546 | goto e_inval; |
514 | !capable(CAP_NET_ADMIN)) { | 547 | inet->pmtudisc = val; |
515 | err = -EPERM; | 548 | break; |
549 | case IP_RECVERR: | ||
550 | inet->recverr = !!val; | ||
551 | if (!val) | ||
552 | skb_queue_purge(&sk->sk_error_queue); | ||
553 | break; | ||
554 | case IP_MULTICAST_TTL: | ||
555 | if (sk->sk_type == SOCK_STREAM) | ||
556 | goto e_inval; | ||
557 | if (optlen<1) | ||
558 | goto e_inval; | ||
559 | if (val==-1) | ||
560 | val = 1; | ||
561 | if (val < 0 || val > 255) | ||
562 | goto e_inval; | ||
563 | inet->mc_ttl = val; | ||
564 | break; | ||
565 | case IP_MULTICAST_LOOP: | ||
566 | if (optlen<1) | ||
567 | goto e_inval; | ||
568 | inet->mc_loop = !!val; | ||
569 | break; | ||
570 | case IP_MULTICAST_IF: | ||
571 | { | ||
572 | struct ip_mreqn mreq; | ||
573 | struct net_device *dev = NULL; | ||
574 | |||
575 | if (sk->sk_type == SOCK_STREAM) | ||
576 | goto e_inval; | ||
577 | /* | ||
578 | * Check the arguments are allowable | ||
579 | */ | ||
580 | |||
581 | err = -EFAULT; | ||
582 | if (optlen >= sizeof(struct ip_mreqn)) { | ||
583 | if (copy_from_user(&mreq,optval,sizeof(mreq))) | ||
516 | break; | 584 | break; |
517 | } | 585 | } else { |
518 | if (inet->tos != val) { | 586 | memset(&mreq, 0, sizeof(mreq)); |
519 | inet->tos = val; | 587 | if (optlen >= sizeof(struct in_addr) && |
520 | sk->sk_priority = rt_tos2priority(val); | 588 | copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) |
521 | sk_dst_reset(sk); | 589 | break; |
522 | } | 590 | } |
523 | break; | 591 | |
524 | case IP_TTL: | 592 | if (!mreq.imr_ifindex) { |
525 | if (optlen<1) | 593 | if (mreq.imr_address.s_addr == INADDR_ANY) { |
526 | goto e_inval; | 594 | inet->mc_index = 0; |
527 | if (val != -1 && (val < 1 || val>255)) | 595 | inet->mc_addr = 0; |
528 | goto e_inval; | 596 | err = 0; |
529 | inet->uc_ttl = val; | ||
530 | break; | ||
531 | case IP_HDRINCL: | ||
532 | if (sk->sk_type != SOCK_RAW) { | ||
533 | err = -ENOPROTOOPT; | ||
534 | break; | 597 | break; |
535 | } | 598 | } |
536 | inet->hdrincl = val ? 1 : 0; | 599 | dev = ip_dev_find(mreq.imr_address.s_addr); |
537 | break; | 600 | if (dev) { |
538 | case IP_MTU_DISCOVER: | 601 | mreq.imr_ifindex = dev->ifindex; |
539 | if (val<0 || val>2) | 602 | dev_put(dev); |
540 | goto e_inval; | 603 | } |
541 | inet->pmtudisc = val; | 604 | } else |
542 | break; | 605 | dev = __dev_get_by_index(mreq.imr_ifindex); |
543 | case IP_RECVERR: | ||
544 | inet->recverr = !!val; | ||
545 | if (!val) | ||
546 | skb_queue_purge(&sk->sk_error_queue); | ||
547 | break; | ||
548 | case IP_MULTICAST_TTL: | ||
549 | if (sk->sk_type == SOCK_STREAM) | ||
550 | goto e_inval; | ||
551 | if (optlen<1) | ||
552 | goto e_inval; | ||
553 | if (val==-1) | ||
554 | val = 1; | ||
555 | if (val < 0 || val > 255) | ||
556 | goto e_inval; | ||
557 | inet->mc_ttl = val; | ||
558 | break; | ||
559 | case IP_MULTICAST_LOOP: | ||
560 | if (optlen<1) | ||
561 | goto e_inval; | ||
562 | inet->mc_loop = !!val; | ||
563 | break; | ||
564 | case IP_MULTICAST_IF: | ||
565 | { | ||
566 | struct ip_mreqn mreq; | ||
567 | struct net_device *dev = NULL; | ||
568 | 606 | ||
569 | if (sk->sk_type == SOCK_STREAM) | ||
570 | goto e_inval; | ||
571 | /* | ||
572 | * Check the arguments are allowable | ||
573 | */ | ||
574 | 607 | ||
575 | err = -EFAULT; | 608 | err = -EADDRNOTAVAIL; |
576 | if (optlen >= sizeof(struct ip_mreqn)) { | 609 | if (!dev) |
577 | if (copy_from_user(&mreq,optval,sizeof(mreq))) | 610 | break; |
578 | break; | ||
579 | } else { | ||
580 | memset(&mreq, 0, sizeof(mreq)); | ||
581 | if (optlen >= sizeof(struct in_addr) && | ||
582 | copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) | ||
583 | break; | ||
584 | } | ||
585 | 611 | ||
586 | if (!mreq.imr_ifindex) { | 612 | err = -EINVAL; |
587 | if (mreq.imr_address.s_addr == INADDR_ANY) { | 613 | if (sk->sk_bound_dev_if && |
588 | inet->mc_index = 0; | 614 | mreq.imr_ifindex != sk->sk_bound_dev_if) |
589 | inet->mc_addr = 0; | 615 | break; |
590 | err = 0; | ||
591 | break; | ||
592 | } | ||
593 | dev = ip_dev_find(mreq.imr_address.s_addr); | ||
594 | if (dev) { | ||
595 | mreq.imr_ifindex = dev->ifindex; | ||
596 | dev_put(dev); | ||
597 | } | ||
598 | } else | ||
599 | dev = __dev_get_by_index(mreq.imr_ifindex); | ||
600 | 616 | ||
617 | inet->mc_index = mreq.imr_ifindex; | ||
618 | inet->mc_addr = mreq.imr_address.s_addr; | ||
619 | err = 0; | ||
620 | break; | ||
621 | } | ||
601 | 622 | ||
602 | err = -EADDRNOTAVAIL; | 623 | case IP_ADD_MEMBERSHIP: |
603 | if (!dev) | 624 | case IP_DROP_MEMBERSHIP: |
604 | break; | 625 | { |
626 | struct ip_mreqn mreq; | ||
605 | 627 | ||
606 | err = -EINVAL; | 628 | if (optlen < sizeof(struct ip_mreq)) |
607 | if (sk->sk_bound_dev_if && | 629 | goto e_inval; |
608 | mreq.imr_ifindex != sk->sk_bound_dev_if) | 630 | err = -EFAULT; |
631 | if (optlen >= sizeof(struct ip_mreqn)) { | ||
632 | if (copy_from_user(&mreq,optval,sizeof(mreq))) | ||
609 | break; | 633 | break; |
634 | } else { | ||
635 | memset(&mreq, 0, sizeof(mreq)); | ||
636 | if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) | ||
637 | break; | ||
638 | } | ||
610 | 639 | ||
611 | inet->mc_index = mreq.imr_ifindex; | 640 | if (optname == IP_ADD_MEMBERSHIP) |
612 | inet->mc_addr = mreq.imr_address.s_addr; | 641 | err = ip_mc_join_group(sk, &mreq); |
613 | err = 0; | 642 | else |
643 | err = ip_mc_leave_group(sk, &mreq); | ||
644 | break; | ||
645 | } | ||
646 | case IP_MSFILTER: | ||
647 | { | ||
648 | extern int sysctl_igmp_max_msf; | ||
649 | struct ip_msfilter *msf; | ||
650 | |||
651 | if (optlen < IP_MSFILTER_SIZE(0)) | ||
652 | goto e_inval; | ||
653 | if (optlen > sysctl_optmem_max) { | ||
654 | err = -ENOBUFS; | ||
614 | break; | 655 | break; |
615 | } | 656 | } |
657 | msf = kmalloc(optlen, GFP_KERNEL); | ||
658 | if (msf == 0) { | ||
659 | err = -ENOBUFS; | ||
660 | break; | ||
661 | } | ||
662 | err = -EFAULT; | ||
663 | if (copy_from_user(msf, optval, optlen)) { | ||
664 | kfree(msf); | ||
665 | break; | ||
666 | } | ||
667 | /* numsrc >= (1G-4) overflow in 32 bits */ | ||
668 | if (msf->imsf_numsrc >= 0x3ffffffcU || | ||
669 | msf->imsf_numsrc > sysctl_igmp_max_msf) { | ||
670 | kfree(msf); | ||
671 | err = -ENOBUFS; | ||
672 | break; | ||
673 | } | ||
674 | if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { | ||
675 | kfree(msf); | ||
676 | err = -EINVAL; | ||
677 | break; | ||
678 | } | ||
679 | err = ip_mc_msfilter(sk, msf, 0); | ||
680 | kfree(msf); | ||
681 | break; | ||
682 | } | ||
683 | case IP_BLOCK_SOURCE: | ||
684 | case IP_UNBLOCK_SOURCE: | ||
685 | case IP_ADD_SOURCE_MEMBERSHIP: | ||
686 | case IP_DROP_SOURCE_MEMBERSHIP: | ||
687 | { | ||
688 | struct ip_mreq_source mreqs; | ||
689 | int omode, add; | ||
616 | 690 | ||
617 | case IP_ADD_MEMBERSHIP: | 691 | if (optlen != sizeof(struct ip_mreq_source)) |
618 | case IP_DROP_MEMBERSHIP: | 692 | goto e_inval; |
619 | { | 693 | if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { |
620 | struct ip_mreqn mreq; | ||
621 | |||
622 | if (optlen < sizeof(struct ip_mreq)) | ||
623 | goto e_inval; | ||
624 | err = -EFAULT; | 694 | err = -EFAULT; |
625 | if (optlen >= sizeof(struct ip_mreqn)) { | ||
626 | if(copy_from_user(&mreq,optval,sizeof(mreq))) | ||
627 | break; | ||
628 | } else { | ||
629 | memset(&mreq, 0, sizeof(mreq)); | ||
630 | if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) | ||
631 | break; | ||
632 | } | ||
633 | |||
634 | if (optname == IP_ADD_MEMBERSHIP) | ||
635 | err = ip_mc_join_group(sk, &mreq); | ||
636 | else | ||
637 | err = ip_mc_leave_group(sk, &mreq); | ||
638 | break; | 695 | break; |
639 | } | 696 | } |
640 | case IP_MSFILTER: | 697 | if (optname == IP_BLOCK_SOURCE) { |
641 | { | 698 | omode = MCAST_EXCLUDE; |
642 | extern int sysctl_igmp_max_msf; | 699 | add = 1; |
643 | struct ip_msfilter *msf; | 700 | } else if (optname == IP_UNBLOCK_SOURCE) { |
701 | omode = MCAST_EXCLUDE; | ||
702 | add = 0; | ||
703 | } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) { | ||
704 | struct ip_mreqn mreq; | ||
644 | 705 | ||
645 | if (optlen < IP_MSFILTER_SIZE(0)) | 706 | mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; |
646 | goto e_inval; | 707 | mreq.imr_address.s_addr = mreqs.imr_interface; |
647 | if (optlen > sysctl_optmem_max) { | 708 | mreq.imr_ifindex = 0; |
648 | err = -ENOBUFS; | 709 | err = ip_mc_join_group(sk, &mreq); |
649 | break; | 710 | if (err && err != -EADDRINUSE) |
650 | } | ||
651 | msf = kmalloc(optlen, GFP_KERNEL); | ||
652 | if (msf == 0) { | ||
653 | err = -ENOBUFS; | ||
654 | break; | 711 | break; |
655 | } | 712 | omode = MCAST_INCLUDE; |
713 | add = 1; | ||
714 | } else /* IP_DROP_SOURCE_MEMBERSHIP */ { | ||
715 | omode = MCAST_INCLUDE; | ||
716 | add = 0; | ||
717 | } | ||
718 | err = ip_mc_source(add, omode, sk, &mreqs, 0); | ||
719 | break; | ||
720 | } | ||
721 | case MCAST_JOIN_GROUP: | ||
722 | case MCAST_LEAVE_GROUP: | ||
723 | { | ||
724 | struct group_req greq; | ||
725 | struct sockaddr_in *psin; | ||
726 | struct ip_mreqn mreq; | ||
727 | |||
728 | if (optlen < sizeof(struct group_req)) | ||
729 | goto e_inval; | ||
730 | err = -EFAULT; | ||
731 | if (copy_from_user(&greq, optval, sizeof(greq))) | ||
732 | break; | ||
733 | psin = (struct sockaddr_in *)&greq.gr_group; | ||
734 | if (psin->sin_family != AF_INET) | ||
735 | goto e_inval; | ||
736 | memset(&mreq, 0, sizeof(mreq)); | ||
737 | mreq.imr_multiaddr = psin->sin_addr; | ||
738 | mreq.imr_ifindex = greq.gr_interface; | ||
739 | |||
740 | if (optname == MCAST_JOIN_GROUP) | ||
741 | err = ip_mc_join_group(sk, &mreq); | ||
742 | else | ||
743 | err = ip_mc_leave_group(sk, &mreq); | ||
744 | break; | ||
745 | } | ||
746 | case MCAST_JOIN_SOURCE_GROUP: | ||
747 | case MCAST_LEAVE_SOURCE_GROUP: | ||
748 | case MCAST_BLOCK_SOURCE: | ||
749 | case MCAST_UNBLOCK_SOURCE: | ||
750 | { | ||
751 | struct group_source_req greqs; | ||
752 | struct ip_mreq_source mreqs; | ||
753 | struct sockaddr_in *psin; | ||
754 | int omode, add; | ||
755 | |||
756 | if (optlen != sizeof(struct group_source_req)) | ||
757 | goto e_inval; | ||
758 | if (copy_from_user(&greqs, optval, sizeof(greqs))) { | ||
656 | err = -EFAULT; | 759 | err = -EFAULT; |
657 | if (copy_from_user(msf, optval, optlen)) { | ||
658 | kfree(msf); | ||
659 | break; | ||
660 | } | ||
661 | /* numsrc >= (1G-4) overflow in 32 bits */ | ||
662 | if (msf->imsf_numsrc >= 0x3ffffffcU || | ||
663 | msf->imsf_numsrc > sysctl_igmp_max_msf) { | ||
664 | kfree(msf); | ||
665 | err = -ENOBUFS; | ||
666 | break; | ||
667 | } | ||
668 | if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { | ||
669 | kfree(msf); | ||
670 | err = -EINVAL; | ||
671 | break; | ||
672 | } | ||
673 | err = ip_mc_msfilter(sk, msf, 0); | ||
674 | kfree(msf); | ||
675 | break; | 760 | break; |
676 | } | 761 | } |
677 | case IP_BLOCK_SOURCE: | 762 | if (greqs.gsr_group.ss_family != AF_INET || |
678 | case IP_UNBLOCK_SOURCE: | 763 | greqs.gsr_source.ss_family != AF_INET) { |
679 | case IP_ADD_SOURCE_MEMBERSHIP: | 764 | err = -EADDRNOTAVAIL; |
680 | case IP_DROP_SOURCE_MEMBERSHIP: | ||
681 | { | ||
682 | struct ip_mreq_source mreqs; | ||
683 | int omode, add; | ||
684 | |||
685 | if (optlen != sizeof(struct ip_mreq_source)) | ||
686 | goto e_inval; | ||
687 | if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { | ||
688 | err = -EFAULT; | ||
689 | break; | ||
690 | } | ||
691 | if (optname == IP_BLOCK_SOURCE) { | ||
692 | omode = MCAST_EXCLUDE; | ||
693 | add = 1; | ||
694 | } else if (optname == IP_UNBLOCK_SOURCE) { | ||
695 | omode = MCAST_EXCLUDE; | ||
696 | add = 0; | ||
697 | } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) { | ||
698 | struct ip_mreqn mreq; | ||
699 | |||
700 | mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; | ||
701 | mreq.imr_address.s_addr = mreqs.imr_interface; | ||
702 | mreq.imr_ifindex = 0; | ||
703 | err = ip_mc_join_group(sk, &mreq); | ||
704 | if (err && err != -EADDRINUSE) | ||
705 | break; | ||
706 | omode = MCAST_INCLUDE; | ||
707 | add = 1; | ||
708 | } else /* IP_DROP_SOURCE_MEMBERSHIP */ { | ||
709 | omode = MCAST_INCLUDE; | ||
710 | add = 0; | ||
711 | } | ||
712 | err = ip_mc_source(add, omode, sk, &mreqs, 0); | ||
713 | break; | 765 | break; |
714 | } | 766 | } |
715 | case MCAST_JOIN_GROUP: | 767 | psin = (struct sockaddr_in *)&greqs.gsr_group; |
716 | case MCAST_LEAVE_GROUP: | 768 | mreqs.imr_multiaddr = psin->sin_addr.s_addr; |
717 | { | 769 | psin = (struct sockaddr_in *)&greqs.gsr_source; |
718 | struct group_req greq; | 770 | mreqs.imr_sourceaddr = psin->sin_addr.s_addr; |
719 | struct sockaddr_in *psin; | 771 | mreqs.imr_interface = 0; /* use index for mc_source */ |
772 | |||
773 | if (optname == MCAST_BLOCK_SOURCE) { | ||
774 | omode = MCAST_EXCLUDE; | ||
775 | add = 1; | ||
776 | } else if (optname == MCAST_UNBLOCK_SOURCE) { | ||
777 | omode = MCAST_EXCLUDE; | ||
778 | add = 0; | ||
779 | } else if (optname == MCAST_JOIN_SOURCE_GROUP) { | ||
720 | struct ip_mreqn mreq; | 780 | struct ip_mreqn mreq; |
721 | 781 | ||
722 | if (optlen < sizeof(struct group_req)) | 782 | psin = (struct sockaddr_in *)&greqs.gsr_group; |
723 | goto e_inval; | ||
724 | err = -EFAULT; | ||
725 | if(copy_from_user(&greq, optval, sizeof(greq))) | ||
726 | break; | ||
727 | psin = (struct sockaddr_in *)&greq.gr_group; | ||
728 | if (psin->sin_family != AF_INET) | ||
729 | goto e_inval; | ||
730 | memset(&mreq, 0, sizeof(mreq)); | ||
731 | mreq.imr_multiaddr = psin->sin_addr; | 783 | mreq.imr_multiaddr = psin->sin_addr; |
732 | mreq.imr_ifindex = greq.gr_interface; | 784 | mreq.imr_address.s_addr = 0; |
733 | 785 | mreq.imr_ifindex = greqs.gsr_interface; | |
734 | if (optname == MCAST_JOIN_GROUP) | 786 | err = ip_mc_join_group(sk, &mreq); |
735 | err = ip_mc_join_group(sk, &mreq); | 787 | if (err && err != -EADDRINUSE) |
736 | else | 788 | break; |
737 | err = ip_mc_leave_group(sk, &mreq); | 789 | greqs.gsr_interface = mreq.imr_ifindex; |
790 | omode = MCAST_INCLUDE; | ||
791 | add = 1; | ||
792 | } else /* MCAST_LEAVE_SOURCE_GROUP */ { | ||
793 | omode = MCAST_INCLUDE; | ||
794 | add = 0; | ||
795 | } | ||
796 | err = ip_mc_source(add, omode, sk, &mreqs, | ||
797 | greqs.gsr_interface); | ||
798 | break; | ||
799 | } | ||
800 | case MCAST_MSFILTER: | ||
801 | { | ||
802 | extern int sysctl_igmp_max_msf; | ||
803 | struct sockaddr_in *psin; | ||
804 | struct ip_msfilter *msf = NULL; | ||
805 | struct group_filter *gsf = NULL; | ||
806 | int msize, i, ifindex; | ||
807 | |||
808 | if (optlen < GROUP_FILTER_SIZE(0)) | ||
809 | goto e_inval; | ||
810 | if (optlen > sysctl_optmem_max) { | ||
811 | err = -ENOBUFS; | ||
738 | break; | 812 | break; |
739 | } | 813 | } |
740 | case MCAST_JOIN_SOURCE_GROUP: | 814 | gsf = kmalloc(optlen,GFP_KERNEL); |
741 | case MCAST_LEAVE_SOURCE_GROUP: | 815 | if (gsf == 0) { |
742 | case MCAST_BLOCK_SOURCE: | 816 | err = -ENOBUFS; |
743 | case MCAST_UNBLOCK_SOURCE: | ||
744 | { | ||
745 | struct group_source_req greqs; | ||
746 | struct ip_mreq_source mreqs; | ||
747 | struct sockaddr_in *psin; | ||
748 | int omode, add; | ||
749 | |||
750 | if (optlen != sizeof(struct group_source_req)) | ||
751 | goto e_inval; | ||
752 | if (copy_from_user(&greqs, optval, sizeof(greqs))) { | ||
753 | err = -EFAULT; | ||
754 | break; | ||
755 | } | ||
756 | if (greqs.gsr_group.ss_family != AF_INET || | ||
757 | greqs.gsr_source.ss_family != AF_INET) { | ||
758 | err = -EADDRNOTAVAIL; | ||
759 | break; | ||
760 | } | ||
761 | psin = (struct sockaddr_in *)&greqs.gsr_group; | ||
762 | mreqs.imr_multiaddr = psin->sin_addr.s_addr; | ||
763 | psin = (struct sockaddr_in *)&greqs.gsr_source; | ||
764 | mreqs.imr_sourceaddr = psin->sin_addr.s_addr; | ||
765 | mreqs.imr_interface = 0; /* use index for mc_source */ | ||
766 | |||
767 | if (optname == MCAST_BLOCK_SOURCE) { | ||
768 | omode = MCAST_EXCLUDE; | ||
769 | add = 1; | ||
770 | } else if (optname == MCAST_UNBLOCK_SOURCE) { | ||
771 | omode = MCAST_EXCLUDE; | ||
772 | add = 0; | ||
773 | } else if (optname == MCAST_JOIN_SOURCE_GROUP) { | ||
774 | struct ip_mreqn mreq; | ||
775 | |||
776 | psin = (struct sockaddr_in *)&greqs.gsr_group; | ||
777 | mreq.imr_multiaddr = psin->sin_addr; | ||
778 | mreq.imr_address.s_addr = 0; | ||
779 | mreq.imr_ifindex = greqs.gsr_interface; | ||
780 | err = ip_mc_join_group(sk, &mreq); | ||
781 | if (err && err != -EADDRINUSE) | ||
782 | break; | ||
783 | greqs.gsr_interface = mreq.imr_ifindex; | ||
784 | omode = MCAST_INCLUDE; | ||
785 | add = 1; | ||
786 | } else /* MCAST_LEAVE_SOURCE_GROUP */ { | ||
787 | omode = MCAST_INCLUDE; | ||
788 | add = 0; | ||
789 | } | ||
790 | err = ip_mc_source(add, omode, sk, &mreqs, | ||
791 | greqs.gsr_interface); | ||
792 | break; | 817 | break; |
793 | } | 818 | } |
794 | case MCAST_MSFILTER: | 819 | err = -EFAULT; |
795 | { | 820 | if (copy_from_user(gsf, optval, optlen)) { |
796 | extern int sysctl_igmp_max_msf; | 821 | goto mc_msf_out; |
797 | struct sockaddr_in *psin; | 822 | } |
798 | struct ip_msfilter *msf = NULL; | 823 | /* numsrc >= (4G-140)/128 overflow in 32 bits */ |
799 | struct group_filter *gsf = NULL; | 824 | if (gsf->gf_numsrc >= 0x1ffffff || |
800 | int msize, i, ifindex; | 825 | gsf->gf_numsrc > sysctl_igmp_max_msf) { |
801 | 826 | err = -ENOBUFS; | |
802 | if (optlen < GROUP_FILTER_SIZE(0)) | 827 | goto mc_msf_out; |
803 | goto e_inval; | 828 | } |
804 | if (optlen > sysctl_optmem_max) { | 829 | if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { |
805 | err = -ENOBUFS; | 830 | err = -EINVAL; |
806 | break; | 831 | goto mc_msf_out; |
807 | } | 832 | } |
808 | gsf = kmalloc(optlen,GFP_KERNEL); | 833 | msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); |
809 | if (gsf == 0) { | 834 | msf = kmalloc(msize,GFP_KERNEL); |
810 | err = -ENOBUFS; | 835 | if (msf == 0) { |
811 | break; | 836 | err = -ENOBUFS; |
812 | } | 837 | goto mc_msf_out; |
813 | err = -EFAULT; | 838 | } |
814 | if (copy_from_user(gsf, optval, optlen)) { | 839 | ifindex = gsf->gf_interface; |
815 | goto mc_msf_out; | 840 | psin = (struct sockaddr_in *)&gsf->gf_group; |
816 | } | 841 | if (psin->sin_family != AF_INET) { |
817 | /* numsrc >= (4G-140)/128 overflow in 32 bits */ | ||
818 | if (gsf->gf_numsrc >= 0x1ffffff || | ||
819 | gsf->gf_numsrc > sysctl_igmp_max_msf) { | ||
820 | err = -ENOBUFS; | ||
821 | goto mc_msf_out; | ||
822 | } | ||
823 | if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { | ||
824 | err = -EINVAL; | ||
825 | goto mc_msf_out; | ||
826 | } | ||
827 | msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); | ||
828 | msf = kmalloc(msize,GFP_KERNEL); | ||
829 | if (msf == 0) { | ||
830 | err = -ENOBUFS; | ||
831 | goto mc_msf_out; | ||
832 | } | ||
833 | ifindex = gsf->gf_interface; | ||
834 | psin = (struct sockaddr_in *)&gsf->gf_group; | ||
835 | if (psin->sin_family != AF_INET) { | ||
836 | err = -EADDRNOTAVAIL; | ||
837 | goto mc_msf_out; | ||
838 | } | ||
839 | msf->imsf_multiaddr = psin->sin_addr.s_addr; | ||
840 | msf->imsf_interface = 0; | ||
841 | msf->imsf_fmode = gsf->gf_fmode; | ||
842 | msf->imsf_numsrc = gsf->gf_numsrc; | ||
843 | err = -EADDRNOTAVAIL; | 842 | err = -EADDRNOTAVAIL; |
844 | for (i=0; i<gsf->gf_numsrc; ++i) { | 843 | goto mc_msf_out; |
845 | psin = (struct sockaddr_in *)&gsf->gf_slist[i]; | ||
846 | |||
847 | if (psin->sin_family != AF_INET) | ||
848 | goto mc_msf_out; | ||
849 | msf->imsf_slist[i] = psin->sin_addr.s_addr; | ||
850 | } | ||
851 | kfree(gsf); | ||
852 | gsf = NULL; | ||
853 | |||
854 | err = ip_mc_msfilter(sk, msf, ifindex); | ||
855 | mc_msf_out: | ||
856 | kfree(msf); | ||
857 | kfree(gsf); | ||
858 | break; | ||
859 | } | 844 | } |
860 | case IP_ROUTER_ALERT: | 845 | msf->imsf_multiaddr = psin->sin_addr.s_addr; |
861 | err = ip_ra_control(sk, val ? 1 : 0, NULL); | 846 | msf->imsf_interface = 0; |
862 | break; | 847 | msf->imsf_fmode = gsf->gf_fmode; |
863 | 848 | msf->imsf_numsrc = gsf->gf_numsrc; | |
864 | case IP_FREEBIND: | 849 | err = -EADDRNOTAVAIL; |
865 | if (optlen<1) | 850 | for (i=0; i<gsf->gf_numsrc; ++i) { |
866 | goto e_inval; | 851 | psin = (struct sockaddr_in *)&gsf->gf_slist[i]; |
867 | inet->freebind = !!val; | ||
868 | break; | ||
869 | 852 | ||
870 | case IP_IPSEC_POLICY: | 853 | if (psin->sin_family != AF_INET) |
871 | case IP_XFRM_POLICY: | 854 | goto mc_msf_out; |
872 | err = -EPERM; | 855 | msf->imsf_slist[i] = psin->sin_addr.s_addr; |
873 | if (!capable(CAP_NET_ADMIN)) | 856 | } |
874 | break; | 857 | kfree(gsf); |
875 | err = xfrm_user_policy(sk, optname, optval, optlen); | 858 | gsf = NULL; |
859 | |||
860 | err = ip_mc_msfilter(sk, msf, ifindex); | ||
861 | mc_msf_out: | ||
862 | kfree(msf); | ||
863 | kfree(gsf); | ||
864 | break; | ||
865 | } | ||
866 | case IP_ROUTER_ALERT: | ||
867 | err = ip_ra_control(sk, val ? 1 : 0, NULL); | ||
868 | break; | ||
869 | |||
870 | case IP_FREEBIND: | ||
871 | if (optlen<1) | ||
872 | goto e_inval; | ||
873 | inet->freebind = !!val; | ||
874 | break; | ||
875 | |||
876 | case IP_IPSEC_POLICY: | ||
877 | case IP_XFRM_POLICY: | ||
878 | err = -EPERM; | ||
879 | if (!capable(CAP_NET_ADMIN)) | ||
876 | break; | 880 | break; |
881 | err = xfrm_user_policy(sk, optname, optval, optlen); | ||
882 | break; | ||
877 | 883 | ||
878 | default: | 884 | default: |
879 | err = -ENOPROTOOPT; | 885 | err = -ENOPROTOOPT; |
880 | break; | 886 | break; |
881 | } | 887 | } |
882 | release_sock(sk); | 888 | release_sock(sk); |
883 | return err; | 889 | return err; |
@@ -948,214 +954,213 @@ EXPORT_SYMBOL(compat_ip_setsockopt); | |||
948 | */ | 954 | */ |
949 | 955 | ||
950 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, | 956 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, |
951 | char __user *optval, int __user *optlen) | 957 | char __user *optval, int __user *optlen) |
952 | { | 958 | { |
953 | struct inet_sock *inet = inet_sk(sk); | 959 | struct inet_sock *inet = inet_sk(sk); |
954 | int val; | 960 | int val; |
955 | int len; | 961 | int len; |
956 | 962 | ||
957 | if(level!=SOL_IP) | 963 | if (level != SOL_IP) |
958 | return -EOPNOTSUPP; | 964 | return -EOPNOTSUPP; |
959 | 965 | ||
960 | #ifdef CONFIG_IP_MROUTE | 966 | #ifdef CONFIG_IP_MROUTE |
961 | if(optname>=MRT_BASE && optname <=MRT_BASE+10) | 967 | if (optname >= MRT_BASE && optname <= MRT_BASE+10) { |
962 | { | ||
963 | return ip_mroute_getsockopt(sk,optname,optval,optlen); | 968 | return ip_mroute_getsockopt(sk,optname,optval,optlen); |
964 | } | 969 | } |
965 | #endif | 970 | #endif |
966 | 971 | ||
967 | if(get_user(len,optlen)) | 972 | if (get_user(len,optlen)) |
968 | return -EFAULT; | 973 | return -EFAULT; |
969 | if(len < 0) | 974 | if (len < 0) |
970 | return -EINVAL; | 975 | return -EINVAL; |
971 | 976 | ||
972 | lock_sock(sk); | 977 | lock_sock(sk); |
973 | 978 | ||
974 | switch(optname) { | 979 | switch (optname) { |
975 | case IP_OPTIONS: | 980 | case IP_OPTIONS: |
976 | { | 981 | { |
977 | unsigned char optbuf[sizeof(struct ip_options)+40]; | 982 | unsigned char optbuf[sizeof(struct ip_options)+40]; |
978 | struct ip_options * opt = (struct ip_options*)optbuf; | 983 | struct ip_options * opt = (struct ip_options*)optbuf; |
979 | opt->optlen = 0; | 984 | opt->optlen = 0; |
980 | if (inet->opt) | 985 | if (inet->opt) |
981 | memcpy(optbuf, inet->opt, | 986 | memcpy(optbuf, inet->opt, |
982 | sizeof(struct ip_options)+ | 987 | sizeof(struct ip_options)+ |
983 | inet->opt->optlen); | 988 | inet->opt->optlen); |
984 | release_sock(sk); | 989 | release_sock(sk); |
985 | 990 | ||
986 | if (opt->optlen == 0) | 991 | if (opt->optlen == 0) |
987 | return put_user(0, optlen); | 992 | return put_user(0, optlen); |
988 | 993 | ||
989 | ip_options_undo(opt); | 994 | ip_options_undo(opt); |
990 | 995 | ||
991 | len = min_t(unsigned int, len, opt->optlen); | 996 | len = min_t(unsigned int, len, opt->optlen); |
992 | if(put_user(len, optlen)) | 997 | if (put_user(len, optlen)) |
993 | return -EFAULT; | 998 | return -EFAULT; |
994 | if(copy_to_user(optval, opt->__data, len)) | 999 | if (copy_to_user(optval, opt->__data, len)) |
995 | return -EFAULT; | 1000 | return -EFAULT; |
996 | return 0; | 1001 | return 0; |
997 | } | 1002 | } |
998 | case IP_PKTINFO: | 1003 | case IP_PKTINFO: |
999 | val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; | 1004 | val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; |
1000 | break; | 1005 | break; |
1001 | case IP_RECVTTL: | 1006 | case IP_RECVTTL: |
1002 | val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; | 1007 | val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; |
1003 | break; | 1008 | break; |
1004 | case IP_RECVTOS: | 1009 | case IP_RECVTOS: |
1005 | val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; | 1010 | val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; |
1006 | break; | 1011 | break; |
1007 | case IP_RECVOPTS: | 1012 | case IP_RECVOPTS: |
1008 | val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; | 1013 | val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; |
1009 | break; | 1014 | break; |
1010 | case IP_RETOPTS: | 1015 | case IP_RETOPTS: |
1011 | val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; | 1016 | val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; |
1012 | break; | 1017 | break; |
1013 | case IP_PASSSEC: | 1018 | case IP_PASSSEC: |
1014 | val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; | 1019 | val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; |
1015 | break; | 1020 | break; |
1016 | case IP_TOS: | 1021 | case IP_TOS: |
1017 | val = inet->tos; | 1022 | val = inet->tos; |
1018 | break; | 1023 | break; |
1019 | case IP_TTL: | 1024 | case IP_TTL: |
1020 | val = (inet->uc_ttl == -1 ? | 1025 | val = (inet->uc_ttl == -1 ? |
1021 | sysctl_ip_default_ttl : | 1026 | sysctl_ip_default_ttl : |
1022 | inet->uc_ttl); | 1027 | inet->uc_ttl); |
1023 | break; | 1028 | break; |
1024 | case IP_HDRINCL: | 1029 | case IP_HDRINCL: |
1025 | val = inet->hdrincl; | 1030 | val = inet->hdrincl; |
1026 | break; | 1031 | break; |
1027 | case IP_MTU_DISCOVER: | 1032 | case IP_MTU_DISCOVER: |
1028 | val = inet->pmtudisc; | 1033 | val = inet->pmtudisc; |
1029 | break; | 1034 | break; |
1030 | case IP_MTU: | 1035 | case IP_MTU: |
1031 | { | 1036 | { |
1032 | struct dst_entry *dst; | 1037 | struct dst_entry *dst; |
1033 | val = 0; | 1038 | val = 0; |
1034 | dst = sk_dst_get(sk); | 1039 | dst = sk_dst_get(sk); |
1035 | if (dst) { | 1040 | if (dst) { |
1036 | val = dst_mtu(dst); | 1041 | val = dst_mtu(dst); |
1037 | dst_release(dst); | 1042 | dst_release(dst); |
1038 | } | ||
1039 | if (!val) { | ||
1040 | release_sock(sk); | ||
1041 | return -ENOTCONN; | ||
1042 | } | ||
1043 | break; | ||
1044 | } | 1043 | } |
1045 | case IP_RECVERR: | 1044 | if (!val) { |
1046 | val = inet->recverr; | ||
1047 | break; | ||
1048 | case IP_MULTICAST_TTL: | ||
1049 | val = inet->mc_ttl; | ||
1050 | break; | ||
1051 | case IP_MULTICAST_LOOP: | ||
1052 | val = inet->mc_loop; | ||
1053 | break; | ||
1054 | case IP_MULTICAST_IF: | ||
1055 | { | ||
1056 | struct in_addr addr; | ||
1057 | len = min_t(unsigned int, len, sizeof(struct in_addr)); | ||
1058 | addr.s_addr = inet->mc_addr; | ||
1059 | release_sock(sk); | 1045 | release_sock(sk); |
1060 | 1046 | return -ENOTCONN; | |
1061 | if(put_user(len, optlen)) | ||
1062 | return -EFAULT; | ||
1063 | if(copy_to_user(optval, &addr, len)) | ||
1064 | return -EFAULT; | ||
1065 | return 0; | ||
1066 | } | 1047 | } |
1067 | case IP_MSFILTER: | 1048 | break; |
1068 | { | 1049 | } |
1069 | struct ip_msfilter msf; | 1050 | case IP_RECVERR: |
1070 | int err; | 1051 | val = inet->recverr; |
1052 | break; | ||
1053 | case IP_MULTICAST_TTL: | ||
1054 | val = inet->mc_ttl; | ||
1055 | break; | ||
1056 | case IP_MULTICAST_LOOP: | ||
1057 | val = inet->mc_loop; | ||
1058 | break; | ||
1059 | case IP_MULTICAST_IF: | ||
1060 | { | ||
1061 | struct in_addr addr; | ||
1062 | len = min_t(unsigned int, len, sizeof(struct in_addr)); | ||
1063 | addr.s_addr = inet->mc_addr; | ||
1064 | release_sock(sk); | ||
1071 | 1065 | ||
1072 | if (len < IP_MSFILTER_SIZE(0)) { | 1066 | if (put_user(len, optlen)) |
1073 | release_sock(sk); | 1067 | return -EFAULT; |
1074 | return -EINVAL; | 1068 | if (copy_to_user(optval, &addr, len)) |
1075 | } | 1069 | return -EFAULT; |
1076 | if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { | 1070 | return 0; |
1077 | release_sock(sk); | 1071 | } |
1078 | return -EFAULT; | 1072 | case IP_MSFILTER: |
1079 | } | 1073 | { |
1080 | err = ip_mc_msfget(sk, &msf, | 1074 | struct ip_msfilter msf; |
1081 | (struct ip_msfilter __user *)optval, optlen); | 1075 | int err; |
1076 | |||
1077 | if (len < IP_MSFILTER_SIZE(0)) { | ||
1082 | release_sock(sk); | 1078 | release_sock(sk); |
1083 | return err; | 1079 | return -EINVAL; |
1084 | } | 1080 | } |
1085 | case MCAST_MSFILTER: | 1081 | if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { |
1086 | { | ||
1087 | struct group_filter gsf; | ||
1088 | int err; | ||
1089 | |||
1090 | if (len < GROUP_FILTER_SIZE(0)) { | ||
1091 | release_sock(sk); | ||
1092 | return -EINVAL; | ||
1093 | } | ||
1094 | if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { | ||
1095 | release_sock(sk); | ||
1096 | return -EFAULT; | ||
1097 | } | ||
1098 | err = ip_mc_gsfget(sk, &gsf, | ||
1099 | (struct group_filter __user *)optval, optlen); | ||
1100 | release_sock(sk); | 1082 | release_sock(sk); |
1101 | return err; | 1083 | return -EFAULT; |
1102 | } | 1084 | } |
1103 | case IP_PKTOPTIONS: | 1085 | err = ip_mc_msfget(sk, &msf, |
1104 | { | 1086 | (struct ip_msfilter __user *)optval, optlen); |
1105 | struct msghdr msg; | 1087 | release_sock(sk); |
1088 | return err; | ||
1089 | } | ||
1090 | case MCAST_MSFILTER: | ||
1091 | { | ||
1092 | struct group_filter gsf; | ||
1093 | int err; | ||
1106 | 1094 | ||
1095 | if (len < GROUP_FILTER_SIZE(0)) { | ||
1107 | release_sock(sk); | 1096 | release_sock(sk); |
1097 | return -EINVAL; | ||
1098 | } | ||
1099 | if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { | ||
1100 | release_sock(sk); | ||
1101 | return -EFAULT; | ||
1102 | } | ||
1103 | err = ip_mc_gsfget(sk, &gsf, | ||
1104 | (struct group_filter __user *)optval, optlen); | ||
1105 | release_sock(sk); | ||
1106 | return err; | ||
1107 | } | ||
1108 | case IP_PKTOPTIONS: | ||
1109 | { | ||
1110 | struct msghdr msg; | ||
1111 | |||
1112 | release_sock(sk); | ||
1108 | 1113 | ||
1109 | if (sk->sk_type != SOCK_STREAM) | 1114 | if (sk->sk_type != SOCK_STREAM) |
1110 | return -ENOPROTOOPT; | 1115 | return -ENOPROTOOPT; |
1111 | 1116 | ||
1112 | msg.msg_control = optval; | 1117 | msg.msg_control = optval; |
1113 | msg.msg_controllen = len; | 1118 | msg.msg_controllen = len; |
1114 | msg.msg_flags = 0; | 1119 | msg.msg_flags = 0; |
1115 | 1120 | ||
1116 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { | 1121 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { |
1117 | struct in_pktinfo info; | 1122 | struct in_pktinfo info; |
1118 | 1123 | ||
1119 | info.ipi_addr.s_addr = inet->rcv_saddr; | 1124 | info.ipi_addr.s_addr = inet->rcv_saddr; |
1120 | info.ipi_spec_dst.s_addr = inet->rcv_saddr; | 1125 | info.ipi_spec_dst.s_addr = inet->rcv_saddr; |
1121 | info.ipi_ifindex = inet->mc_index; | 1126 | info.ipi_ifindex = inet->mc_index; |
1122 | put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); | 1127 | put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); |
1123 | } | ||
1124 | if (inet->cmsg_flags & IP_CMSG_TTL) { | ||
1125 | int hlim = inet->mc_ttl; | ||
1126 | put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); | ||
1127 | } | ||
1128 | len -= msg.msg_controllen; | ||
1129 | return put_user(len, optlen); | ||
1130 | } | 1128 | } |
1131 | case IP_FREEBIND: | 1129 | if (inet->cmsg_flags & IP_CMSG_TTL) { |
1132 | val = inet->freebind; | 1130 | int hlim = inet->mc_ttl; |
1133 | break; | 1131 | put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); |
1134 | default: | 1132 | } |
1135 | release_sock(sk); | 1133 | len -= msg.msg_controllen; |
1136 | return -ENOPROTOOPT; | 1134 | return put_user(len, optlen); |
1135 | } | ||
1136 | case IP_FREEBIND: | ||
1137 | val = inet->freebind; | ||
1138 | break; | ||
1139 | default: | ||
1140 | release_sock(sk); | ||
1141 | return -ENOPROTOOPT; | ||
1137 | } | 1142 | } |
1138 | release_sock(sk); | 1143 | release_sock(sk); |
1139 | 1144 | ||
1140 | if (len < sizeof(int) && len > 0 && val>=0 && val<255) { | 1145 | if (len < sizeof(int) && len > 0 && val>=0 && val<255) { |
1141 | unsigned char ucval = (unsigned char)val; | 1146 | unsigned char ucval = (unsigned char)val; |
1142 | len = 1; | 1147 | len = 1; |
1143 | if(put_user(len, optlen)) | 1148 | if (put_user(len, optlen)) |
1144 | return -EFAULT; | 1149 | return -EFAULT; |
1145 | if(copy_to_user(optval,&ucval,1)) | 1150 | if (copy_to_user(optval,&ucval,1)) |
1146 | return -EFAULT; | 1151 | return -EFAULT; |
1147 | } else { | 1152 | } else { |
1148 | len = min_t(unsigned int, sizeof(int), len); | 1153 | len = min_t(unsigned int, sizeof(int), len); |
1149 | if(put_user(len, optlen)) | 1154 | if (put_user(len, optlen)) |
1150 | return -EFAULT; | 1155 | return -EFAULT; |
1151 | if(copy_to_user(optval,&val,len)) | 1156 | if (copy_to_user(optval,&val,len)) |
1152 | return -EFAULT; | 1157 | return -EFAULT; |
1153 | } | 1158 | } |
1154 | return 0; | 1159 | return 0; |
1155 | } | 1160 | } |
1156 | 1161 | ||
1157 | int ip_getsockopt(struct sock *sk, int level, | 1162 | int ip_getsockopt(struct sock *sk, int level, |
1158 | int optname, char __user *optval, int __user *optlen) | 1163 | int optname, char __user *optval, int __user *optlen) |
1159 | { | 1164 | { |
1160 | int err; | 1165 | int err; |
1161 | 1166 | ||
@@ -1169,7 +1174,7 @@ int ip_getsockopt(struct sock *sk, int level, | |||
1169 | ) { | 1174 | ) { |
1170 | int len; | 1175 | int len; |
1171 | 1176 | ||
1172 | if(get_user(len,optlen)) | 1177 | if (get_user(len,optlen)) |
1173 | return -EFAULT; | 1178 | return -EFAULT; |
1174 | 1179 | ||
1175 | lock_sock(sk); | 1180 | lock_sock(sk); |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index aa704b88f014..ab86137c71d2 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -43,21 +43,15 @@ static LIST_HEAD(ipcomp_tfms_list); | |||
43 | 43 | ||
44 | static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) | 44 | static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) |
45 | { | 45 | { |
46 | int err, plen, dlen; | ||
47 | struct ipcomp_data *ipcd = x->data; | 46 | struct ipcomp_data *ipcd = x->data; |
48 | u8 *start, *scratch; | 47 | const int plen = skb->len; |
49 | struct crypto_comp *tfm; | 48 | int dlen = IPCOMP_SCRATCH_SIZE; |
50 | int cpu; | 49 | const u8 *start = skb->data; |
51 | 50 | const int cpu = get_cpu(); | |
52 | plen = skb->len; | 51 | u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); |
53 | dlen = IPCOMP_SCRATCH_SIZE; | 52 | struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); |
54 | start = skb->data; | 53 | int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); |
55 | 54 | ||
56 | cpu = get_cpu(); | ||
57 | scratch = *per_cpu_ptr(ipcomp_scratches, cpu); | ||
58 | tfm = *per_cpu_ptr(ipcd->tfms, cpu); | ||
59 | |||
60 | err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); | ||
61 | if (err) | 55 | if (err) |
62 | goto out; | 56 | goto out; |
63 | 57 | ||
@@ -72,7 +66,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) | |||
72 | 66 | ||
73 | skb->truesize += dlen - plen; | 67 | skb->truesize += dlen - plen; |
74 | __skb_put(skb, dlen - plen); | 68 | __skb_put(skb, dlen - plen); |
75 | memcpy(skb->data, scratch, dlen); | 69 | skb_copy_to_linear_data(skb, scratch, dlen); |
76 | out: | 70 | out: |
77 | put_cpu(); | 71 | put_cpu(); |
78 | return err; | 72 | return err; |
@@ -90,10 +84,10 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) | |||
90 | skb->ip_summed = CHECKSUM_NONE; | 84 | skb->ip_summed = CHECKSUM_NONE; |
91 | 85 | ||
92 | /* Remove ipcomp header and decompress original payload */ | 86 | /* Remove ipcomp header and decompress original payload */ |
93 | iph = skb->nh.iph; | 87 | iph = ip_hdr(skb); |
94 | ipch = (void *)skb->data; | 88 | ipch = (void *)skb->data; |
95 | iph->protocol = ipch->nexthdr; | 89 | iph->protocol = ipch->nexthdr; |
96 | skb->h.raw = skb->nh.raw + sizeof(*ipch); | 90 | skb->transport_header = skb->network_header + sizeof(*ipch); |
97 | __skb_pull(skb, sizeof(*ipch)); | 91 | __skb_pull(skb, sizeof(*ipch)); |
98 | err = ipcomp_decompress(x, skb); | 92 | err = ipcomp_decompress(x, skb); |
99 | 93 | ||
@@ -103,23 +97,16 @@ out: | |||
103 | 97 | ||
104 | static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) | 98 | static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) |
105 | { | 99 | { |
106 | int err, plen, dlen, ihlen; | ||
107 | struct iphdr *iph = skb->nh.iph; | ||
108 | struct ipcomp_data *ipcd = x->data; | 100 | struct ipcomp_data *ipcd = x->data; |
109 | u8 *start, *scratch; | 101 | const int ihlen = ip_hdrlen(skb); |
110 | struct crypto_comp *tfm; | 102 | const int plen = skb->len - ihlen; |
111 | int cpu; | 103 | int dlen = IPCOMP_SCRATCH_SIZE; |
104 | u8 *start = skb->data + ihlen; | ||
105 | const int cpu = get_cpu(); | ||
106 | u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); | ||
107 | struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); | ||
108 | int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); | ||
112 | 109 | ||
113 | ihlen = iph->ihl * 4; | ||
114 | plen = skb->len - ihlen; | ||
115 | dlen = IPCOMP_SCRATCH_SIZE; | ||
116 | start = skb->data + ihlen; | ||
117 | |||
118 | cpu = get_cpu(); | ||
119 | scratch = *per_cpu_ptr(ipcomp_scratches, cpu); | ||
120 | tfm = *per_cpu_ptr(ipcd->tfms, cpu); | ||
121 | |||
122 | err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); | ||
123 | if (err) | 110 | if (err) |
124 | goto out; | 111 | goto out; |
125 | 112 | ||
@@ -142,12 +129,11 @@ out: | |||
142 | static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) | 129 | static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) |
143 | { | 130 | { |
144 | int err; | 131 | int err; |
145 | struct iphdr *iph; | ||
146 | struct ip_comp_hdr *ipch; | 132 | struct ip_comp_hdr *ipch; |
147 | struct ipcomp_data *ipcd = x->data; | 133 | struct ipcomp_data *ipcd = x->data; |
148 | int hdr_len = 0; | 134 | int hdr_len = 0; |
135 | struct iphdr *iph = ip_hdr(skb); | ||
149 | 136 | ||
150 | iph = skb->nh.iph; | ||
151 | iph->tot_len = htons(skb->len); | 137 | iph->tot_len = htons(skb->len); |
152 | hdr_len = iph->ihl * 4; | 138 | hdr_len = iph->ihl * 4; |
153 | if ((skb->len - hdr_len) < ipcd->threshold) { | 139 | if ((skb->len - hdr_len) < ipcd->threshold) { |
@@ -159,7 +145,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) | |||
159 | goto out_ok; | 145 | goto out_ok; |
160 | 146 | ||
161 | err = ipcomp_compress(x, skb); | 147 | err = ipcomp_compress(x, skb); |
162 | iph = skb->nh.iph; | 148 | iph = ip_hdr(skb); |
163 | 149 | ||
164 | if (err) { | 150 | if (err) { |
165 | goto out_ok; | 151 | goto out_ok; |
@@ -188,8 +174,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
188 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); | 174 | struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); |
189 | struct xfrm_state *x; | 175 | struct xfrm_state *x; |
190 | 176 | ||
191 | if (skb->h.icmph->type != ICMP_DEST_UNREACH || | 177 | if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || |
192 | skb->h.icmph->code != ICMP_FRAG_NEEDED) | 178 | icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) |
193 | return; | 179 | return; |
194 | 180 | ||
195 | spi = htonl(ntohs(ipch->cpi)); | 181 | spi = htonl(ntohs(ipch->cpi)); |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index cf49de1a4983..597c800b2fdc 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt | |||
432 | goto drop; | 432 | goto drop; |
433 | 433 | ||
434 | /* Basic sanity checks can be done without the lock. */ | 434 | /* Basic sanity checks can be done without the lock. */ |
435 | rarp = (struct arphdr *)skb->h.raw; | 435 | rarp = (struct arphdr *)skb_transport_header(skb); |
436 | 436 | ||
437 | /* If this test doesn't pass, it's not IP, or we should | 437 | /* If this test doesn't pass, it's not IP, or we should |
438 | * ignore it anyway. | 438 | * ignore it anyway. |
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt | |||
455 | goto drop; | 455 | goto drop; |
456 | 456 | ||
457 | /* OK, it is all there and looks valid, process... */ | 457 | /* OK, it is all there and looks valid, process... */ |
458 | rarp = (struct arphdr *)skb->h.raw; | 458 | rarp = (struct arphdr *)skb_transport_header(skb); |
459 | rarp_ptr = (unsigned char *) (rarp + 1); | 459 | rarp_ptr = (unsigned char *) (rarp + 1); |
460 | 460 | ||
461 | /* One reply at a time, please. */ | 461 | /* One reply at a time, please. */ |
@@ -702,7 +702,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d | |||
702 | memset(b, 0, sizeof(struct bootp_pkt)); | 702 | memset(b, 0, sizeof(struct bootp_pkt)); |
703 | 703 | ||
704 | /* Construct IP header */ | 704 | /* Construct IP header */ |
705 | skb->nh.iph = h = &b->iph; | 705 | skb_reset_network_header(skb); |
706 | h = ip_hdr(skb); | ||
706 | h->version = 4; | 707 | h->version = 4; |
707 | h->ihl = 5; | 708 | h->ihl = 5; |
708 | h->tot_len = htons(sizeof(struct bootp_pkt)); | 709 | h->tot_len = htons(sizeof(struct bootp_pkt)); |
@@ -782,7 +783,7 @@ static void __init ic_do_bootp_ext(u8 *ext) | |||
782 | u8 *c; | 783 | u8 *c; |
783 | 784 | ||
784 | printk("DHCP/BOOTP: Got extension %d:",*ext); | 785 | printk("DHCP/BOOTP: Got extension %d:",*ext); |
785 | for(c=ext+2; c<ext+2+ext[1]; c++) | 786 | for (c=ext+2; c<ext+2+ext[1]; c++) |
786 | printk(" %02x", *c); | 787 | printk(" %02x", *c); |
787 | printk("\n"); | 788 | printk("\n"); |
788 | #endif | 789 | #endif |
@@ -845,7 +846,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
845 | sizeof(struct udphdr))) | 846 | sizeof(struct udphdr))) |
846 | goto drop; | 847 | goto drop; |
847 | 848 | ||
848 | b = (struct bootp_pkt *) skb->nh.iph; | 849 | b = (struct bootp_pkt *)skb_network_header(skb); |
849 | h = &b->iph; | 850 | h = &b->iph; |
850 | 851 | ||
851 | if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP) | 852 | if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP) |
@@ -883,7 +884,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
883 | if (!pskb_may_pull(skb, skb->len)) | 884 | if (!pskb_may_pull(skb, skb->len)) |
884 | goto drop; | 885 | goto drop; |
885 | 886 | ||
886 | b = (struct bootp_pkt *) skb->nh.iph; | 887 | b = (struct bootp_pkt *)skb_network_header(skb); |
887 | h = &b->iph; | 888 | h = &b->iph; |
888 | 889 | ||
889 | /* One reply at a time, please. */ | 890 | /* One reply at a time, please. */ |
@@ -938,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
938 | if (opt[1] >= 4) | 939 | if (opt[1] >= 4) |
939 | memcpy(&server_id, opt + 2, 4); | 940 | memcpy(&server_id, opt + 2, 4); |
940 | break; | 941 | break; |
941 | }; | 942 | } |
942 | } | 943 | } |
943 | 944 | ||
944 | #ifdef IPCONFIG_DEBUG | 945 | #ifdef IPCONFIG_DEBUG |
@@ -983,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
983 | ic_myaddr = NONE; | 984 | ic_myaddr = NONE; |
984 | ic_servaddr = NONE; | 985 | ic_servaddr = NONE; |
985 | goto drop_unlock; | 986 | goto drop_unlock; |
986 | }; | 987 | } |
987 | 988 | ||
988 | ic_dhcp_msgtype = mt; | 989 | ic_dhcp_msgtype = mt; |
989 | 990 | ||
@@ -1094,7 +1095,7 @@ static int __init ic_dynamic(void) | |||
1094 | retries = CONF_SEND_RETRIES; | 1095 | retries = CONF_SEND_RETRIES; |
1095 | get_random_bytes(&timeout, sizeof(timeout)); | 1096 | get_random_bytes(&timeout, sizeof(timeout)); |
1096 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); | 1097 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); |
1097 | for(;;) { | 1098 | for (;;) { |
1098 | #ifdef IPCONFIG_BOOTP | 1099 | #ifdef IPCONFIG_BOOTP |
1099 | if (do_bootp && (d->able & IC_BOOTP)) | 1100 | if (do_bootp && (d->able & IC_BOOTP)) |
1100 | ic_bootp_send_if(d, jiffies - start_jiffies); | 1101 | ic_bootp_send_if(d, jiffies - start_jiffies); |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 3ec5ce0f5498..ebd2f2d532f6 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -157,10 +157,10 @@ static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local) | |||
157 | return NULL; | 157 | return NULL; |
158 | } | 158 | } |
159 | 159 | ||
160 | static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) | 160 | static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms) |
161 | { | 161 | { |
162 | __be32 remote = t->parms.iph.daddr; | 162 | __be32 remote = parms->iph.daddr; |
163 | __be32 local = t->parms.iph.saddr; | 163 | __be32 local = parms->iph.saddr; |
164 | unsigned h = 0; | 164 | unsigned h = 0; |
165 | int prio = 0; | 165 | int prio = 0; |
166 | 166 | ||
@@ -175,6 +175,10 @@ static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) | |||
175 | return &tunnels[prio][h]; | 175 | return &tunnels[prio][h]; |
176 | } | 176 | } |
177 | 177 | ||
178 | static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) | ||
179 | { | ||
180 | return __ipip_bucket(&t->parms); | ||
181 | } | ||
178 | 182 | ||
179 | static void ipip_tunnel_unlink(struct ip_tunnel *t) | 183 | static void ipip_tunnel_unlink(struct ip_tunnel *t) |
180 | { | 184 | { |
@@ -206,19 +210,9 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c | |||
206 | __be32 local = parms->iph.saddr; | 210 | __be32 local = parms->iph.saddr; |
207 | struct ip_tunnel *t, **tp, *nt; | 211 | struct ip_tunnel *t, **tp, *nt; |
208 | struct net_device *dev; | 212 | struct net_device *dev; |
209 | unsigned h = 0; | ||
210 | int prio = 0; | ||
211 | char name[IFNAMSIZ]; | 213 | char name[IFNAMSIZ]; |
212 | 214 | ||
213 | if (remote) { | 215 | for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) { |
214 | prio |= 2; | ||
215 | h ^= HASH(remote); | ||
216 | } | ||
217 | if (local) { | ||
218 | prio |= 1; | ||
219 | h ^= HASH(local); | ||
220 | } | ||
221 | for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { | ||
222 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) | 216 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) |
223 | return t; | 217 | return t; |
224 | } | 218 | } |
@@ -280,8 +274,8 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
280 | ICMP in the real Internet is absolutely infeasible. | 274 | ICMP in the real Internet is absolutely infeasible. |
281 | */ | 275 | */ |
282 | struct iphdr *iph = (struct iphdr*)skb->data; | 276 | struct iphdr *iph = (struct iphdr*)skb->data; |
283 | int type = skb->h.icmph->type; | 277 | const int type = icmp_hdr(skb)->type; |
284 | int code = skb->h.icmph->code; | 278 | const int code = icmp_hdr(skb)->code; |
285 | struct ip_tunnel *t; | 279 | struct ip_tunnel *t; |
286 | int err; | 280 | int err; |
287 | 281 | ||
@@ -336,8 +330,8 @@ out: | |||
336 | struct iphdr *iph = (struct iphdr*)dp; | 330 | struct iphdr *iph = (struct iphdr*)dp; |
337 | int hlen = iph->ihl<<2; | 331 | int hlen = iph->ihl<<2; |
338 | struct iphdr *eiph; | 332 | struct iphdr *eiph; |
339 | int type = skb->h.icmph->type; | 333 | const int type = icmp_hdr(skb)->type; |
340 | int code = skb->h.icmph->code; | 334 | const int code = icmp_hdr(skb)->code; |
341 | int rel_type = 0; | 335 | int rel_type = 0; |
342 | int rel_code = 0; | 336 | int rel_code = 0; |
343 | __be32 rel_info = 0; | 337 | __be32 rel_info = 0; |
@@ -354,7 +348,7 @@ out: | |||
354 | default: | 348 | default: |
355 | return 0; | 349 | return 0; |
356 | case ICMP_PARAMETERPROB: | 350 | case ICMP_PARAMETERPROB: |
357 | n = ntohl(skb->h.icmph->un.gateway) >> 24; | 351 | n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; |
358 | if (n < hlen) | 352 | if (n < hlen) |
359 | return 0; | 353 | return 0; |
360 | 354 | ||
@@ -373,7 +367,7 @@ out: | |||
373 | return 0; | 367 | return 0; |
374 | case ICMP_FRAG_NEEDED: | 368 | case ICMP_FRAG_NEEDED: |
375 | /* And it is the only really necessary thing :-) */ | 369 | /* And it is the only really necessary thing :-) */ |
376 | n = ntohs(skb->h.icmph->un.frag.mtu); | 370 | n = ntohs(icmp_hdr(skb)->un.frag.mtu); |
377 | if (n < hlen+68) | 371 | if (n < hlen+68) |
378 | return 0; | 372 | return 0; |
379 | n -= hlen; | 373 | n -= hlen; |
@@ -405,7 +399,7 @@ out: | |||
405 | dst_release(skb2->dst); | 399 | dst_release(skb2->dst); |
406 | skb2->dst = NULL; | 400 | skb2->dst = NULL; |
407 | skb_pull(skb2, skb->data - (u8*)eiph); | 401 | skb_pull(skb2, skb->data - (u8*)eiph); |
408 | skb2->nh.raw = skb2->data; | 402 | skb_reset_network_header(skb2); |
409 | 403 | ||
410 | /* Try to guess incoming interface */ | 404 | /* Try to guess incoming interface */ |
411 | memset(&fl, 0, sizeof(fl)); | 405 | memset(&fl, 0, sizeof(fl)); |
@@ -461,9 +455,10 @@ out: | |||
461 | #endif | 455 | #endif |
462 | } | 456 | } |
463 | 457 | ||
464 | static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb) | 458 | static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, |
459 | struct sk_buff *skb) | ||
465 | { | 460 | { |
466 | struct iphdr *inner_iph = skb->nh.iph; | 461 | struct iphdr *inner_iph = ip_hdr(skb); |
467 | 462 | ||
468 | if (INET_ECN_is_ce(outer_iph->tos)) | 463 | if (INET_ECN_is_ce(outer_iph->tos)) |
469 | IP_ECN_set_ce(inner_iph); | 464 | IP_ECN_set_ce(inner_iph); |
@@ -471,10 +466,8 @@ static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff | |||
471 | 466 | ||
472 | static int ipip_rcv(struct sk_buff *skb) | 467 | static int ipip_rcv(struct sk_buff *skb) |
473 | { | 468 | { |
474 | struct iphdr *iph; | ||
475 | struct ip_tunnel *tunnel; | 469 | struct ip_tunnel *tunnel; |
476 | 470 | const struct iphdr *iph = ip_hdr(skb); | |
477 | iph = skb->nh.iph; | ||
478 | 471 | ||
479 | read_lock(&ipip_lock); | 472 | read_lock(&ipip_lock); |
480 | if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { | 473 | if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { |
@@ -486,8 +479,8 @@ static int ipip_rcv(struct sk_buff *skb) | |||
486 | 479 | ||
487 | secpath_reset(skb); | 480 | secpath_reset(skb); |
488 | 481 | ||
489 | skb->mac.raw = skb->nh.raw; | 482 | skb->mac_header = skb->network_header; |
490 | skb->nh.raw = skb->data; | 483 | skb_reset_network_header(skb); |
491 | skb->protocol = htons(ETH_P_IP); | 484 | skb->protocol = htons(ETH_P_IP); |
492 | skb->pkt_type = PACKET_HOST; | 485 | skb->pkt_type = PACKET_HOST; |
493 | 486 | ||
@@ -521,7 +514,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
521 | __be16 df = tiph->frag_off; | 514 | __be16 df = tiph->frag_off; |
522 | struct rtable *rt; /* Route to the other host */ | 515 | struct rtable *rt; /* Route to the other host */ |
523 | struct net_device *tdev; /* Device to other host */ | 516 | struct net_device *tdev; /* Device to other host */ |
524 | struct iphdr *old_iph = skb->nh.iph; | 517 | struct iphdr *old_iph = ip_hdr(skb); |
525 | struct iphdr *iph; /* Our new IP header */ | 518 | struct iphdr *iph; /* Our new IP header */ |
526 | int max_headroom; /* The extra header space needed */ | 519 | int max_headroom; /* The extra header space needed */ |
527 | __be32 dst = tiph->daddr; | 520 | __be32 dst = tiph->daddr; |
@@ -615,11 +608,12 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
615 | skb_set_owner_w(new_skb, skb->sk); | 608 | skb_set_owner_w(new_skb, skb->sk); |
616 | dev_kfree_skb(skb); | 609 | dev_kfree_skb(skb); |
617 | skb = new_skb; | 610 | skb = new_skb; |
618 | old_iph = skb->nh.iph; | 611 | old_iph = ip_hdr(skb); |
619 | } | 612 | } |
620 | 613 | ||
621 | skb->h.raw = skb->nh.raw; | 614 | skb->transport_header = skb->network_header; |
622 | skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); | 615 | skb_push(skb, sizeof(struct iphdr)); |
616 | skb_reset_network_header(skb); | ||
623 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 617 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
624 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 618 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
625 | IPSKB_REROUTED); | 619 | IPSKB_REROUTED); |
@@ -630,7 +624,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
630 | * Push down and install the IPIP header. | 624 | * Push down and install the IPIP header. |
631 | */ | 625 | */ |
632 | 626 | ||
633 | iph = skb->nh.iph; | 627 | iph = ip_hdr(skb); |
634 | iph->version = 4; | 628 | iph->version = 4; |
635 | iph->ihl = sizeof(struct iphdr)>>2; | 629 | iph->ihl = sizeof(struct iphdr)>>2; |
636 | iph->frag_off = df; | 630 | iph->frag_off = df; |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 601e3df69258..0ebae413ae87 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <linux/netfilter_ipv4.h> | 62 | #include <linux/netfilter_ipv4.h> |
63 | #include <net/ipip.h> | 63 | #include <net/ipip.h> |
64 | #include <net/checksum.h> | 64 | #include <net/checksum.h> |
65 | #include <net/netlink.h> | ||
65 | 66 | ||
66 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) | 67 | #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) |
67 | #define CONFIG_IP_PIMSM 1 | 68 | #define CONFIG_IP_PIMSM 1 |
@@ -302,8 +303,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c) | |||
302 | 303 | ||
303 | atomic_dec(&cache_resolve_queue_len); | 304 | atomic_dec(&cache_resolve_queue_len); |
304 | 305 | ||
305 | while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { | 306 | while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { |
306 | if (skb->nh.iph->version == 0) { | 307 | if (ip_hdr(skb)->version == 0) { |
307 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 308 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
308 | nlh->nlmsg_type = NLMSG_ERROR; | 309 | nlh->nlmsg_type = NLMSG_ERROR; |
309 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 310 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
@@ -479,7 +480,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) | |||
479 | static struct mfc_cache *ipmr_cache_alloc(void) | 480 | static struct mfc_cache *ipmr_cache_alloc(void) |
480 | { | 481 | { |
481 | struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); | 482 | struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); |
482 | if(c==NULL) | 483 | if (c==NULL) |
483 | return NULL; | 484 | return NULL; |
484 | c->mfc_un.res.minvif = MAXVIFS; | 485 | c->mfc_un.res.minvif = MAXVIFS; |
485 | return c; | 486 | return c; |
@@ -488,7 +489,7 @@ static struct mfc_cache *ipmr_cache_alloc(void) | |||
488 | static struct mfc_cache *ipmr_cache_alloc_unres(void) | 489 | static struct mfc_cache *ipmr_cache_alloc_unres(void) |
489 | { | 490 | { |
490 | struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); | 491 | struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); |
491 | if(c==NULL) | 492 | if (c==NULL) |
492 | return NULL; | 493 | return NULL; |
493 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 494 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
494 | c->mfc_un.unres.expires = jiffies + 10*HZ; | 495 | c->mfc_un.unres.expires = jiffies + 10*HZ; |
@@ -508,12 +509,13 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
508 | * Play the pending entries through our router | 509 | * Play the pending entries through our router |
509 | */ | 510 | */ |
510 | 511 | ||
511 | while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { | 512 | while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { |
512 | if (skb->nh.iph->version == 0) { | 513 | if (ip_hdr(skb)->version == 0) { |
513 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 514 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
514 | 515 | ||
515 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { | 516 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { |
516 | nlh->nlmsg_len = skb->tail - (u8*)nlh; | 517 | nlh->nlmsg_len = (skb_tail_pointer(skb) - |
518 | (u8 *)nlh); | ||
517 | } else { | 519 | } else { |
518 | nlh->nlmsg_type = NLMSG_ERROR; | 520 | nlh->nlmsg_type = NLMSG_ERROR; |
519 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); | 521 | nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); |
@@ -539,7 +541,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
539 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | 541 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) |
540 | { | 542 | { |
541 | struct sk_buff *skb; | 543 | struct sk_buff *skb; |
542 | int ihl = pkt->nh.iph->ihl<<2; | 544 | const int ihl = ip_hdrlen(pkt); |
543 | struct igmphdr *igmp; | 545 | struct igmphdr *igmp; |
544 | struct igmpmsg *msg; | 546 | struct igmpmsg *msg; |
545 | int ret; | 547 | int ret; |
@@ -551,7 +553,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
551 | #endif | 553 | #endif |
552 | skb = alloc_skb(128, GFP_ATOMIC); | 554 | skb = alloc_skb(128, GFP_ATOMIC); |
553 | 555 | ||
554 | if(!skb) | 556 | if (!skb) |
555 | return -ENOBUFS; | 557 | return -ENOBUFS; |
556 | 558 | ||
557 | #ifdef CONFIG_IP_PIMSM | 559 | #ifdef CONFIG_IP_PIMSM |
@@ -561,14 +563,17 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
561 | And all this only to mangle msg->im_msgtype and | 563 | And all this only to mangle msg->im_msgtype and |
562 | to set msg->im_mbz to "mbz" :-) | 564 | to set msg->im_mbz to "mbz" :-) |
563 | */ | 565 | */ |
564 | msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr)); | 566 | skb_push(skb, sizeof(struct iphdr)); |
565 | skb->nh.raw = skb->h.raw = (u8*)msg; | 567 | skb_reset_network_header(skb); |
566 | memcpy(msg, pkt->nh.raw, sizeof(struct iphdr)); | 568 | skb_reset_transport_header(skb); |
569 | msg = (struct igmpmsg *)skb_network_header(skb); | ||
570 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); | ||
567 | msg->im_msgtype = IGMPMSG_WHOLEPKT; | 571 | msg->im_msgtype = IGMPMSG_WHOLEPKT; |
568 | msg->im_mbz = 0; | 572 | msg->im_mbz = 0; |
569 | msg->im_vif = reg_vif_num; | 573 | msg->im_vif = reg_vif_num; |
570 | skb->nh.iph->ihl = sizeof(struct iphdr) >> 2; | 574 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; |
571 | skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr)); | 575 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + |
576 | sizeof(struct iphdr)); | ||
572 | } else | 577 | } else |
573 | #endif | 578 | #endif |
574 | { | 579 | { |
@@ -577,10 +582,11 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
577 | * Copy the IP header | 582 | * Copy the IP header |
578 | */ | 583 | */ |
579 | 584 | ||
580 | skb->nh.iph = (struct iphdr *)skb_put(skb, ihl); | 585 | skb->network_header = skb->tail; |
581 | memcpy(skb->data,pkt->data,ihl); | 586 | skb_put(skb, ihl); |
582 | skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */ | 587 | skb_copy_to_linear_data(skb, pkt->data, ihl); |
583 | msg = (struct igmpmsg*)skb->nh.iph; | 588 | ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ |
589 | msg = (struct igmpmsg *)skb_network_header(skb); | ||
584 | msg->im_vif = vifi; | 590 | msg->im_vif = vifi; |
585 | skb->dst = dst_clone(pkt->dst); | 591 | skb->dst = dst_clone(pkt->dst); |
586 | 592 | ||
@@ -592,8 +598,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
592 | igmp->type = | 598 | igmp->type = |
593 | msg->im_msgtype = assert; | 599 | msg->im_msgtype = assert; |
594 | igmp->code = 0; | 600 | igmp->code = 0; |
595 | skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */ | 601 | ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ |
596 | skb->h.raw = skb->nh.raw; | 602 | skb->transport_header = skb->network_header; |
597 | } | 603 | } |
598 | 604 | ||
599 | if (mroute_socket == NULL) { | 605 | if (mroute_socket == NULL) { |
@@ -622,11 +628,12 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
622 | { | 628 | { |
623 | int err; | 629 | int err; |
624 | struct mfc_cache *c; | 630 | struct mfc_cache *c; |
631 | const struct iphdr *iph = ip_hdr(skb); | ||
625 | 632 | ||
626 | spin_lock_bh(&mfc_unres_lock); | 633 | spin_lock_bh(&mfc_unres_lock); |
627 | for (c=mfc_unres_queue; c; c=c->next) { | 634 | for (c=mfc_unres_queue; c; c=c->next) { |
628 | if (c->mfc_mcastgrp == skb->nh.iph->daddr && | 635 | if (c->mfc_mcastgrp == iph->daddr && |
629 | c->mfc_origin == skb->nh.iph->saddr) | 636 | c->mfc_origin == iph->saddr) |
630 | break; | 637 | break; |
631 | } | 638 | } |
632 | 639 | ||
@@ -646,9 +653,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
646 | /* | 653 | /* |
647 | * Fill in the new cache entry | 654 | * Fill in the new cache entry |
648 | */ | 655 | */ |
649 | c->mfc_parent=-1; | 656 | c->mfc_parent = -1; |
650 | c->mfc_origin=skb->nh.iph->saddr; | 657 | c->mfc_origin = iph->saddr; |
651 | c->mfc_mcastgrp=skb->nh.iph->daddr; | 658 | c->mfc_mcastgrp = iph->daddr; |
652 | 659 | ||
653 | /* | 660 | /* |
654 | * Reflect first query at mrouted. | 661 | * Reflect first query at mrouted. |
@@ -734,7 +741,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
734 | return 0; | 741 | return 0; |
735 | } | 742 | } |
736 | 743 | ||
737 | if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) | 744 | if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) |
738 | return -EINVAL; | 745 | return -EINVAL; |
739 | 746 | ||
740 | c=ipmr_cache_alloc(); | 747 | c=ipmr_cache_alloc(); |
@@ -788,7 +795,7 @@ static void mroute_clean_tables(struct sock *sk) | |||
788 | /* | 795 | /* |
789 | * Shut down all active vif entries | 796 | * Shut down all active vif entries |
790 | */ | 797 | */ |
791 | for(i=0; i<maxvif; i++) { | 798 | for (i=0; i<maxvif; i++) { |
792 | if (!(vif_table[i].flags&VIFF_STATIC)) | 799 | if (!(vif_table[i].flags&VIFF_STATIC)) |
793 | vif_delete(i); | 800 | vif_delete(i); |
794 | } | 801 | } |
@@ -858,119 +865,117 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
858 | struct vifctl vif; | 865 | struct vifctl vif; |
859 | struct mfcctl mfc; | 866 | struct mfcctl mfc; |
860 | 867 | ||
861 | if(optname!=MRT_INIT) | 868 | if (optname != MRT_INIT) { |
862 | { | 869 | if (sk != mroute_socket && !capable(CAP_NET_ADMIN)) |
863 | if(sk!=mroute_socket && !capable(CAP_NET_ADMIN)) | ||
864 | return -EACCES; | 870 | return -EACCES; |
865 | } | 871 | } |
866 | 872 | ||
867 | switch(optname) | 873 | switch (optname) { |
868 | { | 874 | case MRT_INIT: |
869 | case MRT_INIT: | 875 | if (sk->sk_type != SOCK_RAW || |
870 | if (sk->sk_type != SOCK_RAW || | 876 | inet_sk(sk)->num != IPPROTO_IGMP) |
871 | inet_sk(sk)->num != IPPROTO_IGMP) | 877 | return -EOPNOTSUPP; |
872 | return -EOPNOTSUPP; | 878 | if (optlen!=sizeof(int)) |
873 | if(optlen!=sizeof(int)) | 879 | return -ENOPROTOOPT; |
874 | return -ENOPROTOOPT; | ||
875 | |||
876 | rtnl_lock(); | ||
877 | if (mroute_socket) { | ||
878 | rtnl_unlock(); | ||
879 | return -EADDRINUSE; | ||
880 | } | ||
881 | |||
882 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | ||
883 | if (ret == 0) { | ||
884 | write_lock_bh(&mrt_lock); | ||
885 | mroute_socket=sk; | ||
886 | write_unlock_bh(&mrt_lock); | ||
887 | 880 | ||
888 | ipv4_devconf.mc_forwarding++; | 881 | rtnl_lock(); |
889 | } | 882 | if (mroute_socket) { |
890 | rtnl_unlock(); | 883 | rtnl_unlock(); |
891 | return ret; | 884 | return -EADDRINUSE; |
892 | case MRT_DONE: | 885 | } |
893 | if (sk!=mroute_socket) | 886 | |
894 | return -EACCES; | 887 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
895 | return ip_ra_control(sk, 0, NULL); | 888 | if (ret == 0) { |
896 | case MRT_ADD_VIF: | 889 | write_lock_bh(&mrt_lock); |
897 | case MRT_DEL_VIF: | 890 | mroute_socket=sk; |
898 | if(optlen!=sizeof(vif)) | 891 | write_unlock_bh(&mrt_lock); |
899 | return -EINVAL; | 892 | |
900 | if (copy_from_user(&vif,optval,sizeof(vif))) | 893 | ipv4_devconf.mc_forwarding++; |
901 | return -EFAULT; | 894 | } |
902 | if(vif.vifc_vifi >= MAXVIFS) | 895 | rtnl_unlock(); |
903 | return -ENFILE; | 896 | return ret; |
904 | rtnl_lock(); | 897 | case MRT_DONE: |
905 | if (optname==MRT_ADD_VIF) { | 898 | if (sk!=mroute_socket) |
906 | ret = vif_add(&vif, sk==mroute_socket); | 899 | return -EACCES; |
907 | } else { | 900 | return ip_ra_control(sk, 0, NULL); |
908 | ret = vif_delete(vif.vifc_vifi); | 901 | case MRT_ADD_VIF: |
909 | } | 902 | case MRT_DEL_VIF: |
910 | rtnl_unlock(); | 903 | if (optlen!=sizeof(vif)) |
911 | return ret; | 904 | return -EINVAL; |
905 | if (copy_from_user(&vif,optval,sizeof(vif))) | ||
906 | return -EFAULT; | ||
907 | if (vif.vifc_vifi >= MAXVIFS) | ||
908 | return -ENFILE; | ||
909 | rtnl_lock(); | ||
910 | if (optname==MRT_ADD_VIF) { | ||
911 | ret = vif_add(&vif, sk==mroute_socket); | ||
912 | } else { | ||
913 | ret = vif_delete(vif.vifc_vifi); | ||
914 | } | ||
915 | rtnl_unlock(); | ||
916 | return ret; | ||
912 | 917 | ||
913 | /* | 918 | /* |
914 | * Manipulate the forwarding caches. These live | 919 | * Manipulate the forwarding caches. These live |
915 | * in a sort of kernel/user symbiosis. | 920 | * in a sort of kernel/user symbiosis. |
916 | */ | 921 | */ |
917 | case MRT_ADD_MFC: | 922 | case MRT_ADD_MFC: |
918 | case MRT_DEL_MFC: | 923 | case MRT_DEL_MFC: |
919 | if(optlen!=sizeof(mfc)) | 924 | if (optlen!=sizeof(mfc)) |
920 | return -EINVAL; | 925 | return -EINVAL; |
921 | if (copy_from_user(&mfc,optval, sizeof(mfc))) | 926 | if (copy_from_user(&mfc,optval, sizeof(mfc))) |
922 | return -EFAULT; | 927 | return -EFAULT; |
923 | rtnl_lock(); | 928 | rtnl_lock(); |
924 | if (optname==MRT_DEL_MFC) | 929 | if (optname==MRT_DEL_MFC) |
925 | ret = ipmr_mfc_delete(&mfc); | 930 | ret = ipmr_mfc_delete(&mfc); |
926 | else | 931 | else |
927 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); | 932 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); |
928 | rtnl_unlock(); | 933 | rtnl_unlock(); |
929 | return ret; | 934 | return ret; |
930 | /* | 935 | /* |
931 | * Control PIM assert. | 936 | * Control PIM assert. |
932 | */ | 937 | */ |
933 | case MRT_ASSERT: | 938 | case MRT_ASSERT: |
934 | { | 939 | { |
935 | int v; | 940 | int v; |
936 | if(get_user(v,(int __user *)optval)) | 941 | if (get_user(v,(int __user *)optval)) |
937 | return -EFAULT; | 942 | return -EFAULT; |
938 | mroute_do_assert=(v)?1:0; | 943 | mroute_do_assert=(v)?1:0; |
939 | return 0; | 944 | return 0; |
940 | } | 945 | } |
941 | #ifdef CONFIG_IP_PIMSM | 946 | #ifdef CONFIG_IP_PIMSM |
942 | case MRT_PIM: | 947 | case MRT_PIM: |
943 | { | 948 | { |
944 | int v, ret; | 949 | int v, ret; |
945 | if(get_user(v,(int __user *)optval)) | 950 | if (get_user(v,(int __user *)optval)) |
946 | return -EFAULT; | 951 | return -EFAULT; |
947 | v = (v)?1:0; | 952 | v = (v)?1:0; |
948 | rtnl_lock(); | 953 | rtnl_lock(); |
949 | ret = 0; | 954 | ret = 0; |
950 | if (v != mroute_do_pim) { | 955 | if (v != mroute_do_pim) { |
951 | mroute_do_pim = v; | 956 | mroute_do_pim = v; |
952 | mroute_do_assert = v; | 957 | mroute_do_assert = v; |
953 | #ifdef CONFIG_IP_PIMSM_V2 | 958 | #ifdef CONFIG_IP_PIMSM_V2 |
954 | if (mroute_do_pim) | 959 | if (mroute_do_pim) |
955 | ret = inet_add_protocol(&pim_protocol, | 960 | ret = inet_add_protocol(&pim_protocol, |
956 | IPPROTO_PIM); | 961 | IPPROTO_PIM); |
957 | else | 962 | else |
958 | ret = inet_del_protocol(&pim_protocol, | 963 | ret = inet_del_protocol(&pim_protocol, |
959 | IPPROTO_PIM); | 964 | IPPROTO_PIM); |
960 | if (ret < 0) | 965 | if (ret < 0) |
961 | ret = -EAGAIN; | 966 | ret = -EAGAIN; |
962 | #endif | 967 | #endif |
963 | } | ||
964 | rtnl_unlock(); | ||
965 | return ret; | ||
966 | } | 968 | } |
969 | rtnl_unlock(); | ||
970 | return ret; | ||
971 | } | ||
967 | #endif | 972 | #endif |
968 | /* | 973 | /* |
969 | * Spurious command, or MRT_VERSION which you cannot | 974 | * Spurious command, or MRT_VERSION which you cannot |
970 | * set. | 975 | * set. |
971 | */ | 976 | */ |
972 | default: | 977 | default: |
973 | return -ENOPROTOOPT; | 978 | return -ENOPROTOOPT; |
974 | } | 979 | } |
975 | } | 980 | } |
976 | 981 | ||
@@ -983,7 +988,7 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u | |||
983 | int olr; | 988 | int olr; |
984 | int val; | 989 | int val; |
985 | 990 | ||
986 | if(optname!=MRT_VERSION && | 991 | if (optname!=MRT_VERSION && |
987 | #ifdef CONFIG_IP_PIMSM | 992 | #ifdef CONFIG_IP_PIMSM |
988 | optname!=MRT_PIM && | 993 | optname!=MRT_PIM && |
989 | #endif | 994 | #endif |
@@ -997,17 +1002,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u | |||
997 | if (olr < 0) | 1002 | if (olr < 0) |
998 | return -EINVAL; | 1003 | return -EINVAL; |
999 | 1004 | ||
1000 | if(put_user(olr,optlen)) | 1005 | if (put_user(olr,optlen)) |
1001 | return -EFAULT; | 1006 | return -EFAULT; |
1002 | if(optname==MRT_VERSION) | 1007 | if (optname==MRT_VERSION) |
1003 | val=0x0305; | 1008 | val=0x0305; |
1004 | #ifdef CONFIG_IP_PIMSM | 1009 | #ifdef CONFIG_IP_PIMSM |
1005 | else if(optname==MRT_PIM) | 1010 | else if (optname==MRT_PIM) |
1006 | val=mroute_do_pim; | 1011 | val=mroute_do_pim; |
1007 | #endif | 1012 | #endif |
1008 | else | 1013 | else |
1009 | val=mroute_do_assert; | 1014 | val=mroute_do_assert; |
1010 | if(copy_to_user(optval,&val,olr)) | 1015 | if (copy_to_user(optval,&val,olr)) |
1011 | return -EFAULT; | 1016 | return -EFAULT; |
1012 | return 0; | 1017 | return 0; |
1013 | } | 1018 | } |
@@ -1023,48 +1028,47 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
1023 | struct vif_device *vif; | 1028 | struct vif_device *vif; |
1024 | struct mfc_cache *c; | 1029 | struct mfc_cache *c; |
1025 | 1030 | ||
1026 | switch(cmd) | 1031 | switch (cmd) { |
1027 | { | 1032 | case SIOCGETVIFCNT: |
1028 | case SIOCGETVIFCNT: | 1033 | if (copy_from_user(&vr,arg,sizeof(vr))) |
1029 | if (copy_from_user(&vr,arg,sizeof(vr))) | 1034 | return -EFAULT; |
1030 | return -EFAULT; | 1035 | if (vr.vifi>=maxvif) |
1031 | if(vr.vifi>=maxvif) | 1036 | return -EINVAL; |
1032 | return -EINVAL; | 1037 | read_lock(&mrt_lock); |
1033 | read_lock(&mrt_lock); | 1038 | vif=&vif_table[vr.vifi]; |
1034 | vif=&vif_table[vr.vifi]; | 1039 | if (VIF_EXISTS(vr.vifi)) { |
1035 | if(VIF_EXISTS(vr.vifi)) { | 1040 | vr.icount=vif->pkt_in; |
1036 | vr.icount=vif->pkt_in; | 1041 | vr.ocount=vif->pkt_out; |
1037 | vr.ocount=vif->pkt_out; | 1042 | vr.ibytes=vif->bytes_in; |
1038 | vr.ibytes=vif->bytes_in; | 1043 | vr.obytes=vif->bytes_out; |
1039 | vr.obytes=vif->bytes_out; | ||
1040 | read_unlock(&mrt_lock); | ||
1041 | |||
1042 | if (copy_to_user(arg,&vr,sizeof(vr))) | ||
1043 | return -EFAULT; | ||
1044 | return 0; | ||
1045 | } | ||
1046 | read_unlock(&mrt_lock); | 1044 | read_unlock(&mrt_lock); |
1047 | return -EADDRNOTAVAIL; | ||
1048 | case SIOCGETSGCNT: | ||
1049 | if (copy_from_user(&sr,arg,sizeof(sr))) | ||
1050 | return -EFAULT; | ||
1051 | 1045 | ||
1052 | read_lock(&mrt_lock); | 1046 | if (copy_to_user(arg,&vr,sizeof(vr))) |
1053 | c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); | 1047 | return -EFAULT; |
1054 | if (c) { | 1048 | return 0; |
1055 | sr.pktcnt = c->mfc_un.res.pkt; | 1049 | } |
1056 | sr.bytecnt = c->mfc_un.res.bytes; | 1050 | read_unlock(&mrt_lock); |
1057 | sr.wrong_if = c->mfc_un.res.wrong_if; | 1051 | return -EADDRNOTAVAIL; |
1058 | read_unlock(&mrt_lock); | 1052 | case SIOCGETSGCNT: |
1059 | 1053 | if (copy_from_user(&sr,arg,sizeof(sr))) | |
1060 | if (copy_to_user(arg,&sr,sizeof(sr))) | 1054 | return -EFAULT; |
1061 | return -EFAULT; | 1055 | |
1062 | return 0; | 1056 | read_lock(&mrt_lock); |
1063 | } | 1057 | c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); |
1058 | if (c) { | ||
1059 | sr.pktcnt = c->mfc_un.res.pkt; | ||
1060 | sr.bytecnt = c->mfc_un.res.bytes; | ||
1061 | sr.wrong_if = c->mfc_un.res.wrong_if; | ||
1064 | read_unlock(&mrt_lock); | 1062 | read_unlock(&mrt_lock); |
1065 | return -EADDRNOTAVAIL; | 1063 | |
1066 | default: | 1064 | if (copy_to_user(arg,&sr,sizeof(sr))) |
1067 | return -ENOIOCTLCMD; | 1065 | return -EFAULT; |
1066 | return 0; | ||
1067 | } | ||
1068 | read_unlock(&mrt_lock); | ||
1069 | return -EADDRNOTAVAIL; | ||
1070 | default: | ||
1071 | return -ENOIOCTLCMD; | ||
1068 | } | 1072 | } |
1069 | } | 1073 | } |
1070 | 1074 | ||
@@ -1076,7 +1080,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
1076 | if (event != NETDEV_UNREGISTER) | 1080 | if (event != NETDEV_UNREGISTER) |
1077 | return NOTIFY_DONE; | 1081 | return NOTIFY_DONE; |
1078 | v=&vif_table[0]; | 1082 | v=&vif_table[0]; |
1079 | for(ct=0;ct<maxvif;ct++,v++) { | 1083 | for (ct=0;ct<maxvif;ct++,v++) { |
1080 | if (v->dev==ptr) | 1084 | if (v->dev==ptr) |
1081 | vif_delete(ct); | 1085 | vif_delete(ct); |
1082 | } | 1086 | } |
@@ -1096,11 +1100,17 @@ static struct notifier_block ip_mr_notifier={ | |||
1096 | 1100 | ||
1097 | static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) | 1101 | static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) |
1098 | { | 1102 | { |
1099 | struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); | 1103 | struct iphdr *iph; |
1104 | struct iphdr *old_iph = ip_hdr(skb); | ||
1105 | |||
1106 | skb_push(skb, sizeof(struct iphdr)); | ||
1107 | skb->transport_header = skb->network_header; | ||
1108 | skb_reset_network_header(skb); | ||
1109 | iph = ip_hdr(skb); | ||
1100 | 1110 | ||
1101 | iph->version = 4; | 1111 | iph->version = 4; |
1102 | iph->tos = skb->nh.iph->tos; | 1112 | iph->tos = old_iph->tos; |
1103 | iph->ttl = skb->nh.iph->ttl; | 1113 | iph->ttl = old_iph->ttl; |
1104 | iph->frag_off = 0; | 1114 | iph->frag_off = 0; |
1105 | iph->daddr = daddr; | 1115 | iph->daddr = daddr; |
1106 | iph->saddr = saddr; | 1116 | iph->saddr = saddr; |
@@ -1110,8 +1120,6 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) | |||
1110 | ip_select_ident(iph, skb->dst, NULL); | 1120 | ip_select_ident(iph, skb->dst, NULL); |
1111 | ip_send_check(iph); | 1121 | ip_send_check(iph); |
1112 | 1122 | ||
1113 | skb->h.ipiph = skb->nh.iph; | ||
1114 | skb->nh.iph = iph; | ||
1115 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 1123 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
1116 | nf_reset(skb); | 1124 | nf_reset(skb); |
1117 | } | 1125 | } |
@@ -1134,7 +1142,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) | |||
1134 | 1142 | ||
1135 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | 1143 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) |
1136 | { | 1144 | { |
1137 | struct iphdr *iph = skb->nh.iph; | 1145 | const struct iphdr *iph = ip_hdr(skb); |
1138 | struct vif_device *vif = &vif_table[vifi]; | 1146 | struct vif_device *vif = &vif_table[vifi]; |
1139 | struct net_device *dev; | 1147 | struct net_device *dev; |
1140 | struct rtable *rt; | 1148 | struct rtable *rt; |
@@ -1200,8 +1208,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
1200 | 1208 | ||
1201 | dst_release(skb->dst); | 1209 | dst_release(skb->dst); |
1202 | skb->dst = &rt->u.dst; | 1210 | skb->dst = &rt->u.dst; |
1203 | iph = skb->nh.iph; | 1211 | ip_decrease_ttl(ip_hdr(skb)); |
1204 | ip_decrease_ttl(iph); | ||
1205 | 1212 | ||
1206 | /* FIXME: forward and output firewalls used to be called here. | 1213 | /* FIXME: forward and output firewalls used to be called here. |
1207 | * What do we do with netfilter? -- RR */ | 1214 | * What do we do with netfilter? -- RR */ |
@@ -1301,7 +1308,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1301 | * Forward the frame | 1308 | * Forward the frame |
1302 | */ | 1309 | */ |
1303 | for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { | 1310 | for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { |
1304 | if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) { | 1311 | if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { |
1305 | if (psend != -1) { | 1312 | if (psend != -1) { |
1306 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 1313 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
1307 | if (skb2) | 1314 | if (skb2) |
@@ -1347,7 +1354,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
1347 | if (IPCB(skb)->opt.router_alert) { | 1354 | if (IPCB(skb)->opt.router_alert) { |
1348 | if (ip_call_ra_chain(skb)) | 1355 | if (ip_call_ra_chain(skb)) |
1349 | return 0; | 1356 | return 0; |
1350 | } else if (skb->nh.iph->protocol == IPPROTO_IGMP){ | 1357 | } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ |
1351 | /* IGMPv1 (and broken IGMPv2 implementations sort of | 1358 | /* IGMPv1 (and broken IGMPv2 implementations sort of |
1352 | Cisco IOS <= 11.2(8)) do not put router alert | 1359 | Cisco IOS <= 11.2(8)) do not put router alert |
1353 | option to IGMP packets destined to routable | 1360 | option to IGMP packets destined to routable |
@@ -1366,7 +1373,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
1366 | } | 1373 | } |
1367 | 1374 | ||
1368 | read_lock(&mrt_lock); | 1375 | read_lock(&mrt_lock); |
1369 | cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); | 1376 | cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); |
1370 | 1377 | ||
1371 | /* | 1378 | /* |
1372 | * No usable cache entry | 1379 | * No usable cache entry |
@@ -1426,14 +1433,15 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
1426 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | 1433 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) |
1427 | goto drop; | 1434 | goto drop; |
1428 | 1435 | ||
1429 | pim = (struct igmphdr*)skb->h.raw; | 1436 | pim = igmp_hdr(skb); |
1430 | 1437 | ||
1431 | if (!mroute_do_pim || | 1438 | if (!mroute_do_pim || |
1432 | skb->len < sizeof(*pim) + sizeof(*encap) || | 1439 | skb->len < sizeof(*pim) + sizeof(*encap) || |
1433 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1440 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
1434 | goto drop; | 1441 | goto drop; |
1435 | 1442 | ||
1436 | encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); | 1443 | encap = (struct iphdr *)(skb_transport_header(skb) + |
1444 | sizeof(struct igmphdr)); | ||
1437 | /* | 1445 | /* |
1438 | Check that: | 1446 | Check that: |
1439 | a. packet is really destinted to a multicast group | 1447 | a. packet is really destinted to a multicast group |
@@ -1455,9 +1463,9 @@ int pim_rcv_v1(struct sk_buff * skb) | |||
1455 | if (reg_dev == NULL) | 1463 | if (reg_dev == NULL) |
1456 | goto drop; | 1464 | goto drop; |
1457 | 1465 | ||
1458 | skb->mac.raw = skb->nh.raw; | 1466 | skb->mac_header = skb->network_header; |
1459 | skb_pull(skb, (u8*)encap - skb->data); | 1467 | skb_pull(skb, (u8*)encap - skb->data); |
1460 | skb->nh.iph = (struct iphdr *)skb->data; | 1468 | skb_reset_network_header(skb); |
1461 | skb->dev = reg_dev; | 1469 | skb->dev = reg_dev; |
1462 | skb->protocol = htons(ETH_P_IP); | 1470 | skb->protocol = htons(ETH_P_IP); |
1463 | skb->ip_summed = 0; | 1471 | skb->ip_summed = 0; |
@@ -1486,7 +1494,7 @@ static int pim_rcv(struct sk_buff * skb) | |||
1486 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) | 1494 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) |
1487 | goto drop; | 1495 | goto drop; |
1488 | 1496 | ||
1489 | pim = (struct pimreghdr*)skb->h.raw; | 1497 | pim = (struct pimreghdr *)skb_transport_header(skb); |
1490 | if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || | 1498 | if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || |
1491 | (pim->flags&PIM_NULL_REGISTER) || | 1499 | (pim->flags&PIM_NULL_REGISTER) || |
1492 | (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && | 1500 | (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && |
@@ -1494,7 +1502,8 @@ static int pim_rcv(struct sk_buff * skb) | |||
1494 | goto drop; | 1502 | goto drop; |
1495 | 1503 | ||
1496 | /* check if the inner packet is destined to mcast group */ | 1504 | /* check if the inner packet is destined to mcast group */ |
1497 | encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); | 1505 | encap = (struct iphdr *)(skb_transport_header(skb) + |
1506 | sizeof(struct pimreghdr)); | ||
1498 | if (!MULTICAST(encap->daddr) || | 1507 | if (!MULTICAST(encap->daddr) || |
1499 | encap->tot_len == 0 || | 1508 | encap->tot_len == 0 || |
1500 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) | 1509 | ntohs(encap->tot_len) + sizeof(*pim) > skb->len) |
@@ -1510,9 +1519,9 @@ static int pim_rcv(struct sk_buff * skb) | |||
1510 | if (reg_dev == NULL) | 1519 | if (reg_dev == NULL) |
1511 | goto drop; | 1520 | goto drop; |
1512 | 1521 | ||
1513 | skb->mac.raw = skb->nh.raw; | 1522 | skb->mac_header = skb->network_header; |
1514 | skb_pull(skb, (u8*)encap - skb->data); | 1523 | skb_pull(skb, (u8*)encap - skb->data); |
1515 | skb->nh.iph = (struct iphdr *)skb->data; | 1524 | skb_reset_network_header(skb); |
1516 | skb->dev = reg_dev; | 1525 | skb->dev = reg_dev; |
1517 | skb->protocol = htons(ETH_P_IP); | 1526 | skb->protocol = htons(ETH_P_IP); |
1518 | skb->ip_summed = 0; | 1527 | skb->ip_summed = 0; |
@@ -1537,7 +1546,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | |||
1537 | int ct; | 1546 | int ct; |
1538 | struct rtnexthop *nhp; | 1547 | struct rtnexthop *nhp; |
1539 | struct net_device *dev = vif_table[c->mfc_parent].dev; | 1548 | struct net_device *dev = vif_table[c->mfc_parent].dev; |
1540 | u8 *b = skb->tail; | 1549 | u8 *b = skb_tail_pointer(skb); |
1541 | struct rtattr *mp_head; | 1550 | struct rtattr *mp_head; |
1542 | 1551 | ||
1543 | if (dev) | 1552 | if (dev) |
@@ -1557,12 +1566,12 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | |||
1557 | } | 1566 | } |
1558 | } | 1567 | } |
1559 | mp_head->rta_type = RTA_MULTIPATH; | 1568 | mp_head->rta_type = RTA_MULTIPATH; |
1560 | mp_head->rta_len = skb->tail - (u8*)mp_head; | 1569 | mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; |
1561 | rtm->rtm_type = RTN_MULTICAST; | 1570 | rtm->rtm_type = RTN_MULTICAST; |
1562 | return 1; | 1571 | return 1; |
1563 | 1572 | ||
1564 | rtattr_failure: | 1573 | rtattr_failure: |
1565 | skb_trim(skb, b - skb->data); | 1574 | nlmsg_trim(skb, b); |
1566 | return -EMSGSIZE; | 1575 | return -EMSGSIZE; |
1567 | } | 1576 | } |
1568 | 1577 | ||
@@ -1577,6 +1586,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | |||
1577 | 1586 | ||
1578 | if (cache==NULL) { | 1587 | if (cache==NULL) { |
1579 | struct sk_buff *skb2; | 1588 | struct sk_buff *skb2; |
1589 | struct iphdr *iph; | ||
1580 | struct net_device *dev; | 1590 | struct net_device *dev; |
1581 | int vif; | 1591 | int vif; |
1582 | 1592 | ||
@@ -1596,11 +1606,13 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | |||
1596 | return -ENOMEM; | 1606 | return -ENOMEM; |
1597 | } | 1607 | } |
1598 | 1608 | ||
1599 | skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr)); | 1609 | skb_push(skb2, sizeof(struct iphdr)); |
1600 | skb2->nh.iph->ihl = sizeof(struct iphdr)>>2; | 1610 | skb_reset_network_header(skb2); |
1601 | skb2->nh.iph->saddr = rt->rt_src; | 1611 | iph = ip_hdr(skb2); |
1602 | skb2->nh.iph->daddr = rt->rt_dst; | 1612 | iph->ihl = sizeof(struct iphdr) >> 2; |
1603 | skb2->nh.iph->version = 0; | 1613 | iph->saddr = rt->rt_src; |
1614 | iph->daddr = rt->rt_dst; | ||
1615 | iph->version = 0; | ||
1604 | err = ipmr_cache_unresolved(vif, skb2); | 1616 | err = ipmr_cache_unresolved(vif, skb2); |
1605 | read_unlock(&mrt_lock); | 1617 | read_unlock(&mrt_lock); |
1606 | return err; | 1618 | return err; |
@@ -1625,7 +1637,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, | |||
1625 | loff_t pos) | 1637 | loff_t pos) |
1626 | { | 1638 | { |
1627 | for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { | 1639 | for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { |
1628 | if(!VIF_EXISTS(iter->ct)) | 1640 | if (!VIF_EXISTS(iter->ct)) |
1629 | continue; | 1641 | continue; |
1630 | if (pos-- == 0) | 1642 | if (pos-- == 0) |
1631 | return &vif_table[iter->ct]; | 1643 | return &vif_table[iter->ct]; |
@@ -1649,7 +1661,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1649 | return ipmr_vif_seq_idx(iter, 0); | 1661 | return ipmr_vif_seq_idx(iter, 0); |
1650 | 1662 | ||
1651 | while (++iter->ct < maxvif) { | 1663 | while (++iter->ct < maxvif) { |
1652 | if(!VIF_EXISTS(iter->ct)) | 1664 | if (!VIF_EXISTS(iter->ct)) |
1653 | continue; | 1665 | continue; |
1654 | return &vif_table[iter->ct]; | 1666 | return &vif_table[iter->ct]; |
1655 | } | 1667 | } |
@@ -1680,7 +1692,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | |||
1680 | return 0; | 1692 | return 0; |
1681 | } | 1693 | } |
1682 | 1694 | ||
1683 | static struct seq_operations ipmr_vif_seq_ops = { | 1695 | static const struct seq_operations ipmr_vif_seq_ops = { |
1684 | .start = ipmr_vif_seq_start, | 1696 | .start = ipmr_vif_seq_start, |
1685 | .next = ipmr_vif_seq_next, | 1697 | .next = ipmr_vif_seq_next, |
1686 | .stop = ipmr_vif_seq_stop, | 1698 | .stop = ipmr_vif_seq_stop, |
@@ -1732,14 +1744,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) | |||
1732 | it->cache = mfc_cache_array; | 1744 | it->cache = mfc_cache_array; |
1733 | read_lock(&mrt_lock); | 1745 | read_lock(&mrt_lock); |
1734 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) | 1746 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) |
1735 | for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) | 1747 | for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) |
1736 | if (pos-- == 0) | 1748 | if (pos-- == 0) |
1737 | return mfc; | 1749 | return mfc; |
1738 | read_unlock(&mrt_lock); | 1750 | read_unlock(&mrt_lock); |
1739 | 1751 | ||
1740 | it->cache = &mfc_unres_queue; | 1752 | it->cache = &mfc_unres_queue; |
1741 | spin_lock_bh(&mfc_unres_lock); | 1753 | spin_lock_bh(&mfc_unres_lock); |
1742 | for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) | 1754 | for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) |
1743 | if (pos-- == 0) | 1755 | if (pos-- == 0) |
1744 | return mfc; | 1756 | return mfc; |
1745 | spin_unlock_bh(&mfc_unres_lock); | 1757 | spin_unlock_bh(&mfc_unres_lock); |
@@ -1829,9 +1841,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | |||
1829 | mfc->mfc_un.res.wrong_if); | 1841 | mfc->mfc_un.res.wrong_if); |
1830 | 1842 | ||
1831 | if (it->cache != &mfc_unres_queue) { | 1843 | if (it->cache != &mfc_unres_queue) { |
1832 | for(n = mfc->mfc_un.res.minvif; | 1844 | for (n = mfc->mfc_un.res.minvif; |
1833 | n < mfc->mfc_un.res.maxvif; n++ ) { | 1845 | n < mfc->mfc_un.res.maxvif; n++ ) { |
1834 | if(VIF_EXISTS(n) | 1846 | if (VIF_EXISTS(n) |
1835 | && mfc->mfc_un.res.ttls[n] < 255) | 1847 | && mfc->mfc_un.res.ttls[n] < 255) |
1836 | seq_printf(seq, | 1848 | seq_printf(seq, |
1837 | " %2d:%-3d", | 1849 | " %2d:%-3d", |
@@ -1843,7 +1855,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | |||
1843 | return 0; | 1855 | return 0; |
1844 | } | 1856 | } |
1845 | 1857 | ||
1846 | static struct seq_operations ipmr_mfc_seq_ops = { | 1858 | static const struct seq_operations ipmr_mfc_seq_ops = { |
1847 | .start = ipmr_mfc_seq_start, | 1859 | .start = ipmr_mfc_seq_start, |
1848 | .next = ipmr_mfc_seq_next, | 1860 | .next = ipmr_mfc_seq_next, |
1849 | .stop = ipmr_mfc_seq_stop, | 1861 | .stop = ipmr_mfc_seq_stop, |
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 22e104c6a493..15ad5dd2d984 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c | |||
@@ -331,14 +331,14 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, | |||
331 | struct ip_vs_app *app) | 331 | struct ip_vs_app *app) |
332 | { | 332 | { |
333 | int diff; | 333 | int diff; |
334 | unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4; | 334 | const unsigned int tcp_offset = ip_hdrlen(*pskb); |
335 | struct tcphdr *th; | 335 | struct tcphdr *th; |
336 | __u32 seq; | 336 | __u32 seq; |
337 | 337 | ||
338 | if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) | 338 | if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) |
339 | return 0; | 339 | return 0; |
340 | 340 | ||
341 | th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset); | 341 | th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); |
342 | 342 | ||
343 | /* | 343 | /* |
344 | * Remember seq number in case this pkt gets resized | 344 | * Remember seq number in case this pkt gets resized |
@@ -406,14 +406,14 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, | |||
406 | struct ip_vs_app *app) | 406 | struct ip_vs_app *app) |
407 | { | 407 | { |
408 | int diff; | 408 | int diff; |
409 | unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4; | 409 | const unsigned int tcp_offset = ip_hdrlen(*pskb); |
410 | struct tcphdr *th; | 410 | struct tcphdr *th; |
411 | __u32 seq; | 411 | __u32 seq; |
412 | 412 | ||
413 | if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) | 413 | if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) |
414 | return 0; | 414 | return 0; |
415 | 415 | ||
416 | th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset); | 416 | th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); |
417 | 417 | ||
418 | /* | 418 | /* |
419 | * Remember seq number in case this pkt gets resized | 419 | * Remember seq number in case this pkt gets resized |
@@ -577,7 +577,6 @@ static const struct file_operations ip_vs_app_fops = { | |||
577 | int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, | 577 | int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, |
578 | char *o_buf, int o_len, char *n_buf, int n_len) | 578 | char *o_buf, int o_len, char *n_buf, int n_len) |
579 | { | 579 | { |
580 | struct iphdr *iph; | ||
581 | int diff; | 580 | int diff; |
582 | int o_offset; | 581 | int o_offset; |
583 | int o_left; | 582 | int o_left; |
@@ -603,12 +602,11 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, | |||
603 | skb_put(skb, diff); | 602 | skb_put(skb, diff); |
604 | memmove(skb->data + o_offset + n_len, | 603 | memmove(skb->data + o_offset + n_len, |
605 | skb->data + o_offset + o_len, o_left); | 604 | skb->data + o_offset + o_len, o_left); |
606 | memcpy(skb->data + o_offset, n_buf, n_len); | 605 | skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); |
607 | } | 606 | } |
608 | 607 | ||
609 | /* must update the iph total length here */ | 608 | /* must update the iph total length here */ |
610 | iph = skb->nh.iph; | 609 | ip_hdr(skb)->tot_len = htons(skb->len); |
611 | iph->tot_len = htons(skb->len); | ||
612 | 610 | ||
613 | LeaveFunction(9); | 611 | LeaveFunction(9); |
614 | return 0; | 612 | return 0; |
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 24d7b66eb6d2..f005a2f929f4 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c | |||
@@ -212,7 +212,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
212 | __be16 ports[2]) | 212 | __be16 ports[2]) |
213 | { | 213 | { |
214 | struct ip_vs_conn *cp = NULL; | 214 | struct ip_vs_conn *cp = NULL; |
215 | struct iphdr *iph = skb->nh.iph; | 215 | struct iphdr *iph = ip_hdr(skb); |
216 | struct ip_vs_dest *dest; | 216 | struct ip_vs_dest *dest; |
217 | struct ip_vs_conn *ct; | 217 | struct ip_vs_conn *ct; |
218 | __be16 dport; /* destination port to forward */ | 218 | __be16 dport; /* destination port to forward */ |
@@ -381,7 +381,7 @@ struct ip_vs_conn * | |||
381 | ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | 381 | ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) |
382 | { | 382 | { |
383 | struct ip_vs_conn *cp = NULL; | 383 | struct ip_vs_conn *cp = NULL; |
384 | struct iphdr *iph = skb->nh.iph; | 384 | struct iphdr *iph = ip_hdr(skb); |
385 | struct ip_vs_dest *dest; | 385 | struct ip_vs_dest *dest; |
386 | __be16 _ports[2], *pptr; | 386 | __be16 _ports[2], *pptr; |
387 | 387 | ||
@@ -447,7 +447,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
447 | struct ip_vs_protocol *pp) | 447 | struct ip_vs_protocol *pp) |
448 | { | 448 | { |
449 | __be16 _ports[2], *pptr; | 449 | __be16 _ports[2], *pptr; |
450 | struct iphdr *iph = skb->nh.iph; | 450 | struct iphdr *iph = ip_hdr(skb); |
451 | 451 | ||
452 | pptr = skb_header_pointer(skb, iph->ihl*4, | 452 | pptr = skb_header_pointer(skb, iph->ihl*4, |
453 | sizeof(_ports), _ports); | 453 | sizeof(_ports), _ports); |
@@ -546,7 +546,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
546 | { | 546 | { |
547 | skb = ip_defrag(skb, user); | 547 | skb = ip_defrag(skb, user); |
548 | if (skb) | 548 | if (skb) |
549 | ip_send_check(skb->nh.iph); | 549 | ip_send_check(ip_hdr(skb)); |
550 | return skb; | 550 | return skb; |
551 | } | 551 | } |
552 | 552 | ||
@@ -557,9 +557,10 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
557 | void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | 557 | void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, |
558 | struct ip_vs_conn *cp, int inout) | 558 | struct ip_vs_conn *cp, int inout) |
559 | { | 559 | { |
560 | struct iphdr *iph = skb->nh.iph; | 560 | struct iphdr *iph = ip_hdr(skb); |
561 | unsigned int icmp_offset = iph->ihl*4; | 561 | unsigned int icmp_offset = iph->ihl*4; |
562 | struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset); | 562 | struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) + |
563 | icmp_offset); | ||
563 | struct iphdr *ciph = (struct iphdr *)(icmph + 1); | 564 | struct iphdr *ciph = (struct iphdr *)(icmph + 1); |
564 | 565 | ||
565 | if (inout) { | 566 | if (inout) { |
@@ -617,14 +618,14 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) | |||
617 | *related = 1; | 618 | *related = 1; |
618 | 619 | ||
619 | /* reassemble IP fragments */ | 620 | /* reassemble IP fragments */ |
620 | if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { | 621 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
621 | skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); | 622 | skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); |
622 | if (!skb) | 623 | if (!skb) |
623 | return NF_STOLEN; | 624 | return NF_STOLEN; |
624 | *pskb = skb; | 625 | *pskb = skb; |
625 | } | 626 | } |
626 | 627 | ||
627 | iph = skb->nh.iph; | 628 | iph = ip_hdr(skb); |
628 | offset = ihl = iph->ihl * 4; | 629 | offset = ihl = iph->ihl * 4; |
629 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | 630 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); |
630 | if (ic == NULL) | 631 | if (ic == NULL) |
@@ -659,7 +660,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) | |||
659 | return NF_ACCEPT; | 660 | return NF_ACCEPT; |
660 | 661 | ||
661 | /* Is the embedded protocol header present? */ | 662 | /* Is the embedded protocol header present? */ |
662 | if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && | 663 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && |
663 | pp->dont_defrag)) | 664 | pp->dont_defrag)) |
664 | return NF_ACCEPT; | 665 | return NF_ACCEPT; |
665 | 666 | ||
@@ -680,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) | |||
680 | } | 681 | } |
681 | 682 | ||
682 | /* Ensure the checksum is correct */ | 683 | /* Ensure the checksum is correct */ |
683 | if (skb->ip_summed != CHECKSUM_UNNECESSARY && | 684 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { |
684 | ip_vs_checksum_complete(skb, ihl)) { | ||
685 | /* Failed checksum! */ | 685 | /* Failed checksum! */ |
686 | IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", | 686 | IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", |
687 | NIPQUAD(iph->saddr)); | 687 | NIPQUAD(iph->saddr)); |
@@ -712,8 +712,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb) | |||
712 | { | 712 | { |
713 | struct tcphdr _tcph, *th; | 713 | struct tcphdr _tcph, *th; |
714 | 714 | ||
715 | th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, | 715 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); |
716 | sizeof(_tcph), &_tcph); | ||
717 | if (th == NULL) | 716 | if (th == NULL) |
718 | return 0; | 717 | return 0; |
719 | return th->rst; | 718 | return th->rst; |
@@ -740,14 +739,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, | |||
740 | if (skb->ipvs_property) | 739 | if (skb->ipvs_property) |
741 | return NF_ACCEPT; | 740 | return NF_ACCEPT; |
742 | 741 | ||
743 | iph = skb->nh.iph; | 742 | iph = ip_hdr(skb); |
744 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { | 743 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { |
745 | int related, verdict = ip_vs_out_icmp(pskb, &related); | 744 | int related, verdict = ip_vs_out_icmp(pskb, &related); |
746 | 745 | ||
747 | if (related) | 746 | if (related) |
748 | return verdict; | 747 | return verdict; |
749 | skb = *pskb; | 748 | skb = *pskb; |
750 | iph = skb->nh.iph; | 749 | iph = ip_hdr(skb); |
751 | } | 750 | } |
752 | 751 | ||
753 | pp = ip_vs_proto_get(iph->protocol); | 752 | pp = ip_vs_proto_get(iph->protocol); |
@@ -755,12 +754,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, | |||
755 | return NF_ACCEPT; | 754 | return NF_ACCEPT; |
756 | 755 | ||
757 | /* reassemble IP fragments */ | 756 | /* reassemble IP fragments */ |
758 | if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) && | 757 | if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && |
759 | !pp->dont_defrag)) { | 758 | !pp->dont_defrag)) { |
760 | skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); | 759 | skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); |
761 | if (!skb) | 760 | if (!skb) |
762 | return NF_STOLEN; | 761 | return NF_STOLEN; |
763 | iph = skb->nh.iph; | 762 | iph = ip_hdr(skb); |
764 | *pskb = skb; | 763 | *pskb = skb; |
765 | } | 764 | } |
766 | 765 | ||
@@ -810,8 +809,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, | |||
810 | if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) | 809 | if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) |
811 | goto drop; | 810 | goto drop; |
812 | skb = *pskb; | 811 | skb = *pskb; |
813 | skb->nh.iph->saddr = cp->vaddr; | 812 | ip_hdr(skb)->saddr = cp->vaddr; |
814 | ip_send_check(skb->nh.iph); | 813 | ip_send_check(ip_hdr(skb)); |
815 | 814 | ||
816 | /* For policy routing, packets originating from this | 815 | /* For policy routing, packets originating from this |
817 | * machine itself may be routed differently to packets | 816 | * machine itself may be routed differently to packets |
@@ -861,7 +860,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) | |||
861 | *related = 1; | 860 | *related = 1; |
862 | 861 | ||
863 | /* reassemble IP fragments */ | 862 | /* reassemble IP fragments */ |
864 | if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { | 863 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
865 | skb = ip_vs_gather_frags(skb, | 864 | skb = ip_vs_gather_frags(skb, |
866 | hooknum == NF_IP_LOCAL_IN ? | 865 | hooknum == NF_IP_LOCAL_IN ? |
867 | IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); | 866 | IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); |
@@ -870,7 +869,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) | |||
870 | *pskb = skb; | 869 | *pskb = skb; |
871 | } | 870 | } |
872 | 871 | ||
873 | iph = skb->nh.iph; | 872 | iph = ip_hdr(skb); |
874 | offset = ihl = iph->ihl * 4; | 873 | offset = ihl = iph->ihl * 4; |
875 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | 874 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); |
876 | if (ic == NULL) | 875 | if (ic == NULL) |
@@ -905,7 +904,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) | |||
905 | return NF_ACCEPT; | 904 | return NF_ACCEPT; |
906 | 905 | ||
907 | /* Is the embedded protocol header present? */ | 906 | /* Is the embedded protocol header present? */ |
908 | if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && | 907 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && |
909 | pp->dont_defrag)) | 908 | pp->dont_defrag)) |
910 | return NF_ACCEPT; | 909 | return NF_ACCEPT; |
911 | 910 | ||
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) | |||
921 | verdict = NF_DROP; | 920 | verdict = NF_DROP; |
922 | 921 | ||
923 | /* Ensure the checksum is correct */ | 922 | /* Ensure the checksum is correct */ |
924 | if (skb->ip_summed != CHECKSUM_UNNECESSARY && | 923 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { |
925 | ip_vs_checksum_complete(skb, ihl)) { | ||
926 | /* Failed checksum! */ | 924 | /* Failed checksum! */ |
927 | IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", | 925 | IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", |
928 | NIPQUAD(iph->saddr)); | 926 | NIPQUAD(iph->saddr)); |
@@ -966,19 +964,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, | |||
966 | || skb->dev == &loopback_dev || skb->sk)) { | 964 | || skb->dev == &loopback_dev || skb->sk)) { |
967 | IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", | 965 | IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", |
968 | skb->pkt_type, | 966 | skb->pkt_type, |
969 | skb->nh.iph->protocol, | 967 | ip_hdr(skb)->protocol, |
970 | NIPQUAD(skb->nh.iph->daddr)); | 968 | NIPQUAD(ip_hdr(skb)->daddr)); |
971 | return NF_ACCEPT; | 969 | return NF_ACCEPT; |
972 | } | 970 | } |
973 | 971 | ||
974 | iph = skb->nh.iph; | 972 | iph = ip_hdr(skb); |
975 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { | 973 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { |
976 | int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); | 974 | int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); |
977 | 975 | ||
978 | if (related) | 976 | if (related) |
979 | return verdict; | 977 | return verdict; |
980 | skb = *pskb; | 978 | skb = *pskb; |
981 | iph = skb->nh.iph; | 979 | iph = ip_hdr(skb); |
982 | } | 980 | } |
983 | 981 | ||
984 | /* Protocol supported? */ | 982 | /* Protocol supported? */ |
@@ -1064,7 +1062,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, | |||
1064 | { | 1062 | { |
1065 | int r; | 1063 | int r; |
1066 | 1064 | ||
1067 | if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP) | 1065 | if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) |
1068 | return NF_ACCEPT; | 1066 | return NF_ACCEPT; |
1069 | 1067 | ||
1070 | return ip_vs_in_icmp(pskb, &r, hooknum); | 1068 | return ip_vs_in_icmp(pskb, &r, hooknum); |
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 502111fba872..dcf5d46aaa5e 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c | |||
@@ -204,7 +204,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
204 | { | 204 | { |
205 | struct ip_vs_dest *dest; | 205 | struct ip_vs_dest *dest; |
206 | struct ip_vs_dh_bucket *tbl; | 206 | struct ip_vs_dh_bucket *tbl; |
207 | struct iphdr *iph = skb->nh.iph; | 207 | struct iphdr *iph = ip_hdr(skb); |
208 | 208 | ||
209 | IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); | 209 | IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); |
210 | 210 | ||
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 847c47af040c..344ddbbdc756 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c | |||
@@ -159,10 +159,10 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
159 | return 0; | 159 | return 0; |
160 | 160 | ||
161 | if (cp->app_data == &ip_vs_ftp_pasv) { | 161 | if (cp->app_data == &ip_vs_ftp_pasv) { |
162 | iph = (*pskb)->nh.iph; | 162 | iph = ip_hdr(*pskb); |
163 | th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); | 163 | th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); |
164 | data = (char *)th + (th->doff << 2); | 164 | data = (char *)th + (th->doff << 2); |
165 | data_limit = (*pskb)->tail; | 165 | data_limit = skb_tail_pointer(*pskb); |
166 | 166 | ||
167 | if (ip_vs_ftp_get_addrport(data, data_limit, | 167 | if (ip_vs_ftp_get_addrport(data, data_limit, |
168 | SERVER_STRING, | 168 | SERVER_STRING, |
@@ -262,14 +262,14 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
262 | /* | 262 | /* |
263 | * Detecting whether it is passive | 263 | * Detecting whether it is passive |
264 | */ | 264 | */ |
265 | iph = (*pskb)->nh.iph; | 265 | iph = ip_hdr(*pskb); |
266 | th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); | 266 | th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); |
267 | 267 | ||
268 | /* Since there may be OPTIONS in the TCP packet and the HLEN is | 268 | /* Since there may be OPTIONS in the TCP packet and the HLEN is |
269 | the length of the header in 32-bit multiples, it is accurate | 269 | the length of the header in 32-bit multiples, it is accurate |
270 | to calculate data address by th+HLEN*4 */ | 270 | to calculate data address by th+HLEN*4 */ |
271 | data = data_start = (char *)th + (th->doff << 2); | 271 | data = data_start = (char *)th + (th->doff << 2); |
272 | data_limit = (*pskb)->tail; | 272 | data_limit = skb_tail_pointer(*pskb); |
273 | 273 | ||
274 | while (data <= data_limit - 6) { | 274 | while (data <= data_limit - 6) { |
275 | if (strnicmp(data, "PASV\r\n", 6) == 0) { | 275 | if (strnicmp(data, "PASV\r\n", 6) == 0) { |
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index c801273cb881..052f4ed59174 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c | |||
@@ -521,7 +521,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
521 | struct ip_vs_dest *dest; | 521 | struct ip_vs_dest *dest; |
522 | struct ip_vs_lblc_table *tbl; | 522 | struct ip_vs_lblc_table *tbl; |
523 | struct ip_vs_lblc_entry *en; | 523 | struct ip_vs_lblc_entry *en; |
524 | struct iphdr *iph = skb->nh.iph; | 524 | struct iphdr *iph = ip_hdr(skb); |
525 | 525 | ||
526 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); | 526 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); |
527 | 527 | ||
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 23f9b9e73c85..6225acac7a3b 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c | |||
@@ -775,7 +775,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
775 | struct ip_vs_dest *dest; | 775 | struct ip_vs_dest *dest; |
776 | struct ip_vs_lblcr_table *tbl; | 776 | struct ip_vs_lblcr_table *tbl; |
777 | struct ip_vs_lblcr_entry *en; | 777 | struct ip_vs_lblcr_entry *en; |
778 | struct iphdr *iph = skb->nh.iph; | 778 | struct iphdr *iph = ip_hdr(skb); |
779 | 779 | ||
780 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); | 780 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); |
781 | 781 | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 8b0505b09317..a842676e1c69 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c | |||
@@ -52,15 +52,15 @@ ah_conn_in_get(const struct sk_buff *skb, | |||
52 | if (likely(!inverse)) { | 52 | if (likely(!inverse)) { |
53 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | 53 | cp = ip_vs_conn_in_get(IPPROTO_UDP, |
54 | iph->saddr, | 54 | iph->saddr, |
55 | __constant_htons(PORT_ISAKMP), | 55 | htons(PORT_ISAKMP), |
56 | iph->daddr, | 56 | iph->daddr, |
57 | __constant_htons(PORT_ISAKMP)); | 57 | htons(PORT_ISAKMP)); |
58 | } else { | 58 | } else { |
59 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | 59 | cp = ip_vs_conn_in_get(IPPROTO_UDP, |
60 | iph->daddr, | 60 | iph->daddr, |
61 | __constant_htons(PORT_ISAKMP), | 61 | htons(PORT_ISAKMP), |
62 | iph->saddr, | 62 | iph->saddr, |
63 | __constant_htons(PORT_ISAKMP)); | 63 | htons(PORT_ISAKMP)); |
64 | } | 64 | } |
65 | 65 | ||
66 | if (!cp) { | 66 | if (!cp) { |
@@ -89,15 +89,15 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
89 | if (likely(!inverse)) { | 89 | if (likely(!inverse)) { |
90 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | 90 | cp = ip_vs_conn_out_get(IPPROTO_UDP, |
91 | iph->saddr, | 91 | iph->saddr, |
92 | __constant_htons(PORT_ISAKMP), | 92 | htons(PORT_ISAKMP), |
93 | iph->daddr, | 93 | iph->daddr, |
94 | __constant_htons(PORT_ISAKMP)); | 94 | htons(PORT_ISAKMP)); |
95 | } else { | 95 | } else { |
96 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | 96 | cp = ip_vs_conn_out_get(IPPROTO_UDP, |
97 | iph->daddr, | 97 | iph->daddr, |
98 | __constant_htons(PORT_ISAKMP), | 98 | htons(PORT_ISAKMP), |
99 | iph->saddr, | 99 | iph->saddr, |
100 | __constant_htons(PORT_ISAKMP)); | 100 | htons(PORT_ISAKMP)); |
101 | } | 101 | } |
102 | 102 | ||
103 | if (!cp) { | 103 | if (!cp) { |
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 16a9ebee2fe6..e65577a77006 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -76,16 +76,15 @@ tcp_conn_schedule(struct sk_buff *skb, | |||
76 | struct ip_vs_service *svc; | 76 | struct ip_vs_service *svc; |
77 | struct tcphdr _tcph, *th; | 77 | struct tcphdr _tcph, *th; |
78 | 78 | ||
79 | th = skb_header_pointer(skb, skb->nh.iph->ihl*4, | 79 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); |
80 | sizeof(_tcph), &_tcph); | ||
81 | if (th == NULL) { | 80 | if (th == NULL) { |
82 | *verdict = NF_DROP; | 81 | *verdict = NF_DROP; |
83 | return 0; | 82 | return 0; |
84 | } | 83 | } |
85 | 84 | ||
86 | if (th->syn && | 85 | if (th->syn && |
87 | (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, | 86 | (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, |
88 | skb->nh.iph->daddr, th->dest))) { | 87 | ip_hdr(skb)->daddr, th->dest))) { |
89 | if (ip_vs_todrop()) { | 88 | if (ip_vs_todrop()) { |
90 | /* | 89 | /* |
91 | * It seems that we are very loaded. | 90 | * It seems that we are very loaded. |
@@ -127,7 +126,7 @@ tcp_snat_handler(struct sk_buff **pskb, | |||
127 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 126 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
128 | { | 127 | { |
129 | struct tcphdr *tcph; | 128 | struct tcphdr *tcph; |
130 | unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4; | 129 | const unsigned int tcphoff = ip_hdrlen(*pskb); |
131 | 130 | ||
132 | /* csum_check requires unshared skb */ | 131 | /* csum_check requires unshared skb */ |
133 | if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) | 132 | if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) |
@@ -143,7 +142,7 @@ tcp_snat_handler(struct sk_buff **pskb, | |||
143 | return 0; | 142 | return 0; |
144 | } | 143 | } |
145 | 144 | ||
146 | tcph = (void *)(*pskb)->nh.iph + tcphoff; | 145 | tcph = (void *)ip_hdr(*pskb) + tcphoff; |
147 | tcph->source = cp->vport; | 146 | tcph->source = cp->vport; |
148 | 147 | ||
149 | /* Adjust TCP checksums */ | 148 | /* Adjust TCP checksums */ |
@@ -175,7 +174,7 @@ tcp_dnat_handler(struct sk_buff **pskb, | |||
175 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 174 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
176 | { | 175 | { |
177 | struct tcphdr *tcph; | 176 | struct tcphdr *tcph; |
178 | unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4; | 177 | const unsigned int tcphoff = ip_hdrlen(*pskb); |
179 | 178 | ||
180 | /* csum_check requires unshared skb */ | 179 | /* csum_check requires unshared skb */ |
181 | if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) | 180 | if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) |
@@ -194,7 +193,7 @@ tcp_dnat_handler(struct sk_buff **pskb, | |||
194 | return 0; | 193 | return 0; |
195 | } | 194 | } |
196 | 195 | ||
197 | tcph = (void *)(*pskb)->nh.iph + tcphoff; | 196 | tcph = (void *)ip_hdr(*pskb) + tcphoff; |
198 | tcph->dest = cp->dport; | 197 | tcph->dest = cp->dport; |
199 | 198 | ||
200 | /* | 199 | /* |
@@ -224,15 +223,15 @@ tcp_dnat_handler(struct sk_buff **pskb, | |||
224 | static int | 223 | static int |
225 | tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | 224 | tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) |
226 | { | 225 | { |
227 | unsigned int tcphoff = skb->nh.iph->ihl*4; | 226 | const unsigned int tcphoff = ip_hdrlen(skb); |
228 | 227 | ||
229 | switch (skb->ip_summed) { | 228 | switch (skb->ip_summed) { |
230 | case CHECKSUM_NONE: | 229 | case CHECKSUM_NONE: |
231 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | 230 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); |
232 | case CHECKSUM_COMPLETE: | 231 | case CHECKSUM_COMPLETE: |
233 | if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | 232 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, |
234 | skb->len - tcphoff, | 233 | skb->len - tcphoff, |
235 | skb->nh.iph->protocol, skb->csum)) { | 234 | ip_hdr(skb)->protocol, skb->csum)) { |
236 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 235 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, |
237 | "Failed checksum for"); | 236 | "Failed checksum for"); |
238 | return 0; | 237 | return 0; |
@@ -467,8 +466,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, | |||
467 | { | 466 | { |
468 | struct tcphdr _tcph, *th; | 467 | struct tcphdr _tcph, *th; |
469 | 468 | ||
470 | th = skb_header_pointer(skb, skb->nh.iph->ihl*4, | 469 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); |
471 | sizeof(_tcph), &_tcph); | ||
472 | if (th == NULL) | 470 | if (th == NULL) |
473 | return 0; | 471 | return 0; |
474 | 472 | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 03f0a414cfa4..8ee5fe6a101d 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/udp.h> | 22 | #include <linux/udp.h> |
23 | 23 | ||
24 | #include <net/ip_vs.h> | 24 | #include <net/ip_vs.h> |
25 | 25 | #include <net/ip.h> | |
26 | 26 | ||
27 | static struct ip_vs_conn * | 27 | static struct ip_vs_conn * |
28 | udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | 28 | udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, |
@@ -56,7 +56,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
56 | struct ip_vs_conn *cp; | 56 | struct ip_vs_conn *cp; |
57 | __be16 _ports[2], *pptr; | 57 | __be16 _ports[2], *pptr; |
58 | 58 | ||
59 | pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, | 59 | pptr = skb_header_pointer(skb, ip_hdrlen(skb), |
60 | sizeof(_ports), _ports); | 60 | sizeof(_ports), _ports); |
61 | if (pptr == NULL) | 61 | if (pptr == NULL) |
62 | return NULL; | 62 | return NULL; |
@@ -82,15 +82,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
82 | struct ip_vs_service *svc; | 82 | struct ip_vs_service *svc; |
83 | struct udphdr _udph, *uh; | 83 | struct udphdr _udph, *uh; |
84 | 84 | ||
85 | uh = skb_header_pointer(skb, skb->nh.iph->ihl*4, | 85 | uh = skb_header_pointer(skb, ip_hdrlen(skb), |
86 | sizeof(_udph), &_udph); | 86 | sizeof(_udph), &_udph); |
87 | if (uh == NULL) { | 87 | if (uh == NULL) { |
88 | *verdict = NF_DROP; | 88 | *verdict = NF_DROP; |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
92 | if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, | 92 | if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, |
93 | skb->nh.iph->daddr, uh->dest))) { | 93 | ip_hdr(skb)->daddr, uh->dest))) { |
94 | if (ip_vs_todrop()) { | 94 | if (ip_vs_todrop()) { |
95 | /* | 95 | /* |
96 | * It seems that we are very loaded. | 96 | * It seems that we are very loaded. |
@@ -133,7 +133,7 @@ udp_snat_handler(struct sk_buff **pskb, | |||
133 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 133 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
134 | { | 134 | { |
135 | struct udphdr *udph; | 135 | struct udphdr *udph; |
136 | unsigned int udphoff = (*pskb)->nh.iph->ihl * 4; | 136 | const unsigned int udphoff = ip_hdrlen(*pskb); |
137 | 137 | ||
138 | /* csum_check requires unshared skb */ | 138 | /* csum_check requires unshared skb */ |
139 | if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) | 139 | if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) |
@@ -151,7 +151,7 @@ udp_snat_handler(struct sk_buff **pskb, | |||
151 | return 0; | 151 | return 0; |
152 | } | 152 | } |
153 | 153 | ||
154 | udph = (void *)(*pskb)->nh.iph + udphoff; | 154 | udph = (void *)ip_hdr(*pskb) + udphoff; |
155 | udph->source = cp->vport; | 155 | udph->source = cp->vport; |
156 | 156 | ||
157 | /* | 157 | /* |
@@ -187,7 +187,7 @@ udp_dnat_handler(struct sk_buff **pskb, | |||
187 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | 187 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) |
188 | { | 188 | { |
189 | struct udphdr *udph; | 189 | struct udphdr *udph; |
190 | unsigned int udphoff = (*pskb)->nh.iph->ihl * 4; | 190 | unsigned int udphoff = ip_hdrlen(*pskb); |
191 | 191 | ||
192 | /* csum_check requires unshared skb */ | 192 | /* csum_check requires unshared skb */ |
193 | if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) | 193 | if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) |
@@ -206,7 +206,7 @@ udp_dnat_handler(struct sk_buff **pskb, | |||
206 | return 0; | 206 | return 0; |
207 | } | 207 | } |
208 | 208 | ||
209 | udph = (void *)(*pskb)->nh.iph + udphoff; | 209 | udph = (void *)ip_hdr(*pskb) + udphoff; |
210 | udph->dest = cp->dport; | 210 | udph->dest = cp->dport; |
211 | 211 | ||
212 | /* | 212 | /* |
@@ -239,7 +239,7 @@ static int | |||
239 | udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | 239 | udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) |
240 | { | 240 | { |
241 | struct udphdr _udph, *uh; | 241 | struct udphdr _udph, *uh; |
242 | unsigned int udphoff = skb->nh.iph->ihl*4; | 242 | const unsigned int udphoff = ip_hdrlen(skb); |
243 | 243 | ||
244 | uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); | 244 | uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); |
245 | if (uh == NULL) | 245 | if (uh == NULL) |
@@ -251,10 +251,10 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
251 | skb->csum = skb_checksum(skb, udphoff, | 251 | skb->csum = skb_checksum(skb, udphoff, |
252 | skb->len - udphoff, 0); | 252 | skb->len - udphoff, 0); |
253 | case CHECKSUM_COMPLETE: | 253 | case CHECKSUM_COMPLETE: |
254 | if (csum_tcpudp_magic(skb->nh.iph->saddr, | 254 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, |
255 | skb->nh.iph->daddr, | 255 | ip_hdr(skb)->daddr, |
256 | skb->len - udphoff, | 256 | skb->len - udphoff, |
257 | skb->nh.iph->protocol, | 257 | ip_hdr(skb)->protocol, |
258 | skb->csum)) { | 258 | skb->csum)) { |
259 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | 259 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, |
260 | "Failed checksum for"); | 260 | "Failed checksum for"); |
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 338668f88fe2..1b25b00ef1e1 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c | |||
@@ -201,7 +201,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
201 | { | 201 | { |
202 | struct ip_vs_dest *dest; | 202 | struct ip_vs_dest *dest; |
203 | struct ip_vs_sh_bucket *tbl; | 203 | struct ip_vs_sh_bucket *tbl; |
204 | struct iphdr *iph = skb->nh.iph; | 204 | struct iphdr *iph = ip_hdr(skb); |
205 | 205 | ||
206 | IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); | 206 | IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); |
207 | 207 | ||
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index e1f77bd7c9a5..900ce29db382 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c | |||
@@ -156,7 +156,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
156 | struct ip_vs_protocol *pp) | 156 | struct ip_vs_protocol *pp) |
157 | { | 157 | { |
158 | struct rtable *rt; /* Route to the other host */ | 158 | struct rtable *rt; /* Route to the other host */ |
159 | struct iphdr *iph = skb->nh.iph; | 159 | struct iphdr *iph = ip_hdr(skb); |
160 | u8 tos = iph->tos; | 160 | u8 tos = iph->tos; |
161 | int mtu; | 161 | int mtu; |
162 | struct flowi fl = { | 162 | struct flowi fl = { |
@@ -178,7 +178,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
178 | 178 | ||
179 | /* MTU checking */ | 179 | /* MTU checking */ |
180 | mtu = dst_mtu(&rt->u.dst); | 180 | mtu = dst_mtu(&rt->u.dst); |
181 | if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { | 181 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { |
182 | ip_rt_put(rt); | 182 | ip_rt_put(rt); |
183 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 183 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
184 | IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); | 184 | IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); |
@@ -193,7 +193,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
193 | ip_rt_put(rt); | 193 | ip_rt_put(rt); |
194 | return NF_STOLEN; | 194 | return NF_STOLEN; |
195 | } | 195 | } |
196 | ip_send_check(skb->nh.iph); | 196 | ip_send_check(ip_hdr(skb)); |
197 | 197 | ||
198 | /* drop old route */ | 198 | /* drop old route */ |
199 | dst_release(skb->dst); | 199 | dst_release(skb->dst); |
@@ -226,7 +226,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
226 | { | 226 | { |
227 | struct rtable *rt; /* Route to the other host */ | 227 | struct rtable *rt; /* Route to the other host */ |
228 | int mtu; | 228 | int mtu; |
229 | struct iphdr *iph = skb->nh.iph; | 229 | struct iphdr *iph = ip_hdr(skb); |
230 | 230 | ||
231 | EnterFunction(10); | 231 | EnterFunction(10); |
232 | 232 | ||
@@ -245,7 +245,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
245 | 245 | ||
246 | /* MTU checking */ | 246 | /* MTU checking */ |
247 | mtu = dst_mtu(&rt->u.dst); | 247 | mtu = dst_mtu(&rt->u.dst); |
248 | if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { | 248 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { |
249 | ip_rt_put(rt); | 249 | ip_rt_put(rt); |
250 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 250 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
251 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); | 251 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); |
@@ -266,8 +266,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
266 | /* mangle the packet */ | 266 | /* mangle the packet */ |
267 | if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) | 267 | if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) |
268 | goto tx_error; | 268 | goto tx_error; |
269 | skb->nh.iph->daddr = cp->daddr; | 269 | ip_hdr(skb)->daddr = cp->daddr; |
270 | ip_send_check(skb->nh.iph); | 270 | ip_send_check(ip_hdr(skb)); |
271 | 271 | ||
272 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | 272 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); |
273 | 273 | ||
@@ -320,19 +320,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
320 | { | 320 | { |
321 | struct rtable *rt; /* Route to the other host */ | 321 | struct rtable *rt; /* Route to the other host */ |
322 | struct net_device *tdev; /* Device to other host */ | 322 | struct net_device *tdev; /* Device to other host */ |
323 | struct iphdr *old_iph = skb->nh.iph; | 323 | struct iphdr *old_iph = ip_hdr(skb); |
324 | u8 tos = old_iph->tos; | 324 | u8 tos = old_iph->tos; |
325 | __be16 df = old_iph->frag_off; | 325 | __be16 df = old_iph->frag_off; |
326 | sk_buff_data_t old_transport_header = skb->transport_header; | ||
326 | struct iphdr *iph; /* Our new IP header */ | 327 | struct iphdr *iph; /* Our new IP header */ |
327 | int max_headroom; /* The extra header space needed */ | 328 | int max_headroom; /* The extra header space needed */ |
328 | int mtu; | 329 | int mtu; |
329 | 330 | ||
330 | EnterFunction(10); | 331 | EnterFunction(10); |
331 | 332 | ||
332 | if (skb->protocol != __constant_htons(ETH_P_IP)) { | 333 | if (skb->protocol != htons(ETH_P_IP)) { |
333 | IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, " | 334 | IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, " |
334 | "ETH_P_IP: %d, skb protocol: %d\n", | 335 | "ETH_P_IP: %d, skb protocol: %d\n", |
335 | __constant_htons(ETH_P_IP), skb->protocol); | 336 | htons(ETH_P_IP), skb->protocol); |
336 | goto tx_error; | 337 | goto tx_error; |
337 | } | 338 | } |
338 | 339 | ||
@@ -350,9 +351,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
350 | if (skb->dst) | 351 | if (skb->dst) |
351 | skb->dst->ops->update_pmtu(skb->dst, mtu); | 352 | skb->dst->ops->update_pmtu(skb->dst, mtu); |
352 | 353 | ||
353 | df |= (old_iph->frag_off&__constant_htons(IP_DF)); | 354 | df |= (old_iph->frag_off & htons(IP_DF)); |
354 | 355 | ||
355 | if ((old_iph->frag_off&__constant_htons(IP_DF)) | 356 | if ((old_iph->frag_off & htons(IP_DF)) |
356 | && mtu < ntohs(old_iph->tot_len)) { | 357 | && mtu < ntohs(old_iph->tot_len)) { |
357 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 358 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
358 | ip_rt_put(rt); | 359 | ip_rt_put(rt); |
@@ -377,15 +378,16 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
377 | } | 378 | } |
378 | kfree_skb(skb); | 379 | kfree_skb(skb); |
379 | skb = new_skb; | 380 | skb = new_skb; |
380 | old_iph = skb->nh.iph; | 381 | old_iph = ip_hdr(skb); |
381 | } | 382 | } |
382 | 383 | ||
383 | skb->h.raw = (void *) old_iph; | 384 | skb->transport_header = old_transport_header; |
384 | 385 | ||
385 | /* fix old IP header checksum */ | 386 | /* fix old IP header checksum */ |
386 | ip_send_check(old_iph); | 387 | ip_send_check(old_iph); |
387 | 388 | ||
388 | skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); | 389 | skb_push(skb, sizeof(struct iphdr)); |
390 | skb_reset_network_header(skb); | ||
389 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 391 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
390 | 392 | ||
391 | /* drop old route */ | 393 | /* drop old route */ |
@@ -395,7 +397,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
395 | /* | 397 | /* |
396 | * Push down and install the IPIP header. | 398 | * Push down and install the IPIP header. |
397 | */ | 399 | */ |
398 | iph = skb->nh.iph; | 400 | iph = ip_hdr(skb); |
399 | iph->version = 4; | 401 | iph->version = 4; |
400 | iph->ihl = sizeof(struct iphdr)>>2; | 402 | iph->ihl = sizeof(struct iphdr)>>2; |
401 | iph->frag_off = df; | 403 | iph->frag_off = df; |
@@ -435,7 +437,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
435 | struct ip_vs_protocol *pp) | 437 | struct ip_vs_protocol *pp) |
436 | { | 438 | { |
437 | struct rtable *rt; /* Route to the other host */ | 439 | struct rtable *rt; /* Route to the other host */ |
438 | struct iphdr *iph = skb->nh.iph; | 440 | struct iphdr *iph = ip_hdr(skb); |
439 | int mtu; | 441 | int mtu; |
440 | 442 | ||
441 | EnterFunction(10); | 443 | EnterFunction(10); |
@@ -445,7 +447,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
445 | 447 | ||
446 | /* MTU checking */ | 448 | /* MTU checking */ |
447 | mtu = dst_mtu(&rt->u.dst); | 449 | mtu = dst_mtu(&rt->u.dst); |
448 | if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) { | 450 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { |
449 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 451 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
450 | ip_rt_put(rt); | 452 | ip_rt_put(rt); |
451 | IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n"); | 453 | IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n"); |
@@ -460,7 +462,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
460 | ip_rt_put(rt); | 462 | ip_rt_put(rt); |
461 | return NF_STOLEN; | 463 | return NF_STOLEN; |
462 | } | 464 | } |
463 | ip_send_check(skb->nh.iph); | 465 | ip_send_check(ip_hdr(skb)); |
464 | 466 | ||
465 | /* drop old route */ | 467 | /* drop old route */ |
466 | dst_release(skb->dst); | 468 | dst_release(skb->dst); |
@@ -514,12 +516,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
514 | * mangle and send the packet here (only for VS/NAT) | 516 | * mangle and send the packet here (only for VS/NAT) |
515 | */ | 517 | */ |
516 | 518 | ||
517 | if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos)))) | 519 | if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) |
518 | goto tx_error_icmp; | 520 | goto tx_error_icmp; |
519 | 521 | ||
520 | /* MTU checking */ | 522 | /* MTU checking */ |
521 | mtu = dst_mtu(&rt->u.dst); | 523 | mtu = dst_mtu(&rt->u.dst); |
522 | if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) { | 524 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { |
523 | ip_rt_put(rt); | 525 | ip_rt_put(rt); |
524 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 526 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
525 | IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); | 527 | IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); |
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index 574c735836fc..b03c5ca2c823 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c | |||
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this, | |||
100 | 100 | ||
101 | spin_unlock_bh(&state_lock); | 101 | spin_unlock_bh(&state_lock); |
102 | break; | 102 | break; |
103 | }; | 103 | } |
104 | 104 | ||
105 | return NOTIFY_DONE; | 105 | return NOTIFY_DONE; |
106 | } | 106 | } |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6069a11514f6..b44192924f95 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -10,7 +10,7 @@ | |||
10 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | 10 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
11 | int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) | 11 | int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) |
12 | { | 12 | { |
13 | struct iphdr *iph = (*pskb)->nh.iph; | 13 | const struct iphdr *iph = ip_hdr(*pskb); |
14 | struct rtable *rt; | 14 | struct rtable *rt; |
15 | struct flowi fl = {}; | 15 | struct flowi fl = {}; |
16 | struct dst_entry *odst; | 16 | struct dst_entry *odst; |
@@ -142,7 +142,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) | |||
142 | struct ip_rt_info *rt_info = nf_info_reroute(info); | 142 | struct ip_rt_info *rt_info = nf_info_reroute(info); |
143 | 143 | ||
144 | if (info->hook == NF_IP_LOCAL_OUT) { | 144 | if (info->hook == NF_IP_LOCAL_OUT) { |
145 | const struct iphdr *iph = skb->nh.iph; | 145 | const struct iphdr *iph = ip_hdr(skb); |
146 | 146 | ||
147 | rt_info->tos = iph->tos; | 147 | rt_info->tos = iph->tos; |
148 | rt_info->daddr = iph->daddr; | 148 | rt_info->daddr = iph->daddr; |
@@ -155,7 +155,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) | |||
155 | const struct ip_rt_info *rt_info = nf_info_reroute(info); | 155 | const struct ip_rt_info *rt_info = nf_info_reroute(info); |
156 | 156 | ||
157 | if (info->hook == NF_IP_LOCAL_OUT) { | 157 | if (info->hook == NF_IP_LOCAL_OUT) { |
158 | struct iphdr *iph = (*pskb)->nh.iph; | 158 | const struct iphdr *iph = ip_hdr(*pskb); |
159 | 159 | ||
160 | if (!(iph->tos == rt_info->tos | 160 | if (!(iph->tos == rt_info->tos |
161 | && iph->daddr == rt_info->daddr | 161 | && iph->daddr == rt_info->daddr |
@@ -168,7 +168,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) | |||
168 | __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, | 168 | __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, |
169 | unsigned int dataoff, u_int8_t protocol) | 169 | unsigned int dataoff, u_int8_t protocol) |
170 | { | 170 | { |
171 | struct iphdr *iph = skb->nh.iph; | 171 | const struct iphdr *iph = ip_hdr(skb); |
172 | __sum16 csum = 0; | 172 | __sum16 csum = 0; |
173 | 173 | ||
174 | switch (skb->ip_summed) { | 174 | switch (skb->ip_summed) { |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 601808c796ec..46509fae9fd8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -30,188 +30,6 @@ config NF_CONNTRACK_PROC_COMPAT | |||
30 | 30 | ||
31 | If unsure, say Y. | 31 | If unsure, say Y. |
32 | 32 | ||
33 | # connection tracking, helpers and protocols | ||
34 | config IP_NF_CT_ACCT | ||
35 | bool "Connection tracking flow accounting" | ||
36 | depends on IP_NF_CONNTRACK | ||
37 | help | ||
38 | If this option is enabled, the connection tracking code will | ||
39 | keep per-flow packet and byte counters. | ||
40 | |||
41 | Those counters can be used for flow-based accounting or the | ||
42 | `connbytes' match. | ||
43 | |||
44 | If unsure, say `N'. | ||
45 | |||
46 | config IP_NF_CONNTRACK_MARK | ||
47 | bool 'Connection mark tracking support' | ||
48 | depends on IP_NF_CONNTRACK | ||
49 | help | ||
50 | This option enables support for connection marks, used by the | ||
51 | `CONNMARK' target and `connmark' match. Similar to the mark value | ||
52 | of packets, but this mark value is kept in the conntrack session | ||
53 | instead of the individual packets. | ||
54 | |||
55 | config IP_NF_CONNTRACK_SECMARK | ||
56 | bool 'Connection tracking security mark support' | ||
57 | depends on IP_NF_CONNTRACK && NETWORK_SECMARK | ||
58 | help | ||
59 | This option enables security markings to be applied to | ||
60 | connections. Typically they are copied to connections from | ||
61 | packets using the CONNSECMARK target and copied back from | ||
62 | connections to packets with the same target, with the packets | ||
63 | being originally labeled via SECMARK. | ||
64 | |||
65 | If unsure, say 'N'. | ||
66 | |||
67 | config IP_NF_CONNTRACK_EVENTS | ||
68 | bool "Connection tracking events (EXPERIMENTAL)" | ||
69 | depends on EXPERIMENTAL && IP_NF_CONNTRACK | ||
70 | help | ||
71 | If this option is enabled, the connection tracking code will | ||
72 | provide a notifier chain that can be used by other kernel code | ||
73 | to get notified about changes in the connection tracking state. | ||
74 | |||
75 | IF unsure, say `N'. | ||
76 | |||
77 | config IP_NF_CONNTRACK_NETLINK | ||
78 | tristate 'Connection tracking netlink interface (EXPERIMENTAL)' | ||
79 | depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK | ||
80 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m | ||
81 | depends on IP_NF_NAT=n || IP_NF_NAT | ||
82 | help | ||
83 | This option enables support for a netlink-based userspace interface | ||
84 | |||
85 | |||
86 | config IP_NF_CT_PROTO_SCTP | ||
87 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' | ||
88 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | ||
89 | help | ||
90 | With this option enabled, the connection tracking code will | ||
91 | be able to do state tracking on SCTP connections. | ||
92 | |||
93 | If you want to compile it as a module, say M here and read | ||
94 | <file:Documentation/modules.txt>. If unsure, say `N'. | ||
95 | |||
96 | config IP_NF_FTP | ||
97 | tristate "FTP protocol support" | ||
98 | depends on IP_NF_CONNTRACK | ||
99 | help | ||
100 | Tracking FTP connections is problematic: special helpers are | ||
101 | required for tracking them, and doing masquerading and other forms | ||
102 | of Network Address Translation on them. | ||
103 | |||
104 | To compile it as a module, choose M here. If unsure, say Y. | ||
105 | |||
106 | config IP_NF_IRC | ||
107 | tristate "IRC protocol support" | ||
108 | depends on IP_NF_CONNTRACK | ||
109 | ---help--- | ||
110 | There is a commonly-used extension to IRC called | ||
111 | Direct Client-to-Client Protocol (DCC). This enables users to send | ||
112 | files to each other, and also chat to each other without the need | ||
113 | of a server. DCC Sending is used anywhere you send files over IRC, | ||
114 | and DCC Chat is most commonly used by Eggdrop bots. If you are | ||
115 | using NAT, this extension will enable you to send files and initiate | ||
116 | chats. Note that you do NOT need this extension to get files or | ||
117 | have others initiate chats, or everything else in IRC. | ||
118 | |||
119 | To compile it as a module, choose M here. If unsure, say Y. | ||
120 | |||
121 | config IP_NF_NETBIOS_NS | ||
122 | tristate "NetBIOS name service protocol support (EXPERIMENTAL)" | ||
123 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | ||
124 | help | ||
125 | NetBIOS name service requests are sent as broadcast messages from an | ||
126 | unprivileged port and responded to with unicast messages to the | ||
127 | same port. This make them hard to firewall properly because connection | ||
128 | tracking doesn't deal with broadcasts. This helper tracks locally | ||
129 | originating NetBIOS name service requests and the corresponding | ||
130 | responses. It relies on correct IP address configuration, specifically | ||
131 | netmask and broadcast address. When properly configured, the output | ||
132 | of "ip address show" should look similar to this: | ||
133 | |||
134 | $ ip -4 address show eth0 | ||
135 | 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000 | ||
136 | inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0 | ||
137 | |||
138 | To compile it as a module, choose M here. If unsure, say N. | ||
139 | |||
140 | config IP_NF_TFTP | ||
141 | tristate "TFTP protocol support" | ||
142 | depends on IP_NF_CONNTRACK | ||
143 | help | ||
144 | TFTP connection tracking helper, this is required depending | ||
145 | on how restrictive your ruleset is. | ||
146 | If you are using a tftp client behind -j SNAT or -j MASQUERADING | ||
147 | you will need this. | ||
148 | |||
149 | To compile it as a module, choose M here. If unsure, say Y. | ||
150 | |||
151 | config IP_NF_AMANDA | ||
152 | tristate "Amanda backup protocol support" | ||
153 | depends on IP_NF_CONNTRACK | ||
154 | select TEXTSEARCH | ||
155 | select TEXTSEARCH_KMP | ||
156 | help | ||
157 | If you are running the Amanda backup package <http://www.amanda.org/> | ||
158 | on this machine or machines that will be MASQUERADED through this | ||
159 | machine, then you may want to enable this feature. This allows the | ||
160 | connection tracking and natting code to allow the sub-channels that | ||
161 | Amanda requires for communication of the backup data, messages and | ||
162 | index. | ||
163 | |||
164 | To compile it as a module, choose M here. If unsure, say Y. | ||
165 | |||
166 | config IP_NF_PPTP | ||
167 | tristate 'PPTP protocol support' | ||
168 | depends on IP_NF_CONNTRACK | ||
169 | help | ||
170 | This module adds support for PPTP (Point to Point Tunnelling | ||
171 | Protocol, RFC2637) connection tracking and NAT. | ||
172 | |||
173 | If you are running PPTP sessions over a stateful firewall or NAT | ||
174 | box, you may want to enable this feature. | ||
175 | |||
176 | Please note that not all PPTP modes of operation are supported yet. | ||
177 | For more info, read top of the file | ||
178 | net/ipv4/netfilter/ip_conntrack_pptp.c | ||
179 | |||
180 | If you want to compile it as a module, say M here and read | ||
181 | Documentation/modules.txt. If unsure, say `N'. | ||
182 | |||
183 | config IP_NF_H323 | ||
184 | tristate 'H.323 protocol support (EXPERIMENTAL)' | ||
185 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | ||
186 | help | ||
187 | H.323 is a VoIP signalling protocol from ITU-T. As one of the most | ||
188 | important VoIP protocols, it is widely used by voice hardware and | ||
189 | software including voice gateways, IP phones, Netmeeting, OpenPhone, | ||
190 | Gnomemeeting, etc. | ||
191 | |||
192 | With this module you can support H.323 on a connection tracking/NAT | ||
193 | firewall. | ||
194 | |||
195 | This module supports RAS, Fast Start, H.245 Tunnelling, Call | ||
196 | Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat, | ||
197 | whiteboard, file transfer, etc. For more information, please | ||
198 | visit http://nath323.sourceforge.net/. | ||
199 | |||
200 | If you want to compile it as a module, say 'M' here and read | ||
201 | Documentation/modules.txt. If unsure, say 'N'. | ||
202 | |||
203 | config IP_NF_SIP | ||
204 | tristate "SIP protocol support (EXPERIMENTAL)" | ||
205 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | ||
206 | help | ||
207 | SIP is an application-layer control protocol that can establish, | ||
208 | modify, and terminate multimedia sessions (conferences) such as | ||
209 | Internet telephony calls. With the ip_conntrack_sip and | ||
210 | the ip_nat_sip modules you can support the protocol on a connection | ||
211 | tracking/NATing firewall. | ||
212 | |||
213 | To compile it as a module, choose M here. If unsure, say Y. | ||
214 | |||
215 | config IP_NF_QUEUE | 33 | config IP_NF_QUEUE |
216 | tristate "IP Userspace queueing via NETLINK (OBSOLETE)" | 34 | tristate "IP Userspace queueing via NETLINK (OBSOLETE)" |
217 | help | 35 | help |
@@ -361,17 +179,6 @@ config IP_NF_TARGET_ULOG | |||
361 | 179 | ||
362 | To compile it as a module, choose M here. If unsure, say N. | 180 | To compile it as a module, choose M here. If unsure, say N. |
363 | 181 | ||
364 | # NAT + specific targets: ip_conntrack | ||
365 | config IP_NF_NAT | ||
366 | tristate "Full NAT" | ||
367 | depends on IP_NF_IPTABLES && IP_NF_CONNTRACK | ||
368 | help | ||
369 | The Full NAT option allows masquerading, port forwarding and other | ||
370 | forms of full Network Address Port Translation. It is controlled by | ||
371 | the `nat' table in iptables: see the man page for iptables(8). | ||
372 | |||
373 | To compile it as a module, choose M here. If unsure, say N. | ||
374 | |||
375 | # NAT + specific targets: nf_conntrack | 182 | # NAT + specific targets: nf_conntrack |
376 | config NF_NAT | 183 | config NF_NAT |
377 | tristate "Full NAT" | 184 | tristate "Full NAT" |
@@ -383,11 +190,6 @@ config NF_NAT | |||
383 | 190 | ||
384 | To compile it as a module, choose M here. If unsure, say N. | 191 | To compile it as a module, choose M here. If unsure, say N. |
385 | 192 | ||
386 | config IP_NF_NAT_NEEDED | ||
387 | bool | ||
388 | depends on IP_NF_NAT | ||
389 | default y | ||
390 | |||
391 | config NF_NAT_NEEDED | 193 | config NF_NAT_NEEDED |
392 | bool | 194 | bool |
393 | depends on NF_NAT | 195 | depends on NF_NAT |
@@ -395,7 +197,7 @@ config NF_NAT_NEEDED | |||
395 | 197 | ||
396 | config IP_NF_TARGET_MASQUERADE | 198 | config IP_NF_TARGET_MASQUERADE |
397 | tristate "MASQUERADE target support" | 199 | tristate "MASQUERADE target support" |
398 | depends on (NF_NAT || IP_NF_NAT) | 200 | depends on NF_NAT |
399 | help | 201 | help |
400 | Masquerading is a special case of NAT: all outgoing connections are | 202 | Masquerading is a special case of NAT: all outgoing connections are |
401 | changed to seem to come from a particular interface's address, and | 203 | changed to seem to come from a particular interface's address, and |
@@ -407,7 +209,7 @@ config IP_NF_TARGET_MASQUERADE | |||
407 | 209 | ||
408 | config IP_NF_TARGET_REDIRECT | 210 | config IP_NF_TARGET_REDIRECT |
409 | tristate "REDIRECT target support" | 211 | tristate "REDIRECT target support" |
410 | depends on (NF_NAT || IP_NF_NAT) | 212 | depends on NF_NAT |
411 | help | 213 | help |
412 | REDIRECT is a special case of NAT: all incoming connections are | 214 | REDIRECT is a special case of NAT: all incoming connections are |
413 | mapped onto the incoming interface's address, causing the packets to | 215 | mapped onto the incoming interface's address, causing the packets to |
@@ -418,7 +220,7 @@ config IP_NF_TARGET_REDIRECT | |||
418 | 220 | ||
419 | config IP_NF_TARGET_NETMAP | 221 | config IP_NF_TARGET_NETMAP |
420 | tristate "NETMAP target support" | 222 | tristate "NETMAP target support" |
421 | depends on (NF_NAT || IP_NF_NAT) | 223 | depends on NF_NAT |
422 | help | 224 | help |
423 | NETMAP is an implementation of static 1:1 NAT mapping of network | 225 | NETMAP is an implementation of static 1:1 NAT mapping of network |
424 | addresses. It maps the network address part, while keeping the host | 226 | addresses. It maps the network address part, while keeping the host |
@@ -429,28 +231,13 @@ config IP_NF_TARGET_NETMAP | |||
429 | 231 | ||
430 | config IP_NF_TARGET_SAME | 232 | config IP_NF_TARGET_SAME |
431 | tristate "SAME target support" | 233 | tristate "SAME target support" |
432 | depends on (NF_NAT || IP_NF_NAT) | 234 | depends on NF_NAT |
433 | help | 235 | help |
434 | This option adds a `SAME' target, which works like the standard SNAT | 236 | This option adds a `SAME' target, which works like the standard SNAT |
435 | target, but attempts to give clients the same IP for all connections. | 237 | target, but attempts to give clients the same IP for all connections. |
436 | 238 | ||
437 | To compile it as a module, choose M here. If unsure, say N. | 239 | To compile it as a module, choose M here. If unsure, say N. |
438 | 240 | ||
439 | config IP_NF_NAT_SNMP_BASIC | ||
440 | tristate "Basic SNMP-ALG support (EXPERIMENTAL)" | ||
441 | depends on EXPERIMENTAL && IP_NF_NAT | ||
442 | ---help--- | ||
443 | |||
444 | This module implements an Application Layer Gateway (ALG) for | ||
445 | SNMP payloads. In conjunction with NAT, it allows a network | ||
446 | management system to access multiple private networks with | ||
447 | conflicting addresses. It works by modifying IP addresses | ||
448 | inside SNMP payloads to match IP-layer NAT mapping. | ||
449 | |||
450 | This is the "basic" form of SNMP-ALG, as described in RFC 2962 | ||
451 | |||
452 | To compile it as a module, choose M here. If unsure, say N. | ||
453 | |||
454 | config NF_NAT_SNMP_BASIC | 241 | config NF_NAT_SNMP_BASIC |
455 | tristate "Basic SNMP-ALG support (EXPERIMENTAL)" | 242 | tristate "Basic SNMP-ALG support (EXPERIMENTAL)" |
456 | depends on EXPERIMENTAL && NF_NAT | 243 | depends on EXPERIMENTAL && NF_NAT |
@@ -477,78 +264,37 @@ config NF_NAT_PROTO_GRE | |||
477 | tristate | 264 | tristate |
478 | depends on NF_NAT && NF_CT_PROTO_GRE | 265 | depends on NF_NAT && NF_CT_PROTO_GRE |
479 | 266 | ||
480 | config IP_NF_NAT_FTP | ||
481 | tristate | ||
482 | depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT | ||
483 | default IP_NF_NAT && IP_NF_FTP | ||
484 | |||
485 | config NF_NAT_FTP | 267 | config NF_NAT_FTP |
486 | tristate | 268 | tristate |
487 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 269 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
488 | default NF_NAT && NF_CONNTRACK_FTP | 270 | default NF_NAT && NF_CONNTRACK_FTP |
489 | 271 | ||
490 | config IP_NF_NAT_IRC | ||
491 | tristate | ||
492 | depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n | ||
493 | default IP_NF_NAT if IP_NF_IRC=y | ||
494 | default m if IP_NF_IRC=m | ||
495 | |||
496 | config NF_NAT_IRC | 272 | config NF_NAT_IRC |
497 | tristate | 273 | tristate |
498 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 274 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
499 | default NF_NAT && NF_CONNTRACK_IRC | 275 | default NF_NAT && NF_CONNTRACK_IRC |
500 | 276 | ||
501 | config IP_NF_NAT_TFTP | ||
502 | tristate | ||
503 | depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n | ||
504 | default IP_NF_NAT if IP_NF_TFTP=y | ||
505 | default m if IP_NF_TFTP=m | ||
506 | |||
507 | config NF_NAT_TFTP | 277 | config NF_NAT_TFTP |
508 | tristate | 278 | tristate |
509 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 279 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
510 | default NF_NAT && NF_CONNTRACK_TFTP | 280 | default NF_NAT && NF_CONNTRACK_TFTP |
511 | 281 | ||
512 | config IP_NF_NAT_AMANDA | ||
513 | tristate | ||
514 | depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n | ||
515 | default IP_NF_NAT if IP_NF_AMANDA=y | ||
516 | default m if IP_NF_AMANDA=m | ||
517 | |||
518 | config NF_NAT_AMANDA | 282 | config NF_NAT_AMANDA |
519 | tristate | 283 | tristate |
520 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 284 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
521 | default NF_NAT && NF_CONNTRACK_AMANDA | 285 | default NF_NAT && NF_CONNTRACK_AMANDA |
522 | 286 | ||
523 | config IP_NF_NAT_PPTP | ||
524 | tristate | ||
525 | depends on IP_NF_NAT!=n && IP_NF_PPTP!=n | ||
526 | default IP_NF_NAT if IP_NF_PPTP=y | ||
527 | default m if IP_NF_PPTP=m | ||
528 | |||
529 | config NF_NAT_PPTP | 287 | config NF_NAT_PPTP |
530 | tristate | 288 | tristate |
531 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 289 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
532 | default NF_NAT && NF_CONNTRACK_PPTP | 290 | default NF_NAT && NF_CONNTRACK_PPTP |
533 | select NF_NAT_PROTO_GRE | 291 | select NF_NAT_PROTO_GRE |
534 | 292 | ||
535 | config IP_NF_NAT_H323 | ||
536 | tristate | ||
537 | depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n | ||
538 | default IP_NF_NAT if IP_NF_H323=y | ||
539 | default m if IP_NF_H323=m | ||
540 | |||
541 | config NF_NAT_H323 | 293 | config NF_NAT_H323 |
542 | tristate | 294 | tristate |
543 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 295 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
544 | default NF_NAT && NF_CONNTRACK_H323 | 296 | default NF_NAT && NF_CONNTRACK_H323 |
545 | 297 | ||
546 | config IP_NF_NAT_SIP | ||
547 | tristate | ||
548 | depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n | ||
549 | default IP_NF_NAT if IP_NF_SIP=y | ||
550 | default m if IP_NF_SIP=m | ||
551 | |||
552 | config NF_NAT_SIP | 298 | config NF_NAT_SIP |
553 | tristate | 299 | tristate |
554 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 300 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
@@ -606,9 +352,8 @@ config IP_NF_TARGET_TTL | |||
606 | config IP_NF_TARGET_CLUSTERIP | 352 | config IP_NF_TARGET_CLUSTERIP |
607 | tristate "CLUSTERIP target support (EXPERIMENTAL)" | 353 | tristate "CLUSTERIP target support (EXPERIMENTAL)" |
608 | depends on IP_NF_MANGLE && EXPERIMENTAL | 354 | depends on IP_NF_MANGLE && EXPERIMENTAL |
609 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | 355 | depends on NF_CONNTRACK_IPV4 |
610 | select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK | 356 | select NF_CONNTRACK_MARK |
611 | select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4 | ||
612 | help | 357 | help |
613 | The CLUSTERIP target allows you to build load-balancing clusters of | 358 | The CLUSTERIP target allows you to build load-balancing clusters of |
614 | network servers without having a dedicated load-balancing | 359 | network servers without having a dedicated load-balancing |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 6625ec68180c..409d273f6f82 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -2,8 +2,6 @@ | |||
2 | # Makefile for the netfilter modules on top of IPv4. | 2 | # Makefile for the netfilter modules on top of IPv4. |
3 | # | 3 | # |
4 | 4 | ||
5 | # objects for the standalone - connection tracking / NAT | ||
6 | ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o | ||
7 | # objects for l3 independent conntrack | 5 | # objects for l3 independent conntrack |
8 | nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o | 6 | nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o |
9 | ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) | 7 | ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) |
@@ -12,53 +10,14 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o | |||
12 | endif | 10 | endif |
13 | endif | 11 | endif |
14 | 12 | ||
15 | ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o | 13 | nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o |
16 | nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o | ||
17 | ifneq ($(CONFIG_NF_NAT),) | ||
18 | iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o | 14 | iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o |
19 | else | ||
20 | iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o | ||
21 | endif | ||
22 | |||
23 | ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o | ||
24 | ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o | ||
25 | |||
26 | ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o | ||
27 | ip_nat_h323-objs := ip_nat_helper_h323.o | ||
28 | 15 | ||
29 | # connection tracking | 16 | # connection tracking |
30 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o | ||
31 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o | 17 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o |
32 | 18 | ||
33 | obj-$(CONFIG_IP_NF_NAT) += ip_nat.o | ||
34 | obj-$(CONFIG_NF_NAT) += nf_nat.o | 19 | obj-$(CONFIG_NF_NAT) += nf_nat.o |
35 | 20 | ||
36 | # conntrack netlink interface | ||
37 | obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o | ||
38 | |||
39 | |||
40 | # SCTP protocol connection tracking | ||
41 | obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o | ||
42 | |||
43 | # connection tracking helpers | ||
44 | obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o | ||
45 | obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o | ||
46 | obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o | ||
47 | obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o | ||
48 | obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o | ||
49 | obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o | ||
50 | obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o | ||
51 | obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o | ||
52 | |||
53 | # NAT helpers (ip_conntrack) | ||
54 | obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o | ||
55 | obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o | ||
56 | obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o | ||
57 | obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o | ||
58 | obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o | ||
59 | obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o | ||
60 | obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o | ||
61 | |||
62 | # NAT helpers (nf_conntrack) | 21 | # NAT helpers (nf_conntrack) |
63 | obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o | 22 | obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o |
64 | obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o | 23 | obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o |
@@ -78,7 +37,6 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o | |||
78 | # the three instances of ip_tables | 37 | # the three instances of ip_tables |
79 | obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o | 38 | obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o |
80 | obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o | 39 | obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o |
81 | obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o | ||
82 | obj-$(CONFIG_NF_NAT) += iptable_nat.o | 40 | obj-$(CONFIG_NF_NAT) += iptable_nat.o |
83 | obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o | 41 | obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o |
84 | 42 | ||
@@ -100,7 +58,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o | |||
100 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o | 58 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o |
101 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o | 59 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o |
102 | obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o | 60 | obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o |
103 | obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o | ||
104 | obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o | 61 | obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o |
105 | obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o | 62 | obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o |
106 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o | 63 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 57b0221f9e24..cae41215e3c7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -245,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, | |||
245 | e = get_entry(table_base, private->hook_entry[hook]); | 245 | e = get_entry(table_base, private->hook_entry[hook]); |
246 | back = get_entry(table_base, private->underflow[hook]); | 246 | back = get_entry(table_base, private->underflow[hook]); |
247 | 247 | ||
248 | arp = (*pskb)->nh.arph; | 248 | arp = arp_hdr(*pskb); |
249 | do { | 249 | do { |
250 | if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { | 250 | if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { |
251 | struct arpt_entry_target *t; | 251 | struct arpt_entry_target *t; |
@@ -297,7 +297,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, | |||
297 | t->data); | 297 | t->data); |
298 | 298 | ||
299 | /* Target might have changed stuff. */ | 299 | /* Target might have changed stuff. */ |
300 | arp = (*pskb)->nh.arph; | 300 | arp = arp_hdr(*pskb); |
301 | 301 | ||
302 | if (verdict == ARPT_CONTINUE) | 302 | if (verdict == ARPT_CONTINUE) |
303 | e = (void *)e + e->next_offset; | 303 | e = (void *)e + e->next_offset; |
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 709db4d3f48f..6298d404e7c7 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c | |||
@@ -30,35 +30,35 @@ target(struct sk_buff **pskb, | |||
30 | *pskb = nskb; | 30 | *pskb = nskb; |
31 | } | 31 | } |
32 | 32 | ||
33 | arp = (*pskb)->nh.arph; | 33 | arp = arp_hdr(*pskb); |
34 | arpptr = (*pskb)->nh.raw + sizeof(*arp); | 34 | arpptr = skb_network_header(*pskb) + sizeof(*arp); |
35 | pln = arp->ar_pln; | 35 | pln = arp->ar_pln; |
36 | hln = arp->ar_hln; | 36 | hln = arp->ar_hln; |
37 | /* We assume that pln and hln were checked in the match */ | 37 | /* We assume that pln and hln were checked in the match */ |
38 | if (mangle->flags & ARPT_MANGLE_SDEV) { | 38 | if (mangle->flags & ARPT_MANGLE_SDEV) { |
39 | if (ARPT_DEV_ADDR_LEN_MAX < hln || | 39 | if (ARPT_DEV_ADDR_LEN_MAX < hln || |
40 | (arpptr + hln > (**pskb).tail)) | 40 | (arpptr + hln > skb_tail_pointer(*pskb))) |
41 | return NF_DROP; | 41 | return NF_DROP; |
42 | memcpy(arpptr, mangle->src_devaddr, hln); | 42 | memcpy(arpptr, mangle->src_devaddr, hln); |
43 | } | 43 | } |
44 | arpptr += hln; | 44 | arpptr += hln; |
45 | if (mangle->flags & ARPT_MANGLE_SIP) { | 45 | if (mangle->flags & ARPT_MANGLE_SIP) { |
46 | if (ARPT_MANGLE_ADDR_LEN_MAX < pln || | 46 | if (ARPT_MANGLE_ADDR_LEN_MAX < pln || |
47 | (arpptr + pln > (**pskb).tail)) | 47 | (arpptr + pln > skb_tail_pointer(*pskb))) |
48 | return NF_DROP; | 48 | return NF_DROP; |
49 | memcpy(arpptr, &mangle->u_s.src_ip, pln); | 49 | memcpy(arpptr, &mangle->u_s.src_ip, pln); |
50 | } | 50 | } |
51 | arpptr += pln; | 51 | arpptr += pln; |
52 | if (mangle->flags & ARPT_MANGLE_TDEV) { | 52 | if (mangle->flags & ARPT_MANGLE_TDEV) { |
53 | if (ARPT_DEV_ADDR_LEN_MAX < hln || | 53 | if (ARPT_DEV_ADDR_LEN_MAX < hln || |
54 | (arpptr + hln > (**pskb).tail)) | 54 | (arpptr + hln > skb_tail_pointer(*pskb))) |
55 | return NF_DROP; | 55 | return NF_DROP; |
56 | memcpy(arpptr, mangle->tgt_devaddr, hln); | 56 | memcpy(arpptr, mangle->tgt_devaddr, hln); |
57 | } | 57 | } |
58 | arpptr += hln; | 58 | arpptr += hln; |
59 | if (mangle->flags & ARPT_MANGLE_TIP) { | 59 | if (mangle->flags & ARPT_MANGLE_TIP) { |
60 | if (ARPT_MANGLE_ADDR_LEN_MAX < pln || | 60 | if (ARPT_MANGLE_ADDR_LEN_MAX < pln || |
61 | (arpptr + pln > (**pskb).tail)) | 61 | (arpptr + pln > skb_tail_pointer(*pskb))) |
62 | return NF_DROP; | 62 | return NF_DROP; |
63 | memcpy(arpptr, &mangle->u_t.tgt_ip, pln); | 63 | memcpy(arpptr, &mangle->u_t.tgt_ip, pln); |
64 | } | 64 | } |
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c deleted file mode 100644 index 4f561f52c83a..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ /dev/null | |||
@@ -1,229 +0,0 @@ | |||
1 | /* Amanda extension for IP connection tracking, Version 0.2 | ||
2 | * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> | ||
3 | * based on HW's ip_conntrack_irc.c as well as other modules | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * Module load syntax: | ||
11 | * insmod ip_conntrack_amanda.o [master_timeout=n] | ||
12 | * | ||
13 | * Where master_timeout is the timeout (in seconds) of the master | ||
14 | * connection (port 10080). This defaults to 5 minutes but if | ||
15 | * your clients take longer than 5 minutes to do their work | ||
16 | * before getting back to the Amanda server, you can increase | ||
17 | * this value. | ||
18 | * | ||
19 | */ | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/moduleparam.h> | ||
23 | #include <linux/textsearch.h> | ||
24 | #include <linux/skbuff.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/ip.h> | ||
27 | #include <linux/udp.h> | ||
28 | |||
29 | #include <linux/netfilter.h> | ||
30 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
31 | #include <linux/netfilter_ipv4/ip_conntrack_amanda.h> | ||
32 | |||
33 | static unsigned int master_timeout = 300; | ||
34 | static char *ts_algo = "kmp"; | ||
35 | |||
36 | MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); | ||
37 | MODULE_DESCRIPTION("Amanda connection tracking module"); | ||
38 | MODULE_LICENSE("GPL"); | ||
39 | module_param(master_timeout, uint, 0600); | ||
40 | MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); | ||
41 | module_param(ts_algo, charp, 0400); | ||
42 | MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); | ||
43 | |||
44 | unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, | ||
45 | enum ip_conntrack_info ctinfo, | ||
46 | unsigned int matchoff, | ||
47 | unsigned int matchlen, | ||
48 | struct ip_conntrack_expect *exp); | ||
49 | EXPORT_SYMBOL_GPL(ip_nat_amanda_hook); | ||
50 | |||
51 | enum amanda_strings { | ||
52 | SEARCH_CONNECT, | ||
53 | SEARCH_NEWLINE, | ||
54 | SEARCH_DATA, | ||
55 | SEARCH_MESG, | ||
56 | SEARCH_INDEX, | ||
57 | }; | ||
58 | |||
59 | static struct { | ||
60 | char *string; | ||
61 | size_t len; | ||
62 | struct ts_config *ts; | ||
63 | } search[] = { | ||
64 | [SEARCH_CONNECT] = { | ||
65 | .string = "CONNECT ", | ||
66 | .len = 8, | ||
67 | }, | ||
68 | [SEARCH_NEWLINE] = { | ||
69 | .string = "\n", | ||
70 | .len = 1, | ||
71 | }, | ||
72 | [SEARCH_DATA] = { | ||
73 | .string = "DATA ", | ||
74 | .len = 5, | ||
75 | }, | ||
76 | [SEARCH_MESG] = { | ||
77 | .string = "MESG ", | ||
78 | .len = 5, | ||
79 | }, | ||
80 | [SEARCH_INDEX] = { | ||
81 | .string = "INDEX ", | ||
82 | .len = 6, | ||
83 | }, | ||
84 | }; | ||
85 | |||
86 | static int help(struct sk_buff **pskb, | ||
87 | struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) | ||
88 | { | ||
89 | struct ts_state ts; | ||
90 | struct ip_conntrack_expect *exp; | ||
91 | unsigned int dataoff, start, stop, off, i; | ||
92 | char pbuf[sizeof("65535")], *tmp; | ||
93 | u_int16_t port, len; | ||
94 | int ret = NF_ACCEPT; | ||
95 | typeof(ip_nat_amanda_hook) ip_nat_amanda; | ||
96 | |||
97 | /* Only look at packets from the Amanda server */ | ||
98 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) | ||
99 | return NF_ACCEPT; | ||
100 | |||
101 | /* increase the UDP timeout of the master connection as replies from | ||
102 | * Amanda clients to the server can be quite delayed */ | ||
103 | ip_ct_refresh(ct, *pskb, master_timeout * HZ); | ||
104 | |||
105 | /* No data? */ | ||
106 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | ||
107 | if (dataoff >= (*pskb)->len) { | ||
108 | if (net_ratelimit()) | ||
109 | printk("amanda_help: skblen = %u\n", (*pskb)->len); | ||
110 | return NF_ACCEPT; | ||
111 | } | ||
112 | |||
113 | memset(&ts, 0, sizeof(ts)); | ||
114 | start = skb_find_text(*pskb, dataoff, (*pskb)->len, | ||
115 | search[SEARCH_CONNECT].ts, &ts); | ||
116 | if (start == UINT_MAX) | ||
117 | goto out; | ||
118 | start += dataoff + search[SEARCH_CONNECT].len; | ||
119 | |||
120 | memset(&ts, 0, sizeof(ts)); | ||
121 | stop = skb_find_text(*pskb, start, (*pskb)->len, | ||
122 | search[SEARCH_NEWLINE].ts, &ts); | ||
123 | if (stop == UINT_MAX) | ||
124 | goto out; | ||
125 | stop += start; | ||
126 | |||
127 | for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { | ||
128 | memset(&ts, 0, sizeof(ts)); | ||
129 | off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); | ||
130 | if (off == UINT_MAX) | ||
131 | continue; | ||
132 | off += start + search[i].len; | ||
133 | |||
134 | len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); | ||
135 | if (skb_copy_bits(*pskb, off, pbuf, len)) | ||
136 | break; | ||
137 | pbuf[len] = '\0'; | ||
138 | |||
139 | port = simple_strtoul(pbuf, &tmp, 10); | ||
140 | len = tmp - pbuf; | ||
141 | if (port == 0 || len > 5) | ||
142 | break; | ||
143 | |||
144 | exp = ip_conntrack_expect_alloc(ct); | ||
145 | if (exp == NULL) { | ||
146 | ret = NF_DROP; | ||
147 | goto out; | ||
148 | } | ||
149 | |||
150 | exp->expectfn = NULL; | ||
151 | exp->flags = 0; | ||
152 | |||
153 | exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; | ||
154 | exp->tuple.src.u.tcp.port = 0; | ||
155 | exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; | ||
156 | exp->tuple.dst.protonum = IPPROTO_TCP; | ||
157 | exp->tuple.dst.u.tcp.port = htons(port); | ||
158 | |||
159 | exp->mask.src.ip = htonl(0xFFFFFFFF); | ||
160 | exp->mask.src.u.tcp.port = 0; | ||
161 | exp->mask.dst.ip = htonl(0xFFFFFFFF); | ||
162 | exp->mask.dst.protonum = 0xFF; | ||
163 | exp->mask.dst.u.tcp.port = htons(0xFFFF); | ||
164 | |||
165 | /* RCU read locked by nf_hook_slow */ | ||
166 | ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook); | ||
167 | if (ip_nat_amanda) | ||
168 | ret = ip_nat_amanda(pskb, ctinfo, off - dataoff, | ||
169 | len, exp); | ||
170 | else if (ip_conntrack_expect_related(exp) != 0) | ||
171 | ret = NF_DROP; | ||
172 | ip_conntrack_expect_put(exp); | ||
173 | } | ||
174 | |||
175 | out: | ||
176 | return ret; | ||
177 | } | ||
178 | |||
179 | static struct ip_conntrack_helper amanda_helper = { | ||
180 | .max_expected = 3, | ||
181 | .timeout = 180, | ||
182 | .me = THIS_MODULE, | ||
183 | .help = help, | ||
184 | .name = "amanda", | ||
185 | |||
186 | .tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } }, | ||
187 | .dst = { .protonum = IPPROTO_UDP }, | ||
188 | }, | ||
189 | .mask = { .src = { .u = { 0xFFFF } }, | ||
190 | .dst = { .protonum = 0xFF }, | ||
191 | }, | ||
192 | }; | ||
193 | |||
194 | static void __exit ip_conntrack_amanda_fini(void) | ||
195 | { | ||
196 | int i; | ||
197 | |||
198 | ip_conntrack_helper_unregister(&amanda_helper); | ||
199 | for (i = 0; i < ARRAY_SIZE(search); i++) | ||
200 | textsearch_destroy(search[i].ts); | ||
201 | } | ||
202 | |||
203 | static int __init ip_conntrack_amanda_init(void) | ||
204 | { | ||
205 | int ret, i; | ||
206 | |||
207 | ret = -ENOMEM; | ||
208 | for (i = 0; i < ARRAY_SIZE(search); i++) { | ||
209 | search[i].ts = textsearch_prepare(ts_algo, search[i].string, | ||
210 | search[i].len, | ||
211 | GFP_KERNEL, TS_AUTOLOAD); | ||
212 | if (search[i].ts == NULL) | ||
213 | goto err; | ||
214 | } | ||
215 | ret = ip_conntrack_helper_register(&amanda_helper); | ||
216 | if (ret < 0) | ||
217 | goto err; | ||
218 | return 0; | ||
219 | |||
220 | err: | ||
221 | for (; i >= 0; i--) { | ||
222 | if (search[i].ts) | ||
223 | textsearch_destroy(search[i].ts); | ||
224 | } | ||
225 | return ret; | ||
226 | } | ||
227 | |||
228 | module_init(ip_conntrack_amanda_init); | ||
229 | module_exit(ip_conntrack_amanda_fini); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c deleted file mode 100644 index 23b99ae2cc37..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ /dev/null | |||
@@ -1,1550 +0,0 @@ | |||
1 | /* Connection state tracking for netfilter. This is separated from, | ||
2 | but required by, the NAT layer; it can also be used by an iptables | ||
3 | extension. */ | ||
4 | |||
5 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
6 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | * | ||
12 | * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> | ||
13 | * - new API and handling of conntrack/nat helpers | ||
14 | * - now capable of multiple expectations for one master | ||
15 | * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> | ||
16 | * - add usage/reference counts to ip_conntrack_expect | ||
17 | * - export ip_conntrack[_expect]_{find_get,put} functions | ||
18 | * */ | ||
19 | |||
20 | #include <linux/types.h> | ||
21 | #include <linux/icmp.h> | ||
22 | #include <linux/ip.h> | ||
23 | #include <linux/netfilter.h> | ||
24 | #include <linux/netfilter_ipv4.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/skbuff.h> | ||
27 | #include <linux/proc_fs.h> | ||
28 | #include <linux/vmalloc.h> | ||
29 | #include <net/checksum.h> | ||
30 | #include <net/ip.h> | ||
31 | #include <linux/stddef.h> | ||
32 | #include <linux/sysctl.h> | ||
33 | #include <linux/slab.h> | ||
34 | #include <linux/random.h> | ||
35 | #include <linux/jhash.h> | ||
36 | #include <linux/err.h> | ||
37 | #include <linux/percpu.h> | ||
38 | #include <linux/moduleparam.h> | ||
39 | #include <linux/notifier.h> | ||
40 | |||
41 | /* ip_conntrack_lock protects the main hash table, protocol/helper/expected | ||
42 | registrations, conntrack timers*/ | ||
43 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
44 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
45 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
46 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
47 | |||
48 | #define IP_CONNTRACK_VERSION "2.4" | ||
49 | |||
50 | #if 0 | ||
51 | #define DEBUGP printk | ||
52 | #else | ||
53 | #define DEBUGP(format, args...) | ||
54 | #endif | ||
55 | |||
56 | DEFINE_RWLOCK(ip_conntrack_lock); | ||
57 | |||
58 | /* ip_conntrack_standalone needs this */ | ||
59 | atomic_t ip_conntrack_count = ATOMIC_INIT(0); | ||
60 | |||
61 | void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; | ||
62 | LIST_HEAD(ip_conntrack_expect_list); | ||
63 | struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; | ||
64 | static LIST_HEAD(helpers); | ||
65 | unsigned int ip_conntrack_htable_size __read_mostly = 0; | ||
66 | int ip_conntrack_max __read_mostly; | ||
67 | struct list_head *ip_conntrack_hash __read_mostly; | ||
68 | static struct kmem_cache *ip_conntrack_cachep __read_mostly; | ||
69 | static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly; | ||
70 | struct ip_conntrack ip_conntrack_untracked; | ||
71 | unsigned int ip_ct_log_invalid __read_mostly; | ||
72 | static LIST_HEAD(unconfirmed); | ||
73 | static int ip_conntrack_vmalloc __read_mostly; | ||
74 | |||
75 | static unsigned int ip_conntrack_next_id; | ||
76 | static unsigned int ip_conntrack_expect_next_id; | ||
77 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
78 | ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain); | ||
79 | ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain); | ||
80 | |||
81 | DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); | ||
82 | |||
83 | /* deliver cached events and clear cache entry - must be called with locally | ||
84 | * disabled softirqs */ | ||
85 | static inline void | ||
86 | __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) | ||
87 | { | ||
88 | DEBUGP("ecache: delivering events for %p\n", ecache->ct); | ||
89 | if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) | ||
90 | atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events, | ||
91 | ecache->ct); | ||
92 | ecache->events = 0; | ||
93 | ip_conntrack_put(ecache->ct); | ||
94 | ecache->ct = NULL; | ||
95 | } | ||
96 | |||
97 | /* Deliver all cached events for a particular conntrack. This is called | ||
98 | * by code prior to async packet handling or freeing the skb */ | ||
99 | void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) | ||
100 | { | ||
101 | struct ip_conntrack_ecache *ecache; | ||
102 | |||
103 | local_bh_disable(); | ||
104 | ecache = &__get_cpu_var(ip_conntrack_ecache); | ||
105 | if (ecache->ct == ct) | ||
106 | __ip_ct_deliver_cached_events(ecache); | ||
107 | local_bh_enable(); | ||
108 | } | ||
109 | |||
110 | void __ip_ct_event_cache_init(struct ip_conntrack *ct) | ||
111 | { | ||
112 | struct ip_conntrack_ecache *ecache; | ||
113 | |||
114 | /* take care of delivering potentially old events */ | ||
115 | ecache = &__get_cpu_var(ip_conntrack_ecache); | ||
116 | BUG_ON(ecache->ct == ct); | ||
117 | if (ecache->ct) | ||
118 | __ip_ct_deliver_cached_events(ecache); | ||
119 | /* initialize for this conntrack/packet */ | ||
120 | ecache->ct = ct; | ||
121 | nf_conntrack_get(&ct->ct_general); | ||
122 | } | ||
123 | |||
124 | /* flush the event cache - touches other CPU's data and must not be called while | ||
125 | * packets are still passing through the code */ | ||
126 | static void ip_ct_event_cache_flush(void) | ||
127 | { | ||
128 | struct ip_conntrack_ecache *ecache; | ||
129 | int cpu; | ||
130 | |||
131 | for_each_possible_cpu(cpu) { | ||
132 | ecache = &per_cpu(ip_conntrack_ecache, cpu); | ||
133 | if (ecache->ct) | ||
134 | ip_conntrack_put(ecache->ct); | ||
135 | } | ||
136 | } | ||
137 | #else | ||
138 | static inline void ip_ct_event_cache_flush(void) {} | ||
139 | #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ | ||
140 | |||
141 | DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); | ||
142 | |||
143 | static int ip_conntrack_hash_rnd_initted; | ||
144 | static unsigned int ip_conntrack_hash_rnd; | ||
145 | |||
146 | static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, | ||
147 | unsigned int size, unsigned int rnd) | ||
148 | { | ||
149 | return (jhash_3words((__force u32)tuple->src.ip, | ||
150 | ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), | ||
151 | (tuple->src.u.all | (tuple->dst.u.all << 16)), | ||
152 | rnd) % size); | ||
153 | } | ||
154 | |||
155 | static u_int32_t | ||
156 | hash_conntrack(const struct ip_conntrack_tuple *tuple) | ||
157 | { | ||
158 | return __hash_conntrack(tuple, ip_conntrack_htable_size, | ||
159 | ip_conntrack_hash_rnd); | ||
160 | } | ||
161 | |||
162 | int | ||
163 | ip_ct_get_tuple(const struct iphdr *iph, | ||
164 | const struct sk_buff *skb, | ||
165 | unsigned int dataoff, | ||
166 | struct ip_conntrack_tuple *tuple, | ||
167 | const struct ip_conntrack_protocol *protocol) | ||
168 | { | ||
169 | /* Never happen */ | ||
170 | if (iph->frag_off & htons(IP_OFFSET)) { | ||
171 | printk("ip_conntrack_core: Frag of proto %u.\n", | ||
172 | iph->protocol); | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | tuple->src.ip = iph->saddr; | ||
177 | tuple->dst.ip = iph->daddr; | ||
178 | tuple->dst.protonum = iph->protocol; | ||
179 | tuple->dst.dir = IP_CT_DIR_ORIGINAL; | ||
180 | |||
181 | return protocol->pkt_to_tuple(skb, dataoff, tuple); | ||
182 | } | ||
183 | |||
184 | int | ||
185 | ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, | ||
186 | const struct ip_conntrack_tuple *orig, | ||
187 | const struct ip_conntrack_protocol *protocol) | ||
188 | { | ||
189 | inverse->src.ip = orig->dst.ip; | ||
190 | inverse->dst.ip = orig->src.ip; | ||
191 | inverse->dst.protonum = orig->dst.protonum; | ||
192 | inverse->dst.dir = !orig->dst.dir; | ||
193 | |||
194 | return protocol->invert_tuple(inverse, orig); | ||
195 | } | ||
196 | |||
197 | |||
198 | /* ip_conntrack_expect helper functions */ | ||
199 | void ip_ct_unlink_expect(struct ip_conntrack_expect *exp) | ||
200 | { | ||
201 | IP_NF_ASSERT(!timer_pending(&exp->timeout)); | ||
202 | list_del(&exp->list); | ||
203 | CONNTRACK_STAT_INC(expect_delete); | ||
204 | exp->master->expecting--; | ||
205 | ip_conntrack_expect_put(exp); | ||
206 | } | ||
207 | |||
208 | static void expectation_timed_out(unsigned long ul_expect) | ||
209 | { | ||
210 | struct ip_conntrack_expect *exp = (void *)ul_expect; | ||
211 | |||
212 | write_lock_bh(&ip_conntrack_lock); | ||
213 | ip_ct_unlink_expect(exp); | ||
214 | write_unlock_bh(&ip_conntrack_lock); | ||
215 | ip_conntrack_expect_put(exp); | ||
216 | } | ||
217 | |||
218 | struct ip_conntrack_expect * | ||
219 | __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) | ||
220 | { | ||
221 | struct ip_conntrack_expect *i; | ||
222 | |||
223 | list_for_each_entry(i, &ip_conntrack_expect_list, list) { | ||
224 | if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) | ||
225 | return i; | ||
226 | } | ||
227 | return NULL; | ||
228 | } | ||
229 | |||
230 | /* Just find a expectation corresponding to a tuple. */ | ||
231 | struct ip_conntrack_expect * | ||
232 | ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) | ||
233 | { | ||
234 | struct ip_conntrack_expect *i; | ||
235 | |||
236 | read_lock_bh(&ip_conntrack_lock); | ||
237 | i = __ip_conntrack_expect_find(tuple); | ||
238 | if (i) | ||
239 | atomic_inc(&i->use); | ||
240 | read_unlock_bh(&ip_conntrack_lock); | ||
241 | |||
242 | return i; | ||
243 | } | ||
244 | |||
245 | /* If an expectation for this connection is found, it gets delete from | ||
246 | * global list then returned. */ | ||
247 | static struct ip_conntrack_expect * | ||
248 | find_expectation(const struct ip_conntrack_tuple *tuple) | ||
249 | { | ||
250 | struct ip_conntrack_expect *i; | ||
251 | |||
252 | list_for_each_entry(i, &ip_conntrack_expect_list, list) { | ||
253 | /* If master is not in hash table yet (ie. packet hasn't left | ||
254 | this machine yet), how can other end know about expected? | ||
255 | Hence these are not the droids you are looking for (if | ||
256 | master ct never got confirmed, we'd hold a reference to it | ||
257 | and weird things would happen to future packets). */ | ||
258 | if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) | ||
259 | && is_confirmed(i->master)) { | ||
260 | if (i->flags & IP_CT_EXPECT_PERMANENT) { | ||
261 | atomic_inc(&i->use); | ||
262 | return i; | ||
263 | } else if (del_timer(&i->timeout)) { | ||
264 | ip_ct_unlink_expect(i); | ||
265 | return i; | ||
266 | } | ||
267 | } | ||
268 | } | ||
269 | return NULL; | ||
270 | } | ||
271 | |||
272 | /* delete all expectations for this conntrack */ | ||
273 | void ip_ct_remove_expectations(struct ip_conntrack *ct) | ||
274 | { | ||
275 | struct ip_conntrack_expect *i, *tmp; | ||
276 | |||
277 | /* Optimization: most connection never expect any others. */ | ||
278 | if (ct->expecting == 0) | ||
279 | return; | ||
280 | |||
281 | list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) { | ||
282 | if (i->master == ct && del_timer(&i->timeout)) { | ||
283 | ip_ct_unlink_expect(i); | ||
284 | ip_conntrack_expect_put(i); | ||
285 | } | ||
286 | } | ||
287 | } | ||
288 | |||
289 | static void | ||
290 | clean_from_lists(struct ip_conntrack *ct) | ||
291 | { | ||
292 | DEBUGP("clean_from_lists(%p)\n", ct); | ||
293 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
294 | list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); | ||
295 | |||
296 | /* Destroy all pending expectations */ | ||
297 | ip_ct_remove_expectations(ct); | ||
298 | } | ||
299 | |||
/*
 * Last-reference destructor for a conntrack entry, invoked through
 * ct_general.destroy once the refcount has hit zero.  Runs the helper
 * and protocol destroy hooks, removes leftover expectations, unlinks
 * an unconfirmed entry from the unconfirmed list, drops the master
 * reference and returns the entry to the slab.
 */
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;
	struct ip_conntrack_helper *helper;
	typeof(ip_conntrack_destroyed) destroyed;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	helper = ct->helper;
	if (helper && helper->destroy)
		helper->destroy(ct);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	rcu_read_lock();
	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	/* Optional global destruction notifier, published via RCU. */
	destroyed = rcu_dereference(ip_conntrack_destroyed);
	if (destroyed)
		destroyed(ct);

	rcu_read_unlock();

	write_lock_bh(&ip_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	ip_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	CONNTRACK_STAT_INC(delete);
	write_unlock_bh(&ip_conntrack_lock);

	/* Drop the reference taken on the master at expectation time. */
	if (ct->master)
		ip_conntrack_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	ip_conntrack_free(ct);
}
355 | |||
/*
 * Timer callback: the conntrack's timeout expired.  Unlink it from the
 * hash under the write lock, then drop the reference that the timer
 * implicitly held (see __ip_conntrack_confirm()).
 */
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	write_lock_bh(&ip_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	CONNTRACK_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}
368 | |||
369 | struct ip_conntrack_tuple_hash * | ||
370 | __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, | ||
371 | const struct ip_conntrack *ignored_conntrack) | ||
372 | { | ||
373 | struct ip_conntrack_tuple_hash *h; | ||
374 | unsigned int hash = hash_conntrack(tuple); | ||
375 | |||
376 | list_for_each_entry(h, &ip_conntrack_hash[hash], list) { | ||
377 | if (tuplehash_to_ctrack(h) != ignored_conntrack && | ||
378 | ip_ct_tuple_equal(tuple, &h->tuple)) { | ||
379 | CONNTRACK_STAT_INC(found); | ||
380 | return h; | ||
381 | } | ||
382 | CONNTRACK_STAT_INC(searched); | ||
383 | } | ||
384 | |||
385 | return NULL; | ||
386 | } | ||
387 | |||
388 | /* Find a connection corresponding to a tuple. */ | ||
389 | struct ip_conntrack_tuple_hash * | ||
390 | ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, | ||
391 | const struct ip_conntrack *ignored_conntrack) | ||
392 | { | ||
393 | struct ip_conntrack_tuple_hash *h; | ||
394 | |||
395 | read_lock_bh(&ip_conntrack_lock); | ||
396 | h = __ip_conntrack_find(tuple, ignored_conntrack); | ||
397 | if (h) | ||
398 | atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); | ||
399 | read_unlock_bh(&ip_conntrack_lock); | ||
400 | |||
401 | return h; | ||
402 | } | ||
403 | |||
/*
 * Link both directions of @ct into the hash table at the precomputed
 * bucket indices.  Caller must hold the ip_conntrack_lock write lock.
 */
static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	/* Assign a fresh, globally increasing conntrack id. */
	ct->id = ++ip_conntrack_next_id;
	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
		 &ip_conntrack_hash[hash]);
	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
		 &ip_conntrack_hash[repl_hash]);
}
414 | |||
415 | void ip_conntrack_hash_insert(struct ip_conntrack *ct) | ||
416 | { | ||
417 | unsigned int hash, repl_hash; | ||
418 | |||
419 | hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
420 | repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
421 | |||
422 | write_lock_bh(&ip_conntrack_lock); | ||
423 | __ip_conntrack_hash_insert(ct, hash, repl_hash); | ||
424 | write_unlock_bh(&ip_conntrack_lock); | ||
425 | } | ||
426 | |||
/* Confirm a connection given skb; places it in hash table */
/*
 * Called for the first packet of a connection in the ORIGINAL
 * direction once it has traversed all hooks.  Moves the conntrack from
 * the unconfirmed list into the hash table, starts its timeout timer
 * and takes the timer's reference.  Returns NF_DROP if another CPU
 * raced us and already inserted a conflicting entry.
 */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = ip_conntrack_get(*pskb, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction. Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns. But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means noone else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&ip_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash. If there is, we lost race. */
	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple))
			goto out;
	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple))
			goto out;

	/* Remove from unconfirmed list */
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	/* Extra reference owned by the running timer; dropped in
	 * death_by_timeout(). */
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);
	CONNTRACK_STAT_INC(insert);
	write_unlock_bh(&ip_conntrack_lock);
	/* Queue events describing what we now know about this
	 * connection; delivered later, outside the lock. */
	if (ct->helper)
		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
	ip_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, *pskb);

	return NF_ACCEPT;

out:
	CONNTRACK_STAT_INC(insert_failed);
	write_unlock_bh(&ip_conntrack_lock);
	return NF_DROP;
}
502 | |||
503 | /* Returns true if a connection correspondings to the tuple (required | ||
504 | for NAT). */ | ||
505 | int | ||
506 | ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, | ||
507 | const struct ip_conntrack *ignored_conntrack) | ||
508 | { | ||
509 | struct ip_conntrack_tuple_hash *h; | ||
510 | |||
511 | read_lock_bh(&ip_conntrack_lock); | ||
512 | h = __ip_conntrack_find(tuple, ignored_conntrack); | ||
513 | read_unlock_bh(&ip_conntrack_lock); | ||
514 | |||
515 | return h != NULL; | ||
516 | } | ||
517 | |||
/* There's a small race here where we may free a just-assured
   connection. Too bad: we're in trouble anyway. */
/*
 * Evict one non-ASSURED conntrack from @chain to relieve table
 * pressure.  Returns 1 if an entry was dropped, 0 otherwise.
 */
static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct = NULL, *tmp;
	int dropped = 0;

	read_lock_bh(&ip_conntrack_lock);
	list_for_each_entry_reverse(h, chain, list) {
		tmp = tuplehash_to_ctrack(h);
		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
			ct = tmp;
			/* Hold a reference so the entry survives after
			 * we drop the lock below. */
			atomic_inc(&ct->ct_general.use);
			break;
		}
	}
	read_unlock_bh(&ip_conntrack_lock);

	if (!ct)
		return dropped;

	/* del_timer() success means we now own the timer's reference
	 * and may kill the entry ourselves. */
	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		CONNTRACK_STAT_INC_ATOMIC(early_drop);
	}
	ip_conntrack_put(ct);
	return dropped;
}
549 | |||
550 | static struct ip_conntrack_helper * | ||
551 | __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) | ||
552 | { | ||
553 | struct ip_conntrack_helper *h; | ||
554 | |||
555 | list_for_each_entry(h, &helpers, list) { | ||
556 | if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) | ||
557 | return h; | ||
558 | } | ||
559 | return NULL; | ||
560 | } | ||
561 | |||
/*
 * Find a helper matching @tuple and pin its module.  Returns NULL if
 * no helper matches or its module is going away.  Release with
 * ip_conntrack_helper_put().
 */
struct ip_conntrack_helper *
ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *helper;

	/* need ip_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&ip_conntrack_lock);

	helper = __ip_conntrack_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}

	read_unlock_bh(&ip_conntrack_lock);

	return helper;
}
584 | |||
/* Release the module reference taken by ip_conntrack_helper_find_get(). */
void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
{
	module_put(helper->me);
}
589 | |||
/*
 * Raw lookup of the L4 protocol tracker for @protocol.  Callers here
 * invoke it under rcu_read_lock() (see destroy_conntrack and
 * ip_conntrack_in).
 */
struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol)
{
	return ip_ct_protos[protocol];
}
595 | |||
/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
/*
 * Look up the protocol tracker and pin its module; on module-get
 * failure fall back to the always-available generic tracker.  Release
 * with ip_conntrack_proto_put().
 */
struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	rcu_read_lock();
	p = __ip_conntrack_proto_find(protocol);
	if (p) {
		if (!try_module_get(p->me))
			p = &ip_conntrack_generic_protocol;
	}
	rcu_read_unlock();

	return p;
}
613 | |||
/* Release the module reference taken by ip_conntrack_proto_find_get(). */
void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
{
	module_put(p->me);
}
618 | |||
/*
 * Allocate and minimally initialise a conntrack entry for the
 * orig/repl tuple pair.  Returns ERR_PTR(-ENOMEM) if the table is full
 * (and early_drop() freed nothing) or the slab allocation fails.  The
 * timeout timer is initialised but not started; that happens at
 * confirmation time.
 */
struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
					struct ip_conntrack_tuple *repl)
{
	struct ip_conntrack *conntrack;

	/* NOTE(review): this check-then-set of the hash seed is not
	 * synchronised; two early packets could both see it unset and
	 * write the seed.  Looks benign (both store random bytes) —
	 * confirm. */
	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	/* We don't want any race condition at early drop stage */
	atomic_inc(&ip_conntrack_count);

	if (ip_conntrack_max
	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&ip_conntrack_hash[hash])) {
			atomic_dec(&ip_conntrack_count);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		atomic_dec(&ip_conntrack_count);
		return ERR_PTR(-ENOMEM);
	}

	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	return conntrack;
}
664 | |||
/*
 * Return @conntrack to the slab cache and decrement the global count
 * bumped in ip_conntrack_alloc().
 */
void
ip_conntrack_free(struct ip_conntrack *conntrack)
{
	atomic_dec(&ip_conntrack_count);
	kmem_cache_free(ip_conntrack_cachep, conntrack);
}
671 | |||
/* Allocate a new conntrack: we return -ENOMEM if classification
 * failed due to stress. Otherwise it really is unclassifiable */
/*
 * Build a conntrack for a packet that matched no existing entry.
 * Fulfils a pending expectation (inheriting master/mark/secmark) or
 * assigns a helper, then parks the entry on the unconfirmed list.
 */
static struct ip_conntrack_tuple_hash *
init_conntrack(struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	struct ip_conntrack_expect *exp;

	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
	if (conntrack == NULL || IS_ERR(conntrack))
		return (struct ip_conntrack_tuple_hash *)conntrack;

	/* Let the L4 tracker vet the first packet; reject if it says
	 * this cannot start a connection. */
	if (!protocol->new(conntrack, skb)) {
		ip_conntrack_free(conntrack);
		return NULL;
	}

	write_lock_bh(&ip_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
			conntrack, exp);
		/* Welcome, Mr. Bond. We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* this is ugly, but there is no other place where to put it */
		conntrack->nat.masq_index = exp->master->nat.masq_index;
#endif
#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
		conntrack->secmark = exp->master->secmark;
#endif
		/* Pin the master until destroy_conntrack() drops it. */
		nf_conntrack_get(&conntrack->master->ct_general);
		CONNTRACK_STAT_INC(expect_new);
	} else {
		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);

		CONNTRACK_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&ip_conntrack_lock);

	/* Run the expectation callback and drop find_expectation()'s
	 * reference outside the lock. */
	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		ip_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
738 | |||
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
/*
 * Extract the tuple from @skb, find or create the matching conntrack,
 * classify the packet (NEW/RELATED/ESTABLISHED, reply or not) and
 * attach the result to the skb.  Returns NULL if untrackable, or an
 * ERR_PTR propagated from init_conntrack() under memory pressure.
 */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	/* Fragments must have been reassembled by this point. */
	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
				&tuple,proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n",
			       ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n",
			       ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n",
			       ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	/* Hand the conntrack reference to the skb. */
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
794 | |||
/* Netfilter hook itself. */
/*
 * Main conntrack entry point, run for every IPv4 packet on the
 * PRE_ROUTING/LOCAL_OUT hooks.  Resolves the packet to a conntrack,
 * lets the L4 tracker update state, and returns a netfilter verdict.
 */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)? Ignore. */
	if ((*pskb)->nfct) {
		CONNTRACK_STAT_INC_ATOMIC(ignore);
		return NF_ACCEPT;
	}

	/* Never happen */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
		printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
		       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	/* rcu_read_lock()ed by nf_hook_slow */
	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);

	/* It may be an special packet, error, unclean...
	 * inverse of the return code tells to the netfilter
	 * core what to do with the packet. */
	if (proto->error != NULL
	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
		CONNTRACK_STAT_INC_ATOMIC(error);
		CONNTRACK_STAT_INC_ATOMIC(invalid);
		return -ret;
	}

	if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
		/* Not valid part of a connection */
		CONNTRACK_STAT_INC_ATOMIC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		CONNTRACK_STAT_INC_ATOMIC(drop);
		return NF_DROP;
	}

	IP_NF_ASSERT((*pskb)->nfct);

	/* Let the L4 state machine process the packet. */
	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do*/
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		CONNTRACK_STAT_INC_ATOMIC(invalid);
		return -ret;
	}

	/* First packet seen in the reply direction: record it once. */
	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}
880 | |||
881 | int invert_tuplepr(struct ip_conntrack_tuple *inverse, | ||
882 | const struct ip_conntrack_tuple *orig) | ||
883 | { | ||
884 | struct ip_conntrack_protocol *proto; | ||
885 | int ret; | ||
886 | |||
887 | rcu_read_lock(); | ||
888 | proto = __ip_conntrack_proto_find(orig->dst.protonum); | ||
889 | ret = ip_ct_invert_tuple(inverse, orig, proto); | ||
890 | rcu_read_unlock(); | ||
891 | |||
892 | return ret; | ||
893 | } | ||
894 | |||
/* Would two expected things clash? */
/*
 * Two expectations clash when their tuples agree on every field that
 * BOTH masks cover; compare under the intersection of the two masks.
 */
static inline int expect_clash(const struct ip_conntrack_expect *a,
			       const struct ip_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { a->mask.src.ip & b->mask.src.ip,
		      { a->mask.src.u.all & b->mask.src.u.all } },
		    { a->mask.dst.ip & b->mask.dst.ip,
		      { a->mask.dst.u.all & b->mask.dst.u.all },
		      a->mask.dst.protonum & b->mask.dst.protonum } };

	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
910 | |||
911 | static inline int expect_matches(const struct ip_conntrack_expect *a, | ||
912 | const struct ip_conntrack_expect *b) | ||
913 | { | ||
914 | return a->master == b->master | ||
915 | && ip_ct_tuple_equal(&a->tuple, &b->tuple) | ||
916 | && ip_ct_tuple_equal(&a->mask, &b->mask); | ||
917 | } | ||
918 | |||
/* Generally a bad idea to call this: could have matched already. */
/*
 * Remove a previously registered expectation matching @exp, if its
 * timer has not already fired.
 */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
	struct ip_conntrack_expect *i;

	write_lock_bh(&ip_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			write_unlock_bh(&ip_conntrack_lock);
			/* Drop the timer's reference. */
			ip_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&ip_conntrack_lock);
}
936 | |||
/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
/*
 * Allocate an expectation owned by @me with an initial refcount of 1
 * (the caller's reference; drop with ip_conntrack_expect_put()).
 * Returns NULL on allocation failure.
 */
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
	struct ip_conntrack_expect *new;

	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}
	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
953 | |||
/* Drop one reference on @exp; frees it when the last reference goes. */
void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(ip_conntrack_expect_cachep, exp);
}
959 | |||
/*
 * Link @exp into the global expectation list and start its timeout.
 * Caller holds the ip_conntrack_lock write lock.  Takes two extra
 * references: one for the list membership and one for the running
 * timer (released in expectation_timed_out).
 */
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
	/* Reference for the expectation list. */
	atomic_inc(&exp->use);
	exp->master->expecting++;
	list_add(&exp->list, &ip_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++ip_conntrack_expect_next_id;
	/* Reference for the timer. */
	atomic_inc(&exp->use);
	CONNTRACK_STAT_INC(expect_create);
}
976 | |||
/* Race with expectations being used means we could have none to find; OK. */
/*
 * Kill the oldest pending expectation owned by @master (the list is
 * newest-first, so traverse in reverse).  Caller holds the
 * ip_conntrack_lock write lock.
 */
static void evict_oldest_expect(struct ip_conntrack *master)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (i->master == master) {
			/* del_timer() success means we own the timer's
			 * reference and may unlink/drop it ourselves. */
			if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				ip_conntrack_expect_put(i);
			}
			break;
		}
	}
}
992 | |||
993 | static inline int refresh_timer(struct ip_conntrack_expect *i) | ||
994 | { | ||
995 | if (!del_timer(&i->timeout)) | ||
996 | return 0; | ||
997 | |||
998 | i->timeout.expires = jiffies + i->master->helper->timeout*HZ; | ||
999 | add_timer(&i->timeout); | ||
1000 | return 1; | ||
1001 | } | ||
1002 | |||
1003 | int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) | ||
1004 | { | ||
1005 | struct ip_conntrack_expect *i; | ||
1006 | int ret; | ||
1007 | |||
1008 | DEBUGP("ip_conntrack_expect_related %p\n", related_to); | ||
1009 | DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); | ||
1010 | DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); | ||
1011 | |||
1012 | write_lock_bh(&ip_conntrack_lock); | ||
1013 | list_for_each_entry(i, &ip_conntrack_expect_list, list) { | ||
1014 | if (expect_matches(i, expect)) { | ||
1015 | /* Refresh timer: if it's dying, ignore.. */ | ||
1016 | if (refresh_timer(i)) { | ||
1017 | ret = 0; | ||
1018 | goto out; | ||
1019 | } | ||
1020 | } else if (expect_clash(i, expect)) { | ||
1021 | ret = -EBUSY; | ||
1022 | goto out; | ||
1023 | } | ||
1024 | } | ||
1025 | |||
1026 | /* Will be over limit? */ | ||
1027 | if (expect->master->helper->max_expected && | ||
1028 | expect->master->expecting >= expect->master->helper->max_expected) | ||
1029 | evict_oldest_expect(expect->master); | ||
1030 | |||
1031 | ip_conntrack_expect_insert(expect); | ||
1032 | ip_conntrack_expect_event(IPEXP_NEW, expect); | ||
1033 | ret = 0; | ||
1034 | out: | ||
1035 | write_unlock_bh(&ip_conntrack_lock); | ||
1036 | return ret; | ||
1037 | } | ||
1038 | |||
/* Alter reply tuple (maybe alter helper). This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
/*
 * Replace the REPLY-direction tuple of an unconfirmed conntrack and,
 * if it is neither expected nor expecting, re-select its helper based
 * on the new reply tuple.
 */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			      const struct ip_conntrack_tuple *newreply)
{
	write_lock_bh(&ip_conntrack_lock);
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && conntrack->expecting == 0)
		conntrack->helper = __ip_conntrack_helper_find(newreply);
	write_unlock_bh(&ip_conntrack_lock);
}
1056 | |||
/*
 * Register a connection-tracking helper.  A zero timeout would make
 * expectation timers meaningless, hence the BUG_ON.  Always returns 0.
 */
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	BUG_ON(me->timeout == 0);
	write_lock_bh(&ip_conntrack_lock);
	list_add(&me->list, &helpers);
	write_unlock_bh(&ip_conntrack_lock);

	return 0;
}
1066 | |||
1067 | struct ip_conntrack_helper * | ||
1068 | __ip_conntrack_helper_find_byname(const char *name) | ||
1069 | { | ||
1070 | struct ip_conntrack_helper *h; | ||
1071 | |||
1072 | list_for_each_entry(h, &helpers, list) { | ||
1073 | if (!strcmp(h->name, name)) | ||
1074 | return h; | ||
1075 | } | ||
1076 | |||
1077 | return NULL; | ||
1078 | } | ||
1079 | |||
1080 | static inline void unhelp(struct ip_conntrack_tuple_hash *i, | ||
1081 | const struct ip_conntrack_helper *me) | ||
1082 | { | ||
1083 | if (tuplehash_to_ctrack(i)->helper == me) { | ||
1084 | ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); | ||
1085 | tuplehash_to_ctrack(i)->helper = NULL; | ||
1086 | } | ||
1087 | } | ||
1088 | |||
/*
 * Unregister helper @me: remove it from the helper list, cancel every
 * expectation whose master uses it, detach it from all conntracks
 * (hashed and unconfirmed), then wait out any readers still running
 * in softirq context.
 */
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&ip_conntrack_lock);
	list_del(&me->list);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
		if (exp->master->helper == me && del_timer(&exp->timeout)) {
			ip_ct_unlink_expect(exp);
			ip_conntrack_expect_put(exp);
		}
	}
	/* Get rid of expecteds, set helpers to NULL. */
	list_for_each_entry(h, &unconfirmed, list)
		unhelp(h, me);
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		list_for_each_entry(h, &ip_conntrack_hash[i], list)
			unhelp(h, me);
	}
	write_unlock_bh(&ip_conntrack_lock);

	/* Someone could be still looking at the helper in a bh. */
	synchronize_net();
}
1118 | |||
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
/*
 * Extend @ct's lifetime by @extra_jiffies (absolute for unconfirmed
 * entries whose timer is not yet running, relative to now otherwise)
 * and optionally update per-direction packet/byte counters.  Events
 * are cached after the lock is dropped.
 */
void __ip_ct_refresh_acct(struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			const struct sk_buff *skb,
			unsigned long extra_jiffies,
			int do_acct)
{
	int event = 0;

	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
	IP_NF_ASSERT(skb);

	write_lock_bh(&ip_conntrack_lock);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
		write_unlock_bh(&ip_conntrack_lock);
		return;
	}

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

#ifdef CONFIG_IP_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
						ntohs(skb->nh.iph->tot_len);
		/* Counters are halfway to overflow: signal userspace so
		 * it can snapshot and reset them. */
		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&ip_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		ip_conntrack_event_cache(event, skb);
}
1169 | |||
1170 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
1171 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
1172 | /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be | ||
1173 | * in ip_conntrack_core, since we don't want the protocols to autoload | ||
1174 | * or depend on ctnetlink */ | ||
/*
 * Dump the source/destination ports of @tuple into @skb as netlink
 * attributes.  Returns 0 on success, -1 if the skb ran out of room
 * (NFA_PUT jumps to nfattr_failure on overflow).
 */
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct ip_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}
1187 | |||
1188 | int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], | ||
1189 | struct ip_conntrack_tuple *t) | ||
1190 | { | ||
1191 | if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) | ||
1192 | return -EINVAL; | ||
1193 | |||
1194 | t->src.u.tcp.port = | ||
1195 | *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); | ||
1196 | t->dst.u.tcp.port = | ||
1197 | *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); | ||
1198 | |||
1199 | return 0; | ||
1200 | } | ||
1201 | #endif | ||
1202 | |||
1203 | /* Returns new sk_buff, or NULL */ | ||
1204 | struct sk_buff * | ||
1205 | ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) | ||
1206 | { | ||
1207 | skb_orphan(skb); | ||
1208 | |||
1209 | local_bh_disable(); | ||
1210 | skb = ip_defrag(skb, user); | ||
1211 | local_bh_enable(); | ||
1212 | |||
1213 | if (skb) | ||
1214 | ip_send_check(skb->nh.iph); | ||
1215 | return skb; | ||
1216 | } | ||
1217 | |||
1218 | /* Used by ipt_REJECT. */ | ||
1219 | static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) | ||
1220 | { | ||
1221 | struct ip_conntrack *ct; | ||
1222 | enum ip_conntrack_info ctinfo; | ||
1223 | |||
1224 | /* This ICMP is in reverse direction to the packet which caused it */ | ||
1225 | ct = ip_conntrack_get(skb, &ctinfo); | ||
1226 | |||
1227 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) | ||
1228 | ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; | ||
1229 | else | ||
1230 | ctinfo = IP_CT_RELATED; | ||
1231 | |||
1232 | /* Attach to new skbuff, and increment count */ | ||
1233 | nskb->nfct = &ct->ct_general; | ||
1234 | nskb->nfctinfo = ctinfo; | ||
1235 | nf_conntrack_get(nskb->nfct); | ||
1236 | } | ||
1237 | |||
1238 | /* Bring out ya dead! */ | ||
1239 | static struct ip_conntrack * | ||
1240 | get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), | ||
1241 | void *data, unsigned int *bucket) | ||
1242 | { | ||
1243 | struct ip_conntrack_tuple_hash *h; | ||
1244 | struct ip_conntrack *ct; | ||
1245 | |||
1246 | write_lock_bh(&ip_conntrack_lock); | ||
1247 | for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { | ||
1248 | list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { | ||
1249 | ct = tuplehash_to_ctrack(h); | ||
1250 | if (iter(ct, data)) | ||
1251 | goto found; | ||
1252 | } | ||
1253 | } | ||
1254 | list_for_each_entry(h, &unconfirmed, list) { | ||
1255 | ct = tuplehash_to_ctrack(h); | ||
1256 | if (iter(ct, data)) | ||
1257 | set_bit(IPS_DYING_BIT, &ct->status); | ||
1258 | } | ||
1259 | write_unlock_bh(&ip_conntrack_lock); | ||
1260 | return NULL; | ||
1261 | |||
1262 | found: | ||
1263 | atomic_inc(&ct->ct_general.use); | ||
1264 | write_unlock_bh(&ip_conntrack_lock); | ||
1265 | return ct; | ||
1266 | } | ||
1267 | |||
1268 | void | ||
1269 | ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) | ||
1270 | { | ||
1271 | struct ip_conntrack *ct; | ||
1272 | unsigned int bucket = 0; | ||
1273 | |||
1274 | while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { | ||
1275 | /* Time to push up daises... */ | ||
1276 | if (del_timer(&ct->timeout)) | ||
1277 | death_by_timeout((unsigned long)ct); | ||
1278 | /* ... else the timer will get him soon. */ | ||
1279 | |||
1280 | ip_conntrack_put(ct); | ||
1281 | } | ||
1282 | } | ||
1283 | |||
1284 | /* Fast function for those who don't want to parse /proc (and I don't | ||
1285 | blame them). */ | ||
1286 | /* Reversing the socket's dst/src point of view gives us the reply | ||
1287 | mapping. */ | ||
1288 | static int | ||
1289 | getorigdst(struct sock *sk, int optval, void __user *user, int *len) | ||
1290 | { | ||
1291 | struct inet_sock *inet = inet_sk(sk); | ||
1292 | struct ip_conntrack_tuple_hash *h; | ||
1293 | struct ip_conntrack_tuple tuple; | ||
1294 | |||
1295 | IP_CT_TUPLE_U_BLANK(&tuple); | ||
1296 | tuple.src.ip = inet->rcv_saddr; | ||
1297 | tuple.src.u.tcp.port = inet->sport; | ||
1298 | tuple.dst.ip = inet->daddr; | ||
1299 | tuple.dst.u.tcp.port = inet->dport; | ||
1300 | tuple.dst.protonum = IPPROTO_TCP; | ||
1301 | |||
1302 | /* We only do TCP at the moment: is there a better way? */ | ||
1303 | if (strcmp(sk->sk_prot->name, "TCP")) { | ||
1304 | DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); | ||
1305 | return -ENOPROTOOPT; | ||
1306 | } | ||
1307 | |||
1308 | if ((unsigned int) *len < sizeof(struct sockaddr_in)) { | ||
1309 | DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", | ||
1310 | *len, sizeof(struct sockaddr_in)); | ||
1311 | return -EINVAL; | ||
1312 | } | ||
1313 | |||
1314 | h = ip_conntrack_find_get(&tuple, NULL); | ||
1315 | if (h) { | ||
1316 | struct sockaddr_in sin; | ||
1317 | struct ip_conntrack *ct = tuplehash_to_ctrack(h); | ||
1318 | |||
1319 | sin.sin_family = AF_INET; | ||
1320 | sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
1321 | .tuple.dst.u.tcp.port; | ||
1322 | sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
1323 | .tuple.dst.ip; | ||
1324 | memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); | ||
1325 | |||
1326 | DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", | ||
1327 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | ||
1328 | ip_conntrack_put(ct); | ||
1329 | if (copy_to_user(user, &sin, sizeof(sin)) != 0) | ||
1330 | return -EFAULT; | ||
1331 | else | ||
1332 | return 0; | ||
1333 | } | ||
1334 | DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", | ||
1335 | NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), | ||
1336 | NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); | ||
1337 | return -ENOENT; | ||
1338 | } | ||
1339 | |||
1340 | static struct nf_sockopt_ops so_getorigdst = { | ||
1341 | .pf = PF_INET, | ||
1342 | .get_optmin = SO_ORIGINAL_DST, | ||
1343 | .get_optmax = SO_ORIGINAL_DST+1, | ||
1344 | .get = &getorigdst, | ||
1345 | }; | ||
1346 | |||
1347 | static int kill_all(struct ip_conntrack *i, void *data) | ||
1348 | { | ||
1349 | return 1; | ||
1350 | } | ||
1351 | |||
1352 | void ip_conntrack_flush(void) | ||
1353 | { | ||
1354 | ip_ct_iterate_cleanup(kill_all, NULL); | ||
1355 | } | ||
1356 | |||
1357 | static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) | ||
1358 | { | ||
1359 | if (vmalloced) | ||
1360 | vfree(hash); | ||
1361 | else | ||
1362 | free_pages((unsigned long)hash, | ||
1363 | get_order(sizeof(struct list_head) * size)); | ||
1364 | } | ||
1365 | |||
1366 | /* Mishearing the voices in his head, our hero wonders how he's | ||
1367 | supposed to kill the mall. */ | ||
1368 | void ip_conntrack_cleanup(void) | ||
1369 | { | ||
1370 | rcu_assign_pointer(ip_ct_attach, NULL); | ||
1371 | |||
1372 | /* This makes sure all current packets have passed through | ||
1373 | netfilter framework. Roll on, two-stage module | ||
1374 | delete... */ | ||
1375 | synchronize_net(); | ||
1376 | |||
1377 | ip_ct_event_cache_flush(); | ||
1378 | i_see_dead_people: | ||
1379 | ip_conntrack_flush(); | ||
1380 | if (atomic_read(&ip_conntrack_count) != 0) { | ||
1381 | schedule(); | ||
1382 | goto i_see_dead_people; | ||
1383 | } | ||
1384 | /* wait until all references to ip_conntrack_untracked are dropped */ | ||
1385 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) | ||
1386 | schedule(); | ||
1387 | |||
1388 | kmem_cache_destroy(ip_conntrack_cachep); | ||
1389 | kmem_cache_destroy(ip_conntrack_expect_cachep); | ||
1390 | free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, | ||
1391 | ip_conntrack_htable_size); | ||
1392 | nf_unregister_sockopt(&so_getorigdst); | ||
1393 | } | ||
1394 | |||
1395 | static struct list_head *alloc_hashtable(int size, int *vmalloced) | ||
1396 | { | ||
1397 | struct list_head *hash; | ||
1398 | unsigned int i; | ||
1399 | |||
1400 | *vmalloced = 0; | ||
1401 | hash = (void*)__get_free_pages(GFP_KERNEL, | ||
1402 | get_order(sizeof(struct list_head) | ||
1403 | * size)); | ||
1404 | if (!hash) { | ||
1405 | *vmalloced = 1; | ||
1406 | printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); | ||
1407 | hash = vmalloc(sizeof(struct list_head) * size); | ||
1408 | } | ||
1409 | |||
1410 | if (hash) | ||
1411 | for (i = 0; i < size; i++) | ||
1412 | INIT_LIST_HEAD(&hash[i]); | ||
1413 | |||
1414 | return hash; | ||
1415 | } | ||
1416 | |||
1417 | static int set_hashsize(const char *val, struct kernel_param *kp) | ||
1418 | { | ||
1419 | int i, bucket, hashsize, vmalloced; | ||
1420 | int old_vmalloced, old_size; | ||
1421 | int rnd; | ||
1422 | struct list_head *hash, *old_hash; | ||
1423 | struct ip_conntrack_tuple_hash *h; | ||
1424 | |||
1425 | /* On boot, we can set this without any fancy locking. */ | ||
1426 | if (!ip_conntrack_htable_size) | ||
1427 | return param_set_int(val, kp); | ||
1428 | |||
1429 | hashsize = simple_strtol(val, NULL, 0); | ||
1430 | if (!hashsize) | ||
1431 | return -EINVAL; | ||
1432 | |||
1433 | hash = alloc_hashtable(hashsize, &vmalloced); | ||
1434 | if (!hash) | ||
1435 | return -ENOMEM; | ||
1436 | |||
1437 | /* We have to rehash for the new table anyway, so we also can | ||
1438 | * use a new random seed */ | ||
1439 | get_random_bytes(&rnd, 4); | ||
1440 | |||
1441 | write_lock_bh(&ip_conntrack_lock); | ||
1442 | for (i = 0; i < ip_conntrack_htable_size; i++) { | ||
1443 | while (!list_empty(&ip_conntrack_hash[i])) { | ||
1444 | h = list_entry(ip_conntrack_hash[i].next, | ||
1445 | struct ip_conntrack_tuple_hash, list); | ||
1446 | list_del(&h->list); | ||
1447 | bucket = __hash_conntrack(&h->tuple, hashsize, rnd); | ||
1448 | list_add_tail(&h->list, &hash[bucket]); | ||
1449 | } | ||
1450 | } | ||
1451 | old_size = ip_conntrack_htable_size; | ||
1452 | old_vmalloced = ip_conntrack_vmalloc; | ||
1453 | old_hash = ip_conntrack_hash; | ||
1454 | |||
1455 | ip_conntrack_htable_size = hashsize; | ||
1456 | ip_conntrack_vmalloc = vmalloced; | ||
1457 | ip_conntrack_hash = hash; | ||
1458 | ip_conntrack_hash_rnd = rnd; | ||
1459 | write_unlock_bh(&ip_conntrack_lock); | ||
1460 | |||
1461 | free_conntrack_hash(old_hash, old_vmalloced, old_size); | ||
1462 | return 0; | ||
1463 | } | ||
1464 | |||
1465 | module_param_call(hashsize, set_hashsize, param_get_uint, | ||
1466 | &ip_conntrack_htable_size, 0600); | ||
1467 | |||
1468 | int __init ip_conntrack_init(void) | ||
1469 | { | ||
1470 | unsigned int i; | ||
1471 | int ret; | ||
1472 | |||
1473 | /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB | ||
1474 | * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ | ||
1475 | if (!ip_conntrack_htable_size) { | ||
1476 | ip_conntrack_htable_size | ||
1477 | = (((num_physpages << PAGE_SHIFT) / 16384) | ||
1478 | / sizeof(struct list_head)); | ||
1479 | if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) | ||
1480 | ip_conntrack_htable_size = 8192; | ||
1481 | if (ip_conntrack_htable_size < 16) | ||
1482 | ip_conntrack_htable_size = 16; | ||
1483 | } | ||
1484 | ip_conntrack_max = 8 * ip_conntrack_htable_size; | ||
1485 | |||
1486 | printk("ip_conntrack version %s (%u buckets, %d max)" | ||
1487 | " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, | ||
1488 | ip_conntrack_htable_size, ip_conntrack_max, | ||
1489 | sizeof(struct ip_conntrack)); | ||
1490 | |||
1491 | ret = nf_register_sockopt(&so_getorigdst); | ||
1492 | if (ret != 0) { | ||
1493 | printk(KERN_ERR "Unable to register netfilter socket option\n"); | ||
1494 | return ret; | ||
1495 | } | ||
1496 | |||
1497 | ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, | ||
1498 | &ip_conntrack_vmalloc); | ||
1499 | if (!ip_conntrack_hash) { | ||
1500 | printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); | ||
1501 | goto err_unreg_sockopt; | ||
1502 | } | ||
1503 | |||
1504 | ip_conntrack_cachep = kmem_cache_create("ip_conntrack", | ||
1505 | sizeof(struct ip_conntrack), 0, | ||
1506 | 0, NULL, NULL); | ||
1507 | if (!ip_conntrack_cachep) { | ||
1508 | printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); | ||
1509 | goto err_free_hash; | ||
1510 | } | ||
1511 | |||
1512 | ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", | ||
1513 | sizeof(struct ip_conntrack_expect), | ||
1514 | 0, 0, NULL, NULL); | ||
1515 | if (!ip_conntrack_expect_cachep) { | ||
1516 | printk(KERN_ERR "Unable to create ip_expect slab cache\n"); | ||
1517 | goto err_free_conntrack_slab; | ||
1518 | } | ||
1519 | |||
1520 | /* Don't NEED lock here, but good form anyway. */ | ||
1521 | write_lock_bh(&ip_conntrack_lock); | ||
1522 | for (i = 0; i < MAX_IP_CT_PROTO; i++) | ||
1523 | rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol); | ||
1524 | /* Sew in builtin protocols. */ | ||
1525 | rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp); | ||
1526 | rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp); | ||
1527 | rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp); | ||
1528 | write_unlock_bh(&ip_conntrack_lock); | ||
1529 | |||
1530 | /* For use by ipt_REJECT */ | ||
1531 | rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach); | ||
1532 | |||
1533 | /* Set up fake conntrack: | ||
1534 | - to never be deleted, not in any hashes */ | ||
1535 | atomic_set(&ip_conntrack_untracked.ct_general.use, 1); | ||
1536 | /* - and look it like as a confirmed connection */ | ||
1537 | set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); | ||
1538 | |||
1539 | return ret; | ||
1540 | |||
1541 | err_free_conntrack_slab: | ||
1542 | kmem_cache_destroy(ip_conntrack_cachep); | ||
1543 | err_free_hash: | ||
1544 | free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, | ||
1545 | ip_conntrack_htable_size); | ||
1546 | err_unreg_sockopt: | ||
1547 | nf_unregister_sockopt(&so_getorigdst); | ||
1548 | |||
1549 | return -ENOMEM; | ||
1550 | } | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c deleted file mode 100644 index 1faa68ab9432..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ /dev/null | |||
@@ -1,520 +0,0 @@ | |||
1 | /* FTP extension for IP connection tracking. */ | ||
2 | |||
3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
4 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/netfilter.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/ctype.h> | ||
15 | #include <net/checksum.h> | ||
16 | #include <net/tcp.h> | ||
17 | |||
18 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
19 | #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> | ||
20 | #include <linux/moduleparam.h> | ||
21 | |||
22 | MODULE_LICENSE("GPL"); | ||
23 | MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | ||
24 | MODULE_DESCRIPTION("ftp connection tracking helper"); | ||
25 | |||
26 | /* This is slow, but it's simple. --RR */ | ||
27 | static char *ftp_buffer; | ||
28 | static DEFINE_SPINLOCK(ip_ftp_lock); | ||
29 | |||
30 | #define MAX_PORTS 8 | ||
31 | static unsigned short ports[MAX_PORTS]; | ||
32 | static int ports_c; | ||
33 | module_param_array(ports, ushort, &ports_c, 0400); | ||
34 | |||
35 | static int loose; | ||
36 | module_param(loose, bool, 0600); | ||
37 | |||
38 | unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, | ||
39 | enum ip_conntrack_info ctinfo, | ||
40 | enum ip_ct_ftp_type type, | ||
41 | unsigned int matchoff, | ||
42 | unsigned int matchlen, | ||
43 | struct ip_conntrack_expect *exp, | ||
44 | u32 *seq); | ||
45 | EXPORT_SYMBOL_GPL(ip_nat_ftp_hook); | ||
46 | |||
47 | #if 0 | ||
48 | #define DEBUGP printk | ||
49 | #else | ||
50 | #define DEBUGP(format, args...) | ||
51 | #endif | ||
52 | |||
53 | static int try_rfc959(const char *, size_t, u_int32_t [], char); | ||
54 | static int try_eprt(const char *, size_t, u_int32_t [], char); | ||
55 | static int try_epsv_response(const char *, size_t, u_int32_t [], char); | ||
56 | |||
57 | static const struct ftp_search { | ||
58 | const char *pattern; | ||
59 | size_t plen; | ||
60 | char skip; | ||
61 | char term; | ||
62 | enum ip_ct_ftp_type ftptype; | ||
63 | int (*getnum)(const char *, size_t, u_int32_t[], char); | ||
64 | } search[IP_CT_DIR_MAX][2] = { | ||
65 | [IP_CT_DIR_ORIGINAL] = { | ||
66 | { | ||
67 | .pattern = "PORT", | ||
68 | .plen = sizeof("PORT") - 1, | ||
69 | .skip = ' ', | ||
70 | .term = '\r', | ||
71 | .ftptype = IP_CT_FTP_PORT, | ||
72 | .getnum = try_rfc959, | ||
73 | }, | ||
74 | { | ||
75 | .pattern = "EPRT", | ||
76 | .plen = sizeof("EPRT") - 1, | ||
77 | .skip = ' ', | ||
78 | .term = '\r', | ||
79 | .ftptype = IP_CT_FTP_EPRT, | ||
80 | .getnum = try_eprt, | ||
81 | }, | ||
82 | }, | ||
83 | [IP_CT_DIR_REPLY] = { | ||
84 | { | ||
85 | .pattern = "227 ", | ||
86 | .plen = sizeof("227 ") - 1, | ||
87 | .skip = '(', | ||
88 | .term = ')', | ||
89 | .ftptype = IP_CT_FTP_PASV, | ||
90 | .getnum = try_rfc959, | ||
91 | }, | ||
92 | { | ||
93 | .pattern = "229 ", | ||
94 | .plen = sizeof("229 ") - 1, | ||
95 | .skip = '(', | ||
96 | .term = ')', | ||
97 | .ftptype = IP_CT_FTP_EPSV, | ||
98 | .getnum = try_epsv_response, | ||
99 | }, | ||
100 | }, | ||
101 | }; | ||
102 | |||
103 | static int try_number(const char *data, size_t dlen, u_int32_t array[], | ||
104 | int array_size, char sep, char term) | ||
105 | { | ||
106 | u_int32_t i, len; | ||
107 | |||
108 | memset(array, 0, sizeof(array[0])*array_size); | ||
109 | |||
110 | /* Keep data pointing at next char. */ | ||
111 | for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) { | ||
112 | if (*data >= '0' && *data <= '9') { | ||
113 | array[i] = array[i]*10 + *data - '0'; | ||
114 | } | ||
115 | else if (*data == sep) | ||
116 | i++; | ||
117 | else { | ||
118 | /* Unexpected character; true if it's the | ||
119 | terminator and we're finished. */ | ||
120 | if (*data == term && i == array_size - 1) | ||
121 | return len; | ||
122 | |||
123 | DEBUGP("Char %u (got %u nums) `%u' unexpected\n", | ||
124 | len, i, *data); | ||
125 | return 0; | ||
126 | } | ||
127 | } | ||
128 | DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep); | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ | ||
134 | static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6], | ||
135 | char term) | ||
136 | { | ||
137 | return try_number(data, dlen, array, 6, ',', term); | ||
138 | } | ||
139 | |||
140 | /* Grab port: number up to delimiter */ | ||
141 | static int get_port(const char *data, int start, size_t dlen, char delim, | ||
142 | u_int32_t array[2]) | ||
143 | { | ||
144 | u_int16_t port = 0; | ||
145 | int i; | ||
146 | |||
147 | for (i = start; i < dlen; i++) { | ||
148 | /* Finished? */ | ||
149 | if (data[i] == delim) { | ||
150 | if (port == 0) | ||
151 | break; | ||
152 | array[0] = port >> 8; | ||
153 | array[1] = port; | ||
154 | return i + 1; | ||
155 | } | ||
156 | else if (data[i] >= '0' && data[i] <= '9') | ||
157 | port = port*10 + data[i] - '0'; | ||
158 | else /* Some other crap */ | ||
159 | break; | ||
160 | } | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | /* Returns 0, or length of numbers: |1|132.235.1.2|6275| */ | ||
165 | static int try_eprt(const char *data, size_t dlen, u_int32_t array[6], | ||
166 | char term) | ||
167 | { | ||
168 | char delim; | ||
169 | int length; | ||
170 | |||
171 | /* First character is delimiter, then "1" for IPv4, then | ||
172 | delimiter again. */ | ||
173 | if (dlen <= 3) return 0; | ||
174 | delim = data[0]; | ||
175 | if (isdigit(delim) || delim < 33 || delim > 126 | ||
176 | || data[1] != '1' || data[2] != delim) | ||
177 | return 0; | ||
178 | |||
179 | DEBUGP("EPRT: Got |1|!\n"); | ||
180 | /* Now we have IP address. */ | ||
181 | length = try_number(data + 3, dlen - 3, array, 4, '.', delim); | ||
182 | if (length == 0) | ||
183 | return 0; | ||
184 | |||
185 | DEBUGP("EPRT: Got IP address!\n"); | ||
186 | /* Start offset includes initial "|1|", and trailing delimiter */ | ||
187 | return get_port(data, 3 + length + 1, dlen, delim, array+4); | ||
188 | } | ||
189 | |||
190 | /* Returns 0, or length of numbers: |||6446| */ | ||
191 | static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6], | ||
192 | char term) | ||
193 | { | ||
194 | char delim; | ||
195 | |||
196 | /* Three delimiters. */ | ||
197 | if (dlen <= 3) return 0; | ||
198 | delim = data[0]; | ||
199 | if (isdigit(delim) || delim < 33 || delim > 126 | ||
200 | || data[1] != delim || data[2] != delim) | ||
201 | return 0; | ||
202 | |||
203 | return get_port(data, 3, dlen, delim, array+4); | ||
204 | } | ||
205 | |||
206 | /* Return 1 for match, 0 for accept, -1 for partial. */ | ||
207 | static int find_pattern(const char *data, size_t dlen, | ||
208 | const char *pattern, size_t plen, | ||
209 | char skip, char term, | ||
210 | unsigned int *numoff, | ||
211 | unsigned int *numlen, | ||
212 | u_int32_t array[6], | ||
213 | int (*getnum)(const char *, size_t, u_int32_t[], char)) | ||
214 | { | ||
215 | size_t i; | ||
216 | |||
217 | DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); | ||
218 | if (dlen == 0) | ||
219 | return 0; | ||
220 | |||
221 | if (dlen <= plen) { | ||
222 | /* Short packet: try for partial? */ | ||
223 | if (strnicmp(data, pattern, dlen) == 0) | ||
224 | return -1; | ||
225 | else return 0; | ||
226 | } | ||
227 | |||
228 | if (strnicmp(data, pattern, plen) != 0) { | ||
229 | #if 0 | ||
230 | size_t i; | ||
231 | |||
232 | DEBUGP("ftp: string mismatch\n"); | ||
233 | for (i = 0; i < plen; i++) { | ||
234 | DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", | ||
235 | i, data[i], data[i], | ||
236 | pattern[i], pattern[i]); | ||
237 | } | ||
238 | #endif | ||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | DEBUGP("Pattern matches!\n"); | ||
243 | /* Now we've found the constant string, try to skip | ||
244 | to the 'skip' character */ | ||
245 | for (i = plen; data[i] != skip; i++) | ||
246 | if (i == dlen - 1) return -1; | ||
247 | |||
248 | /* Skip over the last character */ | ||
249 | i++; | ||
250 | |||
251 | DEBUGP("Skipped up to `%c'!\n", skip); | ||
252 | |||
253 | *numoff = i; | ||
254 | *numlen = getnum(data + i, dlen - i, array, term); | ||
255 | if (!*numlen) | ||
256 | return -1; | ||
257 | |||
258 | DEBUGP("Match succeeded!\n"); | ||
259 | return 1; | ||
260 | } | ||
261 | |||
262 | /* Look up to see if we're just after a \n. */ | ||
263 | static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) | ||
264 | { | ||
265 | unsigned int i; | ||
266 | |||
267 | for (i = 0; i < info->seq_aft_nl_num[dir]; i++) | ||
268 | if (info->seq_aft_nl[dir][i] == seq) | ||
269 | return 1; | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | /* We don't update if it's older than what we have. */ | ||
274 | static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, | ||
275 | struct sk_buff *skb) | ||
276 | { | ||
277 | unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; | ||
278 | |||
279 | /* Look for oldest: if we find exact match, we're done. */ | ||
280 | for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { | ||
281 | if (info->seq_aft_nl[dir][i] == nl_seq) | ||
282 | return; | ||
283 | |||
284 | if (oldest == info->seq_aft_nl_num[dir] | ||
285 | || before(info->seq_aft_nl[dir][i], oldest)) | ||
286 | oldest = i; | ||
287 | } | ||
288 | |||
289 | if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { | ||
290 | info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; | ||
291 | ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
292 | } else if (oldest != NUM_SEQ_TO_REMEMBER) { | ||
293 | info->seq_aft_nl[dir][oldest] = nl_seq; | ||
294 | ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
295 | } | ||
296 | } | ||
297 | |||
298 | static int help(struct sk_buff **pskb, | ||
299 | struct ip_conntrack *ct, | ||
300 | enum ip_conntrack_info ctinfo) | ||
301 | { | ||
302 | unsigned int dataoff, datalen; | ||
303 | struct tcphdr _tcph, *th; | ||
304 | char *fb_ptr; | ||
305 | int ret; | ||
306 | u32 seq, array[6] = { 0 }; | ||
307 | int dir = CTINFO2DIR(ctinfo); | ||
308 | unsigned int matchlen, matchoff; | ||
309 | struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info; | ||
310 | struct ip_conntrack_expect *exp; | ||
311 | unsigned int i; | ||
312 | int found = 0, ends_in_nl; | ||
313 | typeof(ip_nat_ftp_hook) ip_nat_ftp; | ||
314 | |||
315 | /* Until there's been traffic both ways, don't look in packets. */ | ||
316 | if (ctinfo != IP_CT_ESTABLISHED | ||
317 | && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { | ||
318 | DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); | ||
319 | return NF_ACCEPT; | ||
320 | } | ||
321 | |||
322 | th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, | ||
323 | sizeof(_tcph), &_tcph); | ||
324 | if (th == NULL) | ||
325 | return NF_ACCEPT; | ||
326 | |||
327 | dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4; | ||
328 | /* No data? */ | ||
329 | if (dataoff >= (*pskb)->len) { | ||
330 | DEBUGP("ftp: pskblen = %u\n", (*pskb)->len); | ||
331 | return NF_ACCEPT; | ||
332 | } | ||
333 | datalen = (*pskb)->len - dataoff; | ||
334 | |||
335 | spin_lock_bh(&ip_ftp_lock); | ||
336 | fb_ptr = skb_header_pointer(*pskb, dataoff, | ||
337 | (*pskb)->len - dataoff, ftp_buffer); | ||
338 | BUG_ON(fb_ptr == NULL); | ||
339 | |||
340 | ends_in_nl = (fb_ptr[datalen - 1] == '\n'); | ||
341 | seq = ntohl(th->seq) + datalen; | ||
342 | |||
343 | /* Look up to see if we're just after a \n. */ | ||
344 | if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { | ||
345 | /* Now if this ends in \n, update ftp info. */ | ||
346 | DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", | ||
347 | ct_ftp_info->seq_aft_nl[0][dir] | ||
348 | old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl); | ||
349 | ret = NF_ACCEPT; | ||
350 | goto out_update_nl; | ||
351 | } | ||
352 | |||
353 | /* Initialize IP array to expected address (it's not mentioned | ||
354 | in EPSV responses) */ | ||
355 | array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF; | ||
356 | array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF; | ||
357 | array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF; | ||
358 | array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF; | ||
359 | |||
360 | for (i = 0; i < ARRAY_SIZE(search[dir]); i++) { | ||
361 | found = find_pattern(fb_ptr, (*pskb)->len - dataoff, | ||
362 | search[dir][i].pattern, | ||
363 | search[dir][i].plen, | ||
364 | search[dir][i].skip, | ||
365 | search[dir][i].term, | ||
366 | &matchoff, &matchlen, | ||
367 | array, | ||
368 | search[dir][i].getnum); | ||
369 | if (found) break; | ||
370 | } | ||
371 | if (found == -1) { | ||
372 | /* We don't usually drop packets. After all, this is | ||
373 | connection tracking, not packet filtering. | ||
374 | However, it is necessary for accurate tracking in | ||
375 | this case. */ | ||
376 | if (net_ratelimit()) | ||
377 | printk("conntrack_ftp: partial %s %u+%u\n", | ||
378 | search[dir][i].pattern, | ||
379 | ntohl(th->seq), datalen); | ||
380 | ret = NF_DROP; | ||
381 | goto out; | ||
382 | } else if (found == 0) { /* No match */ | ||
383 | ret = NF_ACCEPT; | ||
384 | goto out_update_nl; | ||
385 | } | ||
386 | |||
387 | DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n", | ||
388 | fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff); | ||
389 | |||
390 | /* Allocate expectation which will be inserted */ | ||
391 | exp = ip_conntrack_expect_alloc(ct); | ||
392 | if (exp == NULL) { | ||
393 | ret = NF_DROP; | ||
394 | goto out; | ||
395 | } | ||
396 | |||
397 | /* We refer to the reverse direction ("!dir") tuples here, | ||
398 | * because we're expecting something in the other direction. | ||
399 | * Doesn't matter unless NAT is happening. */ | ||
400 | exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; | ||
401 | |||
402 | if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]) | ||
403 | != ct->tuplehash[dir].tuple.src.ip) { | ||
404 | /* Enrico Scholz's passive FTP to partially RNAT'd ftp | ||
405 | server: it really wants us to connect to a | ||
406 | different IP address. Simply don't record it for | ||
407 | NAT. */ | ||
408 | DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", | ||
409 | array[0], array[1], array[2], array[3], | ||
410 | NIPQUAD(ct->tuplehash[dir].tuple.src.ip)); | ||
411 | |||
412 | /* Thanks to Cristiano Lincoln Mattos | ||
413 | <lincoln@cesar.org.br> for reporting this potential | ||
414 | problem (DMZ machines opening holes to internal | ||
415 | networks, or the packet filter itself). */ | ||
416 | if (!loose) { | ||
417 | ret = NF_ACCEPT; | ||
418 | goto out_put_expect; | ||
419 | } | ||
420 | exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16) | ||
421 | | (array[2] << 8) | array[3]); | ||
422 | } | ||
423 | |||
424 | exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; | ||
425 | exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]); | ||
426 | exp->tuple.src.u.tcp.port = 0; /* Don't care. */ | ||
427 | exp->tuple.dst.protonum = IPPROTO_TCP; | ||
428 | exp->mask = ((struct ip_conntrack_tuple) | ||
429 | { { htonl(0xFFFFFFFF), { 0 } }, | ||
430 | { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }}); | ||
431 | |||
432 | exp->expectfn = NULL; | ||
433 | exp->flags = 0; | ||
434 | |||
435 | /* Now, NAT might want to mangle the packet, and register the | ||
436 | * (possibly changed) expectation itself. */ | ||
437 | ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook); | ||
438 | if (ip_nat_ftp) | ||
439 | ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, | ||
440 | matchoff, matchlen, exp, &seq); | ||
441 | else { | ||
442 | /* Can't expect this? Best to drop packet now. */ | ||
443 | if (ip_conntrack_expect_related(exp) != 0) | ||
444 | ret = NF_DROP; | ||
445 | else | ||
446 | ret = NF_ACCEPT; | ||
447 | } | ||
448 | |||
449 | out_put_expect: | ||
450 | ip_conntrack_expect_put(exp); | ||
451 | |||
452 | out_update_nl: | ||
453 | /* Now if this ends in \n, update ftp info. Seq may have been | ||
454 | * adjusted by NAT code. */ | ||
455 | if (ends_in_nl) | ||
456 | update_nl_seq(seq, ct_ftp_info,dir, *pskb); | ||
457 | out: | ||
458 | spin_unlock_bh(&ip_ftp_lock); | ||
459 | return ret; | ||
460 | } | ||
461 | |||
462 | static struct ip_conntrack_helper ftp[MAX_PORTS]; | ||
463 | static char ftp_names[MAX_PORTS][sizeof("ftp-65535")]; | ||
464 | |||
465 | /* Not __exit: called from init() */ | ||
466 | static void ip_conntrack_ftp_fini(void) | ||
467 | { | ||
468 | int i; | ||
469 | for (i = 0; i < ports_c; i++) { | ||
470 | DEBUGP("ip_ct_ftp: unregistering helper for port %d\n", | ||
471 | ports[i]); | ||
472 | ip_conntrack_helper_unregister(&ftp[i]); | ||
473 | } | ||
474 | |||
475 | kfree(ftp_buffer); | ||
476 | } | ||
477 | |||
478 | static int __init ip_conntrack_ftp_init(void) | ||
479 | { | ||
480 | int i, ret; | ||
481 | char *tmpname; | ||
482 | |||
483 | ftp_buffer = kmalloc(65536, GFP_KERNEL); | ||
484 | if (!ftp_buffer) | ||
485 | return -ENOMEM; | ||
486 | |||
487 | if (ports_c == 0) | ||
488 | ports[ports_c++] = FTP_PORT; | ||
489 | |||
490 | for (i = 0; i < ports_c; i++) { | ||
491 | ftp[i].tuple.src.u.tcp.port = htons(ports[i]); | ||
492 | ftp[i].tuple.dst.protonum = IPPROTO_TCP; | ||
493 | ftp[i].mask.src.u.tcp.port = htons(0xFFFF); | ||
494 | ftp[i].mask.dst.protonum = 0xFF; | ||
495 | ftp[i].max_expected = 1; | ||
496 | ftp[i].timeout = 5 * 60; /* 5 minutes */ | ||
497 | ftp[i].me = THIS_MODULE; | ||
498 | ftp[i].help = help; | ||
499 | |||
500 | tmpname = &ftp_names[i][0]; | ||
501 | if (ports[i] == FTP_PORT) | ||
502 | sprintf(tmpname, "ftp"); | ||
503 | else | ||
504 | sprintf(tmpname, "ftp-%d", ports[i]); | ||
505 | ftp[i].name = tmpname; | ||
506 | |||
507 | DEBUGP("ip_ct_ftp: registering helper for port %d\n", | ||
508 | ports[i]); | ||
509 | ret = ip_conntrack_helper_register(&ftp[i]); | ||
510 | |||
511 | if (ret) { | ||
512 | ip_conntrack_ftp_fini(); | ||
513 | return ret; | ||
514 | } | ||
515 | } | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | module_init(ip_conntrack_ftp_init); | ||
520 | module_exit(ip_conntrack_ftp_fini); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c deleted file mode 100644 index 53eb365ccc7e..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c +++ /dev/null | |||
@@ -1,1841 +0,0 @@ | |||
1 | /* | ||
2 | * H.323 connection tracking helper | ||
3 | * | ||
4 | * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> | ||
5 | * | ||
6 | * This source code is licensed under General Public License version 2. | ||
7 | * | ||
8 | * Based on the 'brute force' H.323 connection tracking module by | ||
9 | * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | ||
10 | * | ||
11 | * For more information, please see http://nath323.sourceforge.net/ | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/netfilter.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <net/tcp.h> | ||
18 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
19 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
20 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
21 | #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> | ||
22 | #include <linux/netfilter_ipv4/ip_conntrack_h323.h> | ||
23 | #include <linux/moduleparam.h> | ||
24 | #include <linux/ctype.h> | ||
25 | #include <linux/inet.h> | ||
26 | |||
27 | #if 0 | ||
28 | #define DEBUGP printk | ||
29 | #else | ||
30 | #define DEBUGP(format, args...) | ||
31 | #endif | ||
32 | |||
33 | /* Parameters */ | ||
34 | static unsigned int default_rrq_ttl = 300; | ||
35 | module_param(default_rrq_ttl, uint, 0600); | ||
36 | MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ"); | ||
37 | |||
38 | static int gkrouted_only = 1; | ||
39 | module_param(gkrouted_only, int, 0600); | ||
40 | MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper"); | ||
41 | |||
42 | static int callforward_filter = 1; | ||
43 | module_param(callforward_filter, bool, 0600); | ||
44 | MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " | ||
45 | "if both endpoints are on different sides " | ||
46 | "(determined by routing information)"); | ||
47 | |||
48 | /* Hooks for NAT */ | ||
49 | int (*set_h245_addr_hook) (struct sk_buff ** pskb, | ||
50 | unsigned char **data, int dataoff, | ||
51 | H245_TransportAddress * addr, | ||
52 | __be32 ip, u_int16_t port); | ||
53 | int (*set_h225_addr_hook) (struct sk_buff ** pskb, | ||
54 | unsigned char **data, int dataoff, | ||
55 | TransportAddress * addr, | ||
56 | __be32 ip, u_int16_t port); | ||
57 | int (*set_sig_addr_hook) (struct sk_buff ** pskb, | ||
58 | struct ip_conntrack * ct, | ||
59 | enum ip_conntrack_info ctinfo, | ||
60 | unsigned char **data, | ||
61 | TransportAddress * addr, int count); | ||
62 | int (*set_ras_addr_hook) (struct sk_buff ** pskb, | ||
63 | struct ip_conntrack * ct, | ||
64 | enum ip_conntrack_info ctinfo, | ||
65 | unsigned char **data, | ||
66 | TransportAddress * addr, int count); | ||
67 | int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb, | ||
68 | struct ip_conntrack * ct, | ||
69 | enum ip_conntrack_info ctinfo, | ||
70 | unsigned char **data, int dataoff, | ||
71 | H245_TransportAddress * addr, | ||
72 | u_int16_t port, u_int16_t rtp_port, | ||
73 | struct ip_conntrack_expect * rtp_exp, | ||
74 | struct ip_conntrack_expect * rtcp_exp); | ||
75 | int (*nat_t120_hook) (struct sk_buff ** pskb, | ||
76 | struct ip_conntrack * ct, | ||
77 | enum ip_conntrack_info ctinfo, | ||
78 | unsigned char **data, int dataoff, | ||
79 | H245_TransportAddress * addr, u_int16_t port, | ||
80 | struct ip_conntrack_expect * exp); | ||
81 | int (*nat_h245_hook) (struct sk_buff ** pskb, | ||
82 | struct ip_conntrack * ct, | ||
83 | enum ip_conntrack_info ctinfo, | ||
84 | unsigned char **data, int dataoff, | ||
85 | TransportAddress * addr, u_int16_t port, | ||
86 | struct ip_conntrack_expect * exp); | ||
87 | int (*nat_callforwarding_hook) (struct sk_buff ** pskb, | ||
88 | struct ip_conntrack * ct, | ||
89 | enum ip_conntrack_info ctinfo, | ||
90 | unsigned char **data, int dataoff, | ||
91 | TransportAddress * addr, u_int16_t port, | ||
92 | struct ip_conntrack_expect * exp); | ||
93 | int (*nat_q931_hook) (struct sk_buff ** pskb, | ||
94 | struct ip_conntrack * ct, | ||
95 | enum ip_conntrack_info ctinfo, | ||
96 | unsigned char **data, TransportAddress * addr, int idx, | ||
97 | u_int16_t port, struct ip_conntrack_expect * exp); | ||
98 | |||
99 | |||
100 | static DEFINE_SPINLOCK(ip_h323_lock); | ||
101 | static char *h323_buffer; | ||
102 | |||
/****************************************************************************/
/* Extract the next TPKT payload from the TCP stream of this packet.
 *
 * First call per packet: pass *data == NULL; subsequent calls walk the
 * remaining TPKTs in the same segment using *dataoff/*datalen as the
 * cursor.  On success returns 1 and points *data at the encapsulated
 * payload (after the 4-byte TPKT header), with its length in *datalen and
 * its offset from the start of TCP data in *dataoff.  Returns 0 when
 * there is no (further) TPKT.  Per-direction state in info->tpkt_len
 * handles Netmeeting's split header/data packets. */
static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
			 enum ip_conntrack_info ctinfo,
			 unsigned char **data, int *datalen, int *dataoff)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	struct tcphdr _tcph, *th;
	int tcpdatalen;
	int tcpdataoff;
	unsigned char *tpkt;
	int tpktlen;
	int tpktoff;

	/* Get TCP header */
	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
				sizeof(_tcph), &_tcph);
	if (th == NULL)
		return 0;

	/* Get TCP data offset */
	tcpdataoff = (*pskb)->nh.iph->ihl * 4 + th->doff * 4;

	/* Get TCP data length */
	tcpdatalen = (*pskb)->len - tcpdataoff;
	if (tcpdatalen <= 0)	/* No TCP data */
		goto clear_out;

	if (*data == NULL) {	/* first TPKT */
		/* Get first TPKT pointer; linearized into h323_buffer if
		 * the payload is fragmented across skb pages */
		tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
					  h323_buffer);
		BUG_ON(tpkt == NULL);

		/* Validate TPKT identifier (version 3, reserved 0) */
		if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
			/* Netmeeting sends TPKT header and data separately */
			if (info->tpkt_len[dir] > 0) {
				DEBUGP("ip_ct_h323: previous packet "
				       "indicated separate TPKT data of %hu "
				       "bytes\n", info->tpkt_len[dir]);
				if (info->tpkt_len[dir] <= tcpdatalen) {
					/* Yes, there was a TPKT header
					 * received */
					*data = tpkt;
					*datalen = info->tpkt_len[dir];
					*dataoff = 0;
					goto out;
				}

				/* Fragmented TPKT */
				if (net_ratelimit())
					printk("ip_ct_h323: "
					       "fragmented TPKT\n");
				goto clear_out;
			}

			/* It is not even a TPKT */
			return 0;
		}
		tpktoff = 0;
	} else {		/* Next TPKT */
		tpktoff = *dataoff + *datalen;
		tcpdatalen -= tpktoff;
		if (tcpdatalen <= 4)	/* No more TPKT */
			goto clear_out;
		tpkt = *data + *datalen;

		/* Validate TPKT identifier */
		if (tpkt[0] != 0x03 || tpkt[1] != 0)
			goto clear_out;
	}

	/* Validate TPKT length (big-endian, includes 4-byte header) */
	tpktlen = tpkt[2] * 256 + tpkt[3];
	if (tpktlen < 4)
		goto clear_out;
	if (tpktlen > tcpdatalen) {
		if (tcpdatalen == 4) {	/* Separate TPKT header */
			/* Netmeeting sends TPKT header and data separately;
			 * remember the expected data length for the next
			 * packet in this direction */
			DEBUGP("ip_ct_h323: separate TPKT header indicates "
			       "there will be TPKT data of %hu bytes\n",
			       tpktlen - 4);
			info->tpkt_len[dir] = tpktlen - 4;
			return 0;
		}

		if (net_ratelimit())
			printk("ip_ct_h323: incomplete TPKT (fragmented?)\n");
		goto clear_out;
	}

	/* This is the encapsulated data */
	*data = tpkt + 4;
	*datalen = tpktlen - 4;
	*dataoff = tpktoff + 4;

      out:
	/* Clear TPKT length */
	info->tpkt_len[dir] = 0;
	return 1;

      clear_out:
	info->tpkt_len[dir] = 0;
	return 0;
}
209 | |||
210 | /****************************************************************************/ | ||
211 | static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr, | ||
212 | __be32 * ip, u_int16_t * port) | ||
213 | { | ||
214 | unsigned char *p; | ||
215 | |||
216 | if (addr->choice != eH245_TransportAddress_unicastAddress || | ||
217 | addr->unicastAddress.choice != eUnicastAddress_iPAddress) | ||
218 | return 0; | ||
219 | |||
220 | p = data + addr->unicastAddress.iPAddress.network; | ||
221 | *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3])); | ||
222 | *port = (p[4] << 8) | (p[5]); | ||
223 | |||
224 | return 1; | ||
225 | } | ||
226 | |||
/****************************************************************************/
/* Set up the paired RTP/RTCP expectations for a media (control) channel
 * address announced in H.245.  RTP uses the even port, RTCP the odd
 * port above it.  If the connection appears NATed and a NAT hook is
 * registered, the hook takes over registration (and address rewriting).
 * Returns 0 on success or nothing-to-do, -1 on failure. */
static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned char **data, int dataoff,
			   H245_TransportAddress * addr)
{
	int dir = CTINFO2DIR(ctinfo);
	int ret = 0;
	__be32 ip;
	u_int16_t port;
	u_int16_t rtp_port;
	struct ip_conntrack_expect *rtp_exp;
	struct ip_conntrack_expect *rtcp_exp;
	typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;

	/* Read RTP or RTCP address; ignore it unless it belongs to the
	 * sender of this packet */
	if (!get_h245_addr(*data, addr, &ip, &port) ||
	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
		return 0;

	/* RTP port is even */
	rtp_port = port & (~1);

	/* Create expect for RTP */
	if ((rtp_exp = ip_conntrack_expect_alloc(ct)) == NULL)
		return -1;
	rtp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	rtp_exp->tuple.src.u.udp.port = 0;
	rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
	rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
	rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
	rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
	rtp_exp->mask.src.u.udp.port = 0;	/* any source port */
	rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
	rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
	rtp_exp->mask.dst.protonum = 0xFF;
	rtp_exp->flags = 0;

	/* Create expect for RTCP */
	if ((rtcp_exp = ip_conntrack_expect_alloc(ct)) == NULL) {
		ip_conntrack_expect_put(rtp_exp);
		return -1;
	}
	rtcp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	rtcp_exp->tuple.src.u.udp.port = 0;
	rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
	rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
	rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
	rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
	rtcp_exp->mask.src.u.udp.port = 0;
	rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
	rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
	rtcp_exp->mask.dst.protonum = 0xFF;
	rtcp_exp->flags = 0;

	if (ct->tuplehash[dir].tuple.src.ip !=
	    ct->tuplehash[!dir].tuple.dst.ip &&
	    (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
		/* NAT needed */
		ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
				   addr, port, rtp_port, rtp_exp, rtcp_exp);
	} else {		/* Conntrack only */
		rtp_exp->expectfn = NULL;
		rtcp_exp->expectfn = NULL;

		/* register both, or neither */
		if (ip_conntrack_expect_related(rtp_exp) == 0) {
			if (ip_conntrack_expect_related(rtcp_exp) == 0) {
				DEBUGP("ip_ct_h323: expect RTP "
				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
				       NIPQUAD(rtp_exp->tuple.src.ip),
				       ntohs(rtp_exp->tuple.src.u.udp.port),
				       NIPQUAD(rtp_exp->tuple.dst.ip),
				       ntohs(rtp_exp->tuple.dst.u.udp.port));
				DEBUGP("ip_ct_h323: expect RTCP "
				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
				       NIPQUAD(rtcp_exp->tuple.src.ip),
				       ntohs(rtcp_exp->tuple.src.u.udp.port),
				       NIPQUAD(rtcp_exp->tuple.dst.ip),
				       ntohs(rtcp_exp->tuple.dst.u.udp.port));
			} else {
				ip_conntrack_unexpect_related(rtp_exp);
				ret = -1;
			}
		} else
			ret = -1;
	}

	/* drop our references; the expect table holds its own */
	ip_conntrack_expect_put(rtp_exp);
	ip_conntrack_expect_put(rtcp_exp);

	return ret;
}
319 | |||
320 | /****************************************************************************/ | ||
321 | static int expect_t120(struct sk_buff **pskb, | ||
322 | struct ip_conntrack *ct, | ||
323 | enum ip_conntrack_info ctinfo, | ||
324 | unsigned char **data, int dataoff, | ||
325 | H245_TransportAddress * addr) | ||
326 | { | ||
327 | int dir = CTINFO2DIR(ctinfo); | ||
328 | int ret = 0; | ||
329 | __be32 ip; | ||
330 | u_int16_t port; | ||
331 | struct ip_conntrack_expect *exp = NULL; | ||
332 | typeof(nat_t120_hook) nat_t120; | ||
333 | |||
334 | /* Read T.120 address */ | ||
335 | if (!get_h245_addr(*data, addr, &ip, &port) || | ||
336 | ip != ct->tuplehash[dir].tuple.src.ip || port == 0) | ||
337 | return 0; | ||
338 | |||
339 | /* Create expect for T.120 connections */ | ||
340 | if ((exp = ip_conntrack_expect_alloc(ct)) == NULL) | ||
341 | return -1; | ||
342 | exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; | ||
343 | exp->tuple.src.u.tcp.port = 0; | ||
344 | exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; | ||
345 | exp->tuple.dst.u.tcp.port = htons(port); | ||
346 | exp->tuple.dst.protonum = IPPROTO_TCP; | ||
347 | exp->mask.src.ip = htonl(0xFFFFFFFF); | ||
348 | exp->mask.src.u.tcp.port = 0; | ||
349 | exp->mask.dst.ip = htonl(0xFFFFFFFF); | ||
350 | exp->mask.dst.u.tcp.port = htons(0xFFFF); | ||
351 | exp->mask.dst.protonum = 0xFF; | ||
352 | exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */ | ||
353 | |||
354 | if (ct->tuplehash[dir].tuple.src.ip != | ||
355 | ct->tuplehash[!dir].tuple.dst.ip && | ||
356 | (nat_t120 = rcu_dereference(nat_t120_hook))) { | ||
357 | /* NAT needed */ | ||
358 | ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr, | ||
359 | port, exp); | ||
360 | } else { /* Conntrack only */ | ||
361 | exp->expectfn = NULL; | ||
362 | if (ip_conntrack_expect_related(exp) == 0) { | ||
363 | DEBUGP("ip_ct_h323: expect T.120 " | ||
364 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | ||
365 | NIPQUAD(exp->tuple.src.ip), | ||
366 | ntohs(exp->tuple.src.u.tcp.port), | ||
367 | NIPQUAD(exp->tuple.dst.ip), | ||
368 | ntohs(exp->tuple.dst.u.tcp.port)); | ||
369 | } else | ||
370 | ret = -1; | ||
371 | } | ||
372 | |||
373 | ip_conntrack_expect_put(exp); | ||
374 | |||
375 | return ret; | ||
376 | } | ||
377 | |||
378 | /****************************************************************************/ | ||
379 | static int process_h245_channel(struct sk_buff **pskb, | ||
380 | struct ip_conntrack *ct, | ||
381 | enum ip_conntrack_info ctinfo, | ||
382 | unsigned char **data, int dataoff, | ||
383 | H2250LogicalChannelParameters * channel) | ||
384 | { | ||
385 | int ret; | ||
386 | |||
387 | if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { | ||
388 | /* RTP */ | ||
389 | ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, | ||
390 | &channel->mediaChannel); | ||
391 | if (ret < 0) | ||
392 | return -1; | ||
393 | } | ||
394 | |||
395 | if (channel-> | ||
396 | options & eH2250LogicalChannelParameters_mediaControlChannel) { | ||
397 | /* RTCP */ | ||
398 | ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, | ||
399 | &channel->mediaControlChannel); | ||
400 | if (ret < 0) | ||
401 | return -1; | ||
402 | } | ||
403 | |||
404 | return 0; | ||
405 | } | ||
406 | |||
/****************************************************************************/
/* Handle an H.245 OpenLogicalChannel: set up expectations for the forward
 * channel, the optional reverse channel, and an optional T.120 separate
 * LAN stack.  Returns 0 on success, -1 on failure. */
static int process_olc(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, int dataoff,
		       OpenLogicalChannel * olc)
{
	int ret;

	DEBUGP("ip_ct_h323: OpenLogicalChannel\n");

	/* Forward logical channel (H.225.0 parameters) */
	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
	{
		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
					   &olc->
					   forwardLogicalChannelParameters.
					   multiplexParameters.
					   h2250LogicalChannelParameters);
		if (ret < 0)
			return -1;
	}

	/* Optional reverse logical channel */
	if ((olc->options &
	     eOpenLogicalChannel_reverseLogicalChannelParameters) &&
	    (olc->reverseLogicalChannelParameters.options &
	     eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
	    && (olc->reverseLogicalChannelParameters.multiplexParameters.
		choice ==
		eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
	{
		ret =
		    process_h245_channel(pskb, ct, ctinfo, data, dataoff,
					 &olc->
					 reverseLogicalChannelParameters.
					 multiplexParameters.
					 h2250LogicalChannelParameters);
		if (ret < 0)
			return -1;
	}

	/* Optional T.120 data channel on a separate LAN stack */
	if ((olc->options & eOpenLogicalChannel_separateStack) &&
	    olc->forwardLogicalChannelParameters.dataType.choice ==
	    eDataType_data &&
	    olc->forwardLogicalChannelParameters.dataType.data.application.
	    choice == eDataApplicationCapability_application_t120 &&
	    olc->forwardLogicalChannelParameters.dataType.data.application.
	    t120.choice == eDataProtocolCapability_separateLANStack &&
	    olc->separateStack.networkAddress.choice ==
	    eNetworkAccessParameters_networkAddress_localAreaAddress) {
		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
				  &olc->separateStack.networkAddress.
				  localAreaAddress);
		if (ret < 0)
			return -1;
	}

	return 0;
}
465 | |||
/****************************************************************************/
/* Handle an H.245 OpenLogicalChannelAck: set up expectations for the
 * acknowledged reverse channel and/or the media/media-control channels
 * confirmed in the forward multiplex ack.  Returns 0 on success, -1 on
 * failure. */
static int process_olca(struct sk_buff **pskb, struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			unsigned char **data, int dataoff,
			OpenLogicalChannelAck * olca)
{
	H2250LogicalChannelAckParameters *ack;
	int ret;

	DEBUGP("ip_ct_h323: OpenLogicalChannelAck\n");

	/* Reverse logical channel acknowledged by the peer */
	if ((olca->options &
	     eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
	    (olca->reverseLogicalChannelParameters.options &
	     eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
	    && (olca->reverseLogicalChannelParameters.multiplexParameters.
		choice ==
		eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
	{
		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
					   &olca->
					   reverseLogicalChannelParameters.
					   multiplexParameters.
					   h2250LogicalChannelParameters);
		if (ret < 0)
			return -1;
	}

	/* Forward multiplex ack carrying the final media addresses */
	if ((olca->options &
	     eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
	    (olca->forwardMultiplexAckParameters.choice ==
	     eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
	{
		ack = &olca->forwardMultiplexAckParameters.
		    h2250LogicalChannelAckParameters;
		if (ack->options &
		    eH2250LogicalChannelAckParameters_mediaChannel) {
			/* RTP */
			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
					      &ack->mediaChannel);
			if (ret < 0)
				return -1;
		}

		if (ack->options &
		    eH2250LogicalChannelAckParameters_mediaControlChannel) {
			/* RTCP */
			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
					      &ack->mediaControlChannel);
			if (ret < 0)
				return -1;
		}
	}

	return 0;
}
522 | |||
523 | /****************************************************************************/ | ||
524 | static int process_h245(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
525 | enum ip_conntrack_info ctinfo, | ||
526 | unsigned char **data, int dataoff, | ||
527 | MultimediaSystemControlMessage * mscm) | ||
528 | { | ||
529 | switch (mscm->choice) { | ||
530 | case eMultimediaSystemControlMessage_request: | ||
531 | if (mscm->request.choice == | ||
532 | eRequestMessage_openLogicalChannel) { | ||
533 | return process_olc(pskb, ct, ctinfo, data, dataoff, | ||
534 | &mscm->request.openLogicalChannel); | ||
535 | } | ||
536 | DEBUGP("ip_ct_h323: H.245 Request %d\n", | ||
537 | mscm->request.choice); | ||
538 | break; | ||
539 | case eMultimediaSystemControlMessage_response: | ||
540 | if (mscm->response.choice == | ||
541 | eResponseMessage_openLogicalChannelAck) { | ||
542 | return process_olca(pskb, ct, ctinfo, data, dataoff, | ||
543 | &mscm->response. | ||
544 | openLogicalChannelAck); | ||
545 | } | ||
546 | DEBUGP("ip_ct_h323: H.245 Response %d\n", | ||
547 | mscm->response.choice); | ||
548 | break; | ||
549 | default: | ||
550 | DEBUGP("ip_ct_h323: H.245 signal %d\n", mscm->choice); | ||
551 | break; | ||
552 | } | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | |||
/****************************************************************************/
/* Conntrack helper entry point for H.245 control connections: walks every
 * TPKT in the TCP payload, decodes the H.245 message and processes it.
 * Returns NF_ACCEPT normally; NF_DROP only if expectation setup failed. */
static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
		     enum ip_conntrack_info ctinfo)
{
	/* shared decode buffer; all access is serialized by ip_h323_lock */
	static MultimediaSystemControlMessage mscm;
	unsigned char *data = NULL;
	int datalen;
	int dataoff;
	int ret;

	/* Until there's been traffic both ways, don't look in packets. */
	if (ctinfo != IP_CT_ESTABLISHED
	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
		return NF_ACCEPT;
	}
	DEBUGP("ip_ct_h245: skblen = %u\n", (*pskb)->len);

	spin_lock_bh(&ip_h323_lock);

	/* Process each TPKT */
	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
		DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr), datalen);

		/* Decode H.245 signal */
		ret = DecodeMultimediaSystemControlMessage(data, datalen,
							   &mscm);
		if (ret < 0) {
			if (net_ratelimit())
				printk("ip_ct_h245: decoding error: %s\n",
				       ret == H323_ERROR_BOUND ?
				       "out of bound" : "out of range");
			/* We don't drop when decoding error */
			break;
		}

		/* Process H.245 signal */
		if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
			goto drop;
	}

	spin_unlock_bh(&ip_h323_lock);
	return NF_ACCEPT;

      drop:
	spin_unlock_bh(&ip_h323_lock);
	if (net_ratelimit())
		printk("ip_ct_h245: packet dropped\n");
	return NF_DROP;
}
608 | |||
/****************************************************************************/
/* Helper attached (via expectations) to H.245 control connections.
 * It is not registered globally: only ip_conntrack_h245_expect() below
 * assigns it. */
static struct ip_conntrack_helper ip_conntrack_helper_h245 = {
	.name = "H.245",
	.me = THIS_MODULE,
	/* presumably RTP+RTCP in both directions per channel plus the
	 * T.120 expectation — TODO confirm against H323_RTP_CHANNEL_MAX */
	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */ ,
	.timeout = 240,
	/* match any TCP connection; the expectation supplies the ports */
	.tuple = {.dst = {.protonum = IPPROTO_TCP}},
	.mask = {.src = {.u = {0xFFFF}},
		 .dst = {.protonum = 0xFF}},
	.help = h245_help
};
620 | |||
/****************************************************************************/
/* Expectation callback: a newly created H.245 connection inherits the
 * H.245 helper so its TPKTs get parsed too.  Takes ip_conntrack_lock
 * because helper assignment must be serialized with the conntrack core. */
void ip_conntrack_h245_expect(struct ip_conntrack *new,
			      struct ip_conntrack_expect *this)
{
	write_lock_bh(&ip_conntrack_lock);
	new->helper = &ip_conntrack_helper_h245;
	write_unlock_bh(&ip_conntrack_lock);
}
629 | |||
630 | /****************************************************************************/ | ||
631 | int get_h225_addr(unsigned char *data, TransportAddress * addr, | ||
632 | __be32 * ip, u_int16_t * port) | ||
633 | { | ||
634 | unsigned char *p; | ||
635 | |||
636 | if (addr->choice != eTransportAddress_ipAddress) | ||
637 | return 0; | ||
638 | |||
639 | p = data + addr->ipAddress.ip; | ||
640 | *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3])); | ||
641 | *port = (p[4] << 8) | (p[5]); | ||
642 | |||
643 | return 1; | ||
644 | } | ||
645 | |||
/****************************************************************************/
/* Set up an expectation for the H.245 control connection announced in an
 * H.225 h245Address.  New connections matching it inherit the H.245
 * helper via ip_conntrack_h245_expect.  Returns 0 on success or
 * nothing-to-do, -1 on failure. */
static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, int dataoff,
		       TransportAddress * addr)
{
	int dir = CTINFO2DIR(ctinfo);
	int ret = 0;
	__be32 ip;
	u_int16_t port;
	struct ip_conntrack_expect *exp = NULL;
	typeof(nat_h245_hook) nat_h245;

	/* Read h245Address; only honor it if it belongs to the sender */
	if (!get_h225_addr(*data, addr, &ip, &port) ||
	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
		return 0;

	/* Create expect for h245 connection */
	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
		return -1;
	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	exp->tuple.src.u.tcp.port = 0;	/* any source port */
	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
	exp->tuple.dst.u.tcp.port = htons(port);
	exp->tuple.dst.protonum = IPPROTO_TCP;
	exp->mask.src.ip = htonl(0xFFFFFFFF);
	exp->mask.src.u.tcp.port = 0;
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.tcp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;
	exp->flags = 0;

	if (ct->tuplehash[dir].tuple.src.ip !=
	    ct->tuplehash[!dir].tuple.dst.ip &&
	    (nat_h245 = rcu_dereference(nat_h245_hook))) {
		/* NAT needed */
		ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
			       port, exp);
	} else {		/* Conntrack only */
		exp->expectfn = ip_conntrack_h245_expect;

		if (ip_conntrack_expect_related(exp) == 0) {
			DEBUGP("ip_ct_q931: expect H.245 "
			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
			       NIPQUAD(exp->tuple.src.ip),
			       ntohs(exp->tuple.src.u.tcp.port),
			       NIPQUAD(exp->tuple.dst.ip),
			       ntohs(exp->tuple.dst.u.tcp.port));
		} else
			ret = -1;
	}

	ip_conntrack_expect_put(exp);

	return ret;
}
703 | |||
704 | /* Forwarding declaration */ | ||
705 | void ip_conntrack_q931_expect(struct ip_conntrack *new, | ||
706 | struct ip_conntrack_expect *this); | ||
707 | |||
/****************************************************************************/
/* Set up an expectation for the second call leg created by H.450 call
 * forwarding.  With callforward_filter set, the expectation is skipped
 * when routing says the caller and the forward-to party are reached via
 * the same gateway/device (i.e. they are on the same side of this box).
 * Returns 0 on success or nothing-to-do, -1 on failure. */
static int expect_callforwarding(struct sk_buff **pskb,
				 struct ip_conntrack *ct,
				 enum ip_conntrack_info ctinfo,
				 unsigned char **data, int dataoff,
				 TransportAddress * addr)
{
	int dir = CTINFO2DIR(ctinfo);
	int ret = 0;
	__be32 ip;
	u_int16_t port;
	struct ip_conntrack_expect *exp = NULL;
	typeof(nat_callforwarding_hook) nat_callforwarding;

	/* Read alternativeAddress */
	if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
		return 0;

	/* If the calling party is on the same side of the forward-to party,
	 * we don't need to track the second call */
	if (callforward_filter) {
		struct rtable *rt1, *rt2;
		struct flowi fl1 = {
			.fl4_dst = ip,
		};
		struct flowi fl2 = {
			.fl4_dst = ct->tuplehash[!dir].tuple.src.ip,
		};

		/* same gateway and same output device => same side */
		if (ip_route_output_key(&rt1, &fl1) == 0) {
			if (ip_route_output_key(&rt2, &fl2) == 0) {
				if (rt1->rt_gateway == rt2->rt_gateway &&
				    rt1->u.dst.dev == rt2->u.dst.dev)
					ret = 1;
				dst_release(&rt2->u.dst);
			}
			dst_release(&rt1->u.dst);
		}
		if (ret) {
			DEBUGP("ip_ct_q931: Call Forwarding not tracked\n");
			return 0;
		}
	}

	/* Create expect for the second call leg */
	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
		return -1;
	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	exp->tuple.src.u.tcp.port = 0;
	exp->tuple.dst.ip = ip;
	exp->tuple.dst.u.tcp.port = htons(port);
	exp->tuple.dst.protonum = IPPROTO_TCP;
	exp->mask.src.ip = htonl(0xFFFFFFFF);
	exp->mask.src.u.tcp.port = 0;
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.tcp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;
	exp->flags = 0;

	if (ct->tuplehash[dir].tuple.src.ip !=
	    ct->tuplehash[!dir].tuple.dst.ip &&
	    (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
		/* Need NAT */
		ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
					 addr, port, exp);
	} else {		/* Conntrack only */
		exp->expectfn = ip_conntrack_q931_expect;

		if (ip_conntrack_expect_related(exp) == 0) {
			DEBUGP("ip_ct_q931: expect Call Forwarding "
			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
			       NIPQUAD(exp->tuple.src.ip),
			       ntohs(exp->tuple.src.u.tcp.port),
			       NIPQUAD(exp->tuple.dst.ip),
			       ntohs(exp->tuple.dst.u.tcp.port));
		} else
			ret = -1;
	}

	ip_conntrack_expect_put(exp);

	return ret;
}
791 | |||
/****************************************************************************/
/* Handle a Q.931 Setup: expect the announced H.245 connection, rewrite
 * the dest/source call signal addresses when a NAT hook is present, and
 * process any fastStart OpenLogicalChannel elements.  Returns 0 on
 * success, -1 on failure. */
static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
			 enum ip_conntrack_info ctinfo,
			 unsigned char **data, int dataoff,
			 Setup_UUIE * setup)
{
	int dir = CTINFO2DIR(ctinfo);
	int ret;
	int i;
	__be32 ip;
	u_int16_t port;
	typeof(set_h225_addr_hook) set_h225_addr;

	DEBUGP("ip_ct_q931: Setup\n");

	if (setup->options & eSetup_UUIE_h245Address) {
		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
				  &setup->h245Address);
		if (ret < 0)
			return -1;
	}

	set_h225_addr = rcu_dereference(set_h225_addr_hook);

	/* NAT: rewrite destCallSignalAddress when it does not match the
	 * reply source (i.e. the packet was DNATed) */
	if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
	    (set_h225_addr) &&
	    get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
	    ip != ct->tuplehash[!dir].tuple.src.ip) {
		DEBUGP("ip_ct_q931: set destCallSignalAddress "
		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
		       NIPQUAD(ip), port,
		       NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
		       ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
		ret = set_h225_addr(pskb, data, dataoff,
				    &setup->destCallSignalAddress,
				    ct->tuplehash[!dir].tuple.src.ip,
				    ntohs(ct->tuplehash[!dir].tuple.src.
					  u.tcp.port));
		if (ret < 0)
			return -1;
	}

	/* NAT: rewrite sourceCallSignalAddress when it does not match the
	 * reply destination (i.e. the packet was SNATed) */
	if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
	    (set_h225_addr) &&
	    get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
	    && ip != ct->tuplehash[!dir].tuple.dst.ip) {
		DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
		       NIPQUAD(ip), port,
		       NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
		       ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
		ret = set_h225_addr(pskb, data, dataoff,
				    &setup->sourceCallSignalAddress,
				    ct->tuplehash[!dir].tuple.dst.ip,
				    ntohs(ct->tuplehash[!dir].tuple.dst.
					  u.tcp.port));
		if (ret < 0)
			return -1;
	}

	/* fastStart elements carry embedded OpenLogicalChannel structures */
	if (setup->options & eSetup_UUIE_fastStart) {
		for (i = 0; i < setup->fastStart.count; i++) {
			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
					  &setup->fastStart.item[i]);
			if (ret < 0)
				return -1;
		}
	}

	return 0;
}
863 | |||
864 | /****************************************************************************/ | ||
865 | static int process_callproceeding(struct sk_buff **pskb, | ||
866 | struct ip_conntrack *ct, | ||
867 | enum ip_conntrack_info ctinfo, | ||
868 | unsigned char **data, int dataoff, | ||
869 | CallProceeding_UUIE * callproc) | ||
870 | { | ||
871 | int ret; | ||
872 | int i; | ||
873 | |||
874 | DEBUGP("ip_ct_q931: CallProceeding\n"); | ||
875 | |||
876 | if (callproc->options & eCallProceeding_UUIE_h245Address) { | ||
877 | ret = expect_h245(pskb, ct, ctinfo, data, dataoff, | ||
878 | &callproc->h245Address); | ||
879 | if (ret < 0) | ||
880 | return -1; | ||
881 | } | ||
882 | |||
883 | if (callproc->options & eCallProceeding_UUIE_fastStart) { | ||
884 | for (i = 0; i < callproc->fastStart.count; i++) { | ||
885 | ret = process_olc(pskb, ct, ctinfo, data, dataoff, | ||
886 | &callproc->fastStart.item[i]); | ||
887 | if (ret < 0) | ||
888 | return -1; | ||
889 | } | ||
890 | } | ||
891 | |||
892 | return 0; | ||
893 | } | ||
894 | |||
895 | /****************************************************************************/ | ||
896 | static int process_connect(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
897 | enum ip_conntrack_info ctinfo, | ||
898 | unsigned char **data, int dataoff, | ||
899 | Connect_UUIE * connect) | ||
900 | { | ||
901 | int ret; | ||
902 | int i; | ||
903 | |||
904 | DEBUGP("ip_ct_q931: Connect\n"); | ||
905 | |||
906 | if (connect->options & eConnect_UUIE_h245Address) { | ||
907 | ret = expect_h245(pskb, ct, ctinfo, data, dataoff, | ||
908 | &connect->h245Address); | ||
909 | if (ret < 0) | ||
910 | return -1; | ||
911 | } | ||
912 | |||
913 | if (connect->options & eConnect_UUIE_fastStart) { | ||
914 | for (i = 0; i < connect->fastStart.count; i++) { | ||
915 | ret = process_olc(pskb, ct, ctinfo, data, dataoff, | ||
916 | &connect->fastStart.item[i]); | ||
917 | if (ret < 0) | ||
918 | return -1; | ||
919 | } | ||
920 | } | ||
921 | |||
922 | return 0; | ||
923 | } | ||
924 | |||
925 | /****************************************************************************/ | ||
926 | static int process_alerting(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
927 | enum ip_conntrack_info ctinfo, | ||
928 | unsigned char **data, int dataoff, | ||
929 | Alerting_UUIE * alert) | ||
930 | { | ||
931 | int ret; | ||
932 | int i; | ||
933 | |||
934 | DEBUGP("ip_ct_q931: Alerting\n"); | ||
935 | |||
936 | if (alert->options & eAlerting_UUIE_h245Address) { | ||
937 | ret = expect_h245(pskb, ct, ctinfo, data, dataoff, | ||
938 | &alert->h245Address); | ||
939 | if (ret < 0) | ||
940 | return -1; | ||
941 | } | ||
942 | |||
943 | if (alert->options & eAlerting_UUIE_fastStart) { | ||
944 | for (i = 0; i < alert->fastStart.count; i++) { | ||
945 | ret = process_olc(pskb, ct, ctinfo, data, dataoff, | ||
946 | &alert->fastStart.item[i]); | ||
947 | if (ret < 0) | ||
948 | return -1; | ||
949 | } | ||
950 | } | ||
951 | |||
952 | return 0; | ||
953 | } | ||
954 | |||
955 | /****************************************************************************/ | ||
956 | static int process_information(struct sk_buff **pskb, | ||
957 | struct ip_conntrack *ct, | ||
958 | enum ip_conntrack_info ctinfo, | ||
959 | unsigned char **data, int dataoff, | ||
960 | Information_UUIE * info) | ||
961 | { | ||
962 | int ret; | ||
963 | int i; | ||
964 | |||
965 | DEBUGP("ip_ct_q931: Information\n"); | ||
966 | |||
967 | if (info->options & eInformation_UUIE_fastStart) { | ||
968 | for (i = 0; i < info->fastStart.count; i++) { | ||
969 | ret = process_olc(pskb, ct, ctinfo, data, dataoff, | ||
970 | &info->fastStart.item[i]); | ||
971 | if (ret < 0) | ||
972 | return -1; | ||
973 | } | ||
974 | } | ||
975 | |||
976 | return 0; | ||
977 | } | ||
978 | |||
979 | /****************************************************************************/ | ||
980 | static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
981 | enum ip_conntrack_info ctinfo, | ||
982 | unsigned char **data, int dataoff, | ||
983 | Facility_UUIE * facility) | ||
984 | { | ||
985 | int ret; | ||
986 | int i; | ||
987 | |||
988 | DEBUGP("ip_ct_q931: Facility\n"); | ||
989 | |||
990 | if (facility->reason.choice == eFacilityReason_callForwarded) { | ||
991 | if (facility->options & eFacility_UUIE_alternativeAddress) | ||
992 | return expect_callforwarding(pskb, ct, ctinfo, data, | ||
993 | dataoff, | ||
994 | &facility-> | ||
995 | alternativeAddress); | ||
996 | return 0; | ||
997 | } | ||
998 | |||
999 | if (facility->options & eFacility_UUIE_h245Address) { | ||
1000 | ret = expect_h245(pskb, ct, ctinfo, data, dataoff, | ||
1001 | &facility->h245Address); | ||
1002 | if (ret < 0) | ||
1003 | return -1; | ||
1004 | } | ||
1005 | |||
1006 | if (facility->options & eFacility_UUIE_fastStart) { | ||
1007 | for (i = 0; i < facility->fastStart.count; i++) { | ||
1008 | ret = process_olc(pskb, ct, ctinfo, data, dataoff, | ||
1009 | &facility->fastStart.item[i]); | ||
1010 | if (ret < 0) | ||
1011 | return -1; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
1015 | return 0; | ||
1016 | } | ||
1017 | |||
1018 | /****************************************************************************/ | ||
1019 | static int process_progress(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
1020 | enum ip_conntrack_info ctinfo, | ||
1021 | unsigned char **data, int dataoff, | ||
1022 | Progress_UUIE * progress) | ||
1023 | { | ||
1024 | int ret; | ||
1025 | int i; | ||
1026 | |||
1027 | DEBUGP("ip_ct_q931: Progress\n"); | ||
1028 | |||
1029 | if (progress->options & eProgress_UUIE_h245Address) { | ||
1030 | ret = expect_h245(pskb, ct, ctinfo, data, dataoff, | ||
1031 | &progress->h245Address); | ||
1032 | if (ret < 0) | ||
1033 | return -1; | ||
1034 | } | ||
1035 | |||
1036 | if (progress->options & eProgress_UUIE_fastStart) { | ||
1037 | for (i = 0; i < progress->fastStart.count; i++) { | ||
1038 | ret = process_olc(pskb, ct, ctinfo, data, dataoff, | ||
1039 | &progress->fastStart.item[i]); | ||
1040 | if (ret < 0) | ||
1041 | return -1; | ||
1042 | } | ||
1043 | } | ||
1044 | |||
1045 | return 0; | ||
1046 | } | ||
1047 | |||
/****************************************************************************/
/* Dispatch a decoded Q.931 message to the per-UUIE handler, then process
 * any tunnelled H.245 PDUs.  Returns 0 on success, -1 on failure. */
static int process_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			unsigned char **data, int dataoff, Q931 * q931)
{
	H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
	int i;
	int ret = 0;

	switch (pdu->h323_message_body.choice) {
	case eH323_UU_PDU_h323_message_body_setup:
		ret = process_setup(pskb, ct, ctinfo, data, dataoff,
				    &pdu->h323_message_body.setup);
		break;
	case eH323_UU_PDU_h323_message_body_callProceeding:
		ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
					     &pdu->h323_message_body.
					     callProceeding);
		break;
	case eH323_UU_PDU_h323_message_body_connect:
		ret = process_connect(pskb, ct, ctinfo, data, dataoff,
				      &pdu->h323_message_body.connect);
		break;
	case eH323_UU_PDU_h323_message_body_alerting:
		ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
				       &pdu->h323_message_body.alerting);
		break;
	case eH323_UU_PDU_h323_message_body_information:
		ret = process_information(pskb, ct, ctinfo, data, dataoff,
					  &pdu->h323_message_body.
					  information);
		break;
	case eH323_UU_PDU_h323_message_body_facility:
		ret = process_facility(pskb, ct, ctinfo, data, dataoff,
				       &pdu->h323_message_body.facility);
		break;
	case eH323_UU_PDU_h323_message_body_progress:
		ret = process_progress(pskb, ct, ctinfo, data, dataoff,
				       &pdu->h323_message_body.progress);
		break;
	default:
		/* Other signal types pass through untouched. */
		DEBUGP("ip_ct_q931: Q.931 signal %d\n",
		       pdu->h323_message_body.choice);
		break;
	}

	if (ret < 0)
		return -1;

	/* H.245 messages may also be tunnelled inside the Q.931 PDU. */
	if (pdu->options & eH323_UU_PDU_h245Control) {
		for (i = 0; i < pdu->h245Control.count; i++) {
			ret = process_h245(pskb, ct, ctinfo, data, dataoff,
					   &pdu->h245Control.item[i]);
			if (ret < 0)
				return -1;
		}
	}

	return 0;
}
1108 | |||
/****************************************************************************/
/* Conntrack helper entry point for the Q.931 call-signalling TCP stream.
 * Walks every TPKT in the packet, decodes it and processes the Q.931
 * signal.  Returns NF_ACCEPT, or NF_DROP on a processing failure. */
static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
		     enum ip_conntrack_info ctinfo)
{
	/* Static decode buffer; safe because ip_h323_lock serializes all
	 * users of it. */
	static Q931 q931;
	unsigned char *data = NULL;
	int datalen;
	int dataoff;
	int ret;

	/* Until there's been traffic both ways, don't look in packets. */
	if (ctinfo != IP_CT_ESTABLISHED
	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
		return NF_ACCEPT;
	}
	DEBUGP("ip_ct_q931: skblen = %u\n", (*pskb)->len);

	spin_lock_bh(&ip_h323_lock);

	/* Process each TPKT */
	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
		DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr), datalen);

		/* Decode Q.931 signal */
		ret = DecodeQ931(data, datalen, &q931);
		if (ret < 0) {
			if (net_ratelimit())
				printk("ip_ct_q931: decoding error: %s\n",
				       ret == H323_ERROR_BOUND ?
				       "out of bound" : "out of range");
			/* We don't drop when decoding error */
			break;
		}

		/* Process Q.931 signal */
		if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
			goto drop;
	}

	spin_unlock_bh(&ip_h323_lock);
	return NF_ACCEPT;

      drop:
	spin_unlock_bh(&ip_h323_lock);
	if (net_ratelimit())
		printk("ip_ct_q931: packet dropped\n");
	return NF_DROP;
}
1159 | |||
/****************************************************************************/
/* Helper registration: attach q931_help() to TCP connections whose
 * destination port is Q931_PORT. */
static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
	.name = "Q.931",
	.me = THIS_MODULE,
	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4 /* T.120 and H.245 */ ,
	.timeout = 240,
	.tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
		  .dst = {.protonum = IPPROTO_TCP}},
	.mask = {.src = {.u = {0xFFFF}},
		 .dst = {.protonum = 0xFF}},
	.help = q931_help
};
1172 | |||
/****************************************************************************/
/* Expectation callback: attach the Q.931 helper to a newly created
 * expected connection. */
void ip_conntrack_q931_expect(struct ip_conntrack *new,
			      struct ip_conntrack_expect *this)
{
	write_lock_bh(&ip_conntrack_lock);
	new->helper = &ip_conntrack_helper_q931;
	write_unlock_bh(&ip_conntrack_lock);
}
1181 | |||
1182 | /****************************************************************************/ | ||
1183 | static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen) | ||
1184 | { | ||
1185 | struct udphdr _uh, *uh; | ||
1186 | int dataoff; | ||
1187 | |||
1188 | uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, sizeof(_uh), | ||
1189 | &_uh); | ||
1190 | if (uh == NULL) | ||
1191 | return NULL; | ||
1192 | dataoff = (*pskb)->nh.iph->ihl * 4 + sizeof(_uh); | ||
1193 | if (dataoff >= (*pskb)->len) | ||
1194 | return NULL; | ||
1195 | *datalen = (*pskb)->len - dataoff; | ||
1196 | return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer); | ||
1197 | } | ||
1198 | |||
1199 | /****************************************************************************/ | ||
1200 | static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct, | ||
1201 | __be32 ip, u_int16_t port) | ||
1202 | { | ||
1203 | struct ip_conntrack_expect *exp; | ||
1204 | struct ip_conntrack_tuple tuple; | ||
1205 | |||
1206 | tuple.src.ip = 0; | ||
1207 | tuple.src.u.tcp.port = 0; | ||
1208 | tuple.dst.ip = ip; | ||
1209 | tuple.dst.u.tcp.port = htons(port); | ||
1210 | tuple.dst.protonum = IPPROTO_TCP; | ||
1211 | |||
1212 | exp = __ip_conntrack_expect_find(&tuple); | ||
1213 | if (exp && exp->master == ct) | ||
1214 | return exp; | ||
1215 | return NULL; | ||
1216 | } | ||
1217 | |||
1218 | /****************************************************************************/ | ||
1219 | static int set_expect_timeout(struct ip_conntrack_expect *exp, | ||
1220 | unsigned timeout) | ||
1221 | { | ||
1222 | if (!exp || !del_timer(&exp->timeout)) | ||
1223 | return 0; | ||
1224 | |||
1225 | exp->timeout.expires = jiffies + timeout * HZ; | ||
1226 | add_timer(&exp->timeout); | ||
1227 | |||
1228 | return 1; | ||
1229 | } | ||
1230 | |||
/****************************************************************************/
/* From a RAS RRQ, find the first callSignalAddress matching the sender and
 * create a permanent expectation for the incoming Q.931 connection.  The
 * NAT hook, when present, takes over address translation and expectation
 * registration.  Returns 0 on success or if no address matched, -1 on
 * failure. */
static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data,
		       TransportAddress * addr, int count)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	int ret = 0;
	int i;
	__be32 ip;
	u_int16_t port;
	struct ip_conntrack_expect *exp;
	typeof(nat_q931_hook) nat_q931;

	/* Look for the first related address */
	for (i = 0; i < count; i++) {
		if (get_h225_addr(*data, &addr[i], &ip, &port) &&
		    ip == ct->tuplehash[dir].tuple.src.ip && port != 0)
			break;
	}

	if (i >= count)		/* Not found */
		return 0;

	/* Create expect for Q.931 */
	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
		return -1;
	/* In gatekeeper-routed mode only the GK may open the call channel. */
	exp->tuple.src.ip = gkrouted_only ?	/* only accept calls from GK? */
	    ct->tuplehash[!dir].tuple.src.ip : 0;
	exp->tuple.src.u.tcp.port = 0;
	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
	exp->tuple.dst.u.tcp.port = htons(port);
	exp->tuple.dst.protonum = IPPROTO_TCP;
	exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
	exp->mask.src.u.tcp.port = 0;
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.tcp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;
	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple calls */

	nat_q931 = rcu_dereference(nat_q931_hook);
	if (nat_q931) {		/* Need NAT */
		ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
	} else {		/* Conntrack only */
		exp->expectfn = ip_conntrack_q931_expect;

		if (ip_conntrack_expect_related(exp) == 0) {
			DEBUGP("ip_ct_ras: expect Q.931 "
			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
			       NIPQUAD(exp->tuple.src.ip),
			       ntohs(exp->tuple.src.u.tcp.port),
			       NIPQUAD(exp->tuple.dst.ip),
			       ntohs(exp->tuple.dst.u.tcp.port));

			/* Save port for looking up expect in processing RCF */
			info->sig_port[dir] = port;
		} else
			ret = -1;
	}

	/* Drop our reference; the expect table holds its own if related. */
	ip_conntrack_expect_put(exp);

	return ret;
}
1296 | |||
1297 | /****************************************************************************/ | ||
1298 | static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
1299 | enum ip_conntrack_info ctinfo, | ||
1300 | unsigned char **data, GatekeeperRequest * grq) | ||
1301 | { | ||
1302 | typeof(set_ras_addr_hook) set_ras_addr; | ||
1303 | |||
1304 | DEBUGP("ip_ct_ras: GRQ\n"); | ||
1305 | |||
1306 | set_ras_addr = rcu_dereference(set_ras_addr_hook); | ||
1307 | if (set_ras_addr) /* NATed */ | ||
1308 | return set_ras_addr(pskb, ct, ctinfo, data, | ||
1309 | &grq->rasAddress, 1); | ||
1310 | return 0; | ||
1311 | } | ||
1312 | |||
1313 | /* Declare before using */ | ||
1314 | static void ip_conntrack_ras_expect(struct ip_conntrack *new, | ||
1315 | struct ip_conntrack_expect *this); | ||
1316 | |||
/****************************************************************************/
/* Process a RAS GatekeeperConfirm: expect the subsequent RAS registration
 * traffic towards the gatekeeper's rasAddress, unless it reuses the
 * discovery address or this connection is itself expected (loop guard).
 * Returns 0 on success, -1 on failure. */
static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, GatekeeperConfirm * gcf)
{
	int dir = CTINFO2DIR(ctinfo);
	int ret = 0;
	__be32 ip;
	u_int16_t port;
	struct ip_conntrack_expect *exp;

	DEBUGP("ip_ct_ras: GCF\n");

	if (!get_h225_addr(*data, &gcf->rasAddress, &ip, &port))
		return 0;

	/* Registration port is the same as discovery port */
	if (ip == ct->tuplehash[dir].tuple.src.ip &&
	    port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port))
		return 0;

	/* Avoid RAS expectation loops. A GCF is never expected. */
	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
		return 0;

	/* Need new expect */
	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
		return -1;
	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	exp->tuple.src.u.tcp.port = 0;
	exp->tuple.dst.ip = ip;
	exp->tuple.dst.u.tcp.port = htons(port);
	exp->tuple.dst.protonum = IPPROTO_UDP;
	exp->mask.src.ip = htonl(0xFFFFFFFF);
	exp->mask.src.u.tcp.port = 0;
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.tcp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;
	exp->flags = 0;
	/* The expected RAS connection gets the RAS helper attached. */
	exp->expectfn = ip_conntrack_ras_expect;
	if (ip_conntrack_expect_related(exp) == 0) {
		DEBUGP("ip_ct_ras: expect RAS "
		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
		       NIPQUAD(exp->tuple.src.ip),
		       ntohs(exp->tuple.src.u.tcp.port),
		       NIPQUAD(exp->tuple.dst.ip),
		       ntohs(exp->tuple.dst.u.tcp.port));
	} else
		ret = -1;

	ip_conntrack_expect_put(exp);

	return ret;
}
1371 | |||
/****************************************************************************/
/* Process a RAS RegistrationRequest: expect the Q.931 call-signalling
 * connection, NAT-rewrite the rasAddress list when hooked, and remember
 * the requested (or default) TTL for the RCF handler.  Returns 0 on
 * success, -1 on failure. */
static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, RegistrationRequest * rrq)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int ret;
	typeof(set_ras_addr_hook) set_ras_addr;

	DEBUGP("ip_ct_ras: RRQ\n");

	ret = expect_q931(pskb, ct, ctinfo, data,
			  rrq->callSignalAddress.item,
			  rrq->callSignalAddress.count);
	if (ret < 0)
		return -1;

	set_ras_addr = rcu_dereference(set_ras_addr_hook);
	if (set_ras_addr) {
		ret = set_ras_addr(pskb, ct, ctinfo, data,
				   rrq->rasAddress.item,
				   rrq->rasAddress.count);
		if (ret < 0)
			return -1;
	}

	/* TTL is applied to the conntrack when the RCF arrives. */
	if (rrq->options & eRegistrationRequest_timeToLive) {
		DEBUGP("ip_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
		info->timeout = rrq->timeToLive;
	} else
		info->timeout = default_rrq_ttl;

	return 0;
}
1406 | |||
/****************************************************************************/
/* Process a RAS RegistrationConfirm: NAT-rewrite the callSignalAddress
 * list when hooked, then refresh the RAS conntrack and the pending Q.931
 * expectation with the negotiated TTL.  Returns 0 on success, -1 on
 * failure. */
static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, RegistrationConfirm * rcf)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	int ret;
	struct ip_conntrack_expect *exp;
	typeof(set_sig_addr_hook) set_sig_addr;

	DEBUGP("ip_ct_ras: RCF\n");

	set_sig_addr = rcu_dereference(set_sig_addr_hook);
	if (set_sig_addr) {
		ret = set_sig_addr(pskb, ct, ctinfo, data,
				   rcf->callSignalAddress.item,
				   rcf->callSignalAddress.count);
		if (ret < 0)
			return -1;
	}

	/* The gatekeeper may override the TTL requested in the RRQ. */
	if (rcf->options & eRegistrationConfirm_timeToLive) {
		DEBUGP("ip_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
		info->timeout = rcf->timeToLive;
	}

	if (info->timeout > 0) {
		DEBUGP
		    ("ip_ct_ras: set RAS connection timeout to %u seconds\n",
		     info->timeout);
		ip_ct_refresh(ct, *pskb, info->timeout * HZ);

		/* Set expect timeout */
		read_lock_bh(&ip_conntrack_lock);
		/* sig_port was recorded by expect_q931() during the RRQ. */
		exp = find_expect(ct, ct->tuplehash[dir].tuple.dst.ip,
				  info->sig_port[!dir]);
		if (exp) {
			DEBUGP("ip_ct_ras: set Q.931 expect "
			       "(%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu) "
			       "timeout to %u seconds\n",
			       NIPQUAD(exp->tuple.src.ip),
			       ntohs(exp->tuple.src.u.tcp.port),
			       NIPQUAD(exp->tuple.dst.ip),
			       ntohs(exp->tuple.dst.u.tcp.port),
			       info->timeout);
			set_expect_timeout(exp, info->timeout);
		}
		read_unlock_bh(&ip_conntrack_lock);
	}

	return 0;
}
1460 | |||
/****************************************************************************/
/* Process a RAS UnregistrationRequest: NAT-rewrite the callSignalAddress
 * list when hooked, drop all pending expectations and recorded signalling
 * ports, and shorten the conntrack lifetime to cover only the expected
 * UCF/URJ reply.  Returns 0 on success, -1 on failure. */
static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, UnregistrationRequest * urq)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	int ret;
	typeof(set_sig_addr_hook) set_sig_addr;

	DEBUGP("ip_ct_ras: URQ\n");

	set_sig_addr = rcu_dereference(set_sig_addr_hook);
	if (set_sig_addr) {
		ret = set_sig_addr(pskb, ct, ctinfo, data,
				   urq->callSignalAddress.item,
				   urq->callSignalAddress.count);
		if (ret < 0)
			return -1;
	}

	/* Clear old expect */
	ip_ct_remove_expectations(ct);
	info->sig_port[dir] = 0;
	info->sig_port[!dir] = 0;

	/* Give it 30 seconds for UCF or URJ */
	ip_ct_refresh(ct, *pskb, 30 * HZ);

	return 0;
}
1492 | |||
/****************************************************************************/
/* Process a RAS AdmissionRequest: NAT-rewrite the call-signalling address
 * for an answering ARQ (destination matches our recorded signalling port)
 * or a calling ARQ (source matches this endpoint).  Returns 0 or the
 * hook's result. */
static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, AdmissionRequest * arq)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	__be32 ip;
	u_int16_t port;
	typeof(set_h225_addr_hook) set_h225_addr;

	DEBUGP("ip_ct_ras: ARQ\n");

	set_h225_addr = rcu_dereference(set_h225_addr_hook);
	if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
	    get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
	    ip == ct->tuplehash[dir].tuple.src.ip &&
	    port == info->sig_port[dir] && set_h225_addr) {
		/* Answering ARQ */
		return set_h225_addr(pskb, data, 0,
				     &arq->destCallSignalAddress,
				     ct->tuplehash[!dir].tuple.dst.ip,
				     info->sig_port[!dir]);
	}

	if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
	    get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
	    ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
		/* Calling ARQ */
		return set_h225_addr(pskb, data, 0,
				     &arq->srcCallSignalAddress,
				     ct->tuplehash[!dir].tuple.dst.ip,
				     port);
	}

	return 0;
}
1530 | |||
1531 | /****************************************************************************/ | ||
1532 | static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
1533 | enum ip_conntrack_info ctinfo, | ||
1534 | unsigned char **data, AdmissionConfirm * acf) | ||
1535 | { | ||
1536 | int dir = CTINFO2DIR(ctinfo); | ||
1537 | int ret = 0; | ||
1538 | __be32 ip; | ||
1539 | u_int16_t port; | ||
1540 | struct ip_conntrack_expect *exp; | ||
1541 | typeof(set_sig_addr_hook) set_sig_addr; | ||
1542 | |||
1543 | DEBUGP("ip_ct_ras: ACF\n"); | ||
1544 | |||
1545 | if (!get_h225_addr(*data, &acf->destCallSignalAddress, &ip, &port)) | ||
1546 | return 0; | ||
1547 | |||
1548 | if (ip == ct->tuplehash[dir].tuple.dst.ip) { /* Answering ACF */ | ||
1549 | set_sig_addr = rcu_dereference(set_sig_addr_hook); | ||
1550 | if (set_sig_addr) | ||
1551 | return set_sig_addr(pskb, ct, ctinfo, data, | ||
1552 | &acf->destCallSignalAddress, 1); | ||
1553 | return 0; | ||
1554 | } | ||
1555 | |||
1556 | /* Need new expect */ | ||
1557 | if ((exp = ip_conntrack_expect_alloc(ct)) == NULL) | ||
1558 | return -1; | ||
1559 | exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; | ||
1560 | exp->tuple.src.u.tcp.port = 0; | ||
1561 | exp->tuple.dst.ip = ip; | ||
1562 | exp->tuple.dst.u.tcp.port = htons(port); | ||
1563 | exp->tuple.dst.protonum = IPPROTO_TCP; | ||
1564 | exp->mask.src.ip = htonl(0xFFFFFFFF); | ||
1565 | exp->mask.src.u.tcp.port = 0; | ||
1566 | exp->mask.dst.ip = htonl(0xFFFFFFFF); | ||
1567 | exp->mask.dst.u.tcp.port = htons(0xFFFF); | ||
1568 | exp->mask.dst.protonum = 0xFF; | ||
1569 | exp->flags = IP_CT_EXPECT_PERMANENT; | ||
1570 | exp->expectfn = ip_conntrack_q931_expect; | ||
1571 | |||
1572 | if (ip_conntrack_expect_related(exp) == 0) { | ||
1573 | DEBUGP("ip_ct_ras: expect Q.931 " | ||
1574 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | ||
1575 | NIPQUAD(exp->tuple.src.ip), | ||
1576 | ntohs(exp->tuple.src.u.tcp.port), | ||
1577 | NIPQUAD(exp->tuple.dst.ip), | ||
1578 | ntohs(exp->tuple.dst.u.tcp.port)); | ||
1579 | } else | ||
1580 | ret = -1; | ||
1581 | |||
1582 | ip_conntrack_expect_put(exp); | ||
1583 | |||
1584 | return ret; | ||
1585 | } | ||
1586 | |||
1587 | /****************************************************************************/ | ||
1588 | static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
1589 | enum ip_conntrack_info ctinfo, | ||
1590 | unsigned char **data, LocationRequest * lrq) | ||
1591 | { | ||
1592 | typeof(set_ras_addr_hook) set_ras_addr; | ||
1593 | |||
1594 | DEBUGP("ip_ct_ras: LRQ\n"); | ||
1595 | |||
1596 | set_ras_addr = rcu_dereference(set_ras_addr_hook); | ||
1597 | if (set_ras_addr) | ||
1598 | return set_ras_addr(pskb, ct, ctinfo, data, | ||
1599 | &lrq->replyAddress, 1); | ||
1600 | return 0; | ||
1601 | } | ||
1602 | |||
/****************************************************************************/
/* Process a RAS LocationConfirm: expect the Q.931 call-signalling
 * connection towards the confirmed callSignalAddress.  Returns 0 on
 * success, -1 on failure. */
static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, LocationConfirm * lcf)
{
	int dir = CTINFO2DIR(ctinfo);
	int ret = 0;
	__be32 ip;
	u_int16_t port;
	struct ip_conntrack_expect *exp = NULL;

	DEBUGP("ip_ct_ras: LCF\n");

	if (!get_h225_addr(*data, &lcf->callSignalAddress, &ip, &port))
		return 0;

	/* Need new expect for call signal */
	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
		return -1;
	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	exp->tuple.src.u.tcp.port = 0;
	exp->tuple.dst.ip = ip;
	exp->tuple.dst.u.tcp.port = htons(port);
	exp->tuple.dst.protonum = IPPROTO_TCP;
	exp->mask.src.ip = htonl(0xFFFFFFFF);
	exp->mask.src.u.tcp.port = 0;
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.tcp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;
	exp->flags = IP_CT_EXPECT_PERMANENT;
	exp->expectfn = ip_conntrack_q931_expect;

	if (ip_conntrack_expect_related(exp) == 0) {
		DEBUGP("ip_ct_ras: expect Q.931 "
		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
		       NIPQUAD(exp->tuple.src.ip),
		       ntohs(exp->tuple.src.u.tcp.port),
		       NIPQUAD(exp->tuple.dst.ip),
		       ntohs(exp->tuple.dst.u.tcp.port));
	} else
		ret = -1;

	ip_conntrack_expect_put(exp);

	/* Ignore rasAddress */

	return ret;
}
1651 | |||
/****************************************************************************/
/* Handle a RAS InfoRequestResponse message: give the NAT module (if it is
 * loaded) a chance to rewrite the endpoint's RAS address and its call
 * signalling addresses inside the packet.
 * Returns 0 on success, -1 if a NAT hook reported a mangling failure
 * (caller then drops the packet). */
static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
		       enum ip_conntrack_info ctinfo,
		       unsigned char **data, InfoRequestResponse * irr)
{
	int ret;
	typeof(set_ras_addr_hook) set_ras_addr;
	typeof(set_sig_addr_hook) set_sig_addr;

	DEBUGP("ip_ct_ras: IRR\n");

	/* Hooks are installed/removed by the NAT module under RCU;
	 * sample each one once before use. */
	set_ras_addr = rcu_dereference(set_ras_addr_hook);
	if (set_ras_addr) {
		ret = set_ras_addr(pskb, ct, ctinfo, data,
				   &irr->rasAddress, 1);
		if (ret < 0)
			return -1;
	}

	set_sig_addr = rcu_dereference(set_sig_addr_hook);
	if (set_sig_addr) {
		ret = set_sig_addr(pskb, ct, ctinfo, data,
				   irr->callSignalAddress.item,
				   irr->callSignalAddress.count);
		if (ret < 0)
			return -1;
	}

	return 0;
}
1682 | |||
1683 | /****************************************************************************/ | ||
1684 | static int process_ras(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
1685 | enum ip_conntrack_info ctinfo, | ||
1686 | unsigned char **data, RasMessage * ras) | ||
1687 | { | ||
1688 | switch (ras->choice) { | ||
1689 | case eRasMessage_gatekeeperRequest: | ||
1690 | return process_grq(pskb, ct, ctinfo, data, | ||
1691 | &ras->gatekeeperRequest); | ||
1692 | case eRasMessage_gatekeeperConfirm: | ||
1693 | return process_gcf(pskb, ct, ctinfo, data, | ||
1694 | &ras->gatekeeperConfirm); | ||
1695 | case eRasMessage_registrationRequest: | ||
1696 | return process_rrq(pskb, ct, ctinfo, data, | ||
1697 | &ras->registrationRequest); | ||
1698 | case eRasMessage_registrationConfirm: | ||
1699 | return process_rcf(pskb, ct, ctinfo, data, | ||
1700 | &ras->registrationConfirm); | ||
1701 | case eRasMessage_unregistrationRequest: | ||
1702 | return process_urq(pskb, ct, ctinfo, data, | ||
1703 | &ras->unregistrationRequest); | ||
1704 | case eRasMessage_admissionRequest: | ||
1705 | return process_arq(pskb, ct, ctinfo, data, | ||
1706 | &ras->admissionRequest); | ||
1707 | case eRasMessage_admissionConfirm: | ||
1708 | return process_acf(pskb, ct, ctinfo, data, | ||
1709 | &ras->admissionConfirm); | ||
1710 | case eRasMessage_locationRequest: | ||
1711 | return process_lrq(pskb, ct, ctinfo, data, | ||
1712 | &ras->locationRequest); | ||
1713 | case eRasMessage_locationConfirm: | ||
1714 | return process_lcf(pskb, ct, ctinfo, data, | ||
1715 | &ras->locationConfirm); | ||
1716 | case eRasMessage_infoRequestResponse: | ||
1717 | return process_irr(pskb, ct, ctinfo, data, | ||
1718 | &ras->infoRequestResponse); | ||
1719 | default: | ||
1720 | DEBUGP("ip_ct_ras: RAS message %d\n", ras->choice); | ||
1721 | break; | ||
1722 | } | ||
1723 | |||
1724 | return 0; | ||
1725 | } | ||
1726 | |||
/****************************************************************************/
/* Conntrack helper callback for RAS traffic (UDP).  Decodes the RAS
 * message and hands it to process_ras().  Packets that cannot be decoded
 * are accepted untracked; only a processing/NAT failure drops the packet. */
static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
		    enum ip_conntrack_info ctinfo)
{
	/* static: decoded message is large; access serialized by
	 * ip_h323_lock below */
	static RasMessage ras;
	unsigned char *data;
	int datalen = 0;
	int ret;

	DEBUGP("ip_ct_ras: skblen = %u\n", (*pskb)->len);

	spin_lock_bh(&ip_h323_lock);

	/* Get UDP data */
	data = get_udp_data(pskb, &datalen);
	if (data == NULL)
		goto accept;
	DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
	       NIPQUAD((*pskb)->nh.iph->saddr),
	       NIPQUAD((*pskb)->nh.iph->daddr), datalen);

	/* Decode RAS message; on decode failure accept the packet
	 * without tracking rather than break connectivity */
	ret = DecodeRasMessage(data, datalen, &ras);
	if (ret < 0) {
		if (net_ratelimit())
			printk("ip_ct_ras: decoding error: %s\n",
			       ret == H323_ERROR_BOUND ?
			       "out of bound" : "out of range");
		goto accept;
	}

	/* Process RAS message */
	if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
		goto drop;

      accept:
	spin_unlock_bh(&ip_h323_lock);
	return NF_ACCEPT;

      drop:
	spin_unlock_bh(&ip_h323_lock);
	if (net_ratelimit())
		printk("ip_ct_ras: packet dropped\n");
	return NF_DROP;
}
1772 | |||
1773 | /****************************************************************************/ | ||
1774 | static struct ip_conntrack_helper ip_conntrack_helper_ras = { | ||
1775 | .name = "RAS", | ||
1776 | .me = THIS_MODULE, | ||
1777 | .max_expected = 32, | ||
1778 | .timeout = 240, | ||
1779 | .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}}, | ||
1780 | .dst = {.protonum = IPPROTO_UDP}}, | ||
1781 | .mask = {.src = {.u = {0xFFFE}}, | ||
1782 | .dst = {.protonum = 0xFF}}, | ||
1783 | .help = ras_help, | ||
1784 | }; | ||
1785 | |||
/****************************************************************************/
/* Expectation callback: attach the RAS helper to the newly created
 * connection.  ip_conntrack_lock protects the helper assignment. */
static void ip_conntrack_ras_expect(struct ip_conntrack *new,
				    struct ip_conntrack_expect *this)
{
	write_lock_bh(&ip_conntrack_lock);
	new->helper = &ip_conntrack_helper_ras;
	write_unlock_bh(&ip_conntrack_lock);
}
1794 | |||
/****************************************************************************/
/* Not __exit - called from init() */
/* Unregister both helpers and release the shared decode buffer.  Also used
 * as the error-unwind path of init(), hence not marked __exit. */
static void fini(void)
{
	ip_conntrack_helper_unregister(&ip_conntrack_helper_ras);
	ip_conntrack_helper_unregister(&ip_conntrack_helper_q931);
	kfree(h323_buffer);
	DEBUGP("ip_ct_h323: fini\n");
}
1804 | |||
1805 | /****************************************************************************/ | ||
1806 | static int __init init(void) | ||
1807 | { | ||
1808 | int ret; | ||
1809 | |||
1810 | h323_buffer = kmalloc(65536, GFP_KERNEL); | ||
1811 | if (!h323_buffer) | ||
1812 | return -ENOMEM; | ||
1813 | if ((ret = ip_conntrack_helper_register(&ip_conntrack_helper_q931)) || | ||
1814 | (ret = ip_conntrack_helper_register(&ip_conntrack_helper_ras))) { | ||
1815 | fini(); | ||
1816 | return ret; | ||
1817 | } | ||
1818 | DEBUGP("ip_ct_h323: init success\n"); | ||
1819 | return 0; | ||
1820 | } | ||
1821 | |||
/****************************************************************************/
module_init(init);
module_exit(fini);

/* Symbols consumed by the companion ip_nat_h323 NAT helper module. */
EXPORT_SYMBOL_GPL(get_h225_addr);
EXPORT_SYMBOL_GPL(ip_conntrack_h245_expect);
EXPORT_SYMBOL_GPL(ip_conntrack_q931_expect);
EXPORT_SYMBOL_GPL(set_h245_addr_hook);
EXPORT_SYMBOL_GPL(set_h225_addr_hook);
EXPORT_SYMBOL_GPL(set_sig_addr_hook);
EXPORT_SYMBOL_GPL(set_ras_addr_hook);
EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
EXPORT_SYMBOL_GPL(nat_t120_hook);
EXPORT_SYMBOL_GPL(nat_h245_hook);
EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
EXPORT_SYMBOL_GPL(nat_q931_hook);

MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
MODULE_DESCRIPTION("H.323 connection tracking helper");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c deleted file mode 100644 index 2b760c5cf709..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ /dev/null | |||
@@ -1,684 +0,0 @@ | |||
1 | /* | ||
2 | * ip_conntrack_pptp.c - Version 3.0 | ||
3 | * | ||
4 | * Connection tracking support for PPTP (Point to Point Tunneling Protocol). | ||
 * PPTP is a protocol for creating virtual private networks.
6 | * It is a specification defined by Microsoft and some vendors | ||
7 | * working with Microsoft. PPTP is built on top of a modified | ||
8 | * version of the Internet Generic Routing Encapsulation Protocol. | ||
9 | * GRE is defined in RFC 1701 and RFC 1702. Documentation of | ||
10 | * PPTP can be found in RFC 2637 | ||
11 | * | ||
12 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
13 | * | ||
14 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
15 | * | ||
16 | * Limitations: | ||
17 | * - We blindly assume that control connections are always | ||
18 | * established in PNS->PAC direction. This is a violation | ||
 *	 of RFC 2637
20 | * - We can only support one single call within each session | ||
21 | * | ||
22 | * TODO: | ||
23 | * - testing of incoming PPTP calls | ||
24 | * | ||
25 | * Changes: | ||
26 | * 2002-02-05 - Version 1.3 | ||
27 | * - Call ip_conntrack_unexpect_related() from | ||
28 | * pptp_destroy_siblings() to destroy expectations in case | ||
29 | * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen | ||
30 | * (Philip Craig <philipc@snapgear.com>) | ||
31 | * - Add Version information at module loadtime | ||
32 | * 2002-02-10 - Version 1.6 | ||
33 | * - move to C99 style initializers | ||
34 | * - remove second expectation if first arrives | ||
35 | * 2004-10-22 - Version 2.0 | ||
36 | * - merge Mandrake's 2.6.x port with recent 2.6.x API changes | ||
37 | * - fix lots of linear skb assumptions from Mandrake's port | ||
38 | * 2005-06-10 - Version 2.1 | ||
39 | * - use ip_conntrack_expect_free() instead of kfree() on the | ||
40 | * expect's (which are from the slab for quite some time) | ||
41 | * 2005-06-10 - Version 3.0 | ||
42 | * - port helper to post-2.6.11 API changes, | ||
43 | * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) | ||
44 | * 2005-07-30 - Version 3.1 | ||
45 | * - port helper to 2.6.13 API changes | ||
46 | * | ||
47 | */ | ||
48 | |||
49 | #include <linux/module.h> | ||
50 | #include <linux/netfilter.h> | ||
51 | #include <linux/ip.h> | ||
52 | #include <net/checksum.h> | ||
53 | #include <net/tcp.h> | ||
54 | |||
55 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
56 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
57 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
58 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
59 | #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> | ||
60 | |||
61 | #define IP_CT_PPTP_VERSION "3.1" | ||
62 | |||
63 | MODULE_LICENSE("GPL"); | ||
64 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
65 | MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); | ||
66 | |||
/* Serializes access to the per-connection PPTP state (ct_pptp_info). */
static DEFINE_SPINLOCK(ip_pptp_lock);

/* NAT hooks, installed by the ip_nat_pptp module when it loads; NULL when
 * NAT is not in use.  Readers sample them via rcu_dereference(). */
int
(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
			     struct ip_conntrack *ct,
			     enum ip_conntrack_info ctinfo,
			     struct PptpControlHeader *ctlh,
			     union pptp_ctrl_union *pptpReq);

int
(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
			    struct ip_conntrack *ct,
			    enum ip_conntrack_info ctinfo,
			    struct PptpControlHeader *ctlh,
			    union pptp_ctrl_union *pptpReq);

void
(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
			    struct ip_conntrack_expect *expect_reply);

void
(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
			     struct ip_conntrack_expect *exp);

/* Debug aids, compiled out by default (#if 0); flip to 1 for verbose
 * tracing.  pptp_msg_name is only defined/exported in the debug build. */
#if 0
/* PptpControlMessageType names */
const char *pptp_msg_name[] = {
	"UNKNOWN_MESSAGE",
	"START_SESSION_REQUEST",
	"START_SESSION_REPLY",
	"STOP_SESSION_REQUEST",
	"STOP_SESSION_REPLY",
	"ECHO_REQUEST",
	"ECHO_REPLY",
	"OUT_CALL_REQUEST",
	"OUT_CALL_REPLY",
	"IN_CALL_REQUEST",
	"IN_CALL_REPLY",
	"IN_CALL_CONNECT",
	"CALL_CLEAR_REQUEST",
	"CALL_DISCONNECT_NOTIFY",
	"WAN_ERROR_NOTIFY",
	"SET_LINK_INFO"
};
EXPORT_SYMBOL(pptp_msg_name);
#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
#else
#define DEBUGP(format, args...)
#endif

/* Readable time-unit suffixes for the timeout constants below. */
#define SECS *HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS

/* Idle timeouts applied to the expected GRE data channel. */
#define PPTP_GRE_TIMEOUT	(10 MINS)
#define PPTP_GRE_STREAM_TIMEOUT	(5 HOURS)
123 | |||
/* Expectation callback for the GRE data channel: raise the GRE conntrack
 * timeouts and, when NAT is not loaded, remove the expectation for the
 * opposite direction (only one of the two GRE expectations can be the one
 * that matched). */
static void pptp_expectfn(struct ip_conntrack *ct,
			  struct ip_conntrack_expect *exp)
{
	typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;

	DEBUGP("increasing timeouts\n");

	/* increase timeout of GRE data channel conntrack entry */
	ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
	ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;

	/* Can you see how rusty this code is, compared with the pre-2.6.11
	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */

	rcu_read_lock();
	ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
	if (!ip_nat_pptp_expectfn) {
		struct ip_conntrack_tuple inv_t;
		struct ip_conntrack_expect *exp_other;

		/* obviously this tuple inversion only works until you do NAT */
		invert_tuplepr(&inv_t, &exp->tuple);
		DEBUGP("trying to unexpect other dir: ");
		DUMP_TUPLE(&inv_t);

		exp_other = ip_conntrack_expect_find_get(&inv_t);
		if (exp_other) {
			/* delete other expectation. */
			DEBUGP("found\n");
			ip_conntrack_unexpect_related(exp_other);
			/* drop the reference taken by expect_find_get() */
			ip_conntrack_expect_put(exp_other);
		} else {
			DEBUGP("not found\n");
		}
	} else {
		/* we need more than simple inversion */
		ip_nat_pptp_expectfn(ct, exp);
	}
	rcu_read_unlock();
}
164 | |||
165 | static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) | ||
166 | { | ||
167 | struct ip_conntrack_tuple_hash *h; | ||
168 | struct ip_conntrack_expect *exp; | ||
169 | |||
170 | DEBUGP("trying to timeout ct or exp for tuple "); | ||
171 | DUMP_TUPLE(t); | ||
172 | |||
173 | h = ip_conntrack_find_get(t, NULL); | ||
174 | if (h) { | ||
175 | struct ip_conntrack *sibling = tuplehash_to_ctrack(h); | ||
176 | DEBUGP("setting timeout of conntrack %p to 0\n", sibling); | ||
177 | sibling->proto.gre.timeout = 0; | ||
178 | sibling->proto.gre.stream_timeout = 0; | ||
179 | if (del_timer(&sibling->timeout)) | ||
180 | sibling->timeout.function((unsigned long)sibling); | ||
181 | ip_conntrack_put(sibling); | ||
182 | return 1; | ||
183 | } else { | ||
184 | exp = ip_conntrack_expect_find_get(t); | ||
185 | if (exp) { | ||
186 | DEBUGP("unexpect_related of expect %p\n", exp); | ||
187 | ip_conntrack_unexpect_related(exp); | ||
188 | ip_conntrack_expect_put(exp); | ||
189 | return 1; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | |||
/* timeout GRE data connections */
/* Tear down both directions of the GRE data channel belonging to a closing
 * PPTP control connection: drop the GRE keymap, then expire (or unexpect)
 * the conntrack for each direction, keyed by the tracked call IDs. */
static void pptp_destroy_siblings(struct ip_conntrack *ct)
{
	struct ip_conntrack_tuple t;

	ip_ct_gre_keymap_destroy(ct);
	/* Since ct->sibling_list has literally rusted away in 2.6.11,
	 * we now need another way to find out about our sibling
	 * contrack and expects... -HW */

	/* try original (pns->pac) tuple */
	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
	t.dst.protonum = IPPROTO_GRE;
	t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
	t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;

	if (!destroy_sibling_or_exp(&t))
		DEBUGP("failed to timeout original pns->pac ct/exp\n");

	/* try reply (pac->pns) tuple */
	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
	t.dst.protonum = IPPROTO_GRE;
	t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
	t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;

	if (!destroy_sibling_or_exp(&t))
		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
}
225 | |||
/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
/* Register a pair of GRE expectations (one per direction) for the call
 * identified by @callid (PAC side) and @peer_callid (PNS side), and add
 * the corresponding GRE keymap entries.  Returns 0 on success, 1 on any
 * failure (allocation, expectation clash, keymap).  Cleanup is strictly
 * reverse-ordered via the goto ladder at the bottom. */
static inline int
exp_gre(struct ip_conntrack *ct,
	__be16 callid,
	__be16 peer_callid)
{
	struct ip_conntrack_expect *exp_orig, *exp_reply;
	int ret = 1;		/* pessimistic default: failure */
	typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;

	exp_orig = ip_conntrack_expect_alloc(ct);
	if (exp_orig == NULL)
		goto out;

	exp_reply = ip_conntrack_expect_alloc(ct);
	if (exp_reply == NULL)
		goto out_put_orig;

	/* original direction, PNS->PAC */
	exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
	exp_orig->tuple.src.u.gre.key = peer_callid;
	exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
	exp_orig->tuple.dst.u.gre.key = callid;
	exp_orig->tuple.dst.protonum = IPPROTO_GRE;

	exp_orig->mask.src.ip = htonl(0xffffffff);
	exp_orig->mask.src.u.all = 0;
	exp_orig->mask.dst.u.gre.key = htons(0xffff);
	exp_orig->mask.dst.ip = htonl(0xffffffff);
	exp_orig->mask.dst.protonum = 0xff;

	exp_orig->master = ct;
	exp_orig->expectfn = pptp_expectfn;
	exp_orig->flags = 0;

	/* both expectations are identical apart from tuple */
	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));

	/* reply direction, PAC->PNS */
	exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
	exp_reply->tuple.src.u.gre.key = callid;
	exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
	exp_reply->tuple.dst.u.gre.key = peer_callid;
	exp_reply->tuple.dst.protonum = IPPROTO_GRE;

	/* let NAT (if loaded) rewrite the expected tuples first */
	ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
	if (ip_nat_pptp_exp_gre)
		ip_nat_pptp_exp_gre(exp_orig, exp_reply);
	if (ip_conntrack_expect_related(exp_orig) != 0)
		goto out_put_both;
	if (ip_conntrack_expect_related(exp_reply) != 0)
		goto out_unexpect_orig;

	/* Add GRE keymap entries */
	if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
		goto out_unexpect_both;
	if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
		ip_ct_gre_keymap_destroy(ct);
		goto out_unexpect_both;
	}
	ret = 0;

out_put_both:
	ip_conntrack_expect_put(exp_reply);
out_put_orig:
	ip_conntrack_expect_put(exp_orig);
out:
	return ret;

out_unexpect_both:
	ip_conntrack_unexpect_related(exp_reply);
out_unexpect_orig:
	ip_conntrack_unexpect_related(exp_orig);
	goto out_put_both;
}
301 | |||
/* State machine for PAC->PNS (server-to-client) control messages.  Tracks
 * session/call state in ct->help.ct_pptp_info, registers GRE expectations
 * when a call is established, and finally gives the NAT inbound hook a
 * chance to mangle the packet.  Messages that do not fit the current state
 * are logged (debug) and accepted without state change.
 * Called with ip_pptp_lock held. */
static inline int
pptp_inbound_pkt(struct sk_buff **pskb,
		 struct PptpControlHeader *ctlh,
		 union pptp_ctrl_union *pptpReq,
		 unsigned int reqlen,
		 struct ip_conntrack *ct,
		 enum ip_conntrack_info ctinfo)
{
	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
	u_int16_t msg;
	__be16 cid = 0, pcid = 0;
	typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;

	msg = ntohs(ctlh->messageType);
	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);

	switch (msg) {
	case PPTP_START_SESSION_REPLY:
		/* server confirms new control session */
		if (info->sstate < PPTP_SESSION_REQUESTED)
			goto invalid;
		if (pptpReq->srep.resultCode == PPTP_START_OK)
			info->sstate = PPTP_SESSION_CONFIRMED;
		else
			info->sstate = PPTP_SESSION_ERROR;
		break;

	case PPTP_STOP_SESSION_REPLY:
		/* server confirms end of control session */
		if (info->sstate > PPTP_SESSION_STOPREQ)
			goto invalid;
		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
			info->sstate = PPTP_SESSION_NONE;
		else
			info->sstate = PPTP_SESSION_ERROR;
		break;

	case PPTP_OUT_CALL_REPLY:
		/* server accepted call, we now expect GRE frames */
		if (info->sstate != PPTP_SESSION_CONFIRMED)
			goto invalid;
		if (info->cstate != PPTP_CALL_OUT_REQ &&
		    info->cstate != PPTP_CALL_OUT_CONF)
			goto invalid;

		cid = pptpReq->ocack.callID;
		pcid = pptpReq->ocack.peersCallID;
		/* peer's call ID must be the one we tracked on the request */
		if (info->pns_call_id != pcid)
			goto invalid;
		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
		       ntohs(cid), ntohs(pcid));

		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
			info->cstate = PPTP_CALL_OUT_CONF;
			info->pac_call_id = cid;
			exp_gre(ct, cid, pcid);
		} else
			info->cstate = PPTP_CALL_NONE;
		break;

	case PPTP_IN_CALL_REQUEST:
		/* server tells us about incoming call request */
		if (info->sstate != PPTP_SESSION_CONFIRMED)
			goto invalid;

		cid = pptpReq->icreq.callID;
		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
		info->cstate = PPTP_CALL_IN_REQ;
		info->pac_call_id = cid;
		break;

	case PPTP_IN_CALL_CONNECT:
		/* server tells us about incoming call established */
		if (info->sstate != PPTP_SESSION_CONFIRMED)
			goto invalid;
		if (info->cstate != PPTP_CALL_IN_REP &&
		    info->cstate != PPTP_CALL_IN_CONF)
			goto invalid;

		pcid = pptpReq->iccon.peersCallID;
		cid = info->pac_call_id;

		if (info->pns_call_id != pcid)
			goto invalid;

		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
		info->cstate = PPTP_CALL_IN_CONF;

		/* we expect a GRE connection from PAC to PNS */
		exp_gre(ct, cid, pcid);
		break;

	case PPTP_CALL_DISCONNECT_NOTIFY:
		/* server confirms disconnect */
		cid = pptpReq->disc.callID;
		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
		info->cstate = PPTP_CALL_NONE;

		/* untrack this call id, unexpect GRE packets */
		pptp_destroy_siblings(ct);
		break;

	case PPTP_WAN_ERROR_NOTIFY:
	case PPTP_ECHO_REQUEST:
	case PPTP_ECHO_REPLY:
		/* I don't have to explain these ;) */
		break;
	default:
		goto invalid;
	}

	ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
	if (ip_nat_pptp_inbound)
		return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
	return NF_ACCEPT;

invalid:
	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
	       msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
	/* accept but don't track: helpers shouldn't break connectivity */
	return NF_ACCEPT;
}
426 | |||
/* State machine for PNS->PAC (client-to-server) control messages.  Mirrors
 * pptp_inbound_pkt() for the outbound direction: updates session/call
 * state, tracks the PNS call ID, then lets the NAT outbound hook mangle
 * the packet.  Called with ip_pptp_lock held. */
static inline int
pptp_outbound_pkt(struct sk_buff **pskb,
		  struct PptpControlHeader *ctlh,
		  union pptp_ctrl_union *pptpReq,
		  unsigned int reqlen,
		  struct ip_conntrack *ct,
		  enum ip_conntrack_info ctinfo)
{
	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
	u_int16_t msg;
	__be16 cid = 0, pcid = 0;
	typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;

	msg = ntohs(ctlh->messageType);
	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);

	switch (msg) {
	case PPTP_START_SESSION_REQUEST:
		/* client requests for new control session */
		if (info->sstate != PPTP_SESSION_NONE)
			goto invalid;
		info->sstate = PPTP_SESSION_REQUESTED;
		break;
	case PPTP_STOP_SESSION_REQUEST:
		/* client requests end of control session */
		info->sstate = PPTP_SESSION_STOPREQ;
		break;

	case PPTP_OUT_CALL_REQUEST:
		/* client initiating connection to server */
		if (info->sstate != PPTP_SESSION_CONFIRMED)
			goto invalid;
		info->cstate = PPTP_CALL_OUT_REQ;
		/* track PNS call id */
		cid = pptpReq->ocreq.callID;
		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
		info->pns_call_id = cid;
		break;
	case PPTP_IN_CALL_REPLY:
		/* client answers incoming call */
		if (info->cstate != PPTP_CALL_IN_REQ &&
		    info->cstate != PPTP_CALL_IN_REP)
			goto invalid;

		cid = pptpReq->icack.callID;
		pcid = pptpReq->icack.peersCallID;
		/* must refer to the call the server announced */
		if (info->pac_call_id != pcid)
			goto invalid;
		DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
		       ntohs(cid), ntohs(pcid));

		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
			/* part two of the three-way handshake */
			info->cstate = PPTP_CALL_IN_REP;
			info->pns_call_id = cid;
		} else
			info->cstate = PPTP_CALL_NONE;
		break;

	case PPTP_CALL_CLEAR_REQUEST:
		/* client requests hangup of call */
		if (info->sstate != PPTP_SESSION_CONFIRMED)
			goto invalid;
		/* FUTURE: iterate over all calls and check if
		 * call ID is valid. We don't do this without newnat,
		 * because we only know about last call */
		info->cstate = PPTP_CALL_CLEAR_REQ;
		break;
	case PPTP_SET_LINK_INFO:
	case PPTP_ECHO_REQUEST:
	case PPTP_ECHO_REPLY:
		/* I don't have to explain these ;) */
		break;
	default:
		goto invalid;
	}

	ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
	if (ip_nat_pptp_outbound)
		return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
	return NF_ACCEPT;

invalid:
	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
	       msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
	/* accept but don't track: helpers shouldn't break connectivity */
	return NF_ACCEPT;
}
517 | |||
/* Size of the fixed, type-specific part of each control message, indexed
 * by PPTP message type.  conntrack_pptp_help() accepts (without tracking)
 * any message whose payload is shorter than this. */
static const unsigned int pptp_msg_size[] = {
	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
	[PPTP_OUT_CALL_REPLY]         = sizeof(struct PptpOutCallReply),
	[PPTP_IN_CALL_REQUEST]        = sizeof(struct PptpInCallRequest),
	[PPTP_IN_CALL_REPLY]          = sizeof(struct PptpInCallReply),
	[PPTP_IN_CALL_CONNECT]        = sizeof(struct PptpInCallConnected),
	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
	[PPTP_WAN_ERROR_NOTIFY]       = sizeof(struct PptpWanErrorNotify),
	[PPTP_SET_LINK_INFO]          = sizeof(struct PptpSetLinkInfo),
};
533 | |||
/* track caller id inside control connection, call expect_related */
/* Main helper callback for the PPTP control connection (TCP/1723).
 * Walks IP -> TCP -> PPTP packet header -> control header, validates the
 * message, then dispatches to the per-direction state machine under
 * ip_pptp_lock.  Anything it cannot parse is accepted untracked. */
static int
conntrack_pptp_help(struct sk_buff **pskb,
		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)

{
	int dir = CTINFO2DIR(ctinfo);
	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
	struct tcphdr _tcph, *tcph;
	struct pptp_pkt_hdr _pptph, *pptph;
	struct PptpControlHeader _ctlh, *ctlh;
	union pptp_ctrl_union _pptpReq, *pptpReq;
	unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
	unsigned int datalen, reqlen, nexthdr_off;
	int oldsstate, oldcstate;
	int ret;
	u_int16_t msg;

	/* don't do any tracking before tcp handshake complete */
	if (ctinfo != IP_CT_ESTABLISHED
	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
		return NF_ACCEPT;
	}

	/* step past the TCP header (conntrack already validated it,
	 * hence the BUG_ON rather than a graceful bail-out) */
	nexthdr_off = (*pskb)->nh.iph->ihl*4;
	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
	BUG_ON(!tcph);
	nexthdr_off += tcph->doff * 4;
	datalen = tcplen - tcph->doff * 4;

	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
	if (!pptph) {
		DEBUGP("no full PPTP header, can't track\n");
		return NF_ACCEPT;
	}
	nexthdr_off += sizeof(_pptph);
	datalen -= sizeof(_pptph);

	/* if it's not a control message we can't do anything with it */
	if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
	    ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
		DEBUGP("not a control packet\n");
		return NF_ACCEPT;
	}

	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
	if (!ctlh)
		return NF_ACCEPT;
	nexthdr_off += sizeof(_ctlh);
	datalen -= sizeof(_ctlh);

	/* accept truncated known messages; clamp what we copy to the
	 * size of the request union */
	reqlen = datalen;
	msg = ntohs(ctlh->messageType);
	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
		return NF_ACCEPT;
	if (reqlen > sizeof(*pptpReq))
		reqlen = sizeof(*pptpReq);

	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
	if (!pptpReq)
		return NF_ACCEPT;

	oldsstate = info->sstate;
	oldcstate = info->cstate;

	spin_lock_bh(&ip_pptp_lock);

	/* FIXME: We just blindly assume that the control connection is always
	 * established from PNS->PAC. However, RFC makes no guarantee */
	if (dir == IP_CT_DIR_ORIGINAL)
		/* client -> server (PNS -> PAC) */
		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
					ctinfo);
	else
		/* server -> client (PAC -> PNS) */
		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
				       ctinfo);
	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
	       oldsstate, info->sstate, oldcstate, info->cstate);
	spin_unlock_bh(&ip_pptp_lock);

	return ret;
}
618 | |||
/* control protocol helper */
/* Helper bound to the PPTP control port (TCP/1723); exact source-port
 * match.  max_expected = 2: one GRE expectation per direction. */
static struct ip_conntrack_helper pptp = {
	.list = { NULL, NULL },
	.name = "pptp",
	.me = THIS_MODULE,
	.max_expected = 2,
	.timeout = 5 * 60,
	.tuple = { .src = { .ip = 0,
			    .u = { .tcp = { .port =
				    __constant_htons(PPTP_CONTROL_PORT) } }
			  },
		   .dst = { .ip = 0,
			    .u = { .all = 0 },
			    .protonum = IPPROTO_TCP
			  }
		 },
	.mask = { .src = { .ip = 0,
			   .u = { .tcp = { .port = __constant_htons(0xffff) } }
			 },
		  .dst = { .ip = 0,
			   .u = { .all = 0 },
			   .protonum = 0xff
			 }
		},
	.help = conntrack_pptp_help,
	.destroy = pptp_destroy_siblings,
};
646 | |||
647 | extern void ip_ct_proto_gre_fini(void); | ||
648 | extern int __init ip_ct_proto_gre_init(void); | ||
649 | |||
650 | /* ip_conntrack_pptp initialization */ | ||
651 | static int __init ip_conntrack_helper_pptp_init(void) | ||
652 | { | ||
653 | int retcode; | ||
654 | |||
655 | retcode = ip_ct_proto_gre_init(); | ||
656 | if (retcode < 0) | ||
657 | return retcode; | ||
658 | |||
659 | DEBUGP(" registering helper\n"); | ||
660 | if ((retcode = ip_conntrack_helper_register(&pptp))) { | ||
661 | printk(KERN_ERR "Unable to register conntrack application " | ||
662 | "helper for pptp: %d\n", retcode); | ||
663 | ip_ct_proto_gre_fini(); | ||
664 | return retcode; | ||
665 | } | ||
666 | |||
667 | printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); | ||
668 | return 0; | ||
669 | } | ||
670 | |||
671 | static void __exit ip_conntrack_helper_pptp_fini(void) | ||
672 | { | ||
673 | ip_conntrack_helper_unregister(&pptp); | ||
674 | ip_ct_proto_gre_fini(); | ||
675 | printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); | ||
676 | } | ||
677 | |||
module_init(ip_conntrack_helper_pptp_init);
module_exit(ip_conntrack_helper_pptp_fini);

/* Hook pointers filled in by the companion ip_nat_pptp module. */
EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c deleted file mode 100644 index 053e591f407a..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ /dev/null | |||
@@ -1,314 +0,0 @@ | |||
1 | /* IRC extension for IP connection tracking, Version 1.21 | ||
2 | * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> | ||
3 | * based on RR's ip_conntrack_ftp.c | ||
4 | * | ||
5 | * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | ** | ||
12 | * Module load syntax: | ||
13 | * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS> | ||
14 | * max_dcc_channels=n dcc_timeout=secs | ||
15 | * | ||
16 | * please give the ports of all IRC servers You wish to connect to. | ||
17 | * If You don't specify ports, the default will be port 6667. | ||
18 | * With max_dcc_channels you can define the maximum number of not | ||
19 | * yet answered DCC channels per IRC session (default 8). | ||
20 | * With dcc_timeout you can specify how long the system waits for | ||
21 | * an expected DCC channel (default 300 seconds). | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/module.h> | ||
26 | #include <linux/netfilter.h> | ||
27 | #include <linux/ip.h> | ||
28 | #include <net/checksum.h> | ||
29 | #include <net/tcp.h> | ||
30 | |||
31 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
32 | #include <linux/netfilter_ipv4/ip_conntrack_irc.h> | ||
33 | #include <linux/moduleparam.h> | ||
34 | |||
/* Tunables and shared state for the IRC (DCC) conntrack helper. */
#define MAX_PORTS 8
static unsigned short ports[MAX_PORTS];		/* IRC server ports to watch */
static int ports_c;				/* entries used in ports[] */
static unsigned int max_dcc_channels = 8;	/* pending DCC expectations per session */
static unsigned int dcc_timeout = 300;		/* seconds to wait for DCC setup */
/* This is slow, but it's simple. --RR */
static char *irc_buffer;			/* packet linearization buffer */
static DEFINE_SPINLOCK(irc_buffer_lock);	/* serializes irc_buffer use */

/* NAT hook: installed by ip_nat_irc when that module loads so addresses
 * embedded in DCC commands can be rewritten; NULL without NAT. */
unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
				enum ip_conntrack_info ctinfo,
				unsigned int matchoff,
				unsigned int matchlen,
				struct ip_conntrack_expect *exp);
EXPORT_SYMBOL_GPL(ip_nat_irc_hook);

MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
module_param(max_dcc_channels, uint, 0400);
MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
module_param(dcc_timeout, uint, 0400);
MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");

/* DCC subcommands that announce a connection we must expect. */
static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
#define MINMATCHLEN 5	/* shortest dccprotos[] entry, incl. trailing space */

/* Flip "#if 0" to "#if 1" for verbose per-packet debugging. */
#if 0
#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
                                       __FILE__, __FUNCTION__ , ## args)
#else
#define DEBUGP(format, args...)
#endif
70 | |||
static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
/* tries to get the ip_addr and port out of a dcc command
   return value: -1 on failure, 0 on success
	data		pointer to first byte of DCC command data
	data_end	pointer to last byte of dcc command data
	ip		returns parsed ip of dcc command
	port		returns parsed port of dcc command
	ad_beg_p	returns pointer to first byte of addr data
	ad_end_p	returns pointer to last byte of addr data */
{

	/* at least 12: "AAAAAAAA P\1\n"; skip to the space before the
	 * decimal IP, bailing out if not enough room is left */
	while (*data++ != ' ')
		if (data > data_end - 12)
			return -1;

	*ad_beg_p = data;
	*ip = simple_strtoul(data, &data, 10);

	/* skip blanks between ip and port */
	while (*data == ' ') {
		/* NOTE(review): *data is read before this bounds check; the
		 * caller's (19+MINMATCHLEN) window appears to guarantee room,
		 * but confirm before relying on it */
		if (data >= data_end)
			return -1;
		data++;
	}

	*port = simple_strtoul(data, &data, 10);
	*ad_end_p = data;

	return 0;
}
103 | |||
104 | static int help(struct sk_buff **pskb, | ||
105 | struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) | ||
106 | { | ||
107 | unsigned int dataoff; | ||
108 | struct tcphdr _tcph, *th; | ||
109 | char *data, *data_limit, *ib_ptr; | ||
110 | int dir = CTINFO2DIR(ctinfo); | ||
111 | struct ip_conntrack_expect *exp; | ||
112 | u32 seq; | ||
113 | u_int32_t dcc_ip; | ||
114 | u_int16_t dcc_port; | ||
115 | int i, ret = NF_ACCEPT; | ||
116 | char *addr_beg_p, *addr_end_p; | ||
117 | typeof(ip_nat_irc_hook) ip_nat_irc; | ||
118 | |||
119 | DEBUGP("entered\n"); | ||
120 | |||
121 | /* If packet is coming from IRC server */ | ||
122 | if (dir == IP_CT_DIR_REPLY) | ||
123 | return NF_ACCEPT; | ||
124 | |||
125 | /* Until there's been traffic both ways, don't look in packets. */ | ||
126 | if (ctinfo != IP_CT_ESTABLISHED | ||
127 | && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { | ||
128 | DEBUGP("Conntrackinfo = %u\n", ctinfo); | ||
129 | return NF_ACCEPT; | ||
130 | } | ||
131 | |||
132 | /* Not a full tcp header? */ | ||
133 | th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, | ||
134 | sizeof(_tcph), &_tcph); | ||
135 | if (th == NULL) | ||
136 | return NF_ACCEPT; | ||
137 | |||
138 | /* No data? */ | ||
139 | dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4; | ||
140 | if (dataoff >= (*pskb)->len) | ||
141 | return NF_ACCEPT; | ||
142 | |||
143 | spin_lock_bh(&irc_buffer_lock); | ||
144 | ib_ptr = skb_header_pointer(*pskb, dataoff, | ||
145 | (*pskb)->len - dataoff, irc_buffer); | ||
146 | BUG_ON(ib_ptr == NULL); | ||
147 | |||
148 | data = ib_ptr; | ||
149 | data_limit = ib_ptr + (*pskb)->len - dataoff; | ||
150 | |||
151 | /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 | ||
152 | * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ | ||
153 | while (data < (data_limit - (19 + MINMATCHLEN))) { | ||
154 | if (memcmp(data, "\1DCC ", 5)) { | ||
155 | data++; | ||
156 | continue; | ||
157 | } | ||
158 | |||
159 | data += 5; | ||
160 | /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ | ||
161 | |||
162 | DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n", | ||
163 | NIPQUAD(iph->saddr), ntohs(th->source), | ||
164 | NIPQUAD(iph->daddr), ntohs(th->dest)); | ||
165 | |||
166 | for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { | ||
167 | if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { | ||
168 | /* no match */ | ||
169 | continue; | ||
170 | } | ||
171 | |||
172 | DEBUGP("DCC %s detected\n", dccprotos[i]); | ||
173 | data += strlen(dccprotos[i]); | ||
174 | /* we have at least | ||
175 | * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid | ||
176 | * data left (== 14/13 bytes) */ | ||
177 | if (parse_dcc((char *)data, data_limit, &dcc_ip, | ||
178 | &dcc_port, &addr_beg_p, &addr_end_p)) { | ||
179 | /* unable to parse */ | ||
180 | DEBUGP("unable to parse dcc command\n"); | ||
181 | continue; | ||
182 | } | ||
183 | DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n", | ||
184 | HIPQUAD(dcc_ip), dcc_port); | ||
185 | |||
186 | /* dcc_ip can be the internal OR external (NAT'ed) IP | ||
187 | * Tiago Sousa <mirage@kaotik.org> */ | ||
188 | if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip) | ||
189 | && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) { | ||
190 | if (net_ratelimit()) | ||
191 | printk(KERN_WARNING | ||
192 | "Forged DCC command from " | ||
193 | "%u.%u.%u.%u: %u.%u.%u.%u:%u\n", | ||
194 | NIPQUAD(ct->tuplehash[dir].tuple.src.ip), | ||
195 | HIPQUAD(dcc_ip), dcc_port); | ||
196 | |||
197 | continue; | ||
198 | } | ||
199 | |||
200 | exp = ip_conntrack_expect_alloc(ct); | ||
201 | if (exp == NULL) { | ||
202 | ret = NF_DROP; | ||
203 | goto out; | ||
204 | } | ||
205 | |||
206 | /* save position of address in dcc string, | ||
207 | * necessary for NAT */ | ||
208 | DEBUGP("tcph->seq = %u\n", th->seq); | ||
209 | seq = ntohl(th->seq) + (addr_beg_p - ib_ptr); | ||
210 | |||
211 | /* We refer to the reverse direction ("!dir") | ||
212 | * tuples here, because we're expecting | ||
213 | * something in the other * direction. | ||
214 | * Doesn't matter unless NAT is happening. */ | ||
215 | exp->tuple = ((struct ip_conntrack_tuple) | ||
216 | { { 0, { 0 } }, | ||
217 | { ct->tuplehash[!dir].tuple.dst.ip, | ||
218 | { .tcp = { htons(dcc_port) } }, | ||
219 | IPPROTO_TCP }}); | ||
220 | exp->mask = ((struct ip_conntrack_tuple) | ||
221 | { { 0, { 0 } }, | ||
222 | { htonl(0xFFFFFFFF), | ||
223 | { .tcp = { htons(0xFFFF) } }, 0xFF }}); | ||
224 | exp->expectfn = NULL; | ||
225 | exp->flags = 0; | ||
226 | ip_nat_irc = rcu_dereference(ip_nat_irc_hook); | ||
227 | if (ip_nat_irc) | ||
228 | ret = ip_nat_irc(pskb, ctinfo, | ||
229 | addr_beg_p - ib_ptr, | ||
230 | addr_end_p - addr_beg_p, | ||
231 | exp); | ||
232 | else if (ip_conntrack_expect_related(exp) != 0) | ||
233 | ret = NF_DROP; | ||
234 | ip_conntrack_expect_put(exp); | ||
235 | goto out; | ||
236 | } /* for .. NUM_DCCPROTO */ | ||
237 | } /* while data < ... */ | ||
238 | |||
239 | out: | ||
240 | spin_unlock_bh(&irc_buffer_lock); | ||
241 | return ret; | ||
242 | } | ||
243 | |||
/* One helper instance (and one name buffer, e.g. "irc-65535") per port. */
static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
static char irc_names[MAX_PORTS][sizeof("irc-65535")];

/* forward declaration: init uses fini() for error unwinding */
static void ip_conntrack_irc_fini(void);
248 | |||
249 | static int __init ip_conntrack_irc_init(void) | ||
250 | { | ||
251 | int i, ret; | ||
252 | struct ip_conntrack_helper *hlpr; | ||
253 | char *tmpname; | ||
254 | |||
255 | if (max_dcc_channels < 1) { | ||
256 | printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n"); | ||
257 | return -EBUSY; | ||
258 | } | ||
259 | |||
260 | irc_buffer = kmalloc(65536, GFP_KERNEL); | ||
261 | if (!irc_buffer) | ||
262 | return -ENOMEM; | ||
263 | |||
264 | /* If no port given, default to standard irc port */ | ||
265 | if (ports_c == 0) | ||
266 | ports[ports_c++] = IRC_PORT; | ||
267 | |||
268 | for (i = 0; i < ports_c; i++) { | ||
269 | hlpr = &irc_helpers[i]; | ||
270 | hlpr->tuple.src.u.tcp.port = htons(ports[i]); | ||
271 | hlpr->tuple.dst.protonum = IPPROTO_TCP; | ||
272 | hlpr->mask.src.u.tcp.port = htons(0xFFFF); | ||
273 | hlpr->mask.dst.protonum = 0xFF; | ||
274 | hlpr->max_expected = max_dcc_channels; | ||
275 | hlpr->timeout = dcc_timeout; | ||
276 | hlpr->me = THIS_MODULE; | ||
277 | hlpr->help = help; | ||
278 | |||
279 | tmpname = &irc_names[i][0]; | ||
280 | if (ports[i] == IRC_PORT) | ||
281 | sprintf(tmpname, "irc"); | ||
282 | else | ||
283 | sprintf(tmpname, "irc-%d", i); | ||
284 | hlpr->name = tmpname; | ||
285 | |||
286 | DEBUGP("port #%d: %d\n", i, ports[i]); | ||
287 | |||
288 | ret = ip_conntrack_helper_register(hlpr); | ||
289 | |||
290 | if (ret) { | ||
291 | printk("ip_conntrack_irc: ERROR registering port %d\n", | ||
292 | ports[i]); | ||
293 | ip_conntrack_irc_fini(); | ||
294 | return -EBUSY; | ||
295 | } | ||
296 | } | ||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | /* This function is intentionally _NOT_ defined as __exit, because | ||
301 | * it is needed by the init function */ | ||
302 | static void ip_conntrack_irc_fini(void) | ||
303 | { | ||
304 | int i; | ||
305 | for (i = 0; i < ports_c; i++) { | ||
306 | DEBUGP("unregistering port %d\n", | ||
307 | ports[i]); | ||
308 | ip_conntrack_helper_unregister(&irc_helpers[i]); | ||
309 | } | ||
310 | kfree(irc_buffer); | ||
311 | } | ||
312 | |||
313 | module_init(ip_conntrack_irc_init); | ||
314 | module_exit(ip_conntrack_irc_fini); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c deleted file mode 100644 index cc6dd49c9da0..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | /* | ||
2 | * NetBIOS name service broadcast connection tracking helper | ||
3 | * | ||
4 | * (c) 2005 Patrick McHardy <kaber@trash.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | /* | ||
12 | * This helper tracks locally originating NetBIOS name service | ||
13 | * requests by issuing permanent expectations (valid until | ||
14 | * timing out) matching all reply connections from the | ||
15 | * destination network. The only NetBIOS specific thing is | ||
16 | * actually the port number. | ||
17 | */ | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/skbuff.h> | ||
22 | #include <linux/netdevice.h> | ||
23 | #include <linux/inetdevice.h> | ||
24 | #include <linux/if_addr.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/ip.h> | ||
27 | #include <net/route.h> | ||
28 | |||
29 | #include <linux/netfilter.h> | ||
30 | #include <linux/netfilter_ipv4.h> | ||
31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
32 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
33 | |||
#define NMBD_PORT 137	/* NetBIOS name service UDP port */

MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");

/* Lifetime of the master conntrack and its reply expectation. */
static unsigned int timeout = 3;
module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
43 | |||
/* help - NetBIOS-NS helper callback: for a locally generated broadcast
 * name-service query, install a permanent expectation matching unicast
 * replies from any host on the destination subnet.  Always returns
 * NF_ACCEPT — tracking is best effort and never drops the packet. */
static int help(struct sk_buff **pskb,
		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
{
	struct ip_conntrack_expect *exp;
	struct iphdr *iph = (*pskb)->nh.iph;
	struct rtable *rt = (struct rtable *)(*pskb)->dst;
	struct in_device *in_dev;
	__be32 mask = 0;

	/* we're only interested in locally generated packets */
	if ((*pskb)->sk == NULL)
		goto out;
	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
		goto out;
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		goto out;

	/* find the netmask of the interface whose broadcast address this
	 * packet targets; RCU protects the in-device address list walk */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
	if (in_dev != NULL) {
		for_primary_ifa(in_dev) {
			if (ifa->ifa_broadcast == iph->daddr) {
				mask = ifa->ifa_mask;
				break;
			}
		} endfor_ifa(in_dev);
	}
	rcu_read_unlock();

	if (mask == 0)
		goto out;

	exp = ip_conntrack_expect_alloc(ct);
	if (exp == NULL)
		goto out;

	/* expect replies to port 137 from anywhere in the local subnet */
	exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
	exp->tuple.src.u.udp.port = htons(NMBD_PORT);

	exp->mask.src.ip = mask;
	exp->mask.src.u.udp.port = htons(0xFFFF);
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.udp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;

	exp->expectfn = NULL;
	exp->flags = IP_CT_EXPECT_PERMANENT;

	/* best effort: a failed registration just means untracked replies */
	ip_conntrack_expect_related(exp);
	ip_conntrack_expect_put(exp);

	ip_ct_refresh(ct, *pskb, timeout * HZ);
 out:
	return NF_ACCEPT;
}
99 | |||
/* Helper registration: match any conntrack whose original-direction
 * source port is UDP/137 (the mask selects only port + protonum). */
static struct ip_conntrack_helper helper = {
	.name			= "netbios-ns",
	.tuple = {
		.src = {
			.u = {
				.udp = {
					.port	= __constant_htons(NMBD_PORT),
				}
			}
		},
		.dst = {
			.protonum	= IPPROTO_UDP,
		},
	},
	.mask = {
		.src = {
			.u = {
				.udp = {
					.port	= __constant_htons(0xFFFF),
				}
			}
		},
		.dst = {
			.protonum	= 0xFF,
		},
	},
	.max_expected		= 1,	/* one outstanding reply expectation */
	.me			= THIS_MODULE,
	.help			= help,
};
130 | |||
/* Module init: copy the (possibly overridden) timeout module parameter
 * into the helper, then register it. */
static int __init ip_conntrack_netbios_ns_init(void)
{
	helper.timeout = timeout;
	return ip_conntrack_helper_register(&helper);
}
136 | |||
/* Module unload: drop the helper registration. */
static void __exit ip_conntrack_netbios_ns_fini(void)
{
	ip_conntrack_helper_unregister(&helper);
}

module_init(ip_conntrack_netbios_ns_init);
module_exit(ip_conntrack_netbios_ns_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c deleted file mode 100644 index 9228b76ccd9a..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ /dev/null | |||
@@ -1,1577 +0,0 @@ | |||
1 | /* Connection tracking via netlink socket. Allows for user space | ||
2 | * protocol helpers and general trouble making from userspace. | ||
3 | * | ||
4 | * (C) 2001 by Jay Schulist <jschlst@samba.org> | ||
5 | * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> | ||
6 | * (C) 2003 by Patrick Mchardy <kaber@trash.net> | ||
7 | * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net> | ||
8 | * | ||
9 | * I've reworked this stuff to use attributes instead of conntrack | ||
10 | * structures. 5.44 am. I need more tea. --pablo 05/07/11. | ||
11 | * | ||
12 | * Initial connection tracking via netlink development funded and | ||
13 | * generally made possible by Network Robots, Inc. (www.networkrobots.com) | ||
14 | * | ||
15 | * Further development of this code funded by Astaro AG (http://www.astaro.com) | ||
16 | * | ||
17 | * This software may be used and distributed according to the terms | ||
18 | * of the GNU General Public License, incorporated herein by reference. | ||
19 | */ | ||
20 | |||
21 | #include <linux/init.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/timer.h> | ||
26 | #include <linux/skbuff.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/netlink.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/interrupt.h> | ||
31 | #include <linux/notifier.h> | ||
32 | |||
33 | #include <linux/netfilter.h> | ||
34 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
35 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
36 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
37 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
38 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
39 | |||
40 | #include <linux/netfilter/nfnetlink.h> | ||
41 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
42 | |||
43 | MODULE_LICENSE("GPL"); | ||
44 | |||
45 | static char __initdata version[] = "0.90"; | ||
46 | |||
/* Emit the layer-4 half of a tuple as a nested CTA_TUPLE_PROTO attribute:
 * the protocol number plus any per-protocol fields the tracker provides.
 * Returns 0 (or the callback's result); -1 if the skb ran out of room
 * (NFA_PUT/NFA_NEST jump to nfattr_failure on overflow). */
static inline int
ctnetlink_dump_tuples_proto(struct sk_buff *skb,
			    const struct ip_conntrack_tuple *tuple,
			    struct ip_conntrack_protocol *proto)
{
	int ret = 0;
	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);

	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);

	if (likely(proto->tuple_to_nfattr))
		ret = proto->tuple_to_nfattr(skb, tuple);

	NFA_NEST_END(skb, nest_parms);

	return ret;

nfattr_failure:
	return -1;
}
67 | |||
/* Emit the IPv4 source/destination addresses of a tuple as a nested
 * CTA_TUPLE_IP attribute.  Returns 0, or -1 on skb overflow. */
static inline int
ctnetlink_dump_tuples_ip(struct sk_buff *skb,
			 const struct ip_conntrack_tuple *tuple)
{
	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);

	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);

	NFA_NEST_END(skb, nest_parms);

	return 0;

nfattr_failure:
	return -1;
}
84 | |||
85 | static inline int | ||
86 | ctnetlink_dump_tuples(struct sk_buff *skb, | ||
87 | const struct ip_conntrack_tuple *tuple) | ||
88 | { | ||
89 | int ret; | ||
90 | struct ip_conntrack_protocol *proto; | ||
91 | |||
92 | ret = ctnetlink_dump_tuples_ip(skb, tuple); | ||
93 | if (unlikely(ret < 0)) | ||
94 | return ret; | ||
95 | |||
96 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | ||
97 | ret = ctnetlink_dump_tuples_proto(skb, tuple, proto); | ||
98 | ip_conntrack_proto_put(proto); | ||
99 | |||
100 | return ret; | ||
101 | } | ||
102 | |||
/* Emit ct->status as a big-endian CTA_STATUS attribute.
 * Returns 0, or -1 on skb overflow. */
static inline int
ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	__be32 status = htonl((u_int32_t) ct->status);
	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
	return 0;

nfattr_failure:
	return -1;
}
113 | |||
/* Emit the conntrack's remaining lifetime in seconds as CTA_TIMEOUT,
 * clamped to 0 if the timer has already expired (the subtraction is
 * done signed so a past deadline shows up as negative). */
static inline int
ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	long timeout_l = ct->timeout.expires - jiffies;
	__be32 timeout;

	if (timeout_l < 0)
		timeout = 0;
	else
		timeout = htonl(timeout_l / HZ);

	NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
	return 0;

nfattr_failure:
	return -1;
}
131 | |||
/* Emit per-protocol state (e.g. TCP state) as a nested CTA_PROTOINFO
 * attribute; a no-op for trackers without a to_nfattr callback.  The
 * protocol reference is dropped on every exit path, including the
 * overflow path. */
static inline int
ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);

	struct nfattr *nest_proto;
	int ret;

	if (!proto->to_nfattr) {
		ip_conntrack_proto_put(proto);
		return 0;
	}

	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);

	ret = proto->to_nfattr(skb, nest_proto, ct);

	ip_conntrack_proto_put(proto);

	NFA_NEST_END(skb, nest_proto);

	return ret;

nfattr_failure:
	ip_conntrack_proto_put(proto);
	return -1;
}
159 | |||
/* Emit the attached helper's name (and optional helper-specific data)
 * as a nested CTA_HELP attribute; a no-op when no helper is attached.
 * NOTE(review): the helper's to_nfattr return value is ignored here —
 * confirm helper callbacks cannot partially fill the skb. */
static inline int
ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	struct nfattr *nest_helper;

	if (!ct->helper)
		return 0;

	nest_helper = NFA_NEST(skb, CTA_HELP);
	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);

	if (ct->helper->to_nfattr)
		ct->helper->to_nfattr(skb, ct);

	NFA_NEST_END(skb, nest_helper);

	return 0;

nfattr_failure:
	return -1;
}
181 | |||
#ifdef CONFIG_IP_NF_CT_ACCT
/* Emit per-direction packet/byte accounting as a nested
 * CTA_COUNTERS_ORIG or CTA_COUNTERS_REPLY attribute (32-bit counters,
 * network byte order).  Returns 0, or -1 on skb overflow. */
static inline int
ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
			enum ip_conntrack_dir dir)
{
	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
	struct nfattr *nest_count = NFA_NEST(skb, type);
	__be32 tmp;

	tmp = htonl(ct->counters[dir].packets);
	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);

	tmp = htonl(ct->counters[dir].bytes);
	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);

	NFA_NEST_END(skb, nest_count);

	return 0;

nfattr_failure:
	return -1;
}
#else
/* accounting compiled out: dump nothing, always succeed */
#define ctnetlink_dump_counters(a, b, c) (0)
#endif
207 | |||
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
/* Emit the conntrack mark as a big-endian CTA_MARK attribute. */
static inline int
ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	__be32 mark = htonl(ct->mark);

	NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
	return 0;

nfattr_failure:
	return -1;
}
#else
/* mark support compiled out: dump nothing, always succeed */
#define ctnetlink_dump_mark(a, b) (0)
#endif
223 | |||
/* Emit the conntrack's unique id as a big-endian CTA_ID attribute. */
static inline int
ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	__be32 id = htonl(ct->id);
	NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
	return 0;

nfattr_failure:
	return -1;
}
234 | |||
/* Emit the conntrack's current reference count as CTA_USE. */
static inline int
ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
{
	__be32 use = htonl(atomic_read(&ct->ct_general.use));

	NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
	return 0;

nfattr_failure:
	return -1;
}
246 | |||
/* Shorthand for a conntrack's tuple in the given direction. */
#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)

/* Build one complete ctnetlink message describing @ct into @skb:
 * nfnetlink header, both tuples, then every per-conntrack attribute.
 * Returns skb->len on success; -1 after trimming the partial message
 * if the skb ran out of space (NLMSG_PUT/NFA_* jump to the labels). */
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
		    int event, int nowait,
		    const struct ip_conntrack *ct)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	struct nfattr *nest_parms;
	unsigned char *b;

	/* remember the tail so a failed fill can be trimmed away */
	b = skb->tail;

	event |= NFNL_SUBSYS_CTNETLINK << 8;
	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
	nfmsg->nfgen_family = AF_INET;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
		goto nfattr_failure;
	NFA_NEST_END(skb, nest_parms);

	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
		goto nfattr_failure;
	NFA_NEST_END(skb, nest_parms);

	if (ctnetlink_dump_status(skb, ct) < 0 ||
	    ctnetlink_dump_timeout(skb, ct) < 0 ||
	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
	    ctnetlink_dump_mark(skb, ct) < 0 ||
	    ctnetlink_dump_id(skb, ct) < 0 ||
	    ctnetlink_dump_use(skb, ct) < 0)
		goto nfattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
nfattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
299 | |||
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
/* Conntrack event notifier: translate an IPCT_* event on @ptr into a
 * ctnetlink multicast message on the matching nfnetlink group
 * (NEW/UPDATE/DESTROY).  Runs from the conntrack event path, so
 * allocation is GFP_ATOMIC and all failures simply drop the event. */
static int ctnetlink_conntrack_event(struct notifier_block *this,
				     unsigned long events, void *ptr)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	struct nfattr *nest_parms;
	struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
	struct sk_buff *skb;
	unsigned int type;
	unsigned char *b;
	unsigned int flags = 0, group;

	/* ignore our fake conntrack entry */
	if (ct == &ip_conntrack_untracked)
		return NOTIFY_DONE;

	/* map the event bits to a message type and multicast group */
	if (events & IPCT_DESTROY) {
		type = IPCTNL_MSG_CT_DELETE;
		group = NFNLGRP_CONNTRACK_DESTROY;
	} else if (events & (IPCT_NEW | IPCT_RELATED)) {
		type = IPCTNL_MSG_CT_NEW;
		flags = NLM_F_CREATE|NLM_F_EXCL;
		group = NFNLGRP_CONNTRACK_NEW;
	} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
		type = IPCTNL_MSG_CT_NEW;
		group = NFNLGRP_CONNTRACK_UPDATE;
	} else
		return NOTIFY_DONE;

	/* don't build a message nobody will receive */
	if (!nfnetlink_has_listeners(group))
		return NOTIFY_DONE;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
	if (!skb)
		return NOTIFY_DONE;

	b = skb->tail;

	type |= NFNL_SUBSYS_CTNETLINK << 8;
	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	nlh->nlmsg_flags = flags;
	nfmsg->nfgen_family = AF_INET;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
		goto nfattr_failure;
	NFA_NEST_END(skb, nest_parms);

	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
		goto nfattr_failure;
	NFA_NEST_END(skb, nest_parms);

	if (events & IPCT_DESTROY) {
		/* DESTROY carries only the final counters */
		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
			goto nfattr_failure;
	} else {
		if (ctnetlink_dump_status(skb, ct) < 0)
			goto nfattr_failure;

		if (ctnetlink_dump_timeout(skb, ct) < 0)
			goto nfattr_failure;

		if (events & IPCT_PROTOINFO
		    && ctnetlink_dump_protoinfo(skb, ct) < 0)
			goto nfattr_failure;

		if ((events & IPCT_HELPER || ct->helper)
		    && ctnetlink_dump_helpinfo(skb, ct) < 0)
			goto nfattr_failure;

#ifdef CONFIG_IP_NF_CONNTRACK_MARK
		if ((events & IPCT_MARK || ct->mark)
		    && ctnetlink_dump_mark(skb, ct) < 0)
			goto nfattr_failure;
#endif

		if (events & IPCT_COUNTER_FILLING &&
		    (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
		     ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
			goto nfattr_failure;
	}

	nlh->nlmsg_len = skb->tail - b;
	nfnetlink_send(skb, 0, group, 0);
	return NOTIFY_DONE;

nlmsg_failure:
nfattr_failure:
	kfree_skb(skb);
	return NOTIFY_DONE;
}
#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
399 | |||
400 | static int ctnetlink_done(struct netlink_callback *cb) | ||
401 | { | ||
402 | if (cb->args[1]) | ||
403 | ip_conntrack_put((struct ip_conntrack *)cb->args[1]); | ||
404 | return 0; | ||
405 | } | ||
406 | |||
/* Netlink dump callback: walk the conntrack hash table and emit one
 * IPCTNL_MSG_CT_NEW message per connection.  cb->args[0] tracks the
 * current hash bucket; cb->args[1] holds a referenced conntrack to
 * resume from when the previous skb filled up (released here or in
 * ctnetlink_done). */
static int
ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ip_conntrack *ct, *last;
	struct ip_conntrack_tuple_hash *h;
	struct list_head *i;

	read_lock_bh(&ip_conntrack_lock);
	last = (struct ip_conntrack *)cb->args[1];
	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
restart:
		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
			h = (struct ip_conntrack_tuple_hash *) i;
			/* Each connection is hashed in both directions;
			 * dump only ORIGINAL to avoid duplicates. */
			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
				continue;
			ct = tuplehash_to_ctrack(h);
			if (cb->args[1]) {
				/* Resuming: skip entries until we reach
				 * the one saved on the previous pass. */
				if (ct != last)
					continue;
				cb->args[1] = 0;
			}
			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
						cb->nlh->nlmsg_seq,
						IPCTNL_MSG_CT_NEW,
						1, ct) < 0) {
				/* skb is full: pin this entry so the next
				 * invocation can resume exactly here. */
				nf_conntrack_get(&ct->ct_general);
				cb->args[1] = (unsigned long)ct;
				goto out;
			}
#ifdef CONFIG_NF_CT_ACCT
			/* CTRZERO request: zero the accounting counters
			 * once the entry has been dumped. */
			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
					IPCTNL_MSG_CT_GET_CTRZERO)
				memset(&ct->counters, 0, sizeof(ct->counters));
#endif
		}
		if (cb->args[1]) {
			/* Saved entry vanished from this bucket; rescan
			 * the bucket from the start. */
			cb->args[1] = 0;
			goto restart;
		}
	}
out:
	read_unlock_bh(&ip_conntrack_lock);
	if (last)
		ip_conntrack_put(last);

	return skb->len;
}
454 | |||
/* Minimum payload sizes for CTA_IP_* attributes (see nfattr_bad_size). */
static const size_t cta_min_ip[CTA_IP_MAX] = {
	[CTA_IP_V4_SRC-1]	= sizeof(__be32),
	[CTA_IP_V4_DST-1]	= sizeof(__be32),
};
459 | |||
460 | static inline int | ||
461 | ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) | ||
462 | { | ||
463 | struct nfattr *tb[CTA_IP_MAX]; | ||
464 | |||
465 | nfattr_parse_nested(tb, CTA_IP_MAX, attr); | ||
466 | |||
467 | if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) | ||
468 | return -EINVAL; | ||
469 | |||
470 | if (!tb[CTA_IP_V4_SRC-1]) | ||
471 | return -EINVAL; | ||
472 | tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); | ||
473 | |||
474 | if (!tb[CTA_IP_V4_DST-1]) | ||
475 | return -EINVAL; | ||
476 | tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); | ||
477 | |||
478 | return 0; | ||
479 | } | ||
480 | |||
/* Minimum payload sizes for CTA_PROTO_* attributes. */
static const size_t cta_min_proto[CTA_PROTO_MAX] = {
	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
	[CTA_PROTO_ICMP_CODE-1]	= sizeof(u_int8_t),
	[CTA_PROTO_ICMP_ID-1]	= sizeof(u_int16_t),
};
489 | |||
/* Parse the nested CTA_TUPLE_PROTO attribute into the tuple.  The
 * protocol number is mandatory; per-protocol fields (ports, ICMP
 * type/code/id) are delegated to the l4 protocol's nfattr_to_tuple
 * handler.  Returns 0 or a negative errno. */
static inline int
ctnetlink_parse_tuple_proto(struct nfattr *attr,
			    struct ip_conntrack_tuple *tuple)
{
	struct nfattr *tb[CTA_PROTO_MAX];
	struct ip_conntrack_protocol *proto;
	int ret = 0;

	nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);

	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
		return -EINVAL;

	if (!tb[CTA_PROTO_NUM-1])
		return -EINVAL;
	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);

	/* Takes a reference on the protocol (module); put below. */
	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);

	if (likely(proto->nfattr_to_tuple))
		ret = proto->nfattr_to_tuple(tb, tuple);

	ip_conntrack_proto_put(proto);

	return ret;
}
516 | |||
/* Parse a complete nested tuple attribute (CTA_TUPLE_ORIG/REPLY or a
 * CTA_EXPECT_* tuple) into *tuple.  Both the IP and proto sub-attributes
 * are mandatory.  Returns 0 or a negative errno. */
static inline int
ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
		      enum ctattr_tuple type)
{
	struct nfattr *tb[CTA_TUPLE_MAX];
	int err;

	memset(tuple, 0, sizeof(*tuple));

	nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);

	if (!tb[CTA_TUPLE_IP-1])
		return -EINVAL;

	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
	if (err < 0)
		return err;

	if (!tb[CTA_TUPLE_PROTO-1])
		return -EINVAL;

	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
	if (err < 0)
		return err;

	/* orig and expect tuples get DIR_ORIGINAL */
	if (type == CTA_TUPLE_REPLY)
		tuple->dst.dir = IP_CT_DIR_REPLY;
	else
		tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return 0;
}
550 | |||
#ifdef CONFIG_IP_NF_NAT_NEEDED
/* Minimum payload sizes for CTA_PROTONAT_* attributes. */
static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
	[CTA_PROTONAT_PORT_MIN-1]	= sizeof(u_int16_t),
	[CTA_PROTONAT_PORT_MAX-1]	= sizeof(u_int16_t),
};
556 | |||
/* Parse the nested CTA_NAT_PROTO attribute into *range, delegating the
 * per-protocol part (e.g. port ranges) to the NAT protocol module of
 * the connection's original-direction l4 protocol. */
static int ctnetlink_parse_nat_proto(struct nfattr *attr,
				     const struct ip_conntrack *ct,
				     struct ip_nat_range *range)
{
	struct nfattr *tb[CTA_PROTONAT_MAX];
	struct ip_nat_protocol *npt;

	nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);

	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
		return -EINVAL;

	/* Takes a reference on the NAT protocol; released below. */
	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);

	if (!npt->nfattr_to_range) {
		/* Protocol has no NAT attributes to parse; not an error. */
		ip_nat_proto_put(npt);
		return 0;
	}

	/* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
	if (npt->nfattr_to_range(tb, range) > 0)
		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;

	ip_nat_proto_put(npt);

	return 0;
}
584 | |||
/* Minimum payload sizes for CTA_NAT_* attributes. */
static const size_t cta_min_nat[CTA_NAT_MAX] = {
	[CTA_NAT_MINIP-1]	= sizeof(__be32),
	[CTA_NAT_MAXIP-1]	= sizeof(__be32),
};
589 | |||
/* Parse a CTA_NAT_SRC/CTA_NAT_DST attribute into *range.  A missing
 * max IP defaults to the min IP (single-address mapping); the optional
 * CTA_NAT_PROTO part carries per-protocol ranges. */
static inline int
ctnetlink_parse_nat(struct nfattr *nat,
		    const struct ip_conntrack *ct, struct ip_nat_range *range)
{
	struct nfattr *tb[CTA_NAT_MAX];
	int err;

	memset(range, 0, sizeof(*range));

	nfattr_parse_nested(tb, CTA_NAT_MAX, nat);

	if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
		return -EINVAL;

	if (tb[CTA_NAT_MINIP-1])
		range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);

	if (!tb[CTA_NAT_MAXIP-1])
		range->max_ip = range->min_ip;
	else
		range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);

	/* Only request address mapping when an address was supplied. */
	if (range->min_ip)
		range->flags |= IP_NAT_RANGE_MAP_IPS;

	if (!tb[CTA_NAT_PROTO-1])
		return 0;

	err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
	if (err < 0)
		return err;

	return 0;
}
#endif
625 | |||
626 | static inline int | ||
627 | ctnetlink_parse_help(struct nfattr *attr, char **helper_name) | ||
628 | { | ||
629 | struct nfattr *tb[CTA_HELP_MAX]; | ||
630 | |||
631 | nfattr_parse_nested(tb, CTA_HELP_MAX, attr); | ||
632 | |||
633 | if (!tb[CTA_HELP_NAME-1]) | ||
634 | return -EINVAL; | ||
635 | |||
636 | *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); | ||
637 | |||
638 | return 0; | ||
639 | } | ||
640 | |||
/* Minimum payload sizes for top-level CTA_* attributes. */
static const size_t cta_min[CTA_MAX] = {
	[CTA_STATUS-1]		= sizeof(__be32),
	[CTA_TIMEOUT-1]		= sizeof(__be32),
	[CTA_MARK-1]		= sizeof(__be32),
	[CTA_USE-1]		= sizeof(__be32),
	[CTA_ID-1]		= sizeof(__be32)
};
648 | |||
/* IPCTNL_MSG_CT_DELETE handler: delete the conntrack matching the
 * supplied orig or reply tuple (optionally verified against CTA_ID),
 * or flush the whole table when no tuple attribute is present. */
static int
ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack *ct;
	int err = 0;

	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
		return -EINVAL;

	if (cda[CTA_TUPLE_ORIG-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
	else if (cda[CTA_TUPLE_REPLY-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
	else {
		/* Flush the whole table */
		ip_conntrack_flush();
		return 0;
	}

	if (err < 0)
		return err;

	/* Takes a reference on success; dropped below. */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h)
		return -ENOENT;

	ct = tuplehash_to_ctrack(h);

	if (cda[CTA_ID-1]) {
		/* Caller supplied an id: only delete the exact entry. */
		u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
		if (ct->id != id) {
			ip_conntrack_put(ct);
			return -ENOENT;
		}
	}
	/* Firing the timeout handler destroys the entry immediately. */
	if (del_timer(&ct->timeout))
		ct->timeout.function((unsigned long)ct);

	ip_conntrack_put(ct);

	return 0;
}
694 | |||
/* IPCTNL_MSG_CT_GET handler: either start a table dump (NLM_F_DUMP) or
 * look up a single conntrack by tuple and unicast it back to the
 * requesting socket. */
static int
ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack *ct;
	struct sk_buff *skb2 = NULL;
	int err = 0;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct nfgenmsg *msg = NLMSG_DATA(nlh);
		u32 rlen;

		if (msg->nfgen_family != AF_INET)
			return -EAFNOSUPPORT;

#ifndef CONFIG_IP_NF_CT_ACCT
		/* Counter-zeroing dumps need accounting support. */
		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
			return -ENOTSUPP;
#endif
		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
						ctnetlink_dump_table,
						ctnetlink_done)) != 0)
			return -EINVAL;

		/* Consume this request from the receive skb. */
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		skb_pull(skb, rlen);
		return 0;
	}

	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
		return -EINVAL;

	if (cda[CTA_TUPLE_ORIG-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
	else if (cda[CTA_TUPLE_REPLY-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
	else
		return -EINVAL;

	if (err < 0)
		return err;

	/* Takes a reference; dropped after the entry is serialized. */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h)
		return -ENOENT;

	ct = tuplehash_to_ctrack(h);

	err = -ENOMEM;
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb2) {
		ip_conntrack_put(ct);
		return -ENOMEM;
	}

	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
				  IPCTNL_MSG_CT_NEW, 1, ct);
	ip_conntrack_put(ct);
	if (err <= 0)
		goto free;

	/* netlink_unicast consumes skb2 regardless of the outcome. */
	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
	if (err < 0)
		goto out;

	return 0;

free:
	kfree_skb(skb2);
out:
	return err;
}
771 | |||
/* Update ct->status from CTA_STATUS and, for new entries, set up NAT
 * from CTA_NAT_SRC/CTA_NAT_DST.  Rejects attempts to change bits that
 * are immutable (EXPECTED/CONFIRMED/DYING) or clear set-only bits
 * (SEEN_REPLY/ASSURED). */
static inline int
ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
{
	unsigned long d;
	unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
	d = ct->status ^ status;	/* bits the caller wants to flip */

	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
		/* unchangeable */
		return -EINVAL;

	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
		/* SEEN_REPLY bit can only be set */
		return -EINVAL;


	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
		/* ASSURED bit can only be set */
		return -EINVAL;

	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
#ifndef CONFIG_IP_NF_NAT_NEEDED
		return -EINVAL;
#else
		struct ip_nat_range range;

		/* DNAT (PRE_ROUTING) first, then SNAT (POST_ROUTING);
		 * each may be configured only once per conntrack. */
		if (cda[CTA_NAT_DST-1]) {
			if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
						&range) < 0)
				return -EINVAL;
			if (ip_nat_initialized(ct,
					       HOOK2MANIP(NF_IP_PRE_ROUTING)))
				return -EEXIST;
			ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
		}
		if (cda[CTA_NAT_SRC-1]) {
			if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
						&range) < 0)
				return -EINVAL;
			if (ip_nat_initialized(ct,
					       HOOK2MANIP(NF_IP_POST_ROUTING)))
				return -EEXIST;
			ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
		}
#endif
	}

	/* Be careful here, modifying NAT bits can screw up things,
	 * so don't let users modify them directly if they don't pass
	 * ip_nat_range. */
	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
	return 0;
}
825 | |||
826 | |||
/* Replace (or clear, via an empty name) the helper assigned to a
 * conntrack.  Called with ip_conntrack_lock held by the caller, so the
 * unlocked helper lookup is safe. */
static inline int
ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
{
	struct ip_conntrack_helper *helper;
	char *helpname;
	int err;

	/* don't change helper of sibling connections */
	if (ct->master)
		return -EINVAL;

	err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
	if (err < 0)
		return err;

	helper = __ip_conntrack_helper_find_byname(helpname);
	if (!helper) {
		/* Empty name means "detach helper"; an unknown name
		 * is an error. */
		if (!strcmp(helpname, ""))
			helper = NULL;
		else
			return -EINVAL;
	}

	if (ct->helper) {
		if (!helper) {
			/* we had a helper before ... */
			ip_ct_remove_expectations(ct);
			ct->helper = NULL;
		} else {
			/* need to zero data of old helper */
			memset(&ct->help, 0, sizeof(ct->help));
		}
	}

	ct->helper = helper;

	return 0;
}
865 | |||
866 | static inline int | ||
867 | ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) | ||
868 | { | ||
869 | u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); | ||
870 | |||
871 | if (!del_timer(&ct->timeout)) | ||
872 | return -ETIME; | ||
873 | |||
874 | ct->timeout.expires = jiffies + timeout * HZ; | ||
875 | add_timer(&ct->timeout); | ||
876 | |||
877 | return 0; | ||
878 | } | ||
879 | |||
/* Hand the nested CTA_PROTOINFO attribute to the connection's l4
 * protocol module (from_nfattr) to update protocol-private state,
 * e.g. TCP state tracking. */
static inline int
ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
{
	struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
	struct ip_conntrack_protocol *proto;
	u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
	int err = 0;

	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);

	/* Takes a reference on the protocol; released below. */
	proto = ip_conntrack_proto_find_get(npt);

	if (proto->from_nfattr)
		err = proto->from_nfattr(tb, ct);
	ip_conntrack_proto_put(proto);

	return err;
}
898 | |||
/* Apply all modifiable attributes (helper, timeout, status, protoinfo,
 * mark) to an existing conntrack.  Caller holds ip_conntrack_lock.
 * Stops at the first failing sub-update. */
static int
ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
{
	int err;

	if (cda[CTA_HELP-1]) {
		err = ctnetlink_change_helper(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_TIMEOUT-1]) {
		err = ctnetlink_change_timeout(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_STATUS-1]) {
		err = ctnetlink_change_status(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_PROTOINFO-1]) {
		err = ctnetlink_change_protoinfo(ct, cda);
		if (err < 0)
			return err;
	}

#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
	if (cda[CTA_MARK-1])
		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
#endif

	return 0;
}
935 | |||
936 | static int | ||
937 | ctnetlink_create_conntrack(struct nfattr *cda[], | ||
938 | struct ip_conntrack_tuple *otuple, | ||
939 | struct ip_conntrack_tuple *rtuple) | ||
940 | { | ||
941 | struct ip_conntrack *ct; | ||
942 | int err = -EINVAL; | ||
943 | |||
944 | ct = ip_conntrack_alloc(otuple, rtuple); | ||
945 | if (ct == NULL || IS_ERR(ct)) | ||
946 | return -ENOMEM; | ||
947 | |||
948 | if (!cda[CTA_TIMEOUT-1]) | ||
949 | goto err; | ||
950 | ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); | ||
951 | |||
952 | ct->timeout.expires = jiffies + ct->timeout.expires * HZ; | ||
953 | ct->status |= IPS_CONFIRMED; | ||
954 | |||
955 | if (cda[CTA_STATUS-1]) { | ||
956 | err = ctnetlink_change_status(ct, cda); | ||
957 | if (err < 0) | ||
958 | goto err; | ||
959 | } | ||
960 | |||
961 | if (cda[CTA_PROTOINFO-1]) { | ||
962 | err = ctnetlink_change_protoinfo(ct, cda); | ||
963 | if (err < 0) | ||
964 | goto err; | ||
965 | } | ||
966 | |||
967 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) | ||
968 | if (cda[CTA_MARK-1]) | ||
969 | ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); | ||
970 | #endif | ||
971 | |||
972 | ct->helper = ip_conntrack_helper_find_get(rtuple); | ||
973 | |||
974 | add_timer(&ct->timeout); | ||
975 | ip_conntrack_hash_insert(ct); | ||
976 | |||
977 | if (ct->helper) | ||
978 | ip_conntrack_helper_put(ct->helper); | ||
979 | |||
980 | return 0; | ||
981 | |||
982 | err: | ||
983 | ip_conntrack_free(ct); | ||
984 | return err; | ||
985 | } | ||
986 | |||
/* IPCTNL_MSG_CT_NEW handler: create a conntrack (NLM_F_CREATE) when no
 * matching entry exists, otherwise update it in place unless the caller
 * demanded exclusivity (NLM_F_EXCL). */
static int
ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple otuple, rtuple;
	struct ip_conntrack_tuple_hash *h = NULL;
	int err = 0;

	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
		return -EINVAL;

	if (cda[CTA_TUPLE_ORIG-1]) {
		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
		if (err < 0)
			return err;
	}

	if (cda[CTA_TUPLE_REPLY-1]) {
		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
		if (err < 0)
			return err;
	}

	write_lock_bh(&ip_conntrack_lock);
	if (cda[CTA_TUPLE_ORIG-1])
		h = __ip_conntrack_find(&otuple, NULL);
	else if (cda[CTA_TUPLE_REPLY-1])
		h = __ip_conntrack_find(&rtuple, NULL);

	if (h == NULL) {
		/* No such entry: drop the lock before the (sleeping)
		 * create path. */
		write_unlock_bh(&ip_conntrack_lock);
		err = -ENOENT;
		if (nlh->nlmsg_flags & NLM_F_CREATE)
			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
		return err;
	}
	/* implicit 'else' */

	/* we only allow nat config for new conntracks */
	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
		err = -EINVAL;
		goto out_unlock;
	}

	/* We manipulate the conntrack inside the global conntrack table lock,
	 * so there's no need to increase the refcount */
	err = -EEXIST;
	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
		err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);

out_unlock:
	write_unlock_bh(&ip_conntrack_lock);
	return err;
}
1041 | |||
1042 | /*********************************************************************** | ||
1043 | * EXPECT | ||
1044 | ***********************************************************************/ | ||
1045 | |||
/* Emit a tuple as a nested attribute of the given type into skb.
 * NFA_NEST/NFA_NEST_END jump to nfattr_failure when skb runs out of
 * room.  Returns 0 or -1 (caller trims the skb). */
static inline int
ctnetlink_exp_dump_tuple(struct sk_buff *skb,
			 const struct ip_conntrack_tuple *tuple,
			 enum ctattr_expect type)
{
	struct nfattr *nest_parms = NFA_NEST(skb, type);

	if (ctnetlink_dump_tuples(skb, tuple) < 0)
		goto nfattr_failure;

	NFA_NEST_END(skb, nest_parms);

	return 0;

nfattr_failure:
	return -1;
}
1063 | |||
/* Emit the expectation mask as a nested CTA_EXPECT_MASK attribute.
 * The protocol-specific part is serialized by the l4 module of the
 * expectation tuple's protocol.  Returns 0 or -1. */
static inline int
ctnetlink_exp_dump_mask(struct sk_buff *skb,
			const struct ip_conntrack_tuple *tuple,
			const struct ip_conntrack_tuple *mask)
{
	int ret;
	struct ip_conntrack_protocol *proto;
	struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);

	ret = ctnetlink_dump_tuples_ip(skb, mask);
	if (unlikely(ret < 0))
		goto nfattr_failure;

	/* Takes a reference on the protocol; released right after use. */
	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
	ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
	ip_conntrack_proto_put(proto);
	if (unlikely(ret < 0))
		goto nfattr_failure;

	NFA_NEST_END(skb, nest_parms);

	return 0;

nfattr_failure:
	return -1;
}
1090 | |||
/* Serialize one expectation: its tuple, mask, master tuple, remaining
 * timeout (seconds) and id.  NFA_PUT jumps to nfattr_failure when the
 * skb is full.  Returns 0 or -1. */
static inline int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
			  const struct ip_conntrack_expect *exp)
{
	struct ip_conntrack *master = exp->master;
	__be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
	__be32 id = htonl(exp->id);

	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
		goto nfattr_failure;
	if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
		goto nfattr_failure;
	if (ctnetlink_exp_dump_tuple(skb,
				     &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				     CTA_EXPECT_MASTER) < 0)
		goto nfattr_failure;

	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);

	return 0;

nfattr_failure:
	return -1;
}
1116 | |||
/* Build a complete expectation netlink message (header + payload) into
 * skb.  On overflow the skb is trimmed back to its state on entry and
 * -1 is returned; otherwise returns the skb length.  NLMSG_PUT jumps
 * to nlmsg_failure when there is no room for the header. */
static int
ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
			int event,
			int nowait,
			const struct ip_conntrack_expect *exp)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	unsigned char *b;

	b = skb->tail;	/* remember start for rollback/length */

	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
	nfmsg->nfgen_family = AF_INET;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nfattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
nfattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
1149 | |||
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
/* Expectation-event notifier: broadcast newly registered expectations
 * to NFNLGRP_CONNTRACK_EXP_NEW listeners.  Runs in atomic context, so
 * the skb is allocated GFP_ATOMIC and failures are silently dropped. */
static int ctnetlink_expect_event(struct notifier_block *this,
				  unsigned long events, void *ptr)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
	struct sk_buff *skb;
	unsigned int type;
	unsigned char *b;
	int flags = 0;

	/* Only IPEXP_NEW is reported; everything else is ignored. */
	if (events & IPEXP_NEW) {
		type = IPCTNL_MSG_EXP_NEW;
		flags = NLM_F_CREATE|NLM_F_EXCL;
	} else
		return NOTIFY_DONE;

	/* Skip the work entirely when nobody is subscribed. */
	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
		return NOTIFY_DONE;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
	if (!skb)
		return NOTIFY_DONE;

	b = skb->tail;

	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	nlh->nlmsg_flags = flags;
	nfmsg->nfgen_family = AF_INET;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nfattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
	return NOTIFY_DONE;

nlmsg_failure:
nfattr_failure:
	kfree_skb(skb);
	return NOTIFY_DONE;
}
#endif
1199 | |||
/* Netlink dump callback for expectations: walk the global expect list
 * and emit entries with id greater than the last one dumped.  The last
 * emitted id is kept in cb->args[0] for resumption. */
static int
ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ip_conntrack_expect *exp = NULL;
	struct list_head *i;
	u_int32_t *id = (u_int32_t *) &cb->args[0];

	read_lock_bh(&ip_conntrack_lock);
	list_for_each_prev(i, &ip_conntrack_expect_list) {
		exp = (struct ip_conntrack_expect *) i;
		/* Already dumped in a previous pass. */
		if (exp->id <= *id)
			continue;
		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    IPCTNL_MSG_EXP_NEW,
					    1, exp) < 0)
			goto out;
		*id = exp->id;
	}
out:
	read_unlock_bh(&ip_conntrack_lock);

	return skb->len;
}
1224 | |||
/* Minimum payload sizes for CTA_EXPECT_* attributes. */
static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
	[CTA_EXPECT_TIMEOUT-1]	= sizeof(__be32),
	[CTA_EXPECT_ID-1]	= sizeof(__be32)
};
1229 | |||
/* IPCTNL_MSG_EXP_GET handler: either start an expectation table dump
 * (NLM_F_DUMP) or look up a single expectation by its master tuple
 * (optionally checked against CTA_EXPECT_ID) and unicast it back. */
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_expect *exp;
	struct sk_buff *skb2;
	int err = 0;

	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
		return -EINVAL;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct nfgenmsg *msg = NLMSG_DATA(nlh);
		u32 rlen;

		if (msg->nfgen_family != AF_INET)
			return -EAFNOSUPPORT;

		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
						ctnetlink_exp_dump_table,
						ctnetlink_done)) != 0)
			return -EINVAL;
		/* Consume this request from the receive skb. */
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		skb_pull(skb, rlen);
		return 0;
	}

	if (cda[CTA_EXPECT_MASTER-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
	else
		return -EINVAL;

	if (err < 0)
		return err;

	/* Takes a reference; dropped on every exit path below. */
	exp = ip_conntrack_expect_find_get(&tuple);
	if (!exp)
		return -ENOENT;

	if (cda[CTA_EXPECT_ID-1]) {
		/* Caller supplied an id: only return the exact match. */
		__be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
		if (exp->id != ntohl(id)) {
			ip_conntrack_expect_put(exp);
			return -ENOENT;
		}
	}

	err = -ENOMEM;
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb2)
		goto out;

	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
				      1, exp);
	if (err <= 0)
		goto free;

	ip_conntrack_expect_put(exp);

	/* netlink_unicast consumes skb2 regardless of the outcome. */
	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);

free:
	kfree_skb(skb2);
out:
	ip_conntrack_expect_put(exp);
	return err;
}
1301 | |||
/* IPCTNL_MSG_EXP_DELETE handler: remove a single expectation by tuple
 * (optionally checked against CTA_EXPECT_ID), all expectations of a
 * named helper, or — with no selector — every expectation. */
static int
ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_expect *exp, *tmp;
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_helper *h;
	int err;

	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
		return -EINVAL;

	if (cda[CTA_EXPECT_TUPLE-1]) {
		/* delete a single expect by tuple */
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
		if (err < 0)
			return err;

		/* bump usage count to 2 */
		exp = ip_conntrack_expect_find_get(&tuple);
		if (!exp)
			return -ENOENT;

		if (cda[CTA_EXPECT_ID-1]) {
			/* Id mismatch: it is not the entry the caller
			 * asked for. */
			__be32 id =
				*(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
			if (exp->id != ntohl(id)) {
				ip_conntrack_expect_put(exp);
				return -ENOENT;
			}
		}

		/* after list removal, usage count == 1 */
		ip_conntrack_unexpect_related(exp);
		/* have to put what we 'get' above.
		 * after this line usage count == 0 */
		ip_conntrack_expect_put(exp);
	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);

		/* delete all expectations for this helper */
		write_lock_bh(&ip_conntrack_lock);
		h = __ip_conntrack_helper_find_byname(name);
		if (!h) {
			write_unlock_bh(&ip_conntrack_lock);
			return -EINVAL;
		}
		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
					 list) {
			/* del_timer guards against an expect that is
			 * concurrently expiring. */
			if (exp->master->helper == h
			    && del_timer(&exp->timeout)) {
				ip_ct_unlink_expect(exp);
				ip_conntrack_expect_put(exp);
			}
		}
		write_unlock_bh(&ip_conntrack_lock);
	} else {
		/* This basically means we have to flush everything*/
		write_lock_bh(&ip_conntrack_lock);
		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
					 list) {
			if (del_timer(&exp->timeout)) {
				ip_ct_unlink_expect(exp);
				ip_conntrack_expect_put(exp);
			}
		}
		write_unlock_bh(&ip_conntrack_lock);
	}

	return 0;
}
/* Modifying an existing expectation is not supported; callers must
 * delete and re-create instead. */
static int
ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
{
	return -EOPNOTSUPP;
}
1378 | |||
/* Create a new expectation from CTA_EXPECT_TUPLE/MASK/MASTER.  The
 * master conntrack must already exist and have a helper attached.
 * Returns 0 or a negative errno. */
static int
ctnetlink_create_expect(struct nfattr *cda[])
{
	struct ip_conntrack_tuple tuple, mask, master_tuple;
	struct ip_conntrack_tuple_hash *h = NULL;
	struct ip_conntrack_expect *exp;
	struct ip_conntrack *ct;
	int err = 0;

	/* caller guarantees that those three CTA_EXPECT_* exist */
	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
	if (err < 0)
		return err;

	/* Look for master conntrack of this expectation */
	h = ip_conntrack_find_get(&master_tuple, NULL);
	if (!h)
		return -ENOENT;
	ct = tuplehash_to_ctrack(h);

	if (!ct->helper) {
		/* such conntrack hasn't got any helper, abort */
		err = -EINVAL;
		goto out;
	}

	exp = ip_conntrack_expect_alloc(ct);
	if (!exp) {
		err = -ENOMEM;
		goto out;
	}

	exp->expectfn = NULL;
	exp->flags = 0;
	exp->master = ct;
	memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
	memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));

	/* Registers the expectation; our local reference is dropped
	 * right after. */
	err = ip_conntrack_expect_related(exp);
	ip_conntrack_expect_put(exp);

out:
	/* Drop the reference taken by ip_conntrack_find_get above. */
	ip_conntrack_put(tuplehash_to_ctrack(h));
	return err;
}
1430 | |||
1431 | static int | ||
1432 | ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, | ||
1433 | struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) | ||
1434 | { | ||
1435 | struct ip_conntrack_tuple tuple; | ||
1436 | struct ip_conntrack_expect *exp; | ||
1437 | int err = 0; | ||
1438 | |||
1439 | if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) | ||
1440 | return -EINVAL; | ||
1441 | |||
1442 | if (!cda[CTA_EXPECT_TUPLE-1] | ||
1443 | || !cda[CTA_EXPECT_MASK-1] | ||
1444 | || !cda[CTA_EXPECT_MASTER-1]) | ||
1445 | return -EINVAL; | ||
1446 | |||
1447 | err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); | ||
1448 | if (err < 0) | ||
1449 | return err; | ||
1450 | |||
1451 | write_lock_bh(&ip_conntrack_lock); | ||
1452 | exp = __ip_conntrack_expect_find(&tuple); | ||
1453 | |||
1454 | if (!exp) { | ||
1455 | write_unlock_bh(&ip_conntrack_lock); | ||
1456 | err = -ENOENT; | ||
1457 | if (nlh->nlmsg_flags & NLM_F_CREATE) | ||
1458 | err = ctnetlink_create_expect(cda); | ||
1459 | return err; | ||
1460 | } | ||
1461 | |||
1462 | err = -EEXIST; | ||
1463 | if (!(nlh->nlmsg_flags & NLM_F_EXCL)) | ||
1464 | err = ctnetlink_change_expect(exp, cda); | ||
1465 | write_unlock_bh(&ip_conntrack_lock); | ||
1466 | |||
1467 | return err; | ||
1468 | } | ||
1469 | |||
1470 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1471 | static struct notifier_block ctnl_notifier = { | ||
1472 | .notifier_call = ctnetlink_conntrack_event, | ||
1473 | }; | ||
1474 | |||
1475 | static struct notifier_block ctnl_notifier_exp = { | ||
1476 | .notifier_call = ctnetlink_expect_event, | ||
1477 | }; | ||
1478 | #endif | ||
1479 | |||
1480 | static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { | ||
1481 | [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, | ||
1482 | .attr_count = CTA_MAX, }, | ||
1483 | [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, | ||
1484 | .attr_count = CTA_MAX, }, | ||
1485 | [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, | ||
1486 | .attr_count = CTA_MAX, }, | ||
1487 | [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, | ||
1488 | .attr_count = CTA_MAX, }, | ||
1489 | }; | ||
1490 | |||
1491 | static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { | ||
1492 | [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, | ||
1493 | .attr_count = CTA_EXPECT_MAX, }, | ||
1494 | [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, | ||
1495 | .attr_count = CTA_EXPECT_MAX, }, | ||
1496 | [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, | ||
1497 | .attr_count = CTA_EXPECT_MAX, }, | ||
1498 | }; | ||
1499 | |||
1500 | static struct nfnetlink_subsystem ctnl_subsys = { | ||
1501 | .name = "conntrack", | ||
1502 | .subsys_id = NFNL_SUBSYS_CTNETLINK, | ||
1503 | .cb_count = IPCTNL_MSG_MAX, | ||
1504 | .cb = ctnl_cb, | ||
1505 | }; | ||
1506 | |||
1507 | static struct nfnetlink_subsystem ctnl_exp_subsys = { | ||
1508 | .name = "conntrack_expect", | ||
1509 | .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, | ||
1510 | .cb_count = IPCTNL_MSG_EXP_MAX, | ||
1511 | .cb = ctnl_exp_cb, | ||
1512 | }; | ||
1513 | |||
1514 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); | ||
1515 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); | ||
1516 | |||
1517 | static int __init ctnetlink_init(void) | ||
1518 | { | ||
1519 | int ret; | ||
1520 | |||
1521 | printk("ctnetlink v%s: registering with nfnetlink.\n", version); | ||
1522 | ret = nfnetlink_subsys_register(&ctnl_subsys); | ||
1523 | if (ret < 0) { | ||
1524 | printk("ctnetlink_init: cannot register with nfnetlink.\n"); | ||
1525 | goto err_out; | ||
1526 | } | ||
1527 | |||
1528 | ret = nfnetlink_subsys_register(&ctnl_exp_subsys); | ||
1529 | if (ret < 0) { | ||
1530 | printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); | ||
1531 | goto err_unreg_subsys; | ||
1532 | } | ||
1533 | |||
1534 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1535 | ret = ip_conntrack_register_notifier(&ctnl_notifier); | ||
1536 | if (ret < 0) { | ||
1537 | printk("ctnetlink_init: cannot register notifier.\n"); | ||
1538 | goto err_unreg_exp_subsys; | ||
1539 | } | ||
1540 | |||
1541 | ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp); | ||
1542 | if (ret < 0) { | ||
1543 | printk("ctnetlink_init: cannot expect register notifier.\n"); | ||
1544 | goto err_unreg_notifier; | ||
1545 | } | ||
1546 | #endif | ||
1547 | |||
1548 | return 0; | ||
1549 | |||
1550 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1551 | err_unreg_notifier: | ||
1552 | ip_conntrack_unregister_notifier(&ctnl_notifier); | ||
1553 | err_unreg_exp_subsys: | ||
1554 | nfnetlink_subsys_unregister(&ctnl_exp_subsys); | ||
1555 | #endif | ||
1556 | err_unreg_subsys: | ||
1557 | nfnetlink_subsys_unregister(&ctnl_subsys); | ||
1558 | err_out: | ||
1559 | return ret; | ||
1560 | } | ||
1561 | |||
1562 | static void __exit ctnetlink_exit(void) | ||
1563 | { | ||
1564 | printk("ctnetlink: unregistering from nfnetlink.\n"); | ||
1565 | |||
1566 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1567 | ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp); | ||
1568 | ip_conntrack_unregister_notifier(&ctnl_notifier); | ||
1569 | #endif | ||
1570 | |||
1571 | nfnetlink_subsys_unregister(&ctnl_exp_subsys); | ||
1572 | nfnetlink_subsys_unregister(&ctnl_subsys); | ||
1573 | return; | ||
1574 | } | ||
1575 | |||
1576 | module_init(ctnetlink_init); | ||
1577 | module_exit(ctnetlink_exit); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c deleted file mode 100644 index 88af82e98658..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ /dev/null | |||
@@ -1,74 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/timer.h> | ||
11 | #include <linux/netfilter.h> | ||
12 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
13 | |||
14 | unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ; | ||
15 | |||
16 | static int generic_pkt_to_tuple(const struct sk_buff *skb, | ||
17 | unsigned int dataoff, | ||
18 | struct ip_conntrack_tuple *tuple) | ||
19 | { | ||
20 | tuple->src.u.all = 0; | ||
21 | tuple->dst.u.all = 0; | ||
22 | |||
23 | return 1; | ||
24 | } | ||
25 | |||
26 | static int generic_invert_tuple(struct ip_conntrack_tuple *tuple, | ||
27 | const struct ip_conntrack_tuple *orig) | ||
28 | { | ||
29 | tuple->src.u.all = 0; | ||
30 | tuple->dst.u.all = 0; | ||
31 | |||
32 | return 1; | ||
33 | } | ||
34 | |||
35 | /* Print out the per-protocol part of the tuple. */ | ||
36 | static int generic_print_tuple(struct seq_file *s, | ||
37 | const struct ip_conntrack_tuple *tuple) | ||
38 | { | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | /* Print out the private part of the conntrack. */ | ||
43 | static int generic_print_conntrack(struct seq_file *s, | ||
44 | const struct ip_conntrack *state) | ||
45 | { | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | /* Returns verdict for packet, or -1 for invalid. */ | ||
50 | static int packet(struct ip_conntrack *conntrack, | ||
51 | const struct sk_buff *skb, | ||
52 | enum ip_conntrack_info ctinfo) | ||
53 | { | ||
54 | ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout); | ||
55 | return NF_ACCEPT; | ||
56 | } | ||
57 | |||
58 | /* Called when a new connection for this protocol found. */ | ||
59 | static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb) | ||
60 | { | ||
61 | return 1; | ||
62 | } | ||
63 | |||
64 | struct ip_conntrack_protocol ip_conntrack_generic_protocol = | ||
65 | { | ||
66 | .proto = 0, | ||
67 | .name = "unknown", | ||
68 | .pkt_to_tuple = generic_pkt_to_tuple, | ||
69 | .invert_tuple = generic_invert_tuple, | ||
70 | .print_tuple = generic_print_tuple, | ||
71 | .print_conntrack = generic_print_conntrack, | ||
72 | .packet = packet, | ||
73 | .new = new, | ||
74 | }; | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c deleted file mode 100644 index ac1c49ef36a9..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ /dev/null | |||
@@ -1,328 +0,0 @@ | |||
1 | /* | ||
2 | * ip_conntrack_proto_gre.c - Version 3.0 | ||
3 | * | ||
4 | * Connection tracking protocol helper module for GRE. | ||
5 | * | ||
6 | * GRE is a generic encapsulation protocol, which is generally not very | ||
7 | * suited for NAT, as it has no protocol-specific part as port numbers. | ||
8 | * | ||
9 | * It has an optional key field, which may help us distinguishing two | ||
10 | * connections between the same two hosts. | ||
11 | * | ||
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | ||
13 | * | ||
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | ||
15 | * field called "CallID", which serves us for the same purpose as the key | ||
16 | * field in plain GRE. | ||
17 | * | ||
18 | * Documentation about PPTP can be found in RFC 2637 | ||
19 | * | ||
20 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
21 | * | ||
22 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/netfilter.h> | ||
30 | #include <linux/ip.h> | ||
31 | #include <linux/in.h> | ||
32 | #include <linux/list.h> | ||
33 | #include <linux/seq_file.h> | ||
34 | #include <linux/interrupt.h> | ||
35 | |||
36 | static DEFINE_RWLOCK(ip_ct_gre_lock); | ||
37 | |||
38 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
39 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
40 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
41 | |||
42 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
43 | #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> | ||
44 | |||
45 | MODULE_LICENSE("GPL"); | ||
46 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
47 | MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); | ||
48 | |||
49 | /* shamelessly stolen from ip_conntrack_proto_udp.c */ | ||
50 | #define GRE_TIMEOUT (30*HZ) | ||
51 | #define GRE_STREAM_TIMEOUT (180*HZ) | ||
52 | |||
53 | #if 0 | ||
54 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) | ||
55 | #define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ | ||
56 | NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ | ||
57 | NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) | ||
58 | #else | ||
59 | #define DEBUGP(x, args...) | ||
60 | #define DUMP_TUPLE_GRE(x) | ||
61 | #endif | ||
62 | |||
63 | /* GRE KEYMAP HANDLING FUNCTIONS */ | ||
64 | static LIST_HEAD(gre_keymap_list); | ||
65 | |||
66 | static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, | ||
67 | const struct ip_conntrack_tuple *t) | ||
68 | { | ||
69 | return ((km->tuple.src.ip == t->src.ip) && | ||
70 | (km->tuple.dst.ip == t->dst.ip) && | ||
71 | (km->tuple.dst.protonum == t->dst.protonum) && | ||
72 | (km->tuple.dst.u.all == t->dst.u.all)); | ||
73 | } | ||
74 | |||
75 | /* look up the source key for a given tuple */ | ||
76 | static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) | ||
77 | { | ||
78 | struct ip_ct_gre_keymap *km; | ||
79 | __be16 key = 0; | ||
80 | |||
81 | read_lock_bh(&ip_ct_gre_lock); | ||
82 | list_for_each_entry(km, &gre_keymap_list, list) { | ||
83 | if (gre_key_cmpfn(km, t)) { | ||
84 | key = km->tuple.src.u.gre.key; | ||
85 | break; | ||
86 | } | ||
87 | } | ||
88 | read_unlock_bh(&ip_ct_gre_lock); | ||
89 | |||
90 | DEBUGP("lookup src key 0x%x up key for ", key); | ||
91 | DUMP_TUPLE_GRE(t); | ||
92 | |||
93 | return key; | ||
94 | } | ||
95 | |||
96 | /* add a single keymap entry, associate with specified master ct */ | ||
97 | int | ||
98 | ip_ct_gre_keymap_add(struct ip_conntrack *ct, | ||
99 | struct ip_conntrack_tuple *t, int reply) | ||
100 | { | ||
101 | struct ip_ct_gre_keymap **exist_km, *km; | ||
102 | |||
103 | if (!ct->helper || strcmp(ct->helper->name, "pptp")) { | ||
104 | DEBUGP("refusing to add GRE keymap to non-pptp session\n"); | ||
105 | return -1; | ||
106 | } | ||
107 | |||
108 | if (!reply) | ||
109 | exist_km = &ct->help.ct_pptp_info.keymap_orig; | ||
110 | else | ||
111 | exist_km = &ct->help.ct_pptp_info.keymap_reply; | ||
112 | |||
113 | if (*exist_km) { | ||
114 | /* check whether it's a retransmission */ | ||
115 | list_for_each_entry(km, &gre_keymap_list, list) { | ||
116 | if (gre_key_cmpfn(km, t) && km == *exist_km) | ||
117 | return 0; | ||
118 | } | ||
119 | DEBUGP("trying to override keymap_%s for ct %p\n", | ||
120 | reply? "reply":"orig", ct); | ||
121 | return -EEXIST; | ||
122 | } | ||
123 | |||
124 | km = kmalloc(sizeof(*km), GFP_ATOMIC); | ||
125 | if (!km) | ||
126 | return -ENOMEM; | ||
127 | |||
128 | memcpy(&km->tuple, t, sizeof(*t)); | ||
129 | *exist_km = km; | ||
130 | |||
131 | DEBUGP("adding new entry %p: ", km); | ||
132 | DUMP_TUPLE_GRE(&km->tuple); | ||
133 | |||
134 | write_lock_bh(&ip_ct_gre_lock); | ||
135 | list_add_tail(&km->list, &gre_keymap_list); | ||
136 | write_unlock_bh(&ip_ct_gre_lock); | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* destroy the keymap entries associated with specified master ct */ | ||
142 | void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) | ||
143 | { | ||
144 | DEBUGP("entering for ct %p\n", ct); | ||
145 | |||
146 | if (!ct->helper || strcmp(ct->helper->name, "pptp")) { | ||
147 | DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | write_lock_bh(&ip_ct_gre_lock); | ||
152 | if (ct->help.ct_pptp_info.keymap_orig) { | ||
153 | DEBUGP("removing %p from list\n", | ||
154 | ct->help.ct_pptp_info.keymap_orig); | ||
155 | list_del(&ct->help.ct_pptp_info.keymap_orig->list); | ||
156 | kfree(ct->help.ct_pptp_info.keymap_orig); | ||
157 | ct->help.ct_pptp_info.keymap_orig = NULL; | ||
158 | } | ||
159 | if (ct->help.ct_pptp_info.keymap_reply) { | ||
160 | DEBUGP("removing %p from list\n", | ||
161 | ct->help.ct_pptp_info.keymap_reply); | ||
162 | list_del(&ct->help.ct_pptp_info.keymap_reply->list); | ||
163 | kfree(ct->help.ct_pptp_info.keymap_reply); | ||
164 | ct->help.ct_pptp_info.keymap_reply = NULL; | ||
165 | } | ||
166 | write_unlock_bh(&ip_ct_gre_lock); | ||
167 | } | ||
168 | |||
169 | |||
170 | /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ | ||
171 | |||
172 | /* invert gre part of tuple */ | ||
173 | static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, | ||
174 | const struct ip_conntrack_tuple *orig) | ||
175 | { | ||
176 | tuple->dst.u.gre.key = orig->src.u.gre.key; | ||
177 | tuple->src.u.gre.key = orig->dst.u.gre.key; | ||
178 | |||
179 | return 1; | ||
180 | } | ||
181 | |||
182 | /* gre hdr info to tuple */ | ||
183 | static int gre_pkt_to_tuple(const struct sk_buff *skb, | ||
184 | unsigned int dataoff, | ||
185 | struct ip_conntrack_tuple *tuple) | ||
186 | { | ||
187 | struct gre_hdr_pptp _pgrehdr, *pgrehdr; | ||
188 | __be16 srckey; | ||
189 | struct gre_hdr _grehdr, *grehdr; | ||
190 | |||
191 | /* first only delinearize old RFC1701 GRE header */ | ||
192 | grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); | ||
193 | if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { | ||
194 | /* try to behave like "ip_conntrack_proto_generic" */ | ||
195 | tuple->src.u.all = 0; | ||
196 | tuple->dst.u.all = 0; | ||
197 | return 1; | ||
198 | } | ||
199 | |||
200 | /* PPTP header is variable length, only need up to the call_id field */ | ||
201 | pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); | ||
202 | if (!pgrehdr) | ||
203 | return 1; | ||
204 | |||
205 | if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { | ||
206 | DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | tuple->dst.u.gre.key = pgrehdr->call_id; | ||
211 | srckey = gre_keymap_lookup(tuple); | ||
212 | tuple->src.u.gre.key = srckey; | ||
213 | |||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | /* print gre part of tuple */ | ||
218 | static int gre_print_tuple(struct seq_file *s, | ||
219 | const struct ip_conntrack_tuple *tuple) | ||
220 | { | ||
221 | return seq_printf(s, "srckey=0x%x dstkey=0x%x ", | ||
222 | ntohs(tuple->src.u.gre.key), | ||
223 | ntohs(tuple->dst.u.gre.key)); | ||
224 | } | ||
225 | |||
226 | /* print private data for conntrack */ | ||
227 | static int gre_print_conntrack(struct seq_file *s, | ||
228 | const struct ip_conntrack *ct) | ||
229 | { | ||
230 | return seq_printf(s, "timeout=%u, stream_timeout=%u ", | ||
231 | (ct->proto.gre.timeout / HZ), | ||
232 | (ct->proto.gre.stream_timeout / HZ)); | ||
233 | } | ||
234 | |||
235 | /* Returns verdict for packet, and may modify conntrack */ | ||
236 | static int gre_packet(struct ip_conntrack *ct, | ||
237 | const struct sk_buff *skb, | ||
238 | enum ip_conntrack_info conntrackinfo) | ||
239 | { | ||
240 | /* If we've seen traffic both ways, this is a GRE connection. | ||
241 | * Extend timeout. */ | ||
242 | if (ct->status & IPS_SEEN_REPLY) { | ||
243 | ip_ct_refresh_acct(ct, conntrackinfo, skb, | ||
244 | ct->proto.gre.stream_timeout); | ||
245 | /* Also, more likely to be important, and not a probe. */ | ||
246 | set_bit(IPS_ASSURED_BIT, &ct->status); | ||
247 | ip_conntrack_event_cache(IPCT_STATUS, skb); | ||
248 | } else | ||
249 | ip_ct_refresh_acct(ct, conntrackinfo, skb, | ||
250 | ct->proto.gre.timeout); | ||
251 | |||
252 | return NF_ACCEPT; | ||
253 | } | ||
254 | |||
255 | /* Called when a new connection for this protocol found. */ | ||
256 | static int gre_new(struct ip_conntrack *ct, | ||
257 | const struct sk_buff *skb) | ||
258 | { | ||
259 | DEBUGP(": "); | ||
260 | DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
261 | |||
262 | /* initialize to sane value. Ideally a conntrack helper | ||
263 | * (e.g. in case of pptp) is increasing them */ | ||
264 | ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; | ||
265 | ct->proto.gre.timeout = GRE_TIMEOUT; | ||
266 | |||
267 | return 1; | ||
268 | } | ||
269 | |||
270 | /* Called when a conntrack entry has already been removed from the hashes | ||
271 | * and is about to be deleted from memory */ | ||
272 | static void gre_destroy(struct ip_conntrack *ct) | ||
273 | { | ||
274 | struct ip_conntrack *master = ct->master; | ||
275 | DEBUGP(" entering\n"); | ||
276 | |||
277 | if (!master) | ||
278 | DEBUGP("no master !?!\n"); | ||
279 | else | ||
280 | ip_ct_gre_keymap_destroy(master); | ||
281 | } | ||
282 | |||
283 | /* protocol helper struct */ | ||
284 | static struct ip_conntrack_protocol gre = { | ||
285 | .proto = IPPROTO_GRE, | ||
286 | .name = "gre", | ||
287 | .pkt_to_tuple = gre_pkt_to_tuple, | ||
288 | .invert_tuple = gre_invert_tuple, | ||
289 | .print_tuple = gre_print_tuple, | ||
290 | .print_conntrack = gre_print_conntrack, | ||
291 | .packet = gre_packet, | ||
292 | .new = gre_new, | ||
293 | .destroy = gre_destroy, | ||
294 | .me = THIS_MODULE, | ||
295 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
296 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
297 | .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, | ||
298 | .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, | ||
299 | #endif | ||
300 | }; | ||
301 | |||
302 | /* ip_conntrack_proto_gre initialization */ | ||
303 | int __init ip_ct_proto_gre_init(void) | ||
304 | { | ||
305 | return ip_conntrack_protocol_register(&gre); | ||
306 | } | ||
307 | |||
308 | /* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's | ||
309 | * init() code on errors. | ||
310 | */ | ||
311 | void ip_ct_proto_gre_fini(void) | ||
312 | { | ||
313 | struct list_head *pos, *n; | ||
314 | |||
315 | /* delete all keymap entries */ | ||
316 | write_lock_bh(&ip_ct_gre_lock); | ||
317 | list_for_each_safe(pos, n, &gre_keymap_list) { | ||
318 | DEBUGP("deleting keymap %p at module unload time\n", pos); | ||
319 | list_del(pos); | ||
320 | kfree(pos); | ||
321 | } | ||
322 | write_unlock_bh(&ip_ct_gre_lock); | ||
323 | |||
324 | ip_conntrack_protocol_unregister(&gre); | ||
325 | } | ||
326 | |||
327 | EXPORT_SYMBOL(ip_ct_gre_keymap_add); | ||
328 | EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c deleted file mode 100644 index ad70c81a21e0..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ /dev/null | |||
@@ -1,315 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/timer.h> | ||
11 | #include <linux/netfilter.h> | ||
12 | #include <linux/in.h> | ||
13 | #include <linux/icmp.h> | ||
14 | #include <linux/seq_file.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <net/ip.h> | ||
17 | #include <net/checksum.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
20 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
21 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
22 | |||
23 | unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ; | ||
24 | |||
25 | #if 0 | ||
26 | #define DEBUGP printk | ||
27 | #else | ||
28 | #define DEBUGP(format, args...) | ||
29 | #endif | ||
30 | |||
31 | static int icmp_pkt_to_tuple(const struct sk_buff *skb, | ||
32 | unsigned int dataoff, | ||
33 | struct ip_conntrack_tuple *tuple) | ||
34 | { | ||
35 | struct icmphdr _hdr, *hp; | ||
36 | |||
37 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
38 | if (hp == NULL) | ||
39 | return 0; | ||
40 | |||
41 | tuple->dst.u.icmp.type = hp->type; | ||
42 | tuple->src.u.icmp.id = hp->un.echo.id; | ||
43 | tuple->dst.u.icmp.code = hp->code; | ||
44 | |||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | /* Add 1; spaces filled with 0. */ | ||
49 | static const u_int8_t invmap[] = { | ||
50 | [ICMP_ECHO] = ICMP_ECHOREPLY + 1, | ||
51 | [ICMP_ECHOREPLY] = ICMP_ECHO + 1, | ||
52 | [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, | ||
53 | [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, | ||
54 | [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, | ||
55 | [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, | ||
56 | [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, | ||
57 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 | ||
58 | }; | ||
59 | |||
60 | static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, | ||
61 | const struct ip_conntrack_tuple *orig) | ||
62 | { | ||
63 | if (orig->dst.u.icmp.type >= sizeof(invmap) | ||
64 | || !invmap[orig->dst.u.icmp.type]) | ||
65 | return 0; | ||
66 | |||
67 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | ||
68 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; | ||
69 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | ||
70 | return 1; | ||
71 | } | ||
72 | |||
73 | /* Print out the per-protocol part of the tuple. */ | ||
74 | static int icmp_print_tuple(struct seq_file *s, | ||
75 | const struct ip_conntrack_tuple *tuple) | ||
76 | { | ||
77 | return seq_printf(s, "type=%u code=%u id=%u ", | ||
78 | tuple->dst.u.icmp.type, | ||
79 | tuple->dst.u.icmp.code, | ||
80 | ntohs(tuple->src.u.icmp.id)); | ||
81 | } | ||
82 | |||
83 | /* Print out the private part of the conntrack. */ | ||
84 | static int icmp_print_conntrack(struct seq_file *s, | ||
85 | const struct ip_conntrack *conntrack) | ||
86 | { | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | /* Returns verdict for packet, or -1 for invalid. */ | ||
91 | static int icmp_packet(struct ip_conntrack *ct, | ||
92 | const struct sk_buff *skb, | ||
93 | enum ip_conntrack_info ctinfo) | ||
94 | { | ||
95 | /* Try to delete connection immediately after all replies: | ||
96 | won't actually vanish as we still have skb, and del_timer | ||
97 | means this will only run once even if count hits zero twice | ||
98 | (theoretically possible with SMP) */ | ||
99 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { | ||
100 | if (atomic_dec_and_test(&ct->proto.icmp.count) | ||
101 | && del_timer(&ct->timeout)) | ||
102 | ct->timeout.function((unsigned long)ct); | ||
103 | } else { | ||
104 | atomic_inc(&ct->proto.icmp.count); | ||
105 | ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
106 | ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); | ||
107 | } | ||
108 | |||
109 | return NF_ACCEPT; | ||
110 | } | ||
111 | |||
112 | /* Called when a new connection for this protocol found. */ | ||
113 | static int icmp_new(struct ip_conntrack *conntrack, | ||
114 | const struct sk_buff *skb) | ||
115 | { | ||
116 | static const u_int8_t valid_new[] = { | ||
117 | [ICMP_ECHO] = 1, | ||
118 | [ICMP_TIMESTAMP] = 1, | ||
119 | [ICMP_INFO_REQUEST] = 1, | ||
120 | [ICMP_ADDRESS] = 1 | ||
121 | }; | ||
122 | |||
123 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) | ||
124 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { | ||
125 | /* Can't create a new ICMP `conn' with this. */ | ||
126 | DEBUGP("icmp: can't create new conn with type %u\n", | ||
127 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | ||
128 | DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | ||
129 | return 0; | ||
130 | } | ||
131 | atomic_set(&conntrack->proto.icmp.count, 0); | ||
132 | return 1; | ||
133 | } | ||
134 | |||
135 | static int | ||
136 | icmp_error_message(struct sk_buff *skb, | ||
137 | enum ip_conntrack_info *ctinfo, | ||
138 | unsigned int hooknum) | ||
139 | { | ||
140 | struct ip_conntrack_tuple innertuple, origtuple; | ||
141 | struct { | ||
142 | struct icmphdr icmp; | ||
143 | struct iphdr ip; | ||
144 | } _in, *inside; | ||
145 | struct ip_conntrack_protocol *innerproto; | ||
146 | struct ip_conntrack_tuple_hash *h; | ||
147 | int dataoff; | ||
148 | |||
149 | IP_NF_ASSERT(skb->nfct == NULL); | ||
150 | |||
151 | /* Not enough header? */ | ||
152 | inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); | ||
153 | if (inside == NULL) | ||
154 | return -NF_ACCEPT; | ||
155 | |||
156 | /* Ignore ICMP's containing fragments (shouldn't happen) */ | ||
157 | if (inside->ip.frag_off & htons(IP_OFFSET)) { | ||
158 | DEBUGP("icmp_error_track: fragment of proto %u\n", | ||
159 | inside->ip.protocol); | ||
160 | return -NF_ACCEPT; | ||
161 | } | ||
162 | |||
163 | innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); | ||
164 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; | ||
165 | /* Are they talking about one of our connections? */ | ||
166 | if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { | ||
167 | DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); | ||
168 | ip_conntrack_proto_put(innerproto); | ||
169 | return -NF_ACCEPT; | ||
170 | } | ||
171 | |||
172 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | ||
173 | been preserved inside the ICMP. */ | ||
174 | if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { | ||
175 | DEBUGP("icmp_error_track: Can't invert tuple\n"); | ||
176 | ip_conntrack_proto_put(innerproto); | ||
177 | return -NF_ACCEPT; | ||
178 | } | ||
179 | ip_conntrack_proto_put(innerproto); | ||
180 | |||
181 | *ctinfo = IP_CT_RELATED; | ||
182 | |||
183 | h = ip_conntrack_find_get(&innertuple, NULL); | ||
184 | if (!h) { | ||
185 | /* Locally generated ICMPs will match inverted if they | ||
186 | haven't been SNAT'ed yet */ | ||
187 | /* FIXME: NAT code has to handle half-done double NAT --RR */ | ||
188 | if (hooknum == NF_IP_LOCAL_OUT) | ||
189 | h = ip_conntrack_find_get(&origtuple, NULL); | ||
190 | |||
191 | if (!h) { | ||
192 | DEBUGP("icmp_error_track: no match\n"); | ||
193 | return -NF_ACCEPT; | ||
194 | } | ||
195 | /* Reverse direction from that found */ | ||
196 | if (DIRECTION(h) != IP_CT_DIR_REPLY) | ||
197 | *ctinfo += IP_CT_IS_REPLY; | ||
198 | } else { | ||
199 | if (DIRECTION(h) == IP_CT_DIR_REPLY) | ||
200 | *ctinfo += IP_CT_IS_REPLY; | ||
201 | } | ||
202 | |||
203 | /* Update skb to refer to this connection */ | ||
204 | skb->nfct = &tuplehash_to_ctrack(h)->ct_general; | ||
205 | skb->nfctinfo = *ctinfo; | ||
206 | return -NF_ACCEPT; | ||
207 | } | ||
208 | |||
209 | /* Small and modified version of icmp_rcv */ | ||
210 | static int | ||
211 | icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | ||
212 | unsigned int hooknum) | ||
213 | { | ||
214 | struct icmphdr _ih, *icmph; | ||
215 | |||
216 | /* Not enough header? */ | ||
217 | icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); | ||
218 | if (icmph == NULL) { | ||
219 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
220 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
221 | "ip_ct_icmp: short packet "); | ||
222 | return -NF_ACCEPT; | ||
223 | } | ||
224 | |||
225 | /* See ip_conntrack_proto_tcp.c */ | ||
226 | if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && | ||
227 | nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) { | ||
228 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
229 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
230 | "ip_ct_icmp: bad ICMP checksum "); | ||
231 | return -NF_ACCEPT; | ||
232 | } | ||
233 | |||
234 | /* | ||
235 | * 18 is the highest 'known' ICMP type. Anything else is a mystery | ||
236 | * | ||
237 | * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently | ||
238 | * discarded. | ||
239 | */ | ||
240 | if (icmph->type > NR_ICMP_TYPES) { | ||
241 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
242 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
243 | "ip_ct_icmp: invalid ICMP type "); | ||
244 | return -NF_ACCEPT; | ||
245 | } | ||
246 | |||
247 | /* Need to track icmp error message? */ | ||
248 | if (icmph->type != ICMP_DEST_UNREACH | ||
249 | && icmph->type != ICMP_SOURCE_QUENCH | ||
250 | && icmph->type != ICMP_TIME_EXCEEDED | ||
251 | && icmph->type != ICMP_PARAMETERPROB | ||
252 | && icmph->type != ICMP_REDIRECT) | ||
253 | return NF_ACCEPT; | ||
254 | |||
255 | return icmp_error_message(skb, ctinfo, hooknum); | ||
256 | } | ||
257 | |||
258 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
259 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
260 | static int icmp_tuple_to_nfattr(struct sk_buff *skb, | ||
261 | const struct ip_conntrack_tuple *t) | ||
262 | { | ||
263 | NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16), | ||
264 | &t->src.u.icmp.id); | ||
265 | NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), | ||
266 | &t->dst.u.icmp.type); | ||
267 | NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), | ||
268 | &t->dst.u.icmp.code); | ||
269 | |||
270 | return 0; | ||
271 | |||
272 | nfattr_failure: | ||
273 | return -1; | ||
274 | } | ||
275 | |||
276 | static int icmp_nfattr_to_tuple(struct nfattr *tb[], | ||
277 | struct ip_conntrack_tuple *tuple) | ||
278 | { | ||
279 | if (!tb[CTA_PROTO_ICMP_TYPE-1] | ||
280 | || !tb[CTA_PROTO_ICMP_CODE-1] | ||
281 | || !tb[CTA_PROTO_ICMP_ID-1]) | ||
282 | return -EINVAL; | ||
283 | |||
284 | tuple->dst.u.icmp.type = | ||
285 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); | ||
286 | tuple->dst.u.icmp.code = | ||
287 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); | ||
288 | tuple->src.u.icmp.id = | ||
289 | *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); | ||
290 | |||
291 | if (tuple->dst.u.icmp.type >= sizeof(invmap) | ||
292 | || !invmap[tuple->dst.u.icmp.type]) | ||
293 | return -EINVAL; | ||
294 | |||
295 | return 0; | ||
296 | } | ||
297 | #endif | ||
298 | |||
299 | struct ip_conntrack_protocol ip_conntrack_protocol_icmp = | ||
300 | { | ||
301 | .proto = IPPROTO_ICMP, | ||
302 | .name = "icmp", | ||
303 | .pkt_to_tuple = icmp_pkt_to_tuple, | ||
304 | .invert_tuple = icmp_invert_tuple, | ||
305 | .print_tuple = icmp_print_tuple, | ||
306 | .print_conntrack = icmp_print_conntrack, | ||
307 | .packet = icmp_packet, | ||
308 | .new = icmp_new, | ||
309 | .error = icmp_error, | ||
310 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
311 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
312 | .tuple_to_nfattr = icmp_tuple_to_nfattr, | ||
313 | .nfattr_to_tuple = icmp_nfattr_to_tuple, | ||
314 | #endif | ||
315 | }; | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c deleted file mode 100644 index e6942992b2f6..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ /dev/null | |||
@@ -1,659 +0,0 @@ | |||
1 | /* | ||
2 | * Connection tracking protocol helper module for SCTP. | ||
3 | * | ||
4 | * SCTP is defined in RFC 2960. References to various sections in this code | ||
5 | * are to this RFC. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Added support for proc manipulation of timeouts. | ||
14 | */ | ||
15 | |||
16 | #include <linux/types.h> | ||
17 | #include <linux/timer.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/netfilter.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/in.h> | ||
22 | #include <linux/ip.h> | ||
23 | #include <linux/sctp.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | |||
27 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
28 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
29 | |||
/* Debug printing: compiled out by default; flip the #if to 1 to get
 * verbose tracing via printk. */
#if 0
#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
#else
#define DEBUGP(format, args...)
#endif

/* Protects conntrack->proto.sctp (the per-connection state and the
 * verification tags) against concurrent readers/writers. */
static DEFINE_RWLOCK(sctp_lock);
38 | |||
39 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | ||
40 | closely. They're more complex. --RR | ||
41 | |||
42 | And so for me for SCTP :D -Kiran */ | ||
43 | |||
/* Human-readable state names, indexed by enum sctp_conntrack
 * (used by sctp_print_conntrack for /proc output). */
static const char *sctp_conntrack_names[] = {
	"NONE",
	"CLOSED",
	"COOKIE_WAIT",
	"COOKIE_ECHOED",
	"ESTABLISHED",
	"SHUTDOWN_SENT",
	"SHUTDOWN_RECD",
	"SHUTDOWN_ACK_SENT",
};

/* Unit helpers: "10 SECS" textually expands to "10 * HZ" (jiffies). */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

/* Default per-state timeouts; tunable at runtime via the sysctl table
 * below when CONFIG_SYSCTL is set. */
static unsigned int ip_ct_sctp_timeout_closed __read_mostly          =  10 SECS;
static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly     =   3 SECS;
static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly   =   3 SECS;
static unsigned int ip_ct_sctp_timeout_established __read_mostly     =   5 DAYS;
/* NOTE(review): "300 SECS / 1000" evaluates to 0.3 seconds — this
 * reads like a milliseconds value; confirm the intended timeout. */
static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly   = 300 SECS / 1000;
static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly   = 300 SECS / 1000;
static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;

/* Timeout lookup, indexed by enum sctp_conntrack.  The NONE slot is
 * NULL and must never be dereferenced (the state machine never
 * returns to NONE once a chunk has been processed). */
static const unsigned int * sctp_timeouts[]
= { NULL,                              /* SCTP_CONNTRACK_NONE  */
    &ip_ct_sctp_timeout_closed,	       /* SCTP_CONNTRACK_CLOSED */
    &ip_ct_sctp_timeout_cookie_wait,   /* SCTP_CONNTRACK_COOKIE_WAIT */
    &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
    &ip_ct_sctp_timeout_established,   /* SCTP_CONNTRACK_ESTABLISHED */
    &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
    &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
    &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
 };
78 | |||
/* Short aliases for the states so the transition table below fits on
 * readable lines.  sIV marks an invalid transition. */
#define sNO SCTP_CONNTRACK_NONE
#define	sCL SCTP_CONNTRACK_CLOSED
#define	sCW SCTP_CONNTRACK_COOKIE_WAIT
#define	sCE SCTP_CONNTRACK_COOKIE_ECHOED
#define	sES SCTP_CONNTRACK_ESTABLISHED
#define	sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define	sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define	sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define	sIV SCTP_CONNTRACK_MAX

/*
	These are the descriptions of the states:

NOTE: These state names are tantalizingly similar to the states of an
SCTP endpoint. But the interpretation of the states is a little different,
considering that these are the states of the connection and not of an end
point. Please note the subtleties. -Kiran

NONE              - Nothing so far.
COOKIE WAIT       - We have seen an INIT chunk in the original direction, or also
		    an INIT_ACK chunk in the reply direction.
COOKIE ECHOED     - We have seen a COOKIE_ECHO chunk in the original direction.
ESTABLISHED       - We have seen a COOKIE_ACK in the reply direction.
SHUTDOWN_SENT     - We have seen a SHUTDOWN chunk in the original direction.
SHUTDOWN_RECD     - We have seen a SHUTDOWN chunk in the reply directoin.
SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
		    to that of the SHUTDOWN chunk.
CLOSED            - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
		    the SHUTDOWN chunk. Connection is closed.
*/

/* TODO
 - I have assumed that the first INIT is in the original direction.
 This messes things when an INIT comes in the reply direction in CLOSED
 state.
 - Check the error type in the reply dir before transitioning from
cookie echoed to closed.
 - Sec 5.2.4 of RFC 2960
 - Multi Homing support.
*/

/* SCTP conntrack state transitions: indexed by
 * [direction][chunk-type row (see new_state())][current state]. */
static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
	{
/*	ORIGINAL	*/
/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
/* init         */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
/* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
/* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
/* cookie_echo  */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
	},
	{
/*	REPLY	*/
/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
/* init         */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
/* init_ack     */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
/* abort        */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown     */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
/* error        */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
/* cookie_ack   */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
	}
};
149 | |||
150 | static int sctp_pkt_to_tuple(const struct sk_buff *skb, | ||
151 | unsigned int dataoff, | ||
152 | struct ip_conntrack_tuple *tuple) | ||
153 | { | ||
154 | sctp_sctphdr_t _hdr, *hp; | ||
155 | |||
156 | DEBUGP(__FUNCTION__); | ||
157 | DEBUGP("\n"); | ||
158 | |||
159 | /* Actually only need first 8 bytes. */ | ||
160 | hp = skb_header_pointer(skb, dataoff, 8, &_hdr); | ||
161 | if (hp == NULL) | ||
162 | return 0; | ||
163 | |||
164 | tuple->src.u.sctp.port = hp->source; | ||
165 | tuple->dst.u.sctp.port = hp->dest; | ||
166 | return 1; | ||
167 | } | ||
168 | |||
169 | static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple, | ||
170 | const struct ip_conntrack_tuple *orig) | ||
171 | { | ||
172 | DEBUGP(__FUNCTION__); | ||
173 | DEBUGP("\n"); | ||
174 | |||
175 | tuple->src.u.sctp.port = orig->dst.u.sctp.port; | ||
176 | tuple->dst.u.sctp.port = orig->src.u.sctp.port; | ||
177 | return 1; | ||
178 | } | ||
179 | |||
180 | /* Print out the per-protocol part of the tuple. */ | ||
181 | static int sctp_print_tuple(struct seq_file *s, | ||
182 | const struct ip_conntrack_tuple *tuple) | ||
183 | { | ||
184 | DEBUGP(__FUNCTION__); | ||
185 | DEBUGP("\n"); | ||
186 | |||
187 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
188 | ntohs(tuple->src.u.sctp.port), | ||
189 | ntohs(tuple->dst.u.sctp.port)); | ||
190 | } | ||
191 | |||
/* Print out the private part of the conntrack: the current SCTP state
 * name.  sctp_lock is taken so the state read is consistent with
 * concurrent updates from sctp_packet(). */
static int sctp_print_conntrack(struct seq_file *s,
				const struct ip_conntrack *conntrack)
{
	enum sctp_conntrack state;

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	read_lock_bh(&sctp_lock);
	state = conntrack->proto.sctp.state;
	read_unlock_bh(&sctp_lock);

	return seq_printf(s, "%s ", sctp_conntrack_names[state]);
}
207 | |||
/* Walk every chunk in an SCTP packet.  @offset starts just past the
 * common SCTP header and advances by each chunk's length rounded up
 * to a 4-byte boundary; @count is the 0-based chunk index.  The loop
 * terminates when the end of the skb is reached or a chunk header
 * can no longer be read (sch becomes NULL). */
#define for_each_sctp_chunk(skb, sch, _sch, offset, count)	\
for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0;	\
	offset < skb->len &&	\
	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
	offset += (ntohs(sch->length) + 3) & ~3, count++)
213 | |||
/* Some validity checks to make sure the chunks are fine.
 *
 * Enforces (per RFC 2960) that INIT, INIT_ACK and SHUTDOWN_COMPLETE
 * appear only as the sole chunk of a packet, that COOKIE_ACK /
 * COOKIE_ECHO appear only as the first chunk, and that no chunk has
 * zero length.  As a side effect, sets a bit in @map (a 256-bit
 * bitmap indexed by chunk type) for every chunk type seen, which the
 * callers use for the verification-tag checks.
 *
 * Returns 0 if the packet passes, non-zero otherwise (including the
 * case of a packet with no chunks at all).
 *
 * NOTE(review): @conntrack is currently unused in this function. */
static int do_basic_checks(struct ip_conntrack *conntrack,
			   const struct sk_buff *skb,
			   char *map)
{
	u_int32_t offset, count;
	sctp_chunkhdr_t _sch, *sch;
	int flag;

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	flag = 0;

	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
		DEBUGP("Chunk Num: %d  Type: %d\n", count, sch->type);

		/* flag marks that a "must be alone" chunk type was seen;
		 * if any further chunk follows, the check below fires. */
		if (sch->type == SCTP_CID_INIT
			|| sch->type == SCTP_CID_INIT_ACK
			|| sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
			flag = 1;
		}

		/*
		 * Cookie Ack/Echo chunks not the first OR
		 * Init / Init Ack / Shutdown compl chunks not the only chunks
		 * OR zero-length.
		 */
		if (((sch->type == SCTP_CID_COOKIE_ACK
			|| sch->type == SCTP_CID_COOKIE_ECHO
			|| flag)
		     && count !=0) || !sch->length) {
			DEBUGP("Basic checks failed\n");
			return 1;
		}

		if (map) {
			set_bit(sch->type, (void *)map);
		}
	}

	DEBUGP("Basic checks passed\n");
	/* A chunk-less packet is also invalid. */
	return count == 0;
}
258 | |||
259 | static int new_state(enum ip_conntrack_dir dir, | ||
260 | enum sctp_conntrack cur_state, | ||
261 | int chunk_type) | ||
262 | { | ||
263 | int i; | ||
264 | |||
265 | DEBUGP(__FUNCTION__); | ||
266 | DEBUGP("\n"); | ||
267 | |||
268 | DEBUGP("Chunk type: %d\n", chunk_type); | ||
269 | |||
270 | switch (chunk_type) { | ||
271 | case SCTP_CID_INIT: | ||
272 | DEBUGP("SCTP_CID_INIT\n"); | ||
273 | i = 0; break; | ||
274 | case SCTP_CID_INIT_ACK: | ||
275 | DEBUGP("SCTP_CID_INIT_ACK\n"); | ||
276 | i = 1; break; | ||
277 | case SCTP_CID_ABORT: | ||
278 | DEBUGP("SCTP_CID_ABORT\n"); | ||
279 | i = 2; break; | ||
280 | case SCTP_CID_SHUTDOWN: | ||
281 | DEBUGP("SCTP_CID_SHUTDOWN\n"); | ||
282 | i = 3; break; | ||
283 | case SCTP_CID_SHUTDOWN_ACK: | ||
284 | DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); | ||
285 | i = 4; break; | ||
286 | case SCTP_CID_ERROR: | ||
287 | DEBUGP("SCTP_CID_ERROR\n"); | ||
288 | i = 5; break; | ||
289 | case SCTP_CID_COOKIE_ECHO: | ||
290 | DEBUGP("SCTP_CID_COOKIE_ECHO\n"); | ||
291 | i = 6; break; | ||
292 | case SCTP_CID_COOKIE_ACK: | ||
293 | DEBUGP("SCTP_CID_COOKIE_ACK\n"); | ||
294 | i = 7; break; | ||
295 | case SCTP_CID_SHUTDOWN_COMPLETE: | ||
296 | DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); | ||
297 | i = 8; break; | ||
298 | default: | ||
299 | /* Other chunks like DATA, SACK, HEARTBEAT and | ||
300 | its ACK do not cause a change in state */ | ||
301 | DEBUGP("Unknown chunk type, Will stay in %s\n", | ||
302 | sctp_conntrack_names[cur_state]); | ||
303 | return cur_state; | ||
304 | } | ||
305 | |||
306 | DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", | ||
307 | dir, sctp_conntrack_names[cur_state], chunk_type, | ||
308 | sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); | ||
309 | |||
310 | return sctp_conntracks[dir][i][cur_state]; | ||
311 | } | ||
312 | |||
/* Returns verdict for packet, or -1 for invalid.
 *
 * Main per-packet handler: validates the chunk layout, checks the
 * verification tag against the recorded vtag for this direction
 * (RFC 2960 Sec 8.5 with the special cases of Sec 8.5.1), advances
 * the connection state per chunk, records new vtags from INIT /
 * INIT_ACK, and refreshes the conntrack timeout for the final state.
 * State and vtag updates are serialized under sctp_lock. */
static int sctp_packet(struct ip_conntrack *conntrack,
		       const struct sk_buff *skb,
		       enum ip_conntrack_info ctinfo)
{
	enum sctp_conntrack newconntrack, oldsctpstate;
	struct iphdr *iph = skb->nh.iph;
	sctp_sctphdr_t _sctph, *sh;
	sctp_chunkhdr_t _sch, *sch;
	u_int32_t offset, count;
	/* 256-bit bitmap of chunk types present in this packet. */
	char map[256 / sizeof (char)] = {0};

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
	if (sh == NULL)
		return -1;

	if (do_basic_checks(conntrack, skb, map) != 0)
		return -1;

	/* Check the verification tag (Sec 8.5): the blanket check only
	 * applies when none of the Sec 8.5.1 special chunk types are
	 * present; those are re-checked per chunk below. */
	if (!test_bit(SCTP_CID_INIT, (void *)map)
		&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
		&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
		&& !test_bit(SCTP_CID_ABORT, (void *)map)
		&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
		&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
		DEBUGP("Verification tag check failed\n");
		return -1;
	}

	/* do_basic_checks() guarantees at least one chunk, so the loop
	 * body runs and both variables are overwritten before use. */
	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
		write_lock_bh(&sctp_lock);

		/* Special cases of Verification tag check (Sec 8.5.1) */
		if (sch->type == SCTP_CID_INIT) {
			/* Sec 8.5.1 (A) */
			if (sh->vtag != 0) {
				write_unlock_bh(&sctp_lock);
				return -1;
			}
		} else if (sch->type == SCTP_CID_ABORT) {
			/* Sec 8.5.1 (B): ABORT may carry the vtag of
			 * either direction. */
			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
				&& !(sh->vtag == conntrack->proto.sctp.vtag
							[1 - CTINFO2DIR(ctinfo)])) {
				write_unlock_bh(&sctp_lock);
				return -1;
			}
		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
			/* Sec 8.5.1 (C): reflected vtag is accepted only
			 * with the T flag (bit 0 of the chunk flags) set. */
			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
				&& !(sh->vtag == conntrack->proto.sctp.vtag
							[1 - CTINFO2DIR(ctinfo)]
					&& (sch->flags & 1))) {
				write_unlock_bh(&sctp_lock);
				return -1;
			}
		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
			/* Sec 8.5.1 (D) */
			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
				write_unlock_bh(&sctp_lock);
				return -1;
			}
		}

		oldsctpstate = conntrack->proto.sctp.state;
		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);

		/* Invalid */
		if (newconntrack == SCTP_CONNTRACK_MAX) {
			DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
			       CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
			write_unlock_bh(&sctp_lock);
			return -1;
		}

		/* If it is an INIT or an INIT ACK note down the vtag:
		 * the tag announced here must be echoed by packets
		 * arriving in the opposite direction. */
		if (sch->type == SCTP_CID_INIT
			|| sch->type == SCTP_CID_INIT_ACK) {
			sctp_inithdr_t _inithdr, *ih;

			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
						sizeof(_inithdr), &_inithdr);
			if (ih == NULL) {
					write_unlock_bh(&sctp_lock);
					return -1;
			}
			DEBUGP("Setting vtag %x for dir %d\n",
					ih->init_tag, !CTINFO2DIR(ctinfo));
			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
		}

		conntrack->proto.sctp.state = newconntrack;
		if (oldsctpstate != newconntrack)
			ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
		write_unlock_bh(&sctp_lock);
	}

	ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);

	/* Association is fully established once the COOKIE_ACK arrives
	 * in the reply direction: mark the conntrack assured. */
	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
		DEBUGP("Setting assured bit\n");
		set_bit(IPS_ASSURED_BIT, &conntrack->status);
		ip_conntrack_event_cache(IPCT_STATUS, skb);
	}

	return NF_ACCEPT;
}
427 | |||
/* Called when a new connection for this protocol found.
 *
 * Validates the first packet of a would-be connection, rejects OOTB
 * packets that must not create state (ABORT, SHUTDOWN_COMPLETE,
 * COOKIE_ACK — RFC 2960 Sec 8.4), seeds the initial state and the
 * reply-direction vtag.  Returns 1 to keep the conntrack, 0 to
 * discard it.  No locking needed: this conntrack is not yet visible
 * to other CPUs. */
static int sctp_new(struct ip_conntrack *conntrack,
		    const struct sk_buff *skb)
{
	enum sctp_conntrack newconntrack;
	struct iphdr *iph = skb->nh.iph;
	sctp_sctphdr_t _sctph, *sh;
	sctp_chunkhdr_t _sch, *sch;
	u_int32_t offset, count;
	/* 256-bit bitmap of chunk types present in this packet. */
	char map[256 / sizeof (char)] = {0};

	DEBUGP(__FUNCTION__);
	DEBUGP("\n");

	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
	if (sh == NULL)
		return 0;

	if (do_basic_checks(conntrack, skb, map) != 0)
		return 0;

	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
	if ((test_bit (SCTP_CID_ABORT, (void *)map))
		|| (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
		|| (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
		return 0;
	}

	newconntrack = SCTP_CONNTRACK_MAX;
	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
		/* Don't need lock here: this conntrack not in circulation yet */
		newconntrack = new_state (IP_CT_DIR_ORIGINAL,
						SCTP_CONNTRACK_NONE, sch->type);

		/* Invalid: delete conntrack */
		if (newconntrack == SCTP_CONNTRACK_MAX) {
			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
			return 0;
		}

		/* Copy the vtag into the state info */
		if (sch->type == SCTP_CID_INIT) {
			if (sh->vtag == 0) {
				sctp_inithdr_t _inithdr, *ih;

				ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
							sizeof(_inithdr), &_inithdr);
				if (ih == NULL)
					return 0;

				DEBUGP("Setting vtag %x for new conn\n",
					ih->init_tag);

				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
								ih->init_tag;
			} else {
				/* Sec 8.5.1 (A): an INIT must carry vtag 0. */
				return 0;
			}
		}
		/* If it is a shutdown ack OOTB packet, we expect a return
		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
		else {
			DEBUGP("Setting vtag %x for new conn OOTB\n",
				sh->vtag);
			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
		}

		conntrack->proto.sctp.state = newconntrack;
	}

	return 1;
}
501 | |||
/* Registration record for the SCTP protocol helper.  SCTP has no
 * per-conntrack destroy work, hence .destroy is NULL.  The netlink
 * converters reuse the generic port-based helpers since SCTP tuples
 * are just port pairs. */
static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
	.proto 		 = IPPROTO_SCTP,
	.name 		 = "sctp",
	.pkt_to_tuple 	 = sctp_pkt_to_tuple,
	.invert_tuple 	 = sctp_invert_tuple,
	.print_tuple 	 = sctp_print_tuple,
	.print_conntrack = sctp_print_conntrack,
	.packet 	 = sctp_packet,
	.new 		 = sctp_new,
	.destroy 	 = NULL,
	.me 		 = THIS_MODULE,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
#endif
};
519 | |||
#ifdef CONFIG_SYSCTL
/* Leaf sysctl entries exposing the SCTP state timeouts as
 * /proc/sys/net/ipv4/netfilter/ip_conntrack_sctp_timeout_*.
 * proc_dointvec_jiffies converts between seconds (user view) and
 * jiffies (stored value). */
static ctl_table ip_ct_sysctl_table[] = {
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
		.procname	= "ip_conntrack_sctp_timeout_closed",
		.data		= &ip_ct_sctp_timeout_closed,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
		.procname	= "ip_conntrack_sctp_timeout_cookie_wait",
		.data		= &ip_ct_sctp_timeout_cookie_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
		.procname	= "ip_conntrack_sctp_timeout_cookie_echoed",
		.data		= &ip_ct_sctp_timeout_cookie_echoed,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
		.procname	= "ip_conntrack_sctp_timeout_established",
		.data		= &ip_ct_sctp_timeout_established,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
		.procname	= "ip_conntrack_sctp_timeout_shutdown_sent",
		.data		= &ip_ct_sctp_timeout_shutdown_sent,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
		.procname	= "ip_conntrack_sctp_timeout_shutdown_recd",
		.data		= &ip_ct_sctp_timeout_shutdown_recd,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
		.procname	= "ip_conntrack_sctp_timeout_shutdown_ack_sent",
		.data		= &ip_ct_sctp_timeout_shutdown_ack_sent,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{ .ctl_name = 0 }
};

/* Parent directories: net -> ipv4 -> netfilter, built innermost-out
 * so register_sysctl_table() can be handed the single root table. */
static ctl_table ip_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_IPV4_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= ip_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

static ctl_table ip_ct_ipv4_table[] = {
	{
		.ctl_name	= NET_IPV4,
		.procname	= "ipv4",
		.mode		= 0555,
		.child		= ip_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};

static ctl_table ip_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= ip_ct_ipv4_table,
	},
	{ .ctl_name = 0 }
};

/* Handle returned by register_sysctl_table(); needed for unregister
 * at module unload. */
static struct ctl_table_header *ip_ct_sysctl_header;
#endif
613 | |||
614 | static int __init ip_conntrack_proto_sctp_init(void) | ||
615 | { | ||
616 | int ret; | ||
617 | |||
618 | ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp); | ||
619 | if (ret) { | ||
620 | printk("ip_conntrack_proto_sctp: protocol register failed\n"); | ||
621 | goto out; | ||
622 | } | ||
623 | |||
624 | #ifdef CONFIG_SYSCTL | ||
625 | ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table); | ||
626 | if (ip_ct_sysctl_header == NULL) { | ||
627 | ret = -ENOMEM; | ||
628 | printk("ip_conntrack_proto_sctp: can't register to sysctl.\n"); | ||
629 | goto cleanup; | ||
630 | } | ||
631 | #endif | ||
632 | |||
633 | return ret; | ||
634 | |||
635 | #ifdef CONFIG_SYSCTL | ||
636 | cleanup: | ||
637 | ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); | ||
638 | #endif | ||
639 | out: | ||
640 | DEBUGP("SCTP conntrack module loading %s\n", | ||
641 | ret ? "failed": "succeeded"); | ||
642 | return ret; | ||
643 | } | ||
644 | |||
/* Module unload: unregister the protocol handler first (so no new
 * packets reach this helper), then remove the sysctl entries. */
static void __exit ip_conntrack_proto_sctp_fini(void)
{
	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
#ifdef CONFIG_SYSCTL
 	unregister_sysctl_table(ip_ct_sysctl_header);
#endif
	DEBUGP("SCTP conntrack module unloaded\n");
}

module_init(ip_conntrack_proto_sctp_init);
module_exit(ip_conntrack_proto_sctp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Kiran Kumar Immidi");
MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c deleted file mode 100644 index 0a72eab14620..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ /dev/null | |||
@@ -1,1164 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>: | ||
9 | * - Real stateful connection tracking | ||
10 | * - Modified state transitions table | ||
11 | * - Window scaling support added | ||
12 | * - SACK support added | ||
13 | * | ||
14 | * Willy Tarreau: | ||
15 | * - State table bugfixes | ||
16 | * - More robust state changes | ||
17 | * - Tuning timer parameters | ||
18 | * | ||
19 | * version 2.2 | ||
20 | */ | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | #include <linux/timer.h> | ||
24 | #include <linux/netfilter.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/ip.h> | ||
28 | #include <linux/tcp.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | |||
31 | #include <net/tcp.h> | ||
32 | |||
33 | #include <linux/netfilter_ipv4.h> | ||
34 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
35 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
36 | |||
/* Debug printing: compiled out by default; flip the #if to 1 to get
 * verbose tracing via printk. */
#if 0
#define DEBUGP printk
#define DEBUGP_VARS
#else
#define DEBUGP(format, args...)
#endif

/* Protects conntrack->proto.tcp (per-connection TCP window/state
 * tracking data). */
static DEFINE_RWLOCK(tcp_lock);

/* "Be conservative in what you do,
    be liberal in what you accept from others."
    If it's non-zero, we mark only out of window RST segments as INVALID. */
int ip_ct_tcp_be_liberal __read_mostly = 0;

/* If it is set to zero, we disable picking up already established
   connections. */
int ip_ct_tcp_loose __read_mostly = 1;

/* Max number of the retransmitted packets without receiving an (acceptable)
   ACK from the destination. If this number is reached, a shorter timer
   will be started. */
int ip_ct_tcp_max_retrans __read_mostly = 3;

  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
     closely.  They're more complex. --RR */

/* Human-readable state names, indexed by enum tcp_conntrack. */
static const char *tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"LISTEN"
};

/* Unit helpers: "10 SECS" textually expands to "10 * HZ" (jiffies). */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

/* Per-state timeout defaults.  Non-static: presumably exported for
 * the sysctl code elsewhere in the build — confirm against callers. */
unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
unsigned int ip_ct_tcp_timeout_established __read_mostly =   5 DAYS;
unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
unsigned int ip_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
unsigned int ip_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
unsigned int ip_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
unsigned int ip_ct_tcp_timeout_close __read_mostly =        10 SECS;

/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;

/* Timeout lookup, indexed by enum tcp_conntrack.  NONE and LISTEN
 * have no timeout entry and must not be dereferenced. */
static const unsigned int * tcp_timeouts[]
= { NULL,                              /*      TCP_CONNTRACK_NONE */
    &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
    &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
    &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED, */
    &ip_ct_tcp_timeout_fin_wait,       /*      TCP_CONNTRACK_FIN_WAIT, */
    &ip_ct_tcp_timeout_close_wait,     /*      TCP_CONNTRACK_CLOSE_WAIT, */
    &ip_ct_tcp_timeout_last_ack,       /*      TCP_CONNTRACK_LAST_ACK, */
    &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT, */
    &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE, */
    NULL,                              /*      TCP_CONNTRACK_LISTEN */
 };

/* Short aliases for the states used in the transition table; sIV is
 * an invalid packet, sIG one to be ignored. */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE

/* What TCP flags are set from RST/SYN/FIN/ACK.  Rows of the
 * transition table are indexed by these values. */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
131 | |||
132 | /* | ||
133 | * The TCP state transition table needs a few words... | ||
134 | * | ||
135 | * We are the man in the middle. All the packets go through us | ||
136 | * but might get lost in transit to the destination. | ||
137 | * It is assumed that the destinations can't receive segments | ||
138 | * we haven't seen. | ||
139 | * | ||
140 | * The checked segment is in window, but our windows are *not* | ||
141 | * equivalent with the ones of the sender/receiver. We always | ||
142 | * try to guess the state of the current sender. | ||
143 | * | ||
144 | * The meaning of the states are: | ||
145 | * | ||
146 | * NONE: initial state | ||
147 | * SYN_SENT: SYN-only packet seen | ||
148 | * SYN_RECV: SYN-ACK packet seen | ||
149 | * ESTABLISHED: ACK packet seen | ||
150 | * FIN_WAIT: FIN packet seen | ||
151 | * CLOSE_WAIT: ACK seen (after FIN) | ||
152 | * LAST_ACK: FIN seen (after FIN) | ||
153 | * TIME_WAIT: last ACK seen | ||
154 | * CLOSE: closed connection | ||
155 | * | ||
156 | * LISTEN state is not used. | ||
157 | * | ||
158 | * Packets marked as IGNORED (sIG): | ||
159 | * if they may be either invalid or valid | ||
160 | * and the receiver may send back a connection | ||
161 | * closing RST or a SYN/ACK. | ||
162 | * | ||
163 | * Packets marked as INVALID (sIV): | ||
164 | * if they are invalid | ||
165 | * or we do not support the request (simultaneous open) | ||
166 | */ | ||
/* The TCP state transition table.
 * Indexed by [direction (0 = ORIGINAL, 1 = REPLY)]
 *            [flag class from get_conntrack_index()]
 *            [current conntrack state]. */
static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
 *	sSR -> sIG	Late retransmitted SYN?
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*
 * A SYN/ACK from the client is always invalid:
 *	- either it tries to set up a simultaneous open, which is
 *	  not supported;
 *	- or the firewall has just been inserted between the two hosts
 *	  during the session set-up. The SYN will be retransmitted
 *	  by the true client (or it'll time out).
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client might not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
 *			the last ACK.
 *			Might be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*
 *	sNO -> sIV	Never reached.
 *	sSS -> sIV	Simultaneous open, not supported
 *	sSR -> sIV	Simultaneous open, not supported.
 *	sES -> sIV	Server may not initiate a connection.
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV	Reopened connection, but server may not do it.
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
/*
 *	sSS -> sSR	Standard open.
 *	sSR -> sSR	Retransmitted SYN/ACK.
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sSS -> sIG	Might be a half-open connection.
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
285 | |||
286 | static int tcp_pkt_to_tuple(const struct sk_buff *skb, | ||
287 | unsigned int dataoff, | ||
288 | struct ip_conntrack_tuple *tuple) | ||
289 | { | ||
290 | struct tcphdr _hdr, *hp; | ||
291 | |||
292 | /* Actually only need first 8 bytes. */ | ||
293 | hp = skb_header_pointer(skb, dataoff, 8, &_hdr); | ||
294 | if (hp == NULL) | ||
295 | return 0; | ||
296 | |||
297 | tuple->src.u.tcp.port = hp->source; | ||
298 | tuple->dst.u.tcp.port = hp->dest; | ||
299 | |||
300 | return 1; | ||
301 | } | ||
302 | |||
303 | static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple, | ||
304 | const struct ip_conntrack_tuple *orig) | ||
305 | { | ||
306 | tuple->src.u.tcp.port = orig->dst.u.tcp.port; | ||
307 | tuple->dst.u.tcp.port = orig->src.u.tcp.port; | ||
308 | return 1; | ||
309 | } | ||
310 | |||
311 | /* Print out the per-protocol part of the tuple. */ | ||
312 | static int tcp_print_tuple(struct seq_file *s, | ||
313 | const struct ip_conntrack_tuple *tuple) | ||
314 | { | ||
315 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
316 | ntohs(tuple->src.u.tcp.port), | ||
317 | ntohs(tuple->dst.u.tcp.port)); | ||
318 | } | ||
319 | |||
/* Print out the private part of the conntrack: the symbolic TCP state. */
static int tcp_print_conntrack(struct seq_file *s,
			       const struct ip_conntrack *conntrack)
{
	enum tcp_conntrack state;

	/* Snapshot the state under the lock; format the output without it. */
	read_lock_bh(&tcp_lock);
	state = conntrack->proto.tcp.state;
	read_unlock_bh(&tcp_lock);

	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
}
332 | |||
333 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
334 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
/* Export the private TCP state into a nested CTA_PROTOINFO_TCP netlink
 * attribute.  Returns 0 on success, -1 if the skb ran out of room
 * (the NFA_NEST/NFA_PUT macros jump to nfattr_failure in that case). */
static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
			 const struct ip_conntrack *ct)
{
	struct nfattr *nest_parms;

	/* Hold the lock while reading ct->proto.tcp.state into the skb. */
	read_lock_bh(&tcp_lock);
	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
		&ct->proto.tcp.state);
	read_unlock_bh(&tcp_lock);

	NFA_NEST_END(skb, nest_parms);

	return 0;

nfattr_failure:
	/* Reached via the hidden goto inside NFA_NEST/NFA_PUT above,
	 * with tcp_lock still held for reading. */
	read_unlock_bh(&tcp_lock);
	return -1;
}
354 | |||
/* Minimum payload sizes per TCP protoinfo attribute, checked with
 * nfattr_bad_size() in nfattr_to_tcp() before the data is used. */
static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
	[CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
};
358 | |||
359 | static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) | ||
360 | { | ||
361 | struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; | ||
362 | struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; | ||
363 | |||
364 | /* updates could not contain anything about the private | ||
365 | * protocol info, in that case skip the parsing */ | ||
366 | if (!attr) | ||
367 | return 0; | ||
368 | |||
369 | nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); | ||
370 | |||
371 | if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) | ||
372 | return -EINVAL; | ||
373 | |||
374 | if (!tb[CTA_PROTOINFO_TCP_STATE-1]) | ||
375 | return -EINVAL; | ||
376 | |||
377 | write_lock_bh(&tcp_lock); | ||
378 | ct->proto.tcp.state = | ||
379 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); | ||
380 | write_unlock_bh(&tcp_lock); | ||
381 | |||
382 | return 0; | ||
383 | } | ||
384 | #endif | ||
385 | |||
386 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) | ||
387 | { | ||
388 | if (tcph->rst) return TCP_RST_SET; | ||
389 | else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); | ||
390 | else if (tcph->fin) return TCP_FIN_SET; | ||
391 | else if (tcph->ack) return TCP_ACK_SET; | ||
392 | else return TCP_NONE_SET; | ||
393 | } | ||
394 | |||
395 | /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering | ||
396 | in IP Filter' by Guido van Rooij. | ||
397 | |||
398 | http://www.nluug.nl/events/sane2000/papers.html | ||
399 | http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz | ||
400 | |||
401 | The boundaries and the conditions are changed according to RFC793: | ||
402 | the packet must intersect the window (i.e. segments may be | ||
403 | after the right or before the left edge) and thus receivers may ACK | ||
404 | segments after the right edge of the window. | ||
405 | |||
406 | td_maxend = max(sack + max(win,1)) seen in reply packets | ||
407 | td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets | ||
408 | td_maxwin += seq + len - sender.td_maxend | ||
409 | if seq + len > sender.td_maxend | ||
410 | td_end = max(seq + len) seen in sent packets | ||
411 | |||
412 | I. Upper bound for valid data: seq <= sender.td_maxend | ||
413 | II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin | ||
414 | III. Upper bound for valid ack: sack <= receiver.td_end | ||
415 | IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW | ||
416 | |||
417 | where sack is the highest right edge of sack block found in the packet. | ||
418 | |||
   The upper bound limit for a valid ack is not ignored -
   we don't have to deal with fragments.
421 | */ | ||
422 | |||
423 | static inline __u32 segment_seq_plus_len(__u32 seq, | ||
424 | size_t len, | ||
425 | struct iphdr *iph, | ||
426 | struct tcphdr *tcph) | ||
427 | { | ||
428 | return (seq + len - (iph->ihl + tcph->doff)*4 | ||
429 | + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); | ||
430 | } | ||
431 | |||
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/* Maximum distance an ACK may lag behind the receiver's right edge and
 * still be accepted (boundary check IV in tcp_in_window()): at least
 * MAXACKWINCONST, or the sender's largest window seen if that is bigger. */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
437 | |||
/*
 * Simplified tcp_parse_options routine from tcp_input.c
 *
 * Walk the TCP options of the segment and record in 'state' whether the
 * sender permits SACK and which window scale factor (if any) it
 * announced.  state->flags and state->td_scale are reset first.
 */
static void tcp_options(const struct sk_buff *skb,
			struct iphdr *iph,
			struct tcphdr *tcph,
			struct ip_ct_tcp_state *state)
{
	/* Room for the maximal option area: 60-byte header minus the
	 * fixed part. */
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);

	if (!length)
		return;

	ptr = skb_header_pointer(skb,
				 (iph->ihl * 4) + sizeof(struct tcphdr),
				 length, buff);
	/* The header length was validated in tcp_error(), so the option
	 * bytes must be available. */
	BUG_ON(ptr == NULL);

	state->td_scale =
	state->flags = 0;

	while (length > 0) {
		int opcode=*ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			opsize=*ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				/* Don't parse partial options: the break
				 * leaves the switch, and length going
				 * negative below terminates the loop. */
				break;

			if (opcode == TCPOPT_SACK_PERM
			    && opsize == TCPOLEN_SACK_PERM)
				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
			else if (opcode == TCPOPT_WINDOW
				 && opsize == TCPOLEN_WINDOW) {
				state->td_scale = *(u_int8_t *)ptr;

				if (state->td_scale > 14) {
					/* See RFC1323 */
					state->td_scale = 14;
				}
				state->flags |=
					IP_CT_TCP_FLAG_WINDOW_SCALE;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
497 | |||
/* Scan the TCP options for SACK blocks and raise *sack to the highest
 * right edge found (sequence-space comparison via after()).  The caller
 * (tcp_in_window) initializes *sack to the plain ack value. */
static void tcp_sack(const struct sk_buff *skb,
		     struct iphdr *iph,
		     struct tcphdr *tcph,
		     __u32 *sack)
{
	/* Room for the maximal option area of a 60-byte header. */
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);
	__u32 tmp;

	if (!length)
		return;

	ptr = skb_header_pointer(skb,
				 (iph->ihl * 4) + sizeof(struct tcphdr),
				 length, buff);
	/* Header length was validated in tcp_error(). */
	BUG_ON(ptr == NULL);

	/* Fast path for timestamp-only option */
	if (length == TCPOLEN_TSTAMP_ALIGNED*4
	    && *(__be32 *)ptr ==
		__constant_htonl((TCPOPT_NOP << 24)
				 | (TCPOPT_NOP << 16)
				 | (TCPOPT_TIMESTAMP << 8)
				 | TCPOLEN_TIMESTAMP))
		return;

	while (length > 0) {
		int opcode=*ptr++;
		int opsize, i;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			opsize=*ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				break;	/* don't parse partial options */

			/* Accept only a well-formed SACK option: at least
			 * one block, and a whole number of blocks. */
			if (opcode == TCPOPT_SACK
			    && opsize >= (TCPOLEN_SACK_BASE
					  + TCPOLEN_SACK_PERBLOCK)
			    && !((opsize - TCPOLEN_SACK_BASE)
				 % TCPOLEN_SACK_PERBLOCK)) {
				/* The right edge is the second 32-bit
				 * word of each block. */
				for (i = 0;
				     i < (opsize - TCPOLEN_SACK_BASE);
				     i += TCPOLEN_SACK_PERBLOCK) {
					tmp = ntohl(*((__be32 *)(ptr+i)+1));

					if (after(tmp, *sack))
						*sack = tmp;
				}
				return;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
562 | |||
/* Core of the window tracking: decide whether the segment lies within
 * the window of its sender and update the per-direction tracking data
 * (td_end, td_maxend, td_maxwin) for both sender and receiver.
 * Implements boundary checks I-IV described in the comment block above.
 * Returns 1 if the segment is acceptable, 0 otherwise (unless liberal
 * mode overrides the verdict).  Called with tcp_lock held for writing
 * (see tcp_packet()). */
static int tcp_in_window(struct ip_ct_tcp *state,
			 enum ip_conntrack_dir dir,
			 unsigned int index,
			 const struct sk_buff *skb,
			 struct iphdr *iph,
			 struct tcphdr *tcph)
{
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
	__u32 seq, ack, sack, end, win, swin;
	int res;

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win = ntohs(tcph->window);
	end = segment_seq_plus_len(seq, skb->len, iph, tcph);

	/* With SACK permitted, check III/IV use the highest SACK right
	 * edge rather than the plain ack value. */
	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, iph, tcph, &sack);

	DEBUGP("tcp_in_window: START\n");
	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
		NIPQUAD(iph->saddr), ntohs(tcph->source),
		NIPQUAD(iph->daddr), ntohs(tcph->dest),
		seq, ack, sack, win, end);
	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);

	if (sender->td_end == 0) {
		/*
		 * Initialize sender data.
		 */
		if (tcph->syn && tcph->ack) {
			/*
			 * Outgoing SYN-ACK in reply to a SYN.
			 */
			sender->td_end =
			sender->td_maxend = end;
			sender->td_maxwin = (win == 0 ? 1 : win);

			tcp_options(skb, iph, tcph, sender);
			/*
			 * RFC 1323:
			 * Both sides must send the Window Scale option
			 * to enable window scaling in either direction.
			 */
			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
				sender->td_scale =
				receiver->td_scale = 0;
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
			 */
			sender->td_end = end;
			sender->td_maxwin = (win == 0 ? 1 : win);
			sender->td_maxend = end + sender->td_maxwin;
		}
	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
		     && dir == IP_CT_DIR_ORIGINAL)
		    || (state->state == TCP_CONNTRACK_SYN_RECV
			&& dir == IP_CT_DIR_REPLY))
		   && after(end, sender->td_end)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
		 * not wait at all; it must only be sure to use sequence
		 * numbers larger than those recently used."
		 */
		sender->td_end =
		sender->td_maxend = end;
		sender->td_maxwin = (win == 0 ? 1 : win);

		tcp_options(skb, iph, tcph, sender);
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

	if (seq == end
	    && (!tcph->rst
		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
		/*
		 * Packet contains no data: we assume it is valid
		 * and check the ack value only.
		 * However RST segments are always validated by their
		 * SEQ number, except when seq == 0 (reset sent answering
		 * SYN).
		 */
		seq = end = sender->td_end;

	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
		NIPQUAD(iph->saddr), ntohs(tcph->source),
		NIPQUAD(iph->daddr), ntohs(tcph->dest),
		seq, ack, sack, win, end);
	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);

	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
		before(seq, sender->td_maxend + 1),
		after(end, sender->td_end - receiver->td_maxwin - 1),
		before(sack, receiver->td_end + 1),
		after(ack, receiver->td_end - MAXACKWINDOW(sender)));

	/* The four boundary checks I-IV from the comment block above. */
	if (before(seq, sender->td_maxend + 1) &&
	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
	    before(sack, receiver->td_end + 1) &&
	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
		/*
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
		if (after(end, sender->td_end))
			sender->td_end = end;
		/*
		 * Update receiver data.
		 */
		if (after(end, sender->td_maxend))
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}

		/*
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
			    && state->last_end == end
			    && state->last_win == win)
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
				state->last_win = win;
				state->retrans = 0;
			}
		}
		res = 1;
	} else {
		/* Out of window; liberal mode still accepts the packet. */
		res = 0;
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
		    ip_ct_tcp_be_liberal)
			res = 1;
		if (!res && LOG_INVALID(IPPROTO_TCP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
			"ip_ct_tcp: %s ",
			before(seq, sender->td_maxend + 1) ?
			after(end, sender->td_end - receiver->td_maxwin - 1) ?
			before(sack, receiver->td_end + 1) ?
			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
	}

	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
	       "receiver end=%u maxend=%u maxwin=%u\n",
		res, sender->td_end, sender->td_maxend, sender->td_maxwin,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);

	return res;
}
767 | |||
768 | #ifdef CONFIG_IP_NF_NAT_NEEDED | ||
/* Update sender->td_end after NAT successfully mangled the packet.
 * Mangling can change the segment length, so the right edge of the
 * sequence space for this direction is recomputed from the packet and
 * pushed forward if it grew.  Takes tcp_lock itself. */
void ip_conntrack_tcp_update(struct sk_buff *skb,
			     struct ip_conntrack *conntrack,
			     enum ip_conntrack_dir dir)
{
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
	__u32 end;
#ifdef DEBUGP_VARS
	/* Only needed for the debug output below. */
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
#endif

	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);

	write_lock_bh(&tcp_lock);
	/*
	 * We have to worry for the ack in the reply packet only...
	 */
	if (after(end, conntrack->proto.tcp.seen[dir].td_end))
		conntrack->proto.tcp.seen[dir].td_end = end;
	conntrack->proto.tcp.last_end = end;
	write_unlock_bh(&tcp_lock);
	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);
}
799 | |||
800 | #endif | ||
801 | |||
/* TCP flag bits as they appear in the 14th byte (offset 13) of the TCP
 * header; used to build the index into tcp_valid_flags[] below. */
#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_PUSH	0x08
#define TH_ACK	0x10
#define TH_URG	0x20
#define TH_ECE	0x40
#define TH_CWR	0x80
810 | |||
/* table of valid flag combinations - ECE and CWR are always valid
 * (they are masked off before the lookup in tcp_error()).  Indexed by
 * the flag byte; nonzero means the combination is acceptable. */
static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
{
	[TH_SYN]			= 1,
	[TH_SYN|TH_PUSH]		= 1,
	[TH_SYN|TH_URG]			= 1,
	[TH_SYN|TH_PUSH|TH_URG]		= 1,
	[TH_SYN|TH_ACK]			= 1,
	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
	[TH_RST]			= 1,
	[TH_RST|TH_ACK]			= 1,
	[TH_RST|TH_ACK|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK]			= 1,
	[TH_ACK]			= 1,
	[TH_ACK|TH_PUSH]		= 1,
	[TH_ACK|TH_URG]			= 1,
	[TH_ACK|TH_URG|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK|TH_URG]		= 1,
	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
};
832 | |||
833 | /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ | ||
834 | static int tcp_error(struct sk_buff *skb, | ||
835 | enum ip_conntrack_info *ctinfo, | ||
836 | unsigned int hooknum) | ||
837 | { | ||
838 | struct iphdr *iph = skb->nh.iph; | ||
839 | struct tcphdr _tcph, *th; | ||
840 | unsigned int tcplen = skb->len - iph->ihl * 4; | ||
841 | u_int8_t tcpflags; | ||
842 | |||
843 | /* Smaller that minimal TCP header? */ | ||
844 | th = skb_header_pointer(skb, iph->ihl * 4, | ||
845 | sizeof(_tcph), &_tcph); | ||
846 | if (th == NULL) { | ||
847 | if (LOG_INVALID(IPPROTO_TCP)) | ||
848 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
849 | "ip_ct_tcp: short packet "); | ||
850 | return -NF_ACCEPT; | ||
851 | } | ||
852 | |||
853 | /* Not whole TCP header or malformed packet */ | ||
854 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { | ||
855 | if (LOG_INVALID(IPPROTO_TCP)) | ||
856 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
857 | "ip_ct_tcp: truncated/malformed packet "); | ||
858 | return -NF_ACCEPT; | ||
859 | } | ||
860 | |||
861 | /* Checksum invalid? Ignore. | ||
862 | * We skip checking packets on the outgoing path | ||
863 | * because it is assumed to be correct. | ||
864 | */ | ||
865 | /* FIXME: Source route IP option packets --RR */ | ||
866 | if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && | ||
867 | nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) { | ||
868 | if (LOG_INVALID(IPPROTO_TCP)) | ||
869 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
870 | "ip_ct_tcp: bad TCP checksum "); | ||
871 | return -NF_ACCEPT; | ||
872 | } | ||
873 | |||
874 | /* Check TCP flags. */ | ||
875 | tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); | ||
876 | if (!tcp_valid_flags[tcpflags]) { | ||
877 | if (LOG_INVALID(IPPROTO_TCP)) | ||
878 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
879 | "ip_ct_tcp: invalid TCP flag combination "); | ||
880 | return -NF_ACCEPT; | ||
881 | } | ||
882 | |||
883 | return NF_ACCEPT; | ||
884 | } | ||
885 | |||
886 | /* Returns verdict for packet, or -1 for invalid. */ | ||
887 | static int tcp_packet(struct ip_conntrack *conntrack, | ||
888 | const struct sk_buff *skb, | ||
889 | enum ip_conntrack_info ctinfo) | ||
890 | { | ||
891 | enum tcp_conntrack new_state, old_state; | ||
892 | enum ip_conntrack_dir dir; | ||
893 | struct iphdr *iph = skb->nh.iph; | ||
894 | struct tcphdr *th, _tcph; | ||
895 | unsigned long timeout; | ||
896 | unsigned int index; | ||
897 | |||
898 | th = skb_header_pointer(skb, iph->ihl * 4, | ||
899 | sizeof(_tcph), &_tcph); | ||
900 | BUG_ON(th == NULL); | ||
901 | |||
902 | write_lock_bh(&tcp_lock); | ||
903 | old_state = conntrack->proto.tcp.state; | ||
904 | dir = CTINFO2DIR(ctinfo); | ||
905 | index = get_conntrack_index(th); | ||
906 | new_state = tcp_conntracks[dir][index][old_state]; | ||
907 | |||
908 | switch (new_state) { | ||
909 | case TCP_CONNTRACK_IGNORE: | ||
910 | /* Ignored packets: | ||
911 | * | ||
912 | * a) SYN in ORIGINAL | ||
913 | * b) SYN/ACK in REPLY | ||
914 | * c) ACK in reply direction after initial SYN in original. | ||
915 | */ | ||
916 | if (index == TCP_SYNACK_SET | ||
917 | && conntrack->proto.tcp.last_index == TCP_SYN_SET | ||
918 | && conntrack->proto.tcp.last_dir != dir | ||
919 | && ntohl(th->ack_seq) == | ||
920 | conntrack->proto.tcp.last_end) { | ||
921 | /* This SYN/ACK acknowledges a SYN that we earlier | ||
922 | * ignored as invalid. This means that the client and | ||
923 | * the server are both in sync, while the firewall is | ||
924 | * not. We kill this session and block the SYN/ACK so | ||
925 | * that the client cannot but retransmit its SYN and | ||
926 | * thus initiate a clean new session. | ||
927 | */ | ||
928 | write_unlock_bh(&tcp_lock); | ||
929 | if (LOG_INVALID(IPPROTO_TCP)) | ||
930 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | ||
931 | NULL, "ip_ct_tcp: " | ||
932 | "killing out of sync session "); | ||
933 | if (del_timer(&conntrack->timeout)) | ||
934 | conntrack->timeout.function((unsigned long) | ||
935 | conntrack); | ||
936 | return -NF_DROP; | ||
937 | } | ||
938 | conntrack->proto.tcp.last_index = index; | ||
939 | conntrack->proto.tcp.last_dir = dir; | ||
940 | conntrack->proto.tcp.last_seq = ntohl(th->seq); | ||
941 | conntrack->proto.tcp.last_end = | ||
942 | segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th); | ||
943 | |||
944 | write_unlock_bh(&tcp_lock); | ||
945 | if (LOG_INVALID(IPPROTO_TCP)) | ||
946 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
947 | "ip_ct_tcp: invalid packet ignored "); | ||
948 | return NF_ACCEPT; | ||
949 | case TCP_CONNTRACK_MAX: | ||
950 | /* Invalid packet */ | ||
951 | DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", | ||
952 | dir, get_conntrack_index(th), | ||
953 | old_state); | ||
954 | write_unlock_bh(&tcp_lock); | ||
955 | if (LOG_INVALID(IPPROTO_TCP)) | ||
956 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
957 | "ip_ct_tcp: invalid state "); | ||
958 | return -NF_ACCEPT; | ||
959 | case TCP_CONNTRACK_SYN_SENT: | ||
960 | if (old_state < TCP_CONNTRACK_TIME_WAIT) | ||
961 | break; | ||
962 | if ((conntrack->proto.tcp.seen[dir].flags & | ||
963 | IP_CT_TCP_FLAG_CLOSE_INIT) | ||
964 | || after(ntohl(th->seq), | ||
965 | conntrack->proto.tcp.seen[dir].td_end)) { | ||
966 | /* Attempt to reopen a closed connection. | ||
967 | * Delete this connection and look up again. */ | ||
968 | write_unlock_bh(&tcp_lock); | ||
969 | if (del_timer(&conntrack->timeout)) | ||
970 | conntrack->timeout.function((unsigned long) | ||
971 | conntrack); | ||
972 | return -NF_REPEAT; | ||
973 | } else { | ||
974 | write_unlock_bh(&tcp_lock); | ||
975 | if (LOG_INVALID(IPPROTO_TCP)) | ||
976 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | ||
977 | NULL, "ip_ct_tcp: invalid SYN"); | ||
978 | return -NF_ACCEPT; | ||
979 | } | ||
980 | case TCP_CONNTRACK_CLOSE: | ||
981 | if (index == TCP_RST_SET | ||
982 | && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) | ||
983 | && conntrack->proto.tcp.last_index == TCP_SYN_SET) | ||
984 | || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) | ||
985 | && conntrack->proto.tcp.last_index == TCP_ACK_SET)) | ||
986 | && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { | ||
987 | /* RST sent to invalid SYN or ACK we had let through | ||
988 | * at a) and c) above: | ||
989 | * | ||
990 | * a) SYN was in window then | ||
991 | * c) we hold a half-open connection. | ||
992 | * | ||
993 | * Delete our connection entry. | ||
994 | * We skip window checking, because packet might ACK | ||
995 | * segments we ignored. */ | ||
996 | goto in_window; | ||
997 | } | ||
998 | /* Just fall through */ | ||
999 | default: | ||
1000 | /* Keep compilers happy. */ | ||
1001 | break; | ||
1002 | } | ||
1003 | |||
1004 | if (!tcp_in_window(&conntrack->proto.tcp, dir, index, | ||
1005 | skb, iph, th)) { | ||
1006 | write_unlock_bh(&tcp_lock); | ||
1007 | return -NF_ACCEPT; | ||
1008 | } | ||
1009 | in_window: | ||
1010 | /* From now on we have got in-window packets */ | ||
1011 | conntrack->proto.tcp.last_index = index; | ||
1012 | |||
1013 | DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
1014 | "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", | ||
1015 | NIPQUAD(iph->saddr), ntohs(th->source), | ||
1016 | NIPQUAD(iph->daddr), ntohs(th->dest), | ||
1017 | (th->syn ? 1 : 0), (th->ack ? 1 : 0), | ||
1018 | (th->fin ? 1 : 0), (th->rst ? 1 : 0), | ||
1019 | old_state, new_state); | ||
1020 | |||
1021 | conntrack->proto.tcp.state = new_state; | ||
1022 | if (old_state != new_state | ||
1023 | && (new_state == TCP_CONNTRACK_FIN_WAIT | ||
1024 | || new_state == TCP_CONNTRACK_CLOSE)) | ||
1025 | conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; | ||
1026 | timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans | ||
1027 | && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans | ||
1028 | ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; | ||
1029 | write_unlock_bh(&tcp_lock); | ||
1030 | |||
1031 | ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
1032 | if (new_state != old_state) | ||
1033 | ip_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
1034 | |||
1035 | if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { | ||
1036 | /* If only reply is a RST, we can consider ourselves not to | ||
1037 | have an established connection: this is a fairly common | ||
1038 | problem case, so we can delete the conntrack | ||
1039 | immediately. --RR */ | ||
1040 | if (th->rst) { | ||
1041 | if (del_timer(&conntrack->timeout)) | ||
1042 | conntrack->timeout.function((unsigned long) | ||
1043 | conntrack); | ||
1044 | return NF_ACCEPT; | ||
1045 | } | ||
1046 | } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) | ||
1047 | && (old_state == TCP_CONNTRACK_SYN_RECV | ||
1048 | || old_state == TCP_CONNTRACK_ESTABLISHED) | ||
1049 | && new_state == TCP_CONNTRACK_ESTABLISHED) { | ||
1050 | /* Set ASSURED if we see see valid ack in ESTABLISHED | ||
1051 | after SYN_RECV or a valid answer for a picked up | ||
1052 | connection. */ | ||
1053 | set_bit(IPS_ASSURED_BIT, &conntrack->status); | ||
1054 | ip_conntrack_event_cache(IPCT_STATUS, skb); | ||
1055 | } | ||
1056 | ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); | ||
1057 | |||
1058 | return NF_ACCEPT; | ||
1059 | } | ||
1060 | |||
/* Called when a new connection for this protocol found.
 * Initialises the per-direction window tracking state.  Returns 1 to
 * keep the conntrack entry, 0 to have the core delete it as invalid. */
static int tcp_new(struct ip_conntrack *conntrack,
		   const struct sk_buff *skb)
{
	enum tcp_conntrack new_state;
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *th, _tcph;
#ifdef DEBUGP_VARS
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
#endif

	th = skb_header_pointer(skb, iph->ihl * 4,
				sizeof(_tcph), &_tcph);
	/* The header was already validated before we got here. */
	BUG_ON(th == NULL);

	/* Don't need lock here: this conntrack not in circulation yet */
	new_state
		= tcp_conntracks[0][get_conntrack_index(th)]
		[TCP_CONNTRACK_NONE];

	/* Invalid: delete conntrack */
	if (new_state >= TCP_CONNTRACK_MAX) {
		DEBUGP("ip_ct_tcp: invalid new deleting.\n");
		return 0;
	}

	if (new_state == TCP_CONNTRACK_SYN_SENT) {
		/* SYN packet */
		conntrack->proto.tcp.seen[0].td_end =
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     iph, th);
		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
		/* A zero window would make the window check useless;
		 * clamp to the minimum usable value. */
		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
			conntrack->proto.tcp.seen[0].td_maxwin = 1;
		conntrack->proto.tcp.seen[0].td_maxend =
			conntrack->proto.tcp.seen[0].td_end;

		tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
		conntrack->proto.tcp.seen[1].flags = 0;
	} else if (ip_ct_tcp_loose == 0) {
		/* Don't try to pick up connections. */
		return 0;
	} else {
		/*
		 * We are in the middle of a connection,
		 * its history is lost for us.
		 * Let's try to use the data from the packet.
		 */
		conntrack->proto.tcp.seen[0].td_end =
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     iph, th);
		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
			conntrack->proto.tcp.seen[0].td_maxwin = 1;
		conntrack->proto.tcp.seen[0].td_maxend =
			conntrack->proto.tcp.seen[0].td_end +
			conntrack->proto.tcp.seen[0].td_maxwin;
		conntrack->proto.tcp.seen[0].td_scale = 0;

		/* We assume SACK and liberal window checking to handle
		 * window scaling */
		conntrack->proto.tcp.seen[0].flags =
			conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
							     IP_CT_TCP_FLAG_BE_LIBERAL;
	}

	/* Nothing seen from the reply direction yet. */
	conntrack->proto.tcp.seen[1].td_end = 0;
	conntrack->proto.tcp.seen[1].td_maxend = 0;
	conntrack->proto.tcp.seen[1].td_maxwin = 1;
	conntrack->proto.tcp.seen[1].td_scale = 0;

	/* tcp_packet will set them */
	conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
	conntrack->proto.tcp.last_index = TCP_NONE_SET;

	DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
	       sender->td_end, sender->td_maxend, sender->td_maxwin,
	       sender->td_scale,
	       receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
	       receiver->td_scale);
	return 1;
}
1145 | |||
/* TCP protocol handler hooks, registered with the ip_conntrack core. */
struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
{
	.proto 			= IPPROTO_TCP,
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
	.to_nfattr		= tcp_to_nfattr,
	.from_nfattr		= nfattr_to_tcp,
	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
#endif
};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c deleted file mode 100644 index 14c30c646c7f..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ /dev/null | |||
@@ -1,148 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/timer.h> | ||
11 | #include <linux/netfilter.h> | ||
12 | #include <linux/in.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/udp.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include <net/checksum.h> | ||
17 | #include <linux/netfilter_ipv4.h> | ||
18 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
19 | |||
/* Conntrack timeouts in jiffies: short for unreplied datagrams, longer
 * once traffic has been seen in both directions (a UDP "stream"). */
unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
22 | |||
23 | static int udp_pkt_to_tuple(const struct sk_buff *skb, | ||
24 | unsigned int dataoff, | ||
25 | struct ip_conntrack_tuple *tuple) | ||
26 | { | ||
27 | struct udphdr _hdr, *hp; | ||
28 | |||
29 | /* Actually only need first 8 bytes. */ | ||
30 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
31 | if (hp == NULL) | ||
32 | return 0; | ||
33 | |||
34 | tuple->src.u.udp.port = hp->source; | ||
35 | tuple->dst.u.udp.port = hp->dest; | ||
36 | |||
37 | return 1; | ||
38 | } | ||
39 | |||
/* Fill @tuple with the reply direction of @orig: the source and
 * destination ports are swapped.  Always succeeds (returns 1). */
static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
			    const struct ip_conntrack_tuple *orig)
{
	tuple->src.u.udp.port = orig->dst.u.udp.port;
	tuple->dst.u.udp.port = orig->src.u.udp.port;
	return 1;
}
47 | |||
/* Print out the per-protocol part of the tuple: the UDP port pair,
 * converted to host byte order for display. */
static int udp_print_tuple(struct seq_file *s,
			   const struct ip_conntrack_tuple *tuple)
{
	return seq_printf(s, "sport=%hu dport=%hu ",
			  ntohs(tuple->src.u.udp.port),
			  ntohs(tuple->dst.u.udp.port));
}
56 | |||
/* Print out the private part of the conntrack.  UDP keeps no
 * per-connection protocol state, so there is nothing to print. */
static int udp_print_conntrack(struct seq_file *s,
			       const struct ip_conntrack *conntrack)
{
	return 0;
}
63 | |||
/* Returns verdict for packet, and may modify conntracktype.
 * Always NF_ACCEPT for UDP; the work here is refreshing the timeout
 * and promoting bidirectional flows to ASSURED. */
static int udp_packet(struct ip_conntrack *conntrack,
		      const struct sk_buff *skb,
		      enum ip_conntrack_info ctinfo)
{
	/* If we've seen traffic both ways, this is some kind of UDP
	   stream.  Extend timeout. */
	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
		ip_ct_refresh_acct(conntrack, ctinfo, skb,
				   ip_ct_udp_timeout_stream);
		/* Also, more likely to be important, and not a probe */
		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
			/* Emit the status event only on the 0->1 edge. */
			ip_conntrack_event_cache(IPCT_STATUS, skb);
	} else
		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);

	return NF_ACCEPT;
}
82 | |||
/* Called when a new connection for this protocol found.  UDP needs no
 * per-connection initialisation; returning 1 accepts the connection. */
static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
{
	return 1;
}
88 | |||
/* Validate a UDP packet before tracking.  Returns NF_ACCEPT if it
 * looks sane, -NF_ACCEPT (reject from conntrack's point of view,
 * optionally logging why) otherwise. */
static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
		     unsigned int hooknum)
{
	struct iphdr *iph = skb->nh.iph;
	unsigned int udplen = skb->len - iph->ihl * 4;
	struct udphdr _hdr, *hdr;

	/* Header is too small? */
	hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
	if (hdr == NULL) {
		if (LOG_INVALID(IPPROTO_UDP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
				      "ip_ct_udp: short packet ");
		return -NF_ACCEPT;
	}

	/* Truncated/malformed packets: the length field must fit inside
	 * the actual payload and be at least a full UDP header. */
	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
		if (LOG_INVALID(IPPROTO_UDP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
				      "ip_ct_udp: truncated/malformed packet ");
		return -NF_ACCEPT;
	}

	/* Packet with no checksum */
	if (!hdr->check)
		return NF_ACCEPT;

	/* Checksum invalid? Ignore.
	 * We skip checking packets on the outgoing path
	 * because the checksum is assumed to be correct.
	 * FIXME: Source route IP option packets --RR */
	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
		if (LOG_INVALID(IPPROTO_UDP))
			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
				      "ip_ct_udp: bad UDP checksum ");
		return -NF_ACCEPT;
	}

	return NF_ACCEPT;
}
131 | |||
/* UDP protocol handler hooks, registered with the ip_conntrack core. */
struct ip_conntrack_protocol ip_conntrack_protocol_udp =
{
	.proto 			= IPPROTO_UDP,
	.name			= "udp",
	.pkt_to_tuple		= udp_pkt_to_tuple,
	.invert_tuple		= udp_invert_tuple,
	.print_tuple		= udp_print_tuple,
	.print_conntrack	= udp_print_conntrack,
	.packet			= udp_packet,
	.new			= udp_new,
	.error			= udp_error,
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
#endif
};
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c deleted file mode 100644 index c59a962c1f61..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ /dev/null | |||
@@ -1,520 +0,0 @@ | |||
1 | /* SIP extension for IP connection tracking. | ||
2 | * | ||
3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> | ||
4 | * based on RR's ip_conntrack_ftp.c and other modules. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/ctype.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/in.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/udp.h> | ||
17 | |||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/netfilter_ipv4.h> | ||
20 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
21 | #include <linux/netfilter_ipv4/ip_conntrack_sip.h> | ||
22 | |||
23 | #if 0 | ||
24 | #define DEBUGP printk | ||
25 | #else | ||
26 | #define DEBUGP(format, args...) | ||
27 | #endif | ||
28 | |||
29 | MODULE_LICENSE("GPL"); | ||
30 | MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); | ||
31 | MODULE_DESCRIPTION("SIP connection tracking helper"); | ||
32 | |||
33 | #define MAX_PORTS 8 | ||
34 | static unsigned short ports[MAX_PORTS]; | ||
35 | static int ports_c; | ||
36 | module_param_array(ports, ushort, &ports_c, 0400); | ||
37 | MODULE_PARM_DESC(ports, "port numbers of sip servers"); | ||
38 | |||
39 | static unsigned int sip_timeout = SIP_TIMEOUT; | ||
40 | module_param(sip_timeout, uint, 0600); | ||
41 | MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); | ||
42 | |||
/* NAT helper hook pointers: when the SIP NAT module is loaded it plugs
 * its mangling routines in here; NULL means tracking only, no NAT. */
unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
				enum ip_conntrack_info ctinfo,
				struct ip_conntrack *ct,
				const char **dptr);
EXPORT_SYMBOL_GPL(ip_nat_sip_hook);

unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
				enum ip_conntrack_info ctinfo,
				struct ip_conntrack_expect *exp,
				const char *dptr);
EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
54 | |||
55 | static int digits_len(const char *dptr, const char *limit, int *shift); | ||
56 | static int epaddr_len(const char *dptr, const char *limit, int *shift); | ||
57 | static int skp_digits_len(const char *dptr, const char *limit, int *shift); | ||
58 | static int skp_epaddr_len(const char *dptr, const char *limit, int *shift); | ||
59 | |||
/* Description of one searchable SIP/SDP header. */
struct sip_header_nfo {
	const char *lname;	/* long header name, e.g. "Contact:" */
	const char *sname;	/* compact form, e.g. "\r\nm:" (may be NULL) */
	const char *ln_str;	/* marker preceding the value on the line */
	size_t lnlen;		/* strlen(lname) */
	size_t snlen;		/* strlen(sname) */
	size_t ln_strlen;	/* strlen(ln_str) */
	int case_sensitive;	/* nonzero: match with strncmp, not strnicmp */
	/* compute length of the matched value (dptr, limit, shift) */
	int (*match_len)(const char *, const char *, int *);
};
70 | |||
/* Table of the SIP/SDP headers the helper can locate, indexed by
 * enum sip_header_pos.  ct_sip_get_info() scans for lname or sname,
 * then searches the line for ln_str and measures the following value
 * with match_len. */
static struct sip_header_nfo ct_sip_hdrs[] = {
	[POS_REG_REQ_URI] = { /* SIP REGISTER request URI */
		.lname		= "sip:",
		.lnlen		= sizeof("sip:") - 1,
		.ln_str		= ":",
		.ln_strlen	= sizeof(":") - 1,
		.match_len	= epaddr_len
	},
	[POS_REQ_URI] = { /* SIP request URI */
		.lname		= "sip:",
		.lnlen		= sizeof("sip:") - 1,
		.ln_str		= "@",
		.ln_strlen	= sizeof("@") - 1,
		.match_len	= epaddr_len
	},
	[POS_FROM] = { /* SIP From header */
		.lname		= "From:",
		.lnlen		= sizeof("From:") - 1,
		.sname		= "\r\nf:",
		.snlen		= sizeof("\r\nf:") - 1,
		.ln_str		= "sip:",
		.ln_strlen	= sizeof("sip:") - 1,
		.match_len	= skp_epaddr_len,
	},
	[POS_TO] = { /* SIP To header */
		.lname		= "To:",
		.lnlen		= sizeof("To:") - 1,
		.sname		= "\r\nt:",
		.snlen		= sizeof("\r\nt:") - 1,
		.ln_str		= "sip:",
		.ln_strlen	= sizeof("sip:") - 1,
		.match_len	= skp_epaddr_len,
	},
	[POS_VIA] = { /* SIP Via header */
		.lname		= "Via:",
		.lnlen		= sizeof("Via:") - 1,
		.sname		= "\r\nv:",
		.snlen		= sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
		.ln_str		= "UDP ",
		.ln_strlen	= sizeof("UDP ") - 1,
		.match_len	= epaddr_len,
	},
	[POS_CONTACT] = { /* SIP Contact header */
		.lname		= "Contact:",
		.lnlen		= sizeof("Contact:") - 1,
		.sname		= "\r\nm:",
		.snlen		= sizeof("\r\nm:") - 1,
		.ln_str		= "sip:",
		.ln_strlen	= sizeof("sip:") - 1,
		.match_len	= skp_epaddr_len
	},
	[POS_CONTENT] = { /* SIP Content length header */
		.lname		= "Content-Length:",
		.lnlen		= sizeof("Content-Length:") - 1,
		.sname		= "\r\nl:",
		.snlen		= sizeof("\r\nl:") - 1,
		.ln_str		= ":",
		.ln_strlen	= sizeof(":") - 1,
		.match_len	= skp_digits_len
	},
	[POS_MEDIA] = { /* SDP media info */
		.case_sensitive	= 1,
		.lname		= "\nm=",
		.lnlen		= sizeof("\nm=") - 1,
		.sname		= "\rm=",
		.snlen		= sizeof("\rm=") - 1,
		.ln_str		= "audio ",
		.ln_strlen	= sizeof("audio ") - 1,
		.match_len	= digits_len
	},
	[POS_OWNER] = { /* SDP owner address*/
		.case_sensitive	= 1,
		.lname		= "\no=",
		.lnlen		= sizeof("\no=") - 1,
		.sname		= "\ro=",
		.snlen		= sizeof("\ro=") - 1,
		.ln_str		= "IN IP4 ",
		.ln_strlen	= sizeof("IN IP4 ") - 1,
		.match_len	= epaddr_len
	},
	[POS_CONNECTION] = { /* SDP connection info */
		.case_sensitive	= 1,
		.lname		= "\nc=",
		.lnlen		= sizeof("\nc=") - 1,
		.sname		= "\rc=",
		.snlen		= sizeof("\rc=") - 1,
		.ln_str		= "IN IP4 ",
		.ln_strlen	= sizeof("IN IP4 ") - 1,
		.match_len	= epaddr_len
	},
	[POS_SDP_HEADER] = { /* SDP version header */
		.case_sensitive	= 1,
		.lname		= "\nv=",
		.lnlen		= sizeof("\nv=") - 1,
		.sname		= "\rv=",
		.snlen		= sizeof("\rv=") - 1,
		.ln_str		= "=",
		.ln_strlen	= sizeof("=") - 1,
		.match_len	= digits_len
	}
};
172 | |||
/* Return the length of the current line, counted from @line up to (but
 * not including) the first CR or LF that follows any leading CR/LF run.
 * Leading CR/LF characters are skipped over but still included in the
 * returned length.  @limit points at the last valid byte (inclusive). */
int ct_sip_lnlen(const char *line, const char *limit)
{
	const char *start = line;
	const char *p = line;

	/* Step over the terminator(s) of the previous line. */
	while (p <= limit && (*p == '\r' || *p == '\n'))
		p++;

	/* Scan up to the next terminator or the end of the buffer. */
	while (p <= limit && *p != '\r' && *p != '\n')
		p++;

	return p - start;
}
188 | EXPORT_SYMBOL_GPL(ct_sip_lnlen); | ||
189 | |||
/* Linear string search, case sensitive.
 * Returns a pointer to the first occurrence of @needle within the
 * first @haystack_len bytes of @haystack, or NULL if not found.
 * @case_sensitive selects strncmp vs. case-insensitive strnicmp. */
const char *ct_sip_search(const char *needle, const char *haystack,
			  size_t needle_len, size_t haystack_len,
			  int case_sensitive)
{
	/* Last position where a full needle can still fit. */
	const char *limit = haystack + (haystack_len - needle_len);

	while (haystack <= limit) {
		if (case_sensitive) {
			if (strncmp(haystack, needle, needle_len) == 0)
				return haystack;
		} else {
			if (strnicmp(haystack, needle, needle_len) == 0)
				return haystack;
		}
		haystack++;
	}
	return NULL;
}
209 | EXPORT_SYMBOL_GPL(ct_sip_search); | ||
210 | |||
/* Count the run of decimal digits starting at @dptr, bounded by @limit
 * (inclusive).  @shift is unused here; it is kept so the function fits
 * the common match_len callback signature. */
static int digits_len(const char *dptr, const char *limit, int *shift)
{
	const char *p = dptr;

	while (p <= limit && isdigit(*p))
		p++;
	return p - dptr;
}

/* Digit-run length as above, but leading spaces are skipped first and
 * accounted for in *shift. */
static int skp_digits_len(const char *dptr, const char *limit, int *shift)
{
	while (dptr <= limit && *dptr == ' ') {
		dptr++;
		(*shift)++;
	}
	return digits_len(dptr, limit, shift);
}
229 | |||
/* Simple ipaddr parser: reads up to four '.'-separated decimal octets
 * from @cp (bounded by @limit) into *ipaddr, stored in memory order
 * (i.e. network byte order).  On success returns 0 and, if @endp is
 * non-NULL, stores the position just past the parsed address; returns
 * -1 on failure (an octet > 255, or a component with no digits). */
static int parse_ipaddr(const char *cp, const char **endp,
			__be32 *ipaddr, const char *limit)
{
	unsigned long int val;
	int i, digit = 0;

	for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) {
		digit = 0;
		if (!isdigit(*cp))
			break;

		val = simple_strtoul(cp, (char **)&cp, 10);
		if (val > 0xFF)
			return -1;

		((u_int8_t *)ipaddr)[i] = val;
		digit = 1;

		if (*cp != '.')
			break;
		cp++;
	}
	/* Fail if the last component seen contained no digits. */
	if (!digit)
		return -1;

	if (endp)
		*endp = cp;

	return 0;
}
261 | |||
/* Skip an "ip[:port]" endpoint address and return its length in
 * characters, or 0 if no valid IP address could be parsed. */
static int epaddr_len(const char *dptr, const char *limit, int *shift)
{
	const char *aux = dptr;
	__be32 ip;

	if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
		DEBUGP("ip: %s parse failed.!\n", dptr);
		return 0;
	}

	/* Port number */
	if (*dptr == ':') {
		dptr++;
		dptr += digits_len(dptr, limit, shift);
	}
	return dptr - aux;
}
280 | |||
/* Get address length, skipping user info: steps over an optional
 * "user@" prefix (counted into *shift) before measuring the endpoint
 * address itself.  If no '@' is found on this line, *shift is restored
 * to its original value and the address is measured from @dptr. */
static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
{
	int s = *shift;

	/* Search for @, but stop at the end of the line.
	 * We are inside a sip: URI, so we don't need to worry about
	 * continuation lines. */
	while (dptr <= limit &&
	       *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
		(*shift)++;
		dptr++;
	}

	if (dptr <= limit && *dptr == '@') {
		dptr++;
		(*shift)++;
	} else
		/* No user part after all: undo the speculative shift. */
		*shift = s;

	return epaddr_len(dptr, limit, shift);
}
303 | |||
/* Locate the header/field selected by @pos in the message @dptr/@dlen.
 * On success stores the value's offset and length into *matchoff and
 * *matchlen and returns 1.  Returns 0 if not found, -1 error parsing. */
int ct_sip_get_info(const char *dptr, size_t dlen,
		    unsigned int *matchoff,
		    unsigned int *matchlen,
		    enum sip_header_pos pos)
{
	struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
	const char *limit, *aux, *k = dptr;
	int shift = 0;

	limit = dptr + (dlen - hnfo->lnlen);

	while (dptr <= limit) {
		/* Try the long name and, if defined, the compact name. */
		if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
		    (hnfo->sname == NULL ||
		     strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
			dptr++;
			continue;
		}
		/* Header found: locate the value marker on this line. */
		aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
				    ct_sip_lnlen(dptr, limit),
				    hnfo->case_sensitive);
		if (!aux) {
			DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
			       hnfo->lname);
			return -1;
		}
		aux += hnfo->ln_strlen;

		*matchlen = hnfo->match_len(aux, limit, &shift);
		if (!*matchlen)
			return -1;

		/* Offset relative to the start of the message, plus any
		 * characters the match_len callback skipped. */
		*matchoff = (aux - k) + shift;

		DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
		       *matchlen);
		return 1;
	}
	DEBUGP("%s header not found.\n", hnfo->lname);
	return 0;
}
346 | EXPORT_SYMBOL_GPL(ct_sip_get_info); | ||
347 | |||
/* Set up an expectation for the RTP stream advertised in the SDP body:
 * any source port from the peer to @ipaddr:@port over UDP.  If the SIP
 * NAT helper is loaded, hand the expectation to it instead of
 * registering it directly.  Returns an NF_* verdict. */
static int set_expected_rtp(struct sk_buff **pskb,
			    struct ip_conntrack *ct,
			    enum ip_conntrack_info ctinfo,
			    __be32 ipaddr, u_int16_t port,
			    const char *dptr)
{
	struct ip_conntrack_expect *exp;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	int ret;
	typeof(ip_nat_sdp_hook) ip_nat_sdp;

	exp = ip_conntrack_expect_alloc(ct);
	if (exp == NULL)
		return NF_DROP;

	/* Source port left as 0 with a 0 mask: any port matches. */
	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
	exp->tuple.src.u.udp.port = 0;
	exp->tuple.dst.ip = ipaddr;
	exp->tuple.dst.u.udp.port = htons(port);
	exp->tuple.dst.protonum = IPPROTO_UDP;

	exp->mask.src.ip = htonl(0xFFFFFFFF);
	exp->mask.src.u.udp.port = 0;
	exp->mask.dst.ip = htonl(0xFFFFFFFF);
	exp->mask.dst.u.udp.port = htons(0xFFFF);
	exp->mask.dst.protonum = 0xFF;

	exp->expectfn = NULL;
	exp->flags = 0;

	ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
	if (ip_nat_sdp)
		ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
	else {
		if (ip_conntrack_expect_related(exp) != 0)
			ret = NF_DROP;
		else
			ret = NF_ACCEPT;
	}
	ip_conntrack_expect_put(exp);

	return ret;
}
391 | |||
/* Conntrack helper for SIP over UDP: refreshes the master session
 * timeout, runs the NAT hook if loaded, and creates an RTP expectation
 * from the SDP payload of INVITE / "SIP/2.0 200" messages.
 * Returns an NF_* verdict. */
static int sip_help(struct sk_buff **pskb,
		    struct ip_conntrack *ct,
		    enum ip_conntrack_info ctinfo)
{
	unsigned int dataoff, datalen;
	const char *dptr;
	int ret = NF_ACCEPT;
	int matchoff, matchlen;
	__be32 ipaddr;
	u_int16_t port;
	typeof(ip_nat_sip_hook) ip_nat_sip;

	/* No Data ? */
	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
	if (dataoff >= (*pskb)->len) {
		DEBUGP("skb->len = %u\n", (*pskb)->len);
		return NF_ACCEPT;
	}

	/* Any SIP traffic keeps the master session alive. */
	ip_ct_refresh(ct, *pskb, sip_timeout * HZ);

	if (!skb_is_nonlinear(*pskb))
		dptr = (*pskb)->data + dataoff;
	else {
		DEBUGP("Copy of skbuff not supported yet.\n");
		goto out;
	}

	ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
	if (ip_nat_sip) {
		if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
			ret = NF_DROP;
			goto out;
		}
	}

	/* After this point NAT could have mangled the skb, so
	   we need to recalculate the payload length. */
	datalen = (*pskb)->len - dataoff;

	if (datalen < (sizeof("SIP/2.0 200") - 1))
		goto out;

	/* RTP info only in some SDP pkts */
	if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
	    memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
		goto out;
	}
	/* Get ip and port address from SDP packet. */
	if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
			    POS_CONNECTION) > 0) {

		/* We'll drop only if there are parse problems. */
		if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
				 dptr + datalen) < 0) {
			ret = NF_DROP;
			goto out;
		}
		if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
				    POS_MEDIA) > 0) {

			/* Refuse to expect ports below 1024. */
			port = simple_strtoul(dptr + matchoff, NULL, 10);
			if (port < 1024) {
				ret = NF_DROP;
				goto out;
			}
			ret = set_expected_rtp(pskb, ct, ctinfo,
					       ipaddr, port, dptr);
		}
	}
out:
	return ret;
}
465 | |||
/* One helper instance and one name buffer per configured port. */
static struct ip_conntrack_helper sip[MAX_PORTS];
static char sip_names[MAX_PORTS][10];
468 | |||
/* Module exit / error path: unregister every helper set up by init(). */
static void fini(void)
{
	int i;
	for (i = 0; i < ports_c; i++) {
		DEBUGP("unregistering helper for port %d\n", ports[i]);
		ip_conntrack_helper_unregister(&sip[i]);
	}
}
477 | |||
/* Module init: register one SIP helper per configured UDP port,
 * defaulting to SIP_PORT when no ports= parameter was given.
 * Returns 0 on success or the registration error code. */
static int __init init(void)
{
	int i, ret;
	char *tmpname;

	if (ports_c == 0)
		ports[ports_c++] = SIP_PORT;

	for (i = 0; i < ports_c; i++) {
		/* Create helper structure */
		memset(&sip[i], 0, sizeof(struct ip_conntrack_helper));

		sip[i].tuple.dst.protonum = IPPROTO_UDP;
		sip[i].tuple.src.u.udp.port = htons(ports[i]);
		sip[i].mask.src.u.udp.port = htons(0xFFFF);
		sip[i].mask.dst.protonum = 0xFF;
		sip[i].max_expected = 2;
		sip[i].timeout = 3 * 60; /* 3 minutes */
		sip[i].me = THIS_MODULE;
		sip[i].help = sip_help;

		tmpname = &sip_names[i][0];
		if (ports[i] == SIP_PORT)
			sprintf(tmpname, "sip");
		else
			sprintf(tmpname, "sip-%d", i);
		sip[i].name = tmpname;

		DEBUGP("port #%d: %d\n", i, ports[i]);

		ret = ip_conntrack_helper_register(&sip[i]);
		if (ret) {
			printk("ERROR registering helper for port %d\n",
			       ports[i]);
			/* Undo all registrations done so far. */
			fini();
			return ret;
		}
	}
	return 0;
}
518 | |||
519 | module_init(init); | ||
520 | module_exit(fini); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c deleted file mode 100644 index 56b2f7546d1e..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ /dev/null | |||
@@ -1,962 +0,0 @@ | |||
1 | /* This file contains all the functions required for the standalone | ||
2 | ip_conntrack module. | ||
3 | |||
4 | These are not required by the compatibility layer. | ||
5 | */ | ||
6 | |||
7 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
8 | * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | #include <linux/types.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/skbuff.h> | ||
21 | #include <linux/proc_fs.h> | ||
22 | #include <linux/seq_file.h> | ||
23 | #include <linux/percpu.h> | ||
24 | #ifdef CONFIG_SYSCTL | ||
25 | #include <linux/sysctl.h> | ||
26 | #endif | ||
27 | #include <net/checksum.h> | ||
28 | #include <net/ip.h> | ||
29 | #include <net/route.h> | ||
30 | |||
31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
32 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
33 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
34 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
35 | |||
/* Compile-time debug switch: change "#if 0" to "#if 1" to enable
 * verbose printk logging. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

MODULE_LICENSE("GPL");

/* Global conntrack entry count and per-CPU stats, maintained elsewhere
 * in the conntrack core. */
extern atomic_t ip_conntrack_count;
DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
46 | |||
47 | static int kill_proto(struct ip_conntrack *i, void *data) | ||
48 | { | ||
49 | return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == | ||
50 | *((u_int8_t *) data)); | ||
51 | } | ||
52 | |||
53 | #ifdef CONFIG_PROC_FS | ||
/* Emit "src=a.b.c.d dst=a.b.c.d " for @tuple, then delegate the
 * protocol-specific part (ports, ICMP ids, ...) to the protocol's
 * print_tuple callback, whose non-zero return signals seq overflow. */
static int
print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
	    struct ip_conntrack_protocol *proto)
{
	seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
		   NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
	return proto->print_tuple(s, tuple);
}
62 | |||
#ifdef CONFIG_IP_NF_CT_ACCT
/* Print per-direction accounting counters.  Compiles away to a
 * constant 0 (no output, no overflow) when accounting is disabled. */
static unsigned int
seq_print_counters(struct seq_file *s,
		   const struct ip_conntrack_counter *counter)
{
	return seq_printf(s, "packets=%llu bytes=%llu ",
			  (unsigned long long)counter->packets,
			  (unsigned long long)counter->bytes);
}
#else
#define seq_print_counters(x, y) 0
#endif
75 | |||
/* Per-open seq_file cursor: index of the conntrack hash bucket
 * currently being walked. */
struct ct_iter_state {
	unsigned int bucket;
};
79 | |||
80 | static struct list_head *ct_get_first(struct seq_file *seq) | ||
81 | { | ||
82 | struct ct_iter_state *st = seq->private; | ||
83 | |||
84 | for (st->bucket = 0; | ||
85 | st->bucket < ip_conntrack_htable_size; | ||
86 | st->bucket++) { | ||
87 | if (!list_empty(&ip_conntrack_hash[st->bucket])) | ||
88 | return ip_conntrack_hash[st->bucket].next; | ||
89 | } | ||
90 | return NULL; | ||
91 | } | ||
92 | |||
/* Advance @head to the next hash entry, rolling over into subsequent
 * buckets when the current one is exhausted; NULL at end of table. */
static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
{
	struct ct_iter_state *st = seq->private;

	head = head->next;
	/* Hit the bucket's list head: skip to the next non-empty bucket. */
	while (head == &ip_conntrack_hash[st->bucket]) {
		if (++st->bucket >= ip_conntrack_htable_size)
			return NULL;
		head = ip_conntrack_hash[st->bucket].next;
	}
	return head;
}
105 | |||
106 | static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) | ||
107 | { | ||
108 | struct list_head *head = ct_get_first(seq); | ||
109 | |||
110 | if (head) | ||
111 | while (pos && (head = ct_get_next(seq, head))) | ||
112 | pos--; | ||
113 | return pos ? NULL : head; | ||
114 | } | ||
115 | |||
/* seq_file start: take the conntrack read lock (paired with the unlock
 * in ct_seq_stop(), which runs even if start returns NULL) and seek to
 * entry *pos. */
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock_bh(&ip_conntrack_lock);
	return ct_get_idx(seq, *pos);
}

static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_get_next(s, v);
}

/* Releases the lock taken in ct_seq_start(). */
static void ct_seq_stop(struct seq_file *s, void *v)
{
	read_unlock_bh(&ip_conntrack_lock);
}
132 | |||
/* Render one conntrack entry as a single /proc/net/ip_conntrack line.
 * Only the ORIGINAL-direction hash entry is printed so each connection
 * appears exactly once.  Any seq_printf overflow aborts with -ENOSPC
 * so the seq_file core can retry with a larger buffer. */
static int ct_seq_show(struct seq_file *s, void *v)
{
	const struct ip_conntrack_tuple_hash *hash = v;
	const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
	struct ip_conntrack_protocol *proto;

	IP_NF_ASSERT(conntrack);

	/* we only want to print DIR_ORIGINAL */
	if (DIRECTION(hash))
		return 0;

	proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
	IP_NF_ASSERT(proto);

	/* Protocol name and number, then seconds until the entry expires
	 * (0 if the timeout timer is not armed). */
	if (seq_printf(s, "%-8s %u %ld ",
		       proto->name,
		       conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
		       timer_pending(&conntrack->timeout)
		       ? (long)(conntrack->timeout.expires - jiffies)/HZ
		       : 0) != 0)
		return -ENOSPC;

	if (proto->print_conntrack(s, conntrack))
		return -ENOSPC;

	/* Original-direction tuple plus (optional) accounting counters. */
	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
			proto))
		return -ENOSPC;

	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
		return -ENOSPC;

	/* [UNREPLIED] until traffic has been seen in the reply direction. */
	if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
		if (seq_printf(s, "[UNREPLIED] "))
			return -ENOSPC;

	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
			proto))
		return -ENOSPC;

	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
		return -ENOSPC;

	if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
		if (seq_printf(s, "[ASSURED] "))
			return -ENOSPC;

#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
	if (seq_printf(s, "mark=%u ", conntrack->mark))
		return -ENOSPC;
#endif

#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
	if (seq_printf(s, "secmark=%u ", conntrack->secmark))
		return -ENOSPC;
#endif

	if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
		return -ENOSPC;

	return 0;
}
196 | |||
/* seq_file callbacks for /proc/net/ip_conntrack. */
static struct seq_operations ct_seq_ops = {
	.start = ct_seq_start,
	.next = ct_seq_next,
	.stop = ct_seq_stop,
	.show = ct_seq_show
};
203 | |||
204 | static int ct_open(struct inode *inode, struct file *file) | ||
205 | { | ||
206 | struct seq_file *seq; | ||
207 | struct ct_iter_state *st; | ||
208 | int ret; | ||
209 | |||
210 | st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); | ||
211 | if (st == NULL) | ||
212 | return -ENOMEM; | ||
213 | ret = seq_open(file, &ct_seq_ops); | ||
214 | if (ret) | ||
215 | goto out_free; | ||
216 | seq = file->private_data; | ||
217 | seq->private = st; | ||
218 | memset(st, 0, sizeof(struct ct_iter_state)); | ||
219 | return ret; | ||
220 | out_free: | ||
221 | kfree(st); | ||
222 | return ret; | ||
223 | } | ||
224 | |||
/* /proc/net/ip_conntrack: per-open state is allocated in ct_open() and
 * released by seq_release_private(). */
static const struct file_operations ct_file_ops = {
	.owner = THIS_MODULE,
	.open = ct_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
232 | |||
/* expects */
/* Position the iterator at entry *pos of the global expectation list,
 * or NULL when the list is shorter than that. */
static void *exp_seq_start(struct seq_file *s, loff_t *pos)
{
	struct list_head *e = &ip_conntrack_expect_list;
	loff_t i;

	/* strange seq_file api calls stop even if we fail,
	 * thus we need to grab lock since stop unlocks */
	read_lock_bh(&ip_conntrack_lock);

	if (list_empty(e))
		return NULL;

	/* Walk forward *pos+1 links from the list head. */
	for (i = 0; i <= *pos; i++) {
		e = e->next;
		if (e == &ip_conntrack_expect_list)
			return NULL;
	}
	return e;
}
253 | |||
254 | static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
255 | { | ||
256 | struct list_head *e = v; | ||
257 | |||
258 | ++*pos; | ||
259 | e = e->next; | ||
260 | |||
261 | if (e == &ip_conntrack_expect_list) | ||
262 | return NULL; | ||
263 | |||
264 | return e; | ||
265 | } | ||
266 | |||
/* Releases the lock taken in exp_seq_start(). */
static void exp_seq_stop(struct seq_file *s, void *v)
{
	read_unlock_bh(&ip_conntrack_lock);
}

/* One line per expectation: remaining lifetime in seconds ("-" when no
 * timeout function is set), protocol number, then the expected tuple. */
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct ip_conntrack_expect *expect = v;

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");

	seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);

	print_tuple(s, &expect->tuple,
		    __ip_conntrack_proto_find(expect->tuple.dst.protonum));
	return seq_putc(s, '\n');
}
288 | |||
/* seq_file plumbing for /proc/net/ip_conntrack_expect. */
static struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

/* No per-open state is needed, so the plain seq_open()/seq_release()
 * pair suffices. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &exp_seq_ops);
}

static const struct file_operations exp_file_ops = {
	.owner = THIS_MODULE,
	.open = exp_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};
308 | |||
/* Iterator for /proc/net/stat/ip_conntrack: position 0 yields
 * SEQ_START_TOKEN (the header row); afterwards *pos-1 is the next CPU
 * to report, with impossible CPUs skipped.  *pos is stored as cpu+1 so
 * position 0 remains the header. */
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(ip_conntrack_stat, cpu);
	}

	return NULL;
}

/* Advance to the statistics block of the next possible CPU. */
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(ip_conntrack_stat, cpu);
	}

	return NULL;
}
339 | |||
/* Nothing to release: the per-CPU counters are read without locking. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}

/* One row per CPU.  The first column ("entries") is the global
 * conntrack count and therefore repeats on every row. */
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
	struct ip_conntrack_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
		return 0;
	}

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
		   "%08x %08x %08x %08x %08x %08x %08x %08x \n",
		   nr_conntracks,
		   st->searched,
		   st->found,
		   st->new,
		   st->invalid,
		   st->ignore,
		   st->delete,
		   st->delete_list,
		   st->insert,
		   st->insert_failed,
		   st->drop,
		   st->early_drop,
		   st->error,

		   st->expect_new,
		   st->expect_create,
		   st->expect_delete
		);
	return 0;
}
376 | |||
/* seq_file plumbing for /proc/net/stat/ip_conntrack. */
static struct seq_operations ct_cpu_seq_ops = {
	.start = ct_cpu_seq_start,
	.next = ct_cpu_seq_next,
	.stop = ct_cpu_seq_stop,
	.show = ct_cpu_seq_show,
};

/* No per-open iterator state is allocated here. */
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &ct_cpu_seq_ops);
}
388 | |||
389 | static const struct file_operations ct_cpu_seq_fops = { | ||
390 | .owner = THIS_MODULE, | ||
391 | .open = ct_cpu_seq_open, | ||
392 | .read = seq_read, | ||
393 | .llseek = seq_lseek, | ||
394 | .release = seq_release_private, | ||
395 | }; | ||
396 | #endif | ||
397 | |||
/* Last hook on POST_ROUTING/LOCAL_IN: the packet survived filtering,
 * so commit its (so far unconfirmed) conntrack entry to the table. */
static unsigned int ip_confirm(unsigned int hooknum,
			       struct sk_buff **pskb,
			       const struct net_device *in,
			       const struct net_device *out,
			       int (*okfn)(struct sk_buff *))
{
	/* We've seen it coming out the other side: confirm it */
	return ip_conntrack_confirm(pskb);
}

/* Invoke the connection's protocol helper (if any) on outgoing and
 * locally-delivered packets; related replies (ICMP errors) are
 * excluded. */
static unsigned int ip_conntrack_help(unsigned int hooknum,
				      struct sk_buff **pskb,
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	/* This is where we call the helper: as the packet goes out. */
	ct = ip_conntrack_get(*pskb, &ctinfo);
	if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) {
		unsigned int ret;
		ret = ct->helper->help(pskb, ct, ctinfo);
		if (ret != NF_ACCEPT)
			return ret;
	}
	return NF_ACCEPT;
}
427 | |||
/* PRE_ROUTING/LOCAL_OUT hook: reassemble IP fragments so connection
 * tracking always sees complete datagrams. */
static unsigned int ip_conntrack_defrag(unsigned int hooknum,
					struct sk_buff **pskb,
					const struct net_device *in,
					const struct net_device *out,
					int (*okfn)(struct sk_buff *))
{
#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
	/* Previously seen (loopback)? Ignore. Do this before
	   fragment check. */
	if ((*pskb)->nfct)
		return NF_ACCEPT;
#endif

	/* Gather fragments. */
	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
		*pskb = ip_ct_gather_frags(*pskb,
					   hooknum == NF_IP_PRE_ROUTING ?
					   IP_DEFRAG_CONNTRACK_IN :
					   IP_DEFRAG_CONNTRACK_OUT);
		/* Queued for reassembly: this skb is consumed for now. */
		if (!*pskb)
			return NF_STOLEN;
	}
	return NF_ACCEPT;
}
452 | |||
453 | static unsigned int ip_conntrack_local(unsigned int hooknum, | ||
454 | struct sk_buff **pskb, | ||
455 | const struct net_device *in, | ||
456 | const struct net_device *out, | ||
457 | int (*okfn)(struct sk_buff *)) | ||
458 | { | ||
459 | /* root is playing with raw sockets. */ | ||
460 | if ((*pskb)->len < sizeof(struct iphdr) | ||
461 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { | ||
462 | if (net_ratelimit()) | ||
463 | printk("ipt_hook: happy cracking.\n"); | ||
464 | return NF_ACCEPT; | ||
465 | } | ||
466 | return ip_conntrack_in(hooknum, pskb, in, out, okfn); | ||
467 | } | ||
468 | |||
/* Connection tracking may drop packets, but never alters them, so
   make it the first hook. */
static struct nf_hook_ops ip_conntrack_ops[] = {
	/* Inbound and locally generated traffic: defragment first, then
	 * run the tracker itself. */
	{
		.hook		= ip_conntrack_defrag,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
	},
	{
		.hook		= ip_conntrack_in,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= ip_conntrack_defrag,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
	},
	{
		.hook		= ip_conntrack_local,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	/* After filtering: run protocol helpers, then confirm the entry
	 * as the very last step on both delivery paths. */
	{
		.hook		= ip_conntrack_help,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
	},
	{
		.hook		= ip_conntrack_help,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
	},
	{
		.hook		= ip_confirm,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		.hook		= ip_confirm,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
529 | |||
/* Sysctl support */

/* Whether to verify checksums before tracking; exported below and
 * tunable via net.ipv4.netfilter.ip_conntrack_checksum. */
int ip_conntrack_checksum __read_mostly = 1;

#ifdef CONFIG_SYSCTL

/* From ip_conntrack_core.c */
extern int ip_conntrack_max;
extern unsigned int ip_conntrack_htable_size;

/* From ip_conntrack_proto_tcp.c */
extern unsigned int ip_ct_tcp_timeout_syn_sent;
extern unsigned int ip_ct_tcp_timeout_syn_recv;
extern unsigned int ip_ct_tcp_timeout_established;
extern unsigned int ip_ct_tcp_timeout_fin_wait;
extern unsigned int ip_ct_tcp_timeout_close_wait;
extern unsigned int ip_ct_tcp_timeout_last_ack;
extern unsigned int ip_ct_tcp_timeout_time_wait;
extern unsigned int ip_ct_tcp_timeout_close;
extern unsigned int ip_ct_tcp_timeout_max_retrans;
extern int ip_ct_tcp_loose;
extern int ip_ct_tcp_be_liberal;
extern int ip_ct_tcp_max_retrans;

/* From ip_conntrack_proto_udp.c */
extern unsigned int ip_ct_udp_timeout;
extern unsigned int ip_ct_udp_timeout_stream;

/* From ip_conntrack_proto_icmp.c */
extern unsigned int ip_ct_icmp_timeout;

/* From ip_conntrack_proto_generic.c */
extern unsigned int ip_ct_generic_timeout;

/* Log invalid packets of a given protocol */
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;

/* Handle returned by register_sysctl_table(); used on unload. */
static struct ctl_table_header *ip_ct_sysctl_header;
569 | |||
/* net.ipv4.netfilter.* tunables.  All *_timeout values are stored in
 * jiffies and converted from/to seconds by proc_dointvec_jiffies. */
static ctl_table ip_ct_sysctl_table[] = {
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
		.procname	= "ip_conntrack_max",
		.data		= &ip_conntrack_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_COUNT,
		.procname	= "ip_conntrack_count",
		.data		= &ip_conntrack_count,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_BUCKETS,
		.procname	= "ip_conntrack_buckets",
		.data		= &ip_conntrack_htable_size,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0444,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_CHECKSUM,
		.procname	= "ip_conntrack_checksum",
		.data		= &ip_conntrack_checksum,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
		.data		= &ip_ct_tcp_timeout_syn_sent,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
		.data		= &ip_ct_tcp_timeout_syn_recv,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
		.procname	= "ip_conntrack_tcp_timeout_established",
		.data		= &ip_ct_tcp_timeout_established,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
		.data		= &ip_ct_tcp_timeout_fin_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
		.procname	= "ip_conntrack_tcp_timeout_close_wait",
		.data		= &ip_ct_tcp_timeout_close_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
		.procname	= "ip_conntrack_tcp_timeout_last_ack",
		.data		= &ip_ct_tcp_timeout_last_ack,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
		.procname	= "ip_conntrack_tcp_timeout_time_wait",
		.data		= &ip_ct_tcp_timeout_time_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
		.procname	= "ip_conntrack_tcp_timeout_close",
		.data		= &ip_ct_tcp_timeout_close,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
		.procname	= "ip_conntrack_udp_timeout",
		.data		= &ip_ct_udp_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
		.procname	= "ip_conntrack_udp_timeout_stream",
		.data		= &ip_ct_udp_timeout_stream,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
		.procname	= "ip_conntrack_icmp_timeout",
		.data		= &ip_ct_icmp_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
		.procname	= "ip_conntrack_generic_timeout",
		.data		= &ip_ct_generic_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		/* Valid values are IP protocol numbers, hence the
		 * 0..255 clamp via extra1/extra2. */
		.ctl_name	= NET_IPV4_NF_CONNTRACK_LOG_INVALID,
		.procname	= "ip_conntrack_log_invalid",
		.data		= &ip_ct_log_invalid,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &log_invalid_proto_min,
		.extra2		= &log_invalid_proto_max,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
		.data		= &ip_ct_tcp_timeout_max_retrans,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
		.procname	= "ip_conntrack_tcp_loose",
		.data		= &ip_ct_tcp_loose,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
		.procname	= "ip_conntrack_tcp_be_liberal",
		.data		= &ip_ct_tcp_be_liberal,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
		.procname	= "ip_conntrack_tcp_max_retrans",
		.data		= &ip_ct_tcp_max_retrans,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};
744 | |||
#define NET_IP_CONNTRACK_MAX 2089

/* net.ipv4.netfilter subtree, plus a second "ip_conntrack_max" entry
 * directly under net.ipv4 — it shares .data with the entry inside
 * ip_ct_sysctl_table, so both spellings control the same limit
 * (apparently kept for compatibility). */
static ctl_table ip_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_IPV4_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= ip_ct_sysctl_table,
	},
	{
		.ctl_name	= NET_IP_CONNTRACK_MAX,
		.procname	= "ip_conntrack_max",
		.data		= &ip_conntrack_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec
	},
	{ .ctl_name = 0 }
};

static ctl_table ip_ct_ipv4_table[] = {
	{
		.ctl_name	= NET_IPV4,
		.procname	= "ipv4",
		.mode		= 0555,
		.child		= ip_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};

/* Root of the registration: net -> ipv4 -> netfilter -> tunables. */
static ctl_table ip_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= ip_ct_ipv4_table,
	},
	{ .ctl_name = 0 }
};

EXPORT_SYMBOL(ip_ct_log_invalid);
#endif /* CONFIG_SYSCTL */
787 | |||
788 | /* FIXME: Allow NULL functions and sub in pointers to generic for | ||
789 | them. --RR */ | ||
790 | int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) | ||
791 | { | ||
792 | int ret = 0; | ||
793 | |||
794 | write_lock_bh(&ip_conntrack_lock); | ||
795 | if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) { | ||
796 | ret = -EBUSY; | ||
797 | goto out; | ||
798 | } | ||
799 | rcu_assign_pointer(ip_ct_protos[proto->proto], proto); | ||
800 | out: | ||
801 | write_unlock_bh(&ip_conntrack_lock); | ||
802 | return ret; | ||
803 | } | ||
804 | |||
/* Swap the slot back to the generic handler, wait out all RCU readers
 * that may still hold the old pointer, then reap every conntrack entry
 * that used this protocol. */
void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{
	write_lock_bh(&ip_conntrack_lock);
	rcu_assign_pointer(ip_ct_protos[proto->proto],
			   &ip_conntrack_generic_protocol);
	write_unlock_bh(&ip_conntrack_lock);
	synchronize_rcu();

	/* Remove all contrack entries for this protocol */
	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
}
816 | |||
817 | static int __init ip_conntrack_standalone_init(void) | ||
818 | { | ||
819 | #ifdef CONFIG_PROC_FS | ||
820 | struct proc_dir_entry *proc, *proc_exp, *proc_stat; | ||
821 | #endif | ||
822 | int ret = 0; | ||
823 | |||
824 | ret = ip_conntrack_init(); | ||
825 | if (ret < 0) | ||
826 | return ret; | ||
827 | |||
828 | #ifdef CONFIG_PROC_FS | ||
829 | ret = -ENOMEM; | ||
830 | proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); | ||
831 | if (!proc) goto cleanup_init; | ||
832 | |||
833 | proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, | ||
834 | &exp_file_ops); | ||
835 | if (!proc_exp) goto cleanup_proc; | ||
836 | |||
837 | proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); | ||
838 | if (!proc_stat) | ||
839 | goto cleanup_proc_exp; | ||
840 | |||
841 | proc_stat->proc_fops = &ct_cpu_seq_fops; | ||
842 | proc_stat->owner = THIS_MODULE; | ||
843 | #endif | ||
844 | |||
845 | ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops)); | ||
846 | if (ret < 0) { | ||
847 | printk("ip_conntrack: can't register hooks.\n"); | ||
848 | goto cleanup_proc_stat; | ||
849 | } | ||
850 | #ifdef CONFIG_SYSCTL | ||
851 | ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table); | ||
852 | if (ip_ct_sysctl_header == NULL) { | ||
853 | printk("ip_conntrack: can't register to sysctl.\n"); | ||
854 | ret = -ENOMEM; | ||
855 | goto cleanup_hooks; | ||
856 | } | ||
857 | #endif | ||
858 | return ret; | ||
859 | |||
860 | #ifdef CONFIG_SYSCTL | ||
861 | cleanup_hooks: | ||
862 | nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops)); | ||
863 | #endif | ||
864 | cleanup_proc_stat: | ||
865 | #ifdef CONFIG_PROC_FS | ||
866 | remove_proc_entry("ip_conntrack", proc_net_stat); | ||
867 | cleanup_proc_exp: | ||
868 | proc_net_remove("ip_conntrack_expect"); | ||
869 | cleanup_proc: | ||
870 | proc_net_remove("ip_conntrack"); | ||
871 | cleanup_init: | ||
872 | #endif /* CONFIG_PROC_FS */ | ||
873 | ip_conntrack_cleanup(); | ||
874 | return ret; | ||
875 | } | ||
876 | |||
/* Module unload: tear everything down in reverse order of init. */
static void __exit ip_conntrack_standalone_fini(void)
{
	/* NOTE(review): synchronize_net() presumably lets in-flight
	 * packet processing drain before the hooks disappear — confirm. */
	synchronize_net();
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(ip_ct_sysctl_header);
#endif
	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_conntrack", proc_net_stat);
	proc_net_remove("ip_conntrack_expect");
	proc_net_remove("ip_conntrack");
#endif /* CONFIG_PROC_FS */
	ip_conntrack_cleanup();
}
891 | |||
module_init(ip_conntrack_standalone_init);
module_exit(ip_conntrack_standalone_fini);

/* Some modules need us, but don't depend directly on any symbol.
   They should call this: referencing the exported symbol forces a
   module dependency on ip_conntrack. */
void need_conntrack(void)
{
}
900 | |||
/* Exported interface consumed by protocol trackers, helper modules
 * (FTP/SIP/TFTP/...), ctnetlink and the NAT layer. */
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
EXPORT_SYMBOL_GPL(ip_conntrack_chain);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
#endif
EXPORT_SYMBOL(ip_conntrack_protocol_register);
EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
EXPORT_SYMBOL(ip_ct_get_tuple);
EXPORT_SYMBOL(invert_tuplepr);
EXPORT_SYMBOL(ip_conntrack_alter_reply);
EXPORT_SYMBOL(ip_conntrack_destroyed);
EXPORT_SYMBOL(need_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_iterate_cleanup);
EXPORT_SYMBOL(__ip_ct_refresh_acct);

EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_put);
EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);

EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
EXPORT_SYMBOL(ip_conntrack_htable_size);
EXPORT_SYMBOL(ip_conntrack_lock);
EXPORT_SYMBOL(ip_conntrack_hash);
EXPORT_SYMBOL(ip_conntrack_untracked);
EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
#ifdef CONFIG_IP_NF_NAT_NEEDED
EXPORT_SYMBOL(ip_conntrack_tcp_update);
#endif

EXPORT_SYMBOL_GPL(ip_conntrack_flush);
EXPORT_SYMBOL_GPL(__ip_conntrack_find);

EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
EXPORT_SYMBOL_GPL(ip_conntrack_free);
EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);

EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);

EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);

EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
EXPORT_SYMBOL_GPL(ip_conntrack_checksum);
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c deleted file mode 100644 index 76e175e7a972..000000000000 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ /dev/null | |||
@@ -1,161 +0,0 @@ | |||
1 | /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License version 2 as | ||
5 | * published by the Free Software Foundation. | ||
6 | * | ||
7 | * Version: 0.0.7 | ||
8 | * | ||
9 | * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org> | ||
10 | * - port to newnat API | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/udp.h> | ||
17 | |||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
20 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
21 | #include <linux/netfilter_ipv4/ip_conntrack_tftp.h> | ||
22 | #include <linux/moduleparam.h> | ||
23 | |||
24 | MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); | ||
25 | MODULE_DESCRIPTION("tftp connection tracking helper"); | ||
26 | MODULE_LICENSE("GPL"); | ||
27 | |||
28 | #define MAX_PORTS 8 | ||
29 | static unsigned short ports[MAX_PORTS]; | ||
30 | static int ports_c; | ||
31 | module_param_array(ports, ushort, &ports_c, 0400); | ||
32 | MODULE_PARM_DESC(ports, "port numbers of tftp servers"); | ||
33 | |||
34 | #if 0 | ||
35 | #define DEBUGP(format, args...) printk("%s:%s:" format, \ | ||
36 | __FILE__, __FUNCTION__ , ## args) | ||
37 | #else | ||
38 | #define DEBUGP(format, args...) | ||
39 | #endif | ||
40 | |||
41 | unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb, | ||
42 | enum ip_conntrack_info ctinfo, | ||
43 | struct ip_conntrack_expect *exp); | ||
44 | EXPORT_SYMBOL_GPL(ip_nat_tftp_hook); | ||
45 | |||
46 | static int tftp_help(struct sk_buff **pskb, | ||
47 | struct ip_conntrack *ct, | ||
48 | enum ip_conntrack_info ctinfo) | ||
49 | { | ||
50 | struct tftphdr _tftph, *tfh; | ||
51 | struct ip_conntrack_expect *exp; | ||
52 | unsigned int ret = NF_ACCEPT; | ||
53 | typeof(ip_nat_tftp_hook) ip_nat_tftp; | ||
54 | |||
55 | tfh = skb_header_pointer(*pskb, | ||
56 | (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), | ||
57 | sizeof(_tftph), &_tftph); | ||
58 | if (tfh == NULL) | ||
59 | return NF_ACCEPT; | ||
60 | |||
61 | switch (ntohs(tfh->opcode)) { | ||
62 | /* RRQ and WRQ works the same way */ | ||
63 | case TFTP_OPCODE_READ: | ||
64 | case TFTP_OPCODE_WRITE: | ||
65 | DEBUGP(""); | ||
66 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
67 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
68 | |||
69 | exp = ip_conntrack_expect_alloc(ct); | ||
70 | if (exp == NULL) | ||
71 | return NF_DROP; | ||
72 | |||
73 | exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
74 | exp->mask.src.ip = htonl(0xffffffff); | ||
75 | exp->mask.src.u.udp.port = 0; | ||
76 | exp->mask.dst.ip = htonl(0xffffffff); | ||
77 | exp->mask.dst.u.udp.port = htons(0xffff); | ||
78 | exp->mask.dst.protonum = 0xff; | ||
79 | exp->expectfn = NULL; | ||
80 | exp->flags = 0; | ||
81 | |||
82 | DEBUGP("expect: "); | ||
83 | DUMP_TUPLE(&exp->tuple); | ||
84 | DUMP_TUPLE(&exp->mask); | ||
85 | ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook); | ||
86 | if (ip_nat_tftp) | ||
87 | ret = ip_nat_tftp(pskb, ctinfo, exp); | ||
88 | else if (ip_conntrack_expect_related(exp) != 0) | ||
89 | ret = NF_DROP; | ||
90 | ip_conntrack_expect_put(exp); | ||
91 | break; | ||
92 | case TFTP_OPCODE_DATA: | ||
93 | case TFTP_OPCODE_ACK: | ||
94 | DEBUGP("Data/ACK opcode\n"); | ||
95 | break; | ||
96 | case TFTP_OPCODE_ERROR: | ||
97 | DEBUGP("Error opcode\n"); | ||
98 | break; | ||
99 | default: | ||
100 | DEBUGP("Unknown opcode\n"); | ||
101 | } | ||
102 | return NF_ACCEPT; | ||
103 | } | ||
104 | |||
105 | static struct ip_conntrack_helper tftp[MAX_PORTS]; | ||
106 | static char tftp_names[MAX_PORTS][sizeof("tftp-65535")]; | ||
107 | |||
108 | static void ip_conntrack_tftp_fini(void) | ||
109 | { | ||
110 | int i; | ||
111 | |||
112 | for (i = 0 ; i < ports_c; i++) { | ||
113 | DEBUGP("unregistering helper for port %d\n", | ||
114 | ports[i]); | ||
115 | ip_conntrack_helper_unregister(&tftp[i]); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | static int __init ip_conntrack_tftp_init(void) | ||
120 | { | ||
121 | int i, ret; | ||
122 | char *tmpname; | ||
123 | |||
124 | if (ports_c == 0) | ||
125 | ports[ports_c++] = TFTP_PORT; | ||
126 | |||
127 | for (i = 0; i < ports_c; i++) { | ||
128 | /* Create helper structure */ | ||
129 | memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper)); | ||
130 | |||
131 | tftp[i].tuple.dst.protonum = IPPROTO_UDP; | ||
132 | tftp[i].tuple.src.u.udp.port = htons(ports[i]); | ||
133 | tftp[i].mask.dst.protonum = 0xFF; | ||
134 | tftp[i].mask.src.u.udp.port = htons(0xFFFF); | ||
135 | tftp[i].max_expected = 1; | ||
136 | tftp[i].timeout = 5 * 60; /* 5 minutes */ | ||
137 | tftp[i].me = THIS_MODULE; | ||
138 | tftp[i].help = tftp_help; | ||
139 | |||
140 | tmpname = &tftp_names[i][0]; | ||
141 | if (ports[i] == TFTP_PORT) | ||
142 | sprintf(tmpname, "tftp"); | ||
143 | else | ||
144 | sprintf(tmpname, "tftp-%d", i); | ||
145 | tftp[i].name = tmpname; | ||
146 | |||
147 | DEBUGP("port #%d: %d\n", i, ports[i]); | ||
148 | |||
149 | ret=ip_conntrack_helper_register(&tftp[i]); | ||
150 | if (ret) { | ||
151 | printk("ERROR registering helper for port %d\n", | ||
152 | ports[i]); | ||
153 | ip_conntrack_tftp_fini(); | ||
154 | return(ret); | ||
155 | } | ||
156 | } | ||
157 | return(0); | ||
158 | } | ||
159 | |||
160 | module_init(ip_conntrack_tftp_init); | ||
161 | module_exit(ip_conntrack_tftp_fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c deleted file mode 100644 index 85df1a9aed33..000000000000 --- a/net/ipv4/netfilter/ip_nat_amanda.c +++ /dev/null | |||
@@ -1,85 +0,0 @@ | |||
1 | /* Amanda extension for TCP NAT alteration. | ||
2 | * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> | ||
3 | * based on a copy of HW's ip_nat_irc.c as well as other modules | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * Module load syntax: | ||
11 | * insmod ip_nat_amanda.o | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/netfilter.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/ip.h> | ||
19 | #include <linux/udp.h> | ||
20 | #include <net/tcp.h> | ||
21 | #include <net/udp.h> | ||
22 | |||
23 | #include <linux/netfilter_ipv4.h> | ||
24 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
25 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
26 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
27 | #include <linux/netfilter_ipv4/ip_conntrack_amanda.h> | ||
28 | |||
29 | |||
30 | MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); | ||
31 | MODULE_DESCRIPTION("Amanda NAT helper"); | ||
32 | MODULE_LICENSE("GPL"); | ||
33 | |||
/*
 * NAT helper for the Amanda backup protocol.  The conntrack side found
 * a port number at [matchoff, matchoff+matchlen) inside the UDP control
 * packet; pick a translated TCP port, register the expectation, and
 * rewrite the packet payload to carry the new port.
 *
 * Returns an NF_* verdict; NF_DROP when no free port could be found,
 * and the mangle result otherwise.
 */
static unsigned int help(struct sk_buff **pskb,
			 enum ip_conntrack_info ctinfo,
			 unsigned int matchoff,
			 unsigned int matchlen,
			 struct ip_conntrack_expect *exp)
{
	char buffer[sizeof("65535")];
	u_int16_t port;
	unsigned int ret;

	/* Connection comes from client. */
	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
	exp->dir = IP_CT_DIR_ORIGINAL;

	/* When you see the packet, we need to NAT it the same as the
	 * this one (ie. same IP: it will be TCP and master is UDP). */
	exp->expectfn = ip_nat_follow_master;

	/* Try to get same port: if not, try to change it. */
	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
		exp->tuple.dst.u.tcp.port = htons(port);
		if (ip_conntrack_expect_related(exp) == 0)
			break;
	}

	/* port wrapped to 0: the whole range was exhausted. */
	if (port == 0)
		return NF_DROP;

	sprintf(buffer, "%u", port);
	ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
				       matchoff, matchlen,
				       buffer, strlen(buffer));
	/* Mangling failed: drop the expectation we just registered. */
	if (ret != NF_ACCEPT)
		ip_conntrack_unexpect_related(exp);
	return ret;
}
70 | |||
/* Module unload: unpublish the NAT hook, then wait for all in-flight
 * RCU readers of it to finish before the module text goes away. */
static void __exit ip_nat_amanda_fini(void)
{
	rcu_assign_pointer(ip_nat_amanda_hook, NULL);
	synchronize_rcu();
}
76 | |||
/* Module load: publish help() via the conntrack-side hook pointer.
 * The BUG_ON guards against a stale/duplicate registration. */
static int __init ip_nat_amanda_init(void)
{
	BUG_ON(rcu_dereference(ip_nat_amanda_hook));
	rcu_assign_pointer(ip_nat_amanda_hook, help);
	return 0;
}
83 | |||
84 | module_init(ip_nat_amanda_init); | ||
85 | module_exit(ip_nat_amanda_fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c deleted file mode 100644 index 40737fdbe9a7..000000000000 --- a/net/ipv4/netfilter/ip_nat_core.c +++ /dev/null | |||
@@ -1,634 +0,0 @@ | |||
1 | /* NAT for netfilter; shared with compatibility layer. */ | ||
2 | |||
3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
4 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/timer.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <linux/netfilter_ipv4.h> | ||
16 | #include <linux/vmalloc.h> | ||
17 | #include <net/checksum.h> | ||
18 | #include <net/icmp.h> | ||
19 | #include <net/ip.h> | ||
20 | #include <net/tcp.h> /* For tcp_prot in getorigdst */ | ||
21 | #include <linux/icmp.h> | ||
22 | #include <linux/udp.h> | ||
23 | #include <linux/jhash.h> | ||
24 | |||
25 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
26 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
27 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
28 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
29 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
30 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
31 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
32 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
33 | |||
34 | #if 0 | ||
35 | #define DEBUGP printk | ||
36 | #else | ||
37 | #define DEBUGP(format, args...) | ||
38 | #endif | ||
39 | |||
40 | DEFINE_RWLOCK(ip_nat_lock); | ||
41 | |||
42 | /* Calculated at init based on memory size */ | ||
43 | static unsigned int ip_nat_htable_size; | ||
44 | |||
45 | static struct list_head *bysource; | ||
46 | |||
47 | #define MAX_IP_NAT_PROTO 256 | ||
48 | static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; | ||
49 | |||
/* Look up the NAT protocol handler for @protonum.  Caller must be
 * inside an RCU read-side critical section (or otherwise prevent the
 * table entry from being replaced underneath it). */
static inline struct ip_nat_protocol *
__ip_nat_proto_find(u_int8_t protonum)
{
	return rcu_dereference(ip_nat_protos[protonum]);
}
55 | |||
/*
 * Look up the NAT protocol handler for @protonum and pin its owning
 * module.  If the module is already being unloaded, fall back to the
 * built-in ip_nat_unknown_protocol, so the result is never NULL.
 * Release with ip_nat_proto_put().
 */
struct ip_nat_protocol *
ip_nat_proto_find_get(u_int8_t protonum)
{
	struct ip_nat_protocol *p;

	/* RCU keeps the table entry alive while we take the module ref. */
	rcu_read_lock();
	p = __ip_nat_proto_find(protonum);
	if (!try_module_get(p->me))
		p = &ip_nat_unknown_protocol;
	rcu_read_unlock();

	return p;
}
EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
70 | |||
/* Drop the module reference taken by ip_nat_proto_find_get(). */
void
ip_nat_proto_put(struct ip_nat_protocol *p)
{
	module_put(p->me);
}
EXPORT_SYMBOL_GPL(ip_nat_proto_put);
77 | |||
/* We keep an extra hash for each conntrack, for fast searching. */
/* Returns the bysource[] bucket index for @tuple, derived from the
 * original source IP, source port/id and protocol number. */
static inline unsigned int
hash_by_src(const struct ip_conntrack_tuple *tuple)
{
	/* Original src, to ensure we map it consistently if poss. */
	return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
			    tuple->dst.protonum, 0) % ip_nat_htable_size;
}
86 | |||
/* Noone using conntrack by the time this called. */
/* Conntrack destruction callback: unhash the entry from the bysource
 * table, but only if NAT setup ever completed for it. */
static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
{
	if (!(conn->status & IPS_NAT_DONE_MASK))
		return;

	write_lock_bh(&ip_nat_lock);
	list_del(&conn->nat.info.bysource);
	write_unlock_bh(&ip_nat_lock);
}
97 | |||
/* Is this tuple already taken? (not by us) */
/* Returns non-zero if @tuple is in use by some conntrack other than
 * @ignored_conntrack. */
int
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
		  const struct ip_conntrack *ignored_conntrack)
{
	/* Conntrack tracking doesn't keep track of outgoing tuples; only
	   incoming ones.  NAT means they don't have a fixed mapping,
	   so we invert the tuple and look for the incoming reply.

	   We could keep a separate hash if this proves too slow. */
	struct ip_conntrack_tuple reply;

	invert_tuplepr(&reply, tuple);
	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
EXPORT_SYMBOL(ip_nat_used_tuple);
114 | |||
/* If we source map this tuple so reply looks like reply_tuple, will
 * that meet the constraints of range. */
/* Returns 1 when @tuple's source satisfies both the IP part and (if
 * specified) the per-protocol part of @range, 0 otherwise. */
static int
in_range(const struct ip_conntrack_tuple *tuple,
	 const struct ip_nat_range *range)
{
	struct ip_nat_protocol *proto;
	int ret = 0;

	/* If we are supposed to map IPs, then we must be in the
	   range specified, otherwise let this drag us onto a new src IP. */
	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
		if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
		    || ntohl(tuple->src.ip) > ntohl(range->max_ip))
			return 0;
	}

	/* Per-protocol check (ports etc.) under RCU, since the handler
	 * may belong to a module. */
	rcu_read_lock();
	proto = __ip_nat_proto_find(tuple->dst.protonum);
	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
	    || proto->in_range(tuple, IP_NAT_MANIP_SRC,
			       &range->min, &range->max))
		ret = 1;
	rcu_read_unlock();

	return ret;
}
142 | |||
143 | static inline int | ||
144 | same_src(const struct ip_conntrack *ct, | ||
145 | const struct ip_conntrack_tuple *tuple) | ||
146 | { | ||
147 | return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum | ||
148 | == tuple->dst.protonum | ||
149 | && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip | ||
150 | == tuple->src.ip | ||
151 | && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all | ||
152 | == tuple->src.u.all); | ||
153 | } | ||
154 | |||
/* Only called for SRC manip */
/* Reuse an existing source mapping: scan the bysource bucket for a
 * conntrack with the same original source as @tuple, and if the mapping
 * it already has also fits @range, copy it into @result.  Returns 1 on
 * success (with *result filled in), 0 when no usable mapping exists. */
static int
find_appropriate_src(const struct ip_conntrack_tuple *tuple,
		     struct ip_conntrack_tuple *result,
		     const struct ip_nat_range *range)
{
	unsigned int h = hash_by_src(tuple);
	struct ip_conntrack *ct;

	read_lock_bh(&ip_nat_lock);
	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
		if (same_src(ct, tuple)) {
			/* Copy source part from reply tuple. */
			invert_tuplepr(result,
				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
			result->dst = tuple->dst;

			if (in_range(result, range)) {
				read_unlock_bh(&ip_nat_lock);
				return 1;
			}
		}
	}
	read_unlock_bh(&ip_nat_lock);
	return 0;
}
181 | |||
/* For [FUTURE] fragmentation handling, we want the least-used
   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
   1-65535, we don't do pro-rata allocation based on ports; we choose
   the ip with the lowest src-ip/dst-ip/proto usage.
*/
/* Pick the IP for the manipulated side of @tuple from @range:
 * no-op unless IP mapping was requested; trivial for a 1-IP range;
 * otherwise a hash of (src,dst) selects a stable address within the
 * range.  @conntrack is unused here. */
static void
find_best_ips_proto(struct ip_conntrack_tuple *tuple,
		    const struct ip_nat_range *range,
		    const struct ip_conntrack *conntrack,
		    enum ip_nat_manip_type maniptype)
{
	__be32 *var_ipp;
	/* Host order */
	u_int32_t minip, maxip, j;

	/* No IP mapping? Do nothing. */
	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
		return;

	/* SRC manip rewrites the source address, anything else the
	 * destination. */
	if (maniptype == IP_NAT_MANIP_SRC)
		var_ipp = &tuple->src.ip;
	else
		var_ipp = &tuple->dst.ip;

	/* Fast path: only one choice. */
	if (range->min_ip == range->max_ip) {
		*var_ipp = range->min_ip;
		return;
	}

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots. */
	minip = ntohl(range->min_ip);
	maxip = ntohl(range->max_ip);
	j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
	*var_ipp = htonl(minip + j % (maxip - minip + 1));
}
224 | |||
/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
 * we change the source to map into the range.  For NF_IP_PRE_ROUTING
 * and NF_IP_LOCAL_OUT, we change the destination to map into the
 * range.  It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
/* Fills *tuple with a (hopefully unique) translation of *orig_tuple
 * that fits @range for the given manip type. */
static void
get_unique_tuple(struct ip_conntrack_tuple *tuple,
		 const struct ip_conntrack_tuple *orig_tuple,
		 const struct ip_nat_range *range,
		 struct ip_conntrack *conntrack,
		 enum ip_nat_manip_type maniptype)
{
	struct ip_nat_protocol *proto;

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	   and that same mapping gives a unique tuple within the given
	   range, use that.

	   This is only required for source (ie. NAT/masq) mappings.
	   So far, we don't do local source mappings, so multiple
	   manips not an issue.  */
	if (maniptype == IP_NAT_MANIP_SRC) {
		if (find_appropriate_src(orig_tuple, tuple, range)) {
			DEBUGP("get_unique_tuple: Found current src map\n");
			/* Port randomization bypasses mapping reuse. */
			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
				if (!ip_nat_used_tuple(tuple, conntrack))
					return;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given
	   range. */
	*tuple = *orig_tuple;
	find_best_ips_proto(tuple, range, conntrack, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	   the range to make a unique tuple. */

	rcu_read_lock();
	proto = __ip_nat_proto_find(orig_tuple->dst.protonum);

	/* Change protocol info to have some randomization */
	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
		proto->unique_tuple(tuple, range, maniptype, conntrack);
		goto out;
	}

	/* Only bother mapping if it's not already in range and unique */
	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
	     || proto->in_range(tuple, maniptype, &range->min, &range->max))
	    && !ip_nat_used_tuple(tuple, conntrack))
		goto out;

	/* Last change: get protocol to try to obtain unique tuple. */
	proto->unique_tuple(tuple, range, maniptype, conntrack);
out:
	rcu_read_unlock();
}
284 | |||
/*
 * Set up NAT for @conntrack: compute a unique translated tuple for the
 * manip type implied by @hooknum, update the conntrack's reply tuple
 * accordingly, hash the conntrack by source (first time only), and mark
 * the manip as done.  Always returns NF_ACCEPT.
 */
unsigned int
ip_nat_setup_info(struct ip_conntrack *conntrack,
		  const struct ip_nat_range *range,
		  unsigned int hooknum)
{
	struct ip_conntrack_tuple curr_tuple, new_tuple;
	struct ip_nat_info *info = &conntrack->nat.info;
	/* First NAT manip on this conntrack? Then it needs hashing. */
	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
		     || hooknum == NF_IP_POST_ROUTING
		     || hooknum == NF_IP_LOCAL_IN
		     || hooknum == NF_IP_LOCAL_OUT);
	BUG_ON(ip_nat_initialized(conntrack, maniptype));

	/* What we've got will look like inverse of reply. Normally
	   this is what is in the conntrack, except for prior
	   manipulations (future optimization: if num_manips == 0,
	   orig_tp =
	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
	invert_tuplepr(&curr_tuple,
		       &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);

	if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct ip_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		invert_tuplepr(&reply, &new_tuple);
		ip_conntrack_alter_reply(conntrack, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == IP_NAT_MANIP_SRC)
			conntrack->status |= IPS_SRC_NAT;
		else
			conntrack->status |= IPS_DST_NAT;
	}

	/* Place in source hash if this is the first time. */
	if (have_to_hash) {
		unsigned int srchash
			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
				      .tuple);
		write_lock_bh(&ip_nat_lock);
		list_add(&info->bysource, &bysource[srchash]);
		write_unlock_bh(&ip_nat_lock);
	}

	/* It's done. */
	if (maniptype == IP_NAT_MANIP_DST)
		set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
	else
		set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);

	return NF_ACCEPT;
}
EXPORT_SYMBOL(ip_nat_setup_info);
344 | |||
/* Returns true if succeeded. */
/* Rewrite one packet's IP header (and per-protocol header) at
 * @iphdroff so its src or dst — per @maniptype — matches @target,
 * fixing the IP checksum incrementally.  Returns 0 if the skb could
 * not be made writable or the protocol manip failed. */
static int
manip_pkt(u_int16_t proto,
	  struct sk_buff **pskb,
	  unsigned int iphdroff,
	  const struct ip_conntrack_tuple *target,
	  enum ip_nat_manip_type maniptype)
{
	struct iphdr *iph;
	struct ip_nat_protocol *p;

	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
		return 0;

	iph = (void *)(*pskb)->data + iphdroff;

	/* Manipulate protcol part. */

	/* rcu_read_lock()ed by nf_hook_slow */
	p = __ip_nat_proto_find(proto);
	if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
		return 0;

	/* Reload: the protocol manip may have reallocated skb data. */
	iph = (void *)(*pskb)->data + iphdroff;

	if (maniptype == IP_NAT_MANIP_SRC) {
		nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
		iph->saddr = target->src.ip;
	} else {
		nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
		iph->daddr = target->dst.ip;
	}
	return 1;
}
379 | |||
/* Do packet manipulations according to ip_nat_setup_info. */
/* Apply the manip for this hook/direction to the packet, if one was
 * set up on the conntrack.  Returns NF_ACCEPT, or NF_DROP when the
 * packet could not be rewritten. */
unsigned int ip_nat_packet(struct ip_conntrack *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff **pskb)
{
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct ip_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(ip_nat_packet);
412 | |||
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
/* Translate an ICMP error packet: both the embedded (offending) IP
 * header inside the ICMP payload and the outer IP header must be
 * rewritten so the error reaches the pre-NAT endpoint consistently.
 * Returns 1 on success, 0 to indicate the packet should be dropped. */
int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
				  enum ip_conntrack_info ctinfo,
				  unsigned int hooknum,
				  struct sk_buff **pskb)
{
	struct {
		struct icmphdr icmp;
		struct iphdr ip;
	} *inside;
	struct ip_conntrack_protocol *proto;
	struct ip_conntrack_tuple inner, target;
	int hdrlen = (*pskb)->nh.iph->ihl * 4;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);

	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
		return 0;

	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;

	/* We're actually going to mangle it beyond trivial checksum
	   adjustment, so make sure the current checksum is correct. */
	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
		return 0;

	/* Must be RELATED */
	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);

	/* Redirects on non-null nats must be dropped, else they'll
	   start talking to each other without our translation, and be
	   confused... --RR */
	if (inside->icmp.type == ICMP_REDIRECT) {
		/* If NAT isn't finished, assume it and drop. */
		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
			return 0;

		if (ct->status & IPS_NAT_MASK)
			return 0;
	}

	DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");

	/* rcu_read_lock()ed by nf_hook_slow */
	proto = __ip_conntrack_proto_find(inside->ip.protocol);
	if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
			     sizeof(struct icmphdr) + inside->ip.ihl*4,
			     &inner, proto))
		return 0;

	/* Change inner back to look like incoming packet.  We do the
	   opposite manip on this hook to normal, because it might not
	   pass all hooks (locally-generated ICMP).  Consider incoming
	   packet: PREROUTING (DST manip), routing produces ICMP, goes
	   through POSTROUTING (which must correct the DST manip). */
	if (!manip_pkt(inside->ip.protocol, pskb,
		       (*pskb)->nh.iph->ihl*4
		       + sizeof(inside->icmp),
		       &ct->tuplehash[!dir].tuple,
		       !manip))
		return 0;

	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		/* Reloading "inside" here since manip_pkt inner. */
		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
		inside->icmp.checksum = 0;
		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
							       (*pskb)->len - hdrlen,
							       0));
	}

	/* Change outer to look the reply to an incoming packet
	 * (proto 0 means don't invert per-proto part). */
	if (manip == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	if (ct->status & statusbit) {
		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
		if (!manip_pkt(0, pskb, 0, &target, manip))
			return 0;
	}

	return 1;
}
EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
507 | |||
/* Protocol registration. */
/* Install @proto as the handler for its protocol number.  Returns
 * -EBUSY if a real handler (anything but the unknown-protocol
 * placeholder) is already installed. */
int ip_nat_protocol_register(struct ip_nat_protocol *proto)
{
	int ret = 0;

	write_lock_bh(&ip_nat_lock);
	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
		ret = -EBUSY;
		goto out;
	}
	rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
 out:
	write_unlock_bh(&ip_nat_lock);
	return ret;
}
EXPORT_SYMBOL(ip_nat_protocol_register);
524 | |||
/* Noone stores the protocol anywhere; simply delete it. */
/* Restore the unknown-protocol placeholder for @proto's slot and wait
 * out RCU readers before the caller may free/unload @proto. */
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{
	write_lock_bh(&ip_nat_lock);
	rcu_assign_pointer(ip_nat_protos[proto->protonum],
			   &ip_nat_unknown_protocol);
	write_unlock_bh(&ip_nat_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL(ip_nat_protocol_unregister);
535 | |||
536 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
537 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
/* Netlink: dump @range's min/max port as CTA_PROTONAT_PORT_* nfattrs
 * into @skb.  Returns 0 on success, -1 when the skb ran out of room
 * (via NFA_PUT's nfattr_failure jump). */
int
ip_nat_port_range_to_nfattr(struct sk_buff *skb,
			    const struct ip_nat_range *range)
{
	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
		&range->min.tcp.port);
	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
		&range->max.tcp.port);

	return 0;

nfattr_failure:
	return -1;
}
552 | |||
/* Netlink: parse CTA_PROTONAT_PORT_MIN/MAX attributes from @tb into
 * @range.  A lone MIN implies MAX == MIN.  Returns 1 if any port was
 * parsed, 0 if neither attribute was present. */
int
ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
{
	int ret = 0;

	/* we have to return whether we actually parsed something or not */

	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
		ret = 1;
		range->min.tcp.port =
			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
	}

	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
		if (ret)
			range->max.tcp.port = range->min.tcp.port;
	} else {
		ret = 1;
		range->max.tcp.port =
			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
579 | #endif | ||
580 | |||
/* Module init: allocate the bysource hash, install the built-in
 * TCP/UDP/ICMP protocol handlers (everything else defaults to the
 * unknown-protocol handler), hook conntrack destruction for cleanup,
 * and mark the untracked conntrack as NAT-done so NAT skips it. */
static int __init ip_nat_init(void)
{
	size_t i;

	/* Leave them the same for the moment. */
	ip_nat_htable_size = ip_conntrack_htable_size;

	/* One vmalloc for both hash tables */
	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
	if (!bysource)
		return -ENOMEM;

	/* Sew in builtin protocols. */
	write_lock_bh(&ip_nat_lock);
	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
		rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
	rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
	rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
	rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
	write_unlock_bh(&ip_nat_lock);

	for (i = 0; i < ip_nat_htable_size; i++) {
		INIT_LIST_HEAD(&bysource[i]);
	}

	/* FIXME: Man, this is a hack. <SIGH> */
	IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
	rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);

	/* Initialize fake conntrack so that NAT will skip it */
	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
	return 0;
}
614 | |||
/* Clear NAT section of all conntracks, in case we're loaded again. */
/* Per-conntrack iterator callback; always returns 0 so the walk
 * visits (and scrubs) every entry. */
static int clean_nat(struct ip_conntrack *i, void *data)
{
	memset(&i->nat, 0, sizeof(i->nat));
	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
	return 0;
}
622 | |||
/* Module exit: scrub NAT state from every conntrack, detach the
 * destruction hook, wait for RCU readers, then free the hash. */
static void __exit ip_nat_cleanup(void)
{
	ip_ct_iterate_cleanup(&clean_nat, NULL);
	rcu_assign_pointer(ip_conntrack_destroyed, NULL);
	synchronize_rcu();
	vfree(bysource);
}
630 | |||
631 | MODULE_LICENSE("GPL"); | ||
632 | |||
633 | module_init(ip_nat_init); | ||
634 | module_exit(ip_nat_cleanup); | ||
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c deleted file mode 100644 index 32e01d8dffcb..000000000000 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | /* FTP extension for TCP NAT alteration. */ | ||
2 | |||
3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
4 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/netfilter_ipv4.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/moduleparam.h> | ||
16 | #include <net/tcp.h> | ||
17 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
18 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
19 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
20 | #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> | ||
21 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
22 | |||
23 | MODULE_LICENSE("GPL"); | ||
24 | MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | ||
25 | MODULE_DESCRIPTION("ftp NAT helper"); | ||
26 | |||
27 | #if 0 | ||
28 | #define DEBUGP printk | ||
29 | #else | ||
30 | #define DEBUGP(format, args...) | ||
31 | #endif | ||
32 | |||
33 | /* FIXME: Time out? --RR */ | ||
34 | |||
35 | static int | ||
36 | mangle_rfc959_packet(struct sk_buff **pskb, | ||
37 | __be32 newip, | ||
38 | u_int16_t port, | ||
39 | unsigned int matchoff, | ||
40 | unsigned int matchlen, | ||
41 | struct ip_conntrack *ct, | ||
42 | enum ip_conntrack_info ctinfo, | ||
43 | u32 *seq) | ||
44 | { | ||
45 | char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; | ||
46 | |||
47 | sprintf(buffer, "%u,%u,%u,%u,%u,%u", | ||
48 | NIPQUAD(newip), port>>8, port&0xFF); | ||
49 | |||
50 | DEBUGP("calling ip_nat_mangle_tcp_packet\n"); | ||
51 | |||
52 | *seq += strlen(buffer) - matchlen; | ||
53 | return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, | ||
54 | matchlen, buffer, strlen(buffer)); | ||
55 | } | ||
56 | |||
57 | /* |1|132.235.1.2|6275| */ | ||
58 | static int | ||
59 | mangle_eprt_packet(struct sk_buff **pskb, | ||
60 | __be32 newip, | ||
61 | u_int16_t port, | ||
62 | unsigned int matchoff, | ||
63 | unsigned int matchlen, | ||
64 | struct ip_conntrack *ct, | ||
65 | enum ip_conntrack_info ctinfo, | ||
66 | u32 *seq) | ||
67 | { | ||
68 | char buffer[sizeof("|1|255.255.255.255|65535|")]; | ||
69 | |||
70 | sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); | ||
71 | |||
72 | DEBUGP("calling ip_nat_mangle_tcp_packet\n"); | ||
73 | |||
74 | *seq += strlen(buffer) - matchlen; | ||
75 | return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, | ||
76 | matchlen, buffer, strlen(buffer)); | ||
77 | } | ||
78 | |||
79 | /* |1|132.235.1.2|6275| */ | ||
80 | static int | ||
81 | mangle_epsv_packet(struct sk_buff **pskb, | ||
82 | __be32 newip, | ||
83 | u_int16_t port, | ||
84 | unsigned int matchoff, | ||
85 | unsigned int matchlen, | ||
86 | struct ip_conntrack *ct, | ||
87 | enum ip_conntrack_info ctinfo, | ||
88 | u32 *seq) | ||
89 | { | ||
90 | char buffer[sizeof("|||65535|")]; | ||
91 | |||
92 | sprintf(buffer, "|||%u|", port); | ||
93 | |||
94 | DEBUGP("calling ip_nat_mangle_tcp_packet\n"); | ||
95 | |||
96 | *seq += strlen(buffer) - matchlen; | ||
97 | return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, | ||
98 | matchlen, buffer, strlen(buffer)); | ||
99 | } | ||
100 | |||
/* Dispatch table, indexed by FTP command type: PORT/PASV use the
 * RFC 959 comma-separated format, EPRT/EPSV the RFC 2428 extended
 * formats.  All entries share the same signature so ip_nat_ftp() can
 * call mangle[type](...) directly. */
static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
		       unsigned int,
		       unsigned int,
		       struct ip_conntrack *,
		       enum ip_conntrack_info,
		       u32 *seq)
= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
    [IP_CT_FTP_PASV] = mangle_rfc959_packet,
    [IP_CT_FTP_EPRT] = mangle_eprt_packet,
    [IP_CT_FTP_EPSV] = mangle_epsv_packet
};
112 | |||
/* So, this packet has hit the connection tracking matching code.
   Mangle it, and change the expectation to match the new version.
   Returns an NF_* verdict: NF_DROP if no port could be reserved or the
   payload rewrite failed, NF_ACCEPT otherwise. */
static unsigned int ip_nat_ftp(struct sk_buff **pskb,
			       enum ip_conntrack_info ctinfo,
			       enum ip_ct_ftp_type type,
			       unsigned int matchoff,
			       unsigned int matchlen,
			       struct ip_conntrack_expect *exp,
			       u32 *seq)
{
	__be32 newip;
	u_int16_t port;
	int dir = CTINFO2DIR(ctinfo);
	struct ip_conntrack *ct = exp->master;

	DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);

	/* Connection will come from wherever this packet goes, hence !dir */
	newip = ct->tuplehash[!dir].tuple.dst.ip;
	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
	exp->dir = !dir;

	/* When you see the packet, we need to NAT it the same as the
	 * this one. */
	exp->expectfn = ip_nat_follow_master;

	/* Try to get same port: if not, try to change it.  port wraps to
	 * 0 after 65535, which terminates the loop with no port found. */
	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
		exp->tuple.dst.u.tcp.port = htons(port);
		if (ip_conntrack_expect_related(exp) == 0)
			break;
	}

	if (port == 0)
		return NF_DROP;

	/* Rewrite the address in the payload; on failure withdraw the
	 * expectation we just registered. */
	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
			  seq)) {
		ip_conntrack_unexpect_related(exp);
		return NF_DROP;
	}
	return NF_ACCEPT;
}
156 | |||
/* Module exit: unhook the FTP NAT helper, then wait for any RCU
 * readers still executing through the old hook pointer. */
static void __exit ip_nat_ftp_fini(void)
{
	rcu_assign_pointer(ip_nat_ftp_hook, NULL);
	synchronize_rcu();
}
162 | |||
/* Module init: install ip_nat_ftp as the conntrack FTP helper's NAT
 * hook.  The BUG_ON catches double-registration. */
static int __init ip_nat_ftp_init(void)
{
	BUG_ON(rcu_dereference(ip_nat_ftp_hook));
	rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
	return 0;
}
169 | |||
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
/* Setter stub for the obsolete 'ports' module parameter: just warn and
 * accept the value so old configurations keep loading. */
static int warn_set(const char *val, struct kernel_param *kp)
{
	printk(KERN_INFO KBUILD_MODNAME
	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
	return 0;
}
/* No getter (NULL): the parameter is write-only and ignored. */
module_param_call(ports, warn_set, NULL, NULL, 0);
178 | |||
179 | module_init(ip_nat_ftp_init); | ||
180 | module_exit(ip_nat_ftp_fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c deleted file mode 100644 index dc778cfef58b..000000000000 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ /dev/null | |||
@@ -1,436 +0,0 @@ | |||
1 | /* ip_nat_helper.c - generic support functions for NAT helpers | ||
2 | * | ||
3 | * (C) 2000-2002 Harald Welte <laforge@netfilter.org> | ||
4 | * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>: | ||
11 | * - add support for SACK adjustment | ||
12 | * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>: | ||
13 | * - merge SACK support into newnat API | ||
14 | * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>: | ||
15 | * - make ip_nat_resize_packet more generic (TCP and UDP) | ||
16 | * - add ip_nat_mangle_udp_packet | ||
17 | */ | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/kmod.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/timer.h> | ||
22 | #include <linux/skbuff.h> | ||
23 | #include <linux/netfilter_ipv4.h> | ||
24 | #include <net/checksum.h> | ||
25 | #include <net/icmp.h> | ||
26 | #include <net/ip.h> | ||
27 | #include <net/tcp.h> | ||
28 | #include <net/udp.h> | ||
29 | |||
30 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
31 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
32 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
33 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
34 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
35 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
36 | |||
37 | #if 0 | ||
38 | #define DEBUGP printk | ||
39 | #define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos); | ||
40 | #else | ||
41 | #define DEBUGP(format, args...) | ||
42 | #define DUMP_OFFSET(x) | ||
43 | #endif | ||
44 | |||
45 | static DEFINE_SPINLOCK(ip_nat_seqofs_lock); | ||
46 | |||
47 | /* Setup TCP sequence correction given this change at this sequence */ | ||
48 | static inline void | ||
49 | adjust_tcp_sequence(u32 seq, | ||
50 | int sizediff, | ||
51 | struct ip_conntrack *ct, | ||
52 | enum ip_conntrack_info ctinfo) | ||
53 | { | ||
54 | int dir; | ||
55 | struct ip_nat_seq *this_way, *other_way; | ||
56 | |||
57 | DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n", | ||
58 | (*skb)->len, new_size); | ||
59 | |||
60 | dir = CTINFO2DIR(ctinfo); | ||
61 | |||
62 | this_way = &ct->nat.info.seq[dir]; | ||
63 | other_way = &ct->nat.info.seq[!dir]; | ||
64 | |||
65 | DEBUGP("ip_nat_resize_packet: Seq_offset before: "); | ||
66 | DUMP_OFFSET(this_way); | ||
67 | |||
68 | spin_lock_bh(&ip_nat_seqofs_lock); | ||
69 | |||
70 | /* SYN adjust. If it's uninitialized, or this is after last | ||
71 | * correction, record it: we don't handle more than one | ||
72 | * adjustment in the window, but do deal with common case of a | ||
73 | * retransmit */ | ||
74 | if (this_way->offset_before == this_way->offset_after | ||
75 | || before(this_way->correction_pos, seq)) { | ||
76 | this_way->correction_pos = seq; | ||
77 | this_way->offset_before = this_way->offset_after; | ||
78 | this_way->offset_after += sizediff; | ||
79 | } | ||
80 | spin_unlock_bh(&ip_nat_seqofs_lock); | ||
81 | |||
82 | DEBUGP("ip_nat_resize_packet: Seq_offset after: "); | ||
83 | DUMP_OFFSET(this_way); | ||
84 | } | ||
85 | |||
/* Frobs data inside this packet, which is linear.
 * Replaces match_len bytes at match_offset (relative to dataoff past
 * the IP header) with rep_len bytes from rep_buffer, shifting the
 * payload tail and fixing skb length and the IP header checksum.
 * Caller must have made the skb writable and linear. */
static void mangle_contents(struct sk_buff *skb,
			    unsigned int dataoff,
			    unsigned int match_offset,
			    unsigned int match_len,
			    const char *rep_buffer,
			    unsigned int rep_len)
{
	unsigned char *data;

	BUG_ON(skb_is_nonlinear(skb));
	data = (unsigned char *)skb->nh.iph + dataoff;

	/* move post-replacement */
	memmove(data + match_offset + rep_len,
		data + match_offset + match_len,
		skb->tail - (data + match_offset + match_len));

	/* insert data from buffer */
	memcpy(data + match_offset, rep_buffer, rep_len);

	/* update skb info */
	if (rep_len > match_len) {
		DEBUGP("ip_nat_mangle_packet: Extending packet by "
		       "%u from %u bytes\n", rep_len - match_len,
		       skb->len);
		skb_put(skb, rep_len - match_len);
	} else {
		DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
		       "%u from %u bytes\n", match_len - rep_len,
		       skb->len);
		__skb_trim(skb, skb->len + rep_len - match_len);
	}

	/* fix IP hdr checksum information */
	skb->nh.iph->tot_len = htons(skb->len);
	ip_send_check(skb->nh.iph);
}
124 | |||
125 | /* Unusual, but possible case. */ | ||
126 | static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) | ||
127 | { | ||
128 | struct sk_buff *nskb; | ||
129 | |||
130 | if ((*pskb)->len + extra > 65535) | ||
131 | return 0; | ||
132 | |||
133 | nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); | ||
134 | if (!nskb) | ||
135 | return 0; | ||
136 | |||
137 | /* Transfer socket to new skb. */ | ||
138 | if ((*pskb)->sk) | ||
139 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
140 | kfree_skb(*pskb); | ||
141 | *pskb = nskb; | ||
142 | return 1; | ||
143 | } | ||
144 | |||
/* Generic function for mangling variable-length address changes inside
 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
 * command in FTP).
 *
 * Takes care about all the nasty sequence number changes, checksumming,
 * skb enlargement, ...
 *
 * Returns 1 on success, 0 on failure (unwritable skb or failed
 * enlargement).  On a length change it also registers the seq offset
 * and notifies TCP window tracking.
 * */
int
ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
			 struct ip_conntrack *ct,
			 enum ip_conntrack_info ctinfo,
			 unsigned int match_offset,
			 unsigned int match_len,
			 const char *rep_buffer,
			 unsigned int rep_len)
{
	struct iphdr *iph;
	struct tcphdr *tcph;
	int oldlen, datalen;

	/* Linearize/unshare the whole packet before editing it. */
	if (!skb_make_writable(pskb, (*pskb)->len))
		return 0;

	/* Make room in the tail if the replacement is longer. */
	if (rep_len > match_len
	    && rep_len - match_len > skb_tailroom(*pskb)
	    && !enlarge_skb(pskb, rep_len - match_len))
		return 0;

	SKB_LINEAR_ASSERT(*pskb);

	iph = (*pskb)->nh.iph;
	tcph = (void *)iph + iph->ihl*4;

	oldlen = (*pskb)->len - iph->ihl*4;
	/* match_offset is relative to the TCP payload start. */
	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
			match_offset, match_len, rep_buffer, rep_len);

	datalen = (*pskb)->len - iph->ihl*4;
	/* Full recompute unless hardware will finish the checksum, in
	 * which case only the pseudo-header length part changes. */
	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		tcph->check = 0;
		tcph->check = tcp_v4_check(datalen,
					   iph->saddr, iph->daddr,
					   csum_partial((char *)tcph,
							datalen, 0));
	} else
		nf_proto_csum_replace2(&tcph->check, *pskb,
				       htons(oldlen), htons(datalen), 1);

	if (rep_len != match_len) {
		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
		adjust_tcp_sequence(ntohl(tcph->seq),
				    (int)rep_len - (int)match_len,
				    ct, ctinfo);
		/* Tell TCP window tracking about seq change */
		ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
	}
	return 1;
}
EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
205 | |||
/* Generic function for mangling variable-length address changes inside
 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
 * command in the Amanda protocol)
 *
 * Takes care about all the nasty sequence number changes, checksumming,
 * skb enlargement, ...
 *
 * Returns 1 on success, 0 on failure (packet too short for the match,
 * unwritable skb, or failed enlargement).
 *
 * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
 * should be fairly easy to do.
 */
int
ip_nat_mangle_udp_packet(struct sk_buff **pskb,
			 struct ip_conntrack *ct,
			 enum ip_conntrack_info ctinfo,
			 unsigned int match_offset,
			 unsigned int match_len,
			 const char *rep_buffer,
			 unsigned int rep_len)
{
	struct iphdr *iph;
	struct udphdr *udph;
	int datalen, oldlen;

	/* UDP helpers might accidentally mangle the wrong packet */
	iph = (*pskb)->nh.iph;
	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
			       match_offset + match_len)
		return 0;

	if (!skb_make_writable(pskb, (*pskb)->len))
		return 0;

	/* Make room in the tail if the replacement is longer. */
	if (rep_len > match_len
	    && rep_len - match_len > skb_tailroom(*pskb)
	    && !enlarge_skb(pskb, rep_len - match_len))
		return 0;

	/* Re-read: skb_make_writable/enlarge_skb may have replaced it. */
	iph = (*pskb)->nh.iph;
	udph = (void *)iph + iph->ihl*4;

	oldlen = (*pskb)->len - iph->ihl*4;
	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
			match_offset, match_len, rep_buffer, rep_len);

	/* update the length of the UDP packet */
	datalen = (*pskb)->len - iph->ihl*4;
	udph->len = htons(datalen);

	/* fix udp checksum if udp checksum was previously calculated */
	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
		return 1;

	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						datalen, IPPROTO_UDP,
						csum_partial((char *)udph,
							     datalen, 0));
		/* 0 means "no checksum" for UDP; use the mangled-zero
		 * substitute instead. */
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	} else
		nf_proto_csum_replace2(&udph->check, *pskb,
				       htons(oldlen), htons(datalen), 1);
	return 1;
}
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
272 | |||
273 | /* Adjust one found SACK option including checksum correction */ | ||
274 | static void | ||
275 | sack_adjust(struct sk_buff *skb, | ||
276 | struct tcphdr *tcph, | ||
277 | unsigned int sackoff, | ||
278 | unsigned int sackend, | ||
279 | struct ip_nat_seq *natseq) | ||
280 | { | ||
281 | while (sackoff < sackend) { | ||
282 | struct tcp_sack_block_wire *sack; | ||
283 | __be32 new_start_seq, new_end_seq; | ||
284 | |||
285 | sack = (void *)skb->data + sackoff; | ||
286 | if (after(ntohl(sack->start_seq) - natseq->offset_before, | ||
287 | natseq->correction_pos)) | ||
288 | new_start_seq = htonl(ntohl(sack->start_seq) | ||
289 | - natseq->offset_after); | ||
290 | else | ||
291 | new_start_seq = htonl(ntohl(sack->start_seq) | ||
292 | - natseq->offset_before); | ||
293 | |||
294 | if (after(ntohl(sack->end_seq) - natseq->offset_before, | ||
295 | natseq->correction_pos)) | ||
296 | new_end_seq = htonl(ntohl(sack->end_seq) | ||
297 | - natseq->offset_after); | ||
298 | else | ||
299 | new_end_seq = htonl(ntohl(sack->end_seq) | ||
300 | - natseq->offset_before); | ||
301 | |||
302 | DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", | ||
303 | ntohl(sack->start_seq), new_start_seq, | ||
304 | ntohl(sack->end_seq), new_end_seq); | ||
305 | |||
306 | nf_proto_csum_replace4(&tcph->check, skb, | ||
307 | sack->start_seq, new_start_seq, 0); | ||
308 | nf_proto_csum_replace4(&tcph->check, skb, | ||
309 | sack->end_seq, new_end_seq, 0); | ||
310 | sack->start_seq = new_start_seq; | ||
311 | sack->end_seq = new_end_seq; | ||
312 | sackoff += sizeof(*sack); | ||
313 | } | ||
314 | } | ||
315 | |||
/* TCP SACK sequence number adjustment.
 * Scans the TCP option area for SACK options and adjusts their edges
 * by the opposite direction's recorded NAT offsets.  Returns 1 on
 * success (including "no SACK present"), 0 on malformed options or an
 * unwritable skb. */
static inline unsigned int
ip_nat_sack_adjust(struct sk_buff **pskb,
		   struct tcphdr *tcph,
		   struct ip_conntrack *ct,
		   enum ip_conntrack_info ctinfo)
{
	unsigned int dir, optoff, optend;

	/* Option area: between the fixed TCP header and doff*4. */
	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;

	if (!skb_make_writable(pskb, optend))
		return 0;

	dir = CTINFO2DIR(ctinfo);

	while (optoff < optend) {
		/* Usually: option, length. */
		unsigned char *op = (*pskb)->data + optoff;

		switch (op[0]) {
		case TCPOPT_EOL:
			return 1;
		case TCPOPT_NOP:
			optoff++;
			continue;
		default:
			/* no partial options */
			if (optoff + 1 == optend
			    || optoff + op[1] > optend
			    || op[1] < 2)
				return 0;
			/* Only well-formed SACK options: at least one
			 * block and a whole number of 8-byte blocks. */
			if (op[0] == TCPOPT_SACK
			    && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
			    && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
				sack_adjust(*pskb, tcph, optoff+2,
					    optoff+op[1],
					    &ct->nat.info.seq[!dir]);
			optoff += op[1];
		}
	}
	return 1;
}
360 | |||
/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure.
 * Rewrites seq by this direction's offset and ack by the opposite
 * direction's, fixes the checksum incrementally, adjusts any SACK
 * blocks, and notifies TCP window tracking. */
int
ip_nat_seq_adjust(struct sk_buff **pskb,
		  struct ip_conntrack *ct,
		  enum ip_conntrack_info ctinfo)
{
	struct tcphdr *tcph;
	int dir;
	__be32 newseq, newack;
	struct ip_nat_seq *this_way, *other_way;

	dir = CTINFO2DIR(ctinfo);

	this_way = &ct->nat.info.seq[dir];
	other_way = &ct->nat.info.seq[!dir];

	/* Only need the IP header + fixed TCP header writable here. */
	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
		return 0;

	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
	/* seq past the correction point gets the post-change offset. */
	if (after(ntohl(tcph->seq), this_way->correction_pos))
		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
	else
		newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);

	/* ack mirrors the peer's adjustment, so subtract its offsets. */
	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
		  other_way->correction_pos))
		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
	else
		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);

	/* Patch checksum before storing the new values. */
	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);

	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
		ntohl(newack));

	tcph->seq = newseq;
	tcph->ack_seq = newack;

	if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
		return 0;

	ip_conntrack_tcp_update(*pskb, ct, dir);

	return 1;
}
EXPORT_SYMBOL(ip_nat_seq_adjust);
410 | |||
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void ip_nat_follow_master(struct ip_conntrack *ct,
			  struct ip_conntrack_expect *exp)
{
	struct ip_nat_range range;

	/* This must be a fresh one. */
	BUG_ON(ct->status & IPS_NAT_DONE_MASK);

	/* Change src to where master sends to */
	range.flags = IP_NAT_RANGE_MAP_IPS;
	range.min_ip = range.max_ip
		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
	/* hook doesn't matter, but it has to do source manip */
	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);

	/* For DST manip, map port here to where it's expected.  The
	 * saved_proto carries the original (pre-NAT) port recorded by
	 * the helper. */
	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
	range.min = range.max = exp->saved_proto;
	range.min_ip = range.max_ip
		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
	/* hook doesn't matter, but it has to do destination manip */
	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c deleted file mode 100644 index bdc99ef6159e..000000000000 --- a/net/ipv4/netfilter/ip_nat_helper_h323.c +++ /dev/null | |||
@@ -1,611 +0,0 @@ | |||
1 | /* | ||
2 | * H.323 extension for NAT alteration. | ||
3 | * | ||
4 | * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> | ||
5 | * | ||
6 | * This source code is licensed under General Public License version 2. | ||
7 | * | ||
8 | * Based on the 'brute force' H.323 NAT module by | ||
9 | * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/netfilter_ipv4.h> | ||
14 | #include <linux/netfilter.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/tcp.h> | ||
17 | #include <linux/moduleparam.h> | ||
18 | #include <net/tcp.h> | ||
19 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
20 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
21 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
22 | #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> | ||
23 | #include <linux/netfilter_ipv4/ip_conntrack_h323.h> | ||
24 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
25 | |||
26 | #if 0 | ||
27 | #define DEBUGP printk | ||
28 | #else | ||
29 | #define DEBUGP(format, args...) | ||
30 | #endif | ||
31 | |||
/****************************************************************************/
/* Overwrite an encoded IP:port pair at 'addroff' (relative to 'dataoff'
 * past the transport header) with ip/port, via the TCP or UDP mangler
 * as appropriate, and relocate *data to the (possibly reallocated) skb.
 * Returns 0 on success, -1 on failure. */
static int set_addr(struct sk_buff **pskb,
		    unsigned char **data, int dataoff,
		    unsigned int addroff, __be32 ip, u_int16_t port)
{
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
	/* Wire format of the replacement: 4-byte IP then 2-byte port,
	 * packed so no padding is written into the payload. */
	struct {
		__be32 ip;
		__be16 port;
	} __attribute__ ((__packed__)) buf;
	struct tcphdr _tcph, *th;

	buf.ip = ip;
	buf.port = htons(port);
	addroff += dataoff;

	if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
		if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
					      addroff, sizeof(buf),
					      (char *) &buf, sizeof(buf))) {
			if (net_ratelimit())
				printk("ip_nat_h323: ip_nat_mangle_tcp_packet"
				       " error\n");
			return -1;
		}

		/* Relocate data pointer */
		th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
					sizeof(_tcph), &_tcph);
		if (th == NULL)
			return -1;
		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
		    th->doff * 4 + dataoff;
	} else {
		if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
					      addroff, sizeof(buf),
					      (char *) &buf, sizeof(buf))) {
			if (net_ratelimit())
				printk("ip_nat_h323: ip_nat_mangle_udp_packet"
				       " error\n");
			return -1;
		}
		/* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
		 * or pull everything in a linear buffer, so we can safely
		 * use the skb pointers now */
		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
		    sizeof(struct udphdr);
	}

	return 0;
}
84 | |||
/****************************************************************************/
/* Rewrite an H.225 TransportAddress: the IP lives at addr->ipAddress.ip;
 * the actual payload surgery is delegated to set_addr(). */
static int set_h225_addr(struct sk_buff **pskb,
			 unsigned char **data, int dataoff,
			 TransportAddress * addr,
			 __be32 ip, u_int16_t port)
{
	return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
}
93 | |||
/****************************************************************************/
/* Rewrite an H.245 TransportAddress: the IP lives at
 * addr->unicastAddress.iPAddress.network; delegate to set_addr(). */
static int set_h245_addr(struct sk_buff **pskb,
			 unsigned char **data, int dataoff,
			 H245_TransportAddress * addr,
			 __be32 ip, u_int16_t port)
{
	return set_addr(pskb, data, dataoff,
			addr->unicastAddress.iPAddress.network, ip, port);
}
103 | |||
/****************************************************************************/
/* Rewrite the first signal address in 'addr[0..count)' that matches our
 * pre-NAT tuple (either direction) to the corresponding NATed address
 * and signalling port.  Returns set_h225_addr()'s result, or 0 if no
 * address matched. */
static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			unsigned char **data,
			TransportAddress * addr, int count)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	int i;
	__be32 ip;
	u_int16_t port;

	for (i = 0; i < count; i++) {
		if (get_h225_addr(*data, &addr[i], &ip, &port)) {
			if (ip == ct->tuplehash[dir].tuple.src.ip &&
			    port == info->sig_port[dir]) {
				/* GW->GK */

				/* Fix for Gnomemeeting: if an earlier
				 * entry is a loopback address, rewrite
				 * that one instead of entry i. */
				if (i > 0 &&
				    get_h225_addr(*data, &addr[0],
						  &ip, &port) &&
				    (ntohl(ip) & 0xff000000) == 0x7f000000)
					i = 0;

				DEBUGP
				    ("ip_nat_ras: set signal address "
				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
				     NIPQUAD(ip), port,
				     NIPQUAD(ct->tuplehash[!dir].tuple.dst.
					     ip), info->sig_port[!dir]);
				return set_h225_addr(pskb, data, 0, &addr[i],
						     ct->tuplehash[!dir].
						     tuple.dst.ip,
						     info->sig_port[!dir]);
			} else if (ip == ct->tuplehash[dir].tuple.dst.ip &&
				   port == info->sig_port[dir]) {
				/* GK->GW */
				DEBUGP
				    ("ip_nat_ras: set signal address "
				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
				     NIPQUAD(ip), port,
				     NIPQUAD(ct->tuplehash[!dir].tuple.src.
					     ip), info->sig_port[!dir]);
				return set_h225_addr(pskb, data, 0, &addr[i],
						     ct->tuplehash[!dir].
						     tuple.src.ip,
						     info->sig_port[!dir]);
			}
		}
	}

	return 0;
}
158 | |||
/****************************************************************************/
/* Rewrite the first RAS address in 'addr[0..count)' that matches our
 * pre-NAT source IP/UDP port to the NATed destination address/port of
 * the reply direction.  Returns set_h225_addr()'s result, or 0 if no
 * address matched. */
static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			unsigned char **data,
			TransportAddress * addr, int count)
{
	int dir = CTINFO2DIR(ctinfo);
	int i;
	__be32 ip;
	u_int16_t port;

	for (i = 0; i < count; i++) {
		if (get_h225_addr(*data, &addr[i], &ip, &port) &&
		    ip == ct->tuplehash[dir].tuple.src.ip &&
		    port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port)) {
			DEBUGP("ip_nat_ras: set rasAddress "
			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
			       NIPQUAD(ip), port,
			       NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
			       ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
				     port));
			return set_h225_addr(pskb, data, 0, &addr[i],
					     ct->tuplehash[!dir].tuple.dst.ip,
					     ntohs(ct->tuplehash[!dir].tuple.
						   dst.u.udp.port));
		}
	}

	return 0;
}
189 | |||
/****************************************************************************/
/* NAT an RTP/RTCP channel pair: reserve an even/odd port pair of
 * expectations, then rewrite the H.245 address in the signal to the
 * NATed pair.  Returns 0 on success, also 0 when out of expectation
 * slots or ports (channel just won't be NATed), -1 if the signal
 * rewrite fails. */
static int nat_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo,
			unsigned char **data, int dataoff,
			H245_TransportAddress * addr,
			u_int16_t port, u_int16_t rtp_port,
			struct ip_conntrack_expect *rtp_exp,
			struct ip_conntrack_expect *rtcp_exp)
{
	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
	int dir = CTINFO2DIR(ctinfo);
	int i;
	u_int16_t nated_port;

	/* Set expectations for NAT */
	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
	rtp_exp->expectfn = ip_nat_follow_master;
	rtp_exp->dir = !dir;
	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
	rtcp_exp->expectfn = ip_nat_follow_master;
	rtcp_exp->dir = !dir;

	/* Lookup existing expects */
	for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
		if (info->rtp_port[i][dir] == rtp_port) {
			/* Expected */

			/* Use allocated ports first. This will refresh
			 * the expects */
			rtp_exp->tuple.dst.u.udp.port =
			    htons(info->rtp_port[i][dir]);
			rtcp_exp->tuple.dst.u.udp.port =
			    htons(info->rtp_port[i][dir] + 1);
			break;
		} else if (info->rtp_port[i][dir] == 0) {
			/* Not expected */
			break;
		}
	}

	/* Run out of expectations */
	if (i >= H323_RTP_CHANNEL_MAX) {
		if (net_ratelimit())
			printk("ip_nat_h323: out of expectations\n");
		return 0;
	}

	/* Try to get a pair of ports.  RTP uses the even port, RTCP the
	 * following odd one; nated_port wraps to 0 if none are free. */
	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
	     nated_port != 0; nated_port += 2) {
		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
		if (ip_conntrack_expect_related(rtp_exp) == 0) {
			rtcp_exp->tuple.dst.u.udp.port =
			    htons(nated_port + 1);
			if (ip_conntrack_expect_related(rtcp_exp) == 0)
				break;
			/* RTCP slot taken: give back the RTP one and
			 * try the next pair. */
			ip_conntrack_unexpect_related(rtp_exp);
		}
	}

	if (nated_port == 0) {	/* No port available */
		if (net_ratelimit())
			printk("ip_nat_h323: out of RTP ports\n");
		return 0;
	}

	/* Modify signal; keep RTP/RTCP parity of the original port. */
	if (set_h245_addr(pskb, data, dataoff, addr,
			  ct->tuplehash[!dir].tuple.dst.ip,
			  (port & 1) ? nated_port + 1 : nated_port) == 0) {
		/* Save ports */
		info->rtp_port[i][dir] = rtp_port;
		info->rtp_port[i][!dir] = nated_port;
	} else {
		/* Rewrite failed: withdraw both expectations. */
		ip_conntrack_unexpect_related(rtp_exp);
		ip_conntrack_unexpect_related(rtcp_exp);
		return -1;
	}

	/* Success */
	DEBUGP("ip_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
	       NIPQUAD(rtp_exp->tuple.src.ip),
	       ntohs(rtp_exp->tuple.src.u.udp.port),
	       NIPQUAD(rtp_exp->tuple.dst.ip),
	       ntohs(rtp_exp->tuple.dst.u.udp.port));
	DEBUGP("ip_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
	       NIPQUAD(rtcp_exp->tuple.src.ip),
	       ntohs(rtcp_exp->tuple.src.u.udp.port),
	       NIPQUAD(rtcp_exp->tuple.dst.ip),
	       ntohs(rtcp_exp->tuple.dst.u.udp.port));

	return 0;
}
283 | |||
284 | /****************************************************************************/ | ||
285 | static int nat_t120(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
286 | enum ip_conntrack_info ctinfo, | ||
287 | unsigned char **data, int dataoff, | ||
288 | H245_TransportAddress * addr, u_int16_t port, | ||
289 | struct ip_conntrack_expect *exp) | ||
290 | { | ||
291 | int dir = CTINFO2DIR(ctinfo); | ||
292 | u_int16_t nated_port = port; | ||
293 | |||
294 | /* Set expectations for NAT */ | ||
295 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
296 | exp->expectfn = ip_nat_follow_master; | ||
297 | exp->dir = !dir; | ||
298 | |||
299 | /* Try to get same port: if not, try to change it. */ | ||
300 | for (; nated_port != 0; nated_port++) { | ||
301 | exp->tuple.dst.u.tcp.port = htons(nated_port); | ||
302 | if (ip_conntrack_expect_related(exp) == 0) | ||
303 | break; | ||
304 | } | ||
305 | |||
306 | if (nated_port == 0) { /* No port available */ | ||
307 | if (net_ratelimit()) | ||
308 | printk("ip_nat_h323: out of TCP ports\n"); | ||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | /* Modify signal */ | ||
313 | if (set_h245_addr(pskb, data, dataoff, addr, | ||
314 | ct->tuplehash[!dir].tuple.dst.ip, nated_port) < 0) { | ||
315 | ip_conntrack_unexpect_related(exp); | ||
316 | return -1; | ||
317 | } | ||
318 | |||
319 | DEBUGP("ip_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | ||
320 | NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), | ||
321 | NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); | ||
322 | |||
323 | return 0; | ||
324 | } | ||
325 | |||
/****************************************************************************
 * Expect callback installed in place of ip_conntrack_h245_expect()
 * (which ip_conntrack_helper_h323.c set up): perform the NAT follow-up
 * of the master first, then run the original conntrack H.245 handler.
 ****************************************************************************/
static void ip_nat_h245_expect(struct ip_conntrack *new,
			       struct ip_conntrack_expect *this)
{
	ip_nat_follow_master(new, this);
	ip_conntrack_h245_expect(new, this);
}
337 | |||
338 | /****************************************************************************/ | ||
339 | static int nat_h245(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
340 | enum ip_conntrack_info ctinfo, | ||
341 | unsigned char **data, int dataoff, | ||
342 | TransportAddress * addr, u_int16_t port, | ||
343 | struct ip_conntrack_expect *exp) | ||
344 | { | ||
345 | struct ip_ct_h323_master *info = &ct->help.ct_h323_info; | ||
346 | int dir = CTINFO2DIR(ctinfo); | ||
347 | u_int16_t nated_port = port; | ||
348 | |||
349 | /* Set expectations for NAT */ | ||
350 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
351 | exp->expectfn = ip_nat_h245_expect; | ||
352 | exp->dir = !dir; | ||
353 | |||
354 | /* Check existing expects */ | ||
355 | if (info->sig_port[dir] == port) | ||
356 | nated_port = info->sig_port[!dir]; | ||
357 | |||
358 | /* Try to get same port: if not, try to change it. */ | ||
359 | for (; nated_port != 0; nated_port++) { | ||
360 | exp->tuple.dst.u.tcp.port = htons(nated_port); | ||
361 | if (ip_conntrack_expect_related(exp) == 0) | ||
362 | break; | ||
363 | } | ||
364 | |||
365 | if (nated_port == 0) { /* No port available */ | ||
366 | if (net_ratelimit()) | ||
367 | printk("ip_nat_q931: out of TCP ports\n"); | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | /* Modify signal */ | ||
372 | if (set_h225_addr(pskb, data, dataoff, addr, | ||
373 | ct->tuplehash[!dir].tuple.dst.ip, | ||
374 | nated_port) == 0) { | ||
375 | /* Save ports */ | ||
376 | info->sig_port[dir] = port; | ||
377 | info->sig_port[!dir] = nated_port; | ||
378 | } else { | ||
379 | ip_conntrack_unexpect_related(exp); | ||
380 | return -1; | ||
381 | } | ||
382 | |||
383 | DEBUGP("ip_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | ||
384 | NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), | ||
385 | NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); | ||
386 | |||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | /**************************************************************************** | ||
391 | * This conntrack expect function replaces ip_conntrack_q931_expect() | ||
392 | * which was set by ip_conntrack_helper_h323.c. | ||
393 | ****************************************************************************/ | ||
394 | static void ip_nat_q931_expect(struct ip_conntrack *new, | ||
395 | struct ip_conntrack_expect *this) | ||
396 | { | ||
397 | struct ip_nat_range range; | ||
398 | |||
399 | if (this->tuple.src.ip != 0) { /* Only accept calls from GK */ | ||
400 | ip_nat_follow_master(new, this); | ||
401 | goto out; | ||
402 | } | ||
403 | |||
404 | /* This must be a fresh one. */ | ||
405 | BUG_ON(new->status & IPS_NAT_DONE_MASK); | ||
406 | |||
407 | /* Change src to where master sends to */ | ||
408 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
409 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip; | ||
410 | |||
411 | /* hook doesn't matter, but it has to do source manip */ | ||
412 | ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING); | ||
413 | |||
414 | /* For DST manip, map port here to where it's expected. */ | ||
415 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | ||
416 | range.min = range.max = this->saved_proto; | ||
417 | range.min_ip = range.max_ip = | ||
418 | new->master->tuplehash[!this->dir].tuple.src.ip; | ||
419 | |||
420 | /* hook doesn't matter, but it has to do destination manip */ | ||
421 | ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING); | ||
422 | |||
423 | out: | ||
424 | ip_conntrack_q931_expect(new, this); | ||
425 | } | ||
426 | |||
427 | /****************************************************************************/ | ||
428 | static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
429 | enum ip_conntrack_info ctinfo, | ||
430 | unsigned char **data, TransportAddress * addr, int idx, | ||
431 | u_int16_t port, struct ip_conntrack_expect *exp) | ||
432 | { | ||
433 | struct ip_ct_h323_master *info = &ct->help.ct_h323_info; | ||
434 | int dir = CTINFO2DIR(ctinfo); | ||
435 | u_int16_t nated_port = port; | ||
436 | __be32 ip; | ||
437 | |||
438 | /* Set expectations for NAT */ | ||
439 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
440 | exp->expectfn = ip_nat_q931_expect; | ||
441 | exp->dir = !dir; | ||
442 | |||
443 | /* Check existing expects */ | ||
444 | if (info->sig_port[dir] == port) | ||
445 | nated_port = info->sig_port[!dir]; | ||
446 | |||
447 | /* Try to get same port: if not, try to change it. */ | ||
448 | for (; nated_port != 0; nated_port++) { | ||
449 | exp->tuple.dst.u.tcp.port = htons(nated_port); | ||
450 | if (ip_conntrack_expect_related(exp) == 0) | ||
451 | break; | ||
452 | } | ||
453 | |||
454 | if (nated_port == 0) { /* No port available */ | ||
455 | if (net_ratelimit()) | ||
456 | printk("ip_nat_ras: out of TCP ports\n"); | ||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | /* Modify signal */ | ||
461 | if (set_h225_addr(pskb, data, 0, &addr[idx], | ||
462 | ct->tuplehash[!dir].tuple.dst.ip, | ||
463 | nated_port) == 0) { | ||
464 | /* Save ports */ | ||
465 | info->sig_port[dir] = port; | ||
466 | info->sig_port[!dir] = nated_port; | ||
467 | |||
468 | /* Fix for Gnomemeeting */ | ||
469 | if (idx > 0 && | ||
470 | get_h225_addr(*data, &addr[0], &ip, &port) && | ||
471 | (ntohl(ip) & 0xff000000) == 0x7f000000) { | ||
472 | set_h225_addr_hook(pskb, data, 0, &addr[0], | ||
473 | ct->tuplehash[!dir].tuple.dst.ip, | ||
474 | info->sig_port[!dir]); | ||
475 | } | ||
476 | } else { | ||
477 | ip_conntrack_unexpect_related(exp); | ||
478 | return -1; | ||
479 | } | ||
480 | |||
481 | /* Success */ | ||
482 | DEBUGP("ip_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | ||
483 | NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), | ||
484 | NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); | ||
485 | |||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | /****************************************************************************/ | ||
490 | static void ip_nat_callforwarding_expect(struct ip_conntrack *new, | ||
491 | struct ip_conntrack_expect *this) | ||
492 | { | ||
493 | struct ip_nat_range range; | ||
494 | |||
495 | /* This must be a fresh one. */ | ||
496 | BUG_ON(new->status & IPS_NAT_DONE_MASK); | ||
497 | |||
498 | /* Change src to where master sends to */ | ||
499 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
500 | range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip; | ||
501 | |||
502 | /* hook doesn't matter, but it has to do source manip */ | ||
503 | ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING); | ||
504 | |||
505 | /* For DST manip, map port here to where it's expected. */ | ||
506 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | ||
507 | range.min = range.max = this->saved_proto; | ||
508 | range.min_ip = range.max_ip = this->saved_ip; | ||
509 | |||
510 | /* hook doesn't matter, but it has to do destination manip */ | ||
511 | ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING); | ||
512 | |||
513 | ip_conntrack_q931_expect(new, this); | ||
514 | } | ||
515 | |||
516 | /****************************************************************************/ | ||
517 | static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct, | ||
518 | enum ip_conntrack_info ctinfo, | ||
519 | unsigned char **data, int dataoff, | ||
520 | TransportAddress * addr, u_int16_t port, | ||
521 | struct ip_conntrack_expect *exp) | ||
522 | { | ||
523 | int dir = CTINFO2DIR(ctinfo); | ||
524 | u_int16_t nated_port; | ||
525 | |||
526 | /* Set expectations for NAT */ | ||
527 | exp->saved_ip = exp->tuple.dst.ip; | ||
528 | exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; | ||
529 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
530 | exp->expectfn = ip_nat_callforwarding_expect; | ||
531 | exp->dir = !dir; | ||
532 | |||
533 | /* Try to get same port: if not, try to change it. */ | ||
534 | for (nated_port = port; nated_port != 0; nated_port++) { | ||
535 | exp->tuple.dst.u.tcp.port = htons(nated_port); | ||
536 | if (ip_conntrack_expect_related(exp) == 0) | ||
537 | break; | ||
538 | } | ||
539 | |||
540 | if (nated_port == 0) { /* No port available */ | ||
541 | if (net_ratelimit()) | ||
542 | printk("ip_nat_q931: out of TCP ports\n"); | ||
543 | return 0; | ||
544 | } | ||
545 | |||
546 | /* Modify signal */ | ||
547 | if (!set_h225_addr(pskb, data, dataoff, addr, | ||
548 | ct->tuplehash[!dir].tuple.dst.ip, | ||
549 | nated_port) == 0) { | ||
550 | ip_conntrack_unexpect_related(exp); | ||
551 | return -1; | ||
552 | } | ||
553 | |||
554 | /* Success */ | ||
555 | DEBUGP("ip_nat_q931: expect Call Forwarding " | ||
556 | "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", | ||
557 | NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), | ||
558 | NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); | ||
559 | |||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | /****************************************************************************/ | ||
564 | static int __init init(void) | ||
565 | { | ||
566 | BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); | ||
567 | BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); | ||
568 | BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); | ||
569 | BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); | ||
570 | BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); | ||
571 | BUG_ON(rcu_dereference(nat_t120_hook) != NULL); | ||
572 | BUG_ON(rcu_dereference(nat_h245_hook) != NULL); | ||
573 | BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); | ||
574 | BUG_ON(rcu_dereference(nat_q931_hook) != NULL); | ||
575 | |||
576 | rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); | ||
577 | rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); | ||
578 | rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); | ||
579 | rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); | ||
580 | rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); | ||
581 | rcu_assign_pointer(nat_t120_hook, nat_t120); | ||
582 | rcu_assign_pointer(nat_h245_hook, nat_h245); | ||
583 | rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); | ||
584 | rcu_assign_pointer(nat_q931_hook, nat_q931); | ||
585 | |||
586 | DEBUGP("ip_nat_h323: init success\n"); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | /****************************************************************************/ | ||
591 | static void __exit fini(void) | ||
592 | { | ||
593 | rcu_assign_pointer(set_h245_addr_hook, NULL); | ||
594 | rcu_assign_pointer(set_h225_addr_hook, NULL); | ||
595 | rcu_assign_pointer(set_sig_addr_hook, NULL); | ||
596 | rcu_assign_pointer(set_ras_addr_hook, NULL); | ||
597 | rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); | ||
598 | rcu_assign_pointer(nat_t120_hook, NULL); | ||
599 | rcu_assign_pointer(nat_h245_hook, NULL); | ||
600 | rcu_assign_pointer(nat_callforwarding_hook, NULL); | ||
601 | rcu_assign_pointer(nat_q931_hook, NULL); | ||
602 | synchronize_rcu(); | ||
603 | } | ||
604 | |||
605 | /****************************************************************************/ | ||
606 | module_init(init); | ||
607 | module_exit(fini); | ||
608 | |||
609 | MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); | ||
610 | MODULE_DESCRIPTION("H.323 NAT helper"); | ||
611 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c deleted file mode 100644 index 24ce4a5023d7..000000000000 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ /dev/null | |||
@@ -1,350 +0,0 @@ | |||
1 | /* | ||
2 | * ip_nat_pptp.c - Version 3.0 | ||
3 | * | ||
4 | * NAT support for PPTP (Point to Point Tunneling Protocol). | ||
5 | * PPTP is a protocol for creating virtual private networks. | ||
6 | * It is a specification defined by Microsoft and some vendors | ||
7 | * working with Microsoft. PPTP is built on top of a modified | ||
8 | * version of the Internet Generic Routing Encapsulation Protocol. | ||
9 | * GRE is defined in RFC 1701 and RFC 1702. Documentation of | ||
10 | * PPTP can be found in RFC 2637 | ||
11 | * | ||
12 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
13 | * | ||
14 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
15 | * | ||
16 | * TODO: - NAT to a unique tuple, not to TCP source port | ||
17 | * (needs netfilter tuple reservation) | ||
18 | * | ||
19 | * Changes: | ||
20 | * 2002-02-10 - Version 1.3 | ||
21 | * - Use ip_nat_mangle_tcp_packet() because of cloned skb's | ||
22 | * in local connections (Philip Craig <philipc@snapgear.com>) | ||
23 | * - add checks for magicCookie and pptp version | ||
24 | * - make argument list of pptp_{out,in}bound_packet() shorter | ||
25 | * - move to C99 style initializers | ||
26 | * - print version number at module loadtime | ||
27 | * 2003-09-22 - Version 1.5 | ||
28 | * - use SNATed tcp sourceport as callid, since we get called before | ||
29 | * TCP header is mangled (Philip Craig <philipc@snapgear.com>) | ||
30 | * 2004-10-22 - Version 2.0 | ||
31 | * - kernel 2.6.x version | ||
32 | * 2005-06-10 - Version 3.0 | ||
33 | * - kernel >= 2.6.11 version, | ||
34 | * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) | ||
35 | * | ||
36 | */ | ||
37 | |||
38 | #include <linux/module.h> | ||
39 | #include <linux/ip.h> | ||
40 | #include <linux/tcp.h> | ||
41 | #include <net/tcp.h> | ||
42 | |||
43 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
44 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
45 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
46 | #include <linux/netfilter_ipv4/ip_nat_pptp.h> | ||
47 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
48 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
49 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
50 | #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> | ||
51 | |||
52 | #define IP_NAT_PPTP_VERSION "3.0" | ||
53 | |||
54 | #define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) | ||
55 | |||
56 | MODULE_LICENSE("GPL"); | ||
57 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
58 | MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); | ||
59 | |||
60 | |||
61 | #if 0 | ||
62 | extern const char *pptp_msg_name[]; | ||
63 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ | ||
64 | __FUNCTION__, ## args) | ||
65 | #else | ||
66 | #define DEBUGP(format, args...) | ||
67 | #endif | ||
68 | |||
69 | static void pptp_nat_expected(struct ip_conntrack *ct, | ||
70 | struct ip_conntrack_expect *exp) | ||
71 | { | ||
72 | struct ip_conntrack *master = ct->master; | ||
73 | struct ip_conntrack_expect *other_exp; | ||
74 | struct ip_conntrack_tuple t; | ||
75 | struct ip_ct_pptp_master *ct_pptp_info; | ||
76 | struct ip_nat_pptp *nat_pptp_info; | ||
77 | struct ip_nat_range range; | ||
78 | |||
79 | ct_pptp_info = &master->help.ct_pptp_info; | ||
80 | nat_pptp_info = &master->nat.help.nat_pptp_info; | ||
81 | |||
82 | /* And here goes the grand finale of corrosion... */ | ||
83 | |||
84 | if (exp->dir == IP_CT_DIR_ORIGINAL) { | ||
85 | DEBUGP("we are PNS->PAC\n"); | ||
86 | /* therefore, build tuple for PAC->PNS */ | ||
87 | t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; | ||
88 | t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; | ||
89 | t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; | ||
90 | t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; | ||
91 | t.dst.protonum = IPPROTO_GRE; | ||
92 | } else { | ||
93 | DEBUGP("we are PAC->PNS\n"); | ||
94 | /* build tuple for PNS->PAC */ | ||
95 | t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; | ||
96 | t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; | ||
97 | t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; | ||
98 | t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; | ||
99 | t.dst.protonum = IPPROTO_GRE; | ||
100 | } | ||
101 | |||
102 | DEBUGP("trying to unexpect other dir: "); | ||
103 | DUMP_TUPLE(&t); | ||
104 | other_exp = ip_conntrack_expect_find_get(&t); | ||
105 | if (other_exp) { | ||
106 | ip_conntrack_unexpect_related(other_exp); | ||
107 | ip_conntrack_expect_put(other_exp); | ||
108 | DEBUGP("success\n"); | ||
109 | } else { | ||
110 | DEBUGP("not found!\n"); | ||
111 | } | ||
112 | |||
113 | /* This must be a fresh one. */ | ||
114 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | ||
115 | |||
116 | /* Change src to where master sends to */ | ||
117 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
118 | range.min_ip = range.max_ip | ||
119 | = ct->master->tuplehash[!exp->dir].tuple.dst.ip; | ||
120 | if (exp->dir == IP_CT_DIR_ORIGINAL) { | ||
121 | range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | ||
122 | range.min = range.max = exp->saved_proto; | ||
123 | } | ||
124 | /* hook doesn't matter, but it has to do source manip */ | ||
125 | ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); | ||
126 | |||
127 | /* For DST manip, map port here to where it's expected. */ | ||
128 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
129 | range.min_ip = range.max_ip | ||
130 | = ct->master->tuplehash[!exp->dir].tuple.src.ip; | ||
131 | if (exp->dir == IP_CT_DIR_REPLY) { | ||
132 | range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | ||
133 | range.min = range.max = exp->saved_proto; | ||
134 | } | ||
135 | /* hook doesn't matter, but it has to do destination manip */ | ||
136 | ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); | ||
137 | } | ||
138 | |||
139 | /* outbound packets == from PNS to PAC */ | ||
140 | static int | ||
141 | pptp_outbound_pkt(struct sk_buff **pskb, | ||
142 | struct ip_conntrack *ct, | ||
143 | enum ip_conntrack_info ctinfo, | ||
144 | struct PptpControlHeader *ctlh, | ||
145 | union pptp_ctrl_union *pptpReq) | ||
146 | |||
147 | { | ||
148 | struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; | ||
149 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | ||
150 | u_int16_t msg; | ||
151 | __be16 new_callid; | ||
152 | unsigned int cid_off; | ||
153 | |||
154 | new_callid = ct_pptp_info->pns_call_id; | ||
155 | |||
156 | switch (msg = ntohs(ctlh->messageType)) { | ||
157 | case PPTP_OUT_CALL_REQUEST: | ||
158 | cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); | ||
159 | /* FIXME: ideally we would want to reserve a call ID | ||
160 | * here. current netfilter NAT core is not able to do | ||
161 | * this :( For now we use TCP source port. This breaks | ||
162 | * multiple calls within one control session */ | ||
163 | |||
164 | /* save original call ID in nat_info */ | ||
165 | nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; | ||
166 | |||
167 | /* don't use tcph->source since we are at a DSTmanip | ||
168 | * hook (e.g. PREROUTING) and pkt is not mangled yet */ | ||
169 | new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; | ||
170 | |||
171 | /* save new call ID in ct info */ | ||
172 | ct_pptp_info->pns_call_id = new_callid; | ||
173 | break; | ||
174 | case PPTP_IN_CALL_REPLY: | ||
175 | cid_off = offsetof(union pptp_ctrl_union, icack.callID); | ||
176 | break; | ||
177 | case PPTP_CALL_CLEAR_REQUEST: | ||
178 | cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); | ||
179 | break; | ||
180 | default: | ||
181 | DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, | ||
182 | (msg <= PPTP_MSG_MAX)? | ||
183 | pptp_msg_name[msg]:pptp_msg_name[0]); | ||
184 | /* fall through */ | ||
185 | |||
186 | case PPTP_SET_LINK_INFO: | ||
187 | /* only need to NAT in case PAC is behind NAT box */ | ||
188 | case PPTP_START_SESSION_REQUEST: | ||
189 | case PPTP_START_SESSION_REPLY: | ||
190 | case PPTP_STOP_SESSION_REQUEST: | ||
191 | case PPTP_STOP_SESSION_REPLY: | ||
192 | case PPTP_ECHO_REQUEST: | ||
193 | case PPTP_ECHO_REPLY: | ||
194 | /* no need to alter packet */ | ||
195 | return NF_ACCEPT; | ||
196 | } | ||
197 | |||
198 | /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass | ||
199 | * down to here */ | ||
200 | DEBUGP("altering call id from 0x%04x to 0x%04x\n", | ||
201 | ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); | ||
202 | |||
203 | /* mangle packet */ | ||
204 | if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, | ||
205 | cid_off + sizeof(struct pptp_pkt_hdr) + | ||
206 | sizeof(struct PptpControlHeader), | ||
207 | sizeof(new_callid), (char *)&new_callid, | ||
208 | sizeof(new_callid)) == 0) | ||
209 | return NF_DROP; | ||
210 | |||
211 | return NF_ACCEPT; | ||
212 | } | ||
213 | |||
214 | static void | ||
215 | pptp_exp_gre(struct ip_conntrack_expect *expect_orig, | ||
216 | struct ip_conntrack_expect *expect_reply) | ||
217 | { | ||
218 | struct ip_conntrack *ct = expect_orig->master; | ||
219 | struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; | ||
220 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | ||
221 | |||
222 | /* save original PAC call ID in nat_info */ | ||
223 | nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; | ||
224 | |||
225 | /* alter expectation for PNS->PAC direction */ | ||
226 | expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; | ||
227 | expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; | ||
228 | expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; | ||
229 | expect_orig->dir = IP_CT_DIR_ORIGINAL; | ||
230 | |||
231 | /* alter expectation for PAC->PNS direction */ | ||
232 | expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; | ||
233 | expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; | ||
234 | expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; | ||
235 | expect_reply->dir = IP_CT_DIR_REPLY; | ||
236 | } | ||
237 | |||
238 | /* inbound packets == from PAC to PNS */ | ||
239 | static int | ||
240 | pptp_inbound_pkt(struct sk_buff **pskb, | ||
241 | struct ip_conntrack *ct, | ||
242 | enum ip_conntrack_info ctinfo, | ||
243 | struct PptpControlHeader *ctlh, | ||
244 | union pptp_ctrl_union *pptpReq) | ||
245 | { | ||
246 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | ||
247 | u_int16_t msg; | ||
248 | __be16 new_pcid; | ||
249 | unsigned int pcid_off; | ||
250 | |||
251 | new_pcid = nat_pptp_info->pns_call_id; | ||
252 | |||
253 | switch (msg = ntohs(ctlh->messageType)) { | ||
254 | case PPTP_OUT_CALL_REPLY: | ||
255 | pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); | ||
256 | break; | ||
257 | case PPTP_IN_CALL_CONNECT: | ||
258 | pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); | ||
259 | break; | ||
260 | case PPTP_IN_CALL_REQUEST: | ||
261 | /* only need to nat in case PAC is behind NAT box */ | ||
262 | return NF_ACCEPT; | ||
263 | case PPTP_WAN_ERROR_NOTIFY: | ||
264 | pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID); | ||
265 | break; | ||
266 | case PPTP_CALL_DISCONNECT_NOTIFY: | ||
267 | pcid_off = offsetof(union pptp_ctrl_union, disc.callID); | ||
268 | break; | ||
269 | case PPTP_SET_LINK_INFO: | ||
270 | pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); | ||
271 | break; | ||
272 | |||
273 | default: | ||
274 | DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? | ||
275 | pptp_msg_name[msg]:pptp_msg_name[0]); | ||
276 | /* fall through */ | ||
277 | |||
278 | case PPTP_START_SESSION_REQUEST: | ||
279 | case PPTP_START_SESSION_REPLY: | ||
280 | case PPTP_STOP_SESSION_REQUEST: | ||
281 | case PPTP_STOP_SESSION_REPLY: | ||
282 | case PPTP_ECHO_REQUEST: | ||
283 | case PPTP_ECHO_REPLY: | ||
284 | /* no need to alter packet */ | ||
285 | return NF_ACCEPT; | ||
286 | } | ||
287 | |||
288 | /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, | ||
289 | * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ | ||
290 | |||
291 | /* mangle packet */ | ||
292 | DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", | ||
293 | ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); | ||
294 | |||
295 | if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, | ||
296 | pcid_off + sizeof(struct pptp_pkt_hdr) + | ||
297 | sizeof(struct PptpControlHeader), | ||
298 | sizeof(new_pcid), (char *)&new_pcid, | ||
299 | sizeof(new_pcid)) == 0) | ||
300 | return NF_DROP; | ||
301 | return NF_ACCEPT; | ||
302 | } | ||
303 | |||
304 | |||
305 | extern int __init ip_nat_proto_gre_init(void); | ||
306 | extern void __exit ip_nat_proto_gre_fini(void); | ||
307 | |||
308 | static int __init ip_nat_helper_pptp_init(void) | ||
309 | { | ||
310 | int ret; | ||
311 | |||
312 | DEBUGP("%s: registering NAT helper\n", __FILE__); | ||
313 | |||
314 | ret = ip_nat_proto_gre_init(); | ||
315 | if (ret < 0) | ||
316 | return ret; | ||
317 | |||
318 | BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound)); | ||
319 | rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt); | ||
320 | |||
321 | BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound)); | ||
322 | rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt); | ||
323 | |||
324 | BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre)); | ||
325 | rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre); | ||
326 | |||
327 | BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn)); | ||
328 | rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected); | ||
329 | |||
330 | printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); | ||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | static void __exit ip_nat_helper_pptp_fini(void) | ||
335 | { | ||
336 | DEBUGP("cleanup_module\n" ); | ||
337 | |||
338 | rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL); | ||
339 | rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL); | ||
340 | rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL); | ||
341 | rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL); | ||
342 | synchronize_rcu(); | ||
343 | |||
344 | ip_nat_proto_gre_fini(); | ||
345 | |||
346 | printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); | ||
347 | } | ||
348 | |||
349 | module_init(ip_nat_helper_pptp_init); | ||
350 | module_exit(ip_nat_helper_pptp_fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c deleted file mode 100644 index cfaeea38314f..000000000000 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ /dev/null | |||
@@ -1,122 +0,0 @@ | |||
1 | /* IRC extension for TCP NAT alteration. | ||
2 | * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> | ||
3 | * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation | ||
4 | * based on a copy of RR's ip_nat_ftp.c | ||
5 | * | ||
6 | * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/netfilter_ipv4.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <linux/tcp.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <net/tcp.h> | ||
20 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
21 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
22 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
23 | #include <linux/netfilter_ipv4/ip_conntrack_irc.h> | ||
24 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
25 | #include <linux/moduleparam.h> | ||
26 | |||
27 | #if 0 | ||
28 | #define DEBUGP printk | ||
29 | #else | ||
30 | #define DEBUGP(format, args...) | ||
31 | #endif | ||
32 | |||
33 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
34 | MODULE_DESCRIPTION("IRC (DCC) NAT helper"); | ||
35 | MODULE_LICENSE("GPL"); | ||
36 | |||
37 | static unsigned int help(struct sk_buff **pskb, | ||
38 | enum ip_conntrack_info ctinfo, | ||
39 | unsigned int matchoff, | ||
40 | unsigned int matchlen, | ||
41 | struct ip_conntrack_expect *exp) | ||
42 | { | ||
43 | u_int16_t port; | ||
44 | unsigned int ret; | ||
45 | |||
46 | /* "4294967296 65635 " */ | ||
47 | char buffer[18]; | ||
48 | |||
49 | DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n", | ||
50 | expect->seq, exp_irc_info->len, | ||
51 | ntohl(tcph->seq)); | ||
52 | |||
53 | /* Reply comes from server. */ | ||
54 | exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; | ||
55 | exp->dir = IP_CT_DIR_REPLY; | ||
56 | |||
57 | /* When you see the packet, we need to NAT it the same as the | ||
58 | * this one. */ | ||
59 | exp->expectfn = ip_nat_follow_master; | ||
60 | |||
61 | /* Try to get same port: if not, try to change it. */ | ||
62 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | ||
63 | exp->tuple.dst.u.tcp.port = htons(port); | ||
64 | if (ip_conntrack_expect_related(exp) == 0) | ||
65 | break; | ||
66 | } | ||
67 | |||
68 | if (port == 0) | ||
69 | return NF_DROP; | ||
70 | |||
71 | /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 | ||
72 | * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 | ||
73 | * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26 | ||
74 | * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26 | ||
75 | * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27 | ||
76 | * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits, | ||
77 | * 255.255.255.255==4294967296, 10 digits) | ||
78 | * P: bound port (min 1 d, max 5d (65635)) | ||
79 | * F: filename (min 1 d ) | ||
80 | * S: size (min 1 d ) | ||
81 | * 0x01, \n: terminators | ||
82 | */ | ||
83 | |||
84 | /* AAA = "us", ie. where server normally talks to. */ | ||
85 | sprintf(buffer, "%u %u", | ||
86 | ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip), | ||
87 | port); | ||
88 | DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n", | ||
89 | buffer, NIPQUAD(exp->tuple.src.ip), port); | ||
90 | |||
91 | ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, | ||
92 | matchoff, matchlen, buffer, | ||
93 | strlen(buffer)); | ||
94 | if (ret != NF_ACCEPT) | ||
95 | ip_conntrack_unexpect_related(exp); | ||
96 | return ret; | ||
97 | } | ||
98 | |||
99 | static void __exit ip_nat_irc_fini(void) | ||
100 | { | ||
101 | rcu_assign_pointer(ip_nat_irc_hook, NULL); | ||
102 | synchronize_rcu(); | ||
103 | } | ||
104 | |||
105 | static int __init ip_nat_irc_init(void) | ||
106 | { | ||
107 | BUG_ON(rcu_dereference(ip_nat_irc_hook)); | ||
108 | rcu_assign_pointer(ip_nat_irc_hook, help); | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ | ||
113 | static int warn_set(const char *val, struct kernel_param *kp) | ||
114 | { | ||
115 | printk(KERN_INFO KBUILD_MODNAME | ||
116 | ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); | ||
117 | return 0; | ||
118 | } | ||
119 | module_param_call(ports, warn_set, NULL, NULL, 0); | ||
120 | |||
121 | module_init(ip_nat_irc_init); | ||
122 | module_exit(ip_nat_irc_fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c deleted file mode 100644 index 95810202d849..000000000000 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ /dev/null | |||
@@ -1,174 +0,0 @@ | |||
1 | /* | ||
2 | * ip_nat_proto_gre.c - Version 2.0 | ||
3 | * | ||
4 | * NAT protocol helper module for GRE. | ||
5 | * | ||
6 | * GRE is a generic encapsulation protocol, which is generally not very | ||
7 | * suited for NAT, as it has no protocol-specific part as port numbers. | ||
8 | * | ||
9 | * It has an optional key field, which may help us distinguishing two | ||
10 | * connections between the same two hosts. | ||
11 | * | ||
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | ||
13 | * | ||
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | ||
15 | * field called "CallID", which serves us for the same purpose as the key | ||
16 | * field in plain GRE. | ||
17 | * | ||
18 | * Documentation about PPTP can be found in RFC 2637 | ||
19 | * | ||
20 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
21 | * | ||
22 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/ip.h> | ||
28 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
29 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
30 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
31 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
32 | |||
33 | MODULE_LICENSE("GPL"); | ||
34 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
35 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | ||
36 | |||
37 | #if 0 | ||
38 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ | ||
39 | __FUNCTION__, ## args) | ||
40 | #else | ||
41 | #define DEBUGP(x, args...) | ||
42 | #endif | ||
43 | |||
44 | /* is key in given range between min and max */ | ||
45 | static int | ||
46 | gre_in_range(const struct ip_conntrack_tuple *tuple, | ||
47 | enum ip_nat_manip_type maniptype, | ||
48 | const union ip_conntrack_manip_proto *min, | ||
49 | const union ip_conntrack_manip_proto *max) | ||
50 | { | ||
51 | __be16 key; | ||
52 | |||
53 | if (maniptype == IP_NAT_MANIP_SRC) | ||
54 | key = tuple->src.u.gre.key; | ||
55 | else | ||
56 | key = tuple->dst.u.gre.key; | ||
57 | |||
58 | return ntohs(key) >= ntohs(min->gre.key) | ||
59 | && ntohs(key) <= ntohs(max->gre.key); | ||
60 | } | ||
61 | |||
62 | /* generate unique tuple ... */ | ||
63 | static int | ||
64 | gre_unique_tuple(struct ip_conntrack_tuple *tuple, | ||
65 | const struct ip_nat_range *range, | ||
66 | enum ip_nat_manip_type maniptype, | ||
67 | const struct ip_conntrack *conntrack) | ||
68 | { | ||
69 | static u_int16_t key; | ||
70 | __be16 *keyptr; | ||
71 | unsigned int min, i, range_size; | ||
72 | |||
73 | if (maniptype == IP_NAT_MANIP_SRC) | ||
74 | keyptr = &tuple->src.u.gre.key; | ||
75 | else | ||
76 | keyptr = &tuple->dst.u.gre.key; | ||
77 | |||
78 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
79 | DEBUGP("%p: NATing GRE PPTP\n", conntrack); | ||
80 | min = 1; | ||
81 | range_size = 0xffff; | ||
82 | } else { | ||
83 | min = ntohs(range->min.gre.key); | ||
84 | range_size = ntohs(range->max.gre.key) - min + 1; | ||
85 | } | ||
86 | |||
87 | DEBUGP("min = %u, range_size = %u\n", min, range_size); | ||
88 | |||
89 | for (i = 0; i < range_size; i++, key++) { | ||
90 | *keyptr = htons(min + key % range_size); | ||
91 | if (!ip_nat_used_tuple(tuple, conntrack)) | ||
92 | return 1; | ||
93 | } | ||
94 | |||
95 | DEBUGP("%p: no NAT mapping\n", conntrack); | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | /* manipulate a GRE packet according to maniptype */ | ||
101 | static int | ||
102 | gre_manip_pkt(struct sk_buff **pskb, | ||
103 | unsigned int iphdroff, | ||
104 | const struct ip_conntrack_tuple *tuple, | ||
105 | enum ip_nat_manip_type maniptype) | ||
106 | { | ||
107 | struct gre_hdr *greh; | ||
108 | struct gre_hdr_pptp *pgreh; | ||
109 | struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
110 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
111 | |||
112 | /* pgreh includes two optional 32bit fields which are not required | ||
113 | * to be there. That's where the magic '8' comes from */ | ||
114 | if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) | ||
115 | return 0; | ||
116 | |||
117 | greh = (void *)(*pskb)->data + hdroff; | ||
118 | pgreh = (struct gre_hdr_pptp *) greh; | ||
119 | |||
120 | /* we only have destination manip of a packet, since 'source key' | ||
121 | * is not present in the packet itself */ | ||
122 | if (maniptype == IP_NAT_MANIP_DST) { | ||
123 | /* key manipulation is always dest */ | ||
124 | switch (greh->version) { | ||
125 | case 0: | ||
126 | if (!greh->key) { | ||
127 | DEBUGP("can't nat GRE w/o key\n"); | ||
128 | break; | ||
129 | } | ||
130 | if (greh->csum) { | ||
131 | /* FIXME: Never tested this code... */ | ||
132 | nf_proto_csum_replace4(gre_csum(greh), *pskb, | ||
133 | *(gre_key(greh)), | ||
134 | tuple->dst.u.gre.key, 0); | ||
135 | } | ||
136 | *(gre_key(greh)) = tuple->dst.u.gre.key; | ||
137 | break; | ||
138 | case GRE_VERSION_PPTP: | ||
139 | DEBUGP("call_id -> 0x%04x\n", | ||
140 | ntohs(tuple->dst.u.gre.key)); | ||
141 | pgreh->call_id = tuple->dst.u.gre.key; | ||
142 | break; | ||
143 | default: | ||
144 | DEBUGP("can't nat unknown GRE version\n"); | ||
145 | return 0; | ||
146 | break; | ||
147 | } | ||
148 | } | ||
149 | return 1; | ||
150 | } | ||
151 | |||
152 | /* nat helper struct */ | ||
153 | static struct ip_nat_protocol gre = { | ||
154 | .name = "GRE", | ||
155 | .protonum = IPPROTO_GRE, | ||
156 | .manip_pkt = gre_manip_pkt, | ||
157 | .in_range = gre_in_range, | ||
158 | .unique_tuple = gre_unique_tuple, | ||
159 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
160 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
161 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
162 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
163 | #endif | ||
164 | }; | ||
165 | |||
166 | int __init ip_nat_proto_gre_init(void) | ||
167 | { | ||
168 | return ip_nat_protocol_register(&gre); | ||
169 | } | ||
170 | |||
171 | void __exit ip_nat_proto_gre_fini(void) | ||
172 | { | ||
173 | ip_nat_protocol_unregister(&gre); | ||
174 | } | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c deleted file mode 100644 index 22a528ae0380..000000000000 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ /dev/null | |||
@@ -1,87 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/netfilter.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <linux/icmp.h> | ||
14 | #include <linux/if.h> | ||
15 | |||
16 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
17 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
18 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
19 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
20 | |||
21 | static int | ||
22 | icmp_in_range(const struct ip_conntrack_tuple *tuple, | ||
23 | enum ip_nat_manip_type maniptype, | ||
24 | const union ip_conntrack_manip_proto *min, | ||
25 | const union ip_conntrack_manip_proto *max) | ||
26 | { | ||
27 | return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && | ||
28 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); | ||
29 | } | ||
30 | |||
31 | static int | ||
32 | icmp_unique_tuple(struct ip_conntrack_tuple *tuple, | ||
33 | const struct ip_nat_range *range, | ||
34 | enum ip_nat_manip_type maniptype, | ||
35 | const struct ip_conntrack *conntrack) | ||
36 | { | ||
37 | static u_int16_t id; | ||
38 | unsigned int range_size; | ||
39 | unsigned int i; | ||
40 | |||
41 | range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; | ||
42 | /* If no range specified... */ | ||
43 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) | ||
44 | range_size = 0xFFFF; | ||
45 | |||
46 | for (i = 0; i < range_size; i++, id++) { | ||
47 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + | ||
48 | (id % range_size)); | ||
49 | if (!ip_nat_used_tuple(tuple, conntrack)) | ||
50 | return 1; | ||
51 | } | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static int | ||
56 | icmp_manip_pkt(struct sk_buff **pskb, | ||
57 | unsigned int iphdroff, | ||
58 | const struct ip_conntrack_tuple *tuple, | ||
59 | enum ip_nat_manip_type maniptype) | ||
60 | { | ||
61 | struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
62 | struct icmphdr *hdr; | ||
63 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
64 | |||
65 | if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) | ||
66 | return 0; | ||
67 | |||
68 | hdr = (struct icmphdr *)((*pskb)->data + hdroff); | ||
69 | nf_proto_csum_replace2(&hdr->checksum, *pskb, | ||
70 | hdr->un.echo.id, tuple->src.u.icmp.id, 0); | ||
71 | hdr->un.echo.id = tuple->src.u.icmp.id; | ||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | struct ip_nat_protocol ip_nat_protocol_icmp = { | ||
76 | .name = "ICMP", | ||
77 | .protonum = IPPROTO_ICMP, | ||
78 | .me = THIS_MODULE, | ||
79 | .manip_pkt = icmp_manip_pkt, | ||
80 | .in_range = icmp_in_range, | ||
81 | .unique_tuple = icmp_unique_tuple, | ||
82 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
83 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
84 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
85 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
86 | #endif | ||
87 | }; | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c deleted file mode 100644 index 14ff24f53a7a..000000000000 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ /dev/null | |||
@@ -1,154 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/random.h> | ||
12 | #include <linux/netfilter.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/if.h> | ||
16 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
17 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
18 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
19 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
20 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
21 | |||
22 | static int | ||
23 | tcp_in_range(const struct ip_conntrack_tuple *tuple, | ||
24 | enum ip_nat_manip_type maniptype, | ||
25 | const union ip_conntrack_manip_proto *min, | ||
26 | const union ip_conntrack_manip_proto *max) | ||
27 | { | ||
28 | __be16 port; | ||
29 | |||
30 | if (maniptype == IP_NAT_MANIP_SRC) | ||
31 | port = tuple->src.u.tcp.port; | ||
32 | else | ||
33 | port = tuple->dst.u.tcp.port; | ||
34 | |||
35 | return ntohs(port) >= ntohs(min->tcp.port) | ||
36 | && ntohs(port) <= ntohs(max->tcp.port); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | tcp_unique_tuple(struct ip_conntrack_tuple *tuple, | ||
41 | const struct ip_nat_range *range, | ||
42 | enum ip_nat_manip_type maniptype, | ||
43 | const struct ip_conntrack *conntrack) | ||
44 | { | ||
45 | static u_int16_t port; | ||
46 | __be16 *portptr; | ||
47 | unsigned int range_size, min, i; | ||
48 | |||
49 | if (maniptype == IP_NAT_MANIP_SRC) | ||
50 | portptr = &tuple->src.u.tcp.port; | ||
51 | else | ||
52 | portptr = &tuple->dst.u.tcp.port; | ||
53 | |||
54 | /* If no range specified... */ | ||
55 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
56 | /* If it's dst rewrite, can't change port */ | ||
57 | if (maniptype == IP_NAT_MANIP_DST) | ||
58 | return 0; | ||
59 | |||
60 | /* Map privileged onto privileged. */ | ||
61 | if (ntohs(*portptr) < 1024) { | ||
62 | /* Loose convention: >> 512 is credential passing */ | ||
63 | if (ntohs(*portptr)<512) { | ||
64 | min = 1; | ||
65 | range_size = 511 - min + 1; | ||
66 | } else { | ||
67 | min = 600; | ||
68 | range_size = 1023 - min + 1; | ||
69 | } | ||
70 | } else { | ||
71 | min = 1024; | ||
72 | range_size = 65535 - 1024 + 1; | ||
73 | } | ||
74 | } else { | ||
75 | min = ntohs(range->min.tcp.port); | ||
76 | range_size = ntohs(range->max.tcp.port) - min + 1; | ||
77 | } | ||
78 | |||
79 | /* Start from random port to avoid prediction */ | ||
80 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) | ||
81 | port = net_random(); | ||
82 | |||
83 | for (i = 0; i < range_size; i++, port++) { | ||
84 | *portptr = htons(min + port % range_size); | ||
85 | if (!ip_nat_used_tuple(tuple, conntrack)) { | ||
86 | return 1; | ||
87 | } | ||
88 | } | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static int | ||
93 | tcp_manip_pkt(struct sk_buff **pskb, | ||
94 | unsigned int iphdroff, | ||
95 | const struct ip_conntrack_tuple *tuple, | ||
96 | enum ip_nat_manip_type maniptype) | ||
97 | { | ||
98 | struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
99 | struct tcphdr *hdr; | ||
100 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
101 | __be32 oldip, newip; | ||
102 | __be16 *portptr, newport, oldport; | ||
103 | int hdrsize = 8; /* TCP connection tracking guarantees this much */ | ||
104 | |||
105 | /* this could be a inner header returned in icmp packet; in such | ||
106 | cases we cannot update the checksum field since it is outside of | ||
107 | the 8 bytes of transport layer headers we are guaranteed */ | ||
108 | if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) | ||
109 | hdrsize = sizeof(struct tcphdr); | ||
110 | |||
111 | if (!skb_make_writable(pskb, hdroff + hdrsize)) | ||
112 | return 0; | ||
113 | |||
114 | iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
115 | hdr = (struct tcphdr *)((*pskb)->data + hdroff); | ||
116 | |||
117 | if (maniptype == IP_NAT_MANIP_SRC) { | ||
118 | /* Get rid of src ip and src pt */ | ||
119 | oldip = iph->saddr; | ||
120 | newip = tuple->src.ip; | ||
121 | newport = tuple->src.u.tcp.port; | ||
122 | portptr = &hdr->source; | ||
123 | } else { | ||
124 | /* Get rid of dst ip and dst pt */ | ||
125 | oldip = iph->daddr; | ||
126 | newip = tuple->dst.ip; | ||
127 | newport = tuple->dst.u.tcp.port; | ||
128 | portptr = &hdr->dest; | ||
129 | } | ||
130 | |||
131 | oldport = *portptr; | ||
132 | *portptr = newport; | ||
133 | |||
134 | if (hdrsize < sizeof(*hdr)) | ||
135 | return 1; | ||
136 | |||
137 | nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); | ||
138 | nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); | ||
139 | return 1; | ||
140 | } | ||
141 | |||
142 | struct ip_nat_protocol ip_nat_protocol_tcp = { | ||
143 | .name = "TCP", | ||
144 | .protonum = IPPROTO_TCP, | ||
145 | .me = THIS_MODULE, | ||
146 | .manip_pkt = tcp_manip_pkt, | ||
147 | .in_range = tcp_in_range, | ||
148 | .unique_tuple = tcp_unique_tuple, | ||
149 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
150 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
151 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
152 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
153 | #endif | ||
154 | }; | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c deleted file mode 100644 index dfd521672891..000000000000 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ /dev/null | |||
@@ -1,144 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/random.h> | ||
12 | #include <linux/netfilter.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/udp.h> | ||
15 | #include <linux/if.h> | ||
16 | |||
17 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
18 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
19 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
20 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
21 | |||
22 | static int | ||
23 | udp_in_range(const struct ip_conntrack_tuple *tuple, | ||
24 | enum ip_nat_manip_type maniptype, | ||
25 | const union ip_conntrack_manip_proto *min, | ||
26 | const union ip_conntrack_manip_proto *max) | ||
27 | { | ||
28 | __be16 port; | ||
29 | |||
30 | if (maniptype == IP_NAT_MANIP_SRC) | ||
31 | port = tuple->src.u.udp.port; | ||
32 | else | ||
33 | port = tuple->dst.u.udp.port; | ||
34 | |||
35 | return ntohs(port) >= ntohs(min->udp.port) | ||
36 | && ntohs(port) <= ntohs(max->udp.port); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | udp_unique_tuple(struct ip_conntrack_tuple *tuple, | ||
41 | const struct ip_nat_range *range, | ||
42 | enum ip_nat_manip_type maniptype, | ||
43 | const struct ip_conntrack *conntrack) | ||
44 | { | ||
45 | static u_int16_t port; | ||
46 | __be16 *portptr; | ||
47 | unsigned int range_size, min, i; | ||
48 | |||
49 | if (maniptype == IP_NAT_MANIP_SRC) | ||
50 | portptr = &tuple->src.u.udp.port; | ||
51 | else | ||
52 | portptr = &tuple->dst.u.udp.port; | ||
53 | |||
54 | /* If no range specified... */ | ||
55 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
56 | /* If it's dst rewrite, can't change port */ | ||
57 | if (maniptype == IP_NAT_MANIP_DST) | ||
58 | return 0; | ||
59 | |||
60 | if (ntohs(*portptr) < 1024) { | ||
61 | /* Loose convention: >> 512 is credential passing */ | ||
62 | if (ntohs(*portptr)<512) { | ||
63 | min = 1; | ||
64 | range_size = 511 - min + 1; | ||
65 | } else { | ||
66 | min = 600; | ||
67 | range_size = 1023 - min + 1; | ||
68 | } | ||
69 | } else { | ||
70 | min = 1024; | ||
71 | range_size = 65535 - 1024 + 1; | ||
72 | } | ||
73 | } else { | ||
74 | min = ntohs(range->min.udp.port); | ||
75 | range_size = ntohs(range->max.udp.port) - min + 1; | ||
76 | } | ||
77 | |||
78 | /* Start from random port to avoid prediction */ | ||
79 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) | ||
80 | port = net_random(); | ||
81 | |||
82 | for (i = 0; i < range_size; i++, port++) { | ||
83 | *portptr = htons(min + port % range_size); | ||
84 | if (!ip_nat_used_tuple(tuple, conntrack)) | ||
85 | return 1; | ||
86 | } | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static int | ||
91 | udp_manip_pkt(struct sk_buff **pskb, | ||
92 | unsigned int iphdroff, | ||
93 | const struct ip_conntrack_tuple *tuple, | ||
94 | enum ip_nat_manip_type maniptype) | ||
95 | { | ||
96 | struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
97 | struct udphdr *hdr; | ||
98 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
99 | __be32 oldip, newip; | ||
100 | __be16 *portptr, newport; | ||
101 | |||
102 | if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) | ||
103 | return 0; | ||
104 | |||
105 | iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
106 | hdr = (struct udphdr *)((*pskb)->data + hdroff); | ||
107 | |||
108 | if (maniptype == IP_NAT_MANIP_SRC) { | ||
109 | /* Get rid of src ip and src pt */ | ||
110 | oldip = iph->saddr; | ||
111 | newip = tuple->src.ip; | ||
112 | newport = tuple->src.u.udp.port; | ||
113 | portptr = &hdr->source; | ||
114 | } else { | ||
115 | /* Get rid of dst ip and dst pt */ | ||
116 | oldip = iph->daddr; | ||
117 | newip = tuple->dst.ip; | ||
118 | newport = tuple->dst.u.udp.port; | ||
119 | portptr = &hdr->dest; | ||
120 | } | ||
121 | |||
122 | if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { | ||
123 | nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); | ||
124 | nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0); | ||
125 | if (!hdr->check) | ||
126 | hdr->check = CSUM_MANGLED_0; | ||
127 | } | ||
128 | *portptr = newport; | ||
129 | return 1; | ||
130 | } | ||
131 | |||
132 | struct ip_nat_protocol ip_nat_protocol_udp = { | ||
133 | .name = "UDP", | ||
134 | .protonum = IPPROTO_UDP, | ||
135 | .me = THIS_MODULE, | ||
136 | .manip_pkt = udp_manip_pkt, | ||
137 | .in_range = udp_in_range, | ||
138 | .unique_tuple = udp_unique_tuple, | ||
139 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
140 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
141 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
142 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
143 | #endif | ||
144 | }; | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c deleted file mode 100644 index 3bf049517246..000000000000 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | /* The "unknown" protocol. This is what is used for protocols we | ||
2 | * don't understand. It's returned by ip_ct_find_proto(). | ||
3 | */ | ||
4 | |||
5 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
6 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/netfilter.h> | ||
16 | #include <linux/if.h> | ||
17 | |||
18 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
19 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
20 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
21 | |||
22 | static int unknown_in_range(const struct ip_conntrack_tuple *tuple, | ||
23 | enum ip_nat_manip_type manip_type, | ||
24 | const union ip_conntrack_manip_proto *min, | ||
25 | const union ip_conntrack_manip_proto *max) | ||
26 | { | ||
27 | return 1; | ||
28 | } | ||
29 | |||
30 | static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple, | ||
31 | const struct ip_nat_range *range, | ||
32 | enum ip_nat_manip_type maniptype, | ||
33 | const struct ip_conntrack *conntrack) | ||
34 | { | ||
35 | /* Sorry: we can't help you; if it's not unique, we can't frob | ||
36 | anything. */ | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static int | ||
41 | unknown_manip_pkt(struct sk_buff **pskb, | ||
42 | unsigned int iphdroff, | ||
43 | const struct ip_conntrack_tuple *tuple, | ||
44 | enum ip_nat_manip_type maniptype) | ||
45 | { | ||
46 | return 1; | ||
47 | } | ||
48 | |||
49 | struct ip_nat_protocol ip_nat_unknown_protocol = { | ||
50 | .name = "unknown", | ||
51 | /* .me isn't set: getting a ref to this cannot fail. */ | ||
52 | .manip_pkt = unknown_manip_pkt, | ||
53 | .in_range = unknown_in_range, | ||
54 | .unique_tuple = unknown_unique_tuple, | ||
55 | }; | ||
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c deleted file mode 100644 index 080eb1d92200..000000000000 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ /dev/null | |||
@@ -1,314 +0,0 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* Everything about the rules for NAT. */ | ||
10 | #include <linux/types.h> | ||
11 | #include <linux/ip.h> | ||
12 | #include <linux/netfilter.h> | ||
13 | #include <linux/netfilter_ipv4.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/kmod.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/proc_fs.h> | ||
18 | #include <net/checksum.h> | ||
19 | #include <net/route.h> | ||
20 | #include <linux/bitops.h> | ||
21 | |||
22 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
23 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
24 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
25 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
26 | |||
27 | #if 0 | ||
28 | #define DEBUGP printk | ||
29 | #else | ||
30 | #define DEBUGP(format, args...) | ||
31 | #endif | ||
32 | |||
33 | #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) | ||
34 | |||
35 | static struct | ||
36 | { | ||
37 | struct ipt_replace repl; | ||
38 | struct ipt_standard entries[3]; | ||
39 | struct ipt_error term; | ||
40 | } nat_initial_table __initdata | ||
41 | = { { "nat", NAT_VALID_HOOKS, 4, | ||
42 | sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), | ||
43 | { [NF_IP_PRE_ROUTING] = 0, | ||
44 | [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), | ||
45 | [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, | ||
46 | { [NF_IP_PRE_ROUTING] = 0, | ||
47 | [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), | ||
48 | [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, | ||
49 | 0, NULL, { } }, | ||
50 | { | ||
51 | /* PRE_ROUTING */ | ||
52 | { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, | ||
53 | 0, | ||
54 | sizeof(struct ipt_entry), | ||
55 | sizeof(struct ipt_standard), | ||
56 | 0, { 0, 0 }, { } }, | ||
57 | { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, | ||
58 | -NF_ACCEPT - 1 } }, | ||
59 | /* POST_ROUTING */ | ||
60 | { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, | ||
61 | 0, | ||
62 | sizeof(struct ipt_entry), | ||
63 | sizeof(struct ipt_standard), | ||
64 | 0, { 0, 0 }, { } }, | ||
65 | { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, | ||
66 | -NF_ACCEPT - 1 } }, | ||
67 | /* LOCAL_OUT */ | ||
68 | { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, | ||
69 | 0, | ||
70 | sizeof(struct ipt_entry), | ||
71 | sizeof(struct ipt_standard), | ||
72 | 0, { 0, 0 }, { } }, | ||
73 | { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, | ||
74 | -NF_ACCEPT - 1 } } | ||
75 | }, | ||
76 | /* ERROR */ | ||
77 | { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, | ||
78 | 0, | ||
79 | sizeof(struct ipt_entry), | ||
80 | sizeof(struct ipt_error), | ||
81 | 0, { 0, 0 }, { } }, | ||
82 | { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, | ||
83 | { } }, | ||
84 | "ERROR" | ||
85 | } | ||
86 | } | ||
87 | }; | ||
88 | |||
89 | static struct xt_table nat_table = { | ||
90 | .name = "nat", | ||
91 | .valid_hooks = NAT_VALID_HOOKS, | ||
92 | .lock = RW_LOCK_UNLOCKED, | ||
93 | .me = THIS_MODULE, | ||
94 | .af = AF_INET, | ||
95 | }; | ||
96 | |||
97 | /* Source NAT */ | ||
98 | static unsigned int ipt_snat_target(struct sk_buff **pskb, | ||
99 | const struct net_device *in, | ||
100 | const struct net_device *out, | ||
101 | unsigned int hooknum, | ||
102 | const struct xt_target *target, | ||
103 | const void *targinfo) | ||
104 | { | ||
105 | struct ip_conntrack *ct; | ||
106 | enum ip_conntrack_info ctinfo; | ||
107 | const struct ip_nat_multi_range_compat *mr = targinfo; | ||
108 | |||
109 | IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); | ||
110 | |||
111 | ct = ip_conntrack_get(*pskb, &ctinfo); | ||
112 | |||
113 | /* Connection must be valid and new. */ | ||
114 | IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED | ||
115 | || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); | ||
116 | IP_NF_ASSERT(out); | ||
117 | |||
118 | return ip_nat_setup_info(ct, &mr->range[0], hooknum); | ||
119 | } | ||
120 | |||
121 | /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ | ||
122 | static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) | ||
123 | { | ||
124 | static int warned = 0; | ||
125 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; | ||
126 | struct rtable *rt; | ||
127 | |||
128 | if (ip_route_output_key(&rt, &fl) != 0) | ||
129 | return; | ||
130 | |||
131 | if (rt->rt_src != srcip && !warned) { | ||
132 | printk("NAT: no longer support implicit source local NAT\n"); | ||
133 | printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", | ||
134 | NIPQUAD(srcip), NIPQUAD(dstip)); | ||
135 | warned = 1; | ||
136 | } | ||
137 | ip_rt_put(rt); | ||
138 | } | ||
139 | |||
140 | static unsigned int ipt_dnat_target(struct sk_buff **pskb, | ||
141 | const struct net_device *in, | ||
142 | const struct net_device *out, | ||
143 | unsigned int hooknum, | ||
144 | const struct xt_target *target, | ||
145 | const void *targinfo) | ||
146 | { | ||
147 | struct ip_conntrack *ct; | ||
148 | enum ip_conntrack_info ctinfo; | ||
149 | const struct ip_nat_multi_range_compat *mr = targinfo; | ||
150 | |||
151 | IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING | ||
152 | || hooknum == NF_IP_LOCAL_OUT); | ||
153 | |||
154 | ct = ip_conntrack_get(*pskb, &ctinfo); | ||
155 | |||
156 | /* Connection must be valid and new. */ | ||
157 | IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | ||
158 | |||
159 | if (hooknum == NF_IP_LOCAL_OUT | ||
160 | && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) | ||
161 | warn_if_extra_mangle((*pskb)->nh.iph->daddr, | ||
162 | mr->range[0].min_ip); | ||
163 | |||
164 | return ip_nat_setup_info(ct, &mr->range[0], hooknum); | ||
165 | } | ||
166 | |||
167 | static int ipt_snat_checkentry(const char *tablename, | ||
168 | const void *entry, | ||
169 | const struct xt_target *target, | ||
170 | void *targinfo, | ||
171 | unsigned int hook_mask) | ||
172 | { | ||
173 | struct ip_nat_multi_range_compat *mr = targinfo; | ||
174 | |||
175 | /* Must be a valid range */ | ||
176 | if (mr->rangesize != 1) { | ||
177 | printk("SNAT: multiple ranges no longer supported\n"); | ||
178 | return 0; | ||
179 | } | ||
180 | return 1; | ||
181 | } | ||
182 | |||
183 | static int ipt_dnat_checkentry(const char *tablename, | ||
184 | const void *entry, | ||
185 | const struct xt_target *target, | ||
186 | void *targinfo, | ||
187 | unsigned int hook_mask) | ||
188 | { | ||
189 | struct ip_nat_multi_range_compat *mr = targinfo; | ||
190 | |||
191 | /* Must be a valid range */ | ||
192 | if (mr->rangesize != 1) { | ||
193 | printk("DNAT: multiple ranges no longer supported\n"); | ||
194 | return 0; | ||
195 | } | ||
196 | if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { | ||
197 | printk("DNAT: port randomization not supported\n"); | ||
198 | return 0; | ||
199 | } | ||
200 | return 1; | ||
201 | } | ||
202 | |||
203 | inline unsigned int | ||
204 | alloc_null_binding(struct ip_conntrack *conntrack, | ||
205 | struct ip_nat_info *info, | ||
206 | unsigned int hooknum) | ||
207 | { | ||
208 | /* Force range to this IP; let proto decide mapping for | ||
209 | per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). | ||
210 | Use reply in case it's already been mangled (eg local packet). | ||
211 | */ | ||
212 | __be32 ip | ||
213 | = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC | ||
214 | ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip | ||
215 | : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); | ||
216 | struct ip_nat_range range | ||
217 | = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; | ||
218 | |||
219 | DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack, | ||
220 | NIPQUAD(ip)); | ||
221 | return ip_nat_setup_info(conntrack, &range, hooknum); | ||
222 | } | ||
223 | |||
224 | unsigned int | ||
225 | alloc_null_binding_confirmed(struct ip_conntrack *conntrack, | ||
226 | struct ip_nat_info *info, | ||
227 | unsigned int hooknum) | ||
228 | { | ||
229 | __be32 ip | ||
230 | = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC | ||
231 | ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip | ||
232 | : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); | ||
233 | u_int16_t all | ||
234 | = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC | ||
235 | ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all | ||
236 | : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); | ||
237 | struct ip_nat_range range | ||
238 | = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; | ||
239 | |||
240 | DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", | ||
241 | conntrack, NIPQUAD(ip)); | ||
242 | return ip_nat_setup_info(conntrack, &range, hooknum); | ||
243 | } | ||
244 | |||
245 | int ip_nat_rule_find(struct sk_buff **pskb, | ||
246 | unsigned int hooknum, | ||
247 | const struct net_device *in, | ||
248 | const struct net_device *out, | ||
249 | struct ip_conntrack *ct, | ||
250 | struct ip_nat_info *info) | ||
251 | { | ||
252 | int ret; | ||
253 | |||
254 | ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); | ||
255 | |||
256 | if (ret == NF_ACCEPT) { | ||
257 | if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) | ||
258 | /* NUL mapping */ | ||
259 | ret = alloc_null_binding(ct, info, hooknum); | ||
260 | } | ||
261 | return ret; | ||
262 | } | ||
263 | |||
264 | static struct xt_target ipt_snat_reg = { | ||
265 | .name = "SNAT", | ||
266 | .family = AF_INET, | ||
267 | .target = ipt_snat_target, | ||
268 | .targetsize = sizeof(struct ip_nat_multi_range_compat), | ||
269 | .table = "nat", | ||
270 | .hooks = 1 << NF_IP_POST_ROUTING, | ||
271 | .checkentry = ipt_snat_checkentry, | ||
272 | }; | ||
273 | |||
274 | static struct xt_target ipt_dnat_reg = { | ||
275 | .name = "DNAT", | ||
276 | .family = AF_INET, | ||
277 | .target = ipt_dnat_target, | ||
278 | .targetsize = sizeof(struct ip_nat_multi_range_compat), | ||
279 | .table = "nat", | ||
280 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), | ||
281 | .checkentry = ipt_dnat_checkentry, | ||
282 | }; | ||
283 | |||
284 | int __init ip_nat_rule_init(void) | ||
285 | { | ||
286 | int ret; | ||
287 | |||
288 | ret = ipt_register_table(&nat_table, &nat_initial_table.repl); | ||
289 | if (ret != 0) | ||
290 | return ret; | ||
291 | ret = xt_register_target(&ipt_snat_reg); | ||
292 | if (ret != 0) | ||
293 | goto unregister_table; | ||
294 | |||
295 | ret = xt_register_target(&ipt_dnat_reg); | ||
296 | if (ret != 0) | ||
297 | goto unregister_snat; | ||
298 | |||
299 | return ret; | ||
300 | |||
301 | unregister_snat: | ||
302 | xt_unregister_target(&ipt_snat_reg); | ||
303 | unregister_table: | ||
304 | xt_unregister_table(&nat_table); | ||
305 | |||
306 | return ret; | ||
307 | } | ||
308 | |||
309 | void ip_nat_rule_cleanup(void) | ||
310 | { | ||
311 | xt_unregister_target(&ipt_dnat_reg); | ||
312 | xt_unregister_target(&ipt_snat_reg); | ||
313 | ipt_unregister_table(&nat_table); | ||
314 | } | ||
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c deleted file mode 100644 index 325c5a9dc2ef..000000000000 --- a/net/ipv4/netfilter/ip_nat_sip.c +++ /dev/null | |||
@@ -1,282 +0,0 @@ | |||
1 | /* SIP extension for UDP NAT alteration. | ||
2 | * | ||
3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> | ||
4 | * based on RR's ip_nat_ftp.c and other modules. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/skbuff.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <linux/udp.h> | ||
15 | |||
16 | #include <linux/netfilter_ipv4.h> | ||
17 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
18 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
19 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
20 | #include <linux/netfilter_ipv4/ip_conntrack_sip.h> | ||
21 | |||
22 | MODULE_LICENSE("GPL"); | ||
23 | MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); | ||
24 | MODULE_DESCRIPTION("SIP NAT helper"); | ||
25 | |||
26 | #if 0 | ||
27 | #define DEBUGP printk | ||
28 | #else | ||
29 | #define DEBUGP(format, args...) | ||
30 | #endif | ||
31 | |||
32 | struct addr_map { | ||
33 | struct { | ||
34 | char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
35 | char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
36 | unsigned int srclen, srciplen; | ||
37 | unsigned int dstlen, dstiplen; | ||
38 | } addr[IP_CT_DIR_MAX]; | ||
39 | }; | ||
40 | |||
41 | static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map) | ||
42 | { | ||
43 | struct ip_conntrack_tuple *t; | ||
44 | enum ip_conntrack_dir dir; | ||
45 | unsigned int n; | ||
46 | |||
47 | for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { | ||
48 | t = &ct->tuplehash[dir].tuple; | ||
49 | |||
50 | n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", | ||
51 | NIPQUAD(t->src.ip)); | ||
52 | map->addr[dir].srciplen = n; | ||
53 | n += sprintf(map->addr[dir].src + n, ":%u", | ||
54 | ntohs(t->src.u.udp.port)); | ||
55 | map->addr[dir].srclen = n; | ||
56 | |||
57 | n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u", | ||
58 | NIPQUAD(t->dst.ip)); | ||
59 | map->addr[dir].dstiplen = n; | ||
60 | n += sprintf(map->addr[dir].dst + n, ":%u", | ||
61 | ntohs(t->dst.u.udp.port)); | ||
62 | map->addr[dir].dstlen = n; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, | ||
67 | struct ip_conntrack *ct, const char **dptr, size_t dlen, | ||
68 | enum sip_header_pos pos, struct addr_map *map) | ||
69 | { | ||
70 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
71 | unsigned int matchlen, matchoff, addrlen; | ||
72 | char *addr; | ||
73 | |||
74 | if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0) | ||
75 | return 1; | ||
76 | |||
77 | if ((matchlen == map->addr[dir].srciplen || | ||
78 | matchlen == map->addr[dir].srclen) && | ||
79 | memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) { | ||
80 | addr = map->addr[!dir].dst; | ||
81 | addrlen = map->addr[!dir].dstlen; | ||
82 | } else if ((matchlen == map->addr[dir].dstiplen || | ||
83 | matchlen == map->addr[dir].dstlen) && | ||
84 | memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) { | ||
85 | addr = map->addr[!dir].src; | ||
86 | addrlen = map->addr[!dir].srclen; | ||
87 | } else | ||
88 | return 1; | ||
89 | |||
90 | if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo, | ||
91 | matchoff, matchlen, addr, addrlen)) | ||
92 | return 0; | ||
93 | *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | ||
94 | return 1; | ||
95 | |||
96 | } | ||
97 | |||
98 | static unsigned int ip_nat_sip(struct sk_buff **pskb, | ||
99 | enum ip_conntrack_info ctinfo, | ||
100 | struct ip_conntrack *ct, | ||
101 | const char **dptr) | ||
102 | { | ||
103 | enum sip_header_pos pos; | ||
104 | struct addr_map map; | ||
105 | int dataoff, datalen; | ||
106 | |||
107 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | ||
108 | datalen = (*pskb)->len - dataoff; | ||
109 | if (datalen < sizeof("SIP/2.0") - 1) | ||
110 | return NF_DROP; | ||
111 | |||
112 | addr_map_init(ct, &map); | ||
113 | |||
114 | /* Basic rules: requests and responses. */ | ||
115 | if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) { | ||
116 | /* 10.2: Constructing the REGISTER Request: | ||
117 | * | ||
118 | * The "userinfo" and "@" components of the SIP URI MUST NOT | ||
119 | * be present. | ||
120 | */ | ||
121 | if (datalen >= sizeof("REGISTER") - 1 && | ||
122 | strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) | ||
123 | pos = POS_REG_REQ_URI; | ||
124 | else | ||
125 | pos = POS_REQ_URI; | ||
126 | |||
127 | if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) | ||
128 | return NF_DROP; | ||
129 | } | ||
130 | |||
131 | if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || | ||
132 | !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || | ||
133 | !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || | ||
134 | !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) | ||
135 | return NF_DROP; | ||
136 | return NF_ACCEPT; | ||
137 | } | ||
138 | |||
139 | static unsigned int mangle_sip_packet(struct sk_buff **pskb, | ||
140 | enum ip_conntrack_info ctinfo, | ||
141 | struct ip_conntrack *ct, | ||
142 | const char **dptr, size_t dlen, | ||
143 | char *buffer, int bufflen, | ||
144 | enum sip_header_pos pos) | ||
145 | { | ||
146 | unsigned int matchlen, matchoff; | ||
147 | |||
148 | if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0) | ||
149 | return 0; | ||
150 | |||
151 | if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo, | ||
152 | matchoff, matchlen, buffer, bufflen)) | ||
153 | return 0; | ||
154 | |||
155 | /* We need to reload this. Thanks Patrick. */ | ||
156 | *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | ||
157 | return 1; | ||
158 | } | ||
159 | |||
160 | static int mangle_content_len(struct sk_buff **pskb, | ||
161 | enum ip_conntrack_info ctinfo, | ||
162 | struct ip_conntrack *ct, | ||
163 | const char *dptr) | ||
164 | { | ||
165 | unsigned int dataoff, matchoff, matchlen; | ||
166 | char buffer[sizeof("65536")]; | ||
167 | int bufflen; | ||
168 | |||
169 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | ||
170 | |||
171 | /* Get actual SDP lenght */ | ||
172 | if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff, | ||
173 | &matchlen, POS_SDP_HEADER) > 0) { | ||
174 | |||
175 | /* since ct_sip_get_info() give us a pointer passing 'v=' | ||
176 | we need to add 2 bytes in this count. */ | ||
177 | int c_len = (*pskb)->len - dataoff - matchoff + 2; | ||
178 | |||
179 | /* Now, update SDP lenght */ | ||
180 | if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff, | ||
181 | &matchlen, POS_CONTENT) > 0) { | ||
182 | |||
183 | bufflen = sprintf(buffer, "%u", c_len); | ||
184 | |||
185 | return ip_nat_mangle_udp_packet(pskb, ct, ctinfo, | ||
186 | matchoff, matchlen, | ||
187 | buffer, bufflen); | ||
188 | } | ||
189 | } | ||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | static unsigned int mangle_sdp(struct sk_buff **pskb, | ||
194 | enum ip_conntrack_info ctinfo, | ||
195 | struct ip_conntrack *ct, | ||
196 | __be32 newip, u_int16_t port, | ||
197 | const char *dptr) | ||
198 | { | ||
199 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | ||
200 | unsigned int dataoff, bufflen; | ||
201 | |||
202 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | ||
203 | |||
204 | /* Mangle owner and contact info. */ | ||
205 | bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); | ||
206 | if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, | ||
207 | buffer, bufflen, POS_OWNER)) | ||
208 | return 0; | ||
209 | |||
210 | if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, | ||
211 | buffer, bufflen, POS_CONNECTION)) | ||
212 | return 0; | ||
213 | |||
214 | /* Mangle media port. */ | ||
215 | bufflen = sprintf(buffer, "%u", port); | ||
216 | if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, | ||
217 | buffer, bufflen, POS_MEDIA)) | ||
218 | return 0; | ||
219 | |||
220 | return mangle_content_len(pskb, ctinfo, ct, dptr); | ||
221 | } | ||
222 | |||
223 | /* So, this packet has hit the connection tracking matching code. | ||
224 | Mangle it, and change the expectation to match the new version. */ | ||
225 | static unsigned int ip_nat_sdp(struct sk_buff **pskb, | ||
226 | enum ip_conntrack_info ctinfo, | ||
227 | struct ip_conntrack_expect *exp, | ||
228 | const char *dptr) | ||
229 | { | ||
230 | struct ip_conntrack *ct = exp->master; | ||
231 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
232 | __be32 newip; | ||
233 | u_int16_t port; | ||
234 | |||
235 | DEBUGP("ip_nat_sdp():\n"); | ||
236 | |||
237 | /* Connection will come from reply */ | ||
238 | newip = ct->tuplehash[!dir].tuple.dst.ip; | ||
239 | |||
240 | exp->tuple.dst.ip = newip; | ||
241 | exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; | ||
242 | exp->dir = !dir; | ||
243 | |||
244 | /* When you see the packet, we need to NAT it the same as the | ||
245 | this one. */ | ||
246 | exp->expectfn = ip_nat_follow_master; | ||
247 | |||
248 | /* Try to get same port: if not, try to change it. */ | ||
249 | for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { | ||
250 | exp->tuple.dst.u.udp.port = htons(port); | ||
251 | if (ip_conntrack_expect_related(exp) == 0) | ||
252 | break; | ||
253 | } | ||
254 | |||
255 | if (port == 0) | ||
256 | return NF_DROP; | ||
257 | |||
258 | if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { | ||
259 | ip_conntrack_unexpect_related(exp); | ||
260 | return NF_DROP; | ||
261 | } | ||
262 | return NF_ACCEPT; | ||
263 | } | ||
264 | |||
265 | static void __exit fini(void) | ||
266 | { | ||
267 | rcu_assign_pointer(ip_nat_sip_hook, NULL); | ||
268 | rcu_assign_pointer(ip_nat_sdp_hook, NULL); | ||
269 | synchronize_rcu(); | ||
270 | } | ||
271 | |||
272 | static int __init init(void) | ||
273 | { | ||
274 | BUG_ON(rcu_dereference(ip_nat_sip_hook)); | ||
275 | BUG_ON(rcu_dereference(ip_nat_sdp_hook)); | ||
276 | rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip); | ||
277 | rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp); | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | module_init(init); | ||
282 | module_exit(fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c deleted file mode 100644 index e41d0efae515..000000000000 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ /dev/null | |||
@@ -1,1333 +0,0 @@ | |||
1 | /* | ||
2 | * ip_nat_snmp_basic.c | ||
3 | * | ||
4 | * Basic SNMP Application Layer Gateway | ||
5 | * | ||
6 | * This IP NAT module is intended for use with SNMP network | ||
7 | * discovery and monitoring applications where target networks use | ||
8 | * conflicting private address realms. | ||
9 | * | ||
10 | * Static NAT is used to remap the networks from the view of the network | ||
11 | * management system at the IP layer, and this module remaps some application | ||
12 | * layer addresses to match. | ||
13 | * | ||
14 | * The simplest form of ALG is performed, where only tagged IP addresses | ||
15 | * are modified. The module does not need to be MIB aware and only scans | ||
16 | * messages at the ASN.1/BER level. | ||
17 | * | ||
18 | * Currently, only SNMPv1 and SNMPv2 are supported. | ||
19 | * | ||
20 | * More information on ALG and associated issues can be found in | ||
21 | * RFC 2962 | ||
22 | * | ||
23 | * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory | ||
24 | * McLean & Jochen Friedrich, stripped down for use in the kernel. | ||
25 | * | ||
26 | * Copyright (c) 2000 RP Internet (www.rpi.net.au). | ||
27 | * | ||
28 | * This program is free software; you can redistribute it and/or modify | ||
29 | * it under the terms of the GNU General Public License as published by | ||
30 | * the Free Software Foundation; either version 2 of the License, or | ||
31 | * (at your option) any later version. | ||
32 | * This program is distributed in the hope that it will be useful, | ||
33 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
34 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
35 | * GNU General Public License for more details. | ||
36 | * You should have received a copy of the GNU General Public License | ||
37 | * along with this program; if not, write to the Free Software | ||
38 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
39 | * | ||
40 | * Author: James Morris <jmorris@intercode.com.au> | ||
41 | * | ||
42 | * Updates: | ||
43 | * 2000-08-06: Convert to new helper API (Harald Welte). | ||
44 | * | ||
45 | */ | ||
46 | #include <linux/in.h> | ||
47 | #include <linux/module.h> | ||
48 | #include <linux/types.h> | ||
49 | #include <linux/kernel.h> | ||
50 | #include <linux/moduleparam.h> | ||
51 | #include <linux/netfilter_ipv4.h> | ||
52 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
53 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
54 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
55 | #include <linux/ip.h> | ||
56 | #include <linux/udp.h> | ||
57 | #include <net/checksum.h> | ||
58 | #include <net/udp.h> | ||
59 | #include <asm/uaccess.h> | ||
60 | |||
61 | MODULE_LICENSE("GPL"); | ||
62 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | ||
63 | MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); | ||
64 | |||
65 | #define SNMP_PORT 161 | ||
66 | #define SNMP_TRAP_PORT 162 | ||
67 | #define NOCT1(n) (*(u8 *)n) | ||
68 | |||
69 | static int debug; | ||
70 | static DEFINE_SPINLOCK(snmp_lock); | ||
71 | |||
72 | /* | ||
73 | * Application layer address mapping mimics the NAT mapping, but | ||
74 | * only for the first octet in this case (a more flexible system | ||
75 | * can be implemented if needed). | ||
76 | */ | ||
77 | struct oct1_map | ||
78 | { | ||
79 | u_int8_t from; | ||
80 | u_int8_t to; | ||
81 | }; | ||
82 | |||
83 | |||
84 | /***************************************************************************** | ||
85 | * | ||
86 | * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse) | ||
87 | * | ||
88 | *****************************************************************************/ | ||
89 | |||
90 | /* Class */ | ||
91 | #define ASN1_UNI 0 /* Universal */ | ||
92 | #define ASN1_APL 1 /* Application */ | ||
93 | #define ASN1_CTX 2 /* Context */ | ||
94 | #define ASN1_PRV 3 /* Private */ | ||
95 | |||
96 | /* Tag */ | ||
97 | #define ASN1_EOC 0 /* End Of Contents */ | ||
98 | #define ASN1_BOL 1 /* Boolean */ | ||
99 | #define ASN1_INT 2 /* Integer */ | ||
100 | #define ASN1_BTS 3 /* Bit String */ | ||
101 | #define ASN1_OTS 4 /* Octet String */ | ||
102 | #define ASN1_NUL 5 /* Null */ | ||
103 | #define ASN1_OJI 6 /* Object Identifier */ | ||
104 | #define ASN1_OJD 7 /* Object Description */ | ||
105 | #define ASN1_EXT 8 /* External */ | ||
106 | #define ASN1_SEQ 16 /* Sequence */ | ||
107 | #define ASN1_SET 17 /* Set */ | ||
108 | #define ASN1_NUMSTR 18 /* Numerical String */ | ||
109 | #define ASN1_PRNSTR 19 /* Printable String */ | ||
110 | #define ASN1_TEXSTR 20 /* Teletext String */ | ||
111 | #define ASN1_VIDSTR 21 /* Video String */ | ||
112 | #define ASN1_IA5STR 22 /* IA5 String */ | ||
113 | #define ASN1_UNITIM 23 /* Universal Time */ | ||
114 | #define ASN1_GENTIM 24 /* General Time */ | ||
115 | #define ASN1_GRASTR 25 /* Graphical String */ | ||
116 | #define ASN1_VISSTR 26 /* Visible String */ | ||
117 | #define ASN1_GENSTR 27 /* General String */ | ||
118 | |||
119 | /* Primitive / Constructed methods*/ | ||
120 | #define ASN1_PRI 0 /* Primitive */ | ||
121 | #define ASN1_CON 1 /* Constructed */ | ||
122 | |||
123 | /* | ||
124 | * Error codes. | ||
125 | */ | ||
126 | #define ASN1_ERR_NOERROR 0 | ||
127 | #define ASN1_ERR_DEC_EMPTY 2 | ||
128 | #define ASN1_ERR_DEC_EOC_MISMATCH 3 | ||
129 | #define ASN1_ERR_DEC_LENGTH_MISMATCH 4 | ||
130 | #define ASN1_ERR_DEC_BADVALUE 5 | ||
131 | |||
132 | /* | ||
133 | * ASN.1 context. | ||
134 | */ | ||
135 | struct asn1_ctx | ||
136 | { | ||
137 | int error; /* Error condition */ | ||
138 | unsigned char *pointer; /* Octet just to be decoded */ | ||
139 | unsigned char *begin; /* First octet */ | ||
140 | unsigned char *end; /* Octet after last octet */ | ||
141 | }; | ||
142 | |||
143 | /* | ||
144 | * Octet string (not null terminated) | ||
145 | */ | ||
146 | struct asn1_octstr | ||
147 | { | ||
148 | unsigned char *data; | ||
149 | unsigned int len; | ||
150 | }; | ||
151 | |||
152 | static void asn1_open(struct asn1_ctx *ctx, | ||
153 | unsigned char *buf, | ||
154 | unsigned int len) | ||
155 | { | ||
156 | ctx->begin = buf; | ||
157 | ctx->end = buf + len; | ||
158 | ctx->pointer = buf; | ||
159 | ctx->error = ASN1_ERR_NOERROR; | ||
160 | } | ||
161 | |||
162 | static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) | ||
163 | { | ||
164 | if (ctx->pointer >= ctx->end) { | ||
165 | ctx->error = ASN1_ERR_DEC_EMPTY; | ||
166 | return 0; | ||
167 | } | ||
168 | *ch = *(ctx->pointer)++; | ||
169 | return 1; | ||
170 | } | ||
171 | |||
172 | static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) | ||
173 | { | ||
174 | unsigned char ch; | ||
175 | |||
176 | *tag = 0; | ||
177 | |||
178 | do | ||
179 | { | ||
180 | if (!asn1_octet_decode(ctx, &ch)) | ||
181 | return 0; | ||
182 | *tag <<= 7; | ||
183 | *tag |= ch & 0x7F; | ||
184 | } while ((ch & 0x80) == 0x80); | ||
185 | return 1; | ||
186 | } | ||
187 | |||
188 | static unsigned char asn1_id_decode(struct asn1_ctx *ctx, | ||
189 | unsigned int *cls, | ||
190 | unsigned int *con, | ||
191 | unsigned int *tag) | ||
192 | { | ||
193 | unsigned char ch; | ||
194 | |||
195 | if (!asn1_octet_decode(ctx, &ch)) | ||
196 | return 0; | ||
197 | |||
198 | *cls = (ch & 0xC0) >> 6; | ||
199 | *con = (ch & 0x20) >> 5; | ||
200 | *tag = (ch & 0x1F); | ||
201 | |||
202 | if (*tag == 0x1F) { | ||
203 | if (!asn1_tag_decode(ctx, tag)) | ||
204 | return 0; | ||
205 | } | ||
206 | return 1; | ||
207 | } | ||
208 | |||
209 | static unsigned char asn1_length_decode(struct asn1_ctx *ctx, | ||
210 | unsigned int *def, | ||
211 | unsigned int *len) | ||
212 | { | ||
213 | unsigned char ch, cnt; | ||
214 | |||
215 | if (!asn1_octet_decode(ctx, &ch)) | ||
216 | return 0; | ||
217 | |||
218 | if (ch == 0x80) | ||
219 | *def = 0; | ||
220 | else { | ||
221 | *def = 1; | ||
222 | |||
223 | if (ch < 0x80) | ||
224 | *len = ch; | ||
225 | else { | ||
226 | cnt = (unsigned char) (ch & 0x7F); | ||
227 | *len = 0; | ||
228 | |||
229 | while (cnt > 0) { | ||
230 | if (!asn1_octet_decode(ctx, &ch)) | ||
231 | return 0; | ||
232 | *len <<= 8; | ||
233 | *len |= ch; | ||
234 | cnt--; | ||
235 | } | ||
236 | } | ||
237 | } | ||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | static unsigned char asn1_header_decode(struct asn1_ctx *ctx, | ||
242 | unsigned char **eoc, | ||
243 | unsigned int *cls, | ||
244 | unsigned int *con, | ||
245 | unsigned int *tag) | ||
246 | { | ||
247 | unsigned int def, len; | ||
248 | |||
249 | if (!asn1_id_decode(ctx, cls, con, tag)) | ||
250 | return 0; | ||
251 | |||
252 | def = len = 0; | ||
253 | if (!asn1_length_decode(ctx, &def, &len)) | ||
254 | return 0; | ||
255 | |||
256 | if (def) | ||
257 | *eoc = ctx->pointer + len; | ||
258 | else | ||
259 | *eoc = NULL; | ||
260 | return 1; | ||
261 | } | ||
262 | |||
263 | static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) | ||
264 | { | ||
265 | unsigned char ch; | ||
266 | |||
267 | if (eoc == 0) { | ||
268 | if (!asn1_octet_decode(ctx, &ch)) | ||
269 | return 0; | ||
270 | |||
271 | if (ch != 0x00) { | ||
272 | ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; | ||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | if (!asn1_octet_decode(ctx, &ch)) | ||
277 | return 0; | ||
278 | |||
279 | if (ch != 0x00) { | ||
280 | ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; | ||
281 | return 0; | ||
282 | } | ||
283 | return 1; | ||
284 | } else { | ||
285 | if (ctx->pointer != eoc) { | ||
286 | ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH; | ||
287 | return 0; | ||
288 | } | ||
289 | return 1; | ||
290 | } | ||
291 | } | ||
292 | |||
/* Decode an ASN.1 NULL value: it carries no content octets, so simply
 * advance the parse cursor to the end of the (empty) value. */
static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
{
	ctx->pointer = eoc;
	return 1;
}
298 | |||
299 | static unsigned char asn1_long_decode(struct asn1_ctx *ctx, | ||
300 | unsigned char *eoc, | ||
301 | long *integer) | ||
302 | { | ||
303 | unsigned char ch; | ||
304 | unsigned int len; | ||
305 | |||
306 | if (!asn1_octet_decode(ctx, &ch)) | ||
307 | return 0; | ||
308 | |||
309 | *integer = (signed char) ch; | ||
310 | len = 1; | ||
311 | |||
312 | while (ctx->pointer < eoc) { | ||
313 | if (++len > sizeof (long)) { | ||
314 | ctx->error = ASN1_ERR_DEC_BADVALUE; | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | if (!asn1_octet_decode(ctx, &ch)) | ||
319 | return 0; | ||
320 | |||
321 | *integer <<= 8; | ||
322 | *integer |= ch; | ||
323 | } | ||
324 | return 1; | ||
325 | } | ||
326 | |||
327 | static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, | ||
328 | unsigned char *eoc, | ||
329 | unsigned int *integer) | ||
330 | { | ||
331 | unsigned char ch; | ||
332 | unsigned int len; | ||
333 | |||
334 | if (!asn1_octet_decode(ctx, &ch)) | ||
335 | return 0; | ||
336 | |||
337 | *integer = ch; | ||
338 | if (ch == 0) len = 0; | ||
339 | else len = 1; | ||
340 | |||
341 | while (ctx->pointer < eoc) { | ||
342 | if (++len > sizeof (unsigned int)) { | ||
343 | ctx->error = ASN1_ERR_DEC_BADVALUE; | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | if (!asn1_octet_decode(ctx, &ch)) | ||
348 | return 0; | ||
349 | |||
350 | *integer <<= 8; | ||
351 | *integer |= ch; | ||
352 | } | ||
353 | return 1; | ||
354 | } | ||
355 | |||
356 | static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, | ||
357 | unsigned char *eoc, | ||
358 | unsigned long *integer) | ||
359 | { | ||
360 | unsigned char ch; | ||
361 | unsigned int len; | ||
362 | |||
363 | if (!asn1_octet_decode(ctx, &ch)) | ||
364 | return 0; | ||
365 | |||
366 | *integer = ch; | ||
367 | if (ch == 0) len = 0; | ||
368 | else len = 1; | ||
369 | |||
370 | while (ctx->pointer < eoc) { | ||
371 | if (++len > sizeof (unsigned long)) { | ||
372 | ctx->error = ASN1_ERR_DEC_BADVALUE; | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | if (!asn1_octet_decode(ctx, &ch)) | ||
377 | return 0; | ||
378 | |||
379 | *integer <<= 8; | ||
380 | *integer |= ch; | ||
381 | } | ||
382 | return 1; | ||
383 | } | ||
384 | |||
385 | static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, | ||
386 | unsigned char *eoc, | ||
387 | unsigned char **octets, | ||
388 | unsigned int *len) | ||
389 | { | ||
390 | unsigned char *ptr; | ||
391 | |||
392 | *len = 0; | ||
393 | |||
394 | *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); | ||
395 | if (*octets == NULL) { | ||
396 | if (net_ratelimit()) | ||
397 | printk("OOM in bsalg (%d)\n", __LINE__); | ||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | ptr = *octets; | ||
402 | while (ctx->pointer < eoc) { | ||
403 | if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) { | ||
404 | kfree(*octets); | ||
405 | *octets = NULL; | ||
406 | return 0; | ||
407 | } | ||
408 | (*len)++; | ||
409 | } | ||
410 | return 1; | ||
411 | } | ||
412 | |||
/*
 * Decode one base-128 OID sub-identifier: seven payload bits per octet,
 * with the high bit set on every octet except the last.
 */
static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
				       unsigned long *subid)
{
	unsigned char octet;

	*subid = 0;

	do {
		if (!asn1_octet_decode(ctx, &octet))
			return 0;

		*subid = (*subid << 7) | (octet & 0x7F);
	} while (octet & 0x80);

	return 1;
}
429 | |||
430 | static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, | ||
431 | unsigned char *eoc, | ||
432 | unsigned long **oid, | ||
433 | unsigned int *len) | ||
434 | { | ||
435 | unsigned long subid; | ||
436 | unsigned int size; | ||
437 | unsigned long *optr; | ||
438 | |||
439 | size = eoc - ctx->pointer + 1; | ||
440 | *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); | ||
441 | if (*oid == NULL) { | ||
442 | if (net_ratelimit()) | ||
443 | printk("OOM in bsalg (%d)\n", __LINE__); | ||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | optr = *oid; | ||
448 | |||
449 | if (!asn1_subid_decode(ctx, &subid)) { | ||
450 | kfree(*oid); | ||
451 | *oid = NULL; | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | if (subid < 40) { | ||
456 | optr [0] = 0; | ||
457 | optr [1] = subid; | ||
458 | } else if (subid < 80) { | ||
459 | optr [0] = 1; | ||
460 | optr [1] = subid - 40; | ||
461 | } else { | ||
462 | optr [0] = 2; | ||
463 | optr [1] = subid - 80; | ||
464 | } | ||
465 | |||
466 | *len = 2; | ||
467 | optr += 2; | ||
468 | |||
469 | while (ctx->pointer < eoc) { | ||
470 | if (++(*len) > size) { | ||
471 | ctx->error = ASN1_ERR_DEC_BADVALUE; | ||
472 | kfree(*oid); | ||
473 | *oid = NULL; | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | if (!asn1_subid_decode(ctx, optr++)) { | ||
478 | kfree(*oid); | ||
479 | *oid = NULL; | ||
480 | return 0; | ||
481 | } | ||
482 | } | ||
483 | return 1; | ||
484 | } | ||
485 | |||
/*****************************************************************************
 *
 * SNMP decoding routines (gxsnmp author Dirk Wisse)
 *
 *****************************************************************************/

/* SNMP Versions */
#define SNMP_V1				0
#define SNMP_V2C			1
#define SNMP_V2				2
#define SNMP_V3				3

/* Default Sizes */
#define SNMP_SIZE_COMM			256
#define SNMP_SIZE_OBJECTID		128
#define SNMP_SIZE_BUFCHR		256
#define SNMP_SIZE_BUFINT		128
#define SNMP_SIZE_SMALLOBJECTID		16

/* Requests: PDU type tags (context-class ASN.1 tags in the message) */
#define SNMP_PDU_GET			0
#define SNMP_PDU_NEXT			1
#define SNMP_PDU_RESPONSE		2
#define SNMP_PDU_SET			3
#define SNMP_PDU_TRAP1			4
#define SNMP_PDU_BULK			5
#define SNMP_PDU_INFORM			6
#define SNMP_PDU_TRAP2			7

/* Errors: SNMP error-status values */
#define SNMP_NOERROR			0
#define SNMP_TOOBIG			1
#define SNMP_NOSUCHNAME			2
#define SNMP_BADVALUE			3
#define SNMP_READONLY			4
#define SNMP_GENERROR			5
#define SNMP_NOACCESS			6
#define SNMP_WRONGTYPE			7
#define SNMP_WRONGLENGTH		8
#define SNMP_WRONGENCODING		9
#define SNMP_WRONGVALUE			10
#define SNMP_NOCREATION			11
#define SNMP_INCONSISTENTVALUE		12
#define SNMP_RESOURCEUNAVAILABLE	13
#define SNMP_COMMITFAILED		14
#define SNMP_UNDOFAILED			15
#define SNMP_AUTHORIZATIONERROR		16
#define SNMP_NOTWRITABLE		17
#define SNMP_INCONSISTENTNAME		18

/* General SNMP V1 Traps: generic-trap field values */
#define SNMP_TRAP_COLDSTART		0
#define SNMP_TRAP_WARMSTART		1
#define SNMP_TRAP_LINKDOWN		2
#define SNMP_TRAP_LINKUP		3
#define SNMP_TRAP_AUTFAILURE		4
#define SNMP_TRAP_EQPNEIGHBORLOSS	5
#define SNMP_TRAP_ENTSPECIFIC		6

/* SNMPv1 Types: the letter code names the union snmp_syntax member
 * used to store the decoded value */
#define SNMP_NULL		0
#define SNMP_INTEGER		1	/* l  */
#define SNMP_OCTETSTR		2	/* c  */
#define SNMP_DISPLAYSTR		2	/* c  */
#define SNMP_OBJECTID		3	/* ul */
#define SNMP_IPADDR		4	/* uc */
#define SNMP_COUNTER		5	/* ul */
#define SNMP_GAUGE		6	/* ul */
#define SNMP_TIMETICKS		7	/* ul */
#define SNMP_OPAQUE		8	/* c  */

/* Additional SNMPv2 Types */
#define SNMP_UINTEGER		5	/* ul */
#define SNMP_BITSTR		9	/* uc */
#define SNMP_NSAP		10	/* uc */
#define SNMP_COUNTER64		11	/* ul */
#define SNMP_NOSUCHOBJECT	12
#define SNMP_NOSUCHINSTANCE	13
#define SNMP_ENDOFMIBVIEW	14

/* Decoded value storage: zero-length arrays overlaying the bytes
 * allocated past the end of struct snmp_object; the member used
 * depends on the object's SNMP type (see letter codes above). */
union snmp_syntax
{
	unsigned char uc[0];	/* 8 bit unsigned */
	char c[0];		/* 8 bit signed */
	unsigned long ul[0];	/* 32 bit unsigned */
	long l[0];		/* 32 bit signed */
};

/* One decoded VarBind: kmalloc'd OID of id_len sub-identifiers, the
 * SNMP syntax type, and syntax_len bytes of value trailing the struct. */
struct snmp_object
{
	unsigned long *id;
	unsigned int id_len;
	unsigned short type;
	unsigned int syntax_len;
	union snmp_syntax syntax;
};

/* Header fields of a request/response-style PDU. */
struct snmp_request
{
	unsigned long id;
	unsigned int error_status;
	unsigned int error_index;
};

/* Header fields of an SNMPv1 Trap-PDU; id and ip_address are
 * kmalloc'd by snmp_trap_decode(). */
struct snmp_v1_trap
{
	unsigned long *id;
	unsigned int id_len;
	unsigned long ip_address;	/* pointer */
	unsigned int general;
	unsigned int specific;
	unsigned long time;
};

/* SNMP types: application-class ASN.1 tag values */
#define SNMP_IPA	0
#define SNMP_CNT	1
#define SNMP_GGE	2
#define SNMP_TIT	3
#define SNMP_OPQ	4
#define SNMP_C64	6

/* SNMP errors: context-class tag values for SNMPv2 exceptions */
#define SERR_NSO	0
#define SERR_NSI	1
#define SERR_EOM	2
612 | |||
/* Forward declaration: mangle_address() is called by snmp_trap_decode()
 * before its definition further down this file. */
static inline void mangle_address(unsigned char *begin,
				  unsigned char *addr,
				  const struct oct1_map *map,
				  __sum16 *check);

/* One row of the (ASN.1 class, tag) -> SNMP syntax mapping table. */
struct snmp_cnv
{
	unsigned int class;
	unsigned int tag;
	int syntax;
};

/* Conversion table consulted by snmp_tag_cls2syntax(); terminated by
 * a sentinel row with syntax == -1. */
static struct snmp_cnv snmp_conv [] =
{
	{ASN1_UNI, ASN1_NUL, SNMP_NULL},
	{ASN1_UNI, ASN1_INT, SNMP_INTEGER},
	{ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
	{ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
	{ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
	{ASN1_APL, SNMP_IPA, SNMP_IPADDR},
	{ASN1_APL, SNMP_CNT, SNMP_COUNTER},	/* Counter32 */
	{ASN1_APL, SNMP_GGE, SNMP_GAUGE},	/* Gauge32 == Unsigned32  */
	{ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
	{ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},

	/* SNMPv2 data types and errors */
	{ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
	{ASN1_APL, SNMP_C64, SNMP_COUNTER64},
	{ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
	{ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
	{ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
	{0, 0, -1}
};
645 | |||
646 | static unsigned char snmp_tag_cls2syntax(unsigned int tag, | ||
647 | unsigned int cls, | ||
648 | unsigned short *syntax) | ||
649 | { | ||
650 | struct snmp_cnv *cnv; | ||
651 | |||
652 | cnv = snmp_conv; | ||
653 | |||
654 | while (cnv->syntax != -1) { | ||
655 | if (cnv->tag == tag && cnv->class == cls) { | ||
656 | *syntax = cnv->syntax; | ||
657 | return 1; | ||
658 | } | ||
659 | cnv++; | ||
660 | } | ||
661 | return 0; | ||
662 | } | ||
663 | |||
/*
 * Decode one SNMP VarBind (a SEQUENCE of OID + value) into a freshly
 * allocated struct snmp_object.
 *
 * Ownership: on success (return 1) the CALLER owns both *obj and
 * (*obj)->id and must kfree() them.  On failure (return 0) every
 * intermediate allocation is released here and *obj is left NULL.
 */
static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
					struct snmp_object **obj)
{
	unsigned int cls, con, tag, len, idlen;
	unsigned short type;
	unsigned char *eoc, *end, *p;
	unsigned long *lp, *id;
	unsigned long ul;
	long l;

	*obj = NULL;
	id = NULL;

	/* Outer SEQUENCE wrapping the VarBind. */
	if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
		return 0;

	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
		return 0;

	/* Object name: an OBJECT IDENTIFIER (kmalloc'd into id). */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
		return 0;

	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
		return 0;

	if (!asn1_oid_decode(ctx, end, &id, &idlen))
		return 0;

	/* Value header: from here on, every failure path must free id. */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
		kfree(id);
		return 0;
	}

	/* Only primitive encodings are accepted for values. */
	if (con != ASN1_PRI) {
		kfree(id);
		return 0;
	}

	type = 0;
	if (!snmp_tag_cls2syntax(tag, cls, &type)) {
		kfree(id);
		return 0;
	}

	/* Decode the value per its syntax type; each branch allocates
	 * *obj with exactly enough trailing bytes for the value. */
	l = 0;
	switch (type) {
	case SNMP_INTEGER:
		len = sizeof(long);
		if (!asn1_long_decode(ctx, end, &l)) {
			kfree(id);
			return 0;
		}
		*obj = kmalloc(sizeof(struct snmp_object) + len,
			       GFP_ATOMIC);
		if (*obj == NULL) {
			kfree(id);
			if (net_ratelimit())
				printk("OOM in bsalg (%d)\n", __LINE__);
			return 0;
		}
		(*obj)->syntax.l[0] = l;
		break;
	case SNMP_OCTETSTR:
	case SNMP_OPAQUE:
		if (!asn1_octets_decode(ctx, end, &p, &len)) {
			kfree(id);
			return 0;
		}
		*obj = kmalloc(sizeof(struct snmp_object) + len,
			       GFP_ATOMIC);
		if (*obj == NULL) {
			kfree(id);
			if (net_ratelimit())
				printk("OOM in bsalg (%d)\n", __LINE__);
			return 0;
		}
		/* Copy the octets into the trailing storage, then drop
		 * the temporary buffer from asn1_octets_decode(). */
		memcpy((*obj)->syntax.c, p, len);
		kfree(p);
		break;
	case SNMP_NULL:
	case SNMP_NOSUCHOBJECT:
	case SNMP_NOSUCHINSTANCE:
	case SNMP_ENDOFMIBVIEW:
		/* Value-less types: no trailing storage needed. */
		len = 0;
		*obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
		if (*obj == NULL) {
			kfree(id);
			if (net_ratelimit())
				printk("OOM in bsalg (%d)\n", __LINE__);
			return 0;
		}
		if (!asn1_null_decode(ctx, end)) {
			kfree(id);
			kfree(*obj);
			*obj = NULL;
			return 0;
		}
		break;
	case SNMP_OBJECTID:
		if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
			kfree(id);
			return 0;
		}
		/* len was a sub-identifier count; convert to bytes. */
		len *= sizeof(unsigned long);
		*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
		if (*obj == NULL) {
			kfree(lp);
			kfree(id);
			if (net_ratelimit())
				printk("OOM in bsalg (%d)\n", __LINE__);
			return 0;
		}
		memcpy((*obj)->syntax.ul, lp, len);
		kfree(lp);
		break;
	case SNMP_IPADDR:
		if (!asn1_octets_decode(ctx, end, &p, &len)) {
			kfree(id);
			return 0;
		}
		/* IPv4 addresses only: exactly four octets. */
		if (len != 4) {
			kfree(p);
			kfree(id);
			return 0;
		}
		*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
		if (*obj == NULL) {
			kfree(p);
			kfree(id);
			if (net_ratelimit())
				printk("OOM in bsalg (%d)\n", __LINE__);
			return 0;
		}
		memcpy((*obj)->syntax.uc, p, len);
		kfree(p);
		break;
	case SNMP_COUNTER:
	case SNMP_GAUGE:
	case SNMP_TIMETICKS:
		len = sizeof(unsigned long);
		if (!asn1_ulong_decode(ctx, end, &ul)) {
			kfree(id);
			return 0;
		}
		*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
		if (*obj == NULL) {
			kfree(id);
			if (net_ratelimit())
				printk("OOM in bsalg (%d)\n", __LINE__);
			return 0;
		}
		(*obj)->syntax.ul[0] = ul;
		break;
	default:
		kfree(id);
		return 0;
	}

	/* Fill in the bookkeeping fields; id ownership moves to *obj. */
	(*obj)->syntax_len = len;
	(*obj)->type = type;
	(*obj)->id = id;
	(*obj)->id_len = idlen;

	if (!asn1_eoc_decode(ctx, eoc)) {
		kfree(id);
		kfree(*obj);
		*obj = NULL;
		return 0;
	}
	return 1;
}
835 | |||
836 | static unsigned char snmp_request_decode(struct asn1_ctx *ctx, | ||
837 | struct snmp_request *request) | ||
838 | { | ||
839 | unsigned int cls, con, tag; | ||
840 | unsigned char *end; | ||
841 | |||
842 | if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) | ||
843 | return 0; | ||
844 | |||
845 | if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) | ||
846 | return 0; | ||
847 | |||
848 | if (!asn1_ulong_decode(ctx, end, &request->id)) | ||
849 | return 0; | ||
850 | |||
851 | if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) | ||
852 | return 0; | ||
853 | |||
854 | if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) | ||
855 | return 0; | ||
856 | |||
857 | if (!asn1_uint_decode(ctx, end, &request->error_status)) | ||
858 | return 0; | ||
859 | |||
860 | if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) | ||
861 | return 0; | ||
862 | |||
863 | if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) | ||
864 | return 0; | ||
865 | |||
866 | if (!asn1_uint_decode(ctx, end, &request->error_index)) | ||
867 | return 0; | ||
868 | |||
869 | return 1; | ||
870 | } | ||
871 | |||
872 | /* | ||
873 | * Fast checksum update for possibly oddly-aligned UDP byte, from the | ||
874 | * code example in the draft. | ||
875 | */ | ||
876 | static void fast_csum(__sum16 *csum, | ||
877 | const unsigned char *optr, | ||
878 | const unsigned char *nptr, | ||
879 | int offset) | ||
880 | { | ||
881 | unsigned char s[4]; | ||
882 | |||
883 | if (offset & 1) { | ||
884 | s[0] = s[2] = 0; | ||
885 | s[1] = ~*optr; | ||
886 | s[3] = *nptr; | ||
887 | } else { | ||
888 | s[1] = s[3] = 0; | ||
889 | s[0] = ~*optr; | ||
890 | s[2] = *nptr; | ||
891 | } | ||
892 | |||
893 | *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); | ||
894 | } | ||
895 | |||
/*
 * Mangle IP address.
 * - begin points to the start of the snmp message
 * - addr points to the start of the address
 *
 * Only the FIRST octet of the address is compared and rewritten
 * (struct oct1_map maps a single leading octet), and the UDP checksum
 * is patched incrementally when the datagram carries one (*check != 0).
 */
static inline void mangle_address(unsigned char *begin,
				  unsigned char *addr,
				  const struct oct1_map *map,
				  __sum16 *check)
{
	if (map->from == NOCT1(addr)) {
		u_int32_t old;

		/* Snapshot the old address for the debug printout only. */
		if (debug)
			memcpy(&old, (unsigned char *)addr, sizeof(old));

		*addr = map->to;

		/* Update UDP checksum if being used */
		if (*check) {
			fast_csum(check,
				  &map->from, &map->to, addr - begin);
		}

		if (debug)
			printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
			       "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
	}
}
925 | |||
/*
 * Decode the header fields of an SNMPv1 Trap-PDU: enterprise OID,
 * agent address (which is NAT-mangled in place), generic/specific
 * trap numbers and the timestamp.
 *
 * Ownership: on success (return 1) trap->id and trap->ip_address are
 * freshly allocated and the CALLER must kfree() both.  On failure
 * (return 0) any allocations made here have already been released via
 * the goto cleanup chain below.
 */
static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
				      struct snmp_v1_trap *trap,
				      const struct oct1_map *map,
				      __sum16 *check)
{
	unsigned int cls, con, tag, len;
	unsigned char *end;

	/* enterprise: OBJECT IDENTIFIER */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
		return 0;

	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
		return 0;

	if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
		return 0;

	/* agent-addr: application-class IpAddress or a plain OCTET STRING */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
		goto err_id_free;

	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
		goto err_id_free;

	/* NOTE: on failure asn1_octets_decode() leaves trap->ip_address
	 * NULL, so jumping to err_id_free (not err_addr_free) is safe. */
	if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
		goto err_id_free;

	/* IPv4 only */
	if (len != 4)
		goto err_addr_free;

	/* The 4 address octets just decoded end at ctx->pointer; rewrite
	 * them in the packet itself, fixing the UDP checksum. */
	mangle_address(ctx->begin, ctx->pointer - 4, map, check);

	/* generic-trap: INTEGER */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
		goto err_addr_free;

	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
		goto err_addr_free;

	if (!asn1_uint_decode(ctx, end, &trap->general))
		goto err_addr_free;

	/* specific-trap: INTEGER */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
		goto err_addr_free;

	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
		goto err_addr_free;

	if (!asn1_uint_decode(ctx, end, &trap->specific))
		goto err_addr_free;

	/* time-stamp: application-class TimeTicks or a plain INTEGER */
	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
		goto err_addr_free;

	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
		goto err_addr_free;

	if (!asn1_ulong_decode(ctx, end, &trap->time))
		goto err_addr_free;

	return 1;

err_addr_free:
	kfree((unsigned long *)trap->ip_address);

err_id_free:
	kfree(trap->id);

	return 0;
}
997 | |||
998 | /***************************************************************************** | ||
999 | * | ||
1000 | * Misc. routines | ||
1001 | * | ||
1002 | *****************************************************************************/ | ||
1003 | |||
1004 | static void hex_dump(unsigned char *buf, size_t len) | ||
1005 | { | ||
1006 | size_t i; | ||
1007 | |||
1008 | for (i = 0; i < len; i++) { | ||
1009 | if (i && !(i % 16)) | ||
1010 | printk("\n"); | ||
1011 | printk("%02x ", *(buf + i)); | ||
1012 | } | ||
1013 | printk("\n"); | ||
1014 | } | ||
1015 | |||
/*
 * Parse and mangle SNMP message according to mapping.
 * (This is the 'basic' rewriting method, hence the ALG's name.)
 *
 * Walks the BER structure of an SNMPv1/v2c message; for trap-v1 agent
 * addresses and for IpAddress-typed VarBind values it rewrites the
 * leading address octet per @map and patches the UDP checksum.
 * Returns 1 when the message was handled (or deliberately left alone,
 * e.g. unsupported version or PDU type) and 0 on a parse error, which
 * makes the caller drop the packet.
 */
static int snmp_parse_mangle(unsigned char *msg,
			     u_int16_t len,
			     const struct oct1_map *map,
			     __sum16 *check)
{
	unsigned char *eoc, *end;
	unsigned int cls, con, tag, vers, pdutype;
	struct asn1_ctx ctx;
	struct asn1_octstr comm;
	struct snmp_object **obj;

	if (debug > 1)
		hex_dump(msg, len);

	asn1_open(&ctx, msg, len);

	/*
	 * Start of SNMP message.
	 */
	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
		return 0;
	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
		return 0;

	/*
	 * Version 1 or 2 handled.
	 */
	if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
		return 0;
	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
		return 0;
	if (!asn1_uint_decode (&ctx, end, &vers))
		return 0;
	if (debug > 1)
		printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
	/* SNMPv3 and later are not understood: accept untouched. */
	if (vers > 1)
		return 1;

	/*
	 * Community.
	 */
	if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
		return 0;
	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
		return 0;
	if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
		return 0;
	if (debug > 1) {
		unsigned int i;

		printk(KERN_DEBUG "bsalg: community: ");
		for (i = 0; i < comm.len; i++)
			printk("%c", comm.data[i]);
		printk("\n");
	}
	/* The community string is never rewritten; drop the copy. */
	kfree(comm.data);

	/*
	 * PDU type
	 */
	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
		return 0;
	if (cls != ASN1_CTX || con != ASN1_CON)
		return 0;
	if (debug > 1) {
		unsigned char *pdus[] = {
			[SNMP_PDU_GET] = "get",
			[SNMP_PDU_NEXT] = "get-next",
			[SNMP_PDU_RESPONSE] = "response",
			[SNMP_PDU_SET] = "set",
			[SNMP_PDU_TRAP1] = "trapv1",
			[SNMP_PDU_BULK] = "bulk",
			[SNMP_PDU_INFORM] = "inform",
			[SNMP_PDU_TRAP2] = "trapv2"
		};

		if (pdutype > SNMP_PDU_TRAP2)
			printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
		else
			printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
	}
	/* Only responses and traps can carry addresses worth mangling. */
	if (pdutype != SNMP_PDU_RESPONSE &&
	    pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
		return 1;

	/*
	 * Request header or v1 trap
	 */
	if (pdutype == SNMP_PDU_TRAP1) {
		struct snmp_v1_trap trap;
		unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);

		/* On success snmp_trap_decode() hands us ownership of the
		 * id and ip_address buffers; on failure it has already
		 * freed them. */
		if (ret) {
			kfree(trap.id);
			kfree((unsigned long *)trap.ip_address);
		} else
			return ret;

	} else {
		struct snmp_request req;

		if (!snmp_request_decode(&ctx, &req))
			return 0;

		if (debug > 1)
			printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
			       "error_index=%u\n", req.id, req.error_status,
			       req.error_index);
	}

	/*
	 * Loop through objects, look for IP addresses to mangle.
	 */
	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
		return 0;

	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
		return 0;

	/* Scratch slot reused for each decoded object.  (Allocates a
	 * whole object where a single pointer slot would suffice --
	 * harmless over-allocation.) */
	obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
	if (obj == NULL) {
		if (net_ratelimit())
			printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
		return 0;
	}

	while (!asn1_eoc_decode(&ctx, eoc)) {
		unsigned int i;

		if (!snmp_object_decode(&ctx, obj)) {
			/* Defensive: snmp_object_decode() leaves *obj NULL
			 * on every failure path, so this branch should not
			 * normally trigger. */
			if (*obj) {
				kfree((*obj)->id);
				kfree(*obj);
			}
			kfree(obj);
			return 0;
		}

		if (debug > 1) {
			printk(KERN_DEBUG "bsalg: object: ");
			for (i = 0; i < (*obj)->id_len; i++) {
				if (i > 0)
					printk(".");
				printk("%lu", (*obj)->id[i]);
			}
			printk(": type=%u\n", (*obj)->type);

		}

		/* The 4 value octets of an IpAddress end at ctx.pointer. */
		if ((*obj)->type == SNMP_IPADDR)
			mangle_address(ctx.begin, ctx.pointer - 4 , map, check);

		kfree((*obj)->id);
		kfree(*obj);
	}
	kfree(obj);

	if (!asn1_eoc_decode(&ctx, eoc))
		return 0;

	return 1;
}
1182 | |||
1183 | /***************************************************************************** | ||
1184 | * | ||
1185 | * NAT routines. | ||
1186 | * | ||
1187 | *****************************************************************************/ | ||
1188 | |||
1189 | /* | ||
1190 | * SNMP translation routine. | ||
1191 | */ | ||
1192 | static int snmp_translate(struct ip_conntrack *ct, | ||
1193 | enum ip_conntrack_info ctinfo, | ||
1194 | struct sk_buff **pskb) | ||
1195 | { | ||
1196 | struct iphdr *iph = (*pskb)->nh.iph; | ||
1197 | struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); | ||
1198 | u_int16_t udplen = ntohs(udph->len); | ||
1199 | u_int16_t paylen = udplen - sizeof(struct udphdr); | ||
1200 | int dir = CTINFO2DIR(ctinfo); | ||
1201 | struct oct1_map map; | ||
1202 | |||
1203 | /* | ||
1204 | * Determine mappping for application layer addresses based | ||
1205 | * on NAT manipulations for the packet. | ||
1206 | */ | ||
1207 | if (dir == IP_CT_DIR_ORIGINAL) { | ||
1208 | /* SNAT traps */ | ||
1209 | map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip); | ||
1210 | map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip); | ||
1211 | } else { | ||
1212 | /* DNAT replies */ | ||
1213 | map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); | ||
1214 | map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip); | ||
1215 | } | ||
1216 | |||
1217 | if (map.from == map.to) | ||
1218 | return NF_ACCEPT; | ||
1219 | |||
1220 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), | ||
1221 | paylen, &map, &udph->check)) { | ||
1222 | if (net_ratelimit()) | ||
1223 | printk(KERN_WARNING "bsalg: parser failed\n"); | ||
1224 | return NF_DROP; | ||
1225 | } | ||
1226 | return NF_ACCEPT; | ||
1227 | } | ||
1228 | |||
/* We don't actually set up expectations, just adjust internal IP
 * addresses if this is being NATted.
 *
 * Conntrack helper entry point: filters for SNMP replies (src port
 * 161) and originating traps (dst port 162), validates the UDP length,
 * then serialises payload rewriting through snmp_lock. */
static int help(struct sk_buff **pskb,
		struct ip_conntrack *ct,
		enum ip_conntrack_info ctinfo)
{
	int dir = CTINFO2DIR(ctinfo);
	unsigned int ret;
	struct iphdr *iph = (*pskb)->nh.iph;
	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);

	/* SNMP replies and originating SNMP traps get mangled */
	if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
		return NF_ACCEPT;
	if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	/* No NAT? */
	if (!(ct->status & IPS_NAT_MASK))
		return NF_ACCEPT;

	/*
	 * Make sure the packet length is ok. So far, we were only guaranteed
	 * to have a valid length IP header plus 8 bytes, which means we have
	 * enough room for a UDP header. Just verify the UDP length field so we
	 * can mess around with the payload.
	 */
	if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
		if (net_ratelimit())
			printk(KERN_WARNING "SNMP: dropping malformed packet "
			       "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
			       NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
		return NF_DROP;
	}

	/* The payload is rewritten in place, so we need a private copy. */
	if (!skb_make_writable(pskb, (*pskb)->len))
		return NF_DROP;

	/* Serialise: the parser/mangler is not reentrant. */
	spin_lock_bh(&snmp_lock);
	ret = snmp_translate(ct, ctinfo, pskb);
	spin_unlock_bh(&snmp_lock);
	return ret;
}
1272 | |||
/* Conntrack helper matching SNMP agent traffic (UDP port 161). */
static struct ip_conntrack_helper snmp_helper = {
	.max_expected = 0,	/* payload rewriting only; no expectations */
	.timeout = 180,
	.me = THIS_MODULE,
	.help = help,
	.name = "snmp",

	.tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
		  .dst = {.protonum = IPPROTO_UDP},
	},
	.mask = {.src = {.u = {0xFFFF}},
		 .dst = {.protonum = 0xFF},
	},
};

/* Conntrack helper matching SNMP trap traffic (UDP port 162). */
static struct ip_conntrack_helper snmp_trap_helper = {
	.max_expected = 0,	/* payload rewriting only; no expectations */
	.timeout = 180,
	.me = THIS_MODULE,
	.help = help,
	.name = "snmp_trap",

	.tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
		  .dst = {.protonum = IPPROTO_UDP},
	},
	.mask = {.src = {.u = {0xFFFF}},
		 .dst = {.protonum = 0xFF},
	},
};
1302 | |||
1303 | /***************************************************************************** | ||
1304 | * | ||
1305 | * Module stuff. | ||
1306 | * | ||
1307 | *****************************************************************************/ | ||
1308 | |||
1309 | static int __init ip_nat_snmp_basic_init(void) | ||
1310 | { | ||
1311 | int ret = 0; | ||
1312 | |||
1313 | ret = ip_conntrack_helper_register(&snmp_helper); | ||
1314 | if (ret < 0) | ||
1315 | return ret; | ||
1316 | ret = ip_conntrack_helper_register(&snmp_trap_helper); | ||
1317 | if (ret < 0) { | ||
1318 | ip_conntrack_helper_unregister(&snmp_helper); | ||
1319 | return ret; | ||
1320 | } | ||
1321 | return ret; | ||
1322 | } | ||
1323 | |||
/* Module unload: unregister both conntrack helpers. */
static void __exit ip_nat_snmp_basic_fini(void)
{
	ip_conntrack_helper_unregister(&snmp_helper);
	ip_conntrack_helper_unregister(&snmp_trap_helper);
}

module_init(ip_nat_snmp_basic_init);
module_exit(ip_nat_snmp_basic_fini);

/* Expose the 'debug' knob as a writable module parameter (mode 0600). */
module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c deleted file mode 100644 index 6bcfdf6dfcc9..000000000000 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ /dev/null | |||
@@ -1,388 +0,0 @@ | |||
1 | /* This file contains all the functions required for the standalone | ||
2 | ip_nat module. | ||
3 | |||
4 | These are not required by the compatibility layer. | ||
5 | */ | ||
6 | |||
7 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
8 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> | ||
17 | * - new API and handling of conntrack/nat helpers | ||
18 | * - now capable of multiple expectations for one master | ||
19 | * */ | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | #include <linux/icmp.h> | ||
23 | #include <linux/ip.h> | ||
24 | #include <linux/netfilter.h> | ||
25 | #include <linux/netfilter_ipv4.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/skbuff.h> | ||
28 | #include <linux/proc_fs.h> | ||
29 | #include <net/ip.h> | ||
30 | #include <net/checksum.h> | ||
31 | #include <linux/spinlock.h> | ||
32 | |||
33 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
34 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
35 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
36 | #include <linux/netfilter_ipv4/ip_nat_core.h> | ||
37 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
38 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
39 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
40 | |||
41 | #if 0 | ||
42 | #define DEBUGP printk | ||
43 | #else | ||
44 | #define DEBUGP(format, args...) | ||
45 | #endif | ||
46 | |||
47 | #ifdef CONFIG_XFRM | ||
48 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | ||
49 | { | ||
50 | struct ip_conntrack *ct; | ||
51 | struct ip_conntrack_tuple *t; | ||
52 | enum ip_conntrack_info ctinfo; | ||
53 | enum ip_conntrack_dir dir; | ||
54 | unsigned long statusbit; | ||
55 | |||
56 | ct = ip_conntrack_get(skb, &ctinfo); | ||
57 | if (ct == NULL) | ||
58 | return; | ||
59 | dir = CTINFO2DIR(ctinfo); | ||
60 | t = &ct->tuplehash[dir].tuple; | ||
61 | |||
62 | if (dir == IP_CT_DIR_ORIGINAL) | ||
63 | statusbit = IPS_DST_NAT; | ||
64 | else | ||
65 | statusbit = IPS_SRC_NAT; | ||
66 | |||
67 | if (ct->status & statusbit) { | ||
68 | fl->fl4_dst = t->dst.ip; | ||
69 | if (t->dst.protonum == IPPROTO_TCP || | ||
70 | t->dst.protonum == IPPROTO_UDP) | ||
71 | fl->fl_ip_dport = t->dst.u.tcp.port; | ||
72 | } | ||
73 | |||
74 | statusbit ^= IPS_NAT_MASK; | ||
75 | |||
76 | if (ct->status & statusbit) { | ||
77 | fl->fl4_src = t->src.ip; | ||
78 | if (t->dst.protonum == IPPROTO_TCP || | ||
79 | t->dst.protonum == IPPROTO_UDP) | ||
80 | fl->fl_ip_sport = t->src.u.tcp.port; | ||
81 | } | ||
82 | } | ||
83 | #endif | ||
84 | |||
85 | static unsigned int | ||
86 | ip_nat_fn(unsigned int hooknum, | ||
87 | struct sk_buff **pskb, | ||
88 | const struct net_device *in, | ||
89 | const struct net_device *out, | ||
90 | int (*okfn)(struct sk_buff *)) | ||
91 | { | ||
92 | struct ip_conntrack *ct; | ||
93 | enum ip_conntrack_info ctinfo; | ||
94 | struct ip_nat_info *info; | ||
95 | /* maniptype == SRC for postrouting. */ | ||
96 | enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); | ||
97 | |||
98 | /* We never see fragments: conntrack defrags on pre-routing | ||
99 | and local-out, and ip_nat_out protects post-routing. */ | ||
100 | IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off | ||
101 | & htons(IP_MF|IP_OFFSET))); | ||
102 | |||
103 | ct = ip_conntrack_get(*pskb, &ctinfo); | ||
104 | /* Can't track? It's not due to stress, or conntrack would | ||
105 | have dropped it. Hence it's the user's responsibilty to | ||
106 | packet filter it out, or implement conntrack/NAT for that | ||
107 | protocol. 8) --RR */ | ||
108 | if (!ct) { | ||
109 | /* Exception: ICMP redirect to new connection (not in | ||
110 | hash table yet). We must not let this through, in | ||
111 | case we're doing NAT to the same network. */ | ||
112 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { | ||
113 | struct icmphdr _hdr, *hp; | ||
114 | |||
115 | hp = skb_header_pointer(*pskb, | ||
116 | (*pskb)->nh.iph->ihl*4, | ||
117 | sizeof(_hdr), &_hdr); | ||
118 | if (hp != NULL && | ||
119 | hp->type == ICMP_REDIRECT) | ||
120 | return NF_DROP; | ||
121 | } | ||
122 | return NF_ACCEPT; | ||
123 | } | ||
124 | |||
125 | /* Don't try to NAT if this packet is not conntracked */ | ||
126 | if (ct == &ip_conntrack_untracked) | ||
127 | return NF_ACCEPT; | ||
128 | |||
129 | switch (ctinfo) { | ||
130 | case IP_CT_RELATED: | ||
131 | case IP_CT_RELATED+IP_CT_IS_REPLY: | ||
132 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { | ||
133 | if (!ip_nat_icmp_reply_translation(ct, ctinfo, | ||
134 | hooknum, pskb)) | ||
135 | return NF_DROP; | ||
136 | else | ||
137 | return NF_ACCEPT; | ||
138 | } | ||
139 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ | ||
140 | case IP_CT_NEW: | ||
141 | info = &ct->nat.info; | ||
142 | |||
143 | /* Seen it before? This can happen for loopback, retrans, | ||
144 | or local packets.. */ | ||
145 | if (!ip_nat_initialized(ct, maniptype)) { | ||
146 | unsigned int ret; | ||
147 | |||
148 | if (unlikely(is_confirmed(ct))) | ||
149 | /* NAT module was loaded late */ | ||
150 | ret = alloc_null_binding_confirmed(ct, info, | ||
151 | hooknum); | ||
152 | else if (hooknum == NF_IP_LOCAL_IN) | ||
153 | /* LOCAL_IN hook doesn't have a chain! */ | ||
154 | ret = alloc_null_binding(ct, info, hooknum); | ||
155 | else | ||
156 | ret = ip_nat_rule_find(pskb, hooknum, | ||
157 | in, out, ct, | ||
158 | info); | ||
159 | |||
160 | if (ret != NF_ACCEPT) { | ||
161 | return ret; | ||
162 | } | ||
163 | } else | ||
164 | DEBUGP("Already setup manip %s for ct %p\n", | ||
165 | maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", | ||
166 | ct); | ||
167 | break; | ||
168 | |||
169 | default: | ||
170 | /* ESTABLISHED */ | ||
171 | IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED | ||
172 | || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); | ||
173 | info = &ct->nat.info; | ||
174 | } | ||
175 | |||
176 | IP_NF_ASSERT(info); | ||
177 | return ip_nat_packet(ct, ctinfo, hooknum, pskb); | ||
178 | } | ||
179 | |||
180 | static unsigned int | ||
181 | ip_nat_in(unsigned int hooknum, | ||
182 | struct sk_buff **pskb, | ||
183 | const struct net_device *in, | ||
184 | const struct net_device *out, | ||
185 | int (*okfn)(struct sk_buff *)) | ||
186 | { | ||
187 | unsigned int ret; | ||
188 | __be32 daddr = (*pskb)->nh.iph->daddr; | ||
189 | |||
190 | ret = ip_nat_fn(hooknum, pskb, in, out, okfn); | ||
191 | if (ret != NF_DROP && ret != NF_STOLEN | ||
192 | && daddr != (*pskb)->nh.iph->daddr) { | ||
193 | dst_release((*pskb)->dst); | ||
194 | (*pskb)->dst = NULL; | ||
195 | } | ||
196 | return ret; | ||
197 | } | ||
198 | |||
199 | static unsigned int | ||
200 | ip_nat_out(unsigned int hooknum, | ||
201 | struct sk_buff **pskb, | ||
202 | const struct net_device *in, | ||
203 | const struct net_device *out, | ||
204 | int (*okfn)(struct sk_buff *)) | ||
205 | { | ||
206 | #ifdef CONFIG_XFRM | ||
207 | struct ip_conntrack *ct; | ||
208 | enum ip_conntrack_info ctinfo; | ||
209 | #endif | ||
210 | unsigned int ret; | ||
211 | |||
212 | /* root is playing with raw sockets. */ | ||
213 | if ((*pskb)->len < sizeof(struct iphdr) | ||
214 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) | ||
215 | return NF_ACCEPT; | ||
216 | |||
217 | ret = ip_nat_fn(hooknum, pskb, in, out, okfn); | ||
218 | #ifdef CONFIG_XFRM | ||
219 | if (ret != NF_DROP && ret != NF_STOLEN | ||
220 | && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { | ||
221 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
222 | |||
223 | if (ct->tuplehash[dir].tuple.src.ip != | ||
224 | ct->tuplehash[!dir].tuple.dst.ip | ||
225 | || ct->tuplehash[dir].tuple.src.u.all != | ||
226 | ct->tuplehash[!dir].tuple.dst.u.all | ||
227 | ) | ||
228 | return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; | ||
229 | } | ||
230 | #endif | ||
231 | return ret; | ||
232 | } | ||
233 | |||
234 | static unsigned int | ||
235 | ip_nat_local_fn(unsigned int hooknum, | ||
236 | struct sk_buff **pskb, | ||
237 | const struct net_device *in, | ||
238 | const struct net_device *out, | ||
239 | int (*okfn)(struct sk_buff *)) | ||
240 | { | ||
241 | struct ip_conntrack *ct; | ||
242 | enum ip_conntrack_info ctinfo; | ||
243 | unsigned int ret; | ||
244 | |||
245 | /* root is playing with raw sockets. */ | ||
246 | if ((*pskb)->len < sizeof(struct iphdr) | ||
247 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) | ||
248 | return NF_ACCEPT; | ||
249 | |||
250 | ret = ip_nat_fn(hooknum, pskb, in, out, okfn); | ||
251 | if (ret != NF_DROP && ret != NF_STOLEN | ||
252 | && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { | ||
253 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
254 | |||
255 | if (ct->tuplehash[dir].tuple.dst.ip != | ||
256 | ct->tuplehash[!dir].tuple.src.ip) { | ||
257 | if (ip_route_me_harder(pskb, RTN_UNSPEC)) | ||
258 | ret = NF_DROP; | ||
259 | } | ||
260 | #ifdef CONFIG_XFRM | ||
261 | else if (ct->tuplehash[dir].tuple.dst.u.all != | ||
262 | ct->tuplehash[!dir].tuple.src.u.all) | ||
263 | if (ip_xfrm_me_harder(pskb)) | ||
264 | ret = NF_DROP; | ||
265 | #endif | ||
266 | |||
267 | } | ||
268 | return ret; | ||
269 | } | ||
270 | |||
271 | static unsigned int | ||
272 | ip_nat_adjust(unsigned int hooknum, | ||
273 | struct sk_buff **pskb, | ||
274 | const struct net_device *in, | ||
275 | const struct net_device *out, | ||
276 | int (*okfn)(struct sk_buff *)) | ||
277 | { | ||
278 | struct ip_conntrack *ct; | ||
279 | enum ip_conntrack_info ctinfo; | ||
280 | |||
281 | ct = ip_conntrack_get(*pskb, &ctinfo); | ||
282 | if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { | ||
283 | DEBUGP("ip_nat_standalone: adjusting sequence number\n"); | ||
284 | if (!ip_nat_seq_adjust(pskb, ct, ctinfo)) | ||
285 | return NF_DROP; | ||
286 | } | ||
287 | return NF_ACCEPT; | ||
288 | } | ||
289 | |||
290 | /* We must be after connection tracking and before packet filtering. */ | ||
291 | |||
292 | static struct nf_hook_ops ip_nat_ops[] = { | ||
293 | /* Before packet filtering, change destination */ | ||
294 | { | ||
295 | .hook = ip_nat_in, | ||
296 | .owner = THIS_MODULE, | ||
297 | .pf = PF_INET, | ||
298 | .hooknum = NF_IP_PRE_ROUTING, | ||
299 | .priority = NF_IP_PRI_NAT_DST, | ||
300 | }, | ||
301 | /* After packet filtering, change source */ | ||
302 | { | ||
303 | .hook = ip_nat_out, | ||
304 | .owner = THIS_MODULE, | ||
305 | .pf = PF_INET, | ||
306 | .hooknum = NF_IP_POST_ROUTING, | ||
307 | .priority = NF_IP_PRI_NAT_SRC, | ||
308 | }, | ||
309 | /* After conntrack, adjust sequence number */ | ||
310 | { | ||
311 | .hook = ip_nat_adjust, | ||
312 | .owner = THIS_MODULE, | ||
313 | .pf = PF_INET, | ||
314 | .hooknum = NF_IP_POST_ROUTING, | ||
315 | .priority = NF_IP_PRI_NAT_SEQ_ADJUST, | ||
316 | }, | ||
317 | /* Before packet filtering, change destination */ | ||
318 | { | ||
319 | .hook = ip_nat_local_fn, | ||
320 | .owner = THIS_MODULE, | ||
321 | .pf = PF_INET, | ||
322 | .hooknum = NF_IP_LOCAL_OUT, | ||
323 | .priority = NF_IP_PRI_NAT_DST, | ||
324 | }, | ||
325 | /* After packet filtering, change source */ | ||
326 | { | ||
327 | .hook = ip_nat_fn, | ||
328 | .owner = THIS_MODULE, | ||
329 | .pf = PF_INET, | ||
330 | .hooknum = NF_IP_LOCAL_IN, | ||
331 | .priority = NF_IP_PRI_NAT_SRC, | ||
332 | }, | ||
333 | /* After conntrack, adjust sequence number */ | ||
334 | { | ||
335 | .hook = ip_nat_adjust, | ||
336 | .owner = THIS_MODULE, | ||
337 | .pf = PF_INET, | ||
338 | .hooknum = NF_IP_LOCAL_IN, | ||
339 | .priority = NF_IP_PRI_NAT_SEQ_ADJUST, | ||
340 | }, | ||
341 | }; | ||
342 | |||
343 | static int __init ip_nat_standalone_init(void) | ||
344 | { | ||
345 | int ret = 0; | ||
346 | |||
347 | need_conntrack(); | ||
348 | |||
349 | #ifdef CONFIG_XFRM | ||
350 | BUG_ON(ip_nat_decode_session != NULL); | ||
351 | ip_nat_decode_session = nat_decode_session; | ||
352 | #endif | ||
353 | ret = ip_nat_rule_init(); | ||
354 | if (ret < 0) { | ||
355 | printk("ip_nat_init: can't setup rules.\n"); | ||
356 | goto cleanup_decode_session; | ||
357 | } | ||
358 | ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops)); | ||
359 | if (ret < 0) { | ||
360 | printk("ip_nat_init: can't register hooks.\n"); | ||
361 | goto cleanup_rule_init; | ||
362 | } | ||
363 | return ret; | ||
364 | |||
365 | cleanup_rule_init: | ||
366 | ip_nat_rule_cleanup(); | ||
367 | cleanup_decode_session: | ||
368 | #ifdef CONFIG_XFRM | ||
369 | ip_nat_decode_session = NULL; | ||
370 | synchronize_net(); | ||
371 | #endif | ||
372 | return ret; | ||
373 | } | ||
374 | |||
375 | static void __exit ip_nat_standalone_fini(void) | ||
376 | { | ||
377 | nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops)); | ||
378 | ip_nat_rule_cleanup(); | ||
379 | #ifdef CONFIG_XFRM | ||
380 | ip_nat_decode_session = NULL; | ||
381 | synchronize_net(); | ||
382 | #endif | ||
383 | } | ||
384 | |||
385 | module_init(ip_nat_standalone_init); | ||
386 | module_exit(ip_nat_standalone_fini); | ||
387 | |||
388 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c deleted file mode 100644 index 604793536fc1..000000000000 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License version 2 as | ||
5 | * published by the Free Software Foundation. | ||
6 | * | ||
7 | * Version: 0.0.7 | ||
8 | * | ||
9 | * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org> | ||
10 | * - Port to newnat API | ||
11 | * | ||
12 | * This module currently supports DNAT: | ||
13 | * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y | ||
14 | * | ||
15 | * and SNAT: | ||
16 | * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x } | ||
17 | * | ||
18 | * It has not been tested with | ||
19 | * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip | ||
20 | * If you do test this please let me know if it works or not. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/netfilter_ipv4.h> | ||
26 | #include <linux/ip.h> | ||
27 | #include <linux/udp.h> | ||
28 | |||
29 | #include <linux/netfilter.h> | ||
30 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
31 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
32 | #include <linux/netfilter_ipv4/ip_conntrack_tftp.h> | ||
33 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
34 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
35 | #include <linux/moduleparam.h> | ||
36 | |||
37 | MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); | ||
38 | MODULE_DESCRIPTION("tftp NAT helper"); | ||
39 | MODULE_LICENSE("GPL"); | ||
40 | |||
41 | static unsigned int help(struct sk_buff **pskb, | ||
42 | enum ip_conntrack_info ctinfo, | ||
43 | struct ip_conntrack_expect *exp) | ||
44 | { | ||
45 | struct ip_conntrack *ct = exp->master; | ||
46 | |||
47 | exp->saved_proto.udp.port | ||
48 | = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; | ||
49 | exp->dir = IP_CT_DIR_REPLY; | ||
50 | exp->expectfn = ip_nat_follow_master; | ||
51 | if (ip_conntrack_expect_related(exp) != 0) | ||
52 | return NF_DROP; | ||
53 | return NF_ACCEPT; | ||
54 | } | ||
55 | |||
56 | static void __exit ip_nat_tftp_fini(void) | ||
57 | { | ||
58 | rcu_assign_pointer(ip_nat_tftp_hook, NULL); | ||
59 | synchronize_rcu(); | ||
60 | } | ||
61 | |||
62 | static int __init ip_nat_tftp_init(void) | ||
63 | { | ||
64 | BUG_ON(rcu_dereference(ip_nat_tftp_hook)); | ||
65 | rcu_assign_pointer(ip_nat_tftp_hook, help); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | module_init(ip_nat_tftp_init); | ||
70 | module_exit(ip_nat_tftp_fini); | ||
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a14798a850d7..702d94db19b9 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -8,18 +8,6 @@ | |||
8 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
10 | * published by the Free Software Foundation. | 10 | * published by the Free Software Foundation. |
11 | * | ||
12 | * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). | ||
13 | * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). | ||
14 | * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian | ||
15 | * Zander). | ||
16 | * 2000-08-01: Added Nick Williams' MAC support. | ||
17 | * 2002-06-25: Code cleanup. | ||
18 | * 2005-01-10: Added /proc counter for dropped packets; fixed so | ||
19 | * packets aren't delivered to user space if they're going | ||
20 | * to be dropped. | ||
21 | * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte) | ||
22 | * | ||
23 | */ | 11 | */ |
24 | #include <linux/module.h> | 12 | #include <linux/module.h> |
25 | #include <linux/skbuff.h> | 13 | #include <linux/skbuff.h> |
@@ -191,12 +179,13 @@ ipq_flush(int verdict) | |||
191 | static struct sk_buff * | 179 | static struct sk_buff * |
192 | ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) | 180 | ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) |
193 | { | 181 | { |
194 | unsigned char *old_tail; | 182 | sk_buff_data_t old_tail; |
195 | size_t size = 0; | 183 | size_t size = 0; |
196 | size_t data_len = 0; | 184 | size_t data_len = 0; |
197 | struct sk_buff *skb; | 185 | struct sk_buff *skb; |
198 | struct ipq_packet_msg *pmsg; | 186 | struct ipq_packet_msg *pmsg; |
199 | struct nlmsghdr *nlh; | 187 | struct nlmsghdr *nlh; |
188 | struct timeval tv; | ||
200 | 189 | ||
201 | read_lock_bh(&queue_lock); | 190 | read_lock_bh(&queue_lock); |
202 | 191 | ||
@@ -234,15 +223,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) | |||
234 | if (!skb) | 223 | if (!skb) |
235 | goto nlmsg_failure; | 224 | goto nlmsg_failure; |
236 | 225 | ||
237 | old_tail= skb->tail; | 226 | old_tail = skb->tail; |
238 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); | 227 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); |
239 | pmsg = NLMSG_DATA(nlh); | 228 | pmsg = NLMSG_DATA(nlh); |
240 | memset(pmsg, 0, sizeof(*pmsg)); | 229 | memset(pmsg, 0, sizeof(*pmsg)); |
241 | 230 | ||
242 | pmsg->packet_id = (unsigned long )entry; | 231 | pmsg->packet_id = (unsigned long )entry; |
243 | pmsg->data_len = data_len; | 232 | pmsg->data_len = data_len; |
244 | pmsg->timestamp_sec = entry->skb->tstamp.off_sec; | 233 | tv = ktime_to_timeval(entry->skb->tstamp); |
245 | pmsg->timestamp_usec = entry->skb->tstamp.off_usec; | 234 | pmsg->timestamp_sec = tv.tv_sec; |
235 | pmsg->timestamp_usec = tv.tv_usec; | ||
246 | pmsg->mark = entry->skb->mark; | 236 | pmsg->mark = entry->skb->mark; |
247 | pmsg->hook = entry->info->hook; | 237 | pmsg->hook = entry->info->hook; |
248 | pmsg->hw_protocol = entry->skb->protocol; | 238 | pmsg->hw_protocol = entry->skb->protocol; |
@@ -378,7 +368,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) | |||
378 | } | 368 | } |
379 | if (!skb_make_writable(&e->skb, v->data_len)) | 369 | if (!skb_make_writable(&e->skb, v->data_len)) |
380 | return -ENOMEM; | 370 | return -ENOMEM; |
381 | memcpy(e->skb->data, v->payload, v->data_len); | 371 | skb_copy_to_linear_data(e->skb, v->payload, v->data_len); |
382 | e->skb->ip_summed = CHECKSUM_NONE; | 372 | e->skb->ip_summed = CHECKSUM_NONE; |
383 | 373 | ||
384 | return 0; | 374 | return 0; |
@@ -495,7 +485,7 @@ ipq_rcv_skb(struct sk_buff *skb) | |||
495 | if (skblen < sizeof(*nlh)) | 485 | if (skblen < sizeof(*nlh)) |
496 | return; | 486 | return; |
497 | 487 | ||
498 | nlh = (struct nlmsghdr *)skb->data; | 488 | nlh = nlmsg_hdr(skb); |
499 | nlmsglen = nlh->nlmsg_len; | 489 | nlmsglen = nlh->nlmsg_len; |
500 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) | 490 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) |
501 | return; | 491 | return; |
@@ -678,7 +668,7 @@ static int __init ip_queue_init(void) | |||
678 | 668 | ||
679 | netlink_register_notifier(&ipq_nl_notifier); | 669 | netlink_register_notifier(&ipq_nl_notifier); |
680 | ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, | 670 | ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, |
681 | THIS_MODULE); | 671 | NULL, THIS_MODULE); |
682 | if (ipqnl == NULL) { | 672 | if (ipqnl == NULL) { |
683 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); | 673 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); |
684 | goto cleanup_netlink_notifier; | 674 | goto cleanup_netlink_notifier; |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 50cc4b92e284..e3f83bf160d9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -7,12 +7,6 @@ | |||
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | * | ||
11 | * 19 Jan 2002 Harald Welte <laforge@gnumonks.org> | ||
12 | * - increase module usage count as soon as we have rules inside | ||
13 | * a table | ||
14 | * 08 Oct 2005 Harald Welte <lafore@netfilter.org> | ||
15 | * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables" | ||
16 | */ | 10 | */ |
17 | #include <linux/cache.h> | 11 | #include <linux/cache.h> |
18 | #include <linux/capability.h> | 12 | #include <linux/capability.h> |
@@ -198,7 +192,7 @@ int do_match(struct ipt_entry_match *m, | |||
198 | { | 192 | { |
199 | /* Stop iteration if it doesn't match */ | 193 | /* Stop iteration if it doesn't match */ |
200 | if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, | 194 | if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, |
201 | offset, skb->nh.iph->ihl*4, hotdrop)) | 195 | offset, ip_hdrlen(skb), hotdrop)) |
202 | return 1; | 196 | return 1; |
203 | else | 197 | else |
204 | return 0; | 198 | return 0; |
@@ -231,7 +225,7 @@ ipt_do_table(struct sk_buff **pskb, | |||
231 | struct xt_table_info *private; | 225 | struct xt_table_info *private; |
232 | 226 | ||
233 | /* Initialization */ | 227 | /* Initialization */ |
234 | ip = (*pskb)->nh.iph; | 228 | ip = ip_hdr(*pskb); |
235 | datalen = (*pskb)->len - ip->ihl * 4; | 229 | datalen = (*pskb)->len - ip->ihl * 4; |
236 | indev = in ? in->name : nulldevname; | 230 | indev = in ? in->name : nulldevname; |
237 | outdev = out ? out->name : nulldevname; | 231 | outdev = out ? out->name : nulldevname; |
@@ -320,7 +314,7 @@ ipt_do_table(struct sk_buff **pskb, | |||
320 | = 0x57acc001; | 314 | = 0x57acc001; |
321 | #endif | 315 | #endif |
322 | /* Target might have changed stuff. */ | 316 | /* Target might have changed stuff. */ |
323 | ip = (*pskb)->nh.iph; | 317 | ip = ip_hdr(*pskb); |
324 | datalen = (*pskb)->len - ip->ihl * 4; | 318 | datalen = (*pskb)->len - ip->ihl * 4; |
325 | 319 | ||
326 | if (verdict == IPT_CONTINUE) | 320 | if (verdict == IPT_CONTINUE) |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 42b08029e867..40e273421398 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -21,15 +21,12 @@ | |||
21 | #include <linux/if_arp.h> | 21 | #include <linux/if_arp.h> |
22 | #include <linux/proc_fs.h> | 22 | #include <linux/proc_fs.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | |||
25 | #include <net/checksum.h> | ||
26 | |||
27 | #include <linux/netfilter_arp.h> | 24 | #include <linux/netfilter_arp.h> |
28 | |||
29 | #include <linux/netfilter/x_tables.h> | 25 | #include <linux/netfilter/x_tables.h> |
30 | #include <linux/netfilter_ipv4/ip_tables.h> | 26 | #include <linux/netfilter_ipv4/ip_tables.h> |
31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 27 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> |
32 | #include <net/netfilter/nf_conntrack_compat.h> | 28 | #include <net/netfilter/nf_conntrack.h> |
29 | #include <net/checksum.h> | ||
33 | 30 | ||
34 | #define CLUSTERIP_VERSION "0.8" | 31 | #define CLUSTERIP_VERSION "0.8" |
35 | 32 | ||
@@ -240,7 +237,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) | |||
240 | static inline u_int32_t | 237 | static inline u_int32_t |
241 | clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) | 238 | clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) |
242 | { | 239 | { |
243 | struct iphdr *iph = skb->nh.iph; | 240 | struct iphdr *iph = ip_hdr(skb); |
244 | unsigned long hashval; | 241 | unsigned long hashval; |
245 | u_int16_t sport, dport; | 242 | u_int16_t sport, dport; |
246 | u_int16_t *ports; | 243 | u_int16_t *ports; |
@@ -310,15 +307,16 @@ target(struct sk_buff **pskb, | |||
310 | const void *targinfo) | 307 | const void *targinfo) |
311 | { | 308 | { |
312 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 309 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
310 | struct nf_conn *ct; | ||
313 | enum ip_conntrack_info ctinfo; | 311 | enum ip_conntrack_info ctinfo; |
314 | u_int32_t *mark, hash; | 312 | u_int32_t hash; |
315 | 313 | ||
316 | /* don't need to clusterip_config_get() here, since refcount | 314 | /* don't need to clusterip_config_get() here, since refcount |
317 | * is only decremented by destroy() - and ip_tables guarantees | 315 | * is only decremented by destroy() - and ip_tables guarantees |
318 | * that the ->target() function isn't called after ->destroy() */ | 316 | * that the ->target() function isn't called after ->destroy() */ |
319 | 317 | ||
320 | mark = nf_ct_get_mark((*pskb), &ctinfo); | 318 | ct = nf_ct_get(*pskb, &ctinfo); |
321 | if (mark == NULL) { | 319 | if (ct == NULL) { |
322 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); | 320 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); |
323 | /* FIXME: need to drop invalid ones, since replies | 321 | /* FIXME: need to drop invalid ones, since replies |
324 | * to outgoing connections of other nodes will be | 322 | * to outgoing connections of other nodes will be |
@@ -328,7 +326,7 @@ target(struct sk_buff **pskb, | |||
328 | 326 | ||
329 | /* special case: ICMP error handling. conntrack distinguishes between | 327 | /* special case: ICMP error handling. conntrack distinguishes between |
330 | * error messages (RELATED) and information requests (see below) */ | 328 | * error messages (RELATED) and information requests (see below) */ |
331 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP | 329 | if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP |
332 | && (ctinfo == IP_CT_RELATED | 330 | && (ctinfo == IP_CT_RELATED |
333 | || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) | 331 | || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) |
334 | return XT_CONTINUE; | 332 | return XT_CONTINUE; |
@@ -341,7 +339,7 @@ target(struct sk_buff **pskb, | |||
341 | 339 | ||
342 | switch (ctinfo) { | 340 | switch (ctinfo) { |
343 | case IP_CT_NEW: | 341 | case IP_CT_NEW: |
344 | *mark = hash; | 342 | ct->mark = hash; |
345 | break; | 343 | break; |
346 | case IP_CT_RELATED: | 344 | case IP_CT_RELATED: |
347 | case IP_CT_RELATED+IP_CT_IS_REPLY: | 345 | case IP_CT_RELATED+IP_CT_IS_REPLY: |
@@ -358,7 +356,7 @@ target(struct sk_buff **pskb, | |||
358 | #ifdef DEBUG_CLUSTERP | 356 | #ifdef DEBUG_CLUSTERP |
359 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 357 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
360 | #endif | 358 | #endif |
361 | DEBUGP("hash=%u ct_hash=%u ", hash, *mark); | 359 | DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); |
362 | if (!clusterip_responsible(cipinfo->config, hash)) { | 360 | if (!clusterip_responsible(cipinfo->config, hash)) { |
363 | DEBUGP("not responsible\n"); | 361 | DEBUGP("not responsible\n"); |
364 | return NF_DROP; | 362 | return NF_DROP; |
@@ -521,7 +519,7 @@ arp_mangle(unsigned int hook, | |||
521 | const struct net_device *out, | 519 | const struct net_device *out, |
522 | int (*okfn)(struct sk_buff *)) | 520 | int (*okfn)(struct sk_buff *)) |
523 | { | 521 | { |
524 | struct arphdr *arp = (*pskb)->nh.arph; | 522 | struct arphdr *arp = arp_hdr(*pskb); |
525 | struct arp_payload *payload; | 523 | struct arp_payload *payload; |
526 | struct clusterip_config *c; | 524 | struct clusterip_config *c; |
527 | 525 | ||
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4f565633631d..918ca92e534a 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
@@ -5,14 +5,13 @@ | |||
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | * | ||
9 | * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp | ||
10 | */ | 8 | */ |
11 | 9 | ||
12 | #include <linux/in.h> | 10 | #include <linux/in.h> |
13 | #include <linux/module.h> | 11 | #include <linux/module.h> |
14 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
15 | #include <linux/ip.h> | 13 | #include <linux/ip.h> |
14 | #include <net/ip.h> | ||
16 | #include <linux/tcp.h> | 15 | #include <linux/tcp.h> |
17 | #include <net/checksum.h> | 16 | #include <net/checksum.h> |
18 | 17 | ||
@@ -29,13 +28,13 @@ MODULE_DESCRIPTION("iptables ECN modification module"); | |||
29 | static inline int | 28 | static inline int |
30 | set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) | 29 | set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) |
31 | { | 30 | { |
32 | struct iphdr *iph = (*pskb)->nh.iph; | 31 | struct iphdr *iph = ip_hdr(*pskb); |
33 | 32 | ||
34 | if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { | 33 | if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { |
35 | __u8 oldtos; | 34 | __u8 oldtos; |
36 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) | 35 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
37 | return 0; | 36 | return 0; |
38 | iph = (*pskb)->nh.iph; | 37 | iph = ip_hdr(*pskb); |
39 | oldtos = iph->tos; | 38 | oldtos = iph->tos; |
40 | iph->tos &= ~IPT_ECN_IP_MASK; | 39 | iph->tos &= ~IPT_ECN_IP_MASK; |
41 | iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); | 40 | iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); |
@@ -52,7 +51,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) | |||
52 | __be16 oldval; | 51 | __be16 oldval; |
53 | 52 | ||
54 | /* Not enought header? */ | 53 | /* Not enought header? */ |
55 | tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, | 54 | tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), |
56 | sizeof(_tcph), &_tcph); | 55 | sizeof(_tcph), &_tcph); |
57 | if (!tcph) | 56 | if (!tcph) |
58 | return 0; | 57 | return 0; |
@@ -63,9 +62,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) | |||
63 | tcph->cwr == einfo->proto.tcp.cwr))) | 62 | tcph->cwr == einfo->proto.tcp.cwr))) |
64 | return 1; | 63 | return 1; |
65 | 64 | ||
66 | if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) | 65 | if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) |
67 | return 0; | 66 | return 0; |
68 | tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; | 67 | tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); |
69 | 68 | ||
70 | oldval = ((__be16 *)tcph)[6]; | 69 | oldval = ((__be16 *)tcph)[6]; |
71 | if (einfo->operation & IPT_ECN_OP_SET_ECE) | 70 | if (einfo->operation & IPT_ECN_OP_SET_ECE) |
@@ -93,7 +92,7 @@ target(struct sk_buff **pskb, | |||
93 | return NF_DROP; | 92 | return NF_DROP; |
94 | 93 | ||
95 | if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) | 94 | if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) |
96 | && (*pskb)->nh.iph->protocol == IPPROTO_TCP) | 95 | && ip_hdr(*pskb)->protocol == IPPROTO_TCP) |
97 | if (!set_ect_tcp(pskb, einfo)) | 96 | if (!set_ect_tcp(pskb, einfo)) |
98 | return NF_DROP; | 97 | return NF_DROP; |
99 | 98 | ||
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index d9c37fd94228..a42c5cd968b1 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf, | |||
399 | /* MAC logging for input chain only. */ | 399 | /* MAC logging for input chain only. */ |
400 | printk("MAC="); | 400 | printk("MAC="); |
401 | if (skb->dev && skb->dev->hard_header_len | 401 | if (skb->dev && skb->dev->hard_header_len |
402 | && skb->mac.raw != (void*)skb->nh.iph) { | 402 | && skb->mac_header != skb->network_header) { |
403 | int i; | 403 | int i; |
404 | unsigned char *p = skb->mac.raw; | 404 | const unsigned char *p = skb_mac_header(skb); |
405 | for (i = 0; i < skb->dev->hard_header_len; i++,p++) | 405 | for (i = 0; i < skb->dev->hard_header_len; i++,p++) |
406 | printk("%02x%c", *p, | 406 | printk("%02x%c", *p, |
407 | i==skb->dev->hard_header_len - 1 | 407 | i==skb->dev->hard_header_len - 1 |
@@ -477,14 +477,10 @@ static int __init ipt_log_init(void) | |||
477 | ret = xt_register_target(&ipt_log_reg); | 477 | ret = xt_register_target(&ipt_log_reg); |
478 | if (ret < 0) | 478 | if (ret < 0) |
479 | return ret; | 479 | return ret; |
480 | if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { | 480 | ret = nf_log_register(PF_INET, &ipt_log_logger); |
481 | printk(KERN_WARNING "ipt_LOG: not logging via system console " | 481 | if (ret < 0 && ret != -EEXIST) |
482 | "since somebody else already registered for PF_INET\n"); | 482 | xt_unregister_target(&ipt_log_reg); |
483 | /* we cannot make module load fail here, since otherwise | 483 | return ret; |
484 | * iptables userspace would abort */ | ||
485 | } | ||
486 | |||
487 | return 0; | ||
488 | } | 484 | } |
489 | 485 | ||
490 | static void __exit ipt_log_fini(void) | 486 | static void __exit ipt_log_fini(void) |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index b5955f3a3f8f..d4f2d7775330 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -19,12 +19,8 @@ | |||
19 | #include <net/ip.h> | 19 | #include <net/ip.h> |
20 | #include <net/checksum.h> | 20 | #include <net/checksum.h> |
21 | #include <net/route.h> | 21 | #include <net/route.h> |
22 | #include <linux/netfilter_ipv4.h> | ||
23 | #ifdef CONFIG_NF_NAT_NEEDED | ||
24 | #include <net/netfilter/nf_nat_rule.h> | 22 | #include <net/netfilter/nf_nat_rule.h> |
25 | #else | 23 | #include <linux/netfilter_ipv4.h> |
26 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
27 | #endif | ||
28 | #include <linux/netfilter/x_tables.h> | 24 | #include <linux/netfilter/x_tables.h> |
29 | 25 | ||
30 | MODULE_LICENSE("GPL"); | 26 | MODULE_LICENSE("GPL"); |
@@ -48,7 +44,7 @@ masquerade_check(const char *tablename, | |||
48 | void *targinfo, | 44 | void *targinfo, |
49 | unsigned int hook_mask) | 45 | unsigned int hook_mask) |
50 | { | 46 | { |
51 | const struct ip_nat_multi_range_compat *mr = targinfo; | 47 | const struct nf_nat_multi_range_compat *mr = targinfo; |
52 | 48 | ||
53 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 49 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { |
54 | DEBUGP("masquerade_check: bad MAP_IPS.\n"); | 50 | DEBUGP("masquerade_check: bad MAP_IPS.\n"); |
@@ -69,33 +65,26 @@ masquerade_target(struct sk_buff **pskb, | |||
69 | const struct xt_target *target, | 65 | const struct xt_target *target, |
70 | const void *targinfo) | 66 | const void *targinfo) |
71 | { | 67 | { |
72 | #ifdef CONFIG_NF_NAT_NEEDED | 68 | struct nf_conn *ct; |
73 | struct nf_conn_nat *nat; | 69 | struct nf_conn_nat *nat; |
74 | #endif | ||
75 | struct ip_conntrack *ct; | ||
76 | enum ip_conntrack_info ctinfo; | 70 | enum ip_conntrack_info ctinfo; |
77 | struct ip_nat_range newrange; | 71 | struct nf_nat_range newrange; |
78 | const struct ip_nat_multi_range_compat *mr; | 72 | const struct nf_nat_multi_range_compat *mr; |
79 | struct rtable *rt; | 73 | struct rtable *rt; |
80 | __be32 newsrc; | 74 | __be32 newsrc; |
81 | 75 | ||
82 | IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); | 76 | NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); |
83 | 77 | ||
84 | ct = ip_conntrack_get(*pskb, &ctinfo); | 78 | ct = nf_ct_get(*pskb, &ctinfo); |
85 | #ifdef CONFIG_NF_NAT_NEEDED | ||
86 | nat = nfct_nat(ct); | 79 | nat = nfct_nat(ct); |
87 | #endif | 80 | |
88 | IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED | 81 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED |
89 | || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); | 82 | || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); |
90 | 83 | ||
91 | /* Source address is 0.0.0.0 - locally generated packet that is | 84 | /* Source address is 0.0.0.0 - locally generated packet that is |
92 | * probably not supposed to be masqueraded. | 85 | * probably not supposed to be masqueraded. |
93 | */ | 86 | */ |
94 | #ifdef CONFIG_NF_NAT_NEEDED | ||
95 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) | 87 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) |
96 | #else | ||
97 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0) | ||
98 | #endif | ||
99 | return NF_ACCEPT; | 88 | return NF_ACCEPT; |
100 | 89 | ||
101 | mr = targinfo; | 90 | mr = targinfo; |
@@ -107,40 +96,30 @@ masquerade_target(struct sk_buff **pskb, | |||
107 | } | 96 | } |
108 | 97 | ||
109 | write_lock_bh(&masq_lock); | 98 | write_lock_bh(&masq_lock); |
110 | #ifdef CONFIG_NF_NAT_NEEDED | ||
111 | nat->masq_index = out->ifindex; | 99 | nat->masq_index = out->ifindex; |
112 | #else | ||
113 | ct->nat.masq_index = out->ifindex; | ||
114 | #endif | ||
115 | write_unlock_bh(&masq_lock); | 100 | write_unlock_bh(&masq_lock); |
116 | 101 | ||
117 | /* Transfer from original range. */ | 102 | /* Transfer from original range. */ |
118 | newrange = ((struct ip_nat_range) | 103 | newrange = ((struct nf_nat_range) |
119 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, | 104 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, |
120 | newsrc, newsrc, | 105 | newsrc, newsrc, |
121 | mr->range[0].min, mr->range[0].max }); | 106 | mr->range[0].min, mr->range[0].max }); |
122 | 107 | ||
123 | /* Hand modified range to generic setup. */ | 108 | /* Hand modified range to generic setup. */ |
124 | return ip_nat_setup_info(ct, &newrange, hooknum); | 109 | return nf_nat_setup_info(ct, &newrange, hooknum); |
125 | } | 110 | } |
126 | 111 | ||
127 | static inline int | 112 | static inline int |
128 | device_cmp(struct ip_conntrack *i, void *ifindex) | 113 | device_cmp(struct nf_conn *i, void *ifindex) |
129 | { | 114 | { |
130 | int ret; | ||
131 | #ifdef CONFIG_NF_NAT_NEEDED | ||
132 | struct nf_conn_nat *nat = nfct_nat(i); | 115 | struct nf_conn_nat *nat = nfct_nat(i); |
116 | int ret; | ||
133 | 117 | ||
134 | if (!nat) | 118 | if (!nat) |
135 | return 0; | 119 | return 0; |
136 | #endif | ||
137 | 120 | ||
138 | read_lock_bh(&masq_lock); | 121 | read_lock_bh(&masq_lock); |
139 | #ifdef CONFIG_NF_NAT_NEEDED | ||
140 | ret = (nat->masq_index == (int)(long)ifindex); | 122 | ret = (nat->masq_index == (int)(long)ifindex); |
141 | #else | ||
142 | ret = (i->nat.masq_index == (int)(long)ifindex); | ||
143 | #endif | ||
144 | read_unlock_bh(&masq_lock); | 123 | read_unlock_bh(&masq_lock); |
145 | 124 | ||
146 | return ret; | 125 | return ret; |
@@ -156,9 +135,9 @@ static int masq_device_event(struct notifier_block *this, | |||
156 | /* Device was downed. Search entire table for | 135 | /* Device was downed. Search entire table for |
157 | conntracks which were associated with that device, | 136 | conntracks which were associated with that device, |
158 | and forget them. */ | 137 | and forget them. */ |
159 | IP_NF_ASSERT(dev->ifindex != 0); | 138 | NF_CT_ASSERT(dev->ifindex != 0); |
160 | 139 | ||
161 | ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); | 140 | nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); |
162 | } | 141 | } |
163 | 142 | ||
164 | return NOTIFY_DONE; | 143 | return NOTIFY_DONE; |
@@ -174,9 +153,9 @@ static int masq_inet_event(struct notifier_block *this, | |||
174 | /* IP address was deleted. Search entire table for | 153 | /* IP address was deleted. Search entire table for |
175 | conntracks which were associated with that device, | 154 | conntracks which were associated with that device, |
176 | and forget them. */ | 155 | and forget them. */ |
177 | IP_NF_ASSERT(dev->ifindex != 0); | 156 | NF_CT_ASSERT(dev->ifindex != 0); |
178 | 157 | ||
179 | ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); | 158 | nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); |
180 | } | 159 | } |
181 | 160 | ||
182 | return NOTIFY_DONE; | 161 | return NOTIFY_DONE; |
@@ -194,7 +173,7 @@ static struct xt_target masquerade = { | |||
194 | .name = "MASQUERADE", | 173 | .name = "MASQUERADE", |
195 | .family = AF_INET, | 174 | .family = AF_INET, |
196 | .target = masquerade_target, | 175 | .target = masquerade_target, |
197 | .targetsize = sizeof(struct ip_nat_multi_range_compat), | 176 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
198 | .table = "nat", | 177 | .table = "nat", |
199 | .hooks = 1 << NF_IP_POST_ROUTING, | 178 | .hooks = 1 << NF_IP_POST_ROUTING, |
200 | .checkentry = masquerade_check, | 179 | .checkentry = masquerade_check, |
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index fd7aaa347cd8..068c69bce30e 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
@@ -16,11 +16,7 @@ | |||
16 | #include <linux/netfilter.h> | 16 | #include <linux/netfilter.h> |
17 | #include <linux/netfilter_ipv4.h> | 17 | #include <linux/netfilter_ipv4.h> |
18 | #include <linux/netfilter/x_tables.h> | 18 | #include <linux/netfilter/x_tables.h> |
19 | #ifdef CONFIG_NF_NAT_NEEDED | ||
20 | #include <net/netfilter/nf_nat_rule.h> | 19 | #include <net/netfilter/nf_nat_rule.h> |
21 | #else | ||
22 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
23 | #endif | ||
24 | 20 | ||
25 | #define MODULENAME "NETMAP" | 21 | #define MODULENAME "NETMAP" |
26 | MODULE_LICENSE("GPL"); | 22 | MODULE_LICENSE("GPL"); |
@@ -40,7 +36,7 @@ check(const char *tablename, | |||
40 | void *targinfo, | 36 | void *targinfo, |
41 | unsigned int hook_mask) | 37 | unsigned int hook_mask) |
42 | { | 38 | { |
43 | const struct ip_nat_multi_range_compat *mr = targinfo; | 39 | const struct nf_nat_multi_range_compat *mr = targinfo; |
44 | 40 | ||
45 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { | 41 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { |
46 | DEBUGP(MODULENAME":check: bad MAP_IPS.\n"); | 42 | DEBUGP(MODULENAME":check: bad MAP_IPS.\n"); |
@@ -61,39 +57,39 @@ target(struct sk_buff **pskb, | |||
61 | const struct xt_target *target, | 57 | const struct xt_target *target, |
62 | const void *targinfo) | 58 | const void *targinfo) |
63 | { | 59 | { |
64 | struct ip_conntrack *ct; | 60 | struct nf_conn *ct; |
65 | enum ip_conntrack_info ctinfo; | 61 | enum ip_conntrack_info ctinfo; |
66 | __be32 new_ip, netmask; | 62 | __be32 new_ip, netmask; |
67 | const struct ip_nat_multi_range_compat *mr = targinfo; | 63 | const struct nf_nat_multi_range_compat *mr = targinfo; |
68 | struct ip_nat_range newrange; | 64 | struct nf_nat_range newrange; |
69 | 65 | ||
70 | IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING | 66 | NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING |
71 | || hooknum == NF_IP_POST_ROUTING | 67 | || hooknum == NF_IP_POST_ROUTING |
72 | || hooknum == NF_IP_LOCAL_OUT); | 68 | || hooknum == NF_IP_LOCAL_OUT); |
73 | ct = ip_conntrack_get(*pskb, &ctinfo); | 69 | ct = nf_ct_get(*pskb, &ctinfo); |
74 | 70 | ||
75 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); | 71 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); |
76 | 72 | ||
77 | if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) | 73 | if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) |
78 | new_ip = (*pskb)->nh.iph->daddr & ~netmask; | 74 | new_ip = ip_hdr(*pskb)->daddr & ~netmask; |
79 | else | 75 | else |
80 | new_ip = (*pskb)->nh.iph->saddr & ~netmask; | 76 | new_ip = ip_hdr(*pskb)->saddr & ~netmask; |
81 | new_ip |= mr->range[0].min_ip & netmask; | 77 | new_ip |= mr->range[0].min_ip & netmask; |
82 | 78 | ||
83 | newrange = ((struct ip_nat_range) | 79 | newrange = ((struct nf_nat_range) |
84 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, | 80 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, |
85 | new_ip, new_ip, | 81 | new_ip, new_ip, |
86 | mr->range[0].min, mr->range[0].max }); | 82 | mr->range[0].min, mr->range[0].max }); |
87 | 83 | ||
88 | /* Hand modified range to generic setup. */ | 84 | /* Hand modified range to generic setup. */ |
89 | return ip_nat_setup_info(ct, &newrange, hooknum); | 85 | return nf_nat_setup_info(ct, &newrange, hooknum); |
90 | } | 86 | } |
91 | 87 | ||
92 | static struct xt_target target_module = { | 88 | static struct xt_target target_module = { |
93 | .name = MODULENAME, | 89 | .name = MODULENAME, |
94 | .family = AF_INET, | 90 | .family = AF_INET, |
95 | .target = target, | 91 | .target = target, |
96 | .targetsize = sizeof(struct ip_nat_multi_range_compat), | 92 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
97 | .table = "nat", | 93 | .table = "nat", |
98 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | | 94 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | |
99 | (1 << NF_IP_LOCAL_OUT), | 95 | (1 << NF_IP_LOCAL_OUT), |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index c2b6b80670f8..68cc76a198eb 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c | |||
@@ -19,11 +19,7 @@ | |||
19 | #include <net/checksum.h> | 19 | #include <net/checksum.h> |
20 | #include <linux/netfilter_ipv4.h> | 20 | #include <linux/netfilter_ipv4.h> |
21 | #include <linux/netfilter/x_tables.h> | 21 | #include <linux/netfilter/x_tables.h> |
22 | #ifdef CONFIG_NF_NAT_NEEDED | ||
23 | #include <net/netfilter/nf_nat_rule.h> | 22 | #include <net/netfilter/nf_nat_rule.h> |
24 | #else | ||
25 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
26 | #endif | ||
27 | 23 | ||
28 | MODULE_LICENSE("GPL"); | 24 | MODULE_LICENSE("GPL"); |
29 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 25 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
@@ -43,7 +39,7 @@ redirect_check(const char *tablename, | |||
43 | void *targinfo, | 39 | void *targinfo, |
44 | unsigned int hook_mask) | 40 | unsigned int hook_mask) |
45 | { | 41 | { |
46 | const struct ip_nat_multi_range_compat *mr = targinfo; | 42 | const struct nf_nat_multi_range_compat *mr = targinfo; |
47 | 43 | ||
48 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 44 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { |
49 | DEBUGP("redirect_check: bad MAP_IPS.\n"); | 45 | DEBUGP("redirect_check: bad MAP_IPS.\n"); |
@@ -64,17 +60,17 @@ redirect_target(struct sk_buff **pskb, | |||
64 | const struct xt_target *target, | 60 | const struct xt_target *target, |
65 | const void *targinfo) | 61 | const void *targinfo) |
66 | { | 62 | { |
67 | struct ip_conntrack *ct; | 63 | struct nf_conn *ct; |
68 | enum ip_conntrack_info ctinfo; | 64 | enum ip_conntrack_info ctinfo; |
69 | __be32 newdst; | 65 | __be32 newdst; |
70 | const struct ip_nat_multi_range_compat *mr = targinfo; | 66 | const struct nf_nat_multi_range_compat *mr = targinfo; |
71 | struct ip_nat_range newrange; | 67 | struct nf_nat_range newrange; |
72 | 68 | ||
73 | IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING | 69 | NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING |
74 | || hooknum == NF_IP_LOCAL_OUT); | 70 | || hooknum == NF_IP_LOCAL_OUT); |
75 | 71 | ||
76 | ct = ip_conntrack_get(*pskb, &ctinfo); | 72 | ct = nf_ct_get(*pskb, &ctinfo); |
77 | IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | 73 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); |
78 | 74 | ||
79 | /* Local packets: make them go to loopback */ | 75 | /* Local packets: make them go to loopback */ |
80 | if (hooknum == NF_IP_LOCAL_OUT) | 76 | if (hooknum == NF_IP_LOCAL_OUT) |
@@ -96,20 +92,20 @@ redirect_target(struct sk_buff **pskb, | |||
96 | } | 92 | } |
97 | 93 | ||
98 | /* Transfer from original range. */ | 94 | /* Transfer from original range. */ |
99 | newrange = ((struct ip_nat_range) | 95 | newrange = ((struct nf_nat_range) |
100 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, | 96 | { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, |
101 | newdst, newdst, | 97 | newdst, newdst, |
102 | mr->range[0].min, mr->range[0].max }); | 98 | mr->range[0].min, mr->range[0].max }); |
103 | 99 | ||
104 | /* Hand modified range to generic setup. */ | 100 | /* Hand modified range to generic setup. */ |
105 | return ip_nat_setup_info(ct, &newrange, hooknum); | 101 | return nf_nat_setup_info(ct, &newrange, hooknum); |
106 | } | 102 | } |
107 | 103 | ||
108 | static struct xt_target redirect_reg = { | 104 | static struct xt_target redirect_reg = { |
109 | .name = "REDIRECT", | 105 | .name = "REDIRECT", |
110 | .family = AF_INET, | 106 | .family = AF_INET, |
111 | .target = redirect_target, | 107 | .target = redirect_target, |
112 | .targetsize = sizeof(struct ip_nat_multi_range_compat), | 108 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
113 | .table = "nat", | 109 | .table = "nat", |
114 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), | 110 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), |
115 | .checkentry = redirect_check, | 111 | .checkentry = redirect_check, |
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 80f739e21824..9041e0741f6f 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -1,7 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * This is a module which is used for rejecting packets. | 2 | * This is a module which is used for rejecting packets. |
3 | * Added support for customized reject packets (Jozsef Kadlecsik). | ||
4 | * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812] | ||
5 | */ | 3 | */ |
6 | 4 | ||
7 | /* (C) 1999-2001 Paul `Rusty' Russell | 5 | /* (C) 1999-2001 Paul `Rusty' Russell |
@@ -43,7 +41,7 @@ MODULE_DESCRIPTION("iptables REJECT target module"); | |||
43 | static void send_reset(struct sk_buff *oldskb, int hook) | 41 | static void send_reset(struct sk_buff *oldskb, int hook) |
44 | { | 42 | { |
45 | struct sk_buff *nskb; | 43 | struct sk_buff *nskb; |
46 | struct iphdr *iph = oldskb->nh.iph; | 44 | struct iphdr *niph; |
47 | struct tcphdr _otcph, *oth, *tcph; | 45 | struct tcphdr _otcph, *oth, *tcph; |
48 | __be16 tmp_port; | 46 | __be16 tmp_port; |
49 | __be32 tmp_addr; | 47 | __be32 tmp_addr; |
@@ -51,10 +49,10 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
51 | unsigned int addr_type; | 49 | unsigned int addr_type; |
52 | 50 | ||
53 | /* IP header checks: fragment. */ | 51 | /* IP header checks: fragment. */ |
54 | if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) | 52 | if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) |
55 | return; | 53 | return; |
56 | 54 | ||
57 | oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4, | 55 | oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), |
58 | sizeof(_otcph), &_otcph); | 56 | sizeof(_otcph), &_otcph); |
59 | if (oth == NULL) | 57 | if (oth == NULL) |
60 | return; | 58 | return; |
@@ -64,7 +62,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
64 | return; | 62 | return; |
65 | 63 | ||
66 | /* Check checksum */ | 64 | /* Check checksum */ |
67 | if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP)) | 65 | if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) |
68 | return; | 66 | return; |
69 | 67 | ||
70 | /* We need a linear, writeable skb. We also need to expand | 68 | /* We need a linear, writeable skb. We also need to expand |
@@ -84,20 +82,21 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
84 | skb_shinfo(nskb)->gso_segs = 0; | 82 | skb_shinfo(nskb)->gso_segs = 0; |
85 | skb_shinfo(nskb)->gso_type = 0; | 83 | skb_shinfo(nskb)->gso_type = 0; |
86 | 84 | ||
87 | tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); | 85 | tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb)); |
88 | 86 | ||
89 | /* Swap source and dest */ | 87 | /* Swap source and dest */ |
90 | tmp_addr = nskb->nh.iph->saddr; | 88 | niph = ip_hdr(nskb); |
91 | nskb->nh.iph->saddr = nskb->nh.iph->daddr; | 89 | tmp_addr = niph->saddr; |
92 | nskb->nh.iph->daddr = tmp_addr; | 90 | niph->saddr = niph->daddr; |
91 | niph->daddr = tmp_addr; | ||
93 | tmp_port = tcph->source; | 92 | tmp_port = tcph->source; |
94 | tcph->source = tcph->dest; | 93 | tcph->source = tcph->dest; |
95 | tcph->dest = tmp_port; | 94 | tcph->dest = tmp_port; |
96 | 95 | ||
97 | /* Truncate to length (no data) */ | 96 | /* Truncate to length (no data) */ |
98 | tcph->doff = sizeof(struct tcphdr)/4; | 97 | tcph->doff = sizeof(struct tcphdr)/4; |
99 | skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr)); | 98 | skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr)); |
100 | nskb->nh.iph->tot_len = htons(nskb->len); | 99 | niph->tot_len = htons(nskb->len); |
101 | 100 | ||
102 | if (tcph->ack) { | 101 | if (tcph->ack) { |
103 | needs_ack = 0; | 102 | needs_ack = 0; |
@@ -105,9 +104,9 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
105 | tcph->ack_seq = 0; | 104 | tcph->ack_seq = 0; |
106 | } else { | 105 | } else { |
107 | needs_ack = 1; | 106 | needs_ack = 1; |
108 | tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin | 107 | tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + |
109 | + oldskb->len - oldskb->nh.iph->ihl*4 | 108 | oldskb->len - ip_hdrlen(oldskb) - |
110 | - (oth->doff<<2)); | 109 | (oth->doff << 2)); |
111 | tcph->seq = 0; | 110 | tcph->seq = 0; |
112 | } | 111 | } |
113 | 112 | ||
@@ -122,14 +121,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
122 | /* Adjust TCP checksum */ | 121 | /* Adjust TCP checksum */ |
123 | tcph->check = 0; | 122 | tcph->check = 0; |
124 | tcph->check = tcp_v4_check(sizeof(struct tcphdr), | 123 | tcph->check = tcp_v4_check(sizeof(struct tcphdr), |
125 | nskb->nh.iph->saddr, | 124 | niph->saddr, niph->daddr, |
126 | nskb->nh.iph->daddr, | ||
127 | csum_partial((char *)tcph, | 125 | csum_partial((char *)tcph, |
128 | sizeof(struct tcphdr), 0)); | 126 | sizeof(struct tcphdr), 0)); |
129 | 127 | ||
130 | /* Set DF, id = 0 */ | 128 | /* Set DF, id = 0 */ |
131 | nskb->nh.iph->frag_off = htons(IP_DF); | 129 | niph->frag_off = htons(IP_DF); |
132 | nskb->nh.iph->id = 0; | 130 | niph->id = 0; |
133 | 131 | ||
134 | addr_type = RTN_UNSPEC; | 132 | addr_type = RTN_UNSPEC; |
135 | if (hook != NF_IP_FORWARD | 133 | if (hook != NF_IP_FORWARD |
@@ -145,12 +143,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
145 | nskb->ip_summed = CHECKSUM_NONE; | 143 | nskb->ip_summed = CHECKSUM_NONE; |
146 | 144 | ||
147 | /* Adjust IP TTL */ | 145 | /* Adjust IP TTL */ |
148 | nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); | 146 | niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); |
149 | 147 | ||
150 | /* Adjust IP checksum */ | 148 | /* Adjust IP checksum */ |
151 | nskb->nh.iph->check = 0; | 149 | niph->check = 0; |
152 | nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, | 150 | niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl); |
153 | nskb->nh.iph->ihl); | ||
154 | 151 | ||
155 | /* "Never happens" */ | 152 | /* "Never happens" */ |
156 | if (nskb->len > dst_mtu(nskb->dst)) | 153 | if (nskb->len > dst_mtu(nskb->dst)) |
@@ -182,7 +179,7 @@ static unsigned int reject(struct sk_buff **pskb, | |||
182 | 179 | ||
183 | /* Our naive response construction doesn't deal with IP | 180 | /* Our naive response construction doesn't deal with IP |
184 | options, and probably shouldn't try. */ | 181 | options, and probably shouldn't try. */ |
185 | if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr)) | 182 | if (ip_hdrlen(*pskb) != sizeof(struct iphdr)) |
186 | return NF_DROP; | 183 | return NF_DROP; |
187 | 184 | ||
188 | /* WARNING: This code causes reentry within iptables. | 185 | /* WARNING: This code causes reentry within iptables. |
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index bd4404e5c688..511e5ff84938 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c | |||
@@ -7,21 +7,6 @@ | |||
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | * | ||
11 | * 010320 Martin Josefsson <gandalf@wlug.westbo.se> | ||
12 | * * copied ipt_BALANCE.c to ipt_SAME.c and changed a few things. | ||
13 | * 010728 Martin Josefsson <gandalf@wlug.westbo.se> | ||
14 | * * added --nodst to not include destination-ip in new source | ||
15 | * calculations. | ||
16 | * * added some more sanity-checks. | ||
17 | * 010729 Martin Josefsson <gandalf@wlug.westbo.se> | ||
18 | * * fixed a buggy if-statement in same_check(), should have | ||
19 | * used ntohl() but didn't. | ||
20 | * * added support for multiple ranges. IPT_SAME_MAX_RANGE is | ||
21 | * defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h | ||
22 | * and is currently set to 10. | ||
23 | * * added support for 1-address range, nice to have now that | ||
24 | * we have multiple ranges. | ||
25 | */ | 10 | */ |
26 | #include <linux/types.h> | 11 | #include <linux/types.h> |
27 | #include <linux/ip.h> | 12 | #include <linux/ip.h> |
@@ -35,11 +20,7 @@ | |||
35 | #include <net/checksum.h> | 20 | #include <net/checksum.h> |
36 | #include <linux/netfilter_ipv4.h> | 21 | #include <linux/netfilter_ipv4.h> |
37 | #include <linux/netfilter/x_tables.h> | 22 | #include <linux/netfilter/x_tables.h> |
38 | #ifdef CONFIG_NF_NAT_NEEDED | ||
39 | #include <net/netfilter/nf_nat_rule.h> | 23 | #include <net/netfilter/nf_nat_rule.h> |
40 | #else | ||
41 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
42 | #endif | ||
43 | #include <linux/netfilter_ipv4/ipt_SAME.h> | 24 | #include <linux/netfilter_ipv4/ipt_SAME.h> |
44 | 25 | ||
45 | MODULE_LICENSE("GPL"); | 26 | MODULE_LICENSE("GPL"); |
@@ -138,17 +119,17 @@ same_target(struct sk_buff **pskb, | |||
138 | const struct xt_target *target, | 119 | const struct xt_target *target, |
139 | const void *targinfo) | 120 | const void *targinfo) |
140 | { | 121 | { |
141 | struct ip_conntrack *ct; | 122 | struct nf_conn *ct; |
142 | enum ip_conntrack_info ctinfo; | 123 | enum ip_conntrack_info ctinfo; |
143 | u_int32_t tmpip, aindex; | 124 | u_int32_t tmpip, aindex; |
144 | __be32 new_ip; | 125 | __be32 new_ip; |
145 | const struct ipt_same_info *same = targinfo; | 126 | const struct ipt_same_info *same = targinfo; |
146 | struct ip_nat_range newrange; | 127 | struct nf_nat_range newrange; |
147 | const struct ip_conntrack_tuple *t; | 128 | const struct nf_conntrack_tuple *t; |
148 | 129 | ||
149 | IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING || | 130 | NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || |
150 | hooknum == NF_IP_POST_ROUTING); | 131 | hooknum == NF_IP_POST_ROUTING); |
151 | ct = ip_conntrack_get(*pskb, &ctinfo); | 132 | ct = nf_ct_get(*pskb, &ctinfo); |
152 | 133 | ||
153 | t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | 134 | t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
154 | 135 | ||
@@ -157,17 +138,10 @@ same_target(struct sk_buff **pskb, | |||
157 | Here we calculate the index in same->iparray which | 138 | Here we calculate the index in same->iparray which |
158 | holds the ipaddress we should use */ | 139 | holds the ipaddress we should use */ |
159 | 140 | ||
160 | #ifdef CONFIG_NF_NAT_NEEDED | ||
161 | tmpip = ntohl(t->src.u3.ip); | 141 | tmpip = ntohl(t->src.u3.ip); |
162 | 142 | ||
163 | if (!(same->info & IPT_SAME_NODST)) | 143 | if (!(same->info & IPT_SAME_NODST)) |
164 | tmpip += ntohl(t->dst.u3.ip); | 144 | tmpip += ntohl(t->dst.u3.ip); |
165 | #else | ||
166 | tmpip = ntohl(t->src.ip); | ||
167 | |||
168 | if (!(same->info & IPT_SAME_NODST)) | ||
169 | tmpip += ntohl(t->dst.ip); | ||
170 | #endif | ||
171 | aindex = tmpip % same->ipnum; | 145 | aindex = tmpip % same->ipnum; |
172 | 146 | ||
173 | new_ip = htonl(same->iparray[aindex]); | 147 | new_ip = htonl(same->iparray[aindex]); |
@@ -178,13 +152,13 @@ same_target(struct sk_buff **pskb, | |||
178 | NIPQUAD(new_ip)); | 152 | NIPQUAD(new_ip)); |
179 | 153 | ||
180 | /* Transfer from original range. */ | 154 | /* Transfer from original range. */ |
181 | newrange = ((struct ip_nat_range) | 155 | newrange = ((struct nf_nat_range) |
182 | { same->range[0].flags, new_ip, new_ip, | 156 | { same->range[0].flags, new_ip, new_ip, |
183 | /* FIXME: Use ports from correct range! */ | 157 | /* FIXME: Use ports from correct range! */ |
184 | same->range[0].min, same->range[0].max }); | 158 | same->range[0].min, same->range[0].max }); |
185 | 159 | ||
186 | /* Hand modified range to generic setup. */ | 160 | /* Hand modified range to generic setup. */ |
187 | return ip_nat_setup_info(ct, &newrange, hooknum); | 161 | return nf_nat_setup_info(ct, &newrange, hooknum); |
188 | } | 162 | } |
189 | 163 | ||
190 | static struct xt_target same_reg = { | 164 | static struct xt_target same_reg = { |
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index cedf9f7d9d6e..0ad02f249837 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c | |||
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb, | |||
29 | const void *targinfo) | 29 | const void *targinfo) |
30 | { | 30 | { |
31 | const struct ipt_tos_target_info *tosinfo = targinfo; | 31 | const struct ipt_tos_target_info *tosinfo = targinfo; |
32 | struct iphdr *iph = (*pskb)->nh.iph; | 32 | struct iphdr *iph = ip_hdr(*pskb); |
33 | 33 | ||
34 | if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { | 34 | if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { |
35 | __u8 oldtos; | 35 | __u8 oldtos; |
36 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) | 36 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
37 | return NF_DROP; | 37 | return NF_DROP; |
38 | iph = (*pskb)->nh.iph; | 38 | iph = ip_hdr(*pskb); |
39 | oldtos = iph->tos; | 39 | oldtos = iph->tos; |
40 | iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; | 40 | iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; |
41 | nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); | 41 | nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); |
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 64be31c22ba9..a991ec7bd4e7 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c | |||
@@ -32,7 +32,7 @@ ipt_ttl_target(struct sk_buff **pskb, | |||
32 | if (!skb_make_writable(pskb, (*pskb)->len)) | 32 | if (!skb_make_writable(pskb, (*pskb)->len)) |
33 | return NF_DROP; | 33 | return NF_DROP; |
34 | 34 | ||
35 | iph = (*pskb)->nh.iph; | 35 | iph = ip_hdr(*pskb); |
36 | 36 | ||
37 | switch (info->mode) { | 37 | switch (info->mode) { |
38 | case IPT_TTL_SET: | 38 | case IPT_TTL_SET: |
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 9acc018766f2..23b607b33b32 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -2,20 +2,6 @@ | |||
2 | * netfilter module for userspace packet logging daemons | 2 | * netfilter module for userspace packet logging daemons |
3 | * | 3 | * |
4 | * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> | 4 | * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> |
5 | * | ||
6 | * 2000/09/22 ulog-cprange feature added | ||
7 | * 2001/01/04 in-kernel queue as proposed by Sebastian Zander | ||
8 | * <zander@fokus.gmd.de> | ||
9 | * 2001/01/30 per-rule nlgroup conflicts with global queue. | ||
10 | * nlgroup now global (sysctl) | ||
11 | * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at | ||
12 | * module loadtime -HW | ||
13 | * 2002/07/07 remove broken nflog_rcv() function -HW | ||
14 | * 2002/08/29 fix shifted/unshifted nlgroup bug -HW | ||
15 | * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen> | ||
16 | * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT | ||
17 | * resulting in bogus 'error during NLMSG_PUT' messages. | ||
18 | * | ||
19 | * (C) 1999-2001 Paul `Rusty' Russell | 5 | * (C) 1999-2001 Paul `Rusty' Russell |
20 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 6 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> |
21 | * | 7 | * |
@@ -42,8 +28,6 @@ | |||
42 | * flushtimeout: | 28 | * flushtimeout: |
43 | * Specify, after how many hundredths of a second the queue should be | 29 | * Specify, after how many hundredths of a second the queue should be |
44 | * flushed even if it is not full yet. | 30 | * flushed even if it is not full yet. |
45 | * | ||
46 | * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp | ||
47 | */ | 31 | */ |
48 | 32 | ||
49 | #include <linux/module.h> | 33 | #include <linux/module.h> |
@@ -187,6 +171,7 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
187 | ulog_packet_msg_t *pm; | 171 | ulog_packet_msg_t *pm; |
188 | size_t size, copy_len; | 172 | size_t size, copy_len; |
189 | struct nlmsghdr *nlh; | 173 | struct nlmsghdr *nlh; |
174 | struct timeval tv; | ||
190 | 175 | ||
191 | /* ffs == find first bit set, necessary because userspace | 176 | /* ffs == find first bit set, necessary because userspace |
192 | * is already shifting groupnumber, but we need unshifted. | 177 | * is already shifting groupnumber, but we need unshifted. |
@@ -232,13 +217,14 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
232 | pm = NLMSG_DATA(nlh); | 217 | pm = NLMSG_DATA(nlh); |
233 | 218 | ||
234 | /* We might not have a timestamp, get one */ | 219 | /* We might not have a timestamp, get one */ |
235 | if (skb->tstamp.off_sec == 0) | 220 | if (skb->tstamp.tv64 == 0) |
236 | __net_timestamp((struct sk_buff *)skb); | 221 | __net_timestamp((struct sk_buff *)skb); |
237 | 222 | ||
238 | /* copy hook, prefix, timestamp, payload, etc. */ | 223 | /* copy hook, prefix, timestamp, payload, etc. */ |
239 | pm->data_len = copy_len; | 224 | pm->data_len = copy_len; |
240 | put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec); | 225 | tv = ktime_to_timeval(skb->tstamp); |
241 | put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec); | 226 | put_unaligned(tv.tv_sec, &pm->timestamp_sec); |
227 | put_unaligned(tv.tv_usec, &pm->timestamp_usec); | ||
242 | put_unaligned(skb->mark, &pm->mark); | 228 | put_unaligned(skb->mark, &pm->mark); |
243 | pm->hook = hooknum; | 229 | pm->hook = hooknum; |
244 | if (prefix != NULL) | 230 | if (prefix != NULL) |
@@ -249,9 +235,9 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
249 | *(pm->prefix) = '\0'; | 235 | *(pm->prefix) = '\0'; |
250 | 236 | ||
251 | if (in && in->hard_header_len > 0 | 237 | if (in && in->hard_header_len > 0 |
252 | && skb->mac.raw != (void *) skb->nh.iph | 238 | && skb->mac_header != skb->network_header |
253 | && in->hard_header_len <= ULOG_MAC_LEN) { | 239 | && in->hard_header_len <= ULOG_MAC_LEN) { |
254 | memcpy(pm->mac, skb->mac.raw, in->hard_header_len); | 240 | memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); |
255 | pm->mac_len = in->hard_header_len; | 241 | pm->mac_len = in->hard_header_len; |
256 | } else | 242 | } else |
257 | pm->mac_len = 0; | 243 | pm->mac_len = 0; |
@@ -363,12 +349,52 @@ static int ipt_ulog_checkentry(const char *tablename, | |||
363 | return 1; | 349 | return 1; |
364 | } | 350 | } |
365 | 351 | ||
352 | #ifdef CONFIG_COMPAT | ||
353 | struct compat_ipt_ulog_info { | ||
354 | compat_uint_t nl_group; | ||
355 | compat_size_t copy_range; | ||
356 | compat_size_t qthreshold; | ||
357 | char prefix[ULOG_PREFIX_LEN]; | ||
358 | }; | ||
359 | |||
360 | static void compat_from_user(void *dst, void *src) | ||
361 | { | ||
362 | struct compat_ipt_ulog_info *cl = src; | ||
363 | struct ipt_ulog_info l = { | ||
364 | .nl_group = cl->nl_group, | ||
365 | .copy_range = cl->copy_range, | ||
366 | .qthreshold = cl->qthreshold, | ||
367 | }; | ||
368 | |||
369 | memcpy(l.prefix, cl->prefix, sizeof(l.prefix)); | ||
370 | memcpy(dst, &l, sizeof(l)); | ||
371 | } | ||
372 | |||
373 | static int compat_to_user(void __user *dst, void *src) | ||
374 | { | ||
375 | struct ipt_ulog_info *l = src; | ||
376 | struct compat_ipt_ulog_info cl = { | ||
377 | .nl_group = l->nl_group, | ||
378 | .copy_range = l->copy_range, | ||
379 | .qthreshold = l->qthreshold, | ||
380 | }; | ||
381 | |||
382 | memcpy(cl.prefix, l->prefix, sizeof(cl.prefix)); | ||
383 | return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0; | ||
384 | } | ||
385 | #endif /* CONFIG_COMPAT */ | ||
386 | |||
366 | static struct xt_target ipt_ulog_reg = { | 387 | static struct xt_target ipt_ulog_reg = { |
367 | .name = "ULOG", | 388 | .name = "ULOG", |
368 | .family = AF_INET, | 389 | .family = AF_INET, |
369 | .target = ipt_ulog_target, | 390 | .target = ipt_ulog_target, |
370 | .targetsize = sizeof(struct ipt_ulog_info), | 391 | .targetsize = sizeof(struct ipt_ulog_info), |
371 | .checkentry = ipt_ulog_checkentry, | 392 | .checkentry = ipt_ulog_checkentry, |
393 | #ifdef CONFIG_COMPAT | ||
394 | .compatsize = sizeof(struct compat_ipt_ulog_info), | ||
395 | .compat_from_user = compat_from_user, | ||
396 | .compat_to_user = compat_to_user, | ||
397 | #endif | ||
372 | .me = THIS_MODULE, | 398 | .me = THIS_MODULE, |
373 | }; | 399 | }; |
374 | 400 | ||
@@ -390,14 +416,11 @@ static int __init ipt_ulog_init(void) | |||
390 | } | 416 | } |
391 | 417 | ||
392 | /* initialize ulog_buffers */ | 418 | /* initialize ulog_buffers */ |
393 | for (i = 0; i < ULOG_MAXNLGROUPS; i++) { | 419 | for (i = 0; i < ULOG_MAXNLGROUPS; i++) |
394 | init_timer(&ulog_buffers[i].timer); | 420 | setup_timer(&ulog_buffers[i].timer, ulog_timer, i); |
395 | ulog_buffers[i].timer.function = ulog_timer; | ||
396 | ulog_buffers[i].timer.data = i; | ||
397 | } | ||
398 | 421 | ||
399 | nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, | 422 | nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, |
400 | THIS_MODULE); | 423 | NULL, THIS_MODULE); |
401 | if (!nflognl) | 424 | if (!nflognl) |
402 | return -ENOMEM; | 425 | return -ENOMEM; |
403 | 426 | ||
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index cfa0472617f6..a652a1451552 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c | |||
@@ -33,7 +33,7 @@ static int match(const struct sk_buff *skb, | |||
33 | int offset, unsigned int protoff, int *hotdrop) | 33 | int offset, unsigned int protoff, int *hotdrop) |
34 | { | 34 | { |
35 | const struct ipt_addrtype_info *info = matchinfo; | 35 | const struct ipt_addrtype_info *info = matchinfo; |
36 | const struct iphdr *iph = skb->nh.iph; | 36 | const struct iphdr *iph = ip_hdr(skb); |
37 | int ret = 1; | 37 | int ret = 1; |
38 | 38 | ||
39 | if (info->source) | 39 | if (info->source) |
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 37508b2cfea6..26218122f865 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c | |||
@@ -1,7 +1,5 @@ | |||
1 | /* IP tables module for matching the value of the IPv4 and TCP ECN bits | 1 | /* IP tables module for matching the value of the IPv4 and TCP ECN bits |
2 | * | 2 | * |
3 | * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp | ||
4 | * | ||
5 | * (C) 2002 by Harald Welte <laforge@gnumonks.org> | 3 | * (C) 2002 by Harald Welte <laforge@gnumonks.org> |
6 | * | 4 | * |
7 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
@@ -11,6 +9,7 @@ | |||
11 | 9 | ||
12 | #include <linux/in.h> | 10 | #include <linux/in.h> |
13 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
12 | #include <net/ip.h> | ||
14 | #include <linux/module.h> | 13 | #include <linux/module.h> |
15 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
16 | #include <linux/tcp.h> | 15 | #include <linux/tcp.h> |
@@ -26,7 +25,7 @@ MODULE_LICENSE("GPL"); | |||
26 | static inline int match_ip(const struct sk_buff *skb, | 25 | static inline int match_ip(const struct sk_buff *skb, |
27 | const struct ipt_ecn_info *einfo) | 26 | const struct ipt_ecn_info *einfo) |
28 | { | 27 | { |
29 | return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect); | 28 | return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; |
30 | } | 29 | } |
31 | 30 | ||
32 | static inline int match_tcp(const struct sk_buff *skb, | 31 | static inline int match_tcp(const struct sk_buff *skb, |
@@ -38,8 +37,7 @@ static inline int match_tcp(const struct sk_buff *skb, | |||
38 | /* In practice, TCP match does this, so can't fail. But let's | 37 | /* In practice, TCP match does this, so can't fail. But let's |
39 | * be good citizens. | 38 | * be good citizens. |
40 | */ | 39 | */ |
41 | th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, | 40 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); |
42 | sizeof(_tcph), &_tcph); | ||
43 | if (th == NULL) { | 41 | if (th == NULL) { |
44 | *hotdrop = 0; | 42 | *hotdrop = 0; |
45 | return 0; | 43 | return 0; |
@@ -80,7 +78,7 @@ static int match(const struct sk_buff *skb, | |||
80 | return 0; | 78 | return 0; |
81 | 79 | ||
82 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { | 80 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { |
83 | if (skb->nh.iph->protocol != IPPROTO_TCP) | 81 | if (ip_hdr(skb)->protocol != IPPROTO_TCP) |
84 | return 0; | 82 | return 0; |
85 | if (!match_tcp(skb, info, hotdrop)) | 83 | if (!match_tcp(skb, info, hotdrop)) |
86 | return 0; | 84 | return 0; |
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index bc5d5e6091e4..33af9e940887 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c | |||
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb, | |||
32 | int offset, unsigned int protoff, int *hotdrop) | 32 | int offset, unsigned int protoff, int *hotdrop) |
33 | { | 33 | { |
34 | const struct ipt_iprange_info *info = matchinfo; | 34 | const struct ipt_iprange_info *info = matchinfo; |
35 | const struct iphdr *iph = skb->nh.iph; | 35 | const struct iphdr *iph = ip_hdr(skb); |
36 | 36 | ||
37 | if (info->flags & IPRANGE_SRC) { | 37 | if (info->flags & IPRANGE_SRC) { |
38 | if (((ntohl(iph->saddr) < ntohl(info->src.min_ip)) | 38 | if (((ntohl(iph->saddr) < ntohl(info->src.min_ip)) |
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index aecb9c48e152..15a9e8bbb7cc 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c | |||
@@ -183,11 +183,11 @@ ipt_recent_match(const struct sk_buff *skb, | |||
183 | int ret = info->invert; | 183 | int ret = info->invert; |
184 | 184 | ||
185 | if (info->side == IPT_RECENT_DEST) | 185 | if (info->side == IPT_RECENT_DEST) |
186 | addr = skb->nh.iph->daddr; | 186 | addr = ip_hdr(skb)->daddr; |
187 | else | 187 | else |
188 | addr = skb->nh.iph->saddr; | 188 | addr = ip_hdr(skb)->saddr; |
189 | 189 | ||
190 | ttl = skb->nh.iph->ttl; | 190 | ttl = ip_hdr(skb)->ttl; |
191 | /* use TTL as seen before forwarding */ | 191 | /* use TTL as seen before forwarding */ |
192 | if (out && !skb->sk) | 192 | if (out && !skb->sk) |
193 | ttl++; | 193 | ttl++; |
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index 5d33b51d49d8..d314844af12b 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c | |||
@@ -30,7 +30,7 @@ match(const struct sk_buff *skb, | |||
30 | { | 30 | { |
31 | const struct ipt_tos_info *info = matchinfo; | 31 | const struct ipt_tos_info *info = matchinfo; |
32 | 32 | ||
33 | return (skb->nh.iph->tos == info->tos) ^ info->invert; | 33 | return (ip_hdr(skb)->tos == info->tos) ^ info->invert; |
34 | } | 34 | } |
35 | 35 | ||
36 | static struct xt_match tos_match = { | 36 | static struct xt_match tos_match = { |
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index 1eca9f400374..ab02d9e3139c 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c | |||
@@ -1,7 +1,5 @@ | |||
1 | /* IP tables module for matching the value of the TTL | 1 | /* IP tables module for matching the value of the TTL |
2 | * | 2 | * |
3 | * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp | ||
4 | * | ||
5 | * (C) 2000,2001 by Harald Welte <laforge@netfilter.org> | 3 | * (C) 2000,2001 by Harald Welte <laforge@netfilter.org> |
6 | * | 4 | * |
7 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
@@ -26,19 +24,20 @@ static int match(const struct sk_buff *skb, | |||
26 | int offset, unsigned int protoff, int *hotdrop) | 24 | int offset, unsigned int protoff, int *hotdrop) |
27 | { | 25 | { |
28 | const struct ipt_ttl_info *info = matchinfo; | 26 | const struct ipt_ttl_info *info = matchinfo; |
27 | const u8 ttl = ip_hdr(skb)->ttl; | ||
29 | 28 | ||
30 | switch (info->mode) { | 29 | switch (info->mode) { |
31 | case IPT_TTL_EQ: | 30 | case IPT_TTL_EQ: |
32 | return (skb->nh.iph->ttl == info->ttl); | 31 | return (ttl == info->ttl); |
33 | break; | 32 | break; |
34 | case IPT_TTL_NE: | 33 | case IPT_TTL_NE: |
35 | return (!(skb->nh.iph->ttl == info->ttl)); | 34 | return (!(ttl == info->ttl)); |
36 | break; | 35 | break; |
37 | case IPT_TTL_LT: | 36 | case IPT_TTL_LT: |
38 | return (skb->nh.iph->ttl < info->ttl); | 37 | return (ttl < info->ttl); |
39 | break; | 38 | break; |
40 | case IPT_TTL_GT: | 39 | case IPT_TTL_GT: |
41 | return (skb->nh.iph->ttl > info->ttl); | 40 | return (ttl > info->ttl); |
42 | break; | 41 | break; |
43 | default: | 42 | default: |
44 | printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", | 43 | printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index d1d61e97b976..42728909eba0 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/netfilter_ipv4/ip_tables.h> | 15 | #include <linux/netfilter_ipv4/ip_tables.h> |
16 | #include <net/ip.h> | ||
16 | 17 | ||
17 | MODULE_LICENSE("GPL"); | 18 | MODULE_LICENSE("GPL"); |
18 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 19 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
@@ -102,7 +103,7 @@ ipt_local_out_hook(unsigned int hook, | |||
102 | { | 103 | { |
103 | /* root is playing with raw sockets. */ | 104 | /* root is playing with raw sockets. */ |
104 | if ((*pskb)->len < sizeof(struct iphdr) | 105 | if ((*pskb)->len < sizeof(struct iphdr) |
105 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { | 106 | || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { |
106 | if (net_ratelimit()) | 107 | if (net_ratelimit()) |
107 | printk("ipt_hook: happy cracking.\n"); | 108 | printk("ipt_hook: happy cracking.\n"); |
108 | return NF_ACCEPT; | 109 | return NF_ACCEPT; |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 98b66ef0c714..9278802f2742 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -7,8 +7,6 @@ | |||
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | * | ||
11 | * Extended to all five netfilter hooks by Brad Chapman & Harald Welte | ||
12 | */ | 10 | */ |
13 | #include <linux/module.h> | 11 | #include <linux/module.h> |
14 | #include <linux/netfilter_ipv4/ip_tables.h> | 12 | #include <linux/netfilter_ipv4/ip_tables.h> |
@@ -17,6 +15,7 @@ | |||
17 | #include <net/sock.h> | 15 | #include <net/sock.h> |
18 | #include <net/route.h> | 16 | #include <net/route.h> |
19 | #include <linux/ip.h> | 17 | #include <linux/ip.h> |
18 | #include <net/ip.h> | ||
20 | 19 | ||
21 | MODULE_LICENSE("GPL"); | 20 | MODULE_LICENSE("GPL"); |
22 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 21 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
@@ -130,13 +129,14 @@ ipt_local_hook(unsigned int hook, | |||
130 | int (*okfn)(struct sk_buff *)) | 129 | int (*okfn)(struct sk_buff *)) |
131 | { | 130 | { |
132 | unsigned int ret; | 131 | unsigned int ret; |
132 | const struct iphdr *iph; | ||
133 | u_int8_t tos; | 133 | u_int8_t tos; |
134 | __be32 saddr, daddr; | 134 | __be32 saddr, daddr; |
135 | u_int32_t mark; | 135 | u_int32_t mark; |
136 | 136 | ||
137 | /* root is playing with raw sockets. */ | 137 | /* root is playing with raw sockets. */ |
138 | if ((*pskb)->len < sizeof(struct iphdr) | 138 | if ((*pskb)->len < sizeof(struct iphdr) |
139 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { | 139 | || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { |
140 | if (net_ratelimit()) | 140 | if (net_ratelimit()) |
141 | printk("ipt_hook: happy cracking.\n"); | 141 | printk("ipt_hook: happy cracking.\n"); |
142 | return NF_ACCEPT; | 142 | return NF_ACCEPT; |
@@ -144,19 +144,23 @@ ipt_local_hook(unsigned int hook, | |||
144 | 144 | ||
145 | /* Save things which could affect route */ | 145 | /* Save things which could affect route */ |
146 | mark = (*pskb)->mark; | 146 | mark = (*pskb)->mark; |
147 | saddr = (*pskb)->nh.iph->saddr; | 147 | iph = ip_hdr(*pskb); |
148 | daddr = (*pskb)->nh.iph->daddr; | 148 | saddr = iph->saddr; |
149 | tos = (*pskb)->nh.iph->tos; | 149 | daddr = iph->daddr; |
150 | tos = iph->tos; | ||
150 | 151 | ||
151 | ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); | 152 | ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); |
152 | /* Reroute for ANY change. */ | 153 | /* Reroute for ANY change. */ |
153 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE | 154 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { |
154 | && ((*pskb)->nh.iph->saddr != saddr | 155 | iph = ip_hdr(*pskb); |
155 | || (*pskb)->nh.iph->daddr != daddr | 156 | |
156 | || (*pskb)->mark != mark | 157 | if (iph->saddr != saddr || |
157 | || (*pskb)->nh.iph->tos != tos)) | 158 | iph->daddr != daddr || |
158 | if (ip_route_me_harder(pskb, RTN_UNSPEC)) | 159 | (*pskb)->mark != mark || |
159 | ret = NF_DROP; | 160 | iph->tos != tos) |
161 | if (ip_route_me_harder(pskb, RTN_UNSPEC)) | ||
162 | ret = NF_DROP; | ||
163 | } | ||
160 | 164 | ||
161 | return ret; | 165 | return ret; |
162 | } | 166 | } |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8f3e92d20df8..0654eaae70c9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -4,14 +4,6 @@ | |||
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | ||
8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * - move L3 protocol dependent part to this file. | ||
10 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
11 | * - add get_features() to support various size of conntrack | ||
12 | * structures. | ||
13 | * | ||
14 | * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c | ||
15 | */ | 7 | */ |
16 | 8 | ||
17 | #include <linux/types.h> | 9 | #include <linux/types.h> |
@@ -87,7 +79,7 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
87 | local_bh_enable(); | 79 | local_bh_enable(); |
88 | 80 | ||
89 | if (skb) | 81 | if (skb) |
90 | ip_send_check(skb->nh.iph); | 82 | ip_send_check(ip_hdr(skb)); |
91 | 83 | ||
92 | return skb; | 84 | return skb; |
93 | } | 85 | } |
@@ -97,16 +89,16 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, | |||
97 | u_int8_t *protonum) | 89 | u_int8_t *protonum) |
98 | { | 90 | { |
99 | /* Never happen */ | 91 | /* Never happen */ |
100 | if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { | 92 | if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) { |
101 | if (net_ratelimit()) { | 93 | if (net_ratelimit()) { |
102 | printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", | 94 | printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", |
103 | (*pskb)->nh.iph->protocol, hooknum); | 95 | ip_hdr(*pskb)->protocol, hooknum); |
104 | } | 96 | } |
105 | return -NF_DROP; | 97 | return -NF_DROP; |
106 | } | 98 | } |
107 | 99 | ||
108 | *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4; | 100 | *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb); |
109 | *protonum = (*pskb)->nh.iph->protocol; | 101 | *protonum = ip_hdr(*pskb)->protocol; |
110 | 102 | ||
111 | return NF_ACCEPT; | 103 | return NF_ACCEPT; |
112 | } | 104 | } |
@@ -152,9 +144,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, | |||
152 | return NF_ACCEPT; | 144 | return NF_ACCEPT; |
153 | 145 | ||
154 | return help->helper->help(pskb, | 146 | return help->helper->help(pskb, |
155 | (*pskb)->nh.raw - (*pskb)->data | 147 | skb_network_offset(*pskb) + ip_hdrlen(*pskb), |
156 | + (*pskb)->nh.iph->ihl*4, | 148 | ct, ctinfo); |
157 | ct, ctinfo); | ||
158 | } | 149 | } |
159 | 150 | ||
160 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | 151 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, |
@@ -171,7 +162,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | |||
171 | #endif | 162 | #endif |
172 | 163 | ||
173 | /* Gather fragments. */ | 164 | /* Gather fragments. */ |
174 | if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { | 165 | if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
175 | *pskb = nf_ct_ipv4_gather_frags(*pskb, | 166 | *pskb = nf_ct_ipv4_gather_frags(*pskb, |
176 | hooknum == NF_IP_PRE_ROUTING ? | 167 | hooknum == NF_IP_PRE_ROUTING ? |
177 | IP_DEFRAG_CONNTRACK_IN : | 168 | IP_DEFRAG_CONNTRACK_IN : |
@@ -199,7 +190,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, | |||
199 | { | 190 | { |
200 | /* root is playing with raw sockets. */ | 191 | /* root is playing with raw sockets. */ |
201 | if ((*pskb)->len < sizeof(struct iphdr) | 192 | if ((*pskb)->len < sizeof(struct iphdr) |
202 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { | 193 | || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { |
203 | if (net_ratelimit()) | 194 | if (net_ratelimit()) |
204 | printk("ipt_hook: happy cracking.\n"); | 195 | printk("ipt_hook: happy cracking.\n"); |
205 | return NF_ACCEPT; | 196 | return NF_ACCEPT; |
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5fd1e5363c1a..f4fc657c1983 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -4,11 +4,6 @@ | |||
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | ||
8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * - enable working with Layer 3 protocol independent connection tracking. | ||
10 | * | ||
11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c | ||
12 | */ | 7 | */ |
13 | 8 | ||
14 | #include <linux/types.h> | 9 | #include <linux/types.h> |
@@ -158,7 +153,7 @@ icmp_error_message(struct sk_buff *skb, | |||
158 | NF_CT_ASSERT(skb->nfct == NULL); | 153 | NF_CT_ASSERT(skb->nfct == NULL); |
159 | 154 | ||
160 | /* Not enough header? */ | 155 | /* Not enough header? */ |
161 | inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); | 156 | inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in); |
162 | if (inside == NULL) | 157 | if (inside == NULL) |
163 | return -NF_ACCEPT; | 158 | return -NF_ACCEPT; |
164 | 159 | ||
@@ -172,7 +167,7 @@ icmp_error_message(struct sk_buff *skb, | |||
172 | /* rcu_read_lock()ed by nf_hook_slow */ | 167 | /* rcu_read_lock()ed by nf_hook_slow */ |
173 | innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); | 168 | innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); |
174 | 169 | ||
175 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); | 170 | dataoff = ip_hdrlen(skb) + sizeof(inside->icmp); |
176 | /* Are they talking about one of our connections? */ | 171 | /* Are they talking about one of our connections? */ |
177 | if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, | 172 | if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, |
178 | inside->ip.protocol, &origtuple, | 173 | inside->ip.protocol, &origtuple, |
@@ -227,7 +222,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, | |||
227 | struct icmphdr _ih, *icmph; | 222 | struct icmphdr _ih, *icmph; |
228 | 223 | ||
229 | /* Not enough header? */ | 224 | /* Not enough header? */ |
230 | icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); | 225 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); |
231 | if (icmph == NULL) { | 226 | if (icmph == NULL) { |
232 | if (LOG_INVALID(IPPROTO_ICMP)) | 227 | if (LOG_INVALID(IPPROTO_ICMP)) |
233 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | 228 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 452e9d326684..ea02f00d2dac 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -431,7 +431,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
431 | } *inside; | 431 | } *inside; |
432 | struct nf_conntrack_l4proto *l4proto; | 432 | struct nf_conntrack_l4proto *l4proto; |
433 | struct nf_conntrack_tuple inner, target; | 433 | struct nf_conntrack_tuple inner, target; |
434 | int hdrlen = (*pskb)->nh.iph->ihl * 4; | 434 | int hdrlen = ip_hdrlen(*pskb); |
435 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 435 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
436 | unsigned long statusbit; | 436 | unsigned long statusbit; |
437 | enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); | 437 | enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); |
@@ -439,7 +439,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
439 | if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) | 439 | if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) |
440 | return 0; | 440 | return 0; |
441 | 441 | ||
442 | inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; | 442 | inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); |
443 | 443 | ||
444 | /* We're actually going to mangle it beyond trivial checksum | 444 | /* We're actually going to mangle it beyond trivial checksum |
445 | adjustment, so make sure the current checksum is correct. */ | 445 | adjustment, so make sure the current checksum is correct. */ |
@@ -469,9 +469,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
469 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); | 469 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); |
470 | 470 | ||
471 | if (!nf_ct_get_tuple(*pskb, | 471 | if (!nf_ct_get_tuple(*pskb, |
472 | (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr), | 472 | ip_hdrlen(*pskb) + sizeof(struct icmphdr), |
473 | (*pskb)->nh.iph->ihl*4 + | 473 | (ip_hdrlen(*pskb) + |
474 | sizeof(struct icmphdr) + inside->ip.ihl*4, | 474 | sizeof(struct icmphdr) + inside->ip.ihl * 4), |
475 | (u_int16_t)AF_INET, | 475 | (u_int16_t)AF_INET, |
476 | inside->ip.protocol, | 476 | inside->ip.protocol, |
477 | &inner, l3proto, l4proto)) | 477 | &inner, l3proto, l4proto)) |
@@ -483,14 +483,14 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
483 | packet: PREROUTING (DST manip), routing produces ICMP, goes | 483 | packet: PREROUTING (DST manip), routing produces ICMP, goes |
484 | through POSTROUTING (which must correct the DST manip). */ | 484 | through POSTROUTING (which must correct the DST manip). */ |
485 | if (!manip_pkt(inside->ip.protocol, pskb, | 485 | if (!manip_pkt(inside->ip.protocol, pskb, |
486 | (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp), | 486 | ip_hdrlen(*pskb) + sizeof(inside->icmp), |
487 | &ct->tuplehash[!dir].tuple, | 487 | &ct->tuplehash[!dir].tuple, |
488 | !manip)) | 488 | !manip)) |
489 | return 0; | 489 | return 0; |
490 | 490 | ||
491 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { | 491 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { |
492 | /* Reloading "inside" here since manip_pkt inner. */ | 492 | /* Reloading "inside" here since manip_pkt inner. */ |
493 | inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; | 493 | inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); |
494 | inside->icmp.checksum = 0; | 494 | inside->icmp.checksum = 0; |
495 | inside->icmp.checksum = | 495 | inside->icmp.checksum = |
496 | csum_fold(skb_checksum(*pskb, hdrlen, | 496 | csum_fold(skb_checksum(*pskb, hdrlen, |
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9cbf3f9be13b..fcebc968d37f 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -33,7 +33,7 @@ static int set_addr(struct sk_buff **pskb, | |||
33 | unsigned int addroff, __be32 ip, __be16 port) | 33 | unsigned int addroff, __be32 ip, __be16 port) |
34 | { | 34 | { |
35 | enum ip_conntrack_info ctinfo; | 35 | enum ip_conntrack_info ctinfo; |
36 | struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo); | 36 | struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo); |
37 | struct { | 37 | struct { |
38 | __be32 ip; | 38 | __be32 ip; |
39 | __be16 port; | 39 | __be16 port; |
@@ -44,7 +44,7 @@ static int set_addr(struct sk_buff **pskb, | |||
44 | buf.port = port; | 44 | buf.port = port; |
45 | addroff += dataoff; | 45 | addroff += dataoff; |
46 | 46 | ||
47 | if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) { | 47 | if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) { |
48 | if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, | 48 | if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, |
49 | addroff, sizeof(buf), | 49 | addroff, sizeof(buf), |
50 | (char *) &buf, sizeof(buf))) { | 50 | (char *) &buf, sizeof(buf))) { |
@@ -55,11 +55,11 @@ static int set_addr(struct sk_buff **pskb, | |||
55 | } | 55 | } |
56 | 56 | ||
57 | /* Relocate data pointer */ | 57 | /* Relocate data pointer */ |
58 | th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, | 58 | th = skb_header_pointer(*pskb, ip_hdrlen(*pskb), |
59 | sizeof(_tcph), &_tcph); | 59 | sizeof(_tcph), &_tcph); |
60 | if (th == NULL) | 60 | if (th == NULL) |
61 | return -1; | 61 | return -1; |
62 | *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 + | 62 | *data = (*pskb)->data + ip_hdrlen(*pskb) + |
63 | th->doff * 4 + dataoff; | 63 | th->doff * 4 + dataoff; |
64 | } else { | 64 | } else { |
65 | if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, | 65 | if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, |
@@ -73,8 +73,8 @@ static int set_addr(struct sk_buff **pskb, | |||
73 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy | 73 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy |
74 | * or pull everything in a linear buffer, so we can safely | 74 | * or pull everything in a linear buffer, so we can safely |
75 | * use the skb pointers now */ | 75 | * use the skb pointers now */ |
76 | *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 + | 76 | *data = ((*pskb)->data + ip_hdrlen(*pskb) + |
77 | sizeof(struct udphdr); | 77 | sizeof(struct udphdr)); |
78 | } | 78 | } |
79 | 79 | ||
80 | return 0; | 80 | return 0; |
@@ -383,7 +383,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, | |||
383 | static void ip_nat_q931_expect(struct nf_conn *new, | 383 | static void ip_nat_q931_expect(struct nf_conn *new, |
384 | struct nf_conntrack_expect *this) | 384 | struct nf_conntrack_expect *this) |
385 | { | 385 | { |
386 | struct ip_nat_range range; | 386 | struct nf_nat_range range; |
387 | 387 | ||
388 | if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ | 388 | if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ |
389 | nf_nat_follow_master(new, this); | 389 | nf_nat_follow_master(new, this); |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 49a90c39ffce..15b6e5ce3a04 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -87,12 +87,13 @@ static void mangle_contents(struct sk_buff *skb, | |||
87 | unsigned char *data; | 87 | unsigned char *data; |
88 | 88 | ||
89 | BUG_ON(skb_is_nonlinear(skb)); | 89 | BUG_ON(skb_is_nonlinear(skb)); |
90 | data = (unsigned char *)skb->nh.iph + dataoff; | 90 | data = skb_network_header(skb) + dataoff; |
91 | 91 | ||
92 | /* move post-replacement */ | 92 | /* move post-replacement */ |
93 | memmove(data + match_offset + rep_len, | 93 | memmove(data + match_offset + rep_len, |
94 | data + match_offset + match_len, | 94 | data + match_offset + match_len, |
95 | skb->tail - (data + match_offset + match_len)); | 95 | skb->tail - (skb->network_header + dataoff + |
96 | match_offset + match_len)); | ||
96 | 97 | ||
97 | /* insert data from buffer */ | 98 | /* insert data from buffer */ |
98 | memcpy(data + match_offset, rep_buffer, rep_len); | 99 | memcpy(data + match_offset, rep_buffer, rep_len); |
@@ -111,8 +112,8 @@ static void mangle_contents(struct sk_buff *skb, | |||
111 | } | 112 | } |
112 | 113 | ||
113 | /* fix IP hdr checksum information */ | 114 | /* fix IP hdr checksum information */ |
114 | skb->nh.iph->tot_len = htons(skb->len); | 115 | ip_hdr(skb)->tot_len = htons(skb->len); |
115 | ip_send_check(skb->nh.iph); | 116 | ip_send_check(ip_hdr(skb)); |
116 | } | 117 | } |
117 | 118 | ||
118 | /* Unusual, but possible case. */ | 119 | /* Unusual, but possible case. */ |
@@ -152,6 +153,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
152 | const char *rep_buffer, | 153 | const char *rep_buffer, |
153 | unsigned int rep_len) | 154 | unsigned int rep_len) |
154 | { | 155 | { |
156 | struct rtable *rt = (struct rtable *)(*pskb)->dst; | ||
155 | struct iphdr *iph; | 157 | struct iphdr *iph; |
156 | struct tcphdr *tcph; | 158 | struct tcphdr *tcph; |
157 | int oldlen, datalen; | 159 | int oldlen, datalen; |
@@ -166,7 +168,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
166 | 168 | ||
167 | SKB_LINEAR_ASSERT(*pskb); | 169 | SKB_LINEAR_ASSERT(*pskb); |
168 | 170 | ||
169 | iph = (*pskb)->nh.iph; | 171 | iph = ip_hdr(*pskb); |
170 | tcph = (void *)iph + iph->ihl*4; | 172 | tcph = (void *)iph + iph->ihl*4; |
171 | 173 | ||
172 | oldlen = (*pskb)->len - iph->ihl*4; | 174 | oldlen = (*pskb)->len - iph->ihl*4; |
@@ -175,11 +177,22 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
175 | 177 | ||
176 | datalen = (*pskb)->len - iph->ihl*4; | 178 | datalen = (*pskb)->len - iph->ihl*4; |
177 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { | 179 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { |
178 | tcph->check = 0; | 180 | if (!(rt->rt_flags & RTCF_LOCAL) && |
179 | tcph->check = tcp_v4_check(datalen, | 181 | (*pskb)->dev->features & NETIF_F_ALL_CSUM) { |
180 | iph->saddr, iph->daddr, | 182 | (*pskb)->ip_summed = CHECKSUM_PARTIAL; |
181 | csum_partial((char *)tcph, | 183 | (*pskb)->csum_start = skb_headroom(*pskb) + |
182 | datalen, 0)); | 184 | skb_network_offset(*pskb) + |
185 | iph->ihl * 4; | ||
186 | (*pskb)->csum_offset = offsetof(struct tcphdr, check); | ||
187 | tcph->check = ~tcp_v4_check(datalen, | ||
188 | iph->saddr, iph->daddr, 0); | ||
189 | } else { | ||
190 | tcph->check = 0; | ||
191 | tcph->check = tcp_v4_check(datalen, | ||
192 | iph->saddr, iph->daddr, | ||
193 | csum_partial((char *)tcph, | ||
194 | datalen, 0)); | ||
195 | } | ||
183 | } else | 196 | } else |
184 | nf_proto_csum_replace2(&tcph->check, *pskb, | 197 | nf_proto_csum_replace2(&tcph->check, *pskb, |
185 | htons(oldlen), htons(datalen), 1); | 198 | htons(oldlen), htons(datalen), 1); |
@@ -190,7 +203,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
190 | (int)rep_len - (int)match_len, | 203 | (int)rep_len - (int)match_len, |
191 | ct, ctinfo); | 204 | ct, ctinfo); |
192 | /* Tell TCP window tracking about seq change */ | 205 | /* Tell TCP window tracking about seq change */ |
193 | nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, | 206 | nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), |
194 | ct, CTINFO2DIR(ctinfo)); | 207 | ct, CTINFO2DIR(ctinfo)); |
195 | } | 208 | } |
196 | return 1; | 209 | return 1; |
@@ -216,12 +229,13 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, | |||
216 | const char *rep_buffer, | 229 | const char *rep_buffer, |
217 | unsigned int rep_len) | 230 | unsigned int rep_len) |
218 | { | 231 | { |
232 | struct rtable *rt = (struct rtable *)(*pskb)->dst; | ||
219 | struct iphdr *iph; | 233 | struct iphdr *iph; |
220 | struct udphdr *udph; | 234 | struct udphdr *udph; |
221 | int datalen, oldlen; | 235 | int datalen, oldlen; |
222 | 236 | ||
223 | /* UDP helpers might accidentally mangle the wrong packet */ | 237 | /* UDP helpers might accidentally mangle the wrong packet */ |
224 | iph = (*pskb)->nh.iph; | 238 | iph = ip_hdr(*pskb); |
225 | if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + | 239 | if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + |
226 | match_offset + match_len) | 240 | match_offset + match_len) |
227 | return 0; | 241 | return 0; |
@@ -234,7 +248,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, | |||
234 | !enlarge_skb(pskb, rep_len - match_len)) | 248 | !enlarge_skb(pskb, rep_len - match_len)) |
235 | return 0; | 249 | return 0; |
236 | 250 | ||
237 | iph = (*pskb)->nh.iph; | 251 | iph = ip_hdr(*pskb); |
238 | udph = (void *)iph + iph->ihl*4; | 252 | udph = (void *)iph + iph->ihl*4; |
239 | 253 | ||
240 | oldlen = (*pskb)->len - iph->ihl*4; | 254 | oldlen = (*pskb)->len - iph->ihl*4; |
@@ -250,13 +264,25 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, | |||
250 | return 1; | 264 | return 1; |
251 | 265 | ||
252 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { | 266 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { |
253 | udph->check = 0; | 267 | if (!(rt->rt_flags & RTCF_LOCAL) && |
254 | udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, | 268 | (*pskb)->dev->features & NETIF_F_ALL_CSUM) { |
255 | datalen, IPPROTO_UDP, | 269 | (*pskb)->ip_summed = CHECKSUM_PARTIAL; |
256 | csum_partial((char *)udph, | 270 | (*pskb)->csum_start = skb_headroom(*pskb) + |
257 | datalen, 0)); | 271 | skb_network_offset(*pskb) + |
258 | if (!udph->check) | 272 | iph->ihl * 4; |
259 | udph->check = CSUM_MANGLED_0; | 273 | (*pskb)->csum_offset = offsetof(struct udphdr, check); |
274 | udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
275 | datalen, IPPROTO_UDP, | ||
276 | 0); | ||
277 | } else { | ||
278 | udph->check = 0; | ||
279 | udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
280 | datalen, IPPROTO_UDP, | ||
281 | csum_partial((char *)udph, | ||
282 | datalen, 0)); | ||
283 | if (!udph->check) | ||
284 | udph->check = CSUM_MANGLED_0; | ||
285 | } | ||
260 | } else | 286 | } else |
261 | nf_proto_csum_replace2(&udph->check, *pskb, | 287 | nf_proto_csum_replace2(&udph->check, *pskb, |
262 | htons(oldlen), htons(datalen), 1); | 288 | htons(oldlen), htons(datalen), 1); |
@@ -318,8 +344,8 @@ nf_nat_sack_adjust(struct sk_buff **pskb, | |||
318 | unsigned int dir, optoff, optend; | 344 | unsigned int dir, optoff, optend; |
319 | struct nf_conn_nat *nat = nfct_nat(ct); | 345 | struct nf_conn_nat *nat = nfct_nat(ct); |
320 | 346 | ||
321 | optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); | 347 | optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr); |
322 | optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; | 348 | optend = ip_hdrlen(*pskb) + tcph->doff * 4; |
323 | 349 | ||
324 | if (!skb_make_writable(pskb, optend)) | 350 | if (!skb_make_writable(pskb, optend)) |
325 | return 0; | 351 | return 0; |
@@ -371,10 +397,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, | |||
371 | this_way = &nat->info.seq[dir]; | 397 | this_way = &nat->info.seq[dir]; |
372 | other_way = &nat->info.seq[!dir]; | 398 | other_way = &nat->info.seq[!dir]; |
373 | 399 | ||
374 | if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) | 400 | if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) |
375 | return 0; | 401 | return 0; |
376 | 402 | ||
377 | tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; | 403 | tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb); |
378 | if (after(ntohl(tcph->seq), this_way->correction_pos)) | 404 | if (after(ntohl(tcph->seq), this_way->correction_pos)) |
379 | newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); | 405 | newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); |
380 | else | 406 | else |
@@ -399,7 +425,7 @@ nf_nat_seq_adjust(struct sk_buff **pskb, | |||
399 | if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) | 425 | if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) |
400 | return 0; | 426 | return 0; |
401 | 427 | ||
402 | nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir); | 428 | nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir); |
403 | 429 | ||
404 | return 1; | 430 | return 1; |
405 | } | 431 | } |
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 7ba341c22eaa..a66888749ceb 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -53,7 +53,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
53 | struct nf_conntrack_tuple t; | 53 | struct nf_conntrack_tuple t; |
54 | struct nf_ct_pptp_master *ct_pptp_info; | 54 | struct nf_ct_pptp_master *ct_pptp_info; |
55 | struct nf_nat_pptp *nat_pptp_info; | 55 | struct nf_nat_pptp *nat_pptp_info; |
56 | struct ip_nat_range range; | 56 | struct nf_nat_range range; |
57 | 57 | ||
58 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; | 58 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; |
59 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; | 59 | nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 147a4370cf03..2a283397a8b6 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -191,7 +191,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, | |||
191 | 191 | ||
192 | if (hooknum == NF_IP_LOCAL_OUT && | 192 | if (hooknum == NF_IP_LOCAL_OUT && |
193 | mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) | 193 | mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) |
194 | warn_if_extra_mangle((*pskb)->nh.iph->daddr, | 194 | warn_if_extra_mangle(ip_hdr(*pskb)->daddr, |
195 | mr->range[0].min_ip); | 195 | mr->range[0].min_ip); |
196 | 196 | ||
197 | return nf_nat_setup_info(ct, &mr->range[0], hooknum); | 197 | return nf_nat_setup_info(ct, &mr->range[0], hooknum); |
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index b12cd7c314ca..bfd88e4e0685 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
13 | #include <linux/ip.h> | 13 | #include <linux/ip.h> |
14 | #include <net/ip.h> | ||
14 | #include <linux/udp.h> | 15 | #include <linux/udp.h> |
15 | 16 | ||
16 | #include <net/netfilter/nf_nat.h> | 17 | #include <net/netfilter/nf_nat.h> |
@@ -92,7 +93,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, | |||
92 | if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, | 93 | if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, |
93 | matchoff, matchlen, addr, addrlen)) | 94 | matchoff, matchlen, addr, addrlen)) |
94 | return 0; | 95 | return 0; |
95 | *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | 96 | *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); |
96 | return 1; | 97 | return 1; |
97 | 98 | ||
98 | } | 99 | } |
@@ -106,7 +107,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, | |||
106 | struct addr_map map; | 107 | struct addr_map map; |
107 | int dataoff, datalen; | 108 | int dataoff, datalen; |
108 | 109 | ||
109 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | 110 | dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); |
110 | datalen = (*pskb)->len - dataoff; | 111 | datalen = (*pskb)->len - dataoff; |
111 | if (datalen < sizeof("SIP/2.0") - 1) | 112 | if (datalen < sizeof("SIP/2.0") - 1) |
112 | return NF_DROP; | 113 | return NF_DROP; |
@@ -155,7 +156,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb, | |||
155 | return 0; | 156 | return 0; |
156 | 157 | ||
157 | /* We need to reload this. Thanks Patrick. */ | 158 | /* We need to reload this. Thanks Patrick. */ |
158 | *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | 159 | *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); |
159 | return 1; | 160 | return 1; |
160 | } | 161 | } |
161 | 162 | ||
@@ -168,7 +169,7 @@ static int mangle_content_len(struct sk_buff **pskb, | |||
168 | char buffer[sizeof("65536")]; | 169 | char buffer[sizeof("65536")]; |
169 | int bufflen; | 170 | int bufflen; |
170 | 171 | ||
171 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | 172 | dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); |
172 | 173 | ||
173 | /* Get actual SDP lenght */ | 174 | /* Get actual SDP lenght */ |
174 | if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, | 175 | if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, |
@@ -200,7 +201,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, | |||
200 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | 201 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; |
201 | unsigned int dataoff, bufflen; | 202 | unsigned int dataoff, bufflen; |
202 | 203 | ||
203 | dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); | 204 | dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); |
204 | 205 | ||
205 | /* Mangle owner and contact info. */ | 206 | /* Mangle owner and contact info. */ |
206 | bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); | 207 | bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ce5c4939a6ee..6e88505d6162 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -38,10 +38,6 @@ | |||
38 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 38 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
39 | * | 39 | * |
40 | * Author: James Morris <jmorris@intercode.com.au> | 40 | * Author: James Morris <jmorris@intercode.com.au> |
41 | * | ||
42 | * Updates: | ||
43 | * 2000-08-06: Convert to new helper API (Harald Welte). | ||
44 | * | ||
45 | */ | 41 | */ |
46 | #include <linux/module.h> | 42 | #include <linux/module.h> |
47 | #include <linux/moduleparam.h> | 43 | #include <linux/moduleparam.h> |
@@ -1194,7 +1190,7 @@ static int snmp_translate(struct nf_conn *ct, | |||
1194 | enum ip_conntrack_info ctinfo, | 1190 | enum ip_conntrack_info ctinfo, |
1195 | struct sk_buff **pskb) | 1191 | struct sk_buff **pskb) |
1196 | { | 1192 | { |
1197 | struct iphdr *iph = (*pskb)->nh.iph; | 1193 | struct iphdr *iph = ip_hdr(*pskb); |
1198 | struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); | 1194 | struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); |
1199 | u_int16_t udplen = ntohs(udph->len); | 1195 | u_int16_t udplen = ntohs(udph->len); |
1200 | u_int16_t paylen = udplen - sizeof(struct udphdr); | 1196 | u_int16_t paylen = udplen - sizeof(struct udphdr); |
@@ -1235,7 +1231,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, | |||
1235 | { | 1231 | { |
1236 | int dir = CTINFO2DIR(ctinfo); | 1232 | int dir = CTINFO2DIR(ctinfo); |
1237 | unsigned int ret; | 1233 | unsigned int ret; |
1238 | struct iphdr *iph = (*pskb)->nh.iph; | 1234 | struct iphdr *iph = ip_hdr(*pskb); |
1239 | struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); | 1235 | struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); |
1240 | 1236 | ||
1241 | /* SNMP replies and originating SNMP traps get mangled */ | 1237 | /* SNMP replies and originating SNMP traps get mangled */ |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 15aa3db8cb33..64bbed2ba780 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -86,8 +86,7 @@ nf_nat_fn(unsigned int hooknum, | |||
86 | 86 | ||
87 | /* We never see fragments: conntrack defrags on pre-routing | 87 | /* We never see fragments: conntrack defrags on pre-routing |
88 | and local-out, and nf_nat_out protects post-routing. */ | 88 | and local-out, and nf_nat_out protects post-routing. */ |
89 | NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off | 89 | NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); |
90 | & htons(IP_MF|IP_OFFSET))); | ||
91 | 90 | ||
92 | ct = nf_ct_get(*pskb, &ctinfo); | 91 | ct = nf_ct_get(*pskb, &ctinfo); |
93 | /* Can't track? It's not due to stress, or conntrack would | 92 | /* Can't track? It's not due to stress, or conntrack would |
@@ -98,11 +97,10 @@ nf_nat_fn(unsigned int hooknum, | |||
98 | /* Exception: ICMP redirect to new connection (not in | 97 | /* Exception: ICMP redirect to new connection (not in |
99 | hash table yet). We must not let this through, in | 98 | hash table yet). We must not let this through, in |
100 | case we're doing NAT to the same network. */ | 99 | case we're doing NAT to the same network. */ |
101 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { | 100 | if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { |
102 | struct icmphdr _hdr, *hp; | 101 | struct icmphdr _hdr, *hp; |
103 | 102 | ||
104 | hp = skb_header_pointer(*pskb, | 103 | hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb), |
105 | (*pskb)->nh.iph->ihl*4, | ||
106 | sizeof(_hdr), &_hdr); | 104 | sizeof(_hdr), &_hdr); |
107 | if (hp != NULL && | 105 | if (hp != NULL && |
108 | hp->type == ICMP_REDIRECT) | 106 | hp->type == ICMP_REDIRECT) |
@@ -122,7 +120,7 @@ nf_nat_fn(unsigned int hooknum, | |||
122 | switch (ctinfo) { | 120 | switch (ctinfo) { |
123 | case IP_CT_RELATED: | 121 | case IP_CT_RELATED: |
124 | case IP_CT_RELATED+IP_CT_IS_REPLY: | 122 | case IP_CT_RELATED+IP_CT_IS_REPLY: |
125 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { | 123 | if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { |
126 | if (!nf_nat_icmp_reply_translation(ct, ctinfo, | 124 | if (!nf_nat_icmp_reply_translation(ct, ctinfo, |
127 | hooknum, pskb)) | 125 | hooknum, pskb)) |
128 | return NF_DROP; | 126 | return NF_DROP; |
@@ -177,11 +175,11 @@ nf_nat_in(unsigned int hooknum, | |||
177 | int (*okfn)(struct sk_buff *)) | 175 | int (*okfn)(struct sk_buff *)) |
178 | { | 176 | { |
179 | unsigned int ret; | 177 | unsigned int ret; |
180 | __be32 daddr = (*pskb)->nh.iph->daddr; | 178 | __be32 daddr = ip_hdr(*pskb)->daddr; |
181 | 179 | ||
182 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); | 180 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); |
183 | if (ret != NF_DROP && ret != NF_STOLEN && | 181 | if (ret != NF_DROP && ret != NF_STOLEN && |
184 | daddr != (*pskb)->nh.iph->daddr) { | 182 | daddr != ip_hdr(*pskb)->daddr) { |
185 | dst_release((*pskb)->dst); | 183 | dst_release((*pskb)->dst); |
186 | (*pskb)->dst = NULL; | 184 | (*pskb)->dst = NULL; |
187 | } | 185 | } |
@@ -203,7 +201,7 @@ nf_nat_out(unsigned int hooknum, | |||
203 | 201 | ||
204 | /* root is playing with raw sockets. */ | 202 | /* root is playing with raw sockets. */ |
205 | if ((*pskb)->len < sizeof(struct iphdr) || | 203 | if ((*pskb)->len < sizeof(struct iphdr) || |
206 | (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) | 204 | ip_hdrlen(*pskb) < sizeof(struct iphdr)) |
207 | return NF_ACCEPT; | 205 | return NF_ACCEPT; |
208 | 206 | ||
209 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); | 207 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); |
@@ -236,7 +234,7 @@ nf_nat_local_fn(unsigned int hooknum, | |||
236 | 234 | ||
237 | /* root is playing with raw sockets. */ | 235 | /* root is playing with raw sockets. */ |
238 | if ((*pskb)->len < sizeof(struct iphdr) || | 236 | if ((*pskb)->len < sizeof(struct iphdr) || |
239 | (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) | 237 | ip_hdrlen(*pskb) < sizeof(struct iphdr)) |
240 | return NF_ACCEPT; | 238 | return NF_ACCEPT; |
241 | 239 | ||
242 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); | 240 | ret = nf_nat_fn(hooknum, pskb, in, out, okfn); |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ae68a691e8cd..37ab5802ca08 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -87,19 +87,6 @@ static const struct file_operations sockstat_seq_fops = { | |||
87 | .release = single_release, | 87 | .release = single_release, |
88 | }; | 88 | }; |
89 | 89 | ||
90 | static unsigned long | ||
91 | fold_field(void *mib[], int offt) | ||
92 | { | ||
93 | unsigned long res = 0; | ||
94 | int i; | ||
95 | |||
96 | for_each_possible_cpu(i) { | ||
97 | res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); | ||
98 | res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); | ||
99 | } | ||
100 | return res; | ||
101 | } | ||
102 | |||
103 | /* snmp items */ | 90 | /* snmp items */ |
104 | static const struct snmp_mib snmp4_ipstats_list[] = { | 91 | static const struct snmp_mib snmp4_ipstats_list[] = { |
105 | SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES), | 92 | SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES), |
@@ -266,8 +253,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
266 | 253 | ||
267 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | 254 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) |
268 | seq_printf(seq, " %lu", | 255 | seq_printf(seq, " %lu", |
269 | fold_field((void **) ip_statistics, | 256 | snmp_fold_field((void **)ip_statistics, |
270 | snmp4_ipstats_list[i].entry)); | 257 | snmp4_ipstats_list[i].entry)); |
271 | 258 | ||
272 | seq_puts(seq, "\nIcmp:"); | 259 | seq_puts(seq, "\nIcmp:"); |
273 | for (i = 0; snmp4_icmp_list[i].name != NULL; i++) | 260 | for (i = 0; snmp4_icmp_list[i].name != NULL; i++) |
@@ -276,8 +263,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
276 | seq_puts(seq, "\nIcmp:"); | 263 | seq_puts(seq, "\nIcmp:"); |
277 | for (i = 0; snmp4_icmp_list[i].name != NULL; i++) | 264 | for (i = 0; snmp4_icmp_list[i].name != NULL; i++) |
278 | seq_printf(seq, " %lu", | 265 | seq_printf(seq, " %lu", |
279 | fold_field((void **) icmp_statistics, | 266 | snmp_fold_field((void **)icmp_statistics, |
280 | snmp4_icmp_list[i].entry)); | 267 | snmp4_icmp_list[i].entry)); |
281 | 268 | ||
282 | seq_puts(seq, "\nTcp:"); | 269 | seq_puts(seq, "\nTcp:"); |
283 | for (i = 0; snmp4_tcp_list[i].name != NULL; i++) | 270 | for (i = 0; snmp4_tcp_list[i].name != NULL; i++) |
@@ -288,12 +275,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
288 | /* MaxConn field is signed, RFC 2012 */ | 275 | /* MaxConn field is signed, RFC 2012 */ |
289 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) | 276 | if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) |
290 | seq_printf(seq, " %ld", | 277 | seq_printf(seq, " %ld", |
291 | fold_field((void **) tcp_statistics, | 278 | snmp_fold_field((void **)tcp_statistics, |
292 | snmp4_tcp_list[i].entry)); | 279 | snmp4_tcp_list[i].entry)); |
293 | else | 280 | else |
294 | seq_printf(seq, " %lu", | 281 | seq_printf(seq, " %lu", |
295 | fold_field((void **) tcp_statistics, | 282 | snmp_fold_field((void **)tcp_statistics, |
296 | snmp4_tcp_list[i].entry)); | 283 | snmp4_tcp_list[i].entry)); |
297 | } | 284 | } |
298 | 285 | ||
299 | seq_puts(seq, "\nUdp:"); | 286 | seq_puts(seq, "\nUdp:"); |
@@ -303,8 +290,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
303 | seq_puts(seq, "\nUdp:"); | 290 | seq_puts(seq, "\nUdp:"); |
304 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 291 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
305 | seq_printf(seq, " %lu", | 292 | seq_printf(seq, " %lu", |
306 | fold_field((void **) udp_statistics, | 293 | snmp_fold_field((void **)udp_statistics, |
307 | snmp4_udp_list[i].entry)); | 294 | snmp4_udp_list[i].entry)); |
308 | 295 | ||
309 | /* the UDP and UDP-Lite MIBs are the same */ | 296 | /* the UDP and UDP-Lite MIBs are the same */ |
310 | seq_puts(seq, "\nUdpLite:"); | 297 | seq_puts(seq, "\nUdpLite:"); |
@@ -314,8 +301,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
314 | seq_puts(seq, "\nUdpLite:"); | 301 | seq_puts(seq, "\nUdpLite:"); |
315 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 302 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
316 | seq_printf(seq, " %lu", | 303 | seq_printf(seq, " %lu", |
317 | fold_field((void **) udplite_statistics, | 304 | snmp_fold_field((void **)udplite_statistics, |
318 | snmp4_udp_list[i].entry) ); | 305 | snmp4_udp_list[i].entry)); |
319 | 306 | ||
320 | seq_putc(seq, '\n'); | 307 | seq_putc(seq, '\n'); |
321 | return 0; | 308 | return 0; |
@@ -348,8 +335,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
348 | seq_puts(seq, "\nTcpExt:"); | 335 | seq_puts(seq, "\nTcpExt:"); |
349 | for (i = 0; snmp4_net_list[i].name != NULL; i++) | 336 | for (i = 0; snmp4_net_list[i].name != NULL; i++) |
350 | seq_printf(seq, " %lu", | 337 | seq_printf(seq, " %lu", |
351 | fold_field((void **) net_statistics, | 338 | snmp_fold_field((void **)net_statistics, |
352 | snmp4_net_list[i].entry)); | 339 | snmp4_net_list[i].entry)); |
353 | 340 | ||
354 | seq_putc(seq, '\n'); | 341 | seq_putc(seq, '\n'); |
355 | return 0; | 342 | return 0; |
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index da70fef82c93..971ab9356e51 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c | |||
@@ -45,7 +45,7 @@ | |||
45 | #include <net/ipip.h> | 45 | #include <net/ipip.h> |
46 | #include <linux/igmp.h> | 46 | #include <linux/igmp.h> |
47 | 47 | ||
48 | struct net_protocol *inet_protos[MAX_INET_PROTOS]; | 48 | struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; |
49 | static DEFINE_SPINLOCK(inet_proto_lock); | 49 | static DEFINE_SPINLOCK(inet_proto_lock); |
50 | 50 | ||
51 | /* | 51 | /* |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 87e9c1618100..24d7c9f31918 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -132,7 +132,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) | |||
132 | if (!pskb_may_pull(skb, sizeof(struct icmphdr))) | 132 | if (!pskb_may_pull(skb, sizeof(struct icmphdr))) |
133 | return 1; | 133 | return 1; |
134 | 134 | ||
135 | type = skb->h.icmph->type; | 135 | type = icmp_hdr(skb)->type; |
136 | if (type < 32) { | 136 | if (type < 32) { |
137 | __u32 data = raw_sk(sk)->filter.data; | 137 | __u32 data = raw_sk(sk)->filter.data; |
138 | 138 | ||
@@ -184,8 +184,8 @@ out: | |||
184 | void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) | 184 | void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) |
185 | { | 185 | { |
186 | struct inet_sock *inet = inet_sk(sk); | 186 | struct inet_sock *inet = inet_sk(sk); |
187 | int type = skb->h.icmph->type; | 187 | const int type = icmp_hdr(skb)->type; |
188 | int code = skb->h.icmph->code; | 188 | const int code = icmp_hdr(skb)->code; |
189 | int err = 0; | 189 | int err = 0; |
190 | int harderr = 0; | 190 | int harderr = 0; |
191 | 191 | ||
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) | |||
256 | } | 256 | } |
257 | nf_reset(skb); | 257 | nf_reset(skb); |
258 | 258 | ||
259 | skb_push(skb, skb->data - skb->nh.raw); | 259 | skb_push(skb, skb->data - skb_network_header(skb)); |
260 | 260 | ||
261 | raw_rcv_skb(sk, skb); | 261 | raw_rcv_skb(sk, skb); |
262 | return 0; | 262 | return 0; |
@@ -291,11 +291,13 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
291 | skb->priority = sk->sk_priority; | 291 | skb->priority = sk->sk_priority; |
292 | skb->dst = dst_clone(&rt->u.dst); | 292 | skb->dst = dst_clone(&rt->u.dst); |
293 | 293 | ||
294 | skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length); | 294 | skb_reset_network_header(skb); |
295 | iph = ip_hdr(skb); | ||
296 | skb_put(skb, length); | ||
295 | 297 | ||
296 | skb->ip_summed = CHECKSUM_NONE; | 298 | skb->ip_summed = CHECKSUM_NONE; |
297 | 299 | ||
298 | skb->h.raw = skb->nh.raw; | 300 | skb->transport_header = skb->network_header; |
299 | err = memcpy_fromiovecend((void *)iph, from, 0, length); | 301 | err = memcpy_fromiovecend((void *)iph, from, 0, length); |
300 | if (err) | 302 | if (err) |
301 | goto error_fault; | 303 | goto error_fault; |
@@ -613,7 +615,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
613 | /* Copy the address. */ | 615 | /* Copy the address. */ |
614 | if (sin) { | 616 | if (sin) { |
615 | sin->sin_family = AF_INET; | 617 | sin->sin_family = AF_INET; |
616 | sin->sin_addr.s_addr = skb->nh.iph->saddr; | 618 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
617 | sin->sin_port = 0; | 619 | sin->sin_port = 0; |
618 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); | 620 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); |
619 | } | 621 | } |
@@ -887,7 +889,7 @@ static int raw_seq_show(struct seq_file *seq, void *v) | |||
887 | return 0; | 889 | return 0; |
888 | } | 890 | } |
889 | 891 | ||
890 | static struct seq_operations raw_seq_ops = { | 892 | static const struct seq_operations raw_seq_ops = { |
891 | .start = raw_seq_start, | 893 | .start = raw_seq_start, |
892 | .next = raw_seq_next, | 894 | .next = raw_seq_next, |
893 | .stop = raw_seq_stop, | 895 | .stop = raw_seq_stop, |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 37e0d4d5cf94..cb76e3c725a0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -82,7 +82,6 @@ | |||
82 | #include <linux/proc_fs.h> | 82 | #include <linux/proc_fs.h> |
83 | #include <linux/init.h> | 83 | #include <linux/init.h> |
84 | #include <linux/skbuff.h> | 84 | #include <linux/skbuff.h> |
85 | #include <linux/rtnetlink.h> | ||
86 | #include <linux/inetdevice.h> | 85 | #include <linux/inetdevice.h> |
87 | #include <linux/igmp.h> | 86 | #include <linux/igmp.h> |
88 | #include <linux/pkt_sched.h> | 87 | #include <linux/pkt_sched.h> |
@@ -104,6 +103,7 @@ | |||
104 | #include <net/xfrm.h> | 103 | #include <net/xfrm.h> |
105 | #include <net/ip_mp_alg.h> | 104 | #include <net/ip_mp_alg.h> |
106 | #include <net/netevent.h> | 105 | #include <net/netevent.h> |
106 | #include <net/rtnetlink.h> | ||
107 | #ifdef CONFIG_SYSCTL | 107 | #ifdef CONFIG_SYSCTL |
108 | #include <linux/sysctl.h> | 108 | #include <linux/sysctl.h> |
109 | #endif | 109 | #endif |
@@ -364,7 +364,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | static struct seq_operations rt_cache_seq_ops = { | 367 | static const struct seq_operations rt_cache_seq_ops = { |
368 | .start = rt_cache_seq_start, | 368 | .start = rt_cache_seq_start, |
369 | .next = rt_cache_seq_next, | 369 | .next = rt_cache_seq_next, |
370 | .stop = rt_cache_seq_stop, | 370 | .stop = rt_cache_seq_stop, |
@@ -470,7 +470,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) | |||
470 | return 0; | 470 | return 0; |
471 | } | 471 | } |
472 | 472 | ||
473 | static struct seq_operations rt_cpu_seq_ops = { | 473 | static const struct seq_operations rt_cpu_seq_ops = { |
474 | .start = rt_cpu_seq_start, | 474 | .start = rt_cpu_seq_start, |
475 | .next = rt_cpu_seq_next, | 475 | .next = rt_cpu_seq_next, |
476 | .stop = rt_cpu_seq_stop, | 476 | .stop = rt_cpu_seq_stop, |
@@ -1519,7 +1519,7 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1519 | static int ip_rt_bug(struct sk_buff *skb) | 1519 | static int ip_rt_bug(struct sk_buff *skb) |
1520 | { | 1520 | { |
1521 | printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n", | 1521 | printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n", |
1522 | NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr), | 1522 | NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), |
1523 | skb->dev ? skb->dev->name : "?"); | 1523 | skb->dev ? skb->dev->name : "?"); |
1524 | kfree_skb(skb); | 1524 | kfree_skb(skb); |
1525 | return 0; | 1525 | return 0; |
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1698 | printk(KERN_WARNING "martian source %u.%u.%u.%u from " | 1698 | printk(KERN_WARNING "martian source %u.%u.%u.%u from " |
1699 | "%u.%u.%u.%u, on dev %s\n", | 1699 | "%u.%u.%u.%u, on dev %s\n", |
1700 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | 1700 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); |
1701 | if (dev->hard_header_len && skb->mac.raw) { | 1701 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { |
1702 | int i; | 1702 | int i; |
1703 | unsigned char *p = skb->mac.raw; | 1703 | const unsigned char *p = skb_mac_header(skb); |
1704 | printk(KERN_WARNING "ll header: "); | 1704 | printk(KERN_WARNING "ll header: "); |
1705 | for (i = 0; i < dev->hard_header_len; i++, p++) { | 1705 | for (i = 0; i < dev->hard_header_len; i++, p++) { |
1706 | printk("%02x", *p); | 1706 | printk("%02x", *p); |
@@ -2134,7 +2134,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2134 | rcu_read_lock(); | 2134 | rcu_read_lock(); |
2135 | if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { | 2135 | if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { |
2136 | int our = ip_check_mc(in_dev, daddr, saddr, | 2136 | int our = ip_check_mc(in_dev, daddr, saddr, |
2137 | skb->nh.iph->protocol); | 2137 | ip_hdr(skb)->protocol); |
2138 | if (our | 2138 | if (our |
2139 | #ifdef CONFIG_IP_MROUTE | 2139 | #ifdef CONFIG_IP_MROUTE |
2140 | || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) | 2140 | || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) |
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) | |||
2396 | 2396 | ||
2397 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2397 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2398 | dev_out = ip_dev_find(oldflp->fl4_src); | 2398 | dev_out = ip_dev_find(oldflp->fl4_src); |
2399 | if (dev_out == NULL) | 2399 | if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind)) |
2400 | goto out; | 2400 | goto out; |
2401 | 2401 | ||
2402 | /* I removed check for oif == dev_out->oif here. | 2402 | /* I removed check for oif == dev_out->oif here. |
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) | |||
2407 | of another iface. --ANK | 2407 | of another iface. --ANK |
2408 | */ | 2408 | */ |
2409 | 2409 | ||
2410 | if (oldflp->oif == 0 | 2410 | if (dev_out && oldflp->oif == 0 |
2411 | && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { | 2411 | && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { |
2412 | /* Special hack: user can direct multicasts | 2412 | /* Special hack: user can direct multicasts |
2413 | and limited broadcast via necessary interface | 2413 | and limited broadcast via necessary interface |
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
2683 | id = rt->peer->ip_id_count; | 2683 | id = rt->peer->ip_id_count; |
2684 | if (rt->peer->tcp_ts_stamp) { | 2684 | if (rt->peer->tcp_ts_stamp) { |
2685 | ts = rt->peer->tcp_ts; | 2685 | ts = rt->peer->tcp_ts; |
2686 | tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; | 2686 | tsage = get_seconds() - rt->peer->tcp_ts_stamp; |
2687 | } | 2687 | } |
2688 | } | 2688 | } |
2689 | 2689 | ||
@@ -2721,7 +2721,7 @@ nla_put_failure: | |||
2721 | return -EMSGSIZE; | 2721 | return -EMSGSIZE; |
2722 | } | 2722 | } |
2723 | 2723 | ||
2724 | int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 2724 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) |
2725 | { | 2725 | { |
2726 | struct rtmsg *rtm; | 2726 | struct rtmsg *rtm; |
2727 | struct nlattr *tb[RTA_MAX+1]; | 2727 | struct nlattr *tb[RTA_MAX+1]; |
@@ -2747,10 +2747,11 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | |||
2747 | /* Reserve room for dummy headers, this skb can pass | 2747 | /* Reserve room for dummy headers, this skb can pass |
2748 | through good chunk of routing engine. | 2748 | through good chunk of routing engine. |
2749 | */ | 2749 | */ |
2750 | skb->mac.raw = skb->nh.raw = skb->data; | 2750 | skb_reset_mac_header(skb); |
2751 | skb_reset_network_header(skb); | ||
2751 | 2752 | ||
2752 | /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ | 2753 | /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ |
2753 | skb->nh.iph->protocol = IPPROTO_ICMP; | 2754 | ip_hdr(skb)->protocol = IPPROTO_ICMP; |
2754 | skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); | 2755 | skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); |
2755 | 2756 | ||
2756 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; | 2757 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; |
@@ -3193,6 +3194,8 @@ int __init ip_rt_init(void) | |||
3193 | xfrm_init(); | 3194 | xfrm_init(); |
3194 | xfrm4_init(); | 3195 | xfrm4_init(); |
3195 | #endif | 3196 | #endif |
3197 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); | ||
3198 | |||
3196 | return rc; | 3199 | return rc; |
3197 | } | 3200 | } |
3198 | 3201 | ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 33016cc90f0b..2da1be0589a9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -125,10 +125,11 @@ static __u16 const msstab[] = { | |||
125 | __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) | 125 | __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) |
126 | { | 126 | { |
127 | struct tcp_sock *tp = tcp_sk(sk); | 127 | struct tcp_sock *tp = tcp_sk(sk); |
128 | const struct iphdr *iph = ip_hdr(skb); | ||
129 | const struct tcphdr *th = tcp_hdr(skb); | ||
128 | int mssind; | 130 | int mssind; |
129 | const __u16 mss = *mssp; | 131 | const __u16 mss = *mssp; |
130 | 132 | ||
131 | |||
132 | tp->last_synq_overflow = jiffies; | 133 | tp->last_synq_overflow = jiffies; |
133 | 134 | ||
134 | /* XXX sort msstab[] by probability? Binary search? */ | 135 | /* XXX sort msstab[] by probability? Binary search? */ |
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) | |||
138 | 139 | ||
139 | NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); | 140 | NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); |
140 | 141 | ||
141 | return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr, | 142 | return secure_tcp_syn_cookie(iph->saddr, iph->daddr, |
142 | skb->h.th->source, skb->h.th->dest, | 143 | th->source, th->dest, ntohl(th->seq), |
143 | ntohl(skb->h.th->seq), | ||
144 | jiffies / (HZ * 60), mssind); | 144 | jiffies / (HZ * 60), mssind); |
145 | } | 145 | } |
146 | 146 | ||
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) | |||
157 | */ | 157 | */ |
158 | static inline int cookie_check(struct sk_buff *skb, __u32 cookie) | 158 | static inline int cookie_check(struct sk_buff *skb, __u32 cookie) |
159 | { | 159 | { |
160 | __u32 seq; | 160 | const struct iphdr *iph = ip_hdr(skb); |
161 | __u32 mssind; | 161 | const struct tcphdr *th = tcp_hdr(skb); |
162 | 162 | __u32 seq = ntohl(th->seq) - 1; | |
163 | seq = ntohl(skb->h.th->seq)-1; | 163 | __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, |
164 | mssind = check_tcp_syn_cookie(cookie, | 164 | th->source, th->dest, seq, |
165 | skb->nh.iph->saddr, skb->nh.iph->daddr, | 165 | jiffies / (HZ * 60), |
166 | skb->h.th->source, skb->h.th->dest, | 166 | COUNTER_TRIES); |
167 | seq, jiffies / (HZ * 60), COUNTER_TRIES); | ||
168 | 167 | ||
169 | return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; | 168 | return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; |
170 | } | 169 | } |
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
191 | struct inet_request_sock *ireq; | 190 | struct inet_request_sock *ireq; |
192 | struct tcp_request_sock *treq; | 191 | struct tcp_request_sock *treq; |
193 | struct tcp_sock *tp = tcp_sk(sk); | 192 | struct tcp_sock *tp = tcp_sk(sk); |
194 | __u32 cookie = ntohl(skb->h.th->ack_seq) - 1; | 193 | const struct tcphdr *th = tcp_hdr(skb); |
194 | __u32 cookie = ntohl(th->ack_seq) - 1; | ||
195 | struct sock *ret = sk; | 195 | struct sock *ret = sk; |
196 | struct request_sock *req; | 196 | struct request_sock *req; |
197 | int mss; | 197 | int mss; |
198 | struct rtable *rt; | 198 | struct rtable *rt; |
199 | __u8 rcv_wscale; | 199 | __u8 rcv_wscale; |
200 | 200 | ||
201 | if (!sysctl_tcp_syncookies || !skb->h.th->ack) | 201 | if (!sysctl_tcp_syncookies || !th->ack) |
202 | goto out; | 202 | goto out; |
203 | 203 | ||
204 | if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || | 204 | if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || |
@@ -220,12 +220,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
220 | } | 220 | } |
221 | ireq = inet_rsk(req); | 221 | ireq = inet_rsk(req); |
222 | treq = tcp_rsk(req); | 222 | treq = tcp_rsk(req); |
223 | treq->rcv_isn = ntohl(skb->h.th->seq) - 1; | 223 | treq->rcv_isn = ntohl(th->seq) - 1; |
224 | treq->snt_isn = cookie; | 224 | treq->snt_isn = cookie; |
225 | req->mss = mss; | 225 | req->mss = mss; |
226 | ireq->rmt_port = skb->h.th->source; | 226 | ireq->rmt_port = th->source; |
227 | ireq->loc_addr = skb->nh.iph->daddr; | 227 | ireq->loc_addr = ip_hdr(skb)->daddr; |
228 | ireq->rmt_addr = skb->nh.iph->saddr; | 228 | ireq->rmt_addr = ip_hdr(skb)->saddr; |
229 | ireq->opt = NULL; | 229 | ireq->opt = NULL; |
230 | 230 | ||
231 | /* We throwed the options of the initial SYN away, so we hope | 231 | /* We throwed the options of the initial SYN away, so we hope |
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
261 | .tos = RT_CONN_FLAGS(sk) } }, | 261 | .tos = RT_CONN_FLAGS(sk) } }, |
262 | .proto = IPPROTO_TCP, | 262 | .proto = IPPROTO_TCP, |
263 | .uli_u = { .ports = | 263 | .uli_u = { .ports = |
264 | { .sport = skb->h.th->dest, | 264 | { .sport = th->dest, |
265 | .dport = skb->h.th->source } } }; | 265 | .dport = th->source } } }; |
266 | security_req_classify_flow(req, &fl); | 266 | security_req_classify_flow(req, &fl); |
267 | if (ip_route_output_key(&rt, &fl)) { | 267 | if (ip_route_output_key(&rt, &fl)) { |
268 | reqsk_free(req); | 268 | reqsk_free(req); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0aa304711a96..6817d6485df5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = { | |||
647 | .proc_handler = &proc_dointvec | 647 | .proc_handler = &proc_dointvec |
648 | }, | 648 | }, |
649 | { | 649 | { |
650 | .ctl_name = NET_TCP_FRTO_RESPONSE, | ||
651 | .procname = "tcp_frto_response", | ||
652 | .data = &sysctl_tcp_frto_response, | ||
653 | .maxlen = sizeof(int), | ||
654 | .mode = 0644, | ||
655 | .proc_handler = &proc_dointvec | ||
656 | }, | ||
657 | { | ||
650 | .ctl_name = NET_TCP_LOW_LATENCY, | 658 | .ctl_name = NET_TCP_LOW_LATENCY, |
651 | .procname = "tcp_low_latency", | 659 | .procname = "tcp_low_latency", |
652 | .data = &sysctl_tcp_low_latency, | 660 | .data = &sysctl_tcp_low_latency, |
@@ -803,6 +811,14 @@ ctl_table ipv4_table[] = { | |||
803 | .proc_handler = &proc_allowed_congestion_control, | 811 | .proc_handler = &proc_allowed_congestion_control, |
804 | .strategy = &strategy_allowed_congestion_control, | 812 | .strategy = &strategy_allowed_congestion_control, |
805 | }, | 813 | }, |
814 | { | ||
815 | .ctl_name = NET_TCP_MAX_SSTHRESH, | ||
816 | .procname = "tcp_max_ssthresh", | ||
817 | .data = &sysctl_tcp_max_ssthresh, | ||
818 | .maxlen = sizeof(int), | ||
819 | .mode = 0644, | ||
820 | .proc_handler = &proc_dointvec, | ||
821 | }, | ||
806 | { .ctl_name = 0 } | 822 | { .ctl_name = 0 } |
807 | }; | 823 | }; |
808 | 824 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3834b10b5115..2cf9a898ce50 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -297,7 +297,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated); | |||
297 | * All the sk_stream_mem_schedule() is of this nature: accounting | 297 | * All the sk_stream_mem_schedule() is of this nature: accounting |
298 | * is strict, actions are advisory and have some latency. | 298 | * is strict, actions are advisory and have some latency. |
299 | */ | 299 | */ |
300 | int tcp_memory_pressure; | 300 | int tcp_memory_pressure __read_mostly; |
301 | 301 | ||
302 | EXPORT_SYMBOL(tcp_memory_pressure); | 302 | EXPORT_SYMBOL(tcp_memory_pressure); |
303 | 303 | ||
@@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
425 | /* Subtract 1, if FIN is in queue. */ | 425 | /* Subtract 1, if FIN is in queue. */ |
426 | if (answ && !skb_queue_empty(&sk->sk_receive_queue)) | 426 | if (answ && !skb_queue_empty(&sk->sk_receive_queue)) |
427 | answ -= | 427 | answ -= |
428 | ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin; | 428 | tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin; |
429 | } else | 429 | } else |
430 | answ = tp->urg_seq - tp->copied_seq; | 430 | answ = tp->urg_seq - tp->copied_seq; |
431 | release_sock(sk); | 431 | release_sock(sk); |
@@ -444,7 +444,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
444 | break; | 444 | break; |
445 | default: | 445 | default: |
446 | return -ENOIOCTLCMD; | 446 | return -ENOIOCTLCMD; |
447 | }; | 447 | } |
448 | 448 | ||
449 | return put_user(answ, (int __user *)arg); | 449 | return put_user(answ, (int __user *)arg); |
450 | } | 450 | } |
@@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp) | |||
460 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); | 460 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); |
461 | } | 461 | } |
462 | 462 | ||
463 | static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, | 463 | static inline void skb_entail(struct sock *sk, struct sk_buff *skb) |
464 | struct sk_buff *skb) | ||
465 | { | 464 | { |
465 | struct tcp_sock *tp = tcp_sk(sk); | ||
466 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); | 466 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); |
467 | 467 | ||
468 | skb->csum = 0; | 468 | skb->csum = 0; |
@@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, | |||
470 | tcb->flags = TCPCB_FLAG_ACK; | 470 | tcb->flags = TCPCB_FLAG_ACK; |
471 | tcb->sacked = 0; | 471 | tcb->sacked = 0; |
472 | skb_header_release(skb); | 472 | skb_header_release(skb); |
473 | __skb_queue_tail(&sk->sk_write_queue, skb); | 473 | tcp_add_write_queue_tail(sk, skb); |
474 | sk_charge_skb(sk, skb); | 474 | sk_charge_skb(sk, skb); |
475 | if (!sk->sk_send_head) | ||
476 | sk->sk_send_head = skb; | ||
477 | if (tp->nonagle & TCP_NAGLE_PUSH) | 475 | if (tp->nonagle & TCP_NAGLE_PUSH) |
478 | tp->nonagle &= ~TCP_NAGLE_PUSH; | 476 | tp->nonagle &= ~TCP_NAGLE_PUSH; |
479 | } | 477 | } |
@@ -488,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | |||
488 | } | 486 | } |
489 | } | 487 | } |
490 | 488 | ||
491 | static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags, | 489 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, |
492 | int mss_now, int nonagle) | 490 | int nonagle) |
493 | { | 491 | { |
494 | if (sk->sk_send_head) { | 492 | struct tcp_sock *tp = tcp_sk(sk); |
495 | struct sk_buff *skb = sk->sk_write_queue.prev; | 493 | |
494 | if (tcp_send_head(sk)) { | ||
495 | struct sk_buff *skb = tcp_write_queue_tail(sk); | ||
496 | if (!(flags & MSG_MORE) || forced_push(tp)) | 496 | if (!(flags & MSG_MORE) || forced_push(tp)) |
497 | tcp_mark_push(tp, skb); | 497 | tcp_mark_push(tp, skb); |
498 | tcp_mark_urg(tp, flags, skb); | 498 | tcp_mark_urg(tp, flags, skb); |
499 | __tcp_push_pending_frames(sk, tp, mss_now, | 499 | __tcp_push_pending_frames(sk, mss_now, |
500 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); | 500 | (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); |
501 | } | 501 | } |
502 | } | 502 | } |
@@ -526,13 +526,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
526 | goto do_error; | 526 | goto do_error; |
527 | 527 | ||
528 | while (psize > 0) { | 528 | while (psize > 0) { |
529 | struct sk_buff *skb = sk->sk_write_queue.prev; | 529 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
530 | struct page *page = pages[poffset / PAGE_SIZE]; | 530 | struct page *page = pages[poffset / PAGE_SIZE]; |
531 | int copy, i, can_coalesce; | 531 | int copy, i, can_coalesce; |
532 | int offset = poffset % PAGE_SIZE; | 532 | int offset = poffset % PAGE_SIZE; |
533 | int size = min_t(size_t, psize, PAGE_SIZE - offset); | 533 | int size = min_t(size_t, psize, PAGE_SIZE - offset); |
534 | 534 | ||
535 | if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { | 535 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { |
536 | new_segment: | 536 | new_segment: |
537 | if (!sk_stream_memory_free(sk)) | 537 | if (!sk_stream_memory_free(sk)) |
538 | goto wait_for_sndbuf; | 538 | goto wait_for_sndbuf; |
@@ -542,7 +542,7 @@ new_segment: | |||
542 | if (!skb) | 542 | if (!skb) |
543 | goto wait_for_memory; | 543 | goto wait_for_memory; |
544 | 544 | ||
545 | skb_entail(sk, tp, skb); | 545 | skb_entail(sk, skb); |
546 | copy = size_goal; | 546 | copy = size_goal; |
547 | } | 547 | } |
548 | 548 | ||
@@ -588,8 +588,8 @@ new_segment: | |||
588 | 588 | ||
589 | if (forced_push(tp)) { | 589 | if (forced_push(tp)) { |
590 | tcp_mark_push(tp, skb); | 590 | tcp_mark_push(tp, skb); |
591 | __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); | 591 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); |
592 | } else if (skb == sk->sk_send_head) | 592 | } else if (skb == tcp_send_head(sk)) |
593 | tcp_push_one(sk, mss_now); | 593 | tcp_push_one(sk, mss_now); |
594 | continue; | 594 | continue; |
595 | 595 | ||
@@ -597,7 +597,7 @@ wait_for_sndbuf: | |||
597 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 597 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
598 | wait_for_memory: | 598 | wait_for_memory: |
599 | if (copied) | 599 | if (copied) |
600 | tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 600 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
601 | 601 | ||
602 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 602 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
603 | goto do_error; | 603 | goto do_error; |
@@ -608,7 +608,7 @@ wait_for_memory: | |||
608 | 608 | ||
609 | out: | 609 | out: |
610 | if (copied) | 610 | if (copied) |
611 | tcp_push(sk, tp, flags, mss_now, tp->nonagle); | 611 | tcp_push(sk, flags, mss_now, tp->nonagle); |
612 | return copied; | 612 | return copied; |
613 | 613 | ||
614 | do_error: | 614 | do_error: |
@@ -639,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | |||
639 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 639 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) |
640 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | 640 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) |
641 | 641 | ||
642 | static inline int select_size(struct sock *sk, struct tcp_sock *tp) | 642 | static inline int select_size(struct sock *sk) |
643 | { | 643 | { |
644 | struct tcp_sock *tp = tcp_sk(sk); | ||
644 | int tmp = tp->mss_cache; | 645 | int tmp = tp->mss_cache; |
645 | 646 | ||
646 | if (sk->sk_route_caps & NETIF_F_SG) { | 647 | if (sk->sk_route_caps & NETIF_F_SG) { |
@@ -704,9 +705,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
704 | while (seglen > 0) { | 705 | while (seglen > 0) { |
705 | int copy; | 706 | int copy; |
706 | 707 | ||
707 | skb = sk->sk_write_queue.prev; | 708 | skb = tcp_write_queue_tail(sk); |
708 | 709 | ||
709 | if (!sk->sk_send_head || | 710 | if (!tcp_send_head(sk) || |
710 | (copy = size_goal - skb->len) <= 0) { | 711 | (copy = size_goal - skb->len) <= 0) { |
711 | 712 | ||
712 | new_segment: | 713 | new_segment: |
@@ -716,7 +717,7 @@ new_segment: | |||
716 | if (!sk_stream_memory_free(sk)) | 717 | if (!sk_stream_memory_free(sk)) |
717 | goto wait_for_sndbuf; | 718 | goto wait_for_sndbuf; |
718 | 719 | ||
719 | skb = sk_stream_alloc_pskb(sk, select_size(sk, tp), | 720 | skb = sk_stream_alloc_pskb(sk, select_size(sk), |
720 | 0, sk->sk_allocation); | 721 | 0, sk->sk_allocation); |
721 | if (!skb) | 722 | if (!skb) |
722 | goto wait_for_memory; | 723 | goto wait_for_memory; |
@@ -727,7 +728,7 @@ new_segment: | |||
727 | if (sk->sk_route_caps & NETIF_F_ALL_CSUM) | 728 | if (sk->sk_route_caps & NETIF_F_ALL_CSUM) |
728 | skb->ip_summed = CHECKSUM_PARTIAL; | 729 | skb->ip_summed = CHECKSUM_PARTIAL; |
729 | 730 | ||
730 | skb_entail(sk, tp, skb); | 731 | skb_entail(sk, skb); |
731 | copy = size_goal; | 732 | copy = size_goal; |
732 | } | 733 | } |
733 | 734 | ||
@@ -832,8 +833,8 @@ new_segment: | |||
832 | 833 | ||
833 | if (forced_push(tp)) { | 834 | if (forced_push(tp)) { |
834 | tcp_mark_push(tp, skb); | 835 | tcp_mark_push(tp, skb); |
835 | __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); | 836 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); |
836 | } else if (skb == sk->sk_send_head) | 837 | } else if (skb == tcp_send_head(sk)) |
837 | tcp_push_one(sk, mss_now); | 838 | tcp_push_one(sk, mss_now); |
838 | continue; | 839 | continue; |
839 | 840 | ||
@@ -841,7 +842,7 @@ wait_for_sndbuf: | |||
841 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 842 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
842 | wait_for_memory: | 843 | wait_for_memory: |
843 | if (copied) | 844 | if (copied) |
844 | tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 845 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
845 | 846 | ||
846 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 847 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
847 | goto do_error; | 848 | goto do_error; |
@@ -853,16 +854,18 @@ wait_for_memory: | |||
853 | 854 | ||
854 | out: | 855 | out: |
855 | if (copied) | 856 | if (copied) |
856 | tcp_push(sk, tp, flags, mss_now, tp->nonagle); | 857 | tcp_push(sk, flags, mss_now, tp->nonagle); |
857 | TCP_CHECK_TIMER(sk); | 858 | TCP_CHECK_TIMER(sk); |
858 | release_sock(sk); | 859 | release_sock(sk); |
859 | return copied; | 860 | return copied; |
860 | 861 | ||
861 | do_fault: | 862 | do_fault: |
862 | if (!skb->len) { | 863 | if (!skb->len) { |
863 | if (sk->sk_send_head == skb) | 864 | tcp_unlink_write_queue(skb, sk); |
864 | sk->sk_send_head = NULL; | 865 | /* It is the one place in all of TCP, except connection |
865 | __skb_unlink(skb, &sk->sk_write_queue); | 866 | * reset, where we can be unlinking the send_head. |
867 | */ | ||
868 | tcp_check_send_head(sk, skb); | ||
866 | sk_stream_free_skb(sk, skb); | 869 | sk_stream_free_skb(sk, skb); |
867 | } | 870 | } |
868 | 871 | ||
@@ -1016,9 +1019,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) | |||
1016 | 1019 | ||
1017 | skb_queue_walk(&sk->sk_receive_queue, skb) { | 1020 | skb_queue_walk(&sk->sk_receive_queue, skb) { |
1018 | offset = seq - TCP_SKB_CB(skb)->seq; | 1021 | offset = seq - TCP_SKB_CB(skb)->seq; |
1019 | if (skb->h.th->syn) | 1022 | if (tcp_hdr(skb)->syn) |
1020 | offset--; | 1023 | offset--; |
1021 | if (offset < skb->len || skb->h.th->fin) { | 1024 | if (offset < skb->len || tcp_hdr(skb)->fin) { |
1022 | *off = offset; | 1025 | *off = offset; |
1023 | return skb; | 1026 | return skb; |
1024 | } | 1027 | } |
@@ -1070,7 +1073,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1070 | if (offset != skb->len) | 1073 | if (offset != skb->len) |
1071 | break; | 1074 | break; |
1072 | } | 1075 | } |
1073 | if (skb->h.th->fin) { | 1076 | if (tcp_hdr(skb)->fin) { |
1074 | sk_eat_skb(sk, skb, 0); | 1077 | sk_eat_skb(sk, skb, 0); |
1075 | ++seq; | 1078 | ++seq; |
1076 | break; | 1079 | break; |
@@ -1174,11 +1177,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1174 | break; | 1177 | break; |
1175 | } | 1178 | } |
1176 | offset = *seq - TCP_SKB_CB(skb)->seq; | 1179 | offset = *seq - TCP_SKB_CB(skb)->seq; |
1177 | if (skb->h.th->syn) | 1180 | if (tcp_hdr(skb)->syn) |
1178 | offset--; | 1181 | offset--; |
1179 | if (offset < skb->len) | 1182 | if (offset < skb->len) |
1180 | goto found_ok_skb; | 1183 | goto found_ok_skb; |
1181 | if (skb->h.th->fin) | 1184 | if (tcp_hdr(skb)->fin) |
1182 | goto found_fin_ok; | 1185 | goto found_fin_ok; |
1183 | BUG_TRAP(flags & MSG_PEEK); | 1186 | BUG_TRAP(flags & MSG_PEEK); |
1184 | skb = skb->next; | 1187 | skb = skb->next; |
@@ -1389,12 +1392,12 @@ do_prequeue: | |||
1389 | skip_copy: | 1392 | skip_copy: |
1390 | if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { | 1393 | if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { |
1391 | tp->urg_data = 0; | 1394 | tp->urg_data = 0; |
1392 | tcp_fast_path_check(sk, tp); | 1395 | tcp_fast_path_check(sk); |
1393 | } | 1396 | } |
1394 | if (used + offset < skb->len) | 1397 | if (used + offset < skb->len) |
1395 | continue; | 1398 | continue; |
1396 | 1399 | ||
1397 | if (skb->h.th->fin) | 1400 | if (tcp_hdr(skb)->fin) |
1398 | goto found_fin_ok; | 1401 | goto found_fin_ok; |
1399 | if (!(flags & MSG_PEEK)) { | 1402 | if (!(flags & MSG_PEEK)) { |
1400 | sk_eat_skb(sk, skb, copied_early); | 1403 | sk_eat_skb(sk, skb, copied_early); |
@@ -1563,7 +1566,7 @@ void tcp_close(struct sock *sk, long timeout) | |||
1563 | */ | 1566 | */ |
1564 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { | 1567 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { |
1565 | u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - | 1568 | u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - |
1566 | skb->h.th->fin; | 1569 | tcp_hdr(skb)->fin; |
1567 | data_was_unread += len; | 1570 | data_was_unread += len; |
1568 | __kfree_skb(skb); | 1571 | __kfree_skb(skb); |
1569 | } | 1572 | } |
@@ -1732,7 +1735,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
1732 | 1735 | ||
1733 | tcp_clear_xmit_timers(sk); | 1736 | tcp_clear_xmit_timers(sk); |
1734 | __skb_queue_purge(&sk->sk_receive_queue); | 1737 | __skb_queue_purge(&sk->sk_receive_queue); |
1735 | sk_stream_writequeue_purge(sk); | 1738 | tcp_write_queue_purge(sk); |
1736 | __skb_queue_purge(&tp->out_of_order_queue); | 1739 | __skb_queue_purge(&tp->out_of_order_queue); |
1737 | #ifdef CONFIG_NET_DMA | 1740 | #ifdef CONFIG_NET_DMA |
1738 | __skb_queue_purge(&sk->sk_async_wait_queue); | 1741 | __skb_queue_purge(&sk->sk_async_wait_queue); |
@@ -1758,7 +1761,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
1758 | tcp_set_ca_state(sk, TCP_CA_Open); | 1761 | tcp_set_ca_state(sk, TCP_CA_Open); |
1759 | tcp_clear_retrans(tp); | 1762 | tcp_clear_retrans(tp); |
1760 | inet_csk_delack_init(sk); | 1763 | inet_csk_delack_init(sk); |
1761 | sk->sk_send_head = NULL; | 1764 | tcp_init_send_head(sk); |
1762 | tp->rx_opt.saw_tstamp = 0; | 1765 | tp->rx_opt.saw_tstamp = 0; |
1763 | tcp_sack_reset(&tp->rx_opt); | 1766 | tcp_sack_reset(&tp->rx_opt); |
1764 | __sk_dst_reset(sk); | 1767 | __sk_dst_reset(sk); |
@@ -1830,7 +1833,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
1830 | * for currently queued segments. | 1833 | * for currently queued segments. |
1831 | */ | 1834 | */ |
1832 | tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; | 1835 | tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; |
1833 | tcp_push_pending_frames(sk, tp); | 1836 | tcp_push_pending_frames(sk); |
1834 | } else { | 1837 | } else { |
1835 | tp->nonagle &= ~TCP_NAGLE_OFF; | 1838 | tp->nonagle &= ~TCP_NAGLE_OFF; |
1836 | } | 1839 | } |
@@ -1854,7 +1857,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
1854 | tp->nonagle &= ~TCP_NAGLE_CORK; | 1857 | tp->nonagle &= ~TCP_NAGLE_CORK; |
1855 | if (tp->nonagle&TCP_NAGLE_OFF) | 1858 | if (tp->nonagle&TCP_NAGLE_OFF) |
1856 | tp->nonagle |= TCP_NAGLE_PUSH; | 1859 | tp->nonagle |= TCP_NAGLE_PUSH; |
1857 | tcp_push_pending_frames(sk, tp); | 1860 | tcp_push_pending_frames(sk); |
1858 | } | 1861 | } |
1859 | break; | 1862 | break; |
1860 | 1863 | ||
@@ -1954,7 +1957,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
1954 | default: | 1957 | default: |
1955 | err = -ENOPROTOOPT; | 1958 | err = -ENOPROTOOPT; |
1956 | break; | 1959 | break; |
1957 | }; | 1960 | } |
1961 | |||
1958 | release_sock(sk); | 1962 | release_sock(sk); |
1959 | return err; | 1963 | return err; |
1960 | } | 1964 | } |
@@ -2124,7 +2128,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2124 | return 0; | 2128 | return 0; |
2125 | default: | 2129 | default: |
2126 | return -ENOPROTOOPT; | 2130 | return -ENOPROTOOPT; |
2127 | }; | 2131 | } |
2128 | 2132 | ||
2129 | if (put_user(len, optlen)) | 2133 | if (put_user(len, optlen)) |
2130 | return -EFAULT; | 2134 | return -EFAULT; |
@@ -2170,7 +2174,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | |||
2170 | if (!pskb_may_pull(skb, sizeof(*th))) | 2174 | if (!pskb_may_pull(skb, sizeof(*th))) |
2171 | goto out; | 2175 | goto out; |
2172 | 2176 | ||
2173 | th = skb->h.th; | 2177 | th = tcp_hdr(skb); |
2174 | thlen = th->doff * 4; | 2178 | thlen = th->doff * 4; |
2175 | if (thlen < sizeof(*th)) | 2179 | if (thlen < sizeof(*th)) |
2176 | goto out; | 2180 | goto out; |
@@ -2210,7 +2214,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | |||
2210 | delta = htonl(oldlen + (thlen + len)); | 2214 | delta = htonl(oldlen + (thlen + len)); |
2211 | 2215 | ||
2212 | skb = segs; | 2216 | skb = segs; |
2213 | th = skb->h.th; | 2217 | th = tcp_hdr(skb); |
2214 | seq = ntohl(th->seq); | 2218 | seq = ntohl(th->seq); |
2215 | 2219 | ||
2216 | do { | 2220 | do { |
@@ -2219,23 +2223,25 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | |||
2219 | th->check = ~csum_fold((__force __wsum)((__force u32)th->check + | 2223 | th->check = ~csum_fold((__force __wsum)((__force u32)th->check + |
2220 | (__force u32)delta)); | 2224 | (__force u32)delta)); |
2221 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 2225 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
2222 | th->check = csum_fold(csum_partial(skb->h.raw, thlen, | 2226 | th->check = |
2223 | skb->csum)); | 2227 | csum_fold(csum_partial(skb_transport_header(skb), |
2228 | thlen, skb->csum)); | ||
2224 | 2229 | ||
2225 | seq += len; | 2230 | seq += len; |
2226 | skb = skb->next; | 2231 | skb = skb->next; |
2227 | th = skb->h.th; | 2232 | th = tcp_hdr(skb); |
2228 | 2233 | ||
2229 | th->seq = htonl(seq); | 2234 | th->seq = htonl(seq); |
2230 | th->cwr = 0; | 2235 | th->cwr = 0; |
2231 | } while (skb->next); | 2236 | } while (skb->next); |
2232 | 2237 | ||
2233 | delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); | 2238 | delta = htonl(oldlen + (skb->tail - skb->transport_header) + |
2239 | skb->data_len); | ||
2234 | th->check = ~csum_fold((__force __wsum)((__force u32)th->check + | 2240 | th->check = ~csum_fold((__force __wsum)((__force u32)th->check + |
2235 | (__force u32)delta)); | 2241 | (__force u32)delta)); |
2236 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 2242 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
2237 | th->check = csum_fold(csum_partial(skb->h.raw, thlen, | 2243 | th->check = csum_fold(csum_partial(skb_transport_header(skb), |
2238 | skb->csum)); | 2244 | thlen, skb->csum)); |
2239 | 2245 | ||
2240 | out: | 2246 | out: |
2241 | return segs; | 2247 | return segs; |
@@ -2372,6 +2378,23 @@ void __tcp_put_md5sig_pool(void) | |||
2372 | EXPORT_SYMBOL(__tcp_put_md5sig_pool); | 2378 | EXPORT_SYMBOL(__tcp_put_md5sig_pool); |
2373 | #endif | 2379 | #endif |
2374 | 2380 | ||
2381 | void tcp_done(struct sock *sk) | ||
2382 | { | ||
2383 | if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | ||
2384 | TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); | ||
2385 | |||
2386 | tcp_set_state(sk, TCP_CLOSE); | ||
2387 | tcp_clear_xmit_timers(sk); | ||
2388 | |||
2389 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
2390 | |||
2391 | if (!sock_flag(sk, SOCK_DEAD)) | ||
2392 | sk->sk_state_change(sk); | ||
2393 | else | ||
2394 | inet_csk_destroy_sock(sk); | ||
2395 | } | ||
2396 | EXPORT_SYMBOL_GPL(tcp_done); | ||
2397 | |||
2375 | extern void __skb_cb_too_small_for_tcp(int, int); | 2398 | extern void __skb_cb_too_small_for_tcp(int, int); |
2376 | extern struct tcp_congestion_ops tcp_reno; | 2399 | extern struct tcp_congestion_ops tcp_reno; |
2377 | 2400 | ||
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 5730333cd0ac..281c9f913257 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) | |||
206 | /* Track delayed acknowledgment ratio using sliding window | 206 | /* Track delayed acknowledgment ratio using sliding window |
207 | * ratio = (15*ratio + sample) / 16 | 207 | * ratio = (15*ratio + sample) / 16 |
208 | */ | 208 | */ |
209 | static void bictcp_acked(struct sock *sk, u32 cnt) | 209 | static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) |
210 | { | 210 | { |
211 | const struct inet_connection_sock *icsk = inet_csk(sk); | 211 | const struct inet_connection_sock *icsk = inet_csk(sk); |
212 | 212 | ||
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 34ae3f13483a..86b26539e54b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
13 | #include <net/tcp.h> | 13 | #include <net/tcp.h> |
14 | 14 | ||
15 | int sysctl_tcp_max_ssthresh = 0; | ||
16 | |||
15 | static DEFINE_SPINLOCK(tcp_cong_list_lock); | 17 | static DEFINE_SPINLOCK(tcp_cong_list_lock); |
16 | static LIST_HEAD(tcp_cong_list); | 18 | static LIST_HEAD(tcp_cong_list); |
17 | 19 | ||
@@ -124,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name) | |||
124 | #endif | 126 | #endif |
125 | 127 | ||
126 | if (ca) { | 128 | if (ca) { |
127 | ca->non_restricted = 1; /* default is always allowed */ | 129 | ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ |
128 | list_move(&ca->list, &tcp_cong_list); | 130 | list_move(&ca->list, &tcp_cong_list); |
129 | ret = 0; | 131 | ret = 0; |
130 | } | 132 | } |
@@ -179,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) | |||
179 | *buf = '\0'; | 181 | *buf = '\0'; |
180 | rcu_read_lock(); | 182 | rcu_read_lock(); |
181 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 183 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
182 | if (!ca->non_restricted) | 184 | if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) |
183 | continue; | 185 | continue; |
184 | offs += snprintf(buf + offs, maxlen - offs, | 186 | offs += snprintf(buf + offs, maxlen - offs, |
185 | "%s%s", | 187 | "%s%s", |
@@ -210,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val) | |||
210 | } | 212 | } |
211 | } | 213 | } |
212 | 214 | ||
213 | /* pass 2 clear */ | 215 | /* pass 2 clear old values */ |
214 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) | 216 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) |
215 | ca->non_restricted = 0; | 217 | ca->flags &= ~TCP_CONG_NON_RESTRICTED; |
216 | 218 | ||
217 | /* pass 3 mark as allowed */ | 219 | /* pass 3 mark as allowed */ |
218 | while ((name = strsep(&val, " ")) && *name) { | 220 | while ((name = strsep(&val, " ")) && *name) { |
219 | ca = tcp_ca_find(name); | 221 | ca = tcp_ca_find(name); |
220 | WARN_ON(!ca); | 222 | WARN_ON(!ca); |
221 | if (ca) | 223 | if (ca) |
222 | ca->non_restricted = 1; | 224 | ca->flags |= TCP_CONG_NON_RESTRICTED; |
223 | } | 225 | } |
224 | out: | 226 | out: |
225 | spin_unlock(&tcp_cong_list_lock); | 227 | spin_unlock(&tcp_cong_list_lock); |
@@ -254,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
254 | if (!ca) | 256 | if (!ca) |
255 | err = -ENOENT; | 257 | err = -ENOENT; |
256 | 258 | ||
257 | else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) | 259 | else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) |
258 | err = -EPERM; | 260 | err = -EPERM; |
259 | 261 | ||
260 | else if (!try_module_get(ca->owner)) | 262 | else if (!try_module_get(ca->owner)) |
@@ -274,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
274 | 276 | ||
275 | 277 | ||
276 | /* | 278 | /* |
277 | * Linear increase during slow start | 279 | * Slow start (exponential increase) with |
280 | * RFC3742 Limited Slow Start (fast linear increase) support. | ||
278 | */ | 281 | */ |
279 | void tcp_slow_start(struct tcp_sock *tp) | 282 | void tcp_slow_start(struct tcp_sock *tp) |
280 | { | 283 | { |
284 | int cnt = 0; | ||
285 | |||
281 | if (sysctl_tcp_abc) { | 286 | if (sysctl_tcp_abc) { |
282 | /* RFC3465: Slow Start | 287 | /* RFC3465: Slow Start |
283 | * TCP sender SHOULD increase cwnd by the number of | 288 | * TCP sender SHOULD increase cwnd by the number of |
@@ -286,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp) | |||
286 | */ | 291 | */ |
287 | if (tp->bytes_acked < tp->mss_cache) | 292 | if (tp->bytes_acked < tp->mss_cache) |
288 | return; | 293 | return; |
289 | |||
290 | /* We MAY increase by 2 if discovered delayed ack */ | ||
291 | if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) { | ||
292 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
293 | tp->snd_cwnd++; | ||
294 | } | ||
295 | } | 294 | } |
295 | |||
296 | if (sysctl_tcp_max_ssthresh > 0 && | ||
297 | tp->snd_cwnd > sysctl_tcp_max_ssthresh) | ||
298 | cnt += sysctl_tcp_max_ssthresh>>1; | ||
299 | else | ||
300 | cnt += tp->snd_cwnd; | ||
301 | |||
302 | /* RFC3465: We MAY increase by 2 if discovered delayed ack */ | ||
303 | if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) | ||
304 | cnt <<= 1; | ||
296 | tp->bytes_acked = 0; | 305 | tp->bytes_acked = 0; |
297 | 306 | ||
298 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 307 | tp->snd_cwnd_cnt += cnt; |
299 | tp->snd_cwnd++; | 308 | while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { |
309 | tp->snd_cwnd_cnt -= tp->snd_cwnd; | ||
310 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
311 | tp->snd_cwnd++; | ||
312 | } | ||
300 | } | 313 | } |
301 | EXPORT_SYMBOL_GPL(tcp_slow_start); | 314 | EXPORT_SYMBOL_GPL(tcp_slow_start); |
302 | 315 | ||
@@ -358,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk) | |||
358 | EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); | 371 | EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); |
359 | 372 | ||
360 | struct tcp_congestion_ops tcp_reno = { | 373 | struct tcp_congestion_ops tcp_reno = { |
374 | .flags = TCP_CONG_NON_RESTRICTED, | ||
361 | .name = "reno", | 375 | .name = "reno", |
362 | .non_restricted = 1, | ||
363 | .owner = THIS_MODULE, | 376 | .owner = THIS_MODULE, |
364 | .ssthresh = tcp_reno_ssthresh, | 377 | .ssthresh = tcp_reno_ssthresh, |
365 | .cong_avoid = tcp_reno_cong_avoid, | 378 | .cong_avoid = tcp_reno_cong_avoid, |
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 9a582fb4ef9f..14224487b16b 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.0 | 2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.1 |
3 | * | 3 | * |
4 | * This is from the implementation of CUBIC TCP in | 4 | * This is from the implementation of CUBIC TCP in |
5 | * Injong Rhee, Lisong Xu. | 5 | * Injong Rhee, Lisong Xu. |
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_ | |||
51 | module_param(tcp_friendliness, int, 0644); | 51 | module_param(tcp_friendliness, int, 0644); |
52 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); | 52 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
53 | 53 | ||
54 | #include <asm/div64.h> | ||
55 | |||
56 | /* BIC TCP Parameters */ | 54 | /* BIC TCP Parameters */ |
57 | struct bictcp { | 55 | struct bictcp { |
58 | u32 cnt; /* increase cwnd by 1 after ACKs */ | 56 | u32 cnt; /* increase cwnd by 1 after ACKs */ |
@@ -93,50 +91,51 @@ static void bictcp_init(struct sock *sk) | |||
93 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 91 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
94 | } | 92 | } |
95 | 93 | ||
96 | /* 64bit divisor, dividend and result. dynamic precision */ | 94 | /* calculate the cubic root of x using a table lookup followed by one |
97 | static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) | 95 | * Newton-Raphson iteration. |
98 | { | 96 | * Avg err ~= 0.195% |
99 | u_int32_t d = divisor; | ||
100 | |||
101 | if (divisor > 0xffffffffULL) { | ||
102 | unsigned int shift = fls(divisor >> 32); | ||
103 | |||
104 | d = divisor >> shift; | ||
105 | dividend >>= shift; | ||
106 | } | ||
107 | |||
108 | /* avoid 64 bit division if possible */ | ||
109 | if (dividend >> 32) | ||
110 | do_div(dividend, d); | ||
111 | else | ||
112 | dividend = (uint32_t) dividend / d; | ||
113 | |||
114 | return dividend; | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * calculate the cubic root of x using Newton-Raphson | ||
119 | */ | 97 | */ |
120 | static u32 cubic_root(u64 a) | 98 | static u32 cubic_root(u64 a) |
121 | { | 99 | { |
122 | u32 x, x1; | 100 | u32 x, b, shift; |
123 | 101 | /* | |
124 | /* Initial estimate is based on: | 102 | * cbrt(x) MSB values for x MSB values in [0..63]. |
125 | * cbrt(x) = exp(log(x) / 3) | 103 | * Precomputed then refined by hand - Willy Tarreau |
104 | * | ||
105 | * For x in [0..63], | ||
106 | * v = cbrt(x << 18) - 1 | ||
107 | * cbrt(x) = (v[x] + 10) >> 6 | ||
126 | */ | 108 | */ |
127 | x = 1u << (fls64(a)/3); | 109 | static const u8 v[] = { |
110 | /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, | ||
111 | /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, | ||
112 | /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, | ||
113 | /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, | ||
114 | /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, | ||
115 | /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, | ||
116 | /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, | ||
117 | /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, | ||
118 | }; | ||
119 | |||
120 | b = fls64(a); | ||
121 | if (b < 7) { | ||
122 | /* a in [0..63] */ | ||
123 | return ((u32)v[(u32)a] + 35) >> 6; | ||
124 | } | ||
125 | |||
126 | b = ((b * 84) >> 8) - 1; | ||
127 | shift = (a >> (b * 3)); | ||
128 | |||
129 | x = ((u32)(((u32)v[shift] + 10) << b)) >> 6; | ||
128 | 130 | ||
129 | /* | 131 | /* |
130 | * Iteration based on: | 132 | * Newton-Raphson iteration |
131 | * 2 | 133 | * 2 |
132 | * x = ( 2 * x + a / x ) / 3 | 134 | * x = ( 2 * x + a / x ) / 3 |
133 | * k+1 k k | 135 | * k+1 k k |
134 | */ | 136 | */ |
135 | do { | 137 | x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1))); |
136 | x1 = x; | 138 | x = ((x * 341) >> 10); |
137 | x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3; | ||
138 | } while (abs(x1 - x) > 1); | ||
139 | |||
140 | return x; | 139 | return x; |
141 | } | 140 | } |
142 | 141 | ||
@@ -215,7 +214,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
215 | if (ca->delay_min > 0) { | 214 | if (ca->delay_min > 0) { |
216 | /* max increment = Smax * rtt / 0.1 */ | 215 | /* max increment = Smax * rtt / 0.1 */ |
217 | min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); | 216 | min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); |
218 | if (ca->cnt < min_cnt) | 217 | |
218 | /* use concave growth when the target is above the origin */ | ||
219 | if (ca->cnt < min_cnt && t >= ca->bic_K) | ||
219 | ca->cnt = min_cnt; | 220 | ca->cnt = min_cnt; |
220 | } | 221 | } |
221 | 222 | ||
@@ -333,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) | |||
333 | /* Track delayed acknowledgment ratio using sliding window | 334 | /* Track delayed acknowledgment ratio using sliding window |
334 | * ratio = (15*ratio + sample) / 16 | 335 | * ratio = (15*ratio + sample) / 16 |
335 | */ | 336 | */ |
336 | static void bictcp_acked(struct sock *sk, u32 cnt) | 337 | static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) |
337 | { | 338 | { |
338 | const struct inet_connection_sock *icsk = inet_csk(sk); | 339 | const struct inet_connection_sock *icsk = inet_csk(sk); |
339 | 340 | ||
@@ -401,4 +402,4 @@ module_exit(cubictcp_unregister); | |||
401 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); | 402 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
402 | MODULE_LICENSE("GPL"); | 403 | MODULE_LICENSE("GPL"); |
403 | MODULE_DESCRIPTION("CUBIC TCP"); | 404 | MODULE_DESCRIPTION("CUBIC TCP"); |
404 | MODULE_VERSION("2.0"); | 405 | MODULE_VERSION("2.1"); |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 1020eb48d8d1..4ba4a7ae0a85 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk) | |||
98 | } | 98 | } |
99 | } | 99 | } |
100 | 100 | ||
101 | static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) | 101 | static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last) |
102 | { | 102 | { |
103 | const struct inet_connection_sock *icsk = inet_csk(sk); | 103 | const struct inet_connection_sock *icsk = inet_csk(sk); |
104 | const struct tcp_sock *tp = tcp_sk(sk); | 104 | const struct tcp_sock *tp = tcp_sk(sk); |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 59e691d26f64..e5be35117223 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | |||
144 | ca->snd_cwnd_cents += odd; | 144 | ca->snd_cwnd_cents += odd; |
145 | 145 | ||
146 | /* check when fractions goes >=128 and increase cwnd by 1. */ | 146 | /* check when fractions goes >=128 and increase cwnd by 1. */ |
147 | while(ca->snd_cwnd_cents >= 128) { | 147 | while (ca->snd_cwnd_cents >= 128) { |
148 | tp->snd_cwnd++; | 148 | tp->snd_cwnd++; |
149 | ca->snd_cwnd_cents -= 128; | 149 | ca->snd_cwnd_cents -= 128; |
150 | tp->snd_cwnd_cnt = 0; | 150 | tp->snd_cwnd_cnt = 0; |
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c new file mode 100644 index 000000000000..4adc47c55351 --- /dev/null +++ b/net/ipv4/tcp_illinois.c | |||
@@ -0,0 +1,356 @@ | |||
1 | /* | ||
2 | * TCP Illinois congestion control. | ||
3 | * Home page: | ||
4 | * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html | ||
5 | * | ||
6 | * The algorithm is described in: | ||
7 | * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm | ||
8 | * for High-Speed Networks" | ||
9 | * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf | ||
10 | * | ||
11 | * Implemented from description in paper and ns-2 simulation. | ||
12 | * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org> | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/inet_diag.h> | ||
18 | #include <asm/div64.h> | ||
19 | #include <net/tcp.h> | ||
20 | |||
21 | #define ALPHA_SHIFT 7 | ||
22 | #define ALPHA_SCALE (1u<<ALPHA_SHIFT) | ||
23 | #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ | ||
24 | #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ | ||
25 | #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ | ||
26 | #define U32_MAX ((u32)~0U) | ||
27 | #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ | ||
28 | |||
29 | #define BETA_SHIFT 6 | ||
30 | #define BETA_SCALE (1u<<BETA_SHIFT) | ||
31 | #define BETA_MIN (BETA_SCALE/8) /* 0.125 */ | ||
32 | #define BETA_MAX (BETA_SCALE/2) /* 0.5 */ | ||
33 | #define BETA_BASE BETA_MAX | ||
34 | |||
35 | static int win_thresh __read_mostly = 15; | ||
36 | module_param(win_thresh, int, 0); | ||
37 | MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); | ||
38 | |||
39 | static int theta __read_mostly = 5; | ||
40 | module_param(theta, int, 0); | ||
41 | MODULE_PARM_DESC(theta, "# of fast RTT's before full growth"); | ||
42 | |||
43 | /* TCP Illinois Parameters */ | ||
44 | struct illinois { | ||
45 | u64 sum_rtt; /* sum of rtt's measured within last rtt */ | ||
46 | u16 cnt_rtt; /* # of rtts measured within last rtt */ | ||
47 | u32 base_rtt; /* min of all rtt in usec */ | ||
48 | u32 max_rtt; /* max of all rtt in usec */ | ||
49 | u32 end_seq; /* right edge of current RTT */ | ||
50 | u32 alpha; /* Additive increase */ | ||
51 | u32 beta; /* Muliplicative decrease */ | ||
52 | u16 acked; /* # packets acked by current ACK */ | ||
53 | u8 rtt_above; /* average rtt has gone above threshold */ | ||
54 | u8 rtt_low; /* # of rtts measurements below threshold */ | ||
55 | }; | ||
56 | |||
57 | static void rtt_reset(struct sock *sk) | ||
58 | { | ||
59 | struct tcp_sock *tp = tcp_sk(sk); | ||
60 | struct illinois *ca = inet_csk_ca(sk); | ||
61 | |||
62 | ca->end_seq = tp->snd_nxt; | ||
63 | ca->cnt_rtt = 0; | ||
64 | ca->sum_rtt = 0; | ||
65 | |||
66 | /* TODO: age max_rtt? */ | ||
67 | } | ||
68 | |||
69 | static void tcp_illinois_init(struct sock *sk) | ||
70 | { | ||
71 | struct illinois *ca = inet_csk_ca(sk); | ||
72 | |||
73 | ca->alpha = ALPHA_MAX; | ||
74 | ca->beta = BETA_BASE; | ||
75 | ca->base_rtt = 0x7fffffff; | ||
76 | ca->max_rtt = 0; | ||
77 | |||
78 | ca->acked = 0; | ||
79 | ca->rtt_low = 0; | ||
80 | ca->rtt_above = 0; | ||
81 | |||
82 | rtt_reset(sk); | ||
83 | } | ||
84 | |||
85 | /* Measure RTT for each ack. */ | ||
86 | static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) | ||
87 | { | ||
88 | struct illinois *ca = inet_csk_ca(sk); | ||
89 | u32 rtt; | ||
90 | |||
91 | ca->acked = pkts_acked; | ||
92 | |||
93 | rtt = ktime_to_us(net_timedelta(last)); | ||
94 | |||
95 | /* ignore bogus values, this prevents wraparound in alpha math */ | ||
96 | if (rtt > RTT_MAX) | ||
97 | rtt = RTT_MAX; | ||
98 | |||
99 | /* keep track of minimum RTT seen so far */ | ||
100 | if (ca->base_rtt > rtt) | ||
101 | ca->base_rtt = rtt; | ||
102 | |||
103 | /* and max */ | ||
104 | if (ca->max_rtt < rtt) | ||
105 | ca->max_rtt = rtt; | ||
106 | |||
107 | ++ca->cnt_rtt; | ||
108 | ca->sum_rtt += rtt; | ||
109 | } | ||
110 | |||
111 | /* Maximum queuing delay */ | ||
112 | static inline u32 max_delay(const struct illinois *ca) | ||
113 | { | ||
114 | return ca->max_rtt - ca->base_rtt; | ||
115 | } | ||
116 | |||
117 | /* Average queuing delay */ | ||
118 | static inline u32 avg_delay(const struct illinois *ca) | ||
119 | { | ||
120 | u64 t = ca->sum_rtt; | ||
121 | |||
122 | do_div(t, ca->cnt_rtt); | ||
123 | return t - ca->base_rtt; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Compute value of alpha used for additive increase. | ||
128 | * If small window then use 1.0, equivalent to Reno. | ||
129 | * | ||
130 | * For larger windows, adjust based on average delay. | ||
131 | * A. If average delay is at minimum (we are uncongested), | ||
132 | * then use large alpha (10.0) to increase faster. | ||
133 | * B. If average delay is at maximum (getting congested) | ||
134 | * then use small alpha (0.3) | ||
135 | * | ||
136 | * The result is a convex window growth curve. | ||
137 | */ | ||
138 | static u32 alpha(struct illinois *ca, u32 da, u32 dm) | ||
139 | { | ||
140 | u32 d1 = dm / 100; /* Low threshold */ | ||
141 | |||
142 | if (da <= d1) { | ||
143 | /* If never got out of low delay zone, then use max */ | ||
144 | if (!ca->rtt_above) | ||
145 | return ALPHA_MAX; | ||
146 | |||
147 | /* Wait for 5 good RTT's before allowing alpha to go alpha max. | ||
148 | * This prevents one good RTT from causing sudden window increase. | ||
149 | */ | ||
150 | if (++ca->rtt_low < theta) | ||
151 | return ca->alpha; | ||
152 | |||
153 | ca->rtt_low = 0; | ||
154 | ca->rtt_above = 0; | ||
155 | return ALPHA_MAX; | ||
156 | } | ||
157 | |||
158 | ca->rtt_above = 1; | ||
159 | |||
160 | /* | ||
161 | * Based on: | ||
162 | * | ||
163 | * (dm - d1) amin amax | ||
164 | * k1 = ------------------- | ||
165 | * amax - amin | ||
166 | * | ||
167 | * (dm - d1) amin | ||
168 | * k2 = ---------------- - d1 | ||
169 | * amax - amin | ||
170 | * | ||
171 | * k1 | ||
172 | * alpha = ---------- | ||
173 | * k2 + da | ||
174 | */ | ||
175 | |||
176 | dm -= d1; | ||
177 | da -= d1; | ||
178 | return (dm * ALPHA_MAX) / | ||
179 | (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Beta used for multiplicative decrease. | ||
184 | * For small window sizes returns same value as Reno (0.5) | ||
185 | * | ||
186 | * If delay is small (10% of max) then beta = 1/8 | ||
187 | * If delay is up to 80% of max then beta = 1/2 | ||
188 | * In between is a linear function | ||
189 | */ | ||
190 | static u32 beta(u32 da, u32 dm) | ||
191 | { | ||
192 | u32 d2, d3; | ||
193 | |||
194 | d2 = dm / 10; | ||
195 | if (da <= d2) | ||
196 | return BETA_MIN; | ||
197 | |||
198 | d3 = (8 * dm) / 10; | ||
199 | if (da >= d3 || d3 <= d2) | ||
200 | return BETA_MAX; | ||
201 | |||
202 | /* | ||
203 | * Based on: | ||
204 | * | ||
205 | * bmin d3 - bmax d2 | ||
206 | * k3 = ------------------- | ||
207 | * d3 - d2 | ||
208 | * | ||
209 | * bmax - bmin | ||
210 | * k4 = ------------- | ||
211 | * d3 - d2 | ||
212 | * | ||
213 | * b = k3 + k4 da | ||
214 | */ | ||
215 | return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da) | ||
216 | / (d3 - d2); | ||
217 | } | ||
218 | |||
219 | /* Update alpha and beta values once per RTT */ | ||
220 | static void update_params(struct sock *sk) | ||
221 | { | ||
222 | struct tcp_sock *tp = tcp_sk(sk); | ||
223 | struct illinois *ca = inet_csk_ca(sk); | ||
224 | |||
225 | if (tp->snd_cwnd < win_thresh) { | ||
226 | ca->alpha = ALPHA_BASE; | ||
227 | ca->beta = BETA_BASE; | ||
228 | } else if (ca->cnt_rtt > 0) { | ||
229 | u32 dm = max_delay(ca); | ||
230 | u32 da = avg_delay(ca); | ||
231 | |||
232 | ca->alpha = alpha(ca, da, dm); | ||
233 | ca->beta = beta(da, dm); | ||
234 | } | ||
235 | |||
236 | rtt_reset(sk); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * In case of loss, reset to default values | ||
241 | */ | ||
242 | static void tcp_illinois_state(struct sock *sk, u8 new_state) | ||
243 | { | ||
244 | struct illinois *ca = inet_csk_ca(sk); | ||
245 | |||
246 | if (new_state == TCP_CA_Loss) { | ||
247 | ca->alpha = ALPHA_BASE; | ||
248 | ca->beta = BETA_BASE; | ||
249 | ca->rtt_low = 0; | ||
250 | ca->rtt_above = 0; | ||
251 | rtt_reset(sk); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Increase window in response to successful acknowledgment. | ||
257 | */ | ||
258 | static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | ||
259 | u32 in_flight, int flag) | ||
260 | { | ||
261 | struct tcp_sock *tp = tcp_sk(sk); | ||
262 | struct illinois *ca = inet_csk_ca(sk); | ||
263 | |||
264 | if (after(ack, ca->end_seq)) | ||
265 | update_params(sk); | ||
266 | |||
267 | /* RFC2861 only increase cwnd if fully utilized */ | ||
268 | if (!tcp_is_cwnd_limited(sk, in_flight)) | ||
269 | return; | ||
270 | |||
271 | /* In slow start */ | ||
272 | if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
273 | tcp_slow_start(tp); | ||
274 | |||
275 | else { | ||
276 | u32 delta; | ||
277 | |||
278 | /* snd_cwnd_cnt is # of packets since last cwnd increment */ | ||
279 | tp->snd_cwnd_cnt += ca->acked; | ||
280 | ca->acked = 1; | ||
281 | |||
282 | /* This is close approximation of: | ||
283 | * tp->snd_cwnd += alpha/tp->snd_cwnd | ||
284 | */ | ||
285 | delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; | ||
286 | if (delta >= tp->snd_cwnd) { | ||
287 | tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, | ||
288 | (u32) tp->snd_cwnd_clamp); | ||
289 | tp->snd_cwnd_cnt = 0; | ||
290 | } | ||
291 | } | ||
292 | } | ||
293 | |||
294 | static u32 tcp_illinois_ssthresh(struct sock *sk) | ||
295 | { | ||
296 | struct tcp_sock *tp = tcp_sk(sk); | ||
297 | struct illinois *ca = inet_csk_ca(sk); | ||
298 | |||
299 | /* Multiplicative decrease */ | ||
300 | return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U); | ||
301 | } | ||
302 | |||
303 | |||
304 | /* Extract info for Tcp socket info provided via netlink. */ | ||
305 | static void tcp_illinois_info(struct sock *sk, u32 ext, | ||
306 | struct sk_buff *skb) | ||
307 | { | ||
308 | const struct illinois *ca = inet_csk_ca(sk); | ||
309 | |||
310 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | ||
311 | struct tcpvegas_info info = { | ||
312 | .tcpv_enabled = 1, | ||
313 | .tcpv_rttcnt = ca->cnt_rtt, | ||
314 | .tcpv_minrtt = ca->base_rtt, | ||
315 | }; | ||
316 | u64 t = ca->sum_rtt; | ||
317 | |||
318 | do_div(t, ca->cnt_rtt); | ||
319 | info.tcpv_rtt = t; | ||
320 | |||
321 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); | ||
322 | } | ||
323 | } | ||
324 | |||
325 | static struct tcp_congestion_ops tcp_illinois = { | ||
326 | .flags = TCP_CONG_RTT_STAMP, | ||
327 | .init = tcp_illinois_init, | ||
328 | .ssthresh = tcp_illinois_ssthresh, | ||
329 | .min_cwnd = tcp_reno_min_cwnd, | ||
330 | .cong_avoid = tcp_illinois_cong_avoid, | ||
331 | .set_state = tcp_illinois_state, | ||
332 | .get_info = tcp_illinois_info, | ||
333 | .pkts_acked = tcp_illinois_acked, | ||
334 | |||
335 | .owner = THIS_MODULE, | ||
336 | .name = "illinois", | ||
337 | }; | ||
338 | |||
339 | static int __init tcp_illinois_register(void) | ||
340 | { | ||
341 | BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE); | ||
342 | return tcp_register_congestion_control(&tcp_illinois); | ||
343 | } | ||
344 | |||
345 | static void __exit tcp_illinois_unregister(void) | ||
346 | { | ||
347 | tcp_unregister_congestion_control(&tcp_illinois); | ||
348 | } | ||
349 | |||
350 | module_init(tcp_illinois_register); | ||
351 | module_exit(tcp_illinois_unregister); | ||
352 | |||
353 | MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); | ||
354 | MODULE_LICENSE("GPL"); | ||
355 | MODULE_DESCRIPTION("TCP Illinois"); | ||
356 | MODULE_VERSION("1.0"); | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1a14191687ac..051f0f815f17 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly; | |||
86 | int sysctl_tcp_rfc1337 __read_mostly; | 86 | int sysctl_tcp_rfc1337 __read_mostly; |
87 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; | 87 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
88 | int sysctl_tcp_frto __read_mostly; | 88 | int sysctl_tcp_frto __read_mostly; |
89 | int sysctl_tcp_frto_response __read_mostly; | ||
89 | int sysctl_tcp_nometrics_save __read_mostly; | 90 | int sysctl_tcp_nometrics_save __read_mostly; |
90 | 91 | ||
91 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 92 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
@@ -100,6 +101,7 @@ int sysctl_tcp_abc __read_mostly; | |||
100 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 101 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
101 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | 102 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ |
102 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 103 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
104 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | ||
103 | 105 | ||
104 | #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) | 106 | #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) |
105 | #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) | 107 | #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) |
@@ -110,6 +112,8 @@ int sysctl_tcp_abc __read_mostly; | |||
110 | #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) | 112 | #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) |
111 | #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) | 113 | #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) |
112 | 114 | ||
115 | #define IsSackFrto() (sysctl_tcp_frto == 0x2) | ||
116 | |||
113 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) | 117 | #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) |
114 | 118 | ||
115 | /* Adapt the MSS value used to make delayed ack decision to the | 119 | /* Adapt the MSS value used to make delayed ack decision to the |
@@ -136,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, | |||
136 | * | 140 | * |
137 | * "len" is invariant segment length, including TCP header. | 141 | * "len" is invariant segment length, including TCP header. |
138 | */ | 142 | */ |
139 | len += skb->data - skb->h.raw; | 143 | len += skb->data - skb_transport_header(skb); |
140 | if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || | 144 | if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || |
141 | /* If PSH is not set, packet should be | 145 | /* If PSH is not set, packet should be |
142 | * full sized, provided peer TCP is not badly broken. | 146 | * full sized, provided peer TCP is not badly broken. |
@@ -144,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, | |||
144 | * to handle super-low mtu links fairly. | 148 | * to handle super-low mtu links fairly. |
145 | */ | 149 | */ |
146 | (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && | 150 | (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && |
147 | !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) { | 151 | !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) { |
148 | /* Subtract also invariant (if peer is RFC compliant), | 152 | /* Subtract also invariant (if peer is RFC compliant), |
149 | * tcp header plus fixed timestamp option length. | 153 | * tcp header plus fixed timestamp option length. |
150 | * Resulting "len" is MSS free of SACK jitter. | 154 | * Resulting "len" is MSS free of SACK jitter. |
@@ -231,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk) | |||
231 | */ | 235 | */ |
232 | 236 | ||
233 | /* Slow part of check#2. */ | 237 | /* Slow part of check#2. */ |
234 | static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, | 238 | static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) |
235 | const struct sk_buff *skb) | ||
236 | { | 239 | { |
240 | struct tcp_sock *tp = tcp_sk(sk); | ||
237 | /* Optimize this! */ | 241 | /* Optimize this! */ |
238 | int truesize = tcp_win_from_space(skb->truesize)/2; | 242 | int truesize = tcp_win_from_space(skb->truesize)/2; |
239 | int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; | 243 | int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; |
@@ -248,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, | |||
248 | return 0; | 252 | return 0; |
249 | } | 253 | } |
250 | 254 | ||
251 | static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, | 255 | static void tcp_grow_window(struct sock *sk, |
252 | struct sk_buff *skb) | 256 | struct sk_buff *skb) |
253 | { | 257 | { |
258 | struct tcp_sock *tp = tcp_sk(sk); | ||
259 | |||
254 | /* Check #1 */ | 260 | /* Check #1 */ |
255 | if (tp->rcv_ssthresh < tp->window_clamp && | 261 | if (tp->rcv_ssthresh < tp->window_clamp && |
256 | (int)tp->rcv_ssthresh < tcp_space(sk) && | 262 | (int)tp->rcv_ssthresh < tcp_space(sk) && |
@@ -263,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, | |||
263 | if (tcp_win_from_space(skb->truesize) <= skb->len) | 269 | if (tcp_win_from_space(skb->truesize) <= skb->len) |
264 | incr = 2*tp->advmss; | 270 | incr = 2*tp->advmss; |
265 | else | 271 | else |
266 | incr = __tcp_grow_window(sk, tp, skb); | 272 | incr = __tcp_grow_window(sk, skb); |
267 | 273 | ||
268 | if (incr) { | 274 | if (incr) { |
269 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); | 275 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); |
@@ -326,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk) | |||
326 | } | 332 | } |
327 | 333 | ||
328 | /* 5. Recalculate window clamp after socket hit its memory bounds. */ | 334 | /* 5. Recalculate window clamp after socket hit its memory bounds. */ |
329 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | 335 | static void tcp_clamp_window(struct sock *sk) |
330 | { | 336 | { |
337 | struct tcp_sock *tp = tcp_sk(sk); | ||
331 | struct inet_connection_sock *icsk = inet_csk(sk); | 338 | struct inet_connection_sock *icsk = inet_csk(sk); |
332 | 339 | ||
333 | icsk->icsk_ack.quick = 0; | 340 | icsk->icsk_ack.quick = 0; |
@@ -499,8 +506,9 @@ new_measure: | |||
499 | * each ACK we send, he increments snd_cwnd and transmits more of his | 506 | * each ACK we send, he increments snd_cwnd and transmits more of his |
500 | * queue. -DaveM | 507 | * queue. -DaveM |
501 | */ | 508 | */ |
502 | static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) | 509 | static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) |
503 | { | 510 | { |
511 | struct tcp_sock *tp = tcp_sk(sk); | ||
504 | struct inet_connection_sock *icsk = inet_csk(sk); | 512 | struct inet_connection_sock *icsk = inet_csk(sk); |
505 | u32 now; | 513 | u32 now; |
506 | 514 | ||
@@ -541,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ | |||
541 | TCP_ECN_check_ce(tp, skb); | 549 | TCP_ECN_check_ce(tp, skb); |
542 | 550 | ||
543 | if (skb->len >= 128) | 551 | if (skb->len >= 128) |
544 | tcp_grow_window(sk, tp, skb); | 552 | tcp_grow_window(sk, skb); |
545 | } | 553 | } |
546 | 554 | ||
547 | /* Called to compute a smoothed rtt estimate. The data fed to this | 555 | /* Called to compute a smoothed rtt estimate. The data fed to this |
@@ -574,7 +582,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) | |||
574 | * does not matter how to _calculate_ it. Seems, it was trap | 582 | * does not matter how to _calculate_ it. Seems, it was trap |
575 | * that VJ failed to avoid. 8) | 583 | * that VJ failed to avoid. 8) |
576 | */ | 584 | */ |
577 | if(m == 0) | 585 | if (m == 0) |
578 | m = 1; | 586 | m = 1; |
579 | if (tp->srtt != 0) { | 587 | if (tp->srtt != 0) { |
580 | m -= (tp->srtt >> 3); /* m is now error in rtt est */ | 588 | m -= (tp->srtt >> 3); /* m is now error in rtt est */ |
@@ -759,15 +767,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | |||
759 | } | 767 | } |
760 | 768 | ||
761 | /* Set slow start threshold and cwnd not falling to slow start */ | 769 | /* Set slow start threshold and cwnd not falling to slow start */ |
762 | void tcp_enter_cwr(struct sock *sk) | 770 | void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) |
763 | { | 771 | { |
764 | struct tcp_sock *tp = tcp_sk(sk); | 772 | struct tcp_sock *tp = tcp_sk(sk); |
773 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
765 | 774 | ||
766 | tp->prior_ssthresh = 0; | 775 | tp->prior_ssthresh = 0; |
767 | tp->bytes_acked = 0; | 776 | tp->bytes_acked = 0; |
768 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | 777 | if (icsk->icsk_ca_state < TCP_CA_CWR) { |
769 | tp->undo_marker = 0; | 778 | tp->undo_marker = 0; |
770 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | 779 | if (set_ssthresh) |
780 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
771 | tp->snd_cwnd = min(tp->snd_cwnd, | 781 | tp->snd_cwnd = min(tp->snd_cwnd, |
772 | tcp_packets_in_flight(tp) + 1U); | 782 | tcp_packets_in_flight(tp) + 1U); |
773 | tp->snd_cwnd_cnt = 0; | 783 | tp->snd_cwnd_cnt = 0; |
@@ -934,7 +944,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
934 | { | 944 | { |
935 | const struct inet_connection_sock *icsk = inet_csk(sk); | 945 | const struct inet_connection_sock *icsk = inet_csk(sk); |
936 | struct tcp_sock *tp = tcp_sk(sk); | 946 | struct tcp_sock *tp = tcp_sk(sk); |
937 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; | 947 | unsigned char *ptr = (skb_transport_header(ack_skb) + |
948 | TCP_SKB_CB(ack_skb)->sacked); | ||
938 | struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); | 949 | struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); |
939 | struct sk_buff *cached_skb; | 950 | struct sk_buff *cached_skb; |
940 | int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; | 951 | int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; |
@@ -1038,7 +1049,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1038 | cached_skb = tp->fastpath_skb_hint; | 1049 | cached_skb = tp->fastpath_skb_hint; |
1039 | cached_fack_count = tp->fastpath_cnt_hint; | 1050 | cached_fack_count = tp->fastpath_cnt_hint; |
1040 | if (!cached_skb) { | 1051 | if (!cached_skb) { |
1041 | cached_skb = sk->sk_write_queue.next; | 1052 | cached_skb = tcp_write_queue_head(sk); |
1042 | cached_fack_count = 0; | 1053 | cached_fack_count = 0; |
1043 | } | 1054 | } |
1044 | 1055 | ||
@@ -1055,10 +1066,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1055 | if (after(end_seq, tp->high_seq)) | 1066 | if (after(end_seq, tp->high_seq)) |
1056 | flag |= FLAG_DATA_LOST; | 1067 | flag |= FLAG_DATA_LOST; |
1057 | 1068 | ||
1058 | sk_stream_for_retrans_queue_from(skb, sk) { | 1069 | tcp_for_write_queue_from(skb, sk) { |
1059 | int in_sack, pcount; | 1070 | int in_sack, pcount; |
1060 | u8 sacked; | 1071 | u8 sacked; |
1061 | 1072 | ||
1073 | if (skb == tcp_send_head(sk)) | ||
1074 | break; | ||
1075 | |||
1062 | cached_skb = skb; | 1076 | cached_skb = skb; |
1063 | cached_fack_count = fack_count; | 1077 | cached_fack_count = fack_count; |
1064 | if (i == first_sack_index) { | 1078 | if (i == first_sack_index) { |
@@ -1159,6 +1173,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1159 | /* clear lost hint */ | 1173 | /* clear lost hint */ |
1160 | tp->retransmit_skb_hint = NULL; | 1174 | tp->retransmit_skb_hint = NULL; |
1161 | } | 1175 | } |
1176 | /* SACK enhanced F-RTO detection. | ||
1177 | * Set flag if and only if non-rexmitted | ||
1178 | * segments below frto_highmark are | ||
1179 | * SACKed (RFC4138; Appendix B). | ||
1180 | * Clearing correct due to in-order walk | ||
1181 | */ | ||
1182 | if (after(end_seq, tp->frto_highmark)) { | ||
1183 | flag &= ~FLAG_ONLY_ORIG_SACKED; | ||
1184 | } else { | ||
1185 | if (!(sacked & TCPCB_RETRANS)) | ||
1186 | flag |= FLAG_ONLY_ORIG_SACKED; | ||
1187 | } | ||
1162 | } | 1188 | } |
1163 | 1189 | ||
1164 | TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; | 1190 | TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; |
@@ -1195,7 +1221,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1195 | if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { | 1221 | if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { |
1196 | struct sk_buff *skb; | 1222 | struct sk_buff *skb; |
1197 | 1223 | ||
1198 | sk_stream_for_retrans_queue(skb, sk) { | 1224 | tcp_for_write_queue(skb, sk) { |
1225 | if (skb == tcp_send_head(sk)) | ||
1226 | break; | ||
1199 | if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) | 1227 | if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) |
1200 | break; | 1228 | break; |
1201 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) | 1229 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) |
@@ -1224,7 +1252,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1224 | 1252 | ||
1225 | tp->left_out = tp->sacked_out + tp->lost_out; | 1253 | tp->left_out = tp->sacked_out + tp->lost_out; |
1226 | 1254 | ||
1227 | if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) | 1255 | if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && |
1256 | (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) | ||
1228 | tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); | 1257 | tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); |
1229 | 1258 | ||
1230 | #if FASTRETRANS_DEBUG > 0 | 1259 | #if FASTRETRANS_DEBUG > 0 |
@@ -1236,9 +1265,54 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1236 | return flag; | 1265 | return flag; |
1237 | } | 1266 | } |
1238 | 1267 | ||
1239 | /* RTO occurred, but do not yet enter loss state. Instead, transmit two new | 1268 | /* F-RTO can only be used if these conditions are satisfied: |
1240 | * segments to see from the next ACKs whether any data was really missing. | 1269 | * - there must be some unsent new data |
1241 | * If the RTO was spurious, new ACKs should arrive. | 1270 | * - the advertised window should allow sending it |
1271 | * - TCP has never retransmitted anything other than head (SACK enhanced | ||
1272 | * variant from Appendix B of RFC4138 is more robust here) | ||
1273 | */ | ||
1274 | int tcp_use_frto(struct sock *sk) | ||
1275 | { | ||
1276 | const struct tcp_sock *tp = tcp_sk(sk); | ||
1277 | struct sk_buff *skb; | ||
1278 | |||
1279 | if (!sysctl_tcp_frto || !tcp_send_head(sk) || | ||
1280 | after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, | ||
1281 | tp->snd_una + tp->snd_wnd)) | ||
1282 | return 0; | ||
1283 | |||
1284 | if (IsSackFrto()) | ||
1285 | return 1; | ||
1286 | |||
1287 | /* Avoid expensive walking of rexmit queue if possible */ | ||
1288 | if (tp->retrans_out > 1) | ||
1289 | return 0; | ||
1290 | |||
1291 | skb = tcp_write_queue_head(sk); | ||
1292 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | ||
1293 | tcp_for_write_queue_from(skb, sk) { | ||
1294 | if (skb == tcp_send_head(sk)) | ||
1295 | break; | ||
1296 | if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) | ||
1297 | return 0; | ||
1298 | /* Short-circuit when first non-SACKed skb has been checked */ | ||
1299 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) | ||
1300 | break; | ||
1301 | } | ||
1302 | return 1; | ||
1303 | } | ||
1304 | |||
1305 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO | ||
1306 | * recovery a bit and use heuristics in tcp_process_frto() to detect if | ||
1307 | * the RTO was spurious. Only clear SACKED_RETRANS of the head here to | ||
1308 | * keep retrans_out counting accurate (with SACK F-RTO, other than head | ||
1309 | * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS | ||
1310 | * bits are handled if the Loss state is really to be entered (in | ||
1311 | * tcp_enter_frto_loss). | ||
1312 | * | ||
1313 | * Do like tcp_enter_loss() would; when RTO expires the second time it | ||
1314 | * does: | ||
1315 | * "Reduce ssthresh if it has not yet been made inside this window." | ||
1242 | */ | 1316 | */ |
1243 | void tcp_enter_frto(struct sock *sk) | 1317 | void tcp_enter_frto(struct sock *sk) |
1244 | { | 1318 | { |
@@ -1246,39 +1320,69 @@ void tcp_enter_frto(struct sock *sk) | |||
1246 | struct tcp_sock *tp = tcp_sk(sk); | 1320 | struct tcp_sock *tp = tcp_sk(sk); |
1247 | struct sk_buff *skb; | 1321 | struct sk_buff *skb; |
1248 | 1322 | ||
1249 | tp->frto_counter = 1; | 1323 | if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || |
1250 | |||
1251 | if (icsk->icsk_ca_state <= TCP_CA_Disorder || | ||
1252 | tp->snd_una == tp->high_seq || | 1324 | tp->snd_una == tp->high_seq || |
1253 | (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { | 1325 | ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && |
1326 | !icsk->icsk_retransmits)) { | ||
1254 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | 1327 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
1255 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | 1328 | /* Our state is too optimistic in ssthresh() call because cwnd |
1329 | * is not reduced until tcp_enter_frto_loss() when previous FRTO | ||
1330 | * recovery has not yet completed. Pattern would be this: RTO, | ||
1331 | * Cumulative ACK, RTO (2xRTO for the same segment does not end | ||
1332 | * up here twice). | ||
1333 | * RFC4138 should be more specific on what to do, even though | ||
1334 | * RTO is quite unlikely to occur after the first Cumulative ACK | ||
1335 | * due to back-off and complexity of triggering events ... | ||
1336 | */ | ||
1337 | if (tp->frto_counter) { | ||
1338 | u32 stored_cwnd; | ||
1339 | stored_cwnd = tp->snd_cwnd; | ||
1340 | tp->snd_cwnd = 2; | ||
1341 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
1342 | tp->snd_cwnd = stored_cwnd; | ||
1343 | } else { | ||
1344 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
1345 | } | ||
1346 | /* ... in theory, cong.control module could do "any tricks" in | ||
1347 | * ssthresh(), which means that ca_state, lost bits and lost_out | ||
1348 | * counter would have to be faked before the call occurs. We | ||
1349 | * consider that too expensive, unlikely and hacky, so modules | ||
1350 | * using these in ssthresh() must deal these incompatibility | ||
1351 | * issues if they receives CA_EVENT_FRTO and frto_counter != 0 | ||
1352 | */ | ||
1256 | tcp_ca_event(sk, CA_EVENT_FRTO); | 1353 | tcp_ca_event(sk, CA_EVENT_FRTO); |
1257 | } | 1354 | } |
1258 | 1355 | ||
1259 | /* Have to clear retransmission markers here to keep the bookkeeping | ||
1260 | * in shape, even though we are not yet in Loss state. | ||
1261 | * If something was really lost, it is eventually caught up | ||
1262 | * in tcp_enter_frto_loss. | ||
1263 | */ | ||
1264 | tp->retrans_out = 0; | ||
1265 | tp->undo_marker = tp->snd_una; | 1356 | tp->undo_marker = tp->snd_una; |
1266 | tp->undo_retrans = 0; | 1357 | tp->undo_retrans = 0; |
1267 | 1358 | ||
1268 | sk_stream_for_retrans_queue(skb, sk) { | 1359 | skb = tcp_write_queue_head(sk); |
1269 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS; | 1360 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { |
1361 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | ||
1362 | tp->retrans_out -= tcp_skb_pcount(skb); | ||
1270 | } | 1363 | } |
1271 | tcp_sync_left_out(tp); | 1364 | tcp_sync_left_out(tp); |
1272 | 1365 | ||
1273 | tcp_set_ca_state(sk, TCP_CA_Open); | 1366 | /* Earlier loss recovery underway (see RFC4138; Appendix B). |
1274 | tp->frto_highmark = tp->snd_nxt; | 1367 | * The last condition is necessary at least in tp->frto_counter case. |
1368 | */ | ||
1369 | if (IsSackFrto() && (tp->frto_counter || | ||
1370 | ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && | ||
1371 | after(tp->high_seq, tp->snd_una)) { | ||
1372 | tp->frto_highmark = tp->high_seq; | ||
1373 | } else { | ||
1374 | tp->frto_highmark = tp->snd_nxt; | ||
1375 | } | ||
1376 | tcp_set_ca_state(sk, TCP_CA_Disorder); | ||
1377 | tp->high_seq = tp->snd_nxt; | ||
1378 | tp->frto_counter = 1; | ||
1275 | } | 1379 | } |
1276 | 1380 | ||
1277 | /* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, | 1381 | /* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, |
1278 | * which indicates that we should follow the traditional RTO recovery, | 1382 | * which indicates that we should follow the traditional RTO recovery, |
1279 | * i.e. mark everything lost and do go-back-N retransmission. | 1383 | * i.e. mark everything lost and do go-back-N retransmission. |
1280 | */ | 1384 | */ |
1281 | static void tcp_enter_frto_loss(struct sock *sk) | 1385 | static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) |
1282 | { | 1386 | { |
1283 | struct tcp_sock *tp = tcp_sk(sk); | 1387 | struct tcp_sock *tp = tcp_sk(sk); |
1284 | struct sk_buff *skb; | 1388 | struct sk_buff *skb; |
@@ -1287,10 +1391,23 @@ static void tcp_enter_frto_loss(struct sock *sk) | |||
1287 | tp->sacked_out = 0; | 1391 | tp->sacked_out = 0; |
1288 | tp->lost_out = 0; | 1392 | tp->lost_out = 0; |
1289 | tp->fackets_out = 0; | 1393 | tp->fackets_out = 0; |
1394 | tp->retrans_out = 0; | ||
1290 | 1395 | ||
1291 | sk_stream_for_retrans_queue(skb, sk) { | 1396 | tcp_for_write_queue(skb, sk) { |
1397 | if (skb == tcp_send_head(sk)) | ||
1398 | break; | ||
1292 | cnt += tcp_skb_pcount(skb); | 1399 | cnt += tcp_skb_pcount(skb); |
1293 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1400 | /* |
1401 | * Count the retransmission made on RTO correctly (only when | ||
1402 | * waiting for the first ACK and did not get it)... | ||
1403 | */ | ||
1404 | if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) { | ||
1405 | tp->retrans_out += tcp_skb_pcount(skb); | ||
1406 | /* ...enter this if branch just for the first segment */ | ||
1407 | flag |= FLAG_DATA_ACKED; | ||
1408 | } else { | ||
1409 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | ||
1410 | } | ||
1294 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { | 1411 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { |
1295 | 1412 | ||
1296 | /* Do not mark those segments lost that were | 1413 | /* Do not mark those segments lost that were |
@@ -1308,7 +1425,7 @@ static void tcp_enter_frto_loss(struct sock *sk) | |||
1308 | } | 1425 | } |
1309 | tcp_sync_left_out(tp); | 1426 | tcp_sync_left_out(tp); |
1310 | 1427 | ||
1311 | tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1; | 1428 | tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; |
1312 | tp->snd_cwnd_cnt = 0; | 1429 | tp->snd_cwnd_cnt = 0; |
1313 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1430 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1314 | tp->undo_marker = 0; | 1431 | tp->undo_marker = 0; |
@@ -1366,7 +1483,9 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1366 | if (!how) | 1483 | if (!how) |
1367 | tp->undo_marker = tp->snd_una; | 1484 | tp->undo_marker = tp->snd_una; |
1368 | 1485 | ||
1369 | sk_stream_for_retrans_queue(skb, sk) { | 1486 | tcp_for_write_queue(skb, sk) { |
1487 | if (skb == tcp_send_head(sk)) | ||
1488 | break; | ||
1370 | cnt += tcp_skb_pcount(skb); | 1489 | cnt += tcp_skb_pcount(skb); |
1371 | if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) | 1490 | if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) |
1372 | tp->undo_marker = 0; | 1491 | tp->undo_marker = 0; |
@@ -1401,14 +1520,14 @@ static int tcp_check_sack_reneging(struct sock *sk) | |||
1401 | * receiver _host_ is heavily congested (or buggy). | 1520 | * receiver _host_ is heavily congested (or buggy). |
1402 | * Do processing similar to RTO timeout. | 1521 | * Do processing similar to RTO timeout. |
1403 | */ | 1522 | */ |
1404 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && | 1523 | if ((skb = tcp_write_queue_head(sk)) != NULL && |
1405 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | 1524 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { |
1406 | struct inet_connection_sock *icsk = inet_csk(sk); | 1525 | struct inet_connection_sock *icsk = inet_csk(sk); |
1407 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); | 1526 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); |
1408 | 1527 | ||
1409 | tcp_enter_loss(sk, 1); | 1528 | tcp_enter_loss(sk, 1); |
1410 | icsk->icsk_retransmits++; | 1529 | icsk->icsk_retransmits++; |
1411 | tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); | 1530 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); |
1412 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 1531 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1413 | icsk->icsk_rto, TCP_RTO_MAX); | 1532 | icsk->icsk_rto, TCP_RTO_MAX); |
1414 | return 1; | 1533 | return 1; |
@@ -1426,10 +1545,12 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) | |||
1426 | return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); | 1545 | return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); |
1427 | } | 1546 | } |
1428 | 1547 | ||
1429 | static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) | 1548 | static inline int tcp_head_timedout(struct sock *sk) |
1430 | { | 1549 | { |
1550 | struct tcp_sock *tp = tcp_sk(sk); | ||
1551 | |||
1431 | return tp->packets_out && | 1552 | return tp->packets_out && |
1432 | tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); | 1553 | tcp_skb_timedout(sk, tcp_write_queue_head(sk)); |
1433 | } | 1554 | } |
1434 | 1555 | ||
1435 | /* Linux NewReno/SACK/FACK/ECN state machine. | 1556 | /* Linux NewReno/SACK/FACK/ECN state machine. |
@@ -1525,10 +1646,15 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) | |||
1525 | * Main question: may we further continue forward transmission | 1646 | * Main question: may we further continue forward transmission |
1526 | * with the same cwnd? | 1647 | * with the same cwnd? |
1527 | */ | 1648 | */ |
1528 | static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) | 1649 | static int tcp_time_to_recover(struct sock *sk) |
1529 | { | 1650 | { |
1651 | struct tcp_sock *tp = tcp_sk(sk); | ||
1530 | __u32 packets_out; | 1652 | __u32 packets_out; |
1531 | 1653 | ||
1654 | /* Do not perform any recovery during FRTO algorithm */ | ||
1655 | if (tp->frto_counter) | ||
1656 | return 0; | ||
1657 | |||
1532 | /* Trick#1: The loss is proven. */ | 1658 | /* Trick#1: The loss is proven. */ |
1533 | if (tp->lost_out) | 1659 | if (tp->lost_out) |
1534 | return 1; | 1660 | return 1; |
@@ -1540,7 +1666,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) | |||
1540 | /* Trick#3 : when we use RFC2988 timer restart, fast | 1666 | /* Trick#3 : when we use RFC2988 timer restart, fast |
1541 | * retransmit can be triggered by timeout of queue head. | 1667 | * retransmit can be triggered by timeout of queue head. |
1542 | */ | 1668 | */ |
1543 | if (tcp_head_timedout(sk, tp)) | 1669 | if (tcp_head_timedout(sk)) |
1544 | return 1; | 1670 | return 1; |
1545 | 1671 | ||
1546 | /* Trick#4: It is still not OK... But will it be useful to delay | 1672 | /* Trick#4: It is still not OK... But will it be useful to delay |
@@ -1549,7 +1675,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) | |||
1549 | packets_out = tp->packets_out; | 1675 | packets_out = tp->packets_out; |
1550 | if (packets_out <= tp->reordering && | 1676 | if (packets_out <= tp->reordering && |
1551 | tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && | 1677 | tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && |
1552 | !tcp_may_send_now(sk, tp)) { | 1678 | !tcp_may_send_now(sk)) { |
1553 | /* We have nothing to send. This connection is limited | 1679 | /* We have nothing to send. This connection is limited |
1554 | * either by receiver window or by application. | 1680 | * either by receiver window or by application. |
1555 | */ | 1681 | */ |
@@ -1589,8 +1715,10 @@ static void tcp_add_reno_sack(struct sock *sk) | |||
1589 | 1715 | ||
1590 | /* Account for ACK, ACKing some data in Reno Recovery phase. */ | 1716 | /* Account for ACK, ACKing some data in Reno Recovery phase. */ |
1591 | 1717 | ||
1592 | static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked) | 1718 | static void tcp_remove_reno_sacks(struct sock *sk, int acked) |
1593 | { | 1719 | { |
1720 | struct tcp_sock *tp = tcp_sk(sk); | ||
1721 | |||
1594 | if (acked > 0) { | 1722 | if (acked > 0) { |
1595 | /* One ACK acked hole. The rest eat duplicate ACKs. */ | 1723 | /* One ACK acked hole. The rest eat duplicate ACKs. */ |
1596 | if (acked-1 >= tp->sacked_out) | 1724 | if (acked-1 >= tp->sacked_out) |
@@ -1609,9 +1737,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) | |||
1609 | } | 1737 | } |
1610 | 1738 | ||
1611 | /* Mark head of queue up as lost. */ | 1739 | /* Mark head of queue up as lost. */ |
1612 | static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | 1740 | static void tcp_mark_head_lost(struct sock *sk, |
1613 | int packets, u32 high_seq) | 1741 | int packets, u32 high_seq) |
1614 | { | 1742 | { |
1743 | struct tcp_sock *tp = tcp_sk(sk); | ||
1615 | struct sk_buff *skb; | 1744 | struct sk_buff *skb; |
1616 | int cnt; | 1745 | int cnt; |
1617 | 1746 | ||
@@ -1620,11 +1749,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1620 | skb = tp->lost_skb_hint; | 1749 | skb = tp->lost_skb_hint; |
1621 | cnt = tp->lost_cnt_hint; | 1750 | cnt = tp->lost_cnt_hint; |
1622 | } else { | 1751 | } else { |
1623 | skb = sk->sk_write_queue.next; | 1752 | skb = tcp_write_queue_head(sk); |
1624 | cnt = 0; | 1753 | cnt = 0; |
1625 | } | 1754 | } |
1626 | 1755 | ||
1627 | sk_stream_for_retrans_queue_from(skb, sk) { | 1756 | tcp_for_write_queue_from(skb, sk) { |
1757 | if (skb == tcp_send_head(sk)) | ||
1758 | break; | ||
1628 | /* TODO: do this better */ | 1759 | /* TODO: do this better */ |
1629 | /* this is not the most efficient way to do this... */ | 1760 | /* this is not the most efficient way to do this... */ |
1630 | tp->lost_skb_hint = skb; | 1761 | tp->lost_skb_hint = skb; |
@@ -1638,12 +1769,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1638 | 1769 | ||
1639 | /* clear xmit_retransmit_queue hints | 1770 | /* clear xmit_retransmit_queue hints |
1640 | * if this is beyond hint */ | 1771 | * if this is beyond hint */ |
1641 | if(tp->retransmit_skb_hint != NULL && | 1772 | if (tp->retransmit_skb_hint != NULL && |
1642 | before(TCP_SKB_CB(skb)->seq, | 1773 | before(TCP_SKB_CB(skb)->seq, |
1643 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { | 1774 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) |
1644 | |||
1645 | tp->retransmit_skb_hint = NULL; | 1775 | tp->retransmit_skb_hint = NULL; |
1646 | } | 1776 | |
1647 | } | 1777 | } |
1648 | } | 1778 | } |
1649 | tcp_sync_left_out(tp); | 1779 | tcp_sync_left_out(tp); |
@@ -1651,15 +1781,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1651 | 1781 | ||
1652 | /* Account newly detected lost packet(s) */ | 1782 | /* Account newly detected lost packet(s) */ |
1653 | 1783 | ||
1654 | static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | 1784 | static void tcp_update_scoreboard(struct sock *sk) |
1655 | { | 1785 | { |
1786 | struct tcp_sock *tp = tcp_sk(sk); | ||
1787 | |||
1656 | if (IsFack(tp)) { | 1788 | if (IsFack(tp)) { |
1657 | int lost = tp->fackets_out - tp->reordering; | 1789 | int lost = tp->fackets_out - tp->reordering; |
1658 | if (lost <= 0) | 1790 | if (lost <= 0) |
1659 | lost = 1; | 1791 | lost = 1; |
1660 | tcp_mark_head_lost(sk, tp, lost, tp->high_seq); | 1792 | tcp_mark_head_lost(sk, lost, tp->high_seq); |
1661 | } else { | 1793 | } else { |
1662 | tcp_mark_head_lost(sk, tp, 1, tp->high_seq); | 1794 | tcp_mark_head_lost(sk, 1, tp->high_seq); |
1663 | } | 1795 | } |
1664 | 1796 | ||
1665 | /* New heuristics: it is possible only after we switched | 1797 | /* New heuristics: it is possible only after we switched |
@@ -1667,13 +1799,15 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | |||
1667 | * Hence, we can detect timed out packets during fast | 1799 | * Hence, we can detect timed out packets during fast |
1668 | * retransmit without falling to slow start. | 1800 | * retransmit without falling to slow start. |
1669 | */ | 1801 | */ |
1670 | if (!IsReno(tp) && tcp_head_timedout(sk, tp)) { | 1802 | if (!IsReno(tp) && tcp_head_timedout(sk)) { |
1671 | struct sk_buff *skb; | 1803 | struct sk_buff *skb; |
1672 | 1804 | ||
1673 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint | 1805 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint |
1674 | : sk->sk_write_queue.next; | 1806 | : tcp_write_queue_head(sk); |
1675 | 1807 | ||
1676 | sk_stream_for_retrans_queue_from(skb, sk) { | 1808 | tcp_for_write_queue_from(skb, sk) { |
1809 | if (skb == tcp_send_head(sk)) | ||
1810 | break; | ||
1677 | if (!tcp_skb_timedout(sk, skb)) | 1811 | if (!tcp_skb_timedout(sk, skb)) |
1678 | break; | 1812 | break; |
1679 | 1813 | ||
@@ -1745,9 +1879,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp) | |||
1745 | /* Undo procedures. */ | 1879 | /* Undo procedures. */ |
1746 | 1880 | ||
1747 | #if FASTRETRANS_DEBUG > 1 | 1881 | #if FASTRETRANS_DEBUG > 1 |
1748 | static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) | 1882 | static void DBGUNDO(struct sock *sk, const char *msg) |
1749 | { | 1883 | { |
1884 | struct tcp_sock *tp = tcp_sk(sk); | ||
1750 | struct inet_sock *inet = inet_sk(sk); | 1885 | struct inet_sock *inet = inet_sk(sk); |
1886 | |||
1751 | printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", | 1887 | printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", |
1752 | msg, | 1888 | msg, |
1753 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 1889 | NIPQUAD(inet->daddr), ntohs(inet->dport), |
@@ -1793,13 +1929,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp) | |||
1793 | } | 1929 | } |
1794 | 1930 | ||
1795 | /* People celebrate: "We love our President!" */ | 1931 | /* People celebrate: "We love our President!" */ |
1796 | static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) | 1932 | static int tcp_try_undo_recovery(struct sock *sk) |
1797 | { | 1933 | { |
1934 | struct tcp_sock *tp = tcp_sk(sk); | ||
1935 | |||
1798 | if (tcp_may_undo(tp)) { | 1936 | if (tcp_may_undo(tp)) { |
1799 | /* Happy end! We did not retransmit anything | 1937 | /* Happy end! We did not retransmit anything |
1800 | * or our original transmission succeeded. | 1938 | * or our original transmission succeeded. |
1801 | */ | 1939 | */ |
1802 | DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); | 1940 | DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); |
1803 | tcp_undo_cwr(sk, 1); | 1941 | tcp_undo_cwr(sk, 1); |
1804 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) | 1942 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) |
1805 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); | 1943 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); |
@@ -1819,10 +1957,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) | |||
1819 | } | 1957 | } |
1820 | 1958 | ||
1821 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ | 1959 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ |
1822 | static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) | 1960 | static void tcp_try_undo_dsack(struct sock *sk) |
1823 | { | 1961 | { |
1962 | struct tcp_sock *tp = tcp_sk(sk); | ||
1963 | |||
1824 | if (tp->undo_marker && !tp->undo_retrans) { | 1964 | if (tp->undo_marker && !tp->undo_retrans) { |
1825 | DBGUNDO(sk, tp, "D-SACK"); | 1965 | DBGUNDO(sk, "D-SACK"); |
1826 | tcp_undo_cwr(sk, 1); | 1966 | tcp_undo_cwr(sk, 1); |
1827 | tp->undo_marker = 0; | 1967 | tp->undo_marker = 0; |
1828 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); | 1968 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); |
@@ -1831,9 +1971,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) | |||
1831 | 1971 | ||
1832 | /* Undo during fast recovery after partial ACK. */ | 1972 | /* Undo during fast recovery after partial ACK. */ |
1833 | 1973 | ||
1834 | static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, | 1974 | static int tcp_try_undo_partial(struct sock *sk, int acked) |
1835 | int acked) | ||
1836 | { | 1975 | { |
1976 | struct tcp_sock *tp = tcp_sk(sk); | ||
1837 | /* Partial ACK arrived. Force Hoe's retransmit. */ | 1977 | /* Partial ACK arrived. Force Hoe's retransmit. */ |
1838 | int failed = IsReno(tp) || tp->fackets_out>tp->reordering; | 1978 | int failed = IsReno(tp) || tp->fackets_out>tp->reordering; |
1839 | 1979 | ||
@@ -1846,7 +1986,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, | |||
1846 | 1986 | ||
1847 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); | 1987 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); |
1848 | 1988 | ||
1849 | DBGUNDO(sk, tp, "Hoe"); | 1989 | DBGUNDO(sk, "Hoe"); |
1850 | tcp_undo_cwr(sk, 0); | 1990 | tcp_undo_cwr(sk, 0); |
1851 | NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); | 1991 | NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); |
1852 | 1992 | ||
@@ -1860,17 +2000,21 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, | |||
1860 | } | 2000 | } |
1861 | 2001 | ||
1862 | /* Undo during loss recovery after partial ACK. */ | 2002 | /* Undo during loss recovery after partial ACK. */ |
1863 | static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) | 2003 | static int tcp_try_undo_loss(struct sock *sk) |
1864 | { | 2004 | { |
2005 | struct tcp_sock *tp = tcp_sk(sk); | ||
2006 | |||
1865 | if (tcp_may_undo(tp)) { | 2007 | if (tcp_may_undo(tp)) { |
1866 | struct sk_buff *skb; | 2008 | struct sk_buff *skb; |
1867 | sk_stream_for_retrans_queue(skb, sk) { | 2009 | tcp_for_write_queue(skb, sk) { |
2010 | if (skb == tcp_send_head(sk)) | ||
2011 | break; | ||
1868 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 2012 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1869 | } | 2013 | } |
1870 | 2014 | ||
1871 | clear_all_retrans_hints(tp); | 2015 | clear_all_retrans_hints(tp); |
1872 | 2016 | ||
1873 | DBGUNDO(sk, tp, "partial loss"); | 2017 | DBGUNDO(sk, "partial loss"); |
1874 | tp->lost_out = 0; | 2018 | tp->lost_out = 0; |
1875 | tp->left_out = tp->sacked_out; | 2019 | tp->left_out = tp->sacked_out; |
1876 | tcp_undo_cwr(sk, 1); | 2020 | tcp_undo_cwr(sk, 1); |
@@ -1892,15 +2036,17 @@ static inline void tcp_complete_cwr(struct sock *sk) | |||
1892 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); | 2036 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
1893 | } | 2037 | } |
1894 | 2038 | ||
1895 | static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) | 2039 | static void tcp_try_to_open(struct sock *sk, int flag) |
1896 | { | 2040 | { |
2041 | struct tcp_sock *tp = tcp_sk(sk); | ||
2042 | |||
1897 | tp->left_out = tp->sacked_out; | 2043 | tp->left_out = tp->sacked_out; |
1898 | 2044 | ||
1899 | if (tp->retrans_out == 0) | 2045 | if (tp->retrans_out == 0) |
1900 | tp->retrans_stamp = 0; | 2046 | tp->retrans_stamp = 0; |
1901 | 2047 | ||
1902 | if (flag&FLAG_ECE) | 2048 | if (flag&FLAG_ECE) |
1903 | tcp_enter_cwr(sk); | 2049 | tcp_enter_cwr(sk, 1); |
1904 | 2050 | ||
1905 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { | 2051 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { |
1906 | int state = TCP_CA_Open; | 2052 | int state = TCP_CA_Open; |
@@ -1987,7 +2133,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1987 | before(tp->snd_una, tp->high_seq) && | 2133 | before(tp->snd_una, tp->high_seq) && |
1988 | icsk->icsk_ca_state != TCP_CA_Open && | 2134 | icsk->icsk_ca_state != TCP_CA_Open && |
1989 | tp->fackets_out > tp->reordering) { | 2135 | tp->fackets_out > tp->reordering) { |
1990 | tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); | 2136 | tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq); |
1991 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); | 2137 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); |
1992 | } | 2138 | } |
1993 | 2139 | ||
@@ -1997,14 +2143,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1997 | /* E. Check state exit conditions. State can be terminated | 2143 | /* E. Check state exit conditions. State can be terminated |
1998 | * when high_seq is ACKed. */ | 2144 | * when high_seq is ACKed. */ |
1999 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 2145 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
2000 | if (!sysctl_tcp_frto) | 2146 | BUG_TRAP(tp->retrans_out == 0); |
2001 | BUG_TRAP(tp->retrans_out == 0); | ||
2002 | tp->retrans_stamp = 0; | 2147 | tp->retrans_stamp = 0; |
2003 | } else if (!before(tp->snd_una, tp->high_seq)) { | 2148 | } else if (!before(tp->snd_una, tp->high_seq)) { |
2004 | switch (icsk->icsk_ca_state) { | 2149 | switch (icsk->icsk_ca_state) { |
2005 | case TCP_CA_Loss: | 2150 | case TCP_CA_Loss: |
2006 | icsk->icsk_retransmits = 0; | 2151 | icsk->icsk_retransmits = 0; |
2007 | if (tcp_try_undo_recovery(sk, tp)) | 2152 | if (tcp_try_undo_recovery(sk)) |
2008 | return; | 2153 | return; |
2009 | break; | 2154 | break; |
2010 | 2155 | ||
@@ -2018,7 +2163,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
2018 | break; | 2163 | break; |
2019 | 2164 | ||
2020 | case TCP_CA_Disorder: | 2165 | case TCP_CA_Disorder: |
2021 | tcp_try_undo_dsack(sk, tp); | 2166 | tcp_try_undo_dsack(sk); |
2022 | if (!tp->undo_marker || | 2167 | if (!tp->undo_marker || |
2023 | /* For SACK case do not Open to allow to undo | 2168 | /* For SACK case do not Open to allow to undo |
2024 | * catching for all duplicate ACKs. */ | 2169 | * catching for all duplicate ACKs. */ |
@@ -2031,7 +2176,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
2031 | case TCP_CA_Recovery: | 2176 | case TCP_CA_Recovery: |
2032 | if (IsReno(tp)) | 2177 | if (IsReno(tp)) |
2033 | tcp_reset_reno_sack(tp); | 2178 | tcp_reset_reno_sack(tp); |
2034 | if (tcp_try_undo_recovery(sk, tp)) | 2179 | if (tcp_try_undo_recovery(sk)) |
2035 | return; | 2180 | return; |
2036 | tcp_complete_cwr(sk); | 2181 | tcp_complete_cwr(sk); |
2037 | break; | 2182 | break; |
@@ -2047,14 +2192,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
2047 | } else { | 2192 | } else { |
2048 | int acked = prior_packets - tp->packets_out; | 2193 | int acked = prior_packets - tp->packets_out; |
2049 | if (IsReno(tp)) | 2194 | if (IsReno(tp)) |
2050 | tcp_remove_reno_sacks(sk, tp, acked); | 2195 | tcp_remove_reno_sacks(sk, acked); |
2051 | is_dupack = tcp_try_undo_partial(sk, tp, acked); | 2196 | is_dupack = tcp_try_undo_partial(sk, acked); |
2052 | } | 2197 | } |
2053 | break; | 2198 | break; |
2054 | case TCP_CA_Loss: | 2199 | case TCP_CA_Loss: |
2055 | if (flag&FLAG_DATA_ACKED) | 2200 | if (flag&FLAG_DATA_ACKED) |
2056 | icsk->icsk_retransmits = 0; | 2201 | icsk->icsk_retransmits = 0; |
2057 | if (!tcp_try_undo_loss(sk, tp)) { | 2202 | if (!tcp_try_undo_loss(sk)) { |
2058 | tcp_moderate_cwnd(tp); | 2203 | tcp_moderate_cwnd(tp); |
2059 | tcp_xmit_retransmit_queue(sk); | 2204 | tcp_xmit_retransmit_queue(sk); |
2060 | return; | 2205 | return; |
@@ -2071,10 +2216,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
2071 | } | 2216 | } |
2072 | 2217 | ||
2073 | if (icsk->icsk_ca_state == TCP_CA_Disorder) | 2218 | if (icsk->icsk_ca_state == TCP_CA_Disorder) |
2074 | tcp_try_undo_dsack(sk, tp); | 2219 | tcp_try_undo_dsack(sk); |
2075 | 2220 | ||
2076 | if (!tcp_time_to_recover(sk, tp)) { | 2221 | if (!tcp_time_to_recover(sk)) { |
2077 | tcp_try_to_open(sk, tp, flag); | 2222 | tcp_try_to_open(sk, flag); |
2078 | return; | 2223 | return; |
2079 | } | 2224 | } |
2080 | 2225 | ||
@@ -2113,8 +2258,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
2113 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 2258 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
2114 | } | 2259 | } |
2115 | 2260 | ||
2116 | if (is_dupack || tcp_head_timedout(sk, tp)) | 2261 | if (is_dupack || tcp_head_timedout(sk)) |
2117 | tcp_update_scoreboard(sk, tp); | 2262 | tcp_update_scoreboard(sk); |
2118 | tcp_cwnd_down(sk); | 2263 | tcp_cwnd_down(sk); |
2119 | tcp_xmit_retransmit_queue(sk); | 2264 | tcp_xmit_retransmit_queue(sk); |
2120 | } | 2265 | } |
@@ -2190,8 +2335,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | |||
2190 | * RFC2988 recommends to restart timer to now+rto. | 2335 | * RFC2988 recommends to restart timer to now+rto. |
2191 | */ | 2336 | */ |
2192 | 2337 | ||
2193 | static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) | 2338 | static void tcp_ack_packets_out(struct sock *sk) |
2194 | { | 2339 | { |
2340 | struct tcp_sock *tp = tcp_sk(sk); | ||
2341 | |||
2195 | if (!tp->packets_out) { | 2342 | if (!tp->packets_out) { |
2196 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 2343 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
2197 | } else { | 2344 | } else { |
@@ -2255,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, | |||
2255 | return acked; | 2402 | return acked; |
2256 | } | 2403 | } |
2257 | 2404 | ||
2258 | static u32 tcp_usrtt(struct timeval *tv) | ||
2259 | { | ||
2260 | struct timeval now; | ||
2261 | |||
2262 | do_gettimeofday(&now); | ||
2263 | return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec); | ||
2264 | } | ||
2265 | |||
2266 | /* Remove acknowledged frames from the retransmission queue. */ | 2405 | /* Remove acknowledged frames from the retransmission queue. */ |
2267 | static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | 2406 | static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) |
2268 | { | 2407 | { |
@@ -2273,12 +2412,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
2273 | int acked = 0; | 2412 | int acked = 0; |
2274 | __s32 seq_rtt = -1; | 2413 | __s32 seq_rtt = -1; |
2275 | u32 pkts_acked = 0; | 2414 | u32 pkts_acked = 0; |
2276 | void (*rtt_sample)(struct sock *sk, u32 usrtt) | 2415 | ktime_t last_ackt = ktime_set(0,0); |
2277 | = icsk->icsk_ca_ops->rtt_sample; | ||
2278 | struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; | ||
2279 | 2416 | ||
2280 | while ((skb = skb_peek(&sk->sk_write_queue)) && | 2417 | while ((skb = tcp_write_queue_head(sk)) && |
2281 | skb != sk->sk_send_head) { | 2418 | skb != tcp_send_head(sk)) { |
2282 | struct tcp_skb_cb *scb = TCP_SKB_CB(skb); | 2419 | struct tcp_skb_cb *scb = TCP_SKB_CB(skb); |
2283 | __u8 sacked = scb->sacked; | 2420 | __u8 sacked = scb->sacked; |
2284 | 2421 | ||
@@ -2318,13 +2455,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
2318 | 2455 | ||
2319 | if (sacked) { | 2456 | if (sacked) { |
2320 | if (sacked & TCPCB_RETRANS) { | 2457 | if (sacked & TCPCB_RETRANS) { |
2321 | if(sacked & TCPCB_SACKED_RETRANS) | 2458 | if (sacked & TCPCB_SACKED_RETRANS) |
2322 | tp->retrans_out -= tcp_skb_pcount(skb); | 2459 | tp->retrans_out -= tcp_skb_pcount(skb); |
2323 | acked |= FLAG_RETRANS_DATA_ACKED; | 2460 | acked |= FLAG_RETRANS_DATA_ACKED; |
2324 | seq_rtt = -1; | 2461 | seq_rtt = -1; |
2325 | } else if (seq_rtt < 0) { | 2462 | } else if (seq_rtt < 0) { |
2326 | seq_rtt = now - scb->when; | 2463 | seq_rtt = now - scb->when; |
2327 | skb_get_timestamp(skb, &tv); | 2464 | last_ackt = skb->tstamp; |
2328 | } | 2465 | } |
2329 | if (sacked & TCPCB_SACKED_ACKED) | 2466 | if (sacked & TCPCB_SACKED_ACKED) |
2330 | tp->sacked_out -= tcp_skb_pcount(skb); | 2467 | tp->sacked_out -= tcp_skb_pcount(skb); |
@@ -2337,23 +2474,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) | |||
2337 | } | 2474 | } |
2338 | } else if (seq_rtt < 0) { | 2475 | } else if (seq_rtt < 0) { |
2339 | seq_rtt = now - scb->when; | 2476 | seq_rtt = now - scb->when; |
2340 | skb_get_timestamp(skb, &tv); | 2477 | last_ackt = skb->tstamp; |
2341 | } | 2478 | } |
2342 | tcp_dec_pcount_approx(&tp->fackets_out, skb); | 2479 | tcp_dec_pcount_approx(&tp->fackets_out, skb); |
2343 | tcp_packets_out_dec(tp, skb); | 2480 | tcp_packets_out_dec(tp, skb); |
2344 | __skb_unlink(skb, &sk->sk_write_queue); | 2481 | tcp_unlink_write_queue(skb, sk); |
2345 | sk_stream_free_skb(sk, skb); | 2482 | sk_stream_free_skb(sk, skb); |
2346 | clear_all_retrans_hints(tp); | 2483 | clear_all_retrans_hints(tp); |
2347 | } | 2484 | } |
2348 | 2485 | ||
2349 | if (acked&FLAG_ACKED) { | 2486 | if (acked&FLAG_ACKED) { |
2487 | const struct tcp_congestion_ops *ca_ops | ||
2488 | = inet_csk(sk)->icsk_ca_ops; | ||
2489 | |||
2350 | tcp_ack_update_rtt(sk, acked, seq_rtt); | 2490 | tcp_ack_update_rtt(sk, acked, seq_rtt); |
2351 | tcp_ack_packets_out(sk, tp); | 2491 | tcp_ack_packets_out(sk); |
2352 | if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) | ||
2353 | (*rtt_sample)(sk, tcp_usrtt(&tv)); | ||
2354 | 2492 | ||
2355 | if (icsk->icsk_ca_ops->pkts_acked) | 2493 | if (ca_ops->pkts_acked) |
2356 | icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); | 2494 | ca_ops->pkts_acked(sk, pkts_acked, last_ackt); |
2357 | } | 2495 | } |
2358 | 2496 | ||
2359 | #if FASTRETRANS_DEBUG > 0 | 2497 | #if FASTRETRANS_DEBUG > 0 |
@@ -2390,7 +2528,7 @@ static void tcp_ack_probe(struct sock *sk) | |||
2390 | 2528 | ||
2391 | /* Was it a usable window open? */ | 2529 | /* Was it a usable window open? */ |
2392 | 2530 | ||
2393 | if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, | 2531 | if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, |
2394 | tp->snd_una + tp->snd_wnd)) { | 2532 | tp->snd_una + tp->snd_wnd)) { |
2395 | icsk->icsk_backoff = 0; | 2533 | icsk->icsk_backoff = 0; |
2396 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); | 2534 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); |
@@ -2433,13 +2571,14 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack | |||
2433 | * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 | 2571 | * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 |
2434 | * and in FreeBSD. NetBSD's one is even worse.) is wrong. | 2572 | * and in FreeBSD. NetBSD's one is even worse.) is wrong. |
2435 | */ | 2573 | */ |
2436 | static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, | 2574 | static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, |
2437 | struct sk_buff *skb, u32 ack, u32 ack_seq) | 2575 | u32 ack_seq) |
2438 | { | 2576 | { |
2577 | struct tcp_sock *tp = tcp_sk(sk); | ||
2439 | int flag = 0; | 2578 | int flag = 0; |
2440 | u32 nwin = ntohs(skb->h.th->window); | 2579 | u32 nwin = ntohs(tcp_hdr(skb)->window); |
2441 | 2580 | ||
2442 | if (likely(!skb->h.th->syn)) | 2581 | if (likely(!tcp_hdr(skb)->syn)) |
2443 | nwin <<= tp->rx_opt.snd_wscale; | 2582 | nwin <<= tp->rx_opt.snd_wscale; |
2444 | 2583 | ||
2445 | if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { | 2584 | if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { |
@@ -2453,7 +2592,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, | |||
2453 | * fast path is recovered for sending TCP. | 2592 | * fast path is recovered for sending TCP. |
2454 | */ | 2593 | */ |
2455 | tp->pred_flags = 0; | 2594 | tp->pred_flags = 0; |
2456 | tcp_fast_path_check(sk, tp); | 2595 | tcp_fast_path_check(sk); |
2457 | 2596 | ||
2458 | if (nwin > tp->max_window) { | 2597 | if (nwin > tp->max_window) { |
2459 | tp->max_window = nwin; | 2598 | tp->max_window = nwin; |
@@ -2467,39 +2606,128 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, | |||
2467 | return flag; | 2606 | return flag; |
2468 | } | 2607 | } |
2469 | 2608 | ||
2470 | static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) | 2609 | /* A very conservative spurious RTO response algorithm: reduce cwnd and |
2610 | * continue in congestion avoidance. | ||
2611 | */ | ||
2612 | static void tcp_conservative_spur_to_response(struct tcp_sock *tp) | ||
2613 | { | ||
2614 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | ||
2615 | tp->snd_cwnd_cnt = 0; | ||
2616 | tcp_moderate_cwnd(tp); | ||
2617 | } | ||
2618 | |||
2619 | /* A conservative spurious RTO response algorithm: reduce cwnd using | ||
2620 | * rate halving and continue in congestion avoidance. | ||
2621 | */ | ||
2622 | static void tcp_ratehalving_spur_to_response(struct sock *sk) | ||
2623 | { | ||
2624 | tcp_enter_cwr(sk, 0); | ||
2625 | } | ||
2626 | |||
2627 | static void tcp_undo_spur_to_response(struct sock *sk, int flag) | ||
2628 | { | ||
2629 | if (flag&FLAG_ECE) | ||
2630 | tcp_ratehalving_spur_to_response(sk); | ||
2631 | else | ||
2632 | tcp_undo_cwr(sk, 1); | ||
2633 | } | ||
2634 | |||
2635 | /* F-RTO spurious RTO detection algorithm (RFC4138) | ||
2636 | * | ||
2637 | * F-RTO affects during two new ACKs following RTO (well, almost, see inline | ||
2638 | * comments). State (ACK number) is kept in frto_counter. When ACK advances | ||
2639 | * window (but not to or beyond highest sequence sent before RTO): | ||
2640 | * On First ACK, send two new segments out. | ||
2641 | * On Second ACK, RTO was likely spurious. Do spurious response (response | ||
2642 | * algorithm is not part of the F-RTO detection algorithm | ||
2643 | * given in RFC4138 but can be selected separately). | ||
2644 | * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss | ||
2645 | * and TCP falls back to conventional RTO recovery. | ||
2646 | * | ||
2647 | * Rationale: if the RTO was spurious, new ACKs should arrive from the | ||
2648 | * original window even after we transmit two new data segments. | ||
2649 | * | ||
2650 | * SACK version: | ||
2651 | * on first step, wait until first cumulative ACK arrives, then move to | ||
2652 | * the second step. In second step, the next ACK decides. | ||
2653 | * | ||
2654 | * F-RTO is implemented (mainly) in four functions: | ||
2655 | * - tcp_use_frto() is used to determine if TCP is can use F-RTO | ||
2656 | * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is | ||
2657 | * called when tcp_use_frto() showed green light | ||
2658 | * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm | ||
2659 | * - tcp_enter_frto_loss() is called if there is not enough evidence | ||
2660 | * to prove that the RTO is indeed spurious. It transfers the control | ||
2661 | * from F-RTO to the conventional RTO recovery | ||
2662 | */ | ||
2663 | static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) | ||
2471 | { | 2664 | { |
2472 | struct tcp_sock *tp = tcp_sk(sk); | 2665 | struct tcp_sock *tp = tcp_sk(sk); |
2473 | 2666 | ||
2474 | tcp_sync_left_out(tp); | 2667 | tcp_sync_left_out(tp); |
2475 | 2668 | ||
2476 | if (tp->snd_una == prior_snd_una || | 2669 | /* Duplicate the behavior from Loss state (fastretrans_alert) */ |
2477 | !before(tp->snd_una, tp->frto_highmark)) { | 2670 | if (flag&FLAG_DATA_ACKED) |
2478 | /* RTO was caused by loss, start retransmitting in | 2671 | inet_csk(sk)->icsk_retransmits = 0; |
2479 | * go-back-N slow start | 2672 | |
2480 | */ | 2673 | if (!before(tp->snd_una, tp->frto_highmark)) { |
2481 | tcp_enter_frto_loss(sk); | 2674 | tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag); |
2482 | return; | 2675 | return 1; |
2483 | } | 2676 | } |
2484 | 2677 | ||
2485 | if (tp->frto_counter == 1) { | 2678 | if (!IsSackFrto() || IsReno(tp)) { |
2486 | /* First ACK after RTO advances the window: allow two new | 2679 | /* RFC4138 shortcoming in step 2; should also have case c): |
2487 | * segments out. | 2680 | * ACK isn't duplicate nor advances window, e.g., opposite dir |
2681 | * data, winupdate | ||
2488 | */ | 2682 | */ |
2489 | tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; | 2683 | if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) && |
2684 | !(flag&FLAG_FORWARD_PROGRESS)) | ||
2685 | return 1; | ||
2686 | |||
2687 | if (!(flag&FLAG_DATA_ACKED)) { | ||
2688 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), | ||
2689 | flag); | ||
2690 | return 1; | ||
2691 | } | ||
2490 | } else { | 2692 | } else { |
2491 | /* Also the second ACK after RTO advances the window. | 2693 | if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { |
2492 | * The RTO was likely spurious. Reduce cwnd and continue | 2694 | /* Prevent sending of new data. */ |
2493 | * in congestion avoidance | 2695 | tp->snd_cwnd = min(tp->snd_cwnd, |
2494 | */ | 2696 | tcp_packets_in_flight(tp)); |
2495 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 2697 | return 1; |
2496 | tcp_moderate_cwnd(tp); | 2698 | } |
2699 | |||
2700 | if ((tp->frto_counter == 2) && | ||
2701 | (!(flag&FLAG_FORWARD_PROGRESS) || | ||
2702 | ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { | ||
2703 | /* RFC4138 shortcoming (see comment above) */ | ||
2704 | if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP)) | ||
2705 | return 1; | ||
2706 | |||
2707 | tcp_enter_frto_loss(sk, 3, flag); | ||
2708 | return 1; | ||
2709 | } | ||
2497 | } | 2710 | } |
2498 | 2711 | ||
2499 | /* F-RTO affects on two new ACKs following RTO. | 2712 | if (tp->frto_counter == 1) { |
2500 | * At latest on third ACK the TCP behavior is back to normal. | 2713 | tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; |
2501 | */ | 2714 | tp->frto_counter = 2; |
2502 | tp->frto_counter = (tp->frto_counter + 1) % 3; | 2715 | return 1; |
2716 | } else /* frto_counter == 2 */ { | ||
2717 | switch (sysctl_tcp_frto_response) { | ||
2718 | case 2: | ||
2719 | tcp_undo_spur_to_response(sk, flag); | ||
2720 | break; | ||
2721 | case 1: | ||
2722 | tcp_conservative_spur_to_response(tp); | ||
2723 | break; | ||
2724 | default: | ||
2725 | tcp_ratehalving_spur_to_response(sk); | ||
2726 | break; | ||
2727 | } | ||
2728 | tp->frto_counter = 0; | ||
2729 | } | ||
2730 | return 0; | ||
2503 | } | 2731 | } |
2504 | 2732 | ||
2505 | /* This routine deals with incoming acks, but not outgoing ones. */ | 2733 | /* This routine deals with incoming acks, but not outgoing ones. */ |
@@ -2513,6 +2741,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2513 | u32 prior_in_flight; | 2741 | u32 prior_in_flight; |
2514 | s32 seq_rtt; | 2742 | s32 seq_rtt; |
2515 | int prior_packets; | 2743 | int prior_packets; |
2744 | int frto_cwnd = 0; | ||
2516 | 2745 | ||
2517 | /* If the ack is newer than sent or older than previous acks | 2746 | /* If the ack is newer than sent or older than previous acks |
2518 | * then we can probably ignore it. | 2747 | * then we can probably ignore it. |
@@ -2549,12 +2778,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2549 | else | 2778 | else |
2550 | NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); | 2779 | NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); |
2551 | 2780 | ||
2552 | flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq); | 2781 | flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); |
2553 | 2782 | ||
2554 | if (TCP_SKB_CB(skb)->sacked) | 2783 | if (TCP_SKB_CB(skb)->sacked) |
2555 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); | 2784 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); |
2556 | 2785 | ||
2557 | if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) | 2786 | if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) |
2558 | flag |= FLAG_ECE; | 2787 | flag |= FLAG_ECE; |
2559 | 2788 | ||
2560 | tcp_ca_event(sk, CA_EVENT_SLOW_ACK); | 2789 | tcp_ca_event(sk, CA_EVENT_SLOW_ACK); |
@@ -2575,15 +2804,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2575 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt); | 2804 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt); |
2576 | 2805 | ||
2577 | if (tp->frto_counter) | 2806 | if (tp->frto_counter) |
2578 | tcp_process_frto(sk, prior_snd_una); | 2807 | frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag); |
2579 | 2808 | ||
2580 | if (tcp_ack_is_dubious(sk, flag)) { | 2809 | if (tcp_ack_is_dubious(sk, flag)) { |
2581 | /* Advance CWND, if state allows this. */ | 2810 | /* Advance CWND, if state allows this. */ |
2582 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) | 2811 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && |
2812 | tcp_may_raise_cwnd(sk, flag)) | ||
2583 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); | 2813 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); |
2584 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); | 2814 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); |
2585 | } else { | 2815 | } else { |
2586 | if ((flag & FLAG_DATA_ACKED)) | 2816 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) |
2587 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); | 2817 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); |
2588 | } | 2818 | } |
2589 | 2819 | ||
@@ -2599,7 +2829,7 @@ no_queue: | |||
2599 | * being used to time the probes, and is probably far higher than | 2829 | * being used to time the probes, and is probably far higher than |
2600 | * it needs to be for normal retransmission. | 2830 | * it needs to be for normal retransmission. |
2601 | */ | 2831 | */ |
2602 | if (sk->sk_send_head) | 2832 | if (tcp_send_head(sk)) |
2603 | tcp_ack_probe(sk); | 2833 | tcp_ack_probe(sk); |
2604 | return 1; | 2834 | return 1; |
2605 | 2835 | ||
@@ -2620,13 +2850,13 @@ uninteresting_ack: | |||
2620 | void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) | 2850 | void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) |
2621 | { | 2851 | { |
2622 | unsigned char *ptr; | 2852 | unsigned char *ptr; |
2623 | struct tcphdr *th = skb->h.th; | 2853 | struct tcphdr *th = tcp_hdr(skb); |
2624 | int length=(th->doff*4)-sizeof(struct tcphdr); | 2854 | int length=(th->doff*4)-sizeof(struct tcphdr); |
2625 | 2855 | ||
2626 | ptr = (unsigned char *)(th + 1); | 2856 | ptr = (unsigned char *)(th + 1); |
2627 | opt_rx->saw_tstamp = 0; | 2857 | opt_rx->saw_tstamp = 0; |
2628 | 2858 | ||
2629 | while(length>0) { | 2859 | while (length > 0) { |
2630 | int opcode=*ptr++; | 2860 | int opcode=*ptr++; |
2631 | int opsize; | 2861 | int opsize; |
2632 | 2862 | ||
@@ -2642,9 +2872,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2642 | return; | 2872 | return; |
2643 | if (opsize > length) | 2873 | if (opsize > length) |
2644 | return; /* don't parse partial options */ | 2874 | return; /* don't parse partial options */ |
2645 | switch(opcode) { | 2875 | switch (opcode) { |
2646 | case TCPOPT_MSS: | 2876 | case TCPOPT_MSS: |
2647 | if(opsize==TCPOLEN_MSS && th->syn && !estab) { | 2877 | if (opsize==TCPOLEN_MSS && th->syn && !estab) { |
2648 | u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); | 2878 | u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); |
2649 | if (in_mss) { | 2879 | if (in_mss) { |
2650 | if (opt_rx->user_mss && opt_rx->user_mss < in_mss) | 2880 | if (opt_rx->user_mss && opt_rx->user_mss < in_mss) |
@@ -2654,12 +2884,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2654 | } | 2884 | } |
2655 | break; | 2885 | break; |
2656 | case TCPOPT_WINDOW: | 2886 | case TCPOPT_WINDOW: |
2657 | if(opsize==TCPOLEN_WINDOW && th->syn && !estab) | 2887 | if (opsize==TCPOLEN_WINDOW && th->syn && !estab) |
2658 | if (sysctl_tcp_window_scaling) { | 2888 | if (sysctl_tcp_window_scaling) { |
2659 | __u8 snd_wscale = *(__u8 *) ptr; | 2889 | __u8 snd_wscale = *(__u8 *) ptr; |
2660 | opt_rx->wscale_ok = 1; | 2890 | opt_rx->wscale_ok = 1; |
2661 | if (snd_wscale > 14) { | 2891 | if (snd_wscale > 14) { |
2662 | if(net_ratelimit()) | 2892 | if (net_ratelimit()) |
2663 | printk(KERN_INFO "tcp_parse_options: Illegal window " | 2893 | printk(KERN_INFO "tcp_parse_options: Illegal window " |
2664 | "scaling value %d >14 received.\n", | 2894 | "scaling value %d >14 received.\n", |
2665 | snd_wscale); | 2895 | snd_wscale); |
@@ -2669,7 +2899,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2669 | } | 2899 | } |
2670 | break; | 2900 | break; |
2671 | case TCPOPT_TIMESTAMP: | 2901 | case TCPOPT_TIMESTAMP: |
2672 | if(opsize==TCPOLEN_TIMESTAMP) { | 2902 | if (opsize==TCPOLEN_TIMESTAMP) { |
2673 | if ((estab && opt_rx->tstamp_ok) || | 2903 | if ((estab && opt_rx->tstamp_ok) || |
2674 | (!estab && sysctl_tcp_timestamps)) { | 2904 | (!estab && sysctl_tcp_timestamps)) { |
2675 | opt_rx->saw_tstamp = 1; | 2905 | opt_rx->saw_tstamp = 1; |
@@ -2679,7 +2909,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2679 | } | 2909 | } |
2680 | break; | 2910 | break; |
2681 | case TCPOPT_SACK_PERM: | 2911 | case TCPOPT_SACK_PERM: |
2682 | if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { | 2912 | if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { |
2683 | if (sysctl_tcp_sack) { | 2913 | if (sysctl_tcp_sack) { |
2684 | opt_rx->sack_ok = 1; | 2914 | opt_rx->sack_ok = 1; |
2685 | tcp_sack_reset(opt_rx); | 2915 | tcp_sack_reset(opt_rx); |
@@ -2688,7 +2918,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2688 | break; | 2918 | break; |
2689 | 2919 | ||
2690 | case TCPOPT_SACK: | 2920 | case TCPOPT_SACK: |
2691 | if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && | 2921 | if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && |
2692 | !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && | 2922 | !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && |
2693 | opt_rx->sack_ok) { | 2923 | opt_rx->sack_ok) { |
2694 | TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; | 2924 | TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; |
@@ -2701,10 +2931,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2701 | */ | 2931 | */ |
2702 | break; | 2932 | break; |
2703 | #endif | 2933 | #endif |
2704 | }; | 2934 | } |
2935 | |||
2705 | ptr+=opsize-2; | 2936 | ptr+=opsize-2; |
2706 | length-=opsize; | 2937 | length-=opsize; |
2707 | }; | 2938 | } |
2708 | } | 2939 | } |
2709 | } | 2940 | } |
2710 | 2941 | ||
@@ -2737,7 +2968,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | |||
2737 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) | 2968 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) |
2738 | { | 2969 | { |
2739 | tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; | 2970 | tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; |
2740 | tp->rx_opt.ts_recent_stamp = xtime.tv_sec; | 2971 | tp->rx_opt.ts_recent_stamp = get_seconds(); |
2741 | } | 2972 | } |
2742 | 2973 | ||
2743 | static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | 2974 | static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) |
@@ -2750,8 +2981,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
2750 | * Not only, also it occurs for expired timestamps. | 2981 | * Not only, also it occurs for expired timestamps. |
2751 | */ | 2982 | */ |
2752 | 2983 | ||
2753 | if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || | 2984 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || |
2754 | xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) | 2985 | get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) |
2755 | tcp_store_ts_recent(tp); | 2986 | tcp_store_ts_recent(tp); |
2756 | } | 2987 | } |
2757 | } | 2988 | } |
@@ -2782,7 +3013,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
2782 | static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) | 3013 | static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) |
2783 | { | 3014 | { |
2784 | struct tcp_sock *tp = tcp_sk(sk); | 3015 | struct tcp_sock *tp = tcp_sk(sk); |
2785 | struct tcphdr *th = skb->h.th; | 3016 | struct tcphdr *th = tcp_hdr(skb); |
2786 | u32 seq = TCP_SKB_CB(skb)->seq; | 3017 | u32 seq = TCP_SKB_CB(skb)->seq; |
2787 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 3018 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
2788 | 3019 | ||
@@ -2803,7 +3034,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff * | |||
2803 | { | 3034 | { |
2804 | const struct tcp_sock *tp = tcp_sk(sk); | 3035 | const struct tcp_sock *tp = tcp_sk(sk); |
2805 | return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && | 3036 | return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && |
2806 | xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && | 3037 | get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && |
2807 | !tcp_disordered_ack(sk, skb)); | 3038 | !tcp_disordered_ack(sk, skb)); |
2808 | } | 3039 | } |
2809 | 3040 | ||
@@ -2910,7 +3141,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) | |||
2910 | printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", | 3141 | printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", |
2911 | __FUNCTION__, sk->sk_state); | 3142 | __FUNCTION__, sk->sk_state); |
2912 | break; | 3143 | break; |
2913 | }; | 3144 | } |
2914 | 3145 | ||
2915 | /* It _is_ possible, that we have something out-of-order _after_ FIN. | 3146 | /* It _is_ possible, that we have something out-of-order _after_ FIN. |
2916 | * Probably, we should reset in this case. For now drop them. | 3147 | * Probably, we should reset in this case. For now drop them. |
@@ -3009,7 +3240,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) | |||
3009 | */ | 3240 | */ |
3010 | tp->rx_opt.num_sacks--; | 3241 | tp->rx_opt.num_sacks--; |
3011 | tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); | 3242 | tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); |
3012 | for(i=this_sack; i < tp->rx_opt.num_sacks; i++) | 3243 | for (i=this_sack; i < tp->rx_opt.num_sacks; i++) |
3013 | sp[i] = sp[i+1]; | 3244 | sp[i] = sp[i+1]; |
3014 | continue; | 3245 | continue; |
3015 | } | 3246 | } |
@@ -3062,7 +3293,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) | |||
3062 | tp->rx_opt.num_sacks--; | 3293 | tp->rx_opt.num_sacks--; |
3063 | sp--; | 3294 | sp--; |
3064 | } | 3295 | } |
3065 | for(; this_sack > 0; this_sack--, sp--) | 3296 | for (; this_sack > 0; this_sack--, sp--) |
3066 | *sp = *(sp-1); | 3297 | *sp = *(sp-1); |
3067 | 3298 | ||
3068 | new_sack: | 3299 | new_sack: |
@@ -3088,7 +3319,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
3088 | return; | 3319 | return; |
3089 | } | 3320 | } |
3090 | 3321 | ||
3091 | for(this_sack = 0; this_sack < num_sacks; ) { | 3322 | for (this_sack = 0; this_sack < num_sacks; ) { |
3092 | /* Check if the start of the sack is covered by RCV.NXT. */ | 3323 | /* Check if the start of the sack is covered by RCV.NXT. */ |
3093 | if (!before(tp->rcv_nxt, sp->start_seq)) { | 3324 | if (!before(tp->rcv_nxt, sp->start_seq)) { |
3094 | int i; | 3325 | int i; |
@@ -3144,8 +3375,8 @@ static void tcp_ofo_queue(struct sock *sk) | |||
3144 | __skb_unlink(skb, &tp->out_of_order_queue); | 3375 | __skb_unlink(skb, &tp->out_of_order_queue); |
3145 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 3376 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
3146 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 3377 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
3147 | if(skb->h.th->fin) | 3378 | if (tcp_hdr(skb)->fin) |
3148 | tcp_fin(skb, sk, skb->h.th); | 3379 | tcp_fin(skb, sk, tcp_hdr(skb)); |
3149 | } | 3380 | } |
3150 | } | 3381 | } |
3151 | 3382 | ||
@@ -3153,7 +3384,7 @@ static int tcp_prune_queue(struct sock *sk); | |||
3153 | 3384 | ||
3154 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 3385 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
3155 | { | 3386 | { |
3156 | struct tcphdr *th = skb->h.th; | 3387 | struct tcphdr *th = tcp_hdr(skb); |
3157 | struct tcp_sock *tp = tcp_sk(sk); | 3388 | struct tcp_sock *tp = tcp_sk(sk); |
3158 | int eaten = -1; | 3389 | int eaten = -1; |
3159 | 3390 | ||
@@ -3210,9 +3441,9 @@ queue_and_out: | |||
3210 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 3441 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
3211 | } | 3442 | } |
3212 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 3443 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
3213 | if(skb->len) | 3444 | if (skb->len) |
3214 | tcp_event_data_recv(sk, tp, skb); | 3445 | tcp_event_data_recv(sk, skb); |
3215 | if(th->fin) | 3446 | if (th->fin) |
3216 | tcp_fin(skb, sk, th); | 3447 | tcp_fin(skb, sk, th); |
3217 | 3448 | ||
3218 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | 3449 | if (!skb_queue_empty(&tp->out_of_order_queue)) { |
@@ -3228,7 +3459,7 @@ queue_and_out: | |||
3228 | if (tp->rx_opt.num_sacks) | 3459 | if (tp->rx_opt.num_sacks) |
3229 | tcp_sack_remove(tp); | 3460 | tcp_sack_remove(tp); |
3230 | 3461 | ||
3231 | tcp_fast_path_check(sk, tp); | 3462 | tcp_fast_path_check(sk); |
3232 | 3463 | ||
3233 | if (eaten > 0) | 3464 | if (eaten > 0) |
3234 | __kfree_skb(skb); | 3465 | __kfree_skb(skb); |
@@ -3392,7 +3623,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
3392 | * - bloated or contains data before "start" or | 3623 | * - bloated or contains data before "start" or |
3393 | * overlaps to the next one. | 3624 | * overlaps to the next one. |
3394 | */ | 3625 | */ |
3395 | if (!skb->h.th->syn && !skb->h.th->fin && | 3626 | if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && |
3396 | (tcp_win_from_space(skb->truesize) > skb->len || | 3627 | (tcp_win_from_space(skb->truesize) > skb->len || |
3397 | before(TCP_SKB_CB(skb)->seq, start) || | 3628 | before(TCP_SKB_CB(skb)->seq, start) || |
3398 | (skb->next != tail && | 3629 | (skb->next != tail && |
@@ -3403,7 +3634,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
3403 | start = TCP_SKB_CB(skb)->end_seq; | 3634 | start = TCP_SKB_CB(skb)->end_seq; |
3404 | skb = skb->next; | 3635 | skb = skb->next; |
3405 | } | 3636 | } |
3406 | if (skb == tail || skb->h.th->syn || skb->h.th->fin) | 3637 | if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) |
3407 | return; | 3638 | return; |
3408 | 3639 | ||
3409 | while (before(start, end)) { | 3640 | while (before(start, end)) { |
@@ -3419,11 +3650,14 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
3419 | nskb = alloc_skb(copy+header, GFP_ATOMIC); | 3650 | nskb = alloc_skb(copy+header, GFP_ATOMIC); |
3420 | if (!nskb) | 3651 | if (!nskb) |
3421 | return; | 3652 | return; |
3653 | |||
3654 | skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head); | ||
3655 | skb_set_network_header(nskb, (skb_network_header(skb) - | ||
3656 | skb->head)); | ||
3657 | skb_set_transport_header(nskb, (skb_transport_header(skb) - | ||
3658 | skb->head)); | ||
3422 | skb_reserve(nskb, header); | 3659 | skb_reserve(nskb, header); |
3423 | memcpy(nskb->head, skb->head, header); | 3660 | memcpy(nskb->head, skb->head, header); |
3424 | nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head); | ||
3425 | nskb->h.raw = nskb->head + (skb->h.raw-skb->head); | ||
3426 | nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); | ||
3427 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | 3661 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); |
3428 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; | 3662 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; |
3429 | __skb_insert(nskb, skb->prev, skb, list); | 3663 | __skb_insert(nskb, skb->prev, skb, list); |
@@ -3449,7 +3683,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
3449 | __kfree_skb(skb); | 3683 | __kfree_skb(skb); |
3450 | NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); | 3684 | NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); |
3451 | skb = next; | 3685 | skb = next; |
3452 | if (skb == tail || skb->h.th->syn || skb->h.th->fin) | 3686 | if (skb == tail || |
3687 | tcp_hdr(skb)->syn || | ||
3688 | tcp_hdr(skb)->fin) | ||
3453 | return; | 3689 | return; |
3454 | } | 3690 | } |
3455 | } | 3691 | } |
@@ -3514,7 +3750,7 @@ static int tcp_prune_queue(struct sock *sk) | |||
3514 | NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); | 3750 | NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); |
3515 | 3751 | ||
3516 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) | 3752 | if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) |
3517 | tcp_clamp_window(sk, tp); | 3753 | tcp_clamp_window(sk); |
3518 | else if (tcp_memory_pressure) | 3754 | else if (tcp_memory_pressure) |
3519 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); | 3755 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
3520 | 3756 | ||
@@ -3583,8 +3819,10 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
3583 | tp->snd_cwnd_stamp = tcp_time_stamp; | 3819 | tp->snd_cwnd_stamp = tcp_time_stamp; |
3584 | } | 3820 | } |
3585 | 3821 | ||
3586 | static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) | 3822 | static int tcp_should_expand_sndbuf(struct sock *sk) |
3587 | { | 3823 | { |
3824 | struct tcp_sock *tp = tcp_sk(sk); | ||
3825 | |||
3588 | /* If the user specified a specific send buffer setting, do | 3826 | /* If the user specified a specific send buffer setting, do |
3589 | * not modify it. | 3827 | * not modify it. |
3590 | */ | 3828 | */ |
@@ -3616,7 +3854,7 @@ static void tcp_new_space(struct sock *sk) | |||
3616 | { | 3854 | { |
3617 | struct tcp_sock *tp = tcp_sk(sk); | 3855 | struct tcp_sock *tp = tcp_sk(sk); |
3618 | 3856 | ||
3619 | if (tcp_should_expand_sndbuf(sk, tp)) { | 3857 | if (tcp_should_expand_sndbuf(sk)) { |
3620 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + | 3858 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + |
3621 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), | 3859 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), |
3622 | demanded = max_t(unsigned int, tp->snd_cwnd, | 3860 | demanded = max_t(unsigned int, tp->snd_cwnd, |
@@ -3640,9 +3878,9 @@ static void tcp_check_space(struct sock *sk) | |||
3640 | } | 3878 | } |
3641 | } | 3879 | } |
3642 | 3880 | ||
3643 | static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) | 3881 | static inline void tcp_data_snd_check(struct sock *sk) |
3644 | { | 3882 | { |
3645 | tcp_push_pending_frames(sk, tp); | 3883 | tcp_push_pending_frames(sk); |
3646 | tcp_check_space(sk); | 3884 | tcp_check_space(sk); |
3647 | } | 3885 | } |
3648 | 3886 | ||
@@ -3790,7 +4028,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) | |||
3790 | int err; | 4028 | int err; |
3791 | 4029 | ||
3792 | local_bh_enable(); | 4030 | local_bh_enable(); |
3793 | if (skb->ip_summed==CHECKSUM_UNNECESSARY) | 4031 | if (skb_csum_unnecessary(skb)) |
3794 | err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); | 4032 | err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); |
3795 | else | 4033 | else |
3796 | err = skb_copy_and_csum_datagram_iovec(skb, hlen, | 4034 | err = skb_copy_and_csum_datagram_iovec(skb, hlen, |
@@ -3822,7 +4060,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb | |||
3822 | 4060 | ||
3823 | static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) | 4061 | static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) |
3824 | { | 4062 | { |
3825 | return skb->ip_summed != CHECKSUM_UNNECESSARY && | 4063 | return !skb_csum_unnecessary(skb) && |
3826 | __tcp_checksum_complete_user(sk, skb); | 4064 | __tcp_checksum_complete_user(sk, skb); |
3827 | } | 4065 | } |
3828 | 4066 | ||
@@ -3840,7 +4078,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen | |||
3840 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 4078 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
3841 | tp->ucopy.dma_chan = get_softnet_dma(); | 4079 | tp->ucopy.dma_chan = get_softnet_dma(); |
3842 | 4080 | ||
3843 | if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) { | 4081 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { |
3844 | 4082 | ||
3845 | dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, | 4083 | dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, |
3846 | skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); | 4084 | skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); |
@@ -3856,7 +4094,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen | |||
3856 | tcp_rcv_space_adjust(sk); | 4094 | tcp_rcv_space_adjust(sk); |
3857 | 4095 | ||
3858 | if ((tp->ucopy.len == 0) || | 4096 | if ((tp->ucopy.len == 0) || |
3859 | (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) || | 4097 | (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) || |
3860 | (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { | 4098 | (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { |
3861 | tp->ucopy.wakeup = 1; | 4099 | tp->ucopy.wakeup = 1; |
3862 | sk->sk_data_ready(sk, 0); | 4100 | sk->sk_data_ready(sk, 0); |
@@ -3976,7 +4214,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3976 | */ | 4214 | */ |
3977 | tcp_ack(sk, skb, 0); | 4215 | tcp_ack(sk, skb, 0); |
3978 | __kfree_skb(skb); | 4216 | __kfree_skb(skb); |
3979 | tcp_data_snd_check(sk, tp); | 4217 | tcp_data_snd_check(sk); |
3980 | return 0; | 4218 | return 0; |
3981 | } else { /* Header too small */ | 4219 | } else { /* Header too small */ |
3982 | TCP_INC_STATS_BH(TCP_MIB_INERRS); | 4220 | TCP_INC_STATS_BH(TCP_MIB_INERRS); |
@@ -4047,12 +4285,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4047 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4285 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4048 | } | 4286 | } |
4049 | 4287 | ||
4050 | tcp_event_data_recv(sk, tp, skb); | 4288 | tcp_event_data_recv(sk, skb); |
4051 | 4289 | ||
4052 | if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { | 4290 | if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { |
4053 | /* Well, only one small jumplet in fast path... */ | 4291 | /* Well, only one small jumplet in fast path... */ |
4054 | tcp_ack(sk, skb, FLAG_DATA); | 4292 | tcp_ack(sk, skb, FLAG_DATA); |
4055 | tcp_data_snd_check(sk, tp); | 4293 | tcp_data_snd_check(sk); |
4056 | if (!inet_csk_ack_scheduled(sk)) | 4294 | if (!inet_csk_ack_scheduled(sk)) |
4057 | goto no_ack; | 4295 | goto no_ack; |
4058 | } | 4296 | } |
@@ -4109,7 +4347,7 @@ slow_path: | |||
4109 | goto discard; | 4347 | goto discard; |
4110 | } | 4348 | } |
4111 | 4349 | ||
4112 | if(th->rst) { | 4350 | if (th->rst) { |
4113 | tcp_reset(sk); | 4351 | tcp_reset(sk); |
4114 | goto discard; | 4352 | goto discard; |
4115 | } | 4353 | } |
@@ -4124,7 +4362,7 @@ slow_path: | |||
4124 | } | 4362 | } |
4125 | 4363 | ||
4126 | step5: | 4364 | step5: |
4127 | if(th->ack) | 4365 | if (th->ack) |
4128 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 4366 | tcp_ack(sk, skb, FLAG_SLOWPATH); |
4129 | 4367 | ||
4130 | tcp_rcv_rtt_measure_ts(sk, skb); | 4368 | tcp_rcv_rtt_measure_ts(sk, skb); |
@@ -4135,7 +4373,7 @@ step5: | |||
4135 | /* step 7: process the segment text */ | 4373 | /* step 7: process the segment text */ |
4136 | tcp_data_queue(sk, skb); | 4374 | tcp_data_queue(sk, skb); |
4137 | 4375 | ||
4138 | tcp_data_snd_check(sk, tp); | 4376 | tcp_data_snd_check(sk); |
4139 | tcp_ack_snd_check(sk); | 4377 | tcp_ack_snd_check(sk); |
4140 | return 0; | 4378 | return 0; |
4141 | 4379 | ||
@@ -4412,13 +4650,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4412 | goto discard; | 4650 | goto discard; |
4413 | 4651 | ||
4414 | case TCP_LISTEN: | 4652 | case TCP_LISTEN: |
4415 | if(th->ack) | 4653 | if (th->ack) |
4416 | return 1; | 4654 | return 1; |
4417 | 4655 | ||
4418 | if(th->rst) | 4656 | if (th->rst) |
4419 | goto discard; | 4657 | goto discard; |
4420 | 4658 | ||
4421 | if(th->syn) { | 4659 | if (th->syn) { |
4422 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) | 4660 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) |
4423 | return 1; | 4661 | return 1; |
4424 | 4662 | ||
@@ -4452,7 +4690,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4452 | /* Do step6 onward by hand. */ | 4690 | /* Do step6 onward by hand. */ |
4453 | tcp_urg(sk, skb, th); | 4691 | tcp_urg(sk, skb, th); |
4454 | __kfree_skb(skb); | 4692 | __kfree_skb(skb); |
4455 | tcp_data_snd_check(sk, tp); | 4693 | tcp_data_snd_check(sk); |
4456 | return 0; | 4694 | return 0; |
4457 | } | 4695 | } |
4458 | 4696 | ||
@@ -4474,7 +4712,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4474 | } | 4712 | } |
4475 | 4713 | ||
4476 | /* step 2: check RST bit */ | 4714 | /* step 2: check RST bit */ |
4477 | if(th->rst) { | 4715 | if (th->rst) { |
4478 | tcp_reset(sk); | 4716 | tcp_reset(sk); |
4479 | goto discard; | 4717 | goto discard; |
4480 | } | 4718 | } |
@@ -4497,7 +4735,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4497 | if (th->ack) { | 4735 | if (th->ack) { |
4498 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); | 4736 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); |
4499 | 4737 | ||
4500 | switch(sk->sk_state) { | 4738 | switch (sk->sk_state) { |
4501 | case TCP_SYN_RECV: | 4739 | case TCP_SYN_RECV: |
4502 | if (acceptable) { | 4740 | if (acceptable) { |
4503 | tp->copied_seq = tp->rcv_nxt; | 4741 | tp->copied_seq = tp->rcv_nxt; |
@@ -4644,7 +4882,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4644 | 4882 | ||
4645 | /* tcp_data could move socket to TIME-WAIT */ | 4883 | /* tcp_data could move socket to TIME-WAIT */ |
4646 | if (sk->sk_state != TCP_CLOSE) { | 4884 | if (sk->sk_state != TCP_CLOSE) { |
4647 | tcp_data_snd_check(sk, tp); | 4885 | tcp_data_snd_check(sk); |
4648 | tcp_ack_snd_check(sk); | 4886 | tcp_ack_snd_check(sk); |
4649 | } | 4887 | } |
4650 | 4888 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0ba74bbe7d30..5a3e7f839fc5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -88,7 +88,7 @@ int sysctl_tcp_low_latency __read_mostly; | |||
88 | #define ICMP_MIN_LENGTH 8 | 88 | #define ICMP_MIN_LENGTH 8 |
89 | 89 | ||
90 | /* Socket used for sending RSTs */ | 90 | /* Socket used for sending RSTs */ |
91 | static struct socket *tcp_socket; | 91 | static struct socket *tcp_socket __read_mostly; |
92 | 92 | ||
93 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); | 93 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); |
94 | 94 | ||
@@ -125,10 +125,10 @@ void tcp_unhash(struct sock *sk) | |||
125 | 125 | ||
126 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) | 126 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) |
127 | { | 127 | { |
128 | return secure_tcp_sequence_number(skb->nh.iph->daddr, | 128 | return secure_tcp_sequence_number(ip_hdr(skb)->daddr, |
129 | skb->nh.iph->saddr, | 129 | ip_hdr(skb)->saddr, |
130 | skb->h.th->dest, | 130 | tcp_hdr(skb)->dest, |
131 | skb->h.th->source); | 131 | tcp_hdr(skb)->source); |
132 | } | 132 | } |
133 | 133 | ||
134 | int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | 134 | int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) |
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
149 | */ | 149 | */ |
150 | if (tcptw->tw_ts_recent_stamp && | 150 | if (tcptw->tw_ts_recent_stamp && |
151 | (twp == NULL || (sysctl_tcp_tw_reuse && | 151 | (twp == NULL || (sysctl_tcp_tw_reuse && |
152 | xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { | 152 | get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { |
153 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; | 153 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
154 | if (tp->write_seq == 0) | 154 | if (tp->write_seq == 0) |
155 | tp->write_seq = 1; | 155 | tp->write_seq = 1; |
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
224 | * when trying new connection. | 224 | * when trying new connection. |
225 | */ | 225 | */ |
226 | if (peer != NULL && | 226 | if (peer != NULL && |
227 | peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { | 227 | peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { |
228 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | 228 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; |
229 | tp->rx_opt.ts_recent = peer->tcp_ts; | 229 | tp->rx_opt.ts_recent = peer->tcp_ts; |
230 | } | 230 | } |
@@ -354,8 +354,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
354 | struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); | 354 | struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); |
355 | struct tcp_sock *tp; | 355 | struct tcp_sock *tp; |
356 | struct inet_sock *inet; | 356 | struct inet_sock *inet; |
357 | int type = skb->h.icmph->type; | 357 | const int type = icmp_hdr(skb)->type; |
358 | int code = skb->h.icmph->code; | 358 | const int code = icmp_hdr(skb)->code; |
359 | struct sock *sk; | 359 | struct sock *sk; |
360 | __u32 seq; | 360 | __u32 seq; |
361 | int err; | 361 | int err; |
@@ -499,11 +499,12 @@ out: | |||
499 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | 499 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) |
500 | { | 500 | { |
501 | struct inet_sock *inet = inet_sk(sk); | 501 | struct inet_sock *inet = inet_sk(sk); |
502 | struct tcphdr *th = skb->h.th; | 502 | struct tcphdr *th = tcp_hdr(skb); |
503 | 503 | ||
504 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | 504 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
505 | th->check = ~tcp_v4_check(len, inet->saddr, | 505 | th->check = ~tcp_v4_check(len, inet->saddr, |
506 | inet->daddr, 0); | 506 | inet->daddr, 0); |
507 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
507 | skb->csum_offset = offsetof(struct tcphdr, check); | 508 | skb->csum_offset = offsetof(struct tcphdr, check); |
508 | } else { | 509 | } else { |
509 | th->check = tcp_v4_check(len, inet->saddr, inet->daddr, | 510 | th->check = tcp_v4_check(len, inet->saddr, inet->daddr, |
@@ -515,17 +516,18 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | |||
515 | 516 | ||
516 | int tcp_v4_gso_send_check(struct sk_buff *skb) | 517 | int tcp_v4_gso_send_check(struct sk_buff *skb) |
517 | { | 518 | { |
518 | struct iphdr *iph; | 519 | const struct iphdr *iph; |
519 | struct tcphdr *th; | 520 | struct tcphdr *th; |
520 | 521 | ||
521 | if (!pskb_may_pull(skb, sizeof(*th))) | 522 | if (!pskb_may_pull(skb, sizeof(*th))) |
522 | return -EINVAL; | 523 | return -EINVAL; |
523 | 524 | ||
524 | iph = skb->nh.iph; | 525 | iph = ip_hdr(skb); |
525 | th = skb->h.th; | 526 | th = tcp_hdr(skb); |
526 | 527 | ||
527 | th->check = 0; | 528 | th->check = 0; |
528 | th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); | 529 | th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); |
530 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
529 | skb->csum_offset = offsetof(struct tcphdr, check); | 531 | skb->csum_offset = offsetof(struct tcphdr, check); |
530 | skb->ip_summed = CHECKSUM_PARTIAL; | 532 | skb->ip_summed = CHECKSUM_PARTIAL; |
531 | return 0; | 533 | return 0; |
@@ -546,7 +548,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) | |||
546 | 548 | ||
547 | static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | 549 | static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) |
548 | { | 550 | { |
549 | struct tcphdr *th = skb->h.th; | 551 | struct tcphdr *th = tcp_hdr(skb); |
550 | struct { | 552 | struct { |
551 | struct tcphdr th; | 553 | struct tcphdr th; |
552 | #ifdef CONFIG_TCP_MD5SIG | 554 | #ifdef CONFIG_TCP_MD5SIG |
@@ -585,7 +587,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
585 | arg.iov[0].iov_len = sizeof(rep.th); | 587 | arg.iov[0].iov_len = sizeof(rep.th); |
586 | 588 | ||
587 | #ifdef CONFIG_TCP_MD5SIG | 589 | #ifdef CONFIG_TCP_MD5SIG |
588 | key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL; | 590 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; |
589 | if (key) { | 591 | if (key) { |
590 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | | 592 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | |
591 | (TCPOPT_NOP << 16) | | 593 | (TCPOPT_NOP << 16) | |
@@ -597,14 +599,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
597 | 599 | ||
598 | tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], | 600 | tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], |
599 | key, | 601 | key, |
600 | skb->nh.iph->daddr, | 602 | ip_hdr(skb)->daddr, |
601 | skb->nh.iph->saddr, | 603 | ip_hdr(skb)->saddr, |
602 | &rep.th, IPPROTO_TCP, | 604 | &rep.th, IPPROTO_TCP, |
603 | arg.iov[0].iov_len); | 605 | arg.iov[0].iov_len); |
604 | } | 606 | } |
605 | #endif | 607 | #endif |
606 | arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, | 608 | arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, |
607 | skb->nh.iph->saddr, /* XXX */ | 609 | ip_hdr(skb)->saddr, /* XXX */ |
608 | sizeof(struct tcphdr), IPPROTO_TCP, 0); | 610 | sizeof(struct tcphdr), IPPROTO_TCP, 0); |
609 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; | 611 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
610 | 612 | ||
@@ -622,7 +624,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, | |||
622 | struct sk_buff *skb, u32 seq, u32 ack, | 624 | struct sk_buff *skb, u32 seq, u32 ack, |
623 | u32 win, u32 ts) | 625 | u32 win, u32 ts) |
624 | { | 626 | { |
625 | struct tcphdr *th = skb->h.th; | 627 | struct tcphdr *th = tcp_hdr(skb); |
626 | struct { | 628 | struct { |
627 | struct tcphdr th; | 629 | struct tcphdr th; |
628 | __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) | 630 | __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) |
@@ -670,7 +672,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, | |||
670 | * skb->sk) holds true, but we program defensively. | 672 | * skb->sk) holds true, but we program defensively. |
671 | */ | 673 | */ |
672 | if (!twsk && skb->sk) { | 674 | if (!twsk && skb->sk) { |
673 | key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr); | 675 | key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); |
674 | } else if (twsk && twsk->tw_md5_keylen) { | 676 | } else if (twsk && twsk->tw_md5_keylen) { |
675 | tw_key.key = twsk->tw_md5_key; | 677 | tw_key.key = twsk->tw_md5_key; |
676 | tw_key.keylen = twsk->tw_md5_keylen; | 678 | tw_key.keylen = twsk->tw_md5_keylen; |
@@ -690,14 +692,14 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, | |||
690 | 692 | ||
691 | tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], | 693 | tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], |
692 | key, | 694 | key, |
693 | skb->nh.iph->daddr, | 695 | ip_hdr(skb)->daddr, |
694 | skb->nh.iph->saddr, | 696 | ip_hdr(skb)->saddr, |
695 | &rep.th, IPPROTO_TCP, | 697 | &rep.th, IPPROTO_TCP, |
696 | arg.iov[0].iov_len); | 698 | arg.iov[0].iov_len); |
697 | } | 699 | } |
698 | #endif | 700 | #endif |
699 | arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, | 701 | arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, |
700 | skb->nh.iph->saddr, /* XXX */ | 702 | ip_hdr(skb)->saddr, /* XXX */ |
701 | arg.iov[0].iov_len, IPPROTO_TCP, 0); | 703 | arg.iov[0].iov_len, IPPROTO_TCP, 0); |
702 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; | 704 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
703 | 705 | ||
@@ -745,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
745 | skb = tcp_make_synack(sk, dst, req); | 747 | skb = tcp_make_synack(sk, dst, req); |
746 | 748 | ||
747 | if (skb) { | 749 | if (skb) { |
748 | struct tcphdr *th = skb->h.th; | 750 | struct tcphdr *th = tcp_hdr(skb); |
749 | 751 | ||
750 | th->check = tcp_v4_check(skb->len, | 752 | th->check = tcp_v4_check(skb->len, |
751 | ireq->loc_addr, | 753 | ireq->loc_addr, |
@@ -781,7 +783,7 @@ static void syn_flood_warning(struct sk_buff *skb) | |||
781 | warntime = jiffies; | 783 | warntime = jiffies; |
782 | printk(KERN_INFO | 784 | printk(KERN_INFO |
783 | "possible SYN flooding on port %d. Sending cookies.\n", | 785 | "possible SYN flooding on port %d. Sending cookies.\n", |
784 | ntohs(skb->h.th->dest)); | 786 | ntohs(tcp_hdr(skb)->dest)); |
785 | } | 787 | } |
786 | } | 788 | } |
787 | #endif | 789 | #endif |
@@ -1133,8 +1135,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) | |||
1133 | */ | 1135 | */ |
1134 | __u8 *hash_location = NULL; | 1136 | __u8 *hash_location = NULL; |
1135 | struct tcp_md5sig_key *hash_expected; | 1137 | struct tcp_md5sig_key *hash_expected; |
1136 | struct iphdr *iph = skb->nh.iph; | 1138 | const struct iphdr *iph = ip_hdr(skb); |
1137 | struct tcphdr *th = skb->h.th; | 1139 | struct tcphdr *th = tcp_hdr(skb); |
1138 | int length = (th->doff << 2) - sizeof(struct tcphdr); | 1140 | int length = (th->doff << 2) - sizeof(struct tcphdr); |
1139 | int genhash; | 1141 | int genhash; |
1140 | unsigned char *ptr; | 1142 | unsigned char *ptr; |
@@ -1251,8 +1253,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1251 | struct inet_request_sock *ireq; | 1253 | struct inet_request_sock *ireq; |
1252 | struct tcp_options_received tmp_opt; | 1254 | struct tcp_options_received tmp_opt; |
1253 | struct request_sock *req; | 1255 | struct request_sock *req; |
1254 | __be32 saddr = skb->nh.iph->saddr; | 1256 | __be32 saddr = ip_hdr(skb)->saddr; |
1255 | __be32 daddr = skb->nh.iph->daddr; | 1257 | __be32 daddr = ip_hdr(skb)->daddr; |
1256 | __u32 isn = TCP_SKB_CB(skb)->when; | 1258 | __u32 isn = TCP_SKB_CB(skb)->when; |
1257 | struct dst_entry *dst = NULL; | 1259 | struct dst_entry *dst = NULL; |
1258 | #ifdef CONFIG_SYN_COOKIES | 1260 | #ifdef CONFIG_SYN_COOKIES |
@@ -1327,7 +1329,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1327 | ireq->rmt_addr = saddr; | 1329 | ireq->rmt_addr = saddr; |
1328 | ireq->opt = tcp_v4_save_options(sk, skb); | 1330 | ireq->opt = tcp_v4_save_options(sk, skb); |
1329 | if (!want_cookie) | 1331 | if (!want_cookie) |
1330 | TCP_ECN_create_request(req, skb->h.th); | 1332 | TCP_ECN_create_request(req, tcp_hdr(skb)); |
1331 | 1333 | ||
1332 | if (want_cookie) { | 1334 | if (want_cookie) { |
1333 | #ifdef CONFIG_SYN_COOKIES | 1335 | #ifdef CONFIG_SYN_COOKIES |
@@ -1351,7 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1351 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1353 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1352 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1354 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1353 | peer->v4daddr == saddr) { | 1355 | peer->v4daddr == saddr) { |
1354 | if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && | 1356 | if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && |
1355 | (s32)(peer->tcp_ts - req->ts_recent) > | 1357 | (s32)(peer->tcp_ts - req->ts_recent) > |
1356 | TCP_PAWS_WINDOW) { | 1358 | TCP_PAWS_WINDOW) { |
1357 | NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); | 1359 | NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); |
@@ -1375,7 +1377,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1375 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " | 1377 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " |
1376 | "request from %u.%u.%u.%u/%u\n", | 1378 | "request from %u.%u.%u.%u/%u\n", |
1377 | NIPQUAD(saddr), | 1379 | NIPQUAD(saddr), |
1378 | ntohs(skb->h.th->source)); | 1380 | ntohs(tcp_hdr(skb)->source)); |
1379 | dst_release(dst); | 1381 | dst_release(dst); |
1380 | goto drop_and_free; | 1382 | goto drop_and_free; |
1381 | } | 1383 | } |
@@ -1439,7 +1441,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1439 | newinet->opt = ireq->opt; | 1441 | newinet->opt = ireq->opt; |
1440 | ireq->opt = NULL; | 1442 | ireq->opt = NULL; |
1441 | newinet->mc_index = inet_iif(skb); | 1443 | newinet->mc_index = inet_iif(skb); |
1442 | newinet->mc_ttl = skb->nh.iph->ttl; | 1444 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
1443 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | 1445 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1444 | if (newinet->opt) | 1446 | if (newinet->opt) |
1445 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; | 1447 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; |
@@ -1481,8 +1483,8 @@ exit: | |||
1481 | 1483 | ||
1482 | static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | 1484 | static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) |
1483 | { | 1485 | { |
1484 | struct tcphdr *th = skb->h.th; | 1486 | struct tcphdr *th = tcp_hdr(skb); |
1485 | struct iphdr *iph = skb->nh.iph; | 1487 | const struct iphdr *iph = ip_hdr(skb); |
1486 | struct sock *nsk; | 1488 | struct sock *nsk; |
1487 | struct request_sock **prev; | 1489 | struct request_sock **prev; |
1488 | /* Find possible connection requests. */ | 1490 | /* Find possible connection requests. */ |
@@ -1491,9 +1493,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1491 | if (req) | 1493 | if (req) |
1492 | return tcp_check_req(sk, skb, req, prev); | 1494 | return tcp_check_req(sk, skb, req, prev); |
1493 | 1495 | ||
1494 | nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, | 1496 | nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, |
1495 | th->source, skb->nh.iph->daddr, | 1497 | iph->daddr, th->dest, inet_iif(skb)); |
1496 | th->dest, inet_iif(skb)); | ||
1497 | 1498 | ||
1498 | if (nsk) { | 1499 | if (nsk) { |
1499 | if (nsk->sk_state != TCP_TIME_WAIT) { | 1500 | if (nsk->sk_state != TCP_TIME_WAIT) { |
@@ -1513,15 +1514,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1513 | 1514 | ||
1514 | static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) | 1515 | static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) |
1515 | { | 1516 | { |
1517 | const struct iphdr *iph = ip_hdr(skb); | ||
1518 | |||
1516 | if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1519 | if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1517 | if (!tcp_v4_check(skb->len, skb->nh.iph->saddr, | 1520 | if (!tcp_v4_check(skb->len, iph->saddr, |
1518 | skb->nh.iph->daddr, skb->csum)) { | 1521 | iph->daddr, skb->csum)) { |
1519 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1522 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1520 | return 0; | 1523 | return 0; |
1521 | } | 1524 | } |
1522 | } | 1525 | } |
1523 | 1526 | ||
1524 | skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr, | 1527 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, |
1525 | skb->len, IPPROTO_TCP, 0); | 1528 | skb->len, IPPROTO_TCP, 0); |
1526 | 1529 | ||
1527 | if (skb->len <= 76) { | 1530 | if (skb->len <= 76) { |
@@ -1555,7 +1558,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1555 | 1558 | ||
1556 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1559 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
1557 | TCP_CHECK_TIMER(sk); | 1560 | TCP_CHECK_TIMER(sk); |
1558 | if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) { | 1561 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { |
1559 | rsk = sk; | 1562 | rsk = sk; |
1560 | goto reset; | 1563 | goto reset; |
1561 | } | 1564 | } |
@@ -1563,7 +1566,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1563 | return 0; | 1566 | return 0; |
1564 | } | 1567 | } |
1565 | 1568 | ||
1566 | if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb)) | 1569 | if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) |
1567 | goto csum_err; | 1570 | goto csum_err; |
1568 | 1571 | ||
1569 | if (sk->sk_state == TCP_LISTEN) { | 1572 | if (sk->sk_state == TCP_LISTEN) { |
@@ -1581,7 +1584,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1581 | } | 1584 | } |
1582 | 1585 | ||
1583 | TCP_CHECK_TIMER(sk); | 1586 | TCP_CHECK_TIMER(sk); |
1584 | if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) { | 1587 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { |
1585 | rsk = sk; | 1588 | rsk = sk; |
1586 | goto reset; | 1589 | goto reset; |
1587 | } | 1590 | } |
@@ -1610,6 +1613,7 @@ csum_err: | |||
1610 | 1613 | ||
1611 | int tcp_v4_rcv(struct sk_buff *skb) | 1614 | int tcp_v4_rcv(struct sk_buff *skb) |
1612 | { | 1615 | { |
1616 | const struct iphdr *iph; | ||
1613 | struct tcphdr *th; | 1617 | struct tcphdr *th; |
1614 | struct sock *sk; | 1618 | struct sock *sk; |
1615 | int ret; | 1619 | int ret; |
@@ -1623,7 +1627,7 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1623 | if (!pskb_may_pull(skb, sizeof(struct tcphdr))) | 1627 | if (!pskb_may_pull(skb, sizeof(struct tcphdr))) |
1624 | goto discard_it; | 1628 | goto discard_it; |
1625 | 1629 | ||
1626 | th = skb->h.th; | 1630 | th = tcp_hdr(skb); |
1627 | 1631 | ||
1628 | if (th->doff < sizeof(struct tcphdr) / 4) | 1632 | if (th->doff < sizeof(struct tcphdr) / 4) |
1629 | goto bad_packet; | 1633 | goto bad_packet; |
@@ -1634,23 +1638,21 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1634 | * Packet length and doff are validated by header prediction, | 1638 | * Packet length and doff are validated by header prediction, |
1635 | * provided case of th->doff==0 is eliminated. | 1639 | * provided case of th->doff==0 is eliminated. |
1636 | * So, we defer the checks. */ | 1640 | * So, we defer the checks. */ |
1637 | if ((skb->ip_summed != CHECKSUM_UNNECESSARY && | 1641 | if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) |
1638 | tcp_v4_checksum_init(skb))) | ||
1639 | goto bad_packet; | 1642 | goto bad_packet; |
1640 | 1643 | ||
1641 | th = skb->h.th; | 1644 | th = tcp_hdr(skb); |
1645 | iph = ip_hdr(skb); | ||
1642 | TCP_SKB_CB(skb)->seq = ntohl(th->seq); | 1646 | TCP_SKB_CB(skb)->seq = ntohl(th->seq); |
1643 | TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + | 1647 | TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + |
1644 | skb->len - th->doff * 4); | 1648 | skb->len - th->doff * 4); |
1645 | TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); | 1649 | TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); |
1646 | TCP_SKB_CB(skb)->when = 0; | 1650 | TCP_SKB_CB(skb)->when = 0; |
1647 | TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; | 1651 | TCP_SKB_CB(skb)->flags = iph->tos; |
1648 | TCP_SKB_CB(skb)->sacked = 0; | 1652 | TCP_SKB_CB(skb)->sacked = 0; |
1649 | 1653 | ||
1650 | sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, | 1654 | sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, |
1651 | skb->nh.iph->daddr, th->dest, | 1655 | iph->daddr, th->dest, inet_iif(skb)); |
1652 | inet_iif(skb)); | ||
1653 | |||
1654 | if (!sk) | 1656 | if (!sk) |
1655 | goto no_tcp_socket; | 1657 | goto no_tcp_socket; |
1656 | 1658 | ||
@@ -1724,8 +1726,7 @@ do_time_wait: | |||
1724 | switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { | 1726 | switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { |
1725 | case TCP_TW_SYN: { | 1727 | case TCP_TW_SYN: { |
1726 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, | 1728 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, |
1727 | skb->nh.iph->daddr, | 1729 | iph->daddr, th->dest, |
1728 | th->dest, | ||
1729 | inet_iif(skb)); | 1730 | inet_iif(skb)); |
1730 | if (sk2) { | 1731 | if (sk2) { |
1731 | inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); | 1732 | inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); |
@@ -1770,7 +1771,7 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
1770 | 1771 | ||
1771 | if (peer) { | 1772 | if (peer) { |
1772 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | 1773 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || |
1773 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && | 1774 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && |
1774 | peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { | 1775 | peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { |
1775 | peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; | 1776 | peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; |
1776 | peer->tcp_ts = tp->rx_opt.ts_recent; | 1777 | peer->tcp_ts = tp->rx_opt.ts_recent; |
@@ -1791,7 +1792,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | |||
1791 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 1792 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
1792 | 1793 | ||
1793 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | 1794 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || |
1794 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && | 1795 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && |
1795 | peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { | 1796 | peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { |
1796 | peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; | 1797 | peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; |
1797 | peer->tcp_ts = tcptw->tw_ts_recent; | 1798 | peer->tcp_ts = tcptw->tw_ts_recent; |
@@ -1890,7 +1891,7 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
1890 | tcp_cleanup_congestion_control(sk); | 1891 | tcp_cleanup_congestion_control(sk); |
1891 | 1892 | ||
1892 | /* Cleanup up the write buffer. */ | 1893 | /* Cleanup up the write buffer. */ |
1893 | sk_stream_writequeue_purge(sk); | 1894 | tcp_write_queue_purge(sk); |
1894 | 1895 | ||
1895 | /* Cleans up our, hopefully empty, out_of_order_queue. */ | 1896 | /* Cleans up our, hopefully empty, out_of_order_queue. */ |
1896 | __skb_queue_purge(&tp->out_of_order_queue); | 1897 | __skb_queue_purge(&tp->out_of_order_queue); |
@@ -2293,13 +2294,13 @@ static void get_openreq4(struct sock *sk, struct request_sock *req, | |||
2293 | req); | 2294 | req); |
2294 | } | 2295 | } |
2295 | 2296 | ||
2296 | static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | 2297 | static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i) |
2297 | { | 2298 | { |
2298 | int timer_active; | 2299 | int timer_active; |
2299 | unsigned long timer_expires; | 2300 | unsigned long timer_expires; |
2300 | struct tcp_sock *tp = tcp_sk(sp); | 2301 | struct tcp_sock *tp = tcp_sk(sk); |
2301 | const struct inet_connection_sock *icsk = inet_csk(sp); | 2302 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2302 | struct inet_sock *inet = inet_sk(sp); | 2303 | struct inet_sock *inet = inet_sk(sk); |
2303 | __be32 dest = inet->daddr; | 2304 | __be32 dest = inet->daddr; |
2304 | __be32 src = inet->rcv_saddr; | 2305 | __be32 src = inet->rcv_saddr; |
2305 | __u16 destp = ntohs(inet->dport); | 2306 | __u16 destp = ntohs(inet->dport); |
@@ -2311,9 +2312,9 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | |||
2311 | } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { | 2312 | } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { |
2312 | timer_active = 4; | 2313 | timer_active = 4; |
2313 | timer_expires = icsk->icsk_timeout; | 2314 | timer_expires = icsk->icsk_timeout; |
2314 | } else if (timer_pending(&sp->sk_timer)) { | 2315 | } else if (timer_pending(&sk->sk_timer)) { |
2315 | timer_active = 2; | 2316 | timer_active = 2; |
2316 | timer_expires = sp->sk_timer.expires; | 2317 | timer_expires = sk->sk_timer.expires; |
2317 | } else { | 2318 | } else { |
2318 | timer_active = 0; | 2319 | timer_active = 0; |
2319 | timer_expires = jiffies; | 2320 | timer_expires = jiffies; |
@@ -2321,17 +2322,17 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | |||
2321 | 2322 | ||
2322 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " | 2323 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " |
2323 | "%08X %5d %8d %lu %d %p %u %u %u %u %d", | 2324 | "%08X %5d %8d %lu %d %p %u %u %u %u %d", |
2324 | i, src, srcp, dest, destp, sp->sk_state, | 2325 | i, src, srcp, dest, destp, sk->sk_state, |
2325 | tp->write_seq - tp->snd_una, | 2326 | tp->write_seq - tp->snd_una, |
2326 | sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog : | 2327 | sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : |
2327 | (tp->rcv_nxt - tp->copied_seq), | 2328 | (tp->rcv_nxt - tp->copied_seq), |
2328 | timer_active, | 2329 | timer_active, |
2329 | jiffies_to_clock_t(timer_expires - jiffies), | 2330 | jiffies_to_clock_t(timer_expires - jiffies), |
2330 | icsk->icsk_retransmits, | 2331 | icsk->icsk_retransmits, |
2331 | sock_i_uid(sp), | 2332 | sock_i_uid(sk), |
2332 | icsk->icsk_probes_out, | 2333 | icsk->icsk_probes_out, |
2333 | sock_i_ino(sp), | 2334 | sock_i_ino(sk), |
2334 | atomic_read(&sp->sk_refcnt), sp, | 2335 | atomic_read(&sk->sk_refcnt), sk, |
2335 | icsk->icsk_rto, | 2336 | icsk->icsk_rto, |
2336 | icsk->icsk_ack.ato, | 2337 | icsk->icsk_ack.ato, |
2337 | (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, | 2338 | (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, |
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index f0ebaf0e21cb..43294ad9f63e 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c | |||
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk) | |||
218 | * 3. calc smoothed OWD (SOWD). | 218 | * 3. calc smoothed OWD (SOWD). |
219 | * Most ideas come from the original TCP-LP implementation. | 219 | * Most ideas come from the original TCP-LP implementation. |
220 | */ | 220 | */ |
221 | static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) | 221 | static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) |
222 | { | 222 | { |
223 | struct lp *lp = inet_csk_ca(sk); | 223 | struct lp *lp = inet_csk_ca(sk); |
224 | s64 mowd = tcp_lp_owd_calculator(sk); | 224 | s64 mowd = tcp_lp_owd_calculator(sk); |
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) | |||
261 | * newReno in increase case. | 261 | * newReno in increase case. |
262 | * We work it out by following the idea from TCP-LP's paper directly | 262 | * We work it out by following the idea from TCP-LP's paper directly |
263 | */ | 263 | */ |
264 | static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) | 264 | static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) |
265 | { | 265 | { |
266 | struct tcp_sock *tp = tcp_sk(sk); | 266 | struct tcp_sock *tp = tcp_sk(sk); |
267 | struct lp *lp = inet_csk_ca(sk); | 267 | struct lp *lp = inet_csk_ca(sk); |
268 | 268 | ||
269 | tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last))); | ||
270 | |||
269 | /* calc inference */ | 271 | /* calc inference */ |
270 | if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) | 272 | if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) |
271 | lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); | 273 | lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); |
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) | |||
312 | } | 314 | } |
313 | 315 | ||
314 | static struct tcp_congestion_ops tcp_lp = { | 316 | static struct tcp_congestion_ops tcp_lp = { |
317 | .flags = TCP_CONG_RTT_STAMP, | ||
315 | .init = tcp_lp_init, | 318 | .init = tcp_lp_init, |
316 | .ssthresh = tcp_reno_ssthresh, | 319 | .ssthresh = tcp_reno_ssthresh, |
317 | .cong_avoid = tcp_lp_cong_avoid, | 320 | .cong_avoid = tcp_lp_cong_avoid, |
318 | .min_cwnd = tcp_reno_min_cwnd, | 321 | .min_cwnd = tcp_reno_min_cwnd, |
319 | .rtt_sample = tcp_lp_rtt_sample, | ||
320 | .pkts_acked = tcp_lp_pkts_acked, | 322 | .pkts_acked = tcp_lp_pkts_acked, |
321 | 323 | ||
322 | .owner = THIS_MODULE, | 324 | .owner = THIS_MODULE, |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6b5c64f3c925..a12b08fca5ad 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -149,7 +149,7 @@ kill_with_rst: | |||
149 | tw->tw_substate = TCP_TIME_WAIT; | 149 | tw->tw_substate = TCP_TIME_WAIT; |
150 | tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 150 | tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
151 | if (tmp_opt.saw_tstamp) { | 151 | if (tmp_opt.saw_tstamp) { |
152 | tcptw->tw_ts_recent_stamp = xtime.tv_sec; | 152 | tcptw->tw_ts_recent_stamp = get_seconds(); |
153 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; | 153 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
154 | } | 154 | } |
155 | 155 | ||
@@ -208,7 +208,7 @@ kill: | |||
208 | 208 | ||
209 | if (tmp_opt.saw_tstamp) { | 209 | if (tmp_opt.saw_tstamp) { |
210 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; | 210 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
211 | tcptw->tw_ts_recent_stamp = xtime.tv_sec; | 211 | tcptw->tw_ts_recent_stamp = get_seconds(); |
212 | } | 212 | } |
213 | 213 | ||
214 | inet_twsk_put(tw); | 214 | inet_twsk_put(tw); |
@@ -246,7 +246,7 @@ kill: | |||
246 | if (paws_reject) | 246 | if (paws_reject) |
247 | NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); | 247 | NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); |
248 | 248 | ||
249 | if(!th->rst) { | 249 | if (!th->rst) { |
250 | /* In this case we must reset the TIMEWAIT timer. | 250 | /* In this case we must reset the TIMEWAIT timer. |
251 | * | 251 | * |
252 | * If it is ACKless SYN it may be both old duplicate | 252 | * If it is ACKless SYN it may be both old duplicate |
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
324 | if (tcp_alloc_md5sig_pool() == NULL) | 324 | if (tcp_alloc_md5sig_pool() == NULL) |
325 | BUG(); | 325 | BUG(); |
326 | } | 326 | } |
327 | } while(0); | 327 | } while (0); |
328 | #endif | 328 | #endif |
329 | 329 | ||
330 | /* Linkage updates. */ | 330 | /* Linkage updates. */ |
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
387 | /* Now setup tcp_sock */ | 387 | /* Now setup tcp_sock */ |
388 | newtp = tcp_sk(newsk); | 388 | newtp = tcp_sk(newsk); |
389 | newtp->pred_flags = 0; | 389 | newtp->pred_flags = 0; |
390 | newtp->rcv_nxt = treq->rcv_isn + 1; | 390 | newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; |
391 | newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1; | 391 | newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; |
392 | 392 | ||
393 | tcp_prequeue_init(newtp); | 393 | tcp_prequeue_init(newtp); |
394 | 394 | ||
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
422 | tcp_set_ca_state(newsk, TCP_CA_Open); | 422 | tcp_set_ca_state(newsk, TCP_CA_Open); |
423 | tcp_init_xmit_timers(newsk); | 423 | tcp_init_xmit_timers(newsk); |
424 | skb_queue_head_init(&newtp->out_of_order_queue); | 424 | skb_queue_head_init(&newtp->out_of_order_queue); |
425 | newtp->rcv_wup = treq->rcv_isn + 1; | ||
426 | newtp->write_seq = treq->snt_isn + 1; | 425 | newtp->write_seq = treq->snt_isn + 1; |
427 | newtp->pushed_seq = newtp->write_seq; | 426 | newtp->pushed_seq = newtp->write_seq; |
428 | newtp->copied_seq = treq->rcv_isn + 1; | ||
429 | 427 | ||
430 | newtp->rx_opt.saw_tstamp = 0; | 428 | newtp->rx_opt.saw_tstamp = 0; |
431 | 429 | ||
@@ -440,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
440 | keepalive_time_when(newtp)); | 438 | keepalive_time_when(newtp)); |
441 | 439 | ||
442 | newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; | 440 | newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; |
443 | if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { | 441 | if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { |
444 | if (sysctl_tcp_fack) | 442 | if (sysctl_tcp_fack) |
445 | newtp->rx_opt.sack_ok |= 2; | 443 | newtp->rx_opt.sack_ok |= 2; |
446 | } | 444 | } |
@@ -455,12 +453,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
455 | newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; | 453 | newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; |
456 | newtp->window_clamp = min(newtp->window_clamp, 65535U); | 454 | newtp->window_clamp = min(newtp->window_clamp, 65535U); |
457 | } | 455 | } |
458 | newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale; | 456 | newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) << |
457 | newtp->rx_opt.snd_wscale); | ||
459 | newtp->max_window = newtp->snd_wnd; | 458 | newtp->max_window = newtp->snd_wnd; |
460 | 459 | ||
461 | if (newtp->rx_opt.tstamp_ok) { | 460 | if (newtp->rx_opt.tstamp_ok) { |
462 | newtp->rx_opt.ts_recent = req->ts_recent; | 461 | newtp->rx_opt.ts_recent = req->ts_recent; |
463 | newtp->rx_opt.ts_recent_stamp = xtime.tv_sec; | 462 | newtp->rx_opt.ts_recent_stamp = get_seconds(); |
464 | newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; | 463 | newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; |
465 | } else { | 464 | } else { |
466 | newtp->rx_opt.ts_recent_stamp = 0; | 465 | newtp->rx_opt.ts_recent_stamp = 0; |
@@ -490,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
490 | struct request_sock *req, | 489 | struct request_sock *req, |
491 | struct request_sock **prev) | 490 | struct request_sock **prev) |
492 | { | 491 | { |
493 | struct tcphdr *th = skb->h.th; | 492 | const struct tcphdr *th = tcp_hdr(skb); |
494 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 493 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
495 | int paws_reject = 0; | 494 | int paws_reject = 0; |
496 | struct tcp_options_received tmp_opt; | 495 | struct tcp_options_received tmp_opt; |
@@ -506,7 +505,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
506 | * it can be estimated (approximately) | 505 | * it can be estimated (approximately) |
507 | * from another data. | 506 | * from another data. |
508 | */ | 507 | */ |
509 | tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); | 508 | tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); |
510 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); | 509 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); |
511 | } | 510 | } |
512 | } | 511 | } |
@@ -712,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child, | |||
712 | int state = child->sk_state; | 711 | int state = child->sk_state; |
713 | 712 | ||
714 | if (!sock_owned_by_user(child)) { | 713 | if (!sock_owned_by_user(child)) { |
715 | ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len); | 714 | ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), |
716 | 715 | skb->len); | |
717 | /* Wakeup parent, send SIGIO */ | 716 | /* Wakeup parent, send SIGIO */ |
718 | if (state == TCP_SYN_RECV && child->sk_state != state) | 717 | if (state == TCP_SYN_RECV && child->sk_state != state) |
719 | parent->sk_data_ready(parent, 0); | 718 | parent->sk_data_ready(parent, 0); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3c24881f2a65..e70a6840cb64 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -62,14 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512; | |||
62 | /* By default, RFC2861 behavior. */ | 62 | /* By default, RFC2861 behavior. */ |
63 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 63 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
64 | 64 | ||
65 | static void update_send_head(struct sock *sk, struct tcp_sock *tp, | 65 | static void update_send_head(struct sock *sk, struct sk_buff *skb) |
66 | struct sk_buff *skb) | ||
67 | { | 66 | { |
68 | sk->sk_send_head = skb->next; | 67 | struct tcp_sock *tp = tcp_sk(sk); |
69 | if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) | 68 | |
70 | sk->sk_send_head = NULL; | 69 | tcp_advance_send_head(sk, skb); |
71 | tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; | 70 | tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; |
72 | tcp_packets_out_inc(sk, tp, skb); | 71 | tcp_packets_out_inc(sk, skb); |
73 | } | 72 | } |
74 | 73 | ||
75 | /* SND.NXT, if window was not shrunk. | 74 | /* SND.NXT, if window was not shrunk. |
@@ -78,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp, | |||
78 | * Anything in between SND.UNA...SND.UNA+SND.WND also can be already | 77 | * Anything in between SND.UNA...SND.UNA+SND.WND also can be already |
79 | * invalid. OK, let's make this for now: | 78 | * invalid. OK, let's make this for now: |
80 | */ | 79 | */ |
81 | static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp) | 80 | static inline __u32 tcp_acceptable_seq(struct sock *sk) |
82 | { | 81 | { |
82 | struct tcp_sock *tp = tcp_sk(sk); | ||
83 | |||
83 | if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) | 84 | if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) |
84 | return tp->snd_nxt; | 85 | return tp->snd_nxt; |
85 | else | 86 | else |
@@ -238,7 +239,7 @@ static u16 tcp_select_window(struct sock *sk) | |||
238 | u32 new_win = __tcp_select_window(sk); | 239 | u32 new_win = __tcp_select_window(sk); |
239 | 240 | ||
240 | /* Never shrink the offered window */ | 241 | /* Never shrink the offered window */ |
241 | if(new_win < cur_win) { | 242 | if (new_win < cur_win) { |
242 | /* Danger Will Robinson! | 243 | /* Danger Will Robinson! |
243 | * Don't update rcv_wup/rcv_wnd here or else | 244 | * Don't update rcv_wup/rcv_wnd here or else |
244 | * we will not be able to advertise a zero | 245 | * we will not be able to advertise a zero |
@@ -289,10 +290,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, | |||
289 | (TCPOPT_SACK << 8) | | 290 | (TCPOPT_SACK << 8) | |
290 | (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * | 291 | (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * |
291 | TCPOLEN_SACK_PERBLOCK))); | 292 | TCPOLEN_SACK_PERBLOCK))); |
292 | for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { | 293 | |
294 | for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { | ||
293 | *ptr++ = htonl(sp[this_sack].start_seq); | 295 | *ptr++ = htonl(sp[this_sack].start_seq); |
294 | *ptr++ = htonl(sp[this_sack].end_seq); | 296 | *ptr++ = htonl(sp[this_sack].end_seq); |
295 | } | 297 | } |
298 | |||
296 | if (tp->rx_opt.dsack) { | 299 | if (tp->rx_opt.dsack) { |
297 | tp->rx_opt.dsack = 0; | 300 | tp->rx_opt.dsack = 0; |
298 | tp->rx_opt.eff_sacks--; | 301 | tp->rx_opt.eff_sacks--; |
@@ -337,7 +340,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, | |||
337 | */ | 340 | */ |
338 | *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); | 341 | *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); |
339 | if (ts) { | 342 | if (ts) { |
340 | if(sack) | 343 | if (sack) |
341 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | 344 | *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | |
342 | (TCPOLEN_SACK_PERM << 16) | | 345 | (TCPOLEN_SACK_PERM << 16) | |
343 | (TCPOPT_TIMESTAMP << 8) | | 346 | (TCPOPT_TIMESTAMP << 8) | |
@@ -349,7 +352,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, | |||
349 | TCPOLEN_TIMESTAMP); | 352 | TCPOLEN_TIMESTAMP); |
350 | *ptr++ = htonl(tstamp); /* TSVAL */ | 353 | *ptr++ = htonl(tstamp); /* TSVAL */ |
351 | *ptr++ = htonl(ts_recent); /* TSECR */ | 354 | *ptr++ = htonl(ts_recent); /* TSECR */ |
352 | } else if(sack) | 355 | } else if (sack) |
353 | *ptr++ = htonl((TCPOPT_NOP << 24) | | 356 | *ptr++ = htonl((TCPOPT_NOP << 24) | |
354 | (TCPOPT_NOP << 16) | | 357 | (TCPOPT_NOP << 16) | |
355 | (TCPOPT_SACK_PERM << 8) | | 358 | (TCPOPT_SACK_PERM << 8) | |
@@ -406,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
406 | /* If congestion control is doing timestamping, we must | 409 | /* If congestion control is doing timestamping, we must |
407 | * take such a timestamp before we potentially clone/copy. | 410 | * take such a timestamp before we potentially clone/copy. |
408 | */ | 411 | */ |
409 | if (icsk->icsk_ca_ops->rtt_sample) | 412 | if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) |
410 | __net_timestamp(skb); | 413 | __net_timestamp(skb); |
411 | 414 | ||
412 | if (likely(clone_it)) { | 415 | if (likely(clone_it)) { |
@@ -430,7 +433,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
430 | sysctl_flags = 0; | 433 | sysctl_flags = 0; |
431 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 434 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { |
432 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; | 435 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; |
433 | if(sysctl_tcp_timestamps) { | 436 | if (sysctl_tcp_timestamps) { |
434 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; | 437 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; |
435 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; | 438 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; |
436 | } | 439 | } |
@@ -465,11 +468,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
465 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; | 468 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; |
466 | #endif | 469 | #endif |
467 | 470 | ||
468 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); | 471 | skb_push(skb, tcp_header_size); |
469 | skb->h.th = th; | 472 | skb_reset_transport_header(skb); |
470 | skb_set_owner_w(skb, sk); | 473 | skb_set_owner_w(skb, sk); |
471 | 474 | ||
472 | /* Build TCP header and checksum it. */ | 475 | /* Build TCP header and checksum it. */ |
476 | th = tcp_hdr(skb); | ||
473 | th->source = inet->sport; | 477 | th->source = inet->sport; |
474 | th->dest = inet->dport; | 478 | th->dest = inet->dport; |
475 | th->seq = htonl(tcb->seq); | 479 | th->seq = htonl(tcb->seq); |
@@ -515,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
515 | md5 ? &md5_hash_location : | 519 | md5 ? &md5_hash_location : |
516 | #endif | 520 | #endif |
517 | NULL); | 521 | NULL); |
518 | TCP_ECN_send(sk, tp, skb, tcp_header_size); | 522 | TCP_ECN_send(sk, skb, tcp_header_size); |
519 | } | 523 | } |
520 | 524 | ||
521 | #ifdef CONFIG_TCP_MD5SIG | 525 | #ifdef CONFIG_TCP_MD5SIG |
@@ -524,7 +528,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
524 | tp->af_specific->calc_md5_hash(md5_hash_location, | 528 | tp->af_specific->calc_md5_hash(md5_hash_location, |
525 | md5, | 529 | md5, |
526 | sk, NULL, NULL, | 530 | sk, NULL, NULL, |
527 | skb->h.th, | 531 | tcp_hdr(skb), |
528 | sk->sk_protocol, | 532 | sk->sk_protocol, |
529 | skb->len); | 533 | skb->len); |
530 | } | 534 | } |
@@ -545,7 +549,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
545 | if (likely(err <= 0)) | 549 | if (likely(err <= 0)) |
546 | return err; | 550 | return err; |
547 | 551 | ||
548 | tcp_enter_cwr(sk); | 552 | tcp_enter_cwr(sk, 1); |
549 | 553 | ||
550 | return net_xmit_eval(err); | 554 | return net_xmit_eval(err); |
551 | 555 | ||
@@ -567,12 +571,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
567 | /* Advance write_seq and place onto the write_queue. */ | 571 | /* Advance write_seq and place onto the write_queue. */ |
568 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; | 572 | tp->write_seq = TCP_SKB_CB(skb)->end_seq; |
569 | skb_header_release(skb); | 573 | skb_header_release(skb); |
570 | __skb_queue_tail(&sk->sk_write_queue, skb); | 574 | tcp_add_write_queue_tail(sk, skb); |
571 | sk_charge_skb(sk, skb); | 575 | sk_charge_skb(sk, skb); |
572 | |||
573 | /* Queue it, remembering where we must start sending. */ | ||
574 | if (sk->sk_send_head == NULL) | ||
575 | sk->sk_send_head = skb; | ||
576 | } | 576 | } |
577 | 577 | ||
578 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) | 578 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) |
@@ -705,7 +705,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
705 | 705 | ||
706 | /* Link BUFF into the send queue. */ | 706 | /* Link BUFF into the send queue. */ |
707 | skb_header_release(buff); | 707 | skb_header_release(buff); |
708 | __skb_append(skb, buff, &sk->sk_write_queue); | 708 | tcp_insert_write_queue_after(skb, buff, sk); |
709 | 709 | ||
710 | return 0; | 710 | return 0; |
711 | } | 711 | } |
@@ -736,7 +736,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
736 | } | 736 | } |
737 | skb_shinfo(skb)->nr_frags = k; | 737 | skb_shinfo(skb)->nr_frags = k; |
738 | 738 | ||
739 | skb->tail = skb->data; | 739 | skb_reset_tail_pointer(skb); |
740 | skb->data_len -= len; | 740 | skb->data_len -= len; |
741 | skb->len = skb->data_len; | 741 | skb->len = skb->data_len; |
742 | } | 742 | } |
@@ -930,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
930 | 930 | ||
931 | /* Congestion window validation. (RFC2861) */ | 931 | /* Congestion window validation. (RFC2861) */ |
932 | 932 | ||
933 | static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) | 933 | static void tcp_cwnd_validate(struct sock *sk) |
934 | { | 934 | { |
935 | struct tcp_sock *tp = tcp_sk(sk); | ||
935 | __u32 packets_out = tp->packets_out; | 936 | __u32 packets_out = tp->packets_out; |
936 | 937 | ||
937 | if (packets_out >= tp->snd_cwnd) { | 938 | if (packets_out >= tp->snd_cwnd) { |
@@ -1056,7 +1057,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns | |||
1056 | return !after(end_seq, tp->snd_una + tp->snd_wnd); | 1057 | return !after(end_seq, tp->snd_una + tp->snd_wnd); |
1057 | } | 1058 | } |
1058 | 1059 | ||
1059 | /* This checks if the data bearing packet SKB (usually sk->sk_send_head) | 1060 | /* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) |
1060 | * should be put on the wire right now. If so, it returns the number of | 1061 | * should be put on the wire right now. If so, it returns the number of |
1061 | * packets allowed by the congestion window. | 1062 | * packets allowed by the congestion window. |
1062 | */ | 1063 | */ |
@@ -1079,15 +1080,10 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, | |||
1079 | return cwnd_quota; | 1080 | return cwnd_quota; |
1080 | } | 1081 | } |
1081 | 1082 | ||
1082 | static inline int tcp_skb_is_last(const struct sock *sk, | 1083 | int tcp_may_send_now(struct sock *sk) |
1083 | const struct sk_buff *skb) | ||
1084 | { | ||
1085 | return skb->next == (struct sk_buff *)&sk->sk_write_queue; | ||
1086 | } | ||
1087 | |||
1088 | int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) | ||
1089 | { | 1084 | { |
1090 | struct sk_buff *skb = sk->sk_send_head; | 1085 | struct tcp_sock *tp = tcp_sk(sk); |
1086 | struct sk_buff *skb = tcp_send_head(sk); | ||
1091 | 1087 | ||
1092 | return (skb && | 1088 | return (skb && |
1093 | tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), | 1089 | tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), |
@@ -1143,7 +1139,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1143 | 1139 | ||
1144 | /* Link BUFF into the send queue. */ | 1140 | /* Link BUFF into the send queue. */ |
1145 | skb_header_release(buff); | 1141 | skb_header_release(buff); |
1146 | __skb_append(skb, buff, &sk->sk_write_queue); | 1142 | tcp_insert_write_queue_after(skb, buff, sk); |
1147 | 1143 | ||
1148 | return 0; | 1144 | return 0; |
1149 | } | 1145 | } |
@@ -1153,8 +1149,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1153 | * | 1149 | * |
1154 | * This algorithm is from John Heffner. | 1150 | * This algorithm is from John Heffner. |
1155 | */ | 1151 | */ |
1156 | static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) | 1152 | static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) |
1157 | { | 1153 | { |
1154 | struct tcp_sock *tp = tcp_sk(sk); | ||
1158 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1155 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1159 | u32 send_win, cong_win, limit, in_flight; | 1156 | u32 send_win, cong_win, limit, in_flight; |
1160 | 1157 | ||
@@ -1249,10 +1246,10 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1249 | 1246 | ||
1250 | /* Have enough data in the send queue to probe? */ | 1247 | /* Have enough data in the send queue to probe? */ |
1251 | len = 0; | 1248 | len = 0; |
1252 | if ((skb = sk->sk_send_head) == NULL) | 1249 | if ((skb = tcp_send_head(sk)) == NULL) |
1253 | return -1; | 1250 | return -1; |
1254 | while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb)) | 1251 | while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb)) |
1255 | skb = skb->next; | 1252 | skb = tcp_write_queue_next(sk, skb); |
1256 | if (len < probe_size) | 1253 | if (len < probe_size) |
1257 | return -1; | 1254 | return -1; |
1258 | 1255 | ||
@@ -1279,9 +1276,9 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1279 | return -1; | 1276 | return -1; |
1280 | sk_charge_skb(sk, nskb); | 1277 | sk_charge_skb(sk, nskb); |
1281 | 1278 | ||
1282 | skb = sk->sk_send_head; | 1279 | skb = tcp_send_head(sk); |
1283 | __skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue); | 1280 | tcp_insert_write_queue_before(nskb, skb, sk); |
1284 | sk->sk_send_head = nskb; | 1281 | tcp_advance_send_head(sk, skb); |
1285 | 1282 | ||
1286 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; | 1283 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; |
1287 | TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; | 1284 | TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; |
@@ -1292,7 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1292 | 1289 | ||
1293 | len = 0; | 1290 | len = 0; |
1294 | while (len < probe_size) { | 1291 | while (len < probe_size) { |
1295 | next = skb->next; | 1292 | next = tcp_write_queue_next(sk, skb); |
1296 | 1293 | ||
1297 | copy = min_t(int, skb->len, probe_size - len); | 1294 | copy = min_t(int, skb->len, probe_size - len); |
1298 | if (nskb->ip_summed) | 1295 | if (nskb->ip_summed) |
@@ -1305,7 +1302,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1305 | /* We've eaten all the data from this skb. | 1302 | /* We've eaten all the data from this skb. |
1306 | * Throw it away. */ | 1303 | * Throw it away. */ |
1307 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; | 1304 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; |
1308 | __skb_unlink(skb, &sk->sk_write_queue); | 1305 | tcp_unlink_write_queue(skb, sk); |
1309 | sk_stream_free_skb(sk, skb); | 1306 | sk_stream_free_skb(sk, skb); |
1310 | } else { | 1307 | } else { |
1311 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & | 1308 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & |
@@ -1333,7 +1330,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1333 | /* Decrement cwnd here because we are sending | 1330 | /* Decrement cwnd here because we are sending |
1334 | * effectively two packets. */ | 1331 | * effectively two packets. */ |
1335 | tp->snd_cwnd--; | 1332 | tp->snd_cwnd--; |
1336 | update_send_head(sk, tp, nskb); | 1333 | update_send_head(sk, nskb); |
1337 | 1334 | ||
1338 | icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); | 1335 | icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); |
1339 | tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; | 1336 | tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; |
@@ -1377,7 +1374,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1377 | sent_pkts = 1; | 1374 | sent_pkts = 1; |
1378 | } | 1375 | } |
1379 | 1376 | ||
1380 | while ((skb = sk->sk_send_head)) { | 1377 | while ((skb = tcp_send_head(sk))) { |
1381 | unsigned int limit; | 1378 | unsigned int limit; |
1382 | 1379 | ||
1383 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); | 1380 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); |
@@ -1396,7 +1393,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1396 | nonagle : TCP_NAGLE_PUSH)))) | 1393 | nonagle : TCP_NAGLE_PUSH)))) |
1397 | break; | 1394 | break; |
1398 | } else { | 1395 | } else { |
1399 | if (tcp_tso_should_defer(sk, tp, skb)) | 1396 | if (tcp_tso_should_defer(sk, skb)) |
1400 | break; | 1397 | break; |
1401 | } | 1398 | } |
1402 | 1399 | ||
@@ -1425,31 +1422,31 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1425 | /* Advance the send_head. This one is sent out. | 1422 | /* Advance the send_head. This one is sent out. |
1426 | * This call will increment packets_out. | 1423 | * This call will increment packets_out. |
1427 | */ | 1424 | */ |
1428 | update_send_head(sk, tp, skb); | 1425 | update_send_head(sk, skb); |
1429 | 1426 | ||
1430 | tcp_minshall_update(tp, mss_now, skb); | 1427 | tcp_minshall_update(tp, mss_now, skb); |
1431 | sent_pkts++; | 1428 | sent_pkts++; |
1432 | } | 1429 | } |
1433 | 1430 | ||
1434 | if (likely(sent_pkts)) { | 1431 | if (likely(sent_pkts)) { |
1435 | tcp_cwnd_validate(sk, tp); | 1432 | tcp_cwnd_validate(sk); |
1436 | return 0; | 1433 | return 0; |
1437 | } | 1434 | } |
1438 | return !tp->packets_out && sk->sk_send_head; | 1435 | return !tp->packets_out && tcp_send_head(sk); |
1439 | } | 1436 | } |
1440 | 1437 | ||
1441 | /* Push out any pending frames which were held back due to | 1438 | /* Push out any pending frames which were held back due to |
1442 | * TCP_CORK or attempt at coalescing tiny packets. | 1439 | * TCP_CORK or attempt at coalescing tiny packets. |
1443 | * The socket must be locked by the caller. | 1440 | * The socket must be locked by the caller. |
1444 | */ | 1441 | */ |
1445 | void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, | 1442 | void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, |
1446 | unsigned int cur_mss, int nonagle) | 1443 | int nonagle) |
1447 | { | 1444 | { |
1448 | struct sk_buff *skb = sk->sk_send_head; | 1445 | struct sk_buff *skb = tcp_send_head(sk); |
1449 | 1446 | ||
1450 | if (skb) { | 1447 | if (skb) { |
1451 | if (tcp_write_xmit(sk, cur_mss, nonagle)) | 1448 | if (tcp_write_xmit(sk, cur_mss, nonagle)) |
1452 | tcp_check_probe_timer(sk, tp); | 1449 | tcp_check_probe_timer(sk); |
1453 | } | 1450 | } |
1454 | } | 1451 | } |
1455 | 1452 | ||
@@ -1459,7 +1456,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, | |||
1459 | void tcp_push_one(struct sock *sk, unsigned int mss_now) | 1456 | void tcp_push_one(struct sock *sk, unsigned int mss_now) |
1460 | { | 1457 | { |
1461 | struct tcp_sock *tp = tcp_sk(sk); | 1458 | struct tcp_sock *tp = tcp_sk(sk); |
1462 | struct sk_buff *skb = sk->sk_send_head; | 1459 | struct sk_buff *skb = tcp_send_head(sk); |
1463 | unsigned int tso_segs, cwnd_quota; | 1460 | unsigned int tso_segs, cwnd_quota; |
1464 | 1461 | ||
1465 | BUG_ON(!skb || skb->len < mss_now); | 1462 | BUG_ON(!skb || skb->len < mss_now); |
@@ -1493,8 +1490,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
1493 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1490 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1494 | 1491 | ||
1495 | if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { | 1492 | if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { |
1496 | update_send_head(sk, tp, skb); | 1493 | update_send_head(sk, skb); |
1497 | tcp_cwnd_validate(sk, tp); | 1494 | tcp_cwnd_validate(sk); |
1498 | return; | 1495 | return; |
1499 | } | 1496 | } |
1500 | } | 1497 | } |
@@ -1620,7 +1617,7 @@ u32 __tcp_select_window(struct sock *sk) | |||
1620 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) | 1617 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) |
1621 | { | 1618 | { |
1622 | struct tcp_sock *tp = tcp_sk(sk); | 1619 | struct tcp_sock *tp = tcp_sk(sk); |
1623 | struct sk_buff *next_skb = skb->next; | 1620 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); |
1624 | 1621 | ||
1625 | /* The first test we must make is that neither of these two | 1622 | /* The first test we must make is that neither of these two |
1626 | * SKB's are still referenced by someone else. | 1623 | * SKB's are still referenced by someone else. |
@@ -1630,7 +1627,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m | |||
1630 | u16 flags = TCP_SKB_CB(skb)->flags; | 1627 | u16 flags = TCP_SKB_CB(skb)->flags; |
1631 | 1628 | ||
1632 | /* Also punt if next skb has been SACK'd. */ | 1629 | /* Also punt if next skb has been SACK'd. */ |
1633 | if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) | 1630 | if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) |
1634 | return; | 1631 | return; |
1635 | 1632 | ||
1636 | /* Next skb is out of window. */ | 1633 | /* Next skb is out of window. */ |
@@ -1652,9 +1649,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m | |||
1652 | clear_all_retrans_hints(tp); | 1649 | clear_all_retrans_hints(tp); |
1653 | 1650 | ||
1654 | /* Ok. We will be able to collapse the packet. */ | 1651 | /* Ok. We will be able to collapse the packet. */ |
1655 | __skb_unlink(next_skb, &sk->sk_write_queue); | 1652 | tcp_unlink_write_queue(next_skb, sk); |
1656 | 1653 | ||
1657 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); | 1654 | skb_copy_from_linear_data(next_skb, |
1655 | skb_put(skb, next_skb_size), | ||
1656 | next_skb_size); | ||
1658 | 1657 | ||
1659 | if (next_skb->ip_summed == CHECKSUM_PARTIAL) | 1658 | if (next_skb->ip_summed == CHECKSUM_PARTIAL) |
1660 | skb->ip_summed = CHECKSUM_PARTIAL; | 1659 | skb->ip_summed = CHECKSUM_PARTIAL; |
@@ -1706,7 +1705,9 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1706 | unsigned int mss = tcp_current_mss(sk, 0); | 1705 | unsigned int mss = tcp_current_mss(sk, 0); |
1707 | int lost = 0; | 1706 | int lost = 0; |
1708 | 1707 | ||
1709 | sk_stream_for_retrans_queue(skb, sk) { | 1708 | tcp_for_write_queue(skb, sk) { |
1709 | if (skb == tcp_send_head(sk)) | ||
1710 | break; | ||
1710 | if (skb->len > mss && | 1711 | if (skb->len > mss && |
1711 | !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { | 1712 | !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { |
1712 | if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { | 1713 | if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { |
@@ -1788,13 +1789,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1788 | } | 1789 | } |
1789 | 1790 | ||
1790 | /* Collapse two adjacent packets if worthwhile and we can. */ | 1791 | /* Collapse two adjacent packets if worthwhile and we can. */ |
1791 | if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && | 1792 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && |
1792 | (skb->len < (cur_mss >> 1)) && | 1793 | (skb->len < (cur_mss >> 1)) && |
1793 | (skb->next != sk->sk_send_head) && | 1794 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && |
1794 | (skb->next != (struct sk_buff *)&sk->sk_write_queue) && | 1795 | (!tcp_skb_is_last(sk, skb)) && |
1795 | (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && | 1796 | (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && |
1796 | (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) && | 1797 | (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && |
1797 | (sysctl_tcp_retrans_collapse != 0)) | 1798 | (sysctl_tcp_retrans_collapse != 0)) |
1798 | tcp_retrans_try_collapse(sk, skb, cur_mss); | 1799 | tcp_retrans_try_collapse(sk, skb, cur_mss); |
1799 | 1800 | ||
1800 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) | 1801 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) |
@@ -1804,9 +1805,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1804 | * retransmit when old data is attached. So strip it off | 1805 | * retransmit when old data is attached. So strip it off |
1805 | * since it is cheap to do so and saves bytes on the network. | 1806 | * since it is cheap to do so and saves bytes on the network. |
1806 | */ | 1807 | */ |
1807 | if(skb->len > 0 && | 1808 | if (skb->len > 0 && |
1808 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && | 1809 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && |
1809 | tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { | 1810 | tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { |
1810 | if (!pskb_trim(skb, 0)) { | 1811 | if (!pskb_trim(skb, 0)) { |
1811 | TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; | 1812 | TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; |
1812 | skb_shinfo(skb)->gso_segs = 1; | 1813 | skb_shinfo(skb)->gso_segs = 1; |
@@ -1872,15 +1873,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1872 | skb = tp->retransmit_skb_hint; | 1873 | skb = tp->retransmit_skb_hint; |
1873 | packet_cnt = tp->retransmit_cnt_hint; | 1874 | packet_cnt = tp->retransmit_cnt_hint; |
1874 | }else{ | 1875 | }else{ |
1875 | skb = sk->sk_write_queue.next; | 1876 | skb = tcp_write_queue_head(sk); |
1876 | packet_cnt = 0; | 1877 | packet_cnt = 0; |
1877 | } | 1878 | } |
1878 | 1879 | ||
1879 | /* First pass: retransmit lost packets. */ | 1880 | /* First pass: retransmit lost packets. */ |
1880 | if (tp->lost_out) { | 1881 | if (tp->lost_out) { |
1881 | sk_stream_for_retrans_queue_from(skb, sk) { | 1882 | tcp_for_write_queue_from(skb, sk) { |
1882 | __u8 sacked = TCP_SKB_CB(skb)->sacked; | 1883 | __u8 sacked = TCP_SKB_CB(skb)->sacked; |
1883 | 1884 | ||
1885 | if (skb == tcp_send_head(sk)) | ||
1886 | break; | ||
1884 | /* we could do better than to assign each time */ | 1887 | /* we could do better than to assign each time */ |
1885 | tp->retransmit_skb_hint = skb; | 1888 | tp->retransmit_skb_hint = skb; |
1886 | tp->retransmit_cnt_hint = packet_cnt; | 1889 | tp->retransmit_cnt_hint = packet_cnt; |
@@ -1906,8 +1909,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1906 | else | 1909 | else |
1907 | NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); | 1910 | NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); |
1908 | 1911 | ||
1909 | if (skb == | 1912 | if (skb == tcp_write_queue_head(sk)) |
1910 | skb_peek(&sk->sk_write_queue)) | ||
1911 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 1913 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1912 | inet_csk(sk)->icsk_rto, | 1914 | inet_csk(sk)->icsk_rto, |
1913 | TCP_RTO_MAX); | 1915 | TCP_RTO_MAX); |
@@ -1937,18 +1939,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1937 | * segments to send. | 1939 | * segments to send. |
1938 | */ | 1940 | */ |
1939 | 1941 | ||
1940 | if (tcp_may_send_now(sk, tp)) | 1942 | if (tcp_may_send_now(sk)) |
1941 | return; | 1943 | return; |
1942 | 1944 | ||
1943 | if (tp->forward_skb_hint) { | 1945 | if (tp->forward_skb_hint) { |
1944 | skb = tp->forward_skb_hint; | 1946 | skb = tp->forward_skb_hint; |
1945 | packet_cnt = tp->forward_cnt_hint; | 1947 | packet_cnt = tp->forward_cnt_hint; |
1946 | } else{ | 1948 | } else{ |
1947 | skb = sk->sk_write_queue.next; | 1949 | skb = tcp_write_queue_head(sk); |
1948 | packet_cnt = 0; | 1950 | packet_cnt = 0; |
1949 | } | 1951 | } |
1950 | 1952 | ||
1951 | sk_stream_for_retrans_queue_from(skb, sk) { | 1953 | tcp_for_write_queue_from(skb, sk) { |
1954 | if (skb == tcp_send_head(sk)) | ||
1955 | break; | ||
1952 | tp->forward_cnt_hint = packet_cnt; | 1956 | tp->forward_cnt_hint = packet_cnt; |
1953 | tp->forward_skb_hint = skb; | 1957 | tp->forward_skb_hint = skb; |
1954 | 1958 | ||
@@ -1973,7 +1977,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1973 | break; | 1977 | break; |
1974 | } | 1978 | } |
1975 | 1979 | ||
1976 | if (skb == skb_peek(&sk->sk_write_queue)) | 1980 | if (skb == tcp_write_queue_head(sk)) |
1977 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 1981 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1978 | inet_csk(sk)->icsk_rto, | 1982 | inet_csk(sk)->icsk_rto, |
1979 | TCP_RTO_MAX); | 1983 | TCP_RTO_MAX); |
@@ -1989,7 +1993,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1989 | void tcp_send_fin(struct sock *sk) | 1993 | void tcp_send_fin(struct sock *sk) |
1990 | { | 1994 | { |
1991 | struct tcp_sock *tp = tcp_sk(sk); | 1995 | struct tcp_sock *tp = tcp_sk(sk); |
1992 | struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); | 1996 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
1993 | int mss_now; | 1997 | int mss_now; |
1994 | 1998 | ||
1995 | /* Optimization, tack on the FIN if we have a queue of | 1999 | /* Optimization, tack on the FIN if we have a queue of |
@@ -1998,7 +2002,7 @@ void tcp_send_fin(struct sock *sk) | |||
1998 | */ | 2002 | */ |
1999 | mss_now = tcp_current_mss(sk, 1); | 2003 | mss_now = tcp_current_mss(sk, 1); |
2000 | 2004 | ||
2001 | if (sk->sk_send_head != NULL) { | 2005 | if (tcp_send_head(sk) != NULL) { |
2002 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; | 2006 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; |
2003 | TCP_SKB_CB(skb)->end_seq++; | 2007 | TCP_SKB_CB(skb)->end_seq++; |
2004 | tp->write_seq++; | 2008 | tp->write_seq++; |
@@ -2025,7 +2029,7 @@ void tcp_send_fin(struct sock *sk) | |||
2025 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; | 2029 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; |
2026 | tcp_queue_skb(sk, skb); | 2030 | tcp_queue_skb(sk, skb); |
2027 | } | 2031 | } |
2028 | __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF); | 2032 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); |
2029 | } | 2033 | } |
2030 | 2034 | ||
2031 | /* We get here when a process closes a file descriptor (either due to | 2035 | /* We get here when a process closes a file descriptor (either due to |
@@ -2035,7 +2039,6 @@ void tcp_send_fin(struct sock *sk) | |||
2035 | */ | 2039 | */ |
2036 | void tcp_send_active_reset(struct sock *sk, gfp_t priority) | 2040 | void tcp_send_active_reset(struct sock *sk, gfp_t priority) |
2037 | { | 2041 | { |
2038 | struct tcp_sock *tp = tcp_sk(sk); | ||
2039 | struct sk_buff *skb; | 2042 | struct sk_buff *skb; |
2040 | 2043 | ||
2041 | /* NOTE: No TCP options attached and we never retransmit this. */ | 2044 | /* NOTE: No TCP options attached and we never retransmit this. */ |
@@ -2055,7 +2058,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2055 | skb_shinfo(skb)->gso_type = 0; | 2058 | skb_shinfo(skb)->gso_type = 0; |
2056 | 2059 | ||
2057 | /* Send it off. */ | 2060 | /* Send it off. */ |
2058 | TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); | 2061 | TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk); |
2059 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; | 2062 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; |
2060 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2063 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2061 | if (tcp_transmit_skb(sk, skb, 0, priority)) | 2064 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
@@ -2071,7 +2074,7 @@ int tcp_send_synack(struct sock *sk) | |||
2071 | { | 2074 | { |
2072 | struct sk_buff* skb; | 2075 | struct sk_buff* skb; |
2073 | 2076 | ||
2074 | skb = skb_peek(&sk->sk_write_queue); | 2077 | skb = tcp_write_queue_head(sk); |
2075 | if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { | 2078 | if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { |
2076 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); | 2079 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); |
2077 | return -EFAULT; | 2080 | return -EFAULT; |
@@ -2081,9 +2084,9 @@ int tcp_send_synack(struct sock *sk) | |||
2081 | struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); | 2084 | struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); |
2082 | if (nskb == NULL) | 2085 | if (nskb == NULL) |
2083 | return -ENOMEM; | 2086 | return -ENOMEM; |
2084 | __skb_unlink(skb, &sk->sk_write_queue); | 2087 | tcp_unlink_write_queue(skb, sk); |
2085 | skb_header_release(nskb); | 2088 | skb_header_release(nskb); |
2086 | __skb_queue_head(&sk->sk_write_queue, nskb); | 2089 | __tcp_add_write_queue_head(sk, nskb); |
2087 | sk_stream_free_skb(sk, skb); | 2090 | sk_stream_free_skb(sk, skb); |
2088 | sk_charge_skb(sk, nskb); | 2091 | sk_charge_skb(sk, nskb); |
2089 | skb = nskb; | 2092 | skb = nskb; |
@@ -2133,8 +2136,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2133 | if (md5) | 2136 | if (md5) |
2134 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; | 2137 | tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; |
2135 | #endif | 2138 | #endif |
2136 | skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); | 2139 | skb_push(skb, tcp_header_size); |
2140 | skb_reset_transport_header(skb); | ||
2137 | 2141 | ||
2142 | th = tcp_hdr(skb); | ||
2138 | memset(th, 0, sizeof(struct tcphdr)); | 2143 | memset(th, 0, sizeof(struct tcphdr)); |
2139 | th->syn = 1; | 2144 | th->syn = 1; |
2140 | th->ack = 1; | 2145 | th->ack = 1; |
@@ -2188,7 +2193,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2188 | tp->af_specific->calc_md5_hash(md5_hash_location, | 2193 | tp->af_specific->calc_md5_hash(md5_hash_location, |
2189 | md5, | 2194 | md5, |
2190 | NULL, dst, req, | 2195 | NULL, dst, req, |
2191 | skb->h.th, sk->sk_protocol, | 2196 | tcp_hdr(skb), sk->sk_protocol, |
2192 | skb->len); | 2197 | skb->len); |
2193 | } | 2198 | } |
2194 | #endif | 2199 | #endif |
@@ -2271,7 +2276,7 @@ int tcp_connect(struct sock *sk) | |||
2271 | skb_reserve(buff, MAX_TCP_HEADER); | 2276 | skb_reserve(buff, MAX_TCP_HEADER); |
2272 | 2277 | ||
2273 | TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; | 2278 | TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; |
2274 | TCP_ECN_send_syn(sk, tp, buff); | 2279 | TCP_ECN_send_syn(sk, buff); |
2275 | TCP_SKB_CB(buff)->sacked = 0; | 2280 | TCP_SKB_CB(buff)->sacked = 0; |
2276 | skb_shinfo(buff)->gso_segs = 1; | 2281 | skb_shinfo(buff)->gso_segs = 1; |
2277 | skb_shinfo(buff)->gso_size = 0; | 2282 | skb_shinfo(buff)->gso_size = 0; |
@@ -2285,7 +2290,7 @@ int tcp_connect(struct sock *sk) | |||
2285 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 2290 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
2286 | tp->retrans_stamp = TCP_SKB_CB(buff)->when; | 2291 | tp->retrans_stamp = TCP_SKB_CB(buff)->when; |
2287 | skb_header_release(buff); | 2292 | skb_header_release(buff); |
2288 | __skb_queue_tail(&sk->sk_write_queue, buff); | 2293 | __tcp_add_write_queue_tail(sk, buff); |
2289 | sk_charge_skb(sk, buff); | 2294 | sk_charge_skb(sk, buff); |
2290 | tp->packets_out += tcp_skb_pcount(buff); | 2295 | tp->packets_out += tcp_skb_pcount(buff); |
2291 | tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); | 2296 | tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); |
@@ -2363,7 +2368,6 @@ void tcp_send_ack(struct sock *sk) | |||
2363 | { | 2368 | { |
2364 | /* If we have been reset, we may not send again. */ | 2369 | /* If we have been reset, we may not send again. */ |
2365 | if (sk->sk_state != TCP_CLOSE) { | 2370 | if (sk->sk_state != TCP_CLOSE) { |
2366 | struct tcp_sock *tp = tcp_sk(sk); | ||
2367 | struct sk_buff *buff; | 2371 | struct sk_buff *buff; |
2368 | 2372 | ||
2369 | /* We are not putting this on the write queue, so | 2373 | /* We are not putting this on the write queue, so |
@@ -2389,7 +2393,7 @@ void tcp_send_ack(struct sock *sk) | |||
2389 | skb_shinfo(buff)->gso_type = 0; | 2393 | skb_shinfo(buff)->gso_type = 0; |
2390 | 2394 | ||
2391 | /* Send it off, this clears delayed acks for us. */ | 2395 | /* Send it off, this clears delayed acks for us. */ |
2392 | TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); | 2396 | TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk); |
2393 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 2397 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
2394 | tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); | 2398 | tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); |
2395 | } | 2399 | } |
@@ -2441,7 +2445,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2441 | struct tcp_sock *tp = tcp_sk(sk); | 2445 | struct tcp_sock *tp = tcp_sk(sk); |
2442 | struct sk_buff *skb; | 2446 | struct sk_buff *skb; |
2443 | 2447 | ||
2444 | if ((skb = sk->sk_send_head) != NULL && | 2448 | if ((skb = tcp_send_head(sk)) != NULL && |
2445 | before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { | 2449 | before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { |
2446 | int err; | 2450 | int err; |
2447 | unsigned int mss = tcp_current_mss(sk, 0); | 2451 | unsigned int mss = tcp_current_mss(sk, 0); |
@@ -2467,7 +2471,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2467 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2471 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2468 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2472 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2469 | if (!err) { | 2473 | if (!err) { |
2470 | update_send_head(sk, tp, skb); | 2474 | update_send_head(sk, skb); |
2471 | } | 2475 | } |
2472 | return err; | 2476 | return err; |
2473 | } else { | 2477 | } else { |
@@ -2491,7 +2495,7 @@ void tcp_send_probe0(struct sock *sk) | |||
2491 | 2495 | ||
2492 | err = tcp_write_wakeup(sk); | 2496 | err = tcp_write_wakeup(sk); |
2493 | 2497 | ||
2494 | if (tp->packets_out || !sk->sk_send_head) { | 2498 | if (tp->packets_out || !tcp_send_head(sk)) { |
2495 | /* Cancel probe timer, if it is not required. */ | 2499 | /* Cancel probe timer, if it is not required. */ |
2496 | icsk->icsk_probes_out = 0; | 2500 | icsk->icsk_probes_out = 0; |
2497 | icsk->icsk_backoff = 0; | 2501 | icsk->icsk_backoff = 0; |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 61f406f27294..3938d5dbdf20 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/kfifo.h> | 28 | #include <linux/kfifo.h> |
29 | #include <linux/ktime.h> | ||
30 | #include <linux/time.h> | ||
29 | #include <linux/vmalloc.h> | 31 | #include <linux/vmalloc.h> |
30 | 32 | ||
31 | #include <net/tcp.h> | 33 | #include <net/tcp.h> |
@@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); | |||
34 | MODULE_DESCRIPTION("TCP cwnd snooper"); | 36 | MODULE_DESCRIPTION("TCP cwnd snooper"); |
35 | MODULE_LICENSE("GPL"); | 37 | MODULE_LICENSE("GPL"); |
36 | 38 | ||
37 | static int port = 0; | 39 | static int port __read_mostly = 0; |
38 | MODULE_PARM_DESC(port, "Port to match (0=all)"); | 40 | MODULE_PARM_DESC(port, "Port to match (0=all)"); |
39 | module_param(port, int, 0); | 41 | module_param(port, int, 0); |
40 | 42 | ||
41 | static int bufsize = 64*1024; | 43 | static int bufsize __read_mostly = 64*1024; |
42 | MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); | 44 | MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); |
43 | module_param(bufsize, int, 0); | 45 | module_param(bufsize, int, 0); |
44 | 46 | ||
47 | static int full __read_mostly; | ||
48 | MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); | ||
49 | module_param(full, int, 0); | ||
50 | |||
45 | static const char procname[] = "tcpprobe"; | 51 | static const char procname[] = "tcpprobe"; |
46 | 52 | ||
47 | struct { | 53 | struct { |
48 | struct kfifo *fifo; | 54 | struct kfifo *fifo; |
49 | spinlock_t lock; | 55 | spinlock_t lock; |
50 | wait_queue_head_t wait; | 56 | wait_queue_head_t wait; |
51 | struct timeval tstart; | 57 | ktime_t start; |
58 | u32 lastcwnd; | ||
52 | } tcpw; | 59 | } tcpw; |
53 | 60 | ||
61 | /* | ||
62 | * Print to log with timestamps. | ||
63 | * FIXME: causes an extra copy | ||
64 | */ | ||
54 | static void printl(const char *fmt, ...) | 65 | static void printl(const char *fmt, ...) |
55 | { | 66 | { |
56 | va_list args; | 67 | va_list args; |
57 | int len; | 68 | int len; |
58 | struct timeval now; | 69 | struct timespec tv; |
59 | char tbuf[256]; | 70 | char tbuf[256]; |
60 | 71 | ||
61 | va_start(args, fmt); | 72 | va_start(args, fmt); |
62 | do_gettimeofday(&now); | 73 | /* want monotonic time since start of tcp_probe */ |
74 | tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start)); | ||
63 | 75 | ||
64 | now.tv_sec -= tcpw.tstart.tv_sec; | 76 | len = sprintf(tbuf, "%lu.%09lu ", |
65 | now.tv_usec -= tcpw.tstart.tv_usec; | 77 | (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec); |
66 | if (now.tv_usec < 0) { | ||
67 | --now.tv_sec; | ||
68 | now.tv_usec += 1000000; | ||
69 | } | ||
70 | |||
71 | len = sprintf(tbuf, "%lu.%06lu ", | ||
72 | (unsigned long) now.tv_sec, | ||
73 | (unsigned long) now.tv_usec); | ||
74 | len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); | 78 | len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); |
75 | va_end(args); | 79 | va_end(args); |
76 | 80 | ||
@@ -78,38 +82,44 @@ static void printl(const char *fmt, ...) | |||
78 | wake_up(&tcpw.wait); | 82 | wake_up(&tcpw.wait); |
79 | } | 83 | } |
80 | 84 | ||
81 | static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk, | 85 | /* |
82 | struct msghdr *msg, size_t size) | 86 | * Hook inserted to be called before each receive packet. |
87 | * Note: arguments must match tcp_rcv_established()! | ||
88 | */ | ||
89 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
90 | struct tcphdr *th, unsigned len) | ||
83 | { | 91 | { |
84 | const struct tcp_sock *tp = tcp_sk(sk); | 92 | const struct tcp_sock *tp = tcp_sk(sk); |
85 | const struct inet_sock *inet = inet_sk(sk); | 93 | const struct inet_sock *inet = inet_sk(sk); |
86 | 94 | ||
87 | if (port == 0 || ntohs(inet->dport) == port || | 95 | /* Only update if port matches */ |
88 | ntohs(inet->sport) == port) { | 96 | if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) |
97 | && (full || tp->snd_cwnd != tcpw.lastcwnd)) { | ||
89 | printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n", | 98 | printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n", |
90 | NIPQUAD(inet->saddr), ntohs(inet->sport), | 99 | NIPQUAD(inet->saddr), ntohs(inet->sport), |
91 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 100 | NIPQUAD(inet->daddr), ntohs(inet->dport), |
92 | size, tp->snd_nxt, tp->snd_una, | 101 | skb->len, tp->snd_nxt, tp->snd_una, |
93 | tp->snd_cwnd, tcp_current_ssthresh(sk), | 102 | tp->snd_cwnd, tcp_current_ssthresh(sk), |
94 | tp->snd_wnd); | 103 | tp->snd_wnd, tp->srtt >> 3); |
104 | tcpw.lastcwnd = tp->snd_cwnd; | ||
95 | } | 105 | } |
96 | 106 | ||
97 | jprobe_return(); | 107 | jprobe_return(); |
98 | return 0; | 108 | return 0; |
99 | } | 109 | } |
100 | 110 | ||
101 | static struct jprobe tcp_send_probe = { | 111 | static struct jprobe tcp_probe = { |
102 | .kp = { | 112 | .kp = { |
103 | .symbol_name = "tcp_sendmsg", | 113 | .symbol_name = "tcp_rcv_established", |
104 | }, | 114 | }, |
105 | .entry = JPROBE_ENTRY(jtcp_sendmsg), | 115 | .entry = JPROBE_ENTRY(jtcp_rcv_established), |
106 | }; | 116 | }; |
107 | 117 | ||
108 | 118 | ||
109 | static int tcpprobe_open(struct inode * inode, struct file * file) | 119 | static int tcpprobe_open(struct inode * inode, struct file * file) |
110 | { | 120 | { |
111 | kfifo_reset(tcpw.fifo); | 121 | kfifo_reset(tcpw.fifo); |
112 | do_gettimeofday(&tcpw.tstart); | 122 | tcpw.start = ktime_get(); |
113 | return 0; | 123 | return 0; |
114 | } | 124 | } |
115 | 125 | ||
@@ -162,7 +172,7 @@ static __init int tcpprobe_init(void) | |||
162 | if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) | 172 | if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) |
163 | goto err0; | 173 | goto err0; |
164 | 174 | ||
165 | ret = register_jprobe(&tcp_send_probe); | 175 | ret = register_jprobe(&tcp_probe); |
166 | if (ret) | 176 | if (ret) |
167 | goto err1; | 177 | goto err1; |
168 | 178 | ||
@@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void) | |||
180 | { | 190 | { |
181 | kfifo_free(tcpw.fifo); | 191 | kfifo_free(tcpw.fifo); |
182 | proc_net_remove(procname); | 192 | proc_net_remove(procname); |
183 | unregister_jprobe(&tcp_send_probe); | 193 | unregister_jprobe(&tcp_probe); |
184 | 194 | ||
185 | } | 195 | } |
186 | module_exit(tcpprobe_exit); | 196 | module_exit(tcpprobe_exit); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a9243cfc1bea..2ca97b20929d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk) | |||
233 | struct tcp_sock *tp = tcp_sk(sk); | 233 | struct tcp_sock *tp = tcp_sk(sk); |
234 | int max_probes; | 234 | int max_probes; |
235 | 235 | ||
236 | if (tp->packets_out || !sk->sk_send_head) { | 236 | if (tp->packets_out || !tcp_send_head(sk)) { |
237 | icsk->icsk_probes_out = 0; | 237 | icsk->icsk_probes_out = 0; |
238 | return; | 238 | return; |
239 | } | 239 | } |
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
284 | if (!tp->packets_out) | 284 | if (!tp->packets_out) |
285 | goto out; | 285 | goto out; |
286 | 286 | ||
287 | BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue)); | 287 | BUG_TRAP(!tcp_write_queue_empty(sk)); |
288 | 288 | ||
289 | if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && | 289 | if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && |
290 | !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { | 290 | !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { |
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
306 | goto out; | 306 | goto out; |
307 | } | 307 | } |
308 | tcp_enter_loss(sk, 0); | 308 | tcp_enter_loss(sk, 0); |
309 | tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); | 309 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); |
310 | __sk_dst_reset(sk); | 310 | __sk_dst_reset(sk); |
311 | goto out_reset_timer; | 311 | goto out_reset_timer; |
312 | } | 312 | } |
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
341 | tcp_enter_loss(sk, 0); | 341 | tcp_enter_loss(sk, 0); |
342 | } | 342 | } |
343 | 343 | ||
344 | if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) { | 344 | if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { |
345 | /* Retransmission failed because of local congestion, | 345 | /* Retransmission failed because of local congestion, |
346 | * do not backoff. | 346 | * do not backoff. |
347 | */ | 347 | */ |
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
482 | elapsed = keepalive_time_when(tp); | 482 | elapsed = keepalive_time_when(tp); |
483 | 483 | ||
484 | /* It is alive without keepalive 8) */ | 484 | /* It is alive without keepalive 8) */ |
485 | if (tp->packets_out || sk->sk_send_head) | 485 | if (tp->packets_out || tcp_send_head(sk)) |
486 | goto resched; | 486 | goto resched; |
487 | 487 | ||
488 | elapsed = tcp_time_stamp - tp->rcv_tstamp; | 488 | elapsed = tcp_time_stamp - tp->rcv_tstamp; |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 5c484dceb967..73e19cf7df21 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -38,6 +38,8 @@ | |||
38 | 38 | ||
39 | #include <net/tcp.h> | 39 | #include <net/tcp.h> |
40 | 40 | ||
41 | #include "tcp_vegas.h" | ||
42 | |||
41 | /* Default values of the Vegas variables, in fixed-point representation | 43 | /* Default values of the Vegas variables, in fixed-point representation |
42 | * with V_PARAM_SHIFT bits to the right of the binary point. | 44 | * with V_PARAM_SHIFT bits to the right of the binary point. |
43 | */ | 45 | */ |
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644); | |||
54 | MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); | 56 | MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); |
55 | 57 | ||
56 | 58 | ||
57 | /* Vegas variables */ | ||
58 | struct vegas { | ||
59 | u32 beg_snd_nxt; /* right edge during last RTT */ | ||
60 | u32 beg_snd_una; /* left edge during last RTT */ | ||
61 | u32 beg_snd_cwnd; /* saves the size of the cwnd */ | ||
62 | u8 doing_vegas_now;/* if true, do vegas for this RTT */ | ||
63 | u16 cntRTT; /* # of RTTs measured within last RTT */ | ||
64 | u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ | ||
65 | u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ | ||
66 | }; | ||
67 | |||
68 | /* There are several situations when we must "re-start" Vegas: | 59 | /* There are several situations when we must "re-start" Vegas: |
69 | * | 60 | * |
70 | * o when a connection is established | 61 | * o when a connection is established |
@@ -81,7 +72,7 @@ struct vegas { | |||
81 | * Instead we must wait until the completion of an RTT during | 72 | * Instead we must wait until the completion of an RTT during |
82 | * which we actually receive ACKs. | 73 | * which we actually receive ACKs. |
83 | */ | 74 | */ |
84 | static inline void vegas_enable(struct sock *sk) | 75 | static void vegas_enable(struct sock *sk) |
85 | { | 76 | { |
86 | const struct tcp_sock *tp = tcp_sk(sk); | 77 | const struct tcp_sock *tp = tcp_sk(sk); |
87 | struct vegas *vegas = inet_csk_ca(sk); | 78 | struct vegas *vegas = inet_csk_ca(sk); |
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk) | |||
104 | vegas->doing_vegas_now = 0; | 95 | vegas->doing_vegas_now = 0; |
105 | } | 96 | } |
106 | 97 | ||
107 | static void tcp_vegas_init(struct sock *sk) | 98 | void tcp_vegas_init(struct sock *sk) |
108 | { | 99 | { |
109 | struct vegas *vegas = inet_csk_ca(sk); | 100 | struct vegas *vegas = inet_csk_ca(sk); |
110 | 101 | ||
111 | vegas->baseRTT = 0x7fffffff; | 102 | vegas->baseRTT = 0x7fffffff; |
112 | vegas_enable(sk); | 103 | vegas_enable(sk); |
113 | } | 104 | } |
105 | EXPORT_SYMBOL_GPL(tcp_vegas_init); | ||
114 | 106 | ||
115 | /* Do RTT sampling needed for Vegas. | 107 | /* Do RTT sampling needed for Vegas. |
116 | * Basically we: | 108 | * Basically we: |
@@ -120,10 +112,13 @@ static void tcp_vegas_init(struct sock *sk) | |||
120 | * o min-filter RTT samples from a much longer window (forever for now) | 112 | * o min-filter RTT samples from a much longer window (forever for now) |
121 | * to find the propagation delay (baseRTT) | 113 | * to find the propagation delay (baseRTT) |
122 | */ | 114 | */ |
123 | static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) | 115 | void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) |
124 | { | 116 | { |
125 | struct vegas *vegas = inet_csk_ca(sk); | 117 | struct vegas *vegas = inet_csk_ca(sk); |
126 | u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ | 118 | u32 vrtt; |
119 | |||
120 | /* Never allow zero rtt or baseRTT */ | ||
121 | vrtt = ktime_to_us(net_timedelta(last)) + 1; | ||
127 | 122 | ||
128 | /* Filter to find propagation delay: */ | 123 | /* Filter to find propagation delay: */ |
129 | if (vrtt < vegas->baseRTT) | 124 | if (vrtt < vegas->baseRTT) |
@@ -135,8 +130,9 @@ static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) | |||
135 | vegas->minRTT = min(vegas->minRTT, vrtt); | 130 | vegas->minRTT = min(vegas->minRTT, vrtt); |
136 | vegas->cntRTT++; | 131 | vegas->cntRTT++; |
137 | } | 132 | } |
133 | EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); | ||
138 | 134 | ||
139 | static void tcp_vegas_state(struct sock *sk, u8 ca_state) | 135 | void tcp_vegas_state(struct sock *sk, u8 ca_state) |
140 | { | 136 | { |
141 | 137 | ||
142 | if (ca_state == TCP_CA_Open) | 138 | if (ca_state == TCP_CA_Open) |
@@ -144,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) | |||
144 | else | 140 | else |
145 | vegas_disable(sk); | 141 | vegas_disable(sk); |
146 | } | 142 | } |
143 | EXPORT_SYMBOL_GPL(tcp_vegas_state); | ||
147 | 144 | ||
148 | /* | 145 | /* |
149 | * If the connection is idle and we are restarting, | 146 | * If the connection is idle and we are restarting, |
@@ -154,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) | |||
154 | * packets, _then_ we can make Vegas calculations | 151 | * packets, _then_ we can make Vegas calculations |
155 | * again. | 152 | * again. |
156 | */ | 153 | */ |
157 | static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) | 154 | void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) |
158 | { | 155 | { |
159 | if (event == CA_EVENT_CWND_RESTART || | 156 | if (event == CA_EVENT_CWND_RESTART || |
160 | event == CA_EVENT_TX_START) | 157 | event == CA_EVENT_TX_START) |
161 | tcp_vegas_init(sk); | 158 | tcp_vegas_init(sk); |
162 | } | 159 | } |
160 | EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); | ||
163 | 161 | ||
164 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | 162 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, |
165 | u32 seq_rtt, u32 in_flight, int flag) | 163 | u32 seq_rtt, u32 in_flight, int flag) |
@@ -336,30 +334,29 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
336 | } | 334 | } |
337 | 335 | ||
338 | /* Extract info for Tcp socket info provided via netlink. */ | 336 | /* Extract info for Tcp socket info provided via netlink. */ |
339 | static void tcp_vegas_get_info(struct sock *sk, u32 ext, | 337 | void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) |
340 | struct sk_buff *skb) | ||
341 | { | 338 | { |
342 | const struct vegas *ca = inet_csk_ca(sk); | 339 | const struct vegas *ca = inet_csk_ca(sk); |
343 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 340 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
344 | struct tcpvegas_info *info; | 341 | struct tcpvegas_info info = { |
345 | 342 | .tcpv_enabled = ca->doing_vegas_now, | |
346 | info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, | 343 | .tcpv_rttcnt = ca->cntRTT, |
347 | sizeof(*info))); | 344 | .tcpv_rtt = ca->baseRTT, |
348 | 345 | .tcpv_minrtt = ca->minRTT, | |
349 | info->tcpv_enabled = ca->doing_vegas_now; | 346 | }; |
350 | info->tcpv_rttcnt = ca->cntRTT; | 347 | |
351 | info->tcpv_rtt = ca->baseRTT; | 348 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); |
352 | info->tcpv_minrtt = ca->minRTT; | ||
353 | rtattr_failure: ; | ||
354 | } | 349 | } |
355 | } | 350 | } |
351 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); | ||
356 | 352 | ||
357 | static struct tcp_congestion_ops tcp_vegas = { | 353 | static struct tcp_congestion_ops tcp_vegas = { |
354 | .flags = TCP_CONG_RTT_STAMP, | ||
358 | .init = tcp_vegas_init, | 355 | .init = tcp_vegas_init, |
359 | .ssthresh = tcp_reno_ssthresh, | 356 | .ssthresh = tcp_reno_ssthresh, |
360 | .cong_avoid = tcp_vegas_cong_avoid, | 357 | .cong_avoid = tcp_vegas_cong_avoid, |
361 | .min_cwnd = tcp_reno_min_cwnd, | 358 | .min_cwnd = tcp_reno_min_cwnd, |
362 | .rtt_sample = tcp_vegas_rtt_calc, | 359 | .pkts_acked = tcp_vegas_pkts_acked, |
363 | .set_state = tcp_vegas_state, | 360 | .set_state = tcp_vegas_state, |
364 | .cwnd_event = tcp_vegas_cwnd_event, | 361 | .cwnd_event = tcp_vegas_cwnd_event, |
365 | .get_info = tcp_vegas_get_info, | 362 | .get_info = tcp_vegas_get_info, |
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h new file mode 100644 index 000000000000..502fa8183634 --- /dev/null +++ b/net/ipv4/tcp_vegas.h | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * TCP Vegas congestion control interface | ||
3 | */ | ||
4 | #ifndef __TCP_VEGAS_H | ||
5 | #define __TCP_VEGAS_H 1 | ||
6 | |||
7 | /* Vegas variables */ | ||
8 | struct vegas { | ||
9 | u32 beg_snd_nxt; /* right edge during last RTT */ | ||
10 | u32 beg_snd_una; /* left edge during last RTT */ | ||
11 | u32 beg_snd_cwnd; /* saves the size of the cwnd */ | ||
12 | u8 doing_vegas_now;/* if true, do vegas for this RTT */ | ||
13 | u16 cntRTT; /* # of RTTs measured within last RTT */ | ||
14 | u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ | ||
15 | u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ | ||
16 | }; | ||
17 | |||
18 | extern void tcp_vegas_init(struct sock *sk); | ||
19 | extern void tcp_vegas_state(struct sock *sk, u8 ca_state); | ||
20 | extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last); | ||
21 | extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); | ||
22 | extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); | ||
23 | |||
24 | #endif /* __TCP_VEGAS_H */ | ||
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ce57bf302f6c..9edb340f2f95 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk) | |||
69 | } | 69 | } |
70 | 70 | ||
71 | /* Do rtt sampling needed for Veno. */ | 71 | /* Do rtt sampling needed for Veno. */ |
72 | static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt) | 72 | static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) |
73 | { | 73 | { |
74 | struct veno *veno = inet_csk_ca(sk); | 74 | struct veno *veno = inet_csk_ca(sk); |
75 | u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */ | 75 | u32 vrtt; |
76 | |||
77 | /* Never allow zero rtt or baseRTT */ | ||
78 | vrtt = ktime_to_us(net_timedelta(last)) + 1; | ||
76 | 79 | ||
77 | /* Filter to find propagation delay: */ | 80 | /* Filter to find propagation delay: */ |
78 | if (vrtt < veno->basertt) | 81 | if (vrtt < veno->basertt) |
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk) | |||
199 | } | 202 | } |
200 | 203 | ||
201 | static struct tcp_congestion_ops tcp_veno = { | 204 | static struct tcp_congestion_ops tcp_veno = { |
205 | .flags = TCP_CONG_RTT_STAMP, | ||
202 | .init = tcp_veno_init, | 206 | .init = tcp_veno_init, |
203 | .ssthresh = tcp_veno_ssthresh, | 207 | .ssthresh = tcp_veno_ssthresh, |
204 | .cong_avoid = tcp_veno_cong_avoid, | 208 | .cong_avoid = tcp_veno_cong_avoid, |
205 | .rtt_sample = tcp_veno_rtt_calc, | 209 | .pkts_acked = tcp_veno_pkts_acked, |
206 | .set_state = tcp_veno_state, | 210 | .set_state = tcp_veno_state, |
207 | .cwnd_event = tcp_veno_cwnd_event, | 211 | .cwnd_event = tcp_veno_cwnd_event, |
208 | 212 | ||
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 4e1b61032a9c..e61e09dd513e 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c | |||
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta) | |||
100 | * Called after processing group of packets. | 100 | * Called after processing group of packets. |
101 | * but all westwood needs is the last sample of srtt. | 101 | * but all westwood needs is the last sample of srtt. |
102 | */ | 102 | */ |
103 | static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) | 103 | static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) |
104 | { | 104 | { |
105 | struct westwood *w = inet_csk_ca(sk); | 105 | struct westwood *w = inet_csk_ca(sk); |
106 | if (cnt > 0) | 106 | if (cnt > 0) |
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) | |||
226 | struct tcp_sock *tp = tcp_sk(sk); | 226 | struct tcp_sock *tp = tcp_sk(sk); |
227 | struct westwood *w = inet_csk_ca(sk); | 227 | struct westwood *w = inet_csk_ca(sk); |
228 | 228 | ||
229 | switch(event) { | 229 | switch (event) { |
230 | case CA_EVENT_FAST_ACK: | 230 | case CA_EVENT_FAST_ACK: |
231 | westwood_fast_bw(sk); | 231 | westwood_fast_bw(sk); |
232 | break; | 232 | break; |
@@ -260,16 +260,13 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, | |||
260 | { | 260 | { |
261 | const struct westwood *ca = inet_csk_ca(sk); | 261 | const struct westwood *ca = inet_csk_ca(sk); |
262 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 262 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
263 | struct rtattr *rta; | 263 | struct tcpvegas_info info = { |
264 | struct tcpvegas_info *info; | 264 | .tcpv_enabled = 1, |
265 | 265 | .tcpv_rtt = jiffies_to_usecs(ca->rtt), | |
266 | rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info)); | 266 | .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), |
267 | info = RTA_DATA(rta); | 267 | }; |
268 | info->tcpv_enabled = 1; | 268 | |
269 | info->tcpv_rttcnt = 0; | 269 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); |
270 | info->tcpv_rtt = jiffies_to_usecs(ca->rtt); | ||
271 | info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); | ||
272 | rtattr_failure: ; | ||
273 | } | 270 | } |
274 | } | 271 | } |
275 | 272 | ||
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c new file mode 100644 index 000000000000..545ed237ab53 --- /dev/null +++ b/net/ipv4/tcp_yeah.c | |||
@@ -0,0 +1,268 @@ | |||
1 | /* | ||
2 | * | ||
3 | * YeAH TCP | ||
4 | * | ||
5 | * For further details look at: | ||
6 | * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf | ||
7 | * | ||
8 | */ | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/skbuff.h> | ||
12 | #include <linux/inet_diag.h> | ||
13 | |||
14 | #include <net/tcp.h> | ||
15 | |||
16 | #include "tcp_vegas.h" | ||
17 | |||
18 | #define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck | ||
19 | #define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt | ||
20 | #define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss | ||
21 | #define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion | ||
22 | #define TCP_YEAH_PHY 8 //lin maximum delta from base | ||
23 | #define TCP_YEAH_RHO 16 //lin minumum number of consecutive rtt to consider competition on loss | ||
24 | #define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count | ||
25 | |||
26 | #define TCP_SCALABLE_AI_CNT 100U | ||
27 | |||
28 | /* YeAH variables */ | ||
29 | struct yeah { | ||
30 | struct vegas vegas; /* must be first */ | ||
31 | |||
32 | /* YeAH */ | ||
33 | u32 lastQ; | ||
34 | u32 doing_reno_now; | ||
35 | |||
36 | u32 reno_count; | ||
37 | u32 fast_count; | ||
38 | |||
39 | u32 pkts_acked; | ||
40 | }; | ||
41 | |||
42 | static void tcp_yeah_init(struct sock *sk) | ||
43 | { | ||
44 | struct tcp_sock *tp = tcp_sk(sk); | ||
45 | struct yeah *yeah = inet_csk_ca(sk); | ||
46 | |||
47 | tcp_vegas_init(sk); | ||
48 | |||
49 | yeah->doing_reno_now = 0; | ||
50 | yeah->lastQ = 0; | ||
51 | |||
52 | yeah->reno_count = 2; | ||
53 | |||
54 | /* Ensure the MD arithmetic works. This is somewhat pedantic, | ||
55 | * since I don't think we will see a cwnd this large. :) */ | ||
56 | tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); | ||
57 | |||
58 | } | ||
59 | |||
60 | |||
61 | static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) | ||
62 | { | ||
63 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
64 | struct yeah *yeah = inet_csk_ca(sk); | ||
65 | |||
66 | if (icsk->icsk_ca_state == TCP_CA_Open) | ||
67 | yeah->pkts_acked = pkts_acked; | ||
68 | |||
69 | tcp_vegas_pkts_acked(sk, pkts_acked, last); | ||
70 | } | ||
71 | |||
72 | static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, | ||
73 | u32 seq_rtt, u32 in_flight, int flag) | ||
74 | { | ||
75 | struct tcp_sock *tp = tcp_sk(sk); | ||
76 | struct yeah *yeah = inet_csk_ca(sk); | ||
77 | |||
78 | if (!tcp_is_cwnd_limited(sk, in_flight)) | ||
79 | return; | ||
80 | |||
81 | if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
82 | tcp_slow_start(tp); | ||
83 | |||
84 | else if (!yeah->doing_reno_now) { | ||
85 | /* Scalable */ | ||
86 | |||
87 | tp->snd_cwnd_cnt+=yeah->pkts_acked; | ||
88 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ | ||
89 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
90 | tp->snd_cwnd++; | ||
91 | tp->snd_cwnd_cnt = 0; | ||
92 | } | ||
93 | |||
94 | yeah->pkts_acked = 1; | ||
95 | |||
96 | } else { | ||
97 | /* Reno */ | ||
98 | |||
99 | if (tp->snd_cwnd_cnt < tp->snd_cwnd) | ||
100 | tp->snd_cwnd_cnt++; | ||
101 | |||
102 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
103 | tp->snd_cwnd++; | ||
104 | tp->snd_cwnd_cnt = 0; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. | ||
109 | * | ||
110 | * These are so named because they represent the approximate values | ||
111 | * of snd_una and snd_nxt at the beginning of the current RTT. More | ||
112 | * precisely, they represent the amount of data sent during the RTT. | ||
113 | * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, | ||
114 | * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding | ||
115 | * bytes of data have been ACKed during the course of the RTT, giving | ||
116 | * an "actual" rate of: | ||
117 | * | ||
118 | * (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration) | ||
119 | * | ||
120 | * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una, | ||
121 | * because delayed ACKs can cover more than one segment, so they | ||
122 | * don't line up yeahly with the boundaries of RTTs. | ||
123 | * | ||
124 | * Another unfortunate fact of life is that delayed ACKs delay the | ||
125 | * advance of the left edge of our send window, so that the number | ||
126 | * of bytes we send in an RTT is often less than our cwnd will allow. | ||
127 | * So we keep track of our cwnd separately, in v_beg_snd_cwnd. | ||
128 | */ | ||
129 | |||
130 | if (after(ack, yeah->vegas.beg_snd_nxt)) { | ||
131 | |||
132 | /* We do the Vegas calculations only if we got enough RTT | ||
133 | * samples that we can be reasonably sure that we got | ||
134 | * at least one RTT sample that wasn't from a delayed ACK. | ||
135 | * If we only had 2 samples total, | ||
136 | * then that means we're getting only 1 ACK per RTT, which | ||
137 | * means they're almost certainly delayed ACKs. | ||
138 | * If we have 3 samples, we should be OK. | ||
139 | */ | ||
140 | |||
141 | if (yeah->vegas.cntRTT > 2) { | ||
142 | u32 rtt, queue; | ||
143 | u64 bw; | ||
144 | |||
145 | /* We have enough RTT samples, so, using the Vegas | ||
146 | * algorithm, we determine if we should increase or | ||
147 | * decrease cwnd, and by how much. | ||
148 | */ | ||
149 | |||
150 | /* Pluck out the RTT we are using for the Vegas | ||
151 | * calculations. This is the min RTT seen during the | ||
152 | * last RTT. Taking the min filters out the effects | ||
153 | * of delayed ACKs, at the cost of noticing congestion | ||
154 | * a bit later. | ||
155 | */ | ||
156 | rtt = yeah->vegas.minRTT; | ||
157 | |||
158 | /* Compute excess number of packets above bandwidth | ||
159 | * Avoid doing full 64 bit divide. | ||
160 | */ | ||
161 | bw = tp->snd_cwnd; | ||
162 | bw *= rtt - yeah->vegas.baseRTT; | ||
163 | do_div(bw, rtt); | ||
164 | queue = bw; | ||
165 | |||
166 | if (queue > TCP_YEAH_ALPHA || | ||
167 | rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { | ||
168 | if (queue > TCP_YEAH_ALPHA | ||
169 | && tp->snd_cwnd > yeah->reno_count) { | ||
170 | u32 reduction = min(queue / TCP_YEAH_GAMMA , | ||
171 | tp->snd_cwnd >> TCP_YEAH_EPSILON); | ||
172 | |||
173 | tp->snd_cwnd -= reduction; | ||
174 | |||
175 | tp->snd_cwnd = max(tp->snd_cwnd, | ||
176 | yeah->reno_count); | ||
177 | |||
178 | tp->snd_ssthresh = tp->snd_cwnd; | ||
179 | } | ||
180 | |||
181 | if (yeah->reno_count <= 2) | ||
182 | yeah->reno_count = max(tp->snd_cwnd>>1, 2U); | ||
183 | else | ||
184 | yeah->reno_count++; | ||
185 | |||
186 | yeah->doing_reno_now = min(yeah->doing_reno_now + 1, | ||
187 | 0xffffffU); | ||
188 | } else { | ||
189 | yeah->fast_count++; | ||
190 | |||
191 | if (yeah->fast_count > TCP_YEAH_ZETA) { | ||
192 | yeah->reno_count = 2; | ||
193 | yeah->fast_count = 0; | ||
194 | } | ||
195 | |||
196 | yeah->doing_reno_now = 0; | ||
197 | } | ||
198 | |||
199 | yeah->lastQ = queue; | ||
200 | |||
201 | } | ||
202 | |||
203 | /* Save the extent of the current window so we can use this | ||
204 | * at the end of the next RTT. | ||
205 | */ | ||
206 | yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt; | ||
207 | yeah->vegas.beg_snd_nxt = tp->snd_nxt; | ||
208 | yeah->vegas.beg_snd_cwnd = tp->snd_cwnd; | ||
209 | |||
210 | /* Wipe the slate clean for the next RTT. */ | ||
211 | yeah->vegas.cntRTT = 0; | ||
212 | yeah->vegas.minRTT = 0x7fffffff; | ||
213 | } | ||
214 | } | ||
215 | |||
216 | static u32 tcp_yeah_ssthresh(struct sock *sk) { | ||
217 | const struct tcp_sock *tp = tcp_sk(sk); | ||
218 | struct yeah *yeah = inet_csk_ca(sk); | ||
219 | u32 reduction; | ||
220 | |||
221 | if (yeah->doing_reno_now < TCP_YEAH_RHO) { | ||
222 | reduction = yeah->lastQ; | ||
223 | |||
224 | reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) ); | ||
225 | |||
226 | reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); | ||
227 | } else | ||
228 | reduction = max(tp->snd_cwnd>>1,2U); | ||
229 | |||
230 | yeah->fast_count = 0; | ||
231 | yeah->reno_count = max(yeah->reno_count>>1, 2U); | ||
232 | |||
233 | return tp->snd_cwnd - reduction; | ||
234 | } | ||
235 | |||
236 | static struct tcp_congestion_ops tcp_yeah = { | ||
237 | .flags = TCP_CONG_RTT_STAMP, | ||
238 | .init = tcp_yeah_init, | ||
239 | .ssthresh = tcp_yeah_ssthresh, | ||
240 | .cong_avoid = tcp_yeah_cong_avoid, | ||
241 | .min_cwnd = tcp_reno_min_cwnd, | ||
242 | .set_state = tcp_vegas_state, | ||
243 | .cwnd_event = tcp_vegas_cwnd_event, | ||
244 | .get_info = tcp_vegas_get_info, | ||
245 | .pkts_acked = tcp_yeah_pkts_acked, | ||
246 | |||
247 | .owner = THIS_MODULE, | ||
248 | .name = "yeah", | ||
249 | }; | ||
250 | |||
251 | static int __init tcp_yeah_register(void) | ||
252 | { | ||
253 | BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE); | ||
254 | tcp_register_congestion_control(&tcp_yeah); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | static void __exit tcp_yeah_unregister(void) | ||
259 | { | ||
260 | tcp_unregister_congestion_control(&tcp_yeah); | ||
261 | } | ||
262 | |||
263 | module_init(tcp_yeah_register); | ||
264 | module_exit(tcp_yeah_unregister); | ||
265 | |||
266 | MODULE_AUTHOR("Angelo P. Castellani"); | ||
267 | MODULE_LICENSE("GPL"); | ||
268 | MODULE_DESCRIPTION("YeAH TCP"); | ||
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h new file mode 100644 index 000000000000..ed3b7198f23c --- /dev/null +++ b/net/ipv4/tcp_yeah.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/skbuff.h> | ||
4 | #include <linux/inet_diag.h> | ||
5 | #include <asm/div64.h> | ||
6 | |||
7 | #include <net/tcp.h> | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fc620a7c1db4..cec0f2cc49b7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -175,7 +175,8 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
175 | ; | 175 | ; |
176 | } | 176 | } |
177 | result = best; | 177 | result = best; |
178 | for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { | 178 | for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; |
179 | i++, result += UDP_HTABLE_SIZE) { | ||
179 | if (result > sysctl_local_port_range[1]) | 180 | if (result > sysctl_local_port_range[1]) |
180 | result = sysctl_local_port_range[0] | 181 | result = sysctl_local_port_range[0] |
181 | + ((result - sysctl_local_port_range[0]) & | 182 | + ((result - sysctl_local_port_range[0]) & |
@@ -212,13 +213,13 @@ fail: | |||
212 | return error; | 213 | return error; |
213 | } | 214 | } |
214 | 215 | ||
215 | __inline__ int udp_get_port(struct sock *sk, unsigned short snum, | 216 | int udp_get_port(struct sock *sk, unsigned short snum, |
216 | int (*scmp)(const struct sock *, const struct sock *)) | 217 | int (*scmp)(const struct sock *, const struct sock *)) |
217 | { | 218 | { |
218 | return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); | 219 | return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); |
219 | } | 220 | } |
220 | 221 | ||
221 | inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 222 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) |
222 | { | 223 | { |
223 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 224 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
224 | 225 | ||
@@ -270,10 +271,10 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, | |||
270 | continue; | 271 | continue; |
271 | score+=2; | 272 | score+=2; |
272 | } | 273 | } |
273 | if(score == 9) { | 274 | if (score == 9) { |
274 | result = sk; | 275 | result = sk; |
275 | break; | 276 | break; |
276 | } else if(score > badness) { | 277 | } else if (score > badness) { |
277 | result = sk; | 278 | result = sk; |
278 | badness = score; | 279 | badness = score; |
279 | } | 280 | } |
@@ -329,8 +330,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | |||
329 | struct inet_sock *inet; | 330 | struct inet_sock *inet; |
330 | struct iphdr *iph = (struct iphdr*)skb->data; | 331 | struct iphdr *iph = (struct iphdr*)skb->data; |
331 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | 332 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); |
332 | int type = skb->h.icmph->type; | 333 | const int type = icmp_hdr(skb)->type; |
333 | int code = skb->h.icmph->code; | 334 | const int code = icmp_hdr(skb)->code; |
334 | struct sock *sk; | 335 | struct sock *sk; |
335 | int harderr; | 336 | int harderr; |
336 | int err; | 337 | int err; |
@@ -390,7 +391,7 @@ out: | |||
390 | sock_put(sk); | 391 | sock_put(sk); |
391 | } | 392 | } |
392 | 393 | ||
393 | __inline__ void udp_err(struct sk_buff *skb, u32 info) | 394 | void udp_err(struct sk_buff *skb, u32 info) |
394 | { | 395 | { |
395 | return __udp4_lib_err(skb, info, udp_hash); | 396 | return __udp4_lib_err(skb, info, udp_hash); |
396 | } | 397 | } |
@@ -419,13 +420,14 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | |||
419 | __be32 src, __be32 dst, int len ) | 420 | __be32 src, __be32 dst, int len ) |
420 | { | 421 | { |
421 | unsigned int offset; | 422 | unsigned int offset; |
422 | struct udphdr *uh = skb->h.uh; | 423 | struct udphdr *uh = udp_hdr(skb); |
423 | __wsum csum = 0; | 424 | __wsum csum = 0; |
424 | 425 | ||
425 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 426 | if (skb_queue_len(&sk->sk_write_queue) == 1) { |
426 | /* | 427 | /* |
427 | * Only one fragment on the socket. | 428 | * Only one fragment on the socket. |
428 | */ | 429 | */ |
430 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
429 | skb->csum_offset = offsetof(struct udphdr, check); | 431 | skb->csum_offset = offsetof(struct udphdr, check); |
430 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | 432 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); |
431 | } else { | 433 | } else { |
@@ -434,7 +436,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | |||
434 | * fragments on the socket so that all csums of sk_buffs | 436 | * fragments on the socket so that all csums of sk_buffs |
435 | * should be together | 437 | * should be together |
436 | */ | 438 | */ |
437 | offset = skb->h.raw - skb->data; | 439 | offset = skb_transport_offset(skb); |
438 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | 440 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); |
439 | 441 | ||
440 | skb->ip_summed = CHECKSUM_NONE; | 442 | skb->ip_summed = CHECKSUM_NONE; |
@@ -469,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk) | |||
469 | /* | 471 | /* |
470 | * Create a UDP header | 472 | * Create a UDP header |
471 | */ | 473 | */ |
472 | uh = skb->h.uh; | 474 | uh = udp_hdr(skb); |
473 | uh->source = fl->fl_ip_sport; | 475 | uh->source = fl->fl_ip_sport; |
474 | uh->dest = fl->fl_ip_dport; | 476 | uh->dest = fl->fl_ip_dport; |
475 | uh->len = htons(up->len); | 477 | uh->len = htons(up->len); |
@@ -765,38 +767,38 @@ out: | |||
765 | 767 | ||
766 | int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | 768 | int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
767 | { | 769 | { |
768 | switch(cmd) | 770 | switch (cmd) { |
771 | case SIOCOUTQ: | ||
769 | { | 772 | { |
770 | case SIOCOUTQ: | 773 | int amount = atomic_read(&sk->sk_wmem_alloc); |
771 | { | 774 | return put_user(amount, (int __user *)arg); |
772 | int amount = atomic_read(&sk->sk_wmem_alloc); | 775 | } |
773 | return put_user(amount, (int __user *)arg); | ||
774 | } | ||
775 | 776 | ||
776 | case SIOCINQ: | 777 | case SIOCINQ: |
777 | { | 778 | { |
778 | struct sk_buff *skb; | 779 | struct sk_buff *skb; |
779 | unsigned long amount; | 780 | unsigned long amount; |
780 | 781 | ||
781 | amount = 0; | 782 | amount = 0; |
782 | spin_lock_bh(&sk->sk_receive_queue.lock); | 783 | spin_lock_bh(&sk->sk_receive_queue.lock); |
783 | skb = skb_peek(&sk->sk_receive_queue); | 784 | skb = skb_peek(&sk->sk_receive_queue); |
784 | if (skb != NULL) { | 785 | if (skb != NULL) { |
785 | /* | 786 | /* |
786 | * We will only return the amount | 787 | * We will only return the amount |
787 | * of this packet since that is all | 788 | * of this packet since that is all |
788 | * that will be read. | 789 | * that will be read. |
789 | */ | 790 | */ |
790 | amount = skb->len - sizeof(struct udphdr); | 791 | amount = skb->len - sizeof(struct udphdr); |
791 | } | ||
792 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
793 | return put_user(amount, (int __user *)arg); | ||
794 | } | 792 | } |
793 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
794 | return put_user(amount, (int __user *)arg); | ||
795 | } | ||
795 | 796 | ||
796 | default: | 797 | default: |
797 | return -ENOIOCTLCMD; | 798 | return -ENOIOCTLCMD; |
798 | } | 799 | } |
799 | return(0); | 800 | |
801 | return 0; | ||
800 | } | 802 | } |
801 | 803 | ||
802 | /* | 804 | /* |
@@ -810,7 +812,9 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
810 | struct inet_sock *inet = inet_sk(sk); | 812 | struct inet_sock *inet = inet_sk(sk); |
811 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | 813 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; |
812 | struct sk_buff *skb; | 814 | struct sk_buff *skb; |
813 | int copied, err, copy_only, is_udplite = IS_UDPLITE(sk); | 815 | unsigned int ulen, copied; |
816 | int err; | ||
817 | int is_udplite = IS_UDPLITE(sk); | ||
814 | 818 | ||
815 | /* | 819 | /* |
816 | * Check any passed addresses | 820 | * Check any passed addresses |
@@ -826,28 +830,25 @@ try_again: | |||
826 | if (!skb) | 830 | if (!skb) |
827 | goto out; | 831 | goto out; |
828 | 832 | ||
829 | copied = skb->len - sizeof(struct udphdr); | 833 | ulen = skb->len - sizeof(struct udphdr); |
830 | if (copied > len) { | 834 | copied = len; |
831 | copied = len; | 835 | if (copied > ulen) |
836 | copied = ulen; | ||
837 | else if (copied < ulen) | ||
832 | msg->msg_flags |= MSG_TRUNC; | 838 | msg->msg_flags |= MSG_TRUNC; |
833 | } | ||
834 | 839 | ||
835 | /* | 840 | /* |
836 | * Decide whether to checksum and/or copy data. | 841 | * If checksum is needed at all, try to do it while copying the |
837 | * | 842 | * data. If the data is truncated, or if we only want a partial |
838 | * UDP: checksum may have been computed in HW, | 843 | * coverage checksum (UDP-Lite), do it before the copy. |
839 | * (re-)compute it if message is truncated. | ||
840 | * UDP-Lite: always needs to checksum, no HW support. | ||
841 | */ | 844 | */ |
842 | copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); | ||
843 | 845 | ||
844 | if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { | 846 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { |
845 | if (__udp_lib_checksum_complete(skb)) | 847 | if (udp_lib_checksum_complete(skb)) |
846 | goto csum_copy_err; | 848 | goto csum_copy_err; |
847 | copy_only = 1; | ||
848 | } | 849 | } |
849 | 850 | ||
850 | if (copy_only) | 851 | if (skb_csum_unnecessary(skb)) |
851 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 852 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
852 | msg->msg_iov, copied ); | 853 | msg->msg_iov, copied ); |
853 | else { | 854 | else { |
@@ -866,8 +867,8 @@ try_again: | |||
866 | if (sin) | 867 | if (sin) |
867 | { | 868 | { |
868 | sin->sin_family = AF_INET; | 869 | sin->sin_family = AF_INET; |
869 | sin->sin_port = skb->h.uh->source; | 870 | sin->sin_port = udp_hdr(skb)->source; |
870 | sin->sin_addr.s_addr = skb->nh.iph->saddr; | 871 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
871 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | 872 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); |
872 | } | 873 | } |
873 | if (inet->cmsg_flags) | 874 | if (inet->cmsg_flags) |
@@ -875,7 +876,7 @@ try_again: | |||
875 | 876 | ||
876 | err = copied; | 877 | err = copied; |
877 | if (flags & MSG_TRUNC) | 878 | if (flags & MSG_TRUNC) |
878 | err = skb->len - sizeof(struct udphdr); | 879 | err = ulen; |
879 | 880 | ||
880 | out_free: | 881 | out_free: |
881 | skb_free_datagram(sk, skb); | 882 | skb_free_datagram(sk, skb); |
@@ -949,7 +950,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
949 | return 1; | 950 | return 1; |
950 | 951 | ||
951 | /* Now we can get the pointers */ | 952 | /* Now we can get the pointers */ |
952 | uh = skb->h.uh; | 953 | uh = udp_hdr(skb); |
953 | udpdata = (__u8 *)uh + sizeof(struct udphdr); | 954 | udpdata = (__u8 *)uh + sizeof(struct udphdr); |
954 | udpdata32 = (__be32 *)udpdata; | 955 | udpdata32 = (__be32 *)udpdata; |
955 | 956 | ||
@@ -959,7 +960,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
959 | /* Check if this is a keepalive packet. If so, eat it. */ | 960 | /* Check if this is a keepalive packet. If so, eat it. */ |
960 | if (len == 1 && udpdata[0] == 0xff) { | 961 | if (len == 1 && udpdata[0] == 0xff) { |
961 | return 0; | 962 | return 0; |
962 | } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) { | 963 | } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { |
963 | /* ESP Packet without Non-ESP header */ | 964 | /* ESP Packet without Non-ESP header */ |
964 | len = sizeof(struct udphdr); | 965 | len = sizeof(struct udphdr); |
965 | } else | 966 | } else |
@@ -990,7 +991,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
990 | return 0; | 991 | return 0; |
991 | 992 | ||
992 | /* Now we can update and verify the packet length... */ | 993 | /* Now we can update and verify the packet length... */ |
993 | iph = skb->nh.iph; | 994 | iph = ip_hdr(skb); |
994 | iphlen = iph->ihl << 2; | 995 | iphlen = iph->ihl << 2; |
995 | iph->tot_len = htons(ntohs(iph->tot_len) - len); | 996 | iph->tot_len = htons(ntohs(iph->tot_len) - len); |
996 | if (skb->len < iphlen + len) { | 997 | if (skb->len < iphlen + len) { |
@@ -1002,7 +1003,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
1002 | * transport header to point to ESP. Keep UDP on the stack | 1003 | * transport header to point to ESP. Keep UDP on the stack |
1003 | * for later. | 1004 | * for later. |
1004 | */ | 1005 | */ |
1005 | skb->h.raw = skb_pull(skb, len); | 1006 | __skb_pull(skb, len); |
1007 | skb_reset_transport_header(skb); | ||
1006 | 1008 | ||
1007 | /* modify the protocol (it's ESP!) */ | 1009 | /* modify the protocol (it's ESP!) */ |
1008 | iph->protocol = IPPROTO_ESP; | 1010 | iph->protocol = IPPROTO_ESP; |
@@ -1095,10 +1097,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1095 | } | 1097 | } |
1096 | } | 1098 | } |
1097 | 1099 | ||
1098 | if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { | 1100 | if (sk->sk_filter) { |
1099 | if (__udp_lib_checksum_complete(skb)) | 1101 | if (udp_lib_checksum_complete(skb)) |
1100 | goto drop; | 1102 | goto drop; |
1101 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1102 | } | 1103 | } |
1103 | 1104 | ||
1104 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | 1105 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { |
@@ -1143,10 +1144,10 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | |||
1143 | 1144 | ||
1144 | sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, | 1145 | sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, |
1145 | uh->source, saddr, dif); | 1146 | uh->source, saddr, dif); |
1146 | if(sknext) | 1147 | if (sknext) |
1147 | skb1 = skb_clone(skb, GFP_ATOMIC); | 1148 | skb1 = skb_clone(skb, GFP_ATOMIC); |
1148 | 1149 | ||
1149 | if(skb1) { | 1150 | if (skb1) { |
1150 | int ret = udp_queue_rcv_skb(sk, skb1); | 1151 | int ret = udp_queue_rcv_skb(sk, skb1); |
1151 | if (ret > 0) | 1152 | if (ret > 0) |
1152 | /* we should probably re-process instead | 1153 | /* we should probably re-process instead |
@@ -1154,7 +1155,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | |||
1154 | kfree_skb(skb1); | 1155 | kfree_skb(skb1); |
1155 | } | 1156 | } |
1156 | sk = sknext; | 1157 | sk = sknext; |
1157 | } while(sknext); | 1158 | } while (sknext); |
1158 | } else | 1159 | } else |
1159 | kfree_skb(skb); | 1160 | kfree_skb(skb); |
1160 | read_unlock(&udp_hash_lock); | 1161 | read_unlock(&udp_hash_lock); |
@@ -1166,25 +1167,37 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | |||
1166 | * Otherwise, csum completion requires chacksumming packet body, | 1167 | * Otherwise, csum completion requires chacksumming packet body, |
1167 | * including udp header and folding it to skb->csum. | 1168 | * including udp header and folding it to skb->csum. |
1168 | */ | 1169 | */ |
1169 | static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) | 1170 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, |
1171 | int proto) | ||
1170 | { | 1172 | { |
1173 | const struct iphdr *iph; | ||
1174 | int err; | ||
1175 | |||
1176 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
1177 | UDP_SKB_CB(skb)->cscov = skb->len; | ||
1178 | |||
1179 | if (proto == IPPROTO_UDPLITE) { | ||
1180 | err = udplite_checksum_init(skb, uh); | ||
1181 | if (err) | ||
1182 | return err; | ||
1183 | } | ||
1184 | |||
1185 | iph = ip_hdr(skb); | ||
1171 | if (uh->check == 0) { | 1186 | if (uh->check == 0) { |
1172 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1187 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1173 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1188 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1174 | if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | 1189 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, |
1175 | skb->len, IPPROTO_UDP, skb->csum )) | 1190 | proto, skb->csum)) |
1176 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1191 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1177 | } | 1192 | } |
1178 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | 1193 | if (!skb_csum_unnecessary(skb)) |
1179 | skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, | 1194 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, |
1180 | skb->nh.iph->daddr, | 1195 | skb->len, proto, 0); |
1181 | skb->len, IPPROTO_UDP, 0); | ||
1182 | /* Probably, we should checksum udp header (it should be in cache | 1196 | /* Probably, we should checksum udp header (it should be in cache |
1183 | * in any case) and data in tiny packets (< rx copybreak). | 1197 | * in any case) and data in tiny packets (< rx copybreak). |
1184 | */ | 1198 | */ |
1185 | 1199 | ||
1186 | /* UDP = UDP-Lite with a non-partial checksum coverage */ | 1200 | return 0; |
1187 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
1188 | } | 1201 | } |
1189 | 1202 | ||
1190 | /* | 1203 | /* |
@@ -1192,14 +1205,14 @@ static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) | |||
1192 | */ | 1205 | */ |
1193 | 1206 | ||
1194 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | 1207 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], |
1195 | int is_udplite) | 1208 | int proto) |
1196 | { | 1209 | { |
1197 | struct sock *sk; | 1210 | struct sock *sk; |
1198 | struct udphdr *uh = skb->h.uh; | 1211 | struct udphdr *uh = udp_hdr(skb); |
1199 | unsigned short ulen; | 1212 | unsigned short ulen; |
1200 | struct rtable *rt = (struct rtable*)skb->dst; | 1213 | struct rtable *rt = (struct rtable*)skb->dst; |
1201 | __be32 saddr = skb->nh.iph->saddr; | 1214 | __be32 saddr = ip_hdr(skb)->saddr; |
1202 | __be32 daddr = skb->nh.iph->daddr; | 1215 | __be32 daddr = ip_hdr(skb)->daddr; |
1203 | 1216 | ||
1204 | /* | 1217 | /* |
1205 | * Validate the packet. | 1218 | * Validate the packet. |
@@ -1211,20 +1224,17 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1211 | if (ulen > skb->len) | 1224 | if (ulen > skb->len) |
1212 | goto short_packet; | 1225 | goto short_packet; |
1213 | 1226 | ||
1214 | if(! is_udplite ) { /* UDP validates ulen. */ | 1227 | if (proto == IPPROTO_UDP) { |
1215 | 1228 | /* UDP validates ulen. */ | |
1216 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | 1229 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) |
1217 | goto short_packet; | 1230 | goto short_packet; |
1218 | uh = skb->h.uh; | 1231 | uh = udp_hdr(skb); |
1219 | |||
1220 | udp4_csum_init(skb, uh); | ||
1221 | |||
1222 | } else { /* UDP-Lite validates cscov. */ | ||
1223 | if (udplite4_csum_init(skb, uh)) | ||
1224 | goto csum_error; | ||
1225 | } | 1232 | } |
1226 | 1233 | ||
1227 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1234 | if (udp4_csum_init(skb, uh, proto)) |
1235 | goto csum_error; | ||
1236 | |||
1237 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | ||
1228 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | 1238 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); |
1229 | 1239 | ||
1230 | sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, | 1240 | sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, |
@@ -1250,7 +1260,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1250 | if (udp_lib_checksum_complete(skb)) | 1260 | if (udp_lib_checksum_complete(skb)) |
1251 | goto csum_error; | 1261 | goto csum_error; |
1252 | 1262 | ||
1253 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); | 1263 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); |
1254 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 1264 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
1255 | 1265 | ||
1256 | /* | 1266 | /* |
@@ -1258,11 +1268,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1258 | * don't wanna listen. Ignore it. | 1268 | * don't wanna listen. Ignore it. |
1259 | */ | 1269 | */ |
1260 | kfree_skb(skb); | 1270 | kfree_skb(skb); |
1261 | return(0); | 1271 | return 0; |
1262 | 1272 | ||
1263 | short_packet: | 1273 | short_packet: |
1264 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | 1274 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", |
1265 | is_udplite? "-Lite" : "", | 1275 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1266 | NIPQUAD(saddr), | 1276 | NIPQUAD(saddr), |
1267 | ntohs(uh->source), | 1277 | ntohs(uh->source), |
1268 | ulen, | 1278 | ulen, |
@@ -1277,21 +1287,21 @@ csum_error: | |||
1277 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1287 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1278 | */ | 1288 | */ |
1279 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | 1289 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", |
1280 | is_udplite? "-Lite" : "", | 1290 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1281 | NIPQUAD(saddr), | 1291 | NIPQUAD(saddr), |
1282 | ntohs(uh->source), | 1292 | ntohs(uh->source), |
1283 | NIPQUAD(daddr), | 1293 | NIPQUAD(daddr), |
1284 | ntohs(uh->dest), | 1294 | ntohs(uh->dest), |
1285 | ulen); | 1295 | ulen); |
1286 | drop: | 1296 | drop: |
1287 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); | 1297 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); |
1288 | kfree_skb(skb); | 1298 | kfree_skb(skb); |
1289 | return(0); | 1299 | return 0; |
1290 | } | 1300 | } |
1291 | 1301 | ||
1292 | __inline__ int udp_rcv(struct sk_buff *skb) | 1302 | int udp_rcv(struct sk_buff *skb) |
1293 | { | 1303 | { |
1294 | return __udp4_lib_rcv(skb, udp_hash, 0); | 1304 | return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); |
1295 | } | 1305 | } |
1296 | 1306 | ||
1297 | int udp_destroy_sock(struct sock *sk) | 1307 | int udp_destroy_sock(struct sock *sk) |
@@ -1313,13 +1323,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1313 | int val; | 1323 | int val; |
1314 | int err = 0; | 1324 | int err = 0; |
1315 | 1325 | ||
1316 | if(optlen<sizeof(int)) | 1326 | if (optlen<sizeof(int)) |
1317 | return -EINVAL; | 1327 | return -EINVAL; |
1318 | 1328 | ||
1319 | if (get_user(val, (int __user *)optval)) | 1329 | if (get_user(val, (int __user *)optval)) |
1320 | return -EFAULT; | 1330 | return -EFAULT; |
1321 | 1331 | ||
1322 | switch(optname) { | 1332 | switch (optname) { |
1323 | case UDP_CORK: | 1333 | case UDP_CORK: |
1324 | if (val != 0) { | 1334 | if (val != 0) { |
1325 | up->corkflag = 1; | 1335 | up->corkflag = 1; |
@@ -1373,7 +1383,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1373 | default: | 1383 | default: |
1374 | err = -ENOPROTOOPT; | 1384 | err = -ENOPROTOOPT; |
1375 | break; | 1385 | break; |
1376 | }; | 1386 | } |
1377 | 1387 | ||
1378 | return err; | 1388 | return err; |
1379 | } | 1389 | } |
@@ -1404,15 +1414,15 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1404 | struct udp_sock *up = udp_sk(sk); | 1414 | struct udp_sock *up = udp_sk(sk); |
1405 | int val, len; | 1415 | int val, len; |
1406 | 1416 | ||
1407 | if(get_user(len,optlen)) | 1417 | if (get_user(len,optlen)) |
1408 | return -EFAULT; | 1418 | return -EFAULT; |
1409 | 1419 | ||
1410 | len = min_t(unsigned int, len, sizeof(int)); | 1420 | len = min_t(unsigned int, len, sizeof(int)); |
1411 | 1421 | ||
1412 | if(len < 0) | 1422 | if (len < 0) |
1413 | return -EINVAL; | 1423 | return -EINVAL; |
1414 | 1424 | ||
1415 | switch(optname) { | 1425 | switch (optname) { |
1416 | case UDP_CORK: | 1426 | case UDP_CORK: |
1417 | val = up->corkflag; | 1427 | val = up->corkflag; |
1418 | break; | 1428 | break; |
@@ -1433,11 +1443,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1433 | 1443 | ||
1434 | default: | 1444 | default: |
1435 | return -ENOPROTOOPT; | 1445 | return -ENOPROTOOPT; |
1436 | }; | 1446 | } |
1437 | 1447 | ||
1438 | if(put_user(len, optlen)) | 1448 | if (put_user(len, optlen)) |
1439 | return -EFAULT; | 1449 | return -EFAULT; |
1440 | if(copy_to_user(optval, &val,len)) | 1450 | if (copy_to_user(optval, &val,len)) |
1441 | return -EFAULT; | 1451 | return -EFAULT; |
1442 | return 0; | 1452 | return 0; |
1443 | } | 1453 | } |
@@ -1486,15 +1496,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1486 | struct sk_buff *skb; | 1496 | struct sk_buff *skb; |
1487 | 1497 | ||
1488 | spin_lock_bh(&rcvq->lock); | 1498 | spin_lock_bh(&rcvq->lock); |
1489 | while ((skb = skb_peek(rcvq)) != NULL) { | 1499 | while ((skb = skb_peek(rcvq)) != NULL && |
1490 | if (udp_lib_checksum_complete(skb)) { | 1500 | udp_lib_checksum_complete(skb)) { |
1491 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); | 1501 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); |
1492 | __skb_unlink(skb, rcvq); | 1502 | __skb_unlink(skb, rcvq); |
1493 | kfree_skb(skb); | 1503 | kfree_skb(skb); |
1494 | } else { | ||
1495 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1496 | break; | ||
1497 | } | ||
1498 | } | 1504 | } |
1499 | spin_unlock_bh(&rcvq->lock); | 1505 | spin_unlock_bh(&rcvq->lock); |
1500 | 1506 | ||
@@ -1573,7 +1579,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | |||
1573 | struct sock *sk = udp_get_first(seq); | 1579 | struct sock *sk = udp_get_first(seq); |
1574 | 1580 | ||
1575 | if (sk) | 1581 | if (sk) |
1576 | while(pos && (sk = udp_get_next(seq, sk)) != NULL) | 1582 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) |
1577 | --pos; | 1583 | --pos; |
1578 | return pos ? NULL : sk; | 1584 | return pos ? NULL : sk; |
1579 | } | 1585 | } |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index b28fe1edf98b..f34fd686a8f1 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
@@ -31,7 +31,7 @@ static int udplite_v4_get_port(struct sock *sk, unsigned short snum) | |||
31 | 31 | ||
32 | static int udplite_rcv(struct sk_buff *skb) | 32 | static int udplite_rcv(struct sk_buff *skb) |
33 | { | 33 | { |
34 | return __udp4_lib_rcv(skb, udplite_hash, 1); | 34 | return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); |
35 | } | 35 | } |
36 | 36 | ||
37 | static void udplite_err(struct sk_buff *skb, u32 info) | 37 | static void udplite_err(struct sk_buff *skb, u32 info) |
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 78e80deb7e89..5ceca951d73f 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -28,7 +28,7 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 | |||
28 | switch (nexthdr) { | 28 | switch (nexthdr) { |
29 | case IPPROTO_IPIP: | 29 | case IPPROTO_IPIP: |
30 | case IPPROTO_IPV6: | 30 | case IPPROTO_IPV6: |
31 | *spi = skb->nh.iph->saddr; | 31 | *spi = ip_hdr(skb)->saddr; |
32 | *seq = 0; | 32 | *seq = 0; |
33 | return 0; | 33 | return 0; |
34 | } | 34 | } |
@@ -39,9 +39,9 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 | |||
39 | #ifdef CONFIG_NETFILTER | 39 | #ifdef CONFIG_NETFILTER |
40 | static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) | 40 | static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) |
41 | { | 41 | { |
42 | struct iphdr *iph = skb->nh.iph; | ||
43 | |||
44 | if (skb->dst == NULL) { | 42 | if (skb->dst == NULL) { |
43 | const struct iphdr *iph = ip_hdr(skb); | ||
44 | |||
45 | if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, | 45 | if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, |
46 | skb->dev)) | 46 | skb->dev)) |
47 | goto drop; | 47 | goto drop; |
@@ -55,18 +55,18 @@ drop: | |||
55 | 55 | ||
56 | int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) | 56 | int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) |
57 | { | 57 | { |
58 | int err; | ||
59 | __be32 spi, seq; | 58 | __be32 spi, seq; |
60 | struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; | 59 | struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; |
61 | struct xfrm_state *x; | 60 | struct xfrm_state *x; |
62 | int xfrm_nr = 0; | 61 | int xfrm_nr = 0; |
63 | int decaps = 0; | 62 | int decaps = 0; |
63 | int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); | ||
64 | 64 | ||
65 | if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0) | 65 | if (err != 0) |
66 | goto drop; | 66 | goto drop; |
67 | 67 | ||
68 | do { | 68 | do { |
69 | struct iphdr *iph = skb->nh.iph; | 69 | const struct iphdr *iph = ip_hdr(skb); |
70 | 70 | ||
71 | if (xfrm_nr == XFRM_MAX_DEPTH) | 71 | if (xfrm_nr == XFRM_MAX_DEPTH) |
72 | goto drop; | 72 | goto drop; |
@@ -113,7 +113,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) | |||
113 | break; | 113 | break; |
114 | } | 114 | } |
115 | 115 | ||
116 | if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0) | 116 | err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); |
117 | if (err < 0) | ||
117 | goto drop; | 118 | goto drop; |
118 | } while (!err); | 119 | } while (!err); |
119 | 120 | ||
@@ -146,15 +147,15 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) | |||
146 | return 0; | 147 | return 0; |
147 | } else { | 148 | } else { |
148 | #ifdef CONFIG_NETFILTER | 149 | #ifdef CONFIG_NETFILTER |
149 | __skb_push(skb, skb->data - skb->nh.raw); | 150 | __skb_push(skb, skb->data - skb_network_header(skb)); |
150 | skb->nh.iph->tot_len = htons(skb->len); | 151 | ip_hdr(skb)->tot_len = htons(skb->len); |
151 | ip_send_check(skb->nh.iph); | 152 | ip_send_check(ip_hdr(skb)); |
152 | 153 | ||
153 | NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, | 154 | NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, |
154 | xfrm4_rcv_encap_finish); | 155 | xfrm4_rcv_encap_finish); |
155 | return 0; | 156 | return 0; |
156 | #else | 157 | #else |
157 | return -skb->nh.iph->protocol; | 158 | return -ip_hdr(skb)->protocol; |
158 | #endif | 159 | #endif |
159 | } | 160 | } |
160 | 161 | ||
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index d419e15d9803..a73e710740c2 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c | |||
@@ -29,20 +29,21 @@ | |||
29 | */ | 29 | */ |
30 | static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) | 30 | static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) |
31 | { | 31 | { |
32 | struct iphdr *iph, *top_iph = NULL; | 32 | struct iphdr *iph, *top_iph; |
33 | int hdrlen, optlen; | 33 | int hdrlen, optlen; |
34 | 34 | ||
35 | iph = skb->nh.iph; | 35 | iph = ip_hdr(skb); |
36 | skb->h.ipiph = iph; | 36 | skb->transport_header = skb->network_header; |
37 | 37 | ||
38 | hdrlen = 0; | 38 | hdrlen = 0; |
39 | optlen = iph->ihl * 4 - sizeof(*iph); | 39 | optlen = iph->ihl * 4 - sizeof(*iph); |
40 | if (unlikely(optlen)) | 40 | if (unlikely(optlen)) |
41 | hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); | 41 | hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); |
42 | 42 | ||
43 | skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen); | 43 | skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen); |
44 | top_iph = skb->nh.iph; | 44 | skb_reset_network_header(skb); |
45 | skb->h.raw += sizeof(*iph) - hdrlen; | 45 | top_iph = ip_hdr(skb); |
46 | skb->transport_header += sizeof(*iph) - hdrlen; | ||
46 | 47 | ||
47 | memmove(top_iph, iph, sizeof(*iph)); | 48 | memmove(top_iph, iph, sizeof(*iph)); |
48 | if (unlikely(optlen)) { | 49 | if (unlikely(optlen)) { |
@@ -50,7 +51,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) | |||
50 | 51 | ||
51 | BUG_ON(optlen < 0); | 52 | BUG_ON(optlen < 0); |
52 | 53 | ||
53 | ph = (struct ip_beet_phdr *)skb->h.raw; | 54 | ph = (struct ip_beet_phdr *)skb_transport_header(skb); |
54 | ph->padlen = 4 - (optlen & 4); | 55 | ph->padlen = 4 - (optlen & 4); |
55 | ph->hdrlen = optlen / 8; | 56 | ph->hdrlen = optlen / 8; |
56 | ph->nexthdr = top_iph->protocol; | 57 | ph->nexthdr = top_iph->protocol; |
@@ -69,20 +70,18 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) | |||
69 | 70 | ||
70 | static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) | 71 | static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) |
71 | { | 72 | { |
72 | struct iphdr *iph = skb->nh.iph; | 73 | struct iphdr *iph = ip_hdr(skb); |
73 | int phlen = 0; | 74 | int phlen = 0; |
74 | int optlen = 0; | 75 | int optlen = 0; |
75 | __u8 ph_nexthdr = 0, protocol = 0; | 76 | u8 ph_nexthdr = 0; |
76 | int err = -EINVAL; | 77 | int err = -EINVAL; |
77 | 78 | ||
78 | protocol = iph->protocol; | ||
79 | |||
80 | if (unlikely(iph->protocol == IPPROTO_BEETPH)) { | 79 | if (unlikely(iph->protocol == IPPROTO_BEETPH)) { |
81 | struct ip_beet_phdr *ph; | 80 | struct ip_beet_phdr *ph; |
82 | 81 | ||
83 | if (!pskb_may_pull(skb, sizeof(*ph))) | 82 | if (!pskb_may_pull(skb, sizeof(*ph))) |
84 | goto out; | 83 | goto out; |
85 | ph = (struct ip_beet_phdr *)(skb->h.ipiph + 1); | 84 | ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1); |
86 | 85 | ||
87 | phlen = sizeof(*ph) + ph->padlen; | 86 | phlen = sizeof(*ph) + ph->padlen; |
88 | optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); | 87 | optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); |
@@ -96,22 +95,20 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) | |||
96 | ph_nexthdr = ph->nexthdr; | 95 | ph_nexthdr = ph->nexthdr; |
97 | } | 96 | } |
98 | 97 | ||
99 | skb->nh.raw = skb->data + (phlen - sizeof(*iph)); | 98 | skb_set_network_header(skb, phlen - sizeof(*iph)); |
100 | memmove(skb->nh.raw, iph, sizeof(*iph)); | 99 | memmove(skb_network_header(skb), iph, sizeof(*iph)); |
101 | skb->h.raw = skb->data + (phlen + optlen); | 100 | skb_set_transport_header(skb, phlen + optlen); |
102 | skb->data = skb->h.raw; | 101 | skb->data = skb_transport_header(skb); |
103 | 102 | ||
104 | iph = skb->nh.iph; | 103 | iph = ip_hdr(skb); |
105 | iph->ihl = (sizeof(*iph) + optlen) / 4; | 104 | iph->ihl = (sizeof(*iph) + optlen) / 4; |
106 | iph->tot_len = htons(skb->len + iph->ihl * 4); | 105 | iph->tot_len = htons(skb->len + iph->ihl * 4); |
107 | iph->daddr = x->sel.daddr.a4; | 106 | iph->daddr = x->sel.daddr.a4; |
108 | iph->saddr = x->sel.saddr.a4; | 107 | iph->saddr = x->sel.saddr.a4; |
109 | if (ph_nexthdr) | 108 | if (ph_nexthdr) |
110 | iph->protocol = ph_nexthdr; | 109 | iph->protocol = ph_nexthdr; |
111 | else | ||
112 | iph->protocol = protocol; | ||
113 | iph->check = 0; | 110 | iph->check = 0; |
114 | iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); | 111 | iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); |
115 | err = 0; | 112 | err = 0; |
116 | out: | 113 | out: |
117 | return err; | 114 | return err; |
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 92676b7e4034..601047161ea6 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c | |||
@@ -23,16 +23,13 @@ | |||
23 | */ | 23 | */ |
24 | static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) | 24 | static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) |
25 | { | 25 | { |
26 | struct iphdr *iph; | 26 | struct iphdr *iph = ip_hdr(skb); |
27 | int ihl; | 27 | int ihl = iph->ihl * 4; |
28 | 28 | ||
29 | iph = skb->nh.iph; | 29 | skb->transport_header = skb->network_header + ihl; |
30 | skb->h.ipiph = iph; | 30 | skb_push(skb, x->props.header_len); |
31 | 31 | skb_reset_network_header(skb); | |
32 | ihl = iph->ihl * 4; | 32 | memmove(skb_network_header(skb), iph, ihl); |
33 | skb->h.raw += ihl; | ||
34 | |||
35 | skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); | ||
36 | return 0; | 33 | return 0; |
37 | } | 34 | } |
38 | 35 | ||
@@ -46,12 +43,15 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) | |||
46 | */ | 43 | */ |
47 | static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) | 44 | static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) |
48 | { | 45 | { |
49 | int ihl = skb->data - skb->h.raw; | 46 | int ihl = skb->data - skb_transport_header(skb); |
50 | 47 | ||
51 | if (skb->h.raw != skb->nh.raw) | 48 | if (skb->transport_header != skb->network_header) { |
52 | skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl); | 49 | memmove(skb_transport_header(skb), |
53 | skb->nh.iph->tot_len = htons(skb->len + ihl); | 50 | skb_network_header(skb), ihl); |
54 | skb->h.raw = skb->data; | 51 | skb->network_header = skb->transport_header; |
52 | } | ||
53 | ip_hdr(skb)->tot_len = htons(skb->len + ihl); | ||
54 | skb_reset_transport_header(skb); | ||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ceb4376f572a..a2f2e6a5ec5d 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
@@ -16,8 +16,8 @@ | |||
16 | 16 | ||
17 | static inline void ipip_ecn_decapsulate(struct sk_buff *skb) | 17 | static inline void ipip_ecn_decapsulate(struct sk_buff *skb) |
18 | { | 18 | { |
19 | struct iphdr *outer_iph = skb->nh.iph; | 19 | struct iphdr *outer_iph = ip_hdr(skb); |
20 | struct iphdr *inner_iph = skb->h.ipiph; | 20 | struct iphdr *inner_iph = ipip_hdr(skb); |
21 | 21 | ||
22 | if (INET_ECN_is_ce(outer_iph->tos)) | 22 | if (INET_ECN_is_ce(outer_iph->tos)) |
23 | IP_ECN_set_ce(inner_iph); | 23 | IP_ECN_set_ce(inner_iph); |
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) | |||
26 | static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) | 26 | static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) |
27 | { | 27 | { |
28 | if (INET_ECN_is_ce(iph->tos)) | 28 | if (INET_ECN_is_ce(iph->tos)) |
29 | IP6_ECN_set_ce(skb->nh.ipv6h); | 29 | IP6_ECN_set_ce(ipv6_hdr(skb)); |
30 | } | 30 | } |
31 | 31 | ||
32 | /* Add encapsulation header. | 32 | /* Add encapsulation header. |
@@ -46,11 +46,12 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) | |||
46 | struct iphdr *iph, *top_iph; | 46 | struct iphdr *iph, *top_iph; |
47 | int flags; | 47 | int flags; |
48 | 48 | ||
49 | iph = skb->nh.iph; | 49 | iph = ip_hdr(skb); |
50 | skb->h.ipiph = iph; | 50 | skb->transport_header = skb->network_header; |
51 | 51 | ||
52 | skb->nh.raw = skb_push(skb, x->props.header_len); | 52 | skb_push(skb, x->props.header_len); |
53 | top_iph = skb->nh.iph; | 53 | skb_reset_network_header(skb); |
54 | top_iph = ip_hdr(skb); | ||
54 | 55 | ||
55 | top_iph->ihl = 5; | 56 | top_iph->ihl = 5; |
56 | top_iph->version = 4; | 57 | top_iph->version = 4; |
@@ -90,10 +91,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) | |||
90 | 91 | ||
91 | static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | 92 | static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) |
92 | { | 93 | { |
93 | struct iphdr *iph = skb->nh.iph; | 94 | struct iphdr *iph = ip_hdr(skb); |
95 | const unsigned char *old_mac; | ||
94 | int err = -EINVAL; | 96 | int err = -EINVAL; |
95 | 97 | ||
96 | switch(iph->protocol){ | 98 | switch (iph->protocol){ |
97 | case IPPROTO_IPIP: | 99 | case IPPROTO_IPIP: |
98 | break; | 100 | break; |
99 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 101 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
@@ -111,10 +113,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | |||
111 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | 113 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
112 | goto out; | 114 | goto out; |
113 | 115 | ||
114 | iph = skb->nh.iph; | 116 | iph = ip_hdr(skb); |
115 | if (iph->protocol == IPPROTO_IPIP) { | 117 | if (iph->protocol == IPPROTO_IPIP) { |
116 | if (x->props.flags & XFRM_STATE_DECAP_DSCP) | 118 | if (x->props.flags & XFRM_STATE_DECAP_DSCP) |
117 | ipv4_copy_dscp(iph, skb->h.ipiph); | 119 | ipv4_copy_dscp(iph, ipip_hdr(skb)); |
118 | if (!(x->props.flags & XFRM_STATE_NOECN)) | 120 | if (!(x->props.flags & XFRM_STATE_NOECN)) |
119 | ipip_ecn_decapsulate(skb); | 121 | ipip_ecn_decapsulate(skb); |
120 | } | 122 | } |
@@ -125,9 +127,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) | |||
125 | skb->protocol = htons(ETH_P_IPV6); | 127 | skb->protocol = htons(ETH_P_IPV6); |
126 | } | 128 | } |
127 | #endif | 129 | #endif |
128 | skb->mac.raw = memmove(skb->data - skb->mac_len, | 130 | old_mac = skb_mac_header(skb); |
129 | skb->mac.raw, skb->mac_len); | 131 | skb_set_mac_header(skb, -skb->mac_len); |
130 | skb->nh.raw = skb->data; | 132 | memmove(skb_mac_header(skb), old_mac, skb->mac_len); |
133 | skb_reset_network_header(skb); | ||
131 | err = 0; | 134 | err = 0; |
132 | 135 | ||
133 | out: | 136 | out: |
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 038ca160fe2c..44ef208a75cb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c | |||
@@ -22,14 +22,13 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) | |||
22 | { | 22 | { |
23 | int mtu, ret = 0; | 23 | int mtu, ret = 0; |
24 | struct dst_entry *dst; | 24 | struct dst_entry *dst; |
25 | struct iphdr *iph = skb->nh.iph; | ||
26 | 25 | ||
27 | if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) | 26 | if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) |
28 | goto out; | 27 | goto out; |
29 | 28 | ||
30 | IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; | 29 | IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; |
31 | 30 | ||
32 | if (!(iph->frag_off & htons(IP_DF)) || skb->local_df) | 31 | if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) |
33 | goto out; | 32 | goto out; |
34 | 33 | ||
35 | dst = skb->dst; | 34 | dst = skb->dst; |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 5d51a2af34c1..4ff8ed30024f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -119,7 +119,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
119 | 119 | ||
120 | if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { | 120 | if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { |
121 | unsigned short encap_family = xfrm[i]->props.family; | 121 | unsigned short encap_family = xfrm[i]->props.family; |
122 | switch(encap_family) { | 122 | switch (encap_family) { |
123 | case AF_INET: | 123 | case AF_INET: |
124 | fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; | 124 | fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; |
125 | fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; | 125 | fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; |
@@ -209,8 +209,8 @@ error: | |||
209 | static void | 209 | static void |
210 | _decode_session4(struct sk_buff *skb, struct flowi *fl) | 210 | _decode_session4(struct sk_buff *skb, struct flowi *fl) |
211 | { | 211 | { |
212 | struct iphdr *iph = skb->nh.iph; | 212 | struct iphdr *iph = ip_hdr(skb); |
213 | u8 *xprth = skb->nh.raw + iph->ihl*4; | 213 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
214 | 214 | ||
215 | memset(fl, 0, sizeof(struct flowi)); | 215 | memset(fl, 0, sizeof(struct flowi)); |
216 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | 216 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { |
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) | |||
263 | default: | 263 | default: |
264 | fl->fl_ipsec_spi = 0; | 264 | fl->fl_ipsec_spi = 0; |
265 | break; | 265 | break; |
266 | }; | 266 | } |
267 | } | 267 | } |
268 | fl->proto = iph->protocol; | 268 | fl->proto = iph->protocol; |
269 | fl->fl4_dst = iph->daddr; | 269 | fl->fl4_dst = iph->daddr; |
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 3eef06454da9..568510304553 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c | |||
@@ -12,9 +12,8 @@ | |||
12 | 12 | ||
13 | static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) | 13 | static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) |
14 | { | 14 | { |
15 | struct iphdr *iph; | 15 | struct iphdr *iph = ip_hdr(skb); |
16 | 16 | ||
17 | iph = skb->nh.iph; | ||
18 | iph->tot_len = htons(skb->len); | 17 | iph->tot_len = htons(skb->len); |
19 | ip_send_check(iph); | 18 | ip_send_check(iph); |
20 | 19 | ||