Diffstat (limited to 'net/ipv4')
59 files changed, 1931 insertions, 3033 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d1..cbb505ba9324 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER
55 | 55 | ||
56 | If unsure, say N here. | 56 | If unsure, say N here. |
57 | 57 | ||
58 | choice | ||
59 | prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" | ||
60 | depends on IP_ADVANCED_ROUTER | ||
61 | default ASK_IP_FIB_HASH | ||
62 | |||
63 | config ASK_IP_FIB_HASH | ||
64 | bool "FIB_HASH" | ||
65 | ---help--- | ||
66 | Current FIB is very proven and good enough for most users. | ||
67 | |||
68 | config IP_FIB_TRIE | ||
69 | bool "FIB_TRIE" | ||
70 | ---help--- | ||
71 | Use new experimental LC-trie as FIB lookup algorithm. | ||
72 | This improves lookup performance if you have a large | ||
73 | number of routes. | ||
74 | |||
75 | LC-trie is a longest matching prefix lookup algorithm which | ||
76 | performs better than FIB_HASH for large routing tables. | ||
77 | But, it consumes more memory and is more complex. | ||
78 | |||
79 | LC-trie is described in: | ||
80 | |||
81 | IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson | ||
82 | IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, | ||
83 | June 1999 | ||
84 | |||
85 | An experimental study of compression methods for dynamic tries | ||
86 | Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. | ||
87 | <http://www.csc.kth.se/~snilsson/software/dyntrie2/> | ||
88 | |||
89 | endchoice | ||
90 | |||
91 | config IP_FIB_HASH | ||
92 | def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER | ||
93 | |||
94 | config IP_FIB_TRIE_STATS | 58 | config IP_FIB_TRIE_STATS |
95 | bool "FIB TRIE statistics" | 59 | bool "FIB TRIE statistics" |
96 | depends on IP_FIB_TRIE | 60 | depends on IP_ADVANCED_ROUTER |
97 | ---help--- | 61 | ---help--- |
98 | Keep track of statistics on structure of FIB TRIE table. | 62 | Keep track of statistics on structure of FIB TRIE table. |
99 | Useful for testing and measuring TRIE performance. | 63 | Useful for testing and measuring TRIE performance. |
@@ -140,6 +104,9 @@ config IP_ROUTE_VERBOSE
140 | handled by the klogd daemon which is responsible for kernel messages | 104 | handled by the klogd daemon which is responsible for kernel messages |
141 | ("man klogd"). | 105 | ("man klogd"). |
142 | 106 | ||
107 | config IP_ROUTE_CLASSID | ||
108 | bool | ||
109 | |||
143 | config IP_PNP | 110 | config IP_PNP |
144 | bool "IP: kernel level autoconfiguration" | 111 | bool "IP: kernel level autoconfiguration" |
145 | help | 112 | help |
@@ -657,4 +624,3 @@ config TCP_MD5SIG
657 | on the Internet. | 624 | on the Internet. |
658 | 625 | ||
659 | If unsure, say N. | 626 | If unsure, say N. |
660 | |||
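The help text deleted above describes LC-trie as a longest-matching-prefix algorithm; with this change it stops being an experimental option and becomes the only IPv4 FIB implementation. As a reminder of the semantics any FIB lookup must provide, here is a minimal, self-contained C sketch of longest-prefix matching over a flat table (the demo_* names are illustrative, not kernel types); LC-trie reaches the same answer in roughly logarithmic time by level-compressing prefix bits into trie nodes instead of scanning:

#include <stdint.h>
#include <stddef.h>

/* Hypothetical route entry: a pre-masked prefix plus its length. */
struct demo_route {
        uint32_t prefix;   /* network prefix, host byte order */
        uint8_t  len;      /* prefix length, 0..32 */
        int      nexthop;  /* opaque next-hop id */
};

/* Longest-prefix match: of all entries covering dst, pick the one
 * with the largest prefix length. */
static int demo_lpm(const struct demo_route *tbl, size_t n, uint32_t dst)
{
        int best = -1, best_len = -1;
        size_t i;

        for (i = 0; i < n; i++) {
                uint32_t mask = tbl[i].len ? ~0u << (32 - tbl[i].len) : 0;

                if ((dst & mask) == tbl[i].prefix && tbl[i].len > best_len) {
                        best = tbl[i].nexthop;
                        best_len = tbl[i].len;
                }
        }
        return best;    /* -1: no route, not even a default */
}

Given entries 10.0.0.0/8 -> 1 and 10.1.0.0/16 -> 2, a lookup of 10.1.2.3 matches both but returns 2, because /16 is the longer prefix.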
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4978d22f9a75..0dc772d0d125 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
10 | tcp_minisocks.o tcp_cong.o \ | 10 | tcp_minisocks.o tcp_cong.o \ |
11 | datagram.o raw.o udp.o udplite.o \ | 11 | datagram.o raw.o udp.o udplite.o \ |
12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ | 12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ |
13 | fib_frontend.o fib_semantics.o \ | 13 | fib_frontend.o fib_semantics.o fib_trie.o \ |
14 | inet_fragment.o | 14 | inet_fragment.o |
15 | 15 | ||
16 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o | 16 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o |
17 | obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o | ||
18 | obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o | ||
19 | obj-$(CONFIG_PROC_FS) += proc.o | 17 | obj-$(CONFIG_PROC_FS) += proc.o |
20 | obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o | 18 | obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o |
21 | obj-$(CONFIG_IP_MROUTE) += ipmr.o | 19 | obj-$(CONFIG_IP_MROUTE) += ipmr.o |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 45b89d7bda5a..807d83c02ef6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1101,23 +1101,20 @@ int sysctl_ip_dynaddr __read_mostly;
1101 | static int inet_sk_reselect_saddr(struct sock *sk) | 1101 | static int inet_sk_reselect_saddr(struct sock *sk) |
1102 | { | 1102 | { |
1103 | struct inet_sock *inet = inet_sk(sk); | 1103 | struct inet_sock *inet = inet_sk(sk); |
1104 | int err; | ||
1105 | struct rtable *rt; | ||
1106 | __be32 old_saddr = inet->inet_saddr; | 1104 | __be32 old_saddr = inet->inet_saddr; |
1107 | __be32 new_saddr; | ||
1108 | __be32 daddr = inet->inet_daddr; | 1105 | __be32 daddr = inet->inet_daddr; |
1106 | struct rtable *rt; | ||
1107 | __be32 new_saddr; | ||
1109 | 1108 | ||
1110 | if (inet->opt && inet->opt->srr) | 1109 | if (inet->opt && inet->opt->srr) |
1111 | daddr = inet->opt->faddr; | 1110 | daddr = inet->opt->faddr; |
1112 | 1111 | ||
1113 | /* Query new route. */ | 1112 | /* Query new route. */ |
1114 | err = ip_route_connect(&rt, daddr, 0, | 1113 | rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), |
1115 | RT_CONN_FLAGS(sk), | 1114 | sk->sk_bound_dev_if, sk->sk_protocol, |
1116 | sk->sk_bound_dev_if, | 1115 | inet->inet_sport, inet->inet_dport, sk, false); |
1117 | sk->sk_protocol, | 1116 | if (IS_ERR(rt)) |
1118 | inet->inet_sport, inet->inet_dport, sk, 0); | 1117 | return PTR_ERR(rt); |
1119 | if (err) | ||
1120 | return err; | ||
1121 | 1118 | ||
1122 | sk_setup_caps(sk, &rt->dst); | 1119 | sk_setup_caps(sk, &rt->dst); |
1123 | 1120 | ||
@@ -1160,25 +1157,16 @@ int inet_sk_rebuild_header(struct sock *sk)
1160 | daddr = inet->inet_daddr; | 1157 | daddr = inet->inet_daddr; |
1161 | if (inet->opt && inet->opt->srr) | 1158 | if (inet->opt && inet->opt->srr) |
1162 | daddr = inet->opt->faddr; | 1159 | daddr = inet->opt->faddr; |
1163 | { | 1160 | rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, |
1164 | struct flowi fl = { | 1161 | inet->inet_dport, inet->inet_sport, |
1165 | .oif = sk->sk_bound_dev_if, | 1162 | sk->sk_protocol, RT_CONN_FLAGS(sk), |
1166 | .mark = sk->sk_mark, | 1163 | sk->sk_bound_dev_if); |
1167 | .fl4_dst = daddr, | 1164 | if (!IS_ERR(rt)) { |
1168 | .fl4_src = inet->inet_saddr, | 1165 | err = 0; |
1169 | .fl4_tos = RT_CONN_FLAGS(sk), | ||
1170 | .proto = sk->sk_protocol, | ||
1171 | .flags = inet_sk_flowi_flags(sk), | ||
1172 | .fl_ip_sport = inet->inet_sport, | ||
1173 | .fl_ip_dport = inet->inet_dport, | ||
1174 | }; | ||
1175 | |||
1176 | security_sk_classify_flow(sk, &fl); | ||
1177 | err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); | ||
1178 | } | ||
1179 | if (!err) | ||
1180 | sk_setup_caps(sk, &rt->dst); | 1166 | sk_setup_caps(sk, &rt->dst); |
1181 | else { | 1167 | } else { |
1168 | err = PTR_ERR(rt); | ||
1169 | |||
1182 | /* Routing failed... */ | 1170 | /* Routing failed... */ |
1183 | sk->sk_route_caps = 0; | 1171 | sk->sk_route_caps = 0; |
1184 | /* | 1172 | /* |
@@ -1231,7 +1219,7 @@ out:
1231 | return err; | 1219 | return err; |
1232 | } | 1220 | } |
1233 | 1221 | ||
1234 | static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) | 1222 | static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) |
1235 | { | 1223 | { |
1236 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 1224 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
1237 | struct iphdr *iph; | 1225 | struct iphdr *iph; |
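The af_inet.c hunks above show the calling convention adopted across this series: ip_route_connect() and ip_route_output_ports() now return a struct rtable * with any failure folded into the pointer via ERR_PTR(), instead of an int result plus an output parameter. A hedged sketch of the caller-side pattern (the demo function and its zeroed address/port arguments are assumptions; the helpers are the ones used in the hunks):

/* Sketch only: mirrors the converted callers above. */
static int demo_route_and_cache(struct sock *sk, __be32 daddr)
{
        struct rtable *rt;

        rt = ip_route_output_ports(sock_net(sk), sk, daddr,
                                   0, 0, 0,   /* saddr, dport, sport */
                                   sk->sk_protocol, RT_CONN_FLAGS(sk),
                                   sk->sk_bound_dev_if);
        if (IS_ERR(rt))
                return PTR_ERR(rt);  /* decode the error; nothing to free */

        sk_setup_caps(sk, &rt->dst);
        return 0;
}

Folding the error into the return value is what lets inet_sk_reselect_saddr() and inet_sk_rebuild_header() drop their local err bookkeeping and the open-coded struct flowi.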
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 86961bec70ab..4286fd3cc0e2 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -201,11 +201,14 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
201 | top_iph->ttl = 0; | 201 | top_iph->ttl = 0; |
202 | top_iph->check = 0; | 202 | top_iph->check = 0; |
203 | 203 | ||
204 | ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; | 204 | if (x->props.flags & XFRM_STATE_ALIGN4) |
205 | ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; | ||
206 | else | ||
207 | ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; | ||
205 | 208 | ||
206 | ah->reserved = 0; | 209 | ah->reserved = 0; |
207 | ah->spi = x->id.spi; | 210 | ah->spi = x->id.spi; |
208 | ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); | 211 | ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); |
209 | 212 | ||
210 | sg_init_table(sg, nfrags); | 213 | sg_init_table(sg, nfrags); |
211 | skb_to_sgvec(skb, sg, 0, skb->len); | 214 | skb_to_sgvec(skb, sg, 0, skb->len); |
@@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
299 | nexthdr = ah->nexthdr; | 302 | nexthdr = ah->nexthdr; |
300 | ah_hlen = (ah->hdrlen + 2) << 2; | 303 | ah_hlen = (ah->hdrlen + 2) << 2; |
301 | 304 | ||
302 | if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && | 305 | if (x->props.flags & XFRM_STATE_ALIGN4) { |
303 | ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) | 306 | if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) && |
304 | goto out; | 307 | ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len)) |
308 | goto out; | ||
309 | } else { | ||
310 | if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && | ||
311 | ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) | ||
312 | goto out; | ||
313 | } | ||
305 | 314 | ||
306 | if (!pskb_may_pull(skb, ah_hlen)) | 315 | if (!pskb_may_pull(skb, ah_hlen)) |
307 | goto out; | 316 | goto out; |
@@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x)
450 | 459 | ||
451 | BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); | 460 | BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); |
452 | 461 | ||
453 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + | 462 | if (x->props.flags & XFRM_STATE_ALIGN4) |
454 | ahp->icv_trunc_len); | 463 | x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) + |
464 | ahp->icv_trunc_len); | ||
465 | else | ||
466 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + | ||
467 | ahp->icv_trunc_len); | ||
455 | if (x->props.mode == XFRM_MODE_TUNNEL) | 468 | if (x->props.mode == XFRM_MODE_TUNNEL) |
456 | x->props.header_len += sizeof(struct iphdr); | 469 | x->props.header_len += sizeof(struct iphdr); |
457 | x->data = ahp; | 470 | x->data = ahp; |
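The new XFRM_STATE_ALIGN4 branches exist because RFC 4302 encodes ah->hdrlen as the header length in 32-bit words minus 2, while IPv4 AH has traditionally padded the ICV to an 8-byte boundary. For an ICV that is not a multiple of 8 bytes, the two alignments produce different headers. A standalone arithmetic check, assuming the 12-byte fixed AH header and a 16-byte truncated ICV:

#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))
#define AH_FIXED        12      /* sizeof(struct ip_auth_hdr) */

int main(void)
{
        int icv = 16;   /* truncated ICV bytes (assumed) */

        /* hdrlen field = (total length in 32-bit words) - 2 */
        printf("ALIGN8: total %d bytes, hdrlen %d\n",
               ALIGN_UP(AH_FIXED + icv, 8),
               (ALIGN_UP(AH_FIXED + icv, 8) >> 2) - 2);
        printf("ALIGN4: total %d bytes, hdrlen %d\n",
               ALIGN_UP(AH_FIXED + icv, 4),
               (ALIGN_UP(AH_FIXED + icv, 4) >> 2) - 2);
        return 0;
}

This prints 32 bytes / hdrlen 6 for the 8-byte alignment and 28 bytes / hdrlen 5 for the 4-byte case, which is why ah_input() must now accept either encoding when the state carries XFRM_STATE_ALIGN4.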
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7927589813b5..090d273d7865 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,14 +433,13 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
433 | 433 | ||
434 | static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) | 434 | static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) |
435 | { | 435 | { |
436 | struct flowi fl = { .fl4_dst = sip, | ||
437 | .fl4_src = tip }; | ||
438 | struct rtable *rt; | 436 | struct rtable *rt; |
439 | int flag = 0; | 437 | int flag = 0; |
440 | /*unsigned long now; */ | 438 | /*unsigned long now; */ |
441 | struct net *net = dev_net(dev); | 439 | struct net *net = dev_net(dev); |
442 | 440 | ||
443 | if (ip_route_output_key(net, &rt, &fl) < 0) | 441 | rt = ip_route_output(net, sip, tip, 0, 0); |
442 | if (IS_ERR(rt)) | ||
444 | return 1; | 443 | return 1; |
445 | if (rt->dst.dev != dev) { | 444 | if (rt->dst.dev != dev) { |
446 | NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); | 445 | NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); |
@@ -1061,12 +1060,10 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1061 | if (r->arp_flags & ATF_PERM) | 1060 | if (r->arp_flags & ATF_PERM) |
1062 | r->arp_flags |= ATF_COM; | 1061 | r->arp_flags |= ATF_COM; |
1063 | if (dev == NULL) { | 1062 | if (dev == NULL) { |
1064 | struct flowi fl = { .fl4_dst = ip, | 1063 | struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); |
1065 | .fl4_tos = RTO_ONLINK }; | 1064 | |
1066 | struct rtable *rt; | 1065 | if (IS_ERR(rt)) |
1067 | err = ip_route_output_key(net, &rt, &fl); | 1066 | return PTR_ERR(rt); |
1068 | if (err != 0) | ||
1069 | return err; | ||
1070 | dev = rt->dst.dev; | 1067 | dev = rt->dst.dev; |
1071 | ip_rt_put(rt); | 1068 | ip_rt_put(rt); |
1072 | if (!dev) | 1069 | if (!dev) |
@@ -1177,7 +1174,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1177 | static int arp_req_delete(struct net *net, struct arpreq *r, | 1174 | static int arp_req_delete(struct net *net, struct arpreq *r, |
1178 | struct net_device *dev) | 1175 | struct net_device *dev) |
1179 | { | 1176 | { |
1180 | int err; | ||
1181 | __be32 ip; | 1177 | __be32 ip; |
1182 | 1178 | ||
1183 | if (r->arp_flags & ATF_PUBL) | 1179 | if (r->arp_flags & ATF_PUBL) |
@@ -1185,12 +1181,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1185 | 1181 | ||
1186 | ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; | 1182 | ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; |
1187 | if (dev == NULL) { | 1183 | if (dev == NULL) { |
1188 | struct flowi fl = { .fl4_dst = ip, | 1184 | struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); |
1189 | .fl4_tos = RTO_ONLINK }; | 1185 | if (IS_ERR(rt)) |
1190 | struct rtable *rt; | 1186 | return PTR_ERR(rt); |
1191 | err = ip_route_output_key(net, &rt, &fl); | ||
1192 | if (err != 0) | ||
1193 | return err; | ||
1194 | dev = rt->dst.dev; | 1187 | dev = rt->dst.dev; |
1195 | ip_rt_put(rt); | 1188 | ip_rt_put(rt); |
1196 | if (!dev) | 1189 | if (!dev) |
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 174be6caa5c8..85bd24ca4f6d 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -46,11 +46,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
46 | if (!saddr) | 46 | if (!saddr) |
47 | saddr = inet->mc_addr; | 47 | saddr = inet->mc_addr; |
48 | } | 48 | } |
49 | err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, | 49 | rt = ip_route_connect(usin->sin_addr.s_addr, saddr, |
50 | RT_CONN_FLAGS(sk), oif, | 50 | RT_CONN_FLAGS(sk), oif, |
51 | sk->sk_protocol, | 51 | sk->sk_protocol, |
52 | inet->inet_sport, usin->sin_port, sk, 1); | 52 | inet->inet_sport, usin->sin_port, sk, true); |
53 | if (err) { | 53 | if (IS_ERR(rt)) { |
54 | err = PTR_ERR(rt); | ||
54 | if (err == -ENETUNREACH) | 55 | if (err == -ENETUNREACH) |
55 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 56 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
56 | return err; | 57 | return err; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 036652c8166d..6d85800daeb7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -51,6 +51,7 @@
51 | #include <linux/inetdevice.h> | 51 | #include <linux/inetdevice.h> |
52 | #include <linux/igmp.h> | 52 | #include <linux/igmp.h> |
53 | #include <linux/slab.h> | 53 | #include <linux/slab.h> |
54 | #include <linux/hash.h> | ||
54 | #ifdef CONFIG_SYSCTL | 55 | #ifdef CONFIG_SYSCTL |
55 | #include <linux/sysctl.h> | 56 | #include <linux/sysctl.h> |
56 | #endif | 57 | #endif |
@@ -92,6 +93,71 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92 | [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, | 93 | [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, |
93 | }; | 94 | }; |
94 | 95 | ||
96 | /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE | ||
97 | * value. So if you change this define, make appropriate changes to | ||
98 | * inet_addr_hash as well. | ||
99 | */ | ||
100 | #define IN4_ADDR_HSIZE 256 | ||
101 | static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; | ||
102 | static DEFINE_SPINLOCK(inet_addr_hash_lock); | ||
103 | |||
104 | static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) | ||
105 | { | ||
106 | u32 val = (__force u32) addr ^ hash_ptr(net, 8); | ||
107 | |||
108 | return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & | ||
109 | (IN4_ADDR_HSIZE - 1)); | ||
110 | } | ||
111 | |||
112 | static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) | ||
113 | { | ||
114 | unsigned int hash = inet_addr_hash(net, ifa->ifa_local); | ||
115 | |||
116 | spin_lock(&inet_addr_hash_lock); | ||
117 | hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); | ||
118 | spin_unlock(&inet_addr_hash_lock); | ||
119 | } | ||
120 | |||
121 | static void inet_hash_remove(struct in_ifaddr *ifa) | ||
122 | { | ||
123 | spin_lock(&inet_addr_hash_lock); | ||
124 | hlist_del_init_rcu(&ifa->hash); | ||
125 | spin_unlock(&inet_addr_hash_lock); | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * __ip_dev_find - find the first device with a given source address. | ||
130 | * @net: the net namespace | ||
131 | * @addr: the source address | ||
132 | * @devref: if true, take a reference on the found device | ||
133 | * | ||
134 | * If a caller uses devref=false, it should be protected by RCU, or RTNL | ||
135 | */ | ||
136 | struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | ||
137 | { | ||
138 | unsigned int hash = inet_addr_hash(net, addr); | ||
139 | struct net_device *result = NULL; | ||
140 | struct in_ifaddr *ifa; | ||
141 | struct hlist_node *node; | ||
142 | |||
143 | rcu_read_lock(); | ||
144 | hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { | ||
145 | struct net_device *dev = ifa->ifa_dev->dev; | ||
146 | |||
147 | if (!net_eq(dev_net(dev), net)) | ||
148 | continue; | ||
149 | if (ifa->ifa_local == addr) { | ||
150 | result = dev; | ||
151 | break; | ||
152 | } | ||
153 | } | ||
154 | if (result && devref) | ||
155 | dev_hold(result); | ||
156 | rcu_read_unlock(); | ||
157 | return result; | ||
158 | } | ||
159 | EXPORT_SYMBOL(__ip_dev_find); | ||
160 | |||
95 | static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); | 161 | static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); |
96 | 162 | ||
97 | static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); | 163 | static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); |
@@ -265,6 +331,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
265 | } | 331 | } |
266 | 332 | ||
267 | if (!do_promote) { | 333 | if (!do_promote) { |
334 | inet_hash_remove(ifa); | ||
268 | *ifap1 = ifa->ifa_next; | 335 | *ifap1 = ifa->ifa_next; |
269 | 336 | ||
270 | rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); | 337 | rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); |
@@ -281,6 +348,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
281 | /* 2. Unlink it */ | 348 | /* 2. Unlink it */ |
282 | 349 | ||
283 | *ifap = ifa1->ifa_next; | 350 | *ifap = ifa1->ifa_next; |
351 | inet_hash_remove(ifa1); | ||
284 | 352 | ||
285 | /* 3. Announce address deletion */ | 353 | /* 3. Announce address deletion */ |
286 | 354 | ||
@@ -368,6 +436,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
368 | ifa->ifa_next = *ifap; | 436 | ifa->ifa_next = *ifap; |
369 | *ifap = ifa; | 437 | *ifap = ifa; |
370 | 438 | ||
439 | inet_hash_insert(dev_net(in_dev->dev), ifa); | ||
440 | |||
371 | /* Send message first, then call notifier. | 441 | /* Send message first, then call notifier. |
372 | Notifier will trigger FIB update, so that | 442 | Notifier will trigger FIB update, so that |
373 | listeners of netlink will know about new ifaddr */ | 443 | listeners of netlink will know about new ifaddr */ |
@@ -521,6 +591,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
521 | if (tb[IFA_ADDRESS] == NULL) | 591 | if (tb[IFA_ADDRESS] == NULL) |
522 | tb[IFA_ADDRESS] = tb[IFA_LOCAL]; | 592 | tb[IFA_ADDRESS] = tb[IFA_LOCAL]; |
523 | 593 | ||
594 | INIT_HLIST_NODE(&ifa->hash); | ||
524 | ifa->ifa_prefixlen = ifm->ifa_prefixlen; | 595 | ifa->ifa_prefixlen = ifm->ifa_prefixlen; |
525 | ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); | 596 | ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); |
526 | ifa->ifa_flags = ifm->ifa_flags; | 597 | ifa->ifa_flags = ifm->ifa_flags; |
@@ -728,6 +799,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
728 | if (!ifa) { | 799 | if (!ifa) { |
729 | ret = -ENOBUFS; | 800 | ret = -ENOBUFS; |
730 | ifa = inet_alloc_ifa(); | 801 | ifa = inet_alloc_ifa(); |
802 | INIT_HLIST_NODE(&ifa->hash); | ||
731 | if (!ifa) | 803 | if (!ifa) |
732 | break; | 804 | break; |
733 | if (colon) | 805 | if (colon) |
@@ -1084,6 +1156,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1084 | struct in_ifaddr *ifa = inet_alloc_ifa(); | 1156 | struct in_ifaddr *ifa = inet_alloc_ifa(); |
1085 | 1157 | ||
1086 | if (ifa) { | 1158 | if (ifa) { |
1159 | INIT_HLIST_NODE(&ifa->hash); | ||
1087 | ifa->ifa_local = | 1160 | ifa->ifa_local = |
1088 | ifa->ifa_address = htonl(INADDR_LOOPBACK); | 1161 | ifa->ifa_address = htonl(INADDR_LOOPBACK); |
1089 | ifa->ifa_prefixlen = 8; | 1162 | ifa->ifa_prefixlen = 8; |
@@ -1720,6 +1793,11 @@ static struct rtnl_af_ops inet_af_ops = {
1720 | 1793 | ||
1721 | void __init devinet_init(void) | 1794 | void __init devinet_init(void) |
1722 | { | 1795 | { |
1796 | int i; | ||
1797 | |||
1798 | for (i = 0; i < IN4_ADDR_HSIZE; i++) | ||
1799 | INIT_HLIST_HEAD(&inet_addr_lst[i]); | ||
1800 | |||
1723 | register_pernet_subsys(&devinet_ops); | 1801 | register_pernet_subsys(&devinet_ops); |
1724 | 1802 | ||
1725 | register_gifconf(PF_INET, inet_gifconf); | 1803 | register_gifconf(PF_INET, inet_gifconf); |
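__ip_dev_find() moves into devinet.c (its old FIB-based version is deleted from fib_frontend.c below) and now answers "which device owns this source address?" from a hash over all configured addresses instead of a routing lookup. The hash XOR-folds the four octets of the address into a single byte, which is why the comment in the hunk warns that IN4_ADDR_HSIZE must stay in step with it. A self-contained sketch of the fold, leaving out the hash_ptr() mixing of the namespace pointer:

#include <stdint.h>

#define IN4_ADDR_HSIZE 256

/* XOR-fold the four octets of an IPv4 address into one of 256
 * buckets; every byte of the address influences the index. */
static unsigned int demo_in4_addr_hash(uint32_t addr)
{
        return (addr ^ (addr >> 8) ^ (addr >> 16) ^ (addr >> 24)) &
               (IN4_ADDR_HSIZE - 1);
}

For 192.168.1.1 the fold is 192 ^ 168 ^ 1 ^ 1 = 104, so (before the per-namespace mixing) the address lands in bucket 104.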
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index e42a905180f0..03f994bcf7de 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
33 | * | 33 | * |
34 | * TODO: Use spare space in skb for this where possible. | 34 | * TODO: Use spare space in skb for this where possible. |
35 | */ | 35 | */ |
36 | static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) | 36 | static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) |
37 | { | 37 | { |
38 | unsigned int len; | 38 | unsigned int len; |
39 | 39 | ||
40 | len = crypto_aead_ivsize(aead); | 40 | len = seqhilen; |
41 | |||
42 | len += crypto_aead_ivsize(aead); | ||
43 | |||
41 | if (len) { | 44 | if (len) { |
42 | len += crypto_aead_alignmask(aead) & | 45 | len += crypto_aead_alignmask(aead) & |
43 | ~(crypto_tfm_ctx_alignment() - 1); | 46 | ~(crypto_tfm_ctx_alignment() - 1); |
@@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
52 | return kmalloc(len, GFP_ATOMIC); | 55 | return kmalloc(len, GFP_ATOMIC); |
53 | } | 56 | } |
54 | 57 | ||
55 | static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) | 58 | static inline __be32 *esp_tmp_seqhi(void *tmp) |
59 | { | ||
60 | return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); | ||
61 | } | ||
62 | static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) | ||
56 | { | 63 | { |
57 | return crypto_aead_ivsize(aead) ? | 64 | return crypto_aead_ivsize(aead) ? |
58 | PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; | 65 | PTR_ALIGN((u8 *)tmp + seqhilen, |
66 | crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; | ||
59 | } | 67 | } |
60 | 68 | ||
61 | static inline struct aead_givcrypt_request *esp_tmp_givreq( | 69 | static inline struct aead_givcrypt_request *esp_tmp_givreq( |
@@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
122 | int plen; | 130 | int plen; |
123 | int tfclen; | 131 | int tfclen; |
124 | int nfrags; | 132 | int nfrags; |
133 | int assoclen; | ||
134 | int sglists; | ||
135 | int seqhilen; | ||
136 | __be32 *seqhi; | ||
125 | 137 | ||
126 | /* skb is pure payload to encrypt */ | 138 | /* skb is pure payload to encrypt */ |
127 | 139 | ||
@@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
151 | goto error; | 163 | goto error; |
152 | nfrags = err; | 164 | nfrags = err; |
153 | 165 | ||
154 | tmp = esp_alloc_tmp(aead, nfrags + 1); | 166 | assoclen = sizeof(*esph); |
167 | sglists = 1; | ||
168 | seqhilen = 0; | ||
169 | |||
170 | if (x->props.flags & XFRM_STATE_ESN) { | ||
171 | sglists += 2; | ||
172 | seqhilen += sizeof(__be32); | ||
173 | assoclen += seqhilen; | ||
174 | } | ||
175 | |||
176 | tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); | ||
155 | if (!tmp) | 177 | if (!tmp) |
156 | goto error; | 178 | goto error; |
157 | 179 | ||
158 | iv = esp_tmp_iv(aead, tmp); | 180 | seqhi = esp_tmp_seqhi(tmp); |
181 | iv = esp_tmp_iv(aead, tmp, seqhilen); | ||
159 | req = esp_tmp_givreq(aead, iv); | 182 | req = esp_tmp_givreq(aead, iv); |
160 | asg = esp_givreq_sg(aead, req); | 183 | asg = esp_givreq_sg(aead, req); |
161 | sg = asg + 1; | 184 | sg = asg + sglists; |
162 | 185 | ||
163 | /* Fill padding... */ | 186 | /* Fill padding... */ |
164 | tail = skb_tail_pointer(trailer); | 187 | tail = skb_tail_pointer(trailer); |
@@ -215,19 +238,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
215 | } | 238 | } |
216 | 239 | ||
217 | esph->spi = x->id.spi; | 240 | esph->spi = x->id.spi; |
218 | esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); | 241 | esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); |
219 | 242 | ||
220 | sg_init_table(sg, nfrags); | 243 | sg_init_table(sg, nfrags); |
221 | skb_to_sgvec(skb, sg, | 244 | skb_to_sgvec(skb, sg, |
222 | esph->enc_data + crypto_aead_ivsize(aead) - skb->data, | 245 | esph->enc_data + crypto_aead_ivsize(aead) - skb->data, |
223 | clen + alen); | 246 | clen + alen); |
224 | sg_init_one(asg, esph, sizeof(*esph)); | 247 | |
248 | if ((x->props.flags & XFRM_STATE_ESN)) { | ||
249 | sg_init_table(asg, 3); | ||
250 | sg_set_buf(asg, &esph->spi, sizeof(__be32)); | ||
251 | *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); | ||
252 | sg_set_buf(asg + 1, seqhi, seqhilen); | ||
253 | sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); | ||
254 | } else | ||
255 | sg_init_one(asg, esph, sizeof(*esph)); | ||
225 | 256 | ||
226 | aead_givcrypt_set_callback(req, 0, esp_output_done, skb); | 257 | aead_givcrypt_set_callback(req, 0, esp_output_done, skb); |
227 | aead_givcrypt_set_crypt(req, sg, sg, clen, iv); | 258 | aead_givcrypt_set_crypt(req, sg, sg, clen, iv); |
228 | aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); | 259 | aead_givcrypt_set_assoc(req, asg, assoclen); |
229 | aead_givcrypt_set_giv(req, esph->enc_data, | 260 | aead_givcrypt_set_giv(req, esph->enc_data, |
230 | XFRM_SKB_CB(skb)->seq.output); | 261 | XFRM_SKB_CB(skb)->seq.output.low); |
231 | 262 | ||
232 | ESP_SKB_CB(skb)->tmp = tmp; | 263 | ESP_SKB_CB(skb)->tmp = tmp; |
233 | err = crypto_aead_givencrypt(req); | 264 | err = crypto_aead_givencrypt(req); |
@@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
346 | struct sk_buff *trailer; | 377 | struct sk_buff *trailer; |
347 | int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); | 378 | int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); |
348 | int nfrags; | 379 | int nfrags; |
380 | int assoclen; | ||
381 | int sglists; | ||
382 | int seqhilen; | ||
383 | __be32 *seqhi; | ||
349 | void *tmp; | 384 | void *tmp; |
350 | u8 *iv; | 385 | u8 *iv; |
351 | struct scatterlist *sg; | 386 | struct scatterlist *sg; |
@@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
362 | goto out; | 397 | goto out; |
363 | nfrags = err; | 398 | nfrags = err; |
364 | 399 | ||
400 | assoclen = sizeof(*esph); | ||
401 | sglists = 1; | ||
402 | seqhilen = 0; | ||
403 | |||
404 | if (x->props.flags & XFRM_STATE_ESN) { | ||
405 | sglists += 2; | ||
406 | seqhilen += sizeof(__be32); | ||
407 | assoclen += seqhilen; | ||
408 | } | ||
409 | |||
365 | err = -ENOMEM; | 410 | err = -ENOMEM; |
366 | tmp = esp_alloc_tmp(aead, nfrags + 1); | 411 | tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); |
367 | if (!tmp) | 412 | if (!tmp) |
368 | goto out; | 413 | goto out; |
369 | 414 | ||
370 | ESP_SKB_CB(skb)->tmp = tmp; | 415 | ESP_SKB_CB(skb)->tmp = tmp; |
371 | iv = esp_tmp_iv(aead, tmp); | 416 | seqhi = esp_tmp_seqhi(tmp); |
417 | iv = esp_tmp_iv(aead, tmp, seqhilen); | ||
372 | req = esp_tmp_req(aead, iv); | 418 | req = esp_tmp_req(aead, iv); |
373 | asg = esp_req_sg(aead, req); | 419 | asg = esp_req_sg(aead, req); |
374 | sg = asg + 1; | 420 | sg = asg + sglists; |
375 | 421 | ||
376 | skb->ip_summed = CHECKSUM_NONE; | 422 | skb->ip_summed = CHECKSUM_NONE; |
377 | 423 | ||
@@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
382 | 428 | ||
383 | sg_init_table(sg, nfrags); | 429 | sg_init_table(sg, nfrags); |
384 | skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); | 430 | skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); |
385 | sg_init_one(asg, esph, sizeof(*esph)); | 431 | |
432 | if ((x->props.flags & XFRM_STATE_ESN)) { | ||
433 | sg_init_table(asg, 3); | ||
434 | sg_set_buf(asg, &esph->spi, sizeof(__be32)); | ||
435 | *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; | ||
436 | sg_set_buf(asg + 1, seqhi, seqhilen); | ||
437 | sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); | ||
438 | } else | ||
439 | sg_init_one(asg, esph, sizeof(*esph)); | ||
386 | 440 | ||
387 | aead_request_set_callback(req, 0, esp_input_done, skb); | 441 | aead_request_set_callback(req, 0, esp_input_done, skb); |
388 | aead_request_set_crypt(req, sg, sg, elen, iv); | 442 | aead_request_set_crypt(req, sg, sg, elen, iv); |
389 | aead_request_set_assoc(req, asg, sizeof(*esph)); | 443 | aead_request_set_assoc(req, asg, assoclen); |
390 | 444 | ||
391 | err = crypto_aead_decrypt(req); | 445 | err = crypto_aead_decrypt(req); |
392 | if (err == -EINPROGRESS) | 446 | if (err == -EINPROGRESS) |
@@ -500,10 +554,20 @@ static int esp_init_authenc(struct xfrm_state *x)
500 | goto error; | 554 | goto error; |
501 | 555 | ||
502 | err = -ENAMETOOLONG; | 556 | err = -ENAMETOOLONG; |
503 | if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", | 557 | |
504 | x->aalg ? x->aalg->alg_name : "digest_null", | 558 | if ((x->props.flags & XFRM_STATE_ESN)) { |
505 | x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) | 559 | if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, |
506 | goto error; | 560 | "authencesn(%s,%s)", |
561 | x->aalg ? x->aalg->alg_name : "digest_null", | ||
562 | x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) | ||
563 | goto error; | ||
564 | } else { | ||
565 | if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, | ||
566 | "authenc(%s,%s)", | ||
567 | x->aalg ? x->aalg->alg_name : "digest_null", | ||
568 | x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) | ||
569 | goto error; | ||
570 | } | ||
507 | 571 | ||
508 | aead = crypto_alloc_aead(authenc_name, 0, 0); | 572 | aead = crypto_alloc_aead(authenc_name, 0, 0); |
509 | err = PTR_ERR(aead); | 573 | err = PTR_ERR(aead); |
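With XFRM_STATE_ESN set, the associated data that the AEAD authenticates grows from the 8-byte (spi, seq_no) pair to the triple (spi, seq_hi, seq_lo): under RFC 4303 extended sequence numbers, the upper 32 bits of the 64-bit sequence counter are covered by the ICV but never transmitted, hence the extra seqhi slot carved out of the tmp buffer and the three-entry scatterlist above. A hedged userspace sketch of the 12-byte layout those scatterlist entries describe:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Assemble the ESN associated data: spi and the low sequence word
 * travel in the ESP header, the high word exists only in local
 * replay state (authenticated, not transmitted). */
static size_t demo_esn_assoc(uint8_t buf[12], uint32_t spi,
                             uint32_t seq_hi, uint32_t seq_lo)
{
        uint32_t be;

        be = htonl(spi);    memcpy(buf + 0, &be, 4);
        be = htonl(seq_hi); memcpy(buf + 4, &be, 4);
        be = htonl(seq_lo); memcpy(buf + 8, &be, 4);
        return 12;      /* matches assoclen = sizeof(*esph) + seqhilen */
}

The "authencesn(...)" template selected in esp_init_authenc() exists precisely to handle this split associated-data layout.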
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd43a878..a373a259253c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net)
51 | { | 51 | { |
52 | struct fib_table *local_table, *main_table; | 52 | struct fib_table *local_table, *main_table; |
53 | 53 | ||
54 | local_table = fib_hash_table(RT_TABLE_LOCAL); | 54 | local_table = fib_trie_table(RT_TABLE_LOCAL); |
55 | if (local_table == NULL) | 55 | if (local_table == NULL) |
56 | return -ENOMEM; | 56 | return -ENOMEM; |
57 | 57 | ||
58 | main_table = fib_hash_table(RT_TABLE_MAIN); | 58 | main_table = fib_trie_table(RT_TABLE_MAIN); |
59 | if (main_table == NULL) | 59 | if (main_table == NULL) |
60 | goto fail; | 60 | goto fail; |
61 | 61 | ||
@@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
82 | if (tb) | 82 | if (tb) |
83 | return tb; | 83 | return tb; |
84 | 84 | ||
85 | tb = fib_hash_table(id); | 85 | tb = fib_trie_table(id); |
86 | if (!tb) | 86 | if (!tb) |
87 | return NULL; | 87 | return NULL; |
88 | h = id & (FIB_TABLE_HASHSZ - 1); | 88 | h = id & (FIB_TABLE_HASHSZ - 1); |
@@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
114 | } | 114 | } |
115 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | 115 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ |
116 | 116 | ||
117 | void fib_select_default(struct net *net, | ||
118 | const struct flowi *flp, struct fib_result *res) | ||
119 | { | ||
120 | struct fib_table *tb; | ||
121 | int table = RT_TABLE_MAIN; | ||
122 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
123 | if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) | ||
124 | return; | ||
125 | table = res->r->table; | ||
126 | #endif | ||
127 | tb = fib_get_table(net, table); | ||
128 | if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | ||
129 | fib_table_select_default(tb, flp, res); | ||
130 | } | ||
131 | |||
132 | static void fib_flush(struct net *net) | 117 | static void fib_flush(struct net *net) |
133 | { | 118 | { |
134 | int flushed = 0; | 119 | int flushed = 0; |
@@ -147,46 +132,6 @@ static void fib_flush(struct net *net)
147 | rt_cache_flush(net, -1); | 132 | rt_cache_flush(net, -1); |
148 | } | 133 | } |
149 | 134 | ||
150 | /** | ||
151 | * __ip_dev_find - find the first device with a given source address. | ||
152 | * @net: the net namespace | ||
153 | * @addr: the source address | ||
154 | * @devref: if true, take a reference on the found device | ||
155 | * | ||
156 | * If a caller uses devref=false, it should be protected by RCU, or RTNL | ||
157 | */ | ||
158 | struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | ||
159 | { | ||
160 | struct flowi fl = { | ||
161 | .fl4_dst = addr, | ||
162 | }; | ||
163 | struct fib_result res = { 0 }; | ||
164 | struct net_device *dev = NULL; | ||
165 | struct fib_table *local_table; | ||
166 | |||
167 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
168 | res.r = NULL; | ||
169 | #endif | ||
170 | |||
171 | rcu_read_lock(); | ||
172 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | ||
173 | if (!local_table || | ||
174 | fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { | ||
175 | rcu_read_unlock(); | ||
176 | return NULL; | ||
177 | } | ||
178 | if (res.type != RTN_LOCAL) | ||
179 | goto out; | ||
180 | dev = FIB_RES_DEV(res); | ||
181 | |||
182 | if (dev && devref) | ||
183 | dev_hold(dev); | ||
184 | out: | ||
185 | rcu_read_unlock(); | ||
186 | return dev; | ||
187 | } | ||
188 | EXPORT_SYMBOL(__ip_dev_find); | ||
189 | |||
190 | /* | 135 | /* |
191 | * Find address type as if only "dev" was present in the system. If | 136 | * Find address type as if only "dev" was present in the system. If |
192 | * on_dev is NULL then all interfaces are taken into consideration. | 137 | * on_dev is NULL then all interfaces are taken into consideration. |
@@ -195,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
195 | const struct net_device *dev, | 140 | const struct net_device *dev, |
196 | __be32 addr) | 141 | __be32 addr) |
197 | { | 142 | { |
198 | struct flowi fl = { .fl4_dst = addr }; | 143 | struct flowi4 fl4 = { .daddr = addr }; |
199 | struct fib_result res; | 144 | struct fib_result res; |
200 | unsigned ret = RTN_BROADCAST; | 145 | unsigned ret = RTN_BROADCAST; |
201 | struct fib_table *local_table; | 146 | struct fib_table *local_table; |
@@ -213,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
213 | if (local_table) { | 158 | if (local_table) { |
214 | ret = RTN_UNICAST; | 159 | ret = RTN_UNICAST; |
215 | rcu_read_lock(); | 160 | rcu_read_lock(); |
216 | if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { | 161 | if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { |
217 | if (!dev || dev == res.fi->fib_dev) | 162 | if (!dev || dev == res.fi->fib_dev) |
218 | ret = res.type; | 163 | ret = res.type; |
219 | } | 164 | } |
@@ -248,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
248 | u32 *itag, u32 mark) | 193 | u32 *itag, u32 mark) |
249 | { | 194 | { |
250 | struct in_device *in_dev; | 195 | struct in_device *in_dev; |
251 | struct flowi fl = { | 196 | struct flowi4 fl4; |
252 | .fl4_dst = src, | ||
253 | .fl4_src = dst, | ||
254 | .fl4_tos = tos, | ||
255 | .mark = mark, | ||
256 | .iif = oif | ||
257 | }; | ||
258 | struct fib_result res; | 197 | struct fib_result res; |
259 | int no_addr, rpf, accept_local; | 198 | int no_addr, rpf, accept_local; |
260 | bool dev_match; | 199 | bool dev_match; |
261 | int ret; | 200 | int ret; |
262 | struct net *net; | 201 | struct net *net; |
263 | 202 | ||
203 | fl4.flowi4_oif = 0; | ||
204 | fl4.flowi4_iif = oif; | ||
205 | fl4.flowi4_mark = mark; | ||
206 | fl4.daddr = src; | ||
207 | fl4.saddr = dst; | ||
208 | fl4.flowi4_tos = tos; | ||
209 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
210 | |||
264 | no_addr = rpf = accept_local = 0; | 211 | no_addr = rpf = accept_local = 0; |
265 | in_dev = __in_dev_get_rcu(dev); | 212 | in_dev = __in_dev_get_rcu(dev); |
266 | if (in_dev) { | 213 | if (in_dev) { |
@@ -268,14 +215,14 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
268 | rpf = IN_DEV_RPFILTER(in_dev); | 215 | rpf = IN_DEV_RPFILTER(in_dev); |
269 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); | 216 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); |
270 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 217 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) |
271 | fl.mark = 0; | 218 | fl4.flowi4_mark = 0; |
272 | } | 219 | } |
273 | 220 | ||
274 | if (in_dev == NULL) | 221 | if (in_dev == NULL) |
275 | goto e_inval; | 222 | goto e_inval; |
276 | 223 | ||
277 | net = dev_net(dev); | 224 | net = dev_net(dev); |
278 | if (fib_lookup(net, &fl, &res)) | 225 | if (fib_lookup(net, &fl4, &res)) |
279 | goto last_resort; | 226 | goto last_resort; |
280 | if (res.type != RTN_UNICAST) { | 227 | if (res.type != RTN_UNICAST) { |
281 | if (res.type != RTN_LOCAL || !accept_local) | 228 | if (res.type != RTN_LOCAL || !accept_local) |
@@ -306,10 +253,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
306 | goto last_resort; | 253 | goto last_resort; |
307 | if (rpf == 1) | 254 | if (rpf == 1) |
308 | goto e_rpf; | 255 | goto e_rpf; |
309 | fl.oif = dev->ifindex; | 256 | fl4.flowi4_oif = dev->ifindex; |
310 | 257 | ||
311 | ret = 0; | 258 | ret = 0; |
312 | if (fib_lookup(net, &fl, &res) == 0) { | 259 | if (fib_lookup(net, &fl4, &res) == 0) { |
313 | if (res.type == RTN_UNICAST) { | 260 | if (res.type == RTN_UNICAST) { |
314 | *spec_dst = FIB_RES_PREFSRC(res); | 261 | *spec_dst = FIB_RES_PREFSRC(res); |
315 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 262 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
@@ -849,11 +796,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
849 | { | 796 | { |
850 | 797 | ||
851 | struct fib_result res; | 798 | struct fib_result res; |
852 | struct flowi fl = { | 799 | struct flowi4 fl4 = { |
853 | .mark = frn->fl_mark, | 800 | .flowi4_mark = frn->fl_mark, |
854 | .fl4_dst = frn->fl_addr, | 801 | .daddr = frn->fl_addr, |
855 | .fl4_tos = frn->fl_tos, | 802 | .flowi4_tos = frn->fl_tos, |
856 | .fl4_scope = frn->fl_scope, | 803 | .flowi4_scope = frn->fl_scope, |
857 | }; | 804 | }; |
858 | 805 | ||
859 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 806 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
@@ -866,7 +813,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
866 | 813 | ||
867 | frn->tb_id = tb->tb_id; | 814 | frn->tb_id = tb->tb_id; |
868 | rcu_read_lock(); | 815 | rcu_read_lock(); |
869 | frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); | 816 | frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); |
870 | 817 | ||
871 | if (!frn->err) { | 818 | if (!frn->err) { |
872 | frn->prefixlen = res.prefixlen; | 819 | frn->prefixlen = res.prefixlen; |
@@ -945,10 +892,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
945 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 892 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
946 | fib_sync_up(dev); | 893 | fib_sync_up(dev); |
947 | #endif | 894 | #endif |
895 | fib_update_nh_saddrs(dev); | ||
948 | rt_cache_flush(dev_net(dev), -1); | 896 | rt_cache_flush(dev_net(dev), -1); |
949 | break; | 897 | break; |
950 | case NETDEV_DOWN: | 898 | case NETDEV_DOWN: |
951 | fib_del_ifaddr(ifa); | 899 | fib_del_ifaddr(ifa); |
900 | fib_update_nh_saddrs(dev); | ||
952 | if (ifa->ifa_dev->ifa_list == NULL) { | 901 | if (ifa->ifa_dev->ifa_list == NULL) { |
953 | /* Last address was deleted from this interface. | 902 | /* Last address was deleted from this interface. |
954 | * Disable IP. | 903 | * Disable IP. |
@@ -1101,5 +1050,5 @@ void __init ip_fib_init(void)
1101 | register_netdevice_notifier(&fib_netdev_notifier); | 1050 | register_netdevice_notifier(&fib_netdev_notifier); |
1102 | register_inetaddr_notifier(&fib_inetaddr_notifier); | 1051 | register_inetaddr_notifier(&fib_inetaddr_notifier); |
1103 | 1052 | ||
1104 | fib_hash_init(); | 1053 | fib_trie_init(); |
1105 | } | 1054 | } |
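Besides switching the default tables from fib_hash_table() to fib_trie_table(), these hunks are part of the tree-wide flowi conversion: IPv4 lookups now fill a dedicated struct flowi4, in which the protocol-independent members gain a flowi4_ prefix and the IPv4 addresses become plain daddr/saddr. The mapping, shown as a schematic before/after fragment (field values are placeholders):

/* before: generic struct flowi with fl4_* aliases */
struct flowi fl = {
        .oif     = oif,
        .mark    = mark,
        .fl4_dst = dst,
        .fl4_src = src,
        .fl4_tos = tos,
};

/* after: IPv4-specific struct flowi4 */
struct flowi4 fl4 = {
        .flowi4_oif  = oif,
        .flowi4_mark = mark,
        .daddr       = dst,
        .saddr       = src,
        .flowi4_tos  = tos,
};

fib_validate_source() above fills its flowi4 field by field rather than with an initializer, and then conditionally clears flowi4_mark again when IN_DEV_SRC_VMARK is unset.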
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
deleted file mode 100644
index b3acb0417b21..000000000000
--- a/net/ipv4/fib_hash.c
+++ /dev/null
@@ -1,1133 +0,0 @@
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IPv4 FIB: lookup engine and maintenance routines. | ||
7 | * | ||
8 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/system.h> | ||
18 | #include <linux/bitops.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/mm.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/sockios.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/inet.h> | ||
28 | #include <linux/inetdevice.h> | ||
29 | #include <linux/netdevice.h> | ||
30 | #include <linux/if_arp.h> | ||
31 | #include <linux/proc_fs.h> | ||
32 | #include <linux/skbuff.h> | ||
33 | #include <linux/netlink.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/slab.h> | ||
36 | |||
37 | #include <net/net_namespace.h> | ||
38 | #include <net/ip.h> | ||
39 | #include <net/protocol.h> | ||
40 | #include <net/route.h> | ||
41 | #include <net/tcp.h> | ||
42 | #include <net/sock.h> | ||
43 | #include <net/ip_fib.h> | ||
44 | |||
45 | #include "fib_lookup.h" | ||
46 | |||
47 | static struct kmem_cache *fn_hash_kmem __read_mostly; | ||
48 | static struct kmem_cache *fn_alias_kmem __read_mostly; | ||
49 | |||
50 | struct fib_node { | ||
51 | struct hlist_node fn_hash; | ||
52 | struct list_head fn_alias; | ||
53 | __be32 fn_key; | ||
54 | struct fib_alias fn_embedded_alias; | ||
55 | }; | ||
56 | |||
57 | #define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head)) | ||
58 | |||
59 | struct fn_zone { | ||
60 | struct fn_zone __rcu *fz_next; /* Next not empty zone */ | ||
61 | struct hlist_head __rcu *fz_hash; /* Hash table pointer */ | ||
62 | seqlock_t fz_lock; | ||
63 | u32 fz_hashmask; /* (fz_divisor - 1) */ | ||
64 | |||
65 | u8 fz_order; /* Zone order (0..32) */ | ||
66 | u8 fz_revorder; /* 32 - fz_order */ | ||
67 | __be32 fz_mask; /* inet_make_mask(order) */ | ||
68 | #define FZ_MASK(fz) ((fz)->fz_mask) | ||
69 | |||
70 | struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE]; | ||
71 | |||
72 | int fz_nent; /* Number of entries */ | ||
73 | int fz_divisor; /* Hash size (mask+1) */ | ||
74 | }; | ||
75 | |||
76 | struct fn_hash { | ||
77 | struct fn_zone *fn_zones[33]; | ||
78 | struct fn_zone __rcu *fn_zone_list; | ||
79 | }; | ||
80 | |||
81 | static inline u32 fn_hash(__be32 key, struct fn_zone *fz) | ||
82 | { | ||
83 | u32 h = ntohl(key) >> fz->fz_revorder; | ||
84 | h ^= (h>>20); | ||
85 | h ^= (h>>10); | ||
86 | h ^= (h>>5); | ||
87 | h &= fz->fz_hashmask; | ||
88 | return h; | ||
89 | } | ||
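For reference, the per-zone hash above keys only on the prefix bits: the destination is pre-masked to the zone's prefix length, shifted down by fz_revorder (32 - order), XOR-folded, and masked to the current table size. A worked trace, assuming a /16 zone (fz_revorder = 16) whose table has grown to 64 buckets:

        /* key = 10.1.0.0, already masked to /16 */
        h  = ntohl(key) >> 16;  /* 0x0a010000 >> 16 = 0x0a01 */
        h ^= h >> 20;           /* h < 2^20, unchanged: 0x0a01 */
        h ^= h >> 10;           /* 0x0a01 ^ 0x0002 = 0x0a03 */
        h ^= h >> 5;            /* 0x0a03 ^ 0x0050 = 0x0a53 */
        h &= 63;                /* fz_hashmask: bucket 0x13 = 19 */

Because each prefix length lives in its own zone with its own hash table, a lookup must walk the zones from most to least specific; fib_trie replaces all of this with one structure shared by every prefix length.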
90 | |||
91 | static inline __be32 fz_key(__be32 dst, struct fn_zone *fz) | ||
92 | { | ||
93 | return dst & FZ_MASK(fz); | ||
94 | } | ||
95 | |||
96 | static unsigned int fib_hash_genid; | ||
97 | |||
98 | #define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head)) | ||
99 | |||
100 | static struct hlist_head *fz_hash_alloc(int divisor) | ||
101 | { | ||
102 | unsigned long size = divisor * sizeof(struct hlist_head); | ||
103 | |||
104 | if (size <= PAGE_SIZE) | ||
105 | return kzalloc(size, GFP_KERNEL); | ||
106 | |||
107 | return (struct hlist_head *) | ||
108 | __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size)); | ||
109 | } | ||
110 | |||
111 | /* The fib hash lock must be held when this is called. */ | ||
112 | static inline void fn_rebuild_zone(struct fn_zone *fz, | ||
113 | struct hlist_head *old_ht, | ||
114 | int old_divisor) | ||
115 | { | ||
116 | int i; | ||
117 | |||
118 | for (i = 0; i < old_divisor; i++) { | ||
119 | struct hlist_node *node, *n; | ||
120 | struct fib_node *f; | ||
121 | |||
122 | hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { | ||
123 | struct hlist_head *new_head; | ||
124 | |||
125 | hlist_del_rcu(&f->fn_hash); | ||
126 | |||
127 | new_head = rcu_dereference_protected(fz->fz_hash, 1) + | ||
128 | fn_hash(f->fn_key, fz); | ||
129 | hlist_add_head_rcu(&f->fn_hash, new_head); | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | |||
134 | static void fz_hash_free(struct hlist_head *hash, int divisor) | ||
135 | { | ||
136 | unsigned long size = divisor * sizeof(struct hlist_head); | ||
137 | |||
138 | if (size <= PAGE_SIZE) | ||
139 | kfree(hash); | ||
140 | else | ||
141 | free_pages((unsigned long)hash, get_order(size)); | ||
142 | } | ||
143 | |||
144 | static void fn_rehash_zone(struct fn_zone *fz) | ||
145 | { | ||
146 | struct hlist_head *ht, *old_ht; | ||
147 | int old_divisor, new_divisor; | ||
148 | u32 new_hashmask; | ||
149 | |||
150 | new_divisor = old_divisor = fz->fz_divisor; | ||
151 | |||
152 | switch (old_divisor) { | ||
153 | case EMBEDDED_HASH_SIZE: | ||
154 | new_divisor *= EMBEDDED_HASH_SIZE; | ||
155 | break; | ||
156 | case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE: | ||
157 | new_divisor *= (EMBEDDED_HASH_SIZE/2); | ||
158 | break; | ||
159 | default: | ||
160 | if ((old_divisor << 1) > FZ_MAX_DIVISOR) { | ||
161 | printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); | ||
162 | return; | ||
163 | } | ||
164 | new_divisor = (old_divisor << 1); | ||
165 | break; | ||
166 | } | ||
167 | |||
168 | new_hashmask = (new_divisor - 1); | ||
169 | |||
170 | #if RT_CACHE_DEBUG >= 2 | ||
171 | printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n", | ||
172 | fz->fz_order, old_divisor); | ||
173 | #endif | ||
174 | |||
175 | ht = fz_hash_alloc(new_divisor); | ||
176 | |||
177 | if (ht) { | ||
178 | struct fn_zone nfz; | ||
179 | |||
180 | memcpy(&nfz, fz, sizeof(nfz)); | ||
181 | |||
182 | write_seqlock_bh(&fz->fz_lock); | ||
183 | old_ht = rcu_dereference_protected(fz->fz_hash, 1); | ||
184 | RCU_INIT_POINTER(nfz.fz_hash, ht); | ||
185 | nfz.fz_hashmask = new_hashmask; | ||
186 | nfz.fz_divisor = new_divisor; | ||
187 | fn_rebuild_zone(&nfz, old_ht, old_divisor); | ||
188 | fib_hash_genid++; | ||
189 | rcu_assign_pointer(fz->fz_hash, ht); | ||
190 | fz->fz_hashmask = new_hashmask; | ||
191 | fz->fz_divisor = new_divisor; | ||
192 | write_sequnlock_bh(&fz->fz_lock); | ||
193 | |||
194 | if (old_ht != fz->fz_embedded_hash) { | ||
195 | synchronize_rcu(); | ||
196 | fz_hash_free(old_ht, old_divisor); | ||
197 | } | ||
198 | } | ||
199 | } | ||
200 | |||
201 | static void fn_free_node_rcu(struct rcu_head *head) | ||
202 | { | ||
203 | struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu); | ||
204 | |||
205 | kmem_cache_free(fn_hash_kmem, f); | ||
206 | } | ||
207 | |||
208 | static inline void fn_free_node(struct fib_node *f) | ||
209 | { | ||
210 | call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu); | ||
211 | } | ||
212 | |||
213 | static void fn_free_alias_rcu(struct rcu_head *head) | ||
214 | { | ||
215 | struct fib_alias *fa = container_of(head, struct fib_alias, rcu); | ||
216 | |||
217 | kmem_cache_free(fn_alias_kmem, fa); | ||
218 | } | ||
219 | |||
220 | static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) | ||
221 | { | ||
222 | fib_release_info(fa->fa_info); | ||
223 | if (fa == &f->fn_embedded_alias) | ||
224 | fa->fa_info = NULL; | ||
225 | else | ||
226 | call_rcu(&fa->rcu, fn_free_alias_rcu); | ||
227 | } | ||
228 | |||
229 | static struct fn_zone * | ||
230 | fn_new_zone(struct fn_hash *table, int z) | ||
231 | { | ||
232 | int i; | ||
233 | struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL); | ||
234 | if (!fz) | ||
235 | return NULL; | ||
236 | |||
237 | seqlock_init(&fz->fz_lock); | ||
238 | fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1; | ||
239 | fz->fz_hashmask = fz->fz_divisor - 1; | ||
240 | RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash); | ||
241 | fz->fz_order = z; | ||
242 | fz->fz_revorder = 32 - z; | ||
243 | fz->fz_mask = inet_make_mask(z); | ||
244 | |||
245 | /* Find the first not empty zone with more specific mask */ | ||
246 | for (i = z + 1; i <= 32; i++) | ||
247 | if (table->fn_zones[i]) | ||
248 | break; | ||
249 | if (i > 32) { | ||
250 | /* No more specific masks, we are the first. */ | ||
251 | rcu_assign_pointer(fz->fz_next, | ||
252 | rtnl_dereference(table->fn_zone_list)); | ||
253 | rcu_assign_pointer(table->fn_zone_list, fz); | ||
254 | } else { | ||
255 | rcu_assign_pointer(fz->fz_next, | ||
256 | rtnl_dereference(table->fn_zones[i]->fz_next)); | ||
257 | rcu_assign_pointer(table->fn_zones[i]->fz_next, fz); | ||
258 | } | ||
259 | table->fn_zones[z] = fz; | ||
260 | fib_hash_genid++; | ||
261 | return fz; | ||
262 | } | ||
263 | |||
264 | int fib_table_lookup(struct fib_table *tb, | ||
265 | const struct flowi *flp, struct fib_result *res, | ||
266 | int fib_flags) | ||
267 | { | ||
268 | int err; | ||
269 | struct fn_zone *fz; | ||
270 | struct fn_hash *t = (struct fn_hash *)tb->tb_data; | ||
271 | |||
272 | rcu_read_lock(); | ||
273 | for (fz = rcu_dereference(t->fn_zone_list); | ||
274 | fz != NULL; | ||
275 | fz = rcu_dereference(fz->fz_next)) { | ||
276 | struct hlist_head *head; | ||
277 | struct hlist_node *node; | ||
278 | struct fib_node *f; | ||
279 | __be32 k; | ||
280 | unsigned int seq; | ||
281 | |||
282 | do { | ||
283 | seq = read_seqbegin(&fz->fz_lock); | ||
284 | k = fz_key(flp->fl4_dst, fz); | ||
285 | |||
286 | head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz); | ||
287 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
288 | if (f->fn_key != k) | ||
289 | continue; | ||
290 | |||
291 | err = fib_semantic_match(&f->fn_alias, | ||
292 | flp, res, | ||
293 | fz->fz_order, fib_flags); | ||
294 | if (err <= 0) | ||
295 | goto out; | ||
296 | } | ||
297 | } while (read_seqretry(&fz->fz_lock, seq)); | ||
298 | } | ||
299 | err = 1; | ||
300 | out: | ||
301 | rcu_read_unlock(); | ||
302 | return err; | ||
303 | } | ||
304 | |||
305 | void fib_table_select_default(struct fib_table *tb, | ||
306 | const struct flowi *flp, struct fib_result *res) | ||
307 | { | ||
308 | int order, last_idx; | ||
309 | struct hlist_node *node; | ||
310 | struct fib_node *f; | ||
311 | struct fib_info *fi = NULL; | ||
312 | struct fib_info *last_resort; | ||
313 | struct fn_hash *t = (struct fn_hash *)tb->tb_data; | ||
314 | struct fn_zone *fz = t->fn_zones[0]; | ||
315 | struct hlist_head *head; | ||
316 | |||
317 | if (fz == NULL) | ||
318 | return; | ||
319 | |||
320 | last_idx = -1; | ||
321 | last_resort = NULL; | ||
322 | order = -1; | ||
323 | |||
324 | rcu_read_lock(); | ||
325 | head = rcu_dereference(fz->fz_hash); | ||
326 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
327 | struct fib_alias *fa; | ||
328 | |||
329 | list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { | ||
330 | struct fib_info *next_fi = fa->fa_info; | ||
331 | |||
332 | if (fa->fa_scope != res->scope || | ||
333 | fa->fa_type != RTN_UNICAST) | ||
334 | continue; | ||
335 | |||
336 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
337 | break; | ||
338 | if (!next_fi->fib_nh[0].nh_gw || | ||
339 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
340 | continue; | ||
341 | |||
342 | fib_alias_accessed(fa); | ||
343 | |||
344 | if (fi == NULL) { | ||
345 | if (next_fi != res->fi) | ||
346 | break; | ||
347 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
348 | &last_idx, tb->tb_default)) { | ||
349 | fib_result_assign(res, fi); | ||
350 | tb->tb_default = order; | ||
351 | goto out; | ||
352 | } | ||
353 | fi = next_fi; | ||
354 | order++; | ||
355 | } | ||
356 | } | ||
357 | |||
358 | if (order <= 0 || fi == NULL) { | ||
359 | tb->tb_default = -1; | ||
360 | goto out; | ||
361 | } | ||
362 | |||
363 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
364 | tb->tb_default)) { | ||
365 | fib_result_assign(res, fi); | ||
366 | tb->tb_default = order; | ||
367 | goto out; | ||
368 | } | ||
369 | |||
370 | if (last_idx >= 0) | ||
371 | fib_result_assign(res, last_resort); | ||
372 | tb->tb_default = last_idx; | ||
373 | out: | ||
374 | rcu_read_unlock(); | ||
375 | } | ||
376 | |||
377 | /* Insert node F to FZ. */ | ||
378 | static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) | ||
379 | { | ||
380 | struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz); | ||
381 | |||
382 | hlist_add_head_rcu(&f->fn_hash, head); | ||
383 | } | ||
384 | |||
385 | /* Return the node in FZ matching KEY. */ | ||
386 | static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) | ||
387 | { | ||
388 | struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz); | ||
389 | struct hlist_node *node; | ||
390 | struct fib_node *f; | ||
391 | |||
392 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
393 | if (f->fn_key == key) | ||
394 | return f; | ||
395 | } | ||
396 | |||
397 | return NULL; | ||
398 | } | ||
399 | |||
400 | |||
401 | static struct fib_alias *fib_fast_alloc(struct fib_node *f) | ||
402 | { | ||
403 | struct fib_alias *fa = &f->fn_embedded_alias; | ||
404 | |||
405 | if (fa->fa_info != NULL) | ||
406 | fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); | ||
407 | return fa; | ||
408 | } | ||
409 | |||
410 | /* Caller must hold RTNL. */ | ||
411 | int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | ||
412 | { | ||
413 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | ||
414 | struct fib_node *new_f = NULL; | ||
415 | struct fib_node *f; | ||
416 | struct fib_alias *fa, *new_fa; | ||
417 | struct fn_zone *fz; | ||
418 | struct fib_info *fi; | ||
419 | u8 tos = cfg->fc_tos; | ||
420 | __be32 key; | ||
421 | int err; | ||
422 | |||
423 | if (cfg->fc_dst_len > 32) | ||
424 | return -EINVAL; | ||
425 | |||
426 | fz = table->fn_zones[cfg->fc_dst_len]; | ||
427 | if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) | ||
428 | return -ENOBUFS; | ||
429 | |||
430 | key = 0; | ||
431 | if (cfg->fc_dst) { | ||
432 | if (cfg->fc_dst & ~FZ_MASK(fz)) | ||
433 | return -EINVAL; | ||
434 | key = fz_key(cfg->fc_dst, fz); | ||
435 | } | ||
436 | |||
437 | fi = fib_create_info(cfg); | ||
438 | if (IS_ERR(fi)) | ||
439 | return PTR_ERR(fi); | ||
440 | |||
441 | if (fz->fz_nent > (fz->fz_divisor<<1) && | ||
442 | fz->fz_divisor < FZ_MAX_DIVISOR && | ||
443 | (cfg->fc_dst_len == 32 || | ||
444 | (1 << cfg->fc_dst_len) > fz->fz_divisor)) | ||
445 | fn_rehash_zone(fz); | ||
446 | |||
447 | f = fib_find_node(fz, key); | ||
448 | |||
449 | if (!f) | ||
450 | fa = NULL; | ||
451 | else | ||
452 | fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority); | ||
453 | |||
454 | /* Now fa, if non-NULL, points to the first fib alias | ||
455 | * with the same keys [prefix,tos,priority], if such key already | ||
456 | * exists or to the node before which we will insert new one. | ||
457 | * | ||
458 | * If fa is NULL, we will need to allocate a new one and | ||
459 | * insert to the head of f. | ||
460 | * | ||
461 | * If f is NULL, no fib node matched the destination key | ||
462 | * and we need to allocate a new one of those as well. | ||
463 | */ | ||
464 | |||
465 | if (fa && fa->fa_tos == tos && | ||
466 | fa->fa_info->fib_priority == fi->fib_priority) { | ||
467 | struct fib_alias *fa_first, *fa_match; | ||
468 | |||
469 | err = -EEXIST; | ||
470 | if (cfg->fc_nlflags & NLM_F_EXCL) | ||
471 | goto out; | ||
472 | |||
473 | /* We have 2 goals: | ||
474 | * 1. Find exact match for type, scope, fib_info to avoid | ||
475 | * duplicate routes | ||
476 | * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it | ||
477 | */ | ||
478 | fa_match = NULL; | ||
479 | fa_first = fa; | ||
480 | fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); | ||
481 | list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { | ||
482 | if (fa->fa_tos != tos) | ||
483 | break; | ||
484 | if (fa->fa_info->fib_priority != fi->fib_priority) | ||
485 | break; | ||
486 | if (fa->fa_type == cfg->fc_type && | ||
487 | fa->fa_scope == cfg->fc_scope && | ||
488 | fa->fa_info == fi) { | ||
489 | fa_match = fa; | ||
490 | break; | ||
491 | } | ||
492 | } | ||
493 | |||
494 | if (cfg->fc_nlflags & NLM_F_REPLACE) { | ||
495 | u8 state; | ||
496 | |||
497 | fa = fa_first; | ||
498 | if (fa_match) { | ||
499 | if (fa == fa_match) | ||
500 | err = 0; | ||
501 | goto out; | ||
502 | } | ||
503 | err = -ENOBUFS; | ||
504 | new_fa = fib_fast_alloc(f); | ||
505 | if (new_fa == NULL) | ||
506 | goto out; | ||
507 | |||
508 | new_fa->fa_tos = fa->fa_tos; | ||
509 | new_fa->fa_info = fi; | ||
510 | new_fa->fa_type = cfg->fc_type; | ||
511 | new_fa->fa_scope = cfg->fc_scope; | ||
512 | state = fa->fa_state; | ||
513 | new_fa->fa_state = state & ~FA_S_ACCESSED; | ||
514 | fib_hash_genid++; | ||
515 | list_replace_rcu(&fa->fa_list, &new_fa->fa_list); | ||
516 | |||
517 | fn_free_alias(fa, f); | ||
518 | if (state & FA_S_ACCESSED) | ||
519 | rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); | ||
520 | rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, | ||
521 | tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); | ||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | /* Return an error if we find a perfect match which | ||
526 | * uses the same scope, type, and nexthop | ||
527 | * information. | ||
528 | */ | ||
529 | if (fa_match) | ||
530 | goto out; | ||
531 | |||
532 | if (!(cfg->fc_nlflags & NLM_F_APPEND)) | ||
533 | fa = fa_first; | ||
534 | } | ||
535 | |||
536 | err = -ENOENT; | ||
537 | if (!(cfg->fc_nlflags & NLM_F_CREATE)) | ||
538 | goto out; | ||
539 | |||
540 | err = -ENOBUFS; | ||
541 | |||
542 | if (!f) { | ||
543 | new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL); | ||
544 | if (new_f == NULL) | ||
545 | goto out; | ||
546 | |||
547 | INIT_HLIST_NODE(&new_f->fn_hash); | ||
548 | INIT_LIST_HEAD(&new_f->fn_alias); | ||
549 | new_f->fn_key = key; | ||
550 | f = new_f; | ||
551 | } | ||
552 | |||
553 | new_fa = fib_fast_alloc(f); | ||
554 | if (new_fa == NULL) | ||
555 | goto out; | ||
556 | |||
557 | new_fa->fa_info = fi; | ||
558 | new_fa->fa_tos = tos; | ||
559 | new_fa->fa_type = cfg->fc_type; | ||
560 | new_fa->fa_scope = cfg->fc_scope; | ||
561 | new_fa->fa_state = 0; | ||
562 | |||
563 | /* | ||
564 | * Insert the new entry into the list. | ||
565 | */ | ||
566 | |||
567 | if (new_f) | ||
568 | fib_insert_node(fz, new_f); | ||
569 | list_add_tail_rcu(&new_fa->fa_list, | ||
570 | (fa ? &fa->fa_list : &f->fn_alias)); | ||
571 | fib_hash_genid++; | ||
572 | |||
573 | if (new_f) | ||
574 | fz->fz_nent++; | ||
575 | rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); | ||
576 | |||
577 | rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, | ||
578 | &cfg->fc_nlinfo, 0); | ||
579 | return 0; | ||
580 | |||
581 | out: | ||
582 | if (new_f) | ||
583 | kmem_cache_free(fn_hash_kmem, new_f); | ||
584 | fib_release_info(fi); | ||
585 | return err; | ||
586 | } | ||
587 | |||
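The insert path above is driven entirely by the netlink NLM_F_* request flags. Below is a minimal user-space sketch of the branch selection; the flag values are from <linux/netlink.h>, but the single have_match flag is a deliberate simplification — the real code also distinguishes an exact fib_info match within the matching [prefix,tos,priority] group.

    #include <stdio.h>

    /* Flag values as in <linux/netlink.h>. */
    #define NLM_F_REPLACE 0x100
    #define NLM_F_EXCL    0x200
    #define NLM_F_CREATE  0x400
    #define NLM_F_APPEND  0x800

    /* Which branch fib_table_insert() takes for a given flag set when a
     * matching alias does (or does not) already exist. */
    static const char *insert_action(int flags, int have_match)
    {
        if (have_match && (flags & NLM_F_EXCL))
            return "-EEXIST";              /* duplicates forbidden */
        if (have_match && (flags & NLM_F_REPLACE))
            return "replace in place";     /* list_replace_rcu() path */
        if (!have_match && !(flags & NLM_F_CREATE))
            return "-ENOENT";              /* nothing to create */
        return (flags & NLM_F_APPEND) ? "append after matching group"
                                      : "insert before first match";
    }

    int main(void)
    {
        printf("%s\n", insert_action(NLM_F_CREATE, 0));
        printf("%s\n", insert_action(NLM_F_CREATE | NLM_F_EXCL, 1));
        printf("%s\n", insert_action(NLM_F_REPLACE, 1));
        return 0;
    }

Run together, the three calls print "insert before first match", "-EEXIST" and "replace in place", mirroring the goto targets in fib_table_insert().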
588 | int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | ||
589 | { | ||
590 | struct fn_hash *table = (struct fn_hash *)tb->tb_data; | ||
591 | struct fib_node *f; | ||
592 | struct fib_alias *fa, *fa_to_delete; | ||
593 | struct fn_zone *fz; | ||
594 | __be32 key; | ||
595 | |||
596 | if (cfg->fc_dst_len > 32) | ||
597 | return -EINVAL; | ||
598 | |||
599 | if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) | ||
600 | return -ESRCH; | ||
601 | |||
602 | key = 0; | ||
603 | if (cfg->fc_dst) { | ||
604 | if (cfg->fc_dst & ~FZ_MASK(fz)) | ||
605 | return -EINVAL; | ||
606 | key = fz_key(cfg->fc_dst, fz); | ||
607 | } | ||
608 | |||
609 | f = fib_find_node(fz, key); | ||
610 | |||
611 | if (!f) | ||
612 | fa = NULL; | ||
613 | else | ||
614 | fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); | ||
615 | if (!fa) | ||
616 | return -ESRCH; | ||
617 | |||
618 | fa_to_delete = NULL; | ||
619 | fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); | ||
620 | list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { | ||
621 | struct fib_info *fi = fa->fa_info; | ||
622 | |||
623 | if (fa->fa_tos != cfg->fc_tos) | ||
624 | break; | ||
625 | |||
626 | if ((!cfg->fc_type || | ||
627 | fa->fa_type == cfg->fc_type) && | ||
628 | (cfg->fc_scope == RT_SCOPE_NOWHERE || | ||
629 | fa->fa_scope == cfg->fc_scope) && | ||
630 | (!cfg->fc_protocol || | ||
631 | fi->fib_protocol == cfg->fc_protocol) && | ||
632 | fib_nh_match(cfg, fi) == 0) { | ||
633 | fa_to_delete = fa; | ||
634 | break; | ||
635 | } | ||
636 | } | ||
637 | |||
638 | if (fa_to_delete) { | ||
639 | int kill_fn; | ||
640 | |||
641 | fa = fa_to_delete; | ||
642 | rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, | ||
643 | tb->tb_id, &cfg->fc_nlinfo, 0); | ||
644 | |||
645 | kill_fn = 0; | ||
646 | list_del_rcu(&fa->fa_list); | ||
647 | if (list_empty(&f->fn_alias)) { | ||
648 | hlist_del_rcu(&f->fn_hash); | ||
649 | kill_fn = 1; | ||
650 | } | ||
651 | fib_hash_genid++; | ||
652 | |||
653 | if (fa->fa_state & FA_S_ACCESSED) | ||
654 | rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); | ||
655 | fn_free_alias(fa, f); | ||
656 | if (kill_fn) { | ||
657 | fn_free_node(f); | ||
658 | fz->fz_nent--; | ||
659 | } | ||
660 | |||
661 | return 0; | ||
662 | } | ||
663 | return -ESRCH; | ||
664 | } | ||
665 | |||
666 | static int fn_flush_list(struct fn_zone *fz, int idx) | ||
667 | { | ||
668 | struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx; | ||
669 | struct hlist_node *node, *n; | ||
670 | struct fib_node *f; | ||
671 | int found = 0; | ||
672 | |||
673 | hlist_for_each_entry_safe(f, node, n, head, fn_hash) { | ||
674 | struct fib_alias *fa, *fa_node; | ||
675 | int kill_f; | ||
676 | |||
677 | kill_f = 0; | ||
678 | list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { | ||
679 | struct fib_info *fi = fa->fa_info; | ||
680 | |||
681 | if (fi && (fi->fib_flags&RTNH_F_DEAD)) { | ||
682 | list_del_rcu(&fa->fa_list); | ||
683 | if (list_empty(&f->fn_alias)) { | ||
684 | hlist_del_rcu(&f->fn_hash); | ||
685 | kill_f = 1; | ||
686 | } | ||
687 | fib_hash_genid++; | ||
688 | |||
689 | fn_free_alias(fa, f); | ||
690 | found++; | ||
691 | } | ||
692 | } | ||
693 | if (kill_f) { | ||
694 | fn_free_node(f); | ||
695 | fz->fz_nent--; | ||
696 | } | ||
697 | } | ||
698 | return found; | ||
699 | } | ||
700 | |||
701 | /* Caller must hold RTNL. */ | ||
702 | int fib_table_flush(struct fib_table *tb) | ||
703 | { | ||
704 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | ||
705 | struct fn_zone *fz; | ||
706 | int found = 0; | ||
707 | |||
708 | for (fz = rtnl_dereference(table->fn_zone_list); | ||
709 | fz != NULL; | ||
710 | fz = rtnl_dereference(fz->fz_next)) { | ||
711 | int i; | ||
712 | |||
713 | for (i = fz->fz_divisor - 1; i >= 0; i--) | ||
714 | found += fn_flush_list(fz, i); | ||
715 | } | ||
716 | return found; | ||
717 | } | ||
718 | |||
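fn_flush_list() above unlinks aliases whose fib_info is flagged RTNH_F_DEAD while it walks the chain, which is why it uses the _safe iterator variants. A minimal user-space sketch of the same unlink-while-walking pattern over a singly linked list (the entry layout is a hypothetical stand-in):

    #include <stdio.h>
    #include <stdlib.h>

    struct entry { int dead; struct entry *next; };

    static int flush(struct entry **head)
    {
        struct entry **pp = head, *e;
        int found = 0;

        while ((e = *pp) != NULL) {
            if (e->dead) {
                *pp = e->next;      /* unlink; do not advance pp */
                free(e);
                found++;
            } else {
                pp = &e->next;      /* keep entry, advance */
            }
        }
        return found;
    }

    int main(void)
    {
        struct entry *head = NULL;

        for (int i = 0; i < 4; i++) {
            struct entry *e = malloc(sizeof(*e));
            e->dead = (i & 1);      /* mark every other entry dead */
            e->next = head;
            head = e;
        }
        printf("flushed %d\n", flush(&head));   /* prints: flushed 2 */
        return 0;
    }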
719 | void fib_free_table(struct fib_table *tb) | ||
720 | { | ||
721 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | ||
722 | struct fn_zone *fz, *next; | ||
723 | |||
724 | next = table->fn_zone_list; | ||
725 | while (next != NULL) { | ||
726 | fz = next; | ||
727 | next = fz->fz_next; | ||
728 | |||
729 | if (fz->fz_hash != fz->fz_embedded_hash) | ||
730 | fz_hash_free(fz->fz_hash, fz->fz_divisor); | ||
731 | |||
732 | kfree(fz); | ||
733 | } | ||
734 | |||
735 | kfree(tb); | ||
736 | } | ||
737 | |||
738 | static inline int | ||
739 | fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, | ||
740 | struct fib_table *tb, | ||
741 | struct fn_zone *fz, | ||
742 | struct hlist_head *head) | ||
743 | { | ||
744 | struct hlist_node *node; | ||
745 | struct fib_node *f; | ||
746 | int i, s_i; | ||
747 | |||
748 | s_i = cb->args[4]; | ||
749 | i = 0; | ||
750 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
751 | struct fib_alias *fa; | ||
752 | |||
753 | list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { | ||
754 | if (i < s_i) | ||
755 | goto next; | ||
756 | |||
757 | if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, | ||
758 | cb->nlh->nlmsg_seq, | ||
759 | RTM_NEWROUTE, | ||
760 | tb->tb_id, | ||
761 | fa->fa_type, | ||
762 | fa->fa_scope, | ||
763 | f->fn_key, | ||
764 | fz->fz_order, | ||
765 | fa->fa_tos, | ||
766 | fa->fa_info, | ||
767 | NLM_F_MULTI) < 0) { | ||
768 | cb->args[4] = i; | ||
769 | return -1; | ||
770 | } | ||
771 | next: | ||
772 | i++; | ||
773 | } | ||
774 | } | ||
775 | cb->args[4] = i; | ||
776 | return skb->len; | ||
777 | } | ||
778 | |||
779 | static inline int | ||
780 | fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, | ||
781 | struct fib_table *tb, | ||
782 | struct fn_zone *fz) | ||
783 | { | ||
784 | int h, s_h; | ||
785 | struct hlist_head *head = rcu_dereference(fz->fz_hash); | ||
786 | |||
787 | if (head == NULL) | ||
788 | return skb->len; | ||
789 | s_h = cb->args[3]; | ||
790 | for (h = s_h; h < fz->fz_divisor; h++) { | ||
791 | if (hlist_empty(head + h)) | ||
792 | continue; | ||
793 | if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) { | ||
794 | cb->args[3] = h; | ||
795 | return -1; | ||
796 | } | ||
797 | memset(&cb->args[4], 0, | ||
798 | sizeof(cb->args) - 4*sizeof(cb->args[0])); | ||
799 | } | ||
800 | cb->args[3] = h; | ||
801 | return skb->len; | ||
802 | } | ||
803 | |||
804 | int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, | ||
805 | struct netlink_callback *cb) | ||
806 | { | ||
807 | int m = 0, s_m; | ||
808 | struct fn_zone *fz; | ||
809 | struct fn_hash *table = (struct fn_hash *)tb->tb_data; | ||
810 | |||
811 | s_m = cb->args[2]; | ||
812 | rcu_read_lock(); | ||
813 | for (fz = rcu_dereference(table->fn_zone_list); | ||
814 | fz != NULL; | ||
815 | fz = rcu_dereference(fz->fz_next), m++) { | ||
816 | if (m < s_m) | ||
817 | continue; | ||
818 | if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { | ||
819 | cb->args[2] = m; | ||
820 | rcu_read_unlock(); | ||
821 | return -1; | ||
822 | } | ||
823 | memset(&cb->args[3], 0, | ||
824 | sizeof(cb->args) - 3*sizeof(cb->args[0])); | ||
825 | } | ||
826 | rcu_read_unlock(); | ||
827 | cb->args[2] = m; | ||
828 | return skb->len; | ||
829 | } | ||
830 | |||
831 | void __init fib_hash_init(void) | ||
832 | { | ||
833 | fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), | ||
834 | 0, SLAB_PANIC, NULL); | ||
835 | |||
836 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), | ||
837 | 0, SLAB_PANIC, NULL); | ||
838 | |||
839 | } | ||
840 | |||
841 | struct fib_table *fib_hash_table(u32 id) | ||
842 | { | ||
843 | struct fib_table *tb; | ||
844 | |||
845 | tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), | ||
846 | GFP_KERNEL); | ||
847 | if (tb == NULL) | ||
848 | return NULL; | ||
849 | |||
850 | tb->tb_id = id; | ||
851 | tb->tb_default = -1; | ||
852 | |||
853 | memset(tb->tb_data, 0, sizeof(struct fn_hash)); | ||
854 | return tb; | ||
855 | } | ||
856 | |||
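fib_hash_table() above performs one allocation for both the struct fib_table header and the struct fn_hash it fronts, with tb_data acting as the overlay for the payload. A sketch of the same single-allocation pattern using a C99 flexible array member (the payload struct here is a made-up stand-in):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct payload { int zones; };          /* stand-in for struct fn_hash */
    struct table {
        unsigned id;
        char data[];                        /* C99 flexible array member */
    };

    int main(void)
    {
        struct table *tb = malloc(sizeof(*tb) + sizeof(struct payload));
        if (!tb)
            return 1;
        tb->id = 254;                       /* RT_TABLE_MAIN */
        memset(tb->data, 0, sizeof(struct payload));
        ((struct payload *)tb->data)->zones = 33;   /* prefixes /0../32 */
        printf("table %u, zones %d\n", tb->id,
               ((struct payload *)tb->data)->zones);
        free(tb);
        return 0;
    }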
857 | /* ------------------------------------------------------------------------ */ | ||
858 | #ifdef CONFIG_PROC_FS | ||
859 | |||
860 | struct fib_iter_state { | ||
861 | struct seq_net_private p; | ||
862 | struct fn_zone *zone; | ||
863 | int bucket; | ||
864 | struct hlist_head *hash_head; | ||
865 | struct fib_node *fn; | ||
866 | struct fib_alias *fa; | ||
867 | loff_t pos; | ||
868 | unsigned int genid; | ||
869 | int valid; | ||
870 | }; | ||
871 | |||
872 | static struct fib_alias *fib_get_first(struct seq_file *seq) | ||
873 | { | ||
874 | struct fib_iter_state *iter = seq->private; | ||
875 | struct fib_table *main_table; | ||
876 | struct fn_hash *table; | ||
877 | |||
878 | main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); | ||
879 | table = (struct fn_hash *)main_table->tb_data; | ||
880 | |||
881 | iter->bucket = 0; | ||
882 | iter->hash_head = NULL; | ||
883 | iter->fn = NULL; | ||
884 | iter->fa = NULL; | ||
885 | iter->pos = 0; | ||
886 | iter->genid = fib_hash_genid; | ||
887 | iter->valid = 1; | ||
888 | |||
889 | for (iter->zone = rcu_dereference(table->fn_zone_list); | ||
890 | iter->zone != NULL; | ||
891 | iter->zone = rcu_dereference(iter->zone->fz_next)) { | ||
892 | int maxslot; | ||
893 | |||
894 | if (!iter->zone->fz_nent) | ||
895 | continue; | ||
896 | |||
897 | iter->hash_head = rcu_dereference(iter->zone->fz_hash); | ||
898 | maxslot = iter->zone->fz_divisor; | ||
899 | |||
900 | for (iter->bucket = 0; iter->bucket < maxslot; | ||
901 | ++iter->bucket, ++iter->hash_head) { | ||
902 | struct hlist_node *node; | ||
903 | struct fib_node *fn; | ||
904 | |||
905 | hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { | ||
906 | struct fib_alias *fa; | ||
907 | |||
908 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
909 | iter->fn = fn; | ||
910 | iter->fa = fa; | ||
911 | goto out; | ||
912 | } | ||
913 | } | ||
914 | } | ||
915 | } | ||
916 | out: | ||
917 | return iter->fa; | ||
918 | } | ||
919 | |||
920 | static struct fib_alias *fib_get_next(struct seq_file *seq) | ||
921 | { | ||
922 | struct fib_iter_state *iter = seq->private; | ||
923 | struct fib_node *fn; | ||
924 | struct fib_alias *fa; | ||
925 | |||
926 | /* Advance FA, if any. */ | ||
927 | fn = iter->fn; | ||
928 | fa = iter->fa; | ||
929 | if (fa) { | ||
930 | BUG_ON(!fn); | ||
931 | list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) { | ||
932 | iter->fa = fa; | ||
933 | goto out; | ||
934 | } | ||
935 | } | ||
936 | |||
937 | fa = iter->fa = NULL; | ||
938 | |||
939 | /* Advance FN. */ | ||
940 | if (fn) { | ||
941 | struct hlist_node *node = &fn->fn_hash; | ||
942 | hlist_for_each_entry_continue(fn, node, fn_hash) { | ||
943 | iter->fn = fn; | ||
944 | |||
945 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
946 | iter->fa = fa; | ||
947 | goto out; | ||
948 | } | ||
949 | } | ||
950 | } | ||
951 | |||
952 | fn = iter->fn = NULL; | ||
953 | |||
954 | /* Advance hash chain. */ | ||
955 | if (!iter->zone) | ||
956 | goto out; | ||
957 | |||
958 | for (;;) { | ||
959 | struct hlist_node *node; | ||
960 | int maxslot; | ||
961 | |||
962 | maxslot = iter->zone->fz_divisor; | ||
963 | |||
964 | while (++iter->bucket < maxslot) { | ||
965 | iter->hash_head++; | ||
966 | |||
967 | hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { | ||
968 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
969 | iter->fn = fn; | ||
970 | iter->fa = fa; | ||
971 | goto out; | ||
972 | } | ||
973 | } | ||
974 | } | ||
975 | |||
976 | iter->zone = rcu_dereference(iter->zone->fz_next); | ||
977 | |||
978 | if (!iter->zone) | ||
979 | goto out; | ||
980 | |||
981 | iter->bucket = 0; | ||
982 | iter->hash_head = rcu_dereference(iter->zone->fz_hash); | ||
983 | |||
984 | hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { | ||
985 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
986 | iter->fn = fn; | ||
987 | iter->fa = fa; | ||
988 | goto out; | ||
989 | } | ||
990 | } | ||
991 | } | ||
992 | out: | ||
993 | iter->pos++; | ||
994 | return fa; | ||
995 | } | ||
996 | |||
997 | static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) | ||
998 | { | ||
999 | struct fib_iter_state *iter = seq->private; | ||
1000 | struct fib_alias *fa; | ||
1001 | |||
1002 | if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) { | ||
1003 | fa = iter->fa; | ||
1004 | pos -= iter->pos; | ||
1005 | } else | ||
1006 | fa = fib_get_first(seq); | ||
1007 | |||
1008 | if (fa) | ||
1009 | while (pos && (fa = fib_get_next(seq))) | ||
1010 | --pos; | ||
1011 | return pos ? NULL : fa; | ||
1012 | } | ||
1013 | |||
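fib_get_idx() resumes from the cached iterator position only while fib_hash_genid is unchanged; any table modification bumps the generation id and forces a rescan from the start. A sketch of that resume check over a plain array (the iterator layout is simplified):

    #include <stdio.h>

    static int table[] = { 10, 20, 30, 40 };
    static unsigned genid;                  /* bumped on every "route change" */

    struct iter { long pos; unsigned genid; int valid; };

    static int *get_idx(struct iter *it, long pos)
    {
        long start = 0;

        if (it->valid && pos >= it->pos && it->genid == genid) {
            start = it->pos;                /* resume from cached position */
        } else {
            it->genid = genid;              /* stale cache: rescan */
            it->valid = 1;
        }
        for (long i = start; i < 4; i++)
            if (i == pos) {
                it->pos = pos;
                return &table[i];
            }
        return NULL;
    }

    int main(void)
    {
        struct iter it = { 0, 0, 0 };
        printf("%d\n", *get_idx(&it, 2));   /* 30, scanned from 0 */
        genid++;                            /* simulated route change */
        printf("%d\n", *get_idx(&it, 3));   /* cache invalid, rescan: 40 */
        return 0;
    }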
1014 | static void *fib_seq_start(struct seq_file *seq, loff_t *pos) | ||
1015 | __acquires(RCU) | ||
1016 | { | ||
1017 | void *v = NULL; | ||
1018 | |||
1019 | rcu_read_lock(); | ||
1020 | if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) | ||
1021 | v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | ||
1022 | return v; | ||
1023 | } | ||
1024 | |||
1025 | static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1026 | { | ||
1027 | ++*pos; | ||
1028 | return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq); | ||
1029 | } | ||
1030 | |||
1031 | static void fib_seq_stop(struct seq_file *seq, void *v) | ||
1032 | __releases(RCU) | ||
1033 | { | ||
1034 | rcu_read_unlock(); | ||
1035 | } | ||
1036 | |||
1037 | static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) | ||
1038 | { | ||
1039 | static const unsigned type2flags[RTN_MAX + 1] = { | ||
1040 | [7] = RTF_REJECT, /* RTN_UNREACHABLE */ | ||
1041 | [8] = RTF_REJECT, /* RTN_PROHIBIT */ | ||
1042 | }; | ||
1043 | unsigned flags = type2flags[type]; | ||
1044 | |||
1045 | if (fi && fi->fib_nh->nh_gw) | ||
1046 | flags |= RTF_GATEWAY; | ||
1047 | if (mask == htonl(0xFFFFFFFF)) | ||
1048 | flags |= RTF_HOST; | ||
1049 | flags |= RTF_UP; | ||
1050 | return flags; | ||
1051 | } | ||
1052 | |||
1053 | /* | ||
1054 | * This outputs /proc/net/route. | ||
1055 | * | ||
1056 | * It always operates in backward-compatibility mode; | ||
1057 | * the format of the file must not change. | ||
1058 | */ | ||
1059 | static int fib_seq_show(struct seq_file *seq, void *v) | ||
1060 | { | ||
1061 | struct fib_iter_state *iter; | ||
1062 | int len; | ||
1063 | __be32 prefix, mask; | ||
1064 | unsigned flags; | ||
1065 | struct fib_node *f; | ||
1066 | struct fib_alias *fa; | ||
1067 | struct fib_info *fi; | ||
1068 | |||
1069 | if (v == SEQ_START_TOKEN) { | ||
1070 | seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " | ||
1071 | "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" | ||
1072 | "\tWindow\tIRTT"); | ||
1073 | goto out; | ||
1074 | } | ||
1075 | |||
1076 | iter = seq->private; | ||
1077 | f = iter->fn; | ||
1078 | fa = iter->fa; | ||
1079 | fi = fa->fa_info; | ||
1080 | prefix = f->fn_key; | ||
1081 | mask = FZ_MASK(iter->zone); | ||
1082 | flags = fib_flag_trans(fa->fa_type, mask, fi); | ||
1083 | if (fi) | ||
1084 | seq_printf(seq, | ||
1085 | "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", | ||
1086 | fi->fib_dev ? fi->fib_dev->name : "*", prefix, | ||
1087 | fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, | ||
1088 | mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), | ||
1089 | fi->fib_window, | ||
1090 | fi->fib_rtt >> 3, &len); | ||
1091 | else | ||
1092 | seq_printf(seq, | ||
1093 | "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", | ||
1094 | prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len); | ||
1095 | |||
1096 | seq_printf(seq, "%*s\n", 127 - len, ""); | ||
1097 | out: | ||
1098 | return 0; | ||
1099 | } | ||
1100 | |||
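fib_seq_show() keeps each /proc/net/route record at a fixed 127 columns: the %n conversion records how many characters the data portion consumed, and the trailing "%*s\n" pads out the remainder with spaces. A user-space sketch of the same trick (interface name and addresses invented; %n is accepted here because the format string is a literal):

    #include <stdio.h>

    int main(void)
    {
        int len;

        /* Write the data fields and capture their width in len. */
        printf("eth0\t%08X\t%08X\t%04X%n", 0x0100A8C0, 0, 0x0001, &len);
        /* Pad the record out to 127 columns, then terminate it. */
        printf("%*s\n", 127 - len, "");
        return 0;
    }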
1101 | static const struct seq_operations fib_seq_ops = { | ||
1102 | .start = fib_seq_start, | ||
1103 | .next = fib_seq_next, | ||
1104 | .stop = fib_seq_stop, | ||
1105 | .show = fib_seq_show, | ||
1106 | }; | ||
1107 | |||
1108 | static int fib_seq_open(struct inode *inode, struct file *file) | ||
1109 | { | ||
1110 | return seq_open_net(inode, file, &fib_seq_ops, | ||
1111 | sizeof(struct fib_iter_state)); | ||
1112 | } | ||
1113 | |||
1114 | static const struct file_operations fib_seq_fops = { | ||
1115 | .owner = THIS_MODULE, | ||
1116 | .open = fib_seq_open, | ||
1117 | .read = seq_read, | ||
1118 | .llseek = seq_lseek, | ||
1119 | .release = seq_release_net, | ||
1120 | }; | ||
1121 | |||
1122 | int __net_init fib_proc_init(struct net *net) | ||
1123 | { | ||
1124 | if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) | ||
1125 | return -ENOMEM; | ||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1129 | void __net_exit fib_proc_exit(struct net *net) | ||
1130 | { | ||
1131 | proc_net_remove(net, "route"); | ||
1132 | } | ||
1133 | #endif /* CONFIG_PROC_FS */ | ||
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c079cc0ec651..4ec323875a02 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h | |||
@@ -25,9 +25,6 @@ static inline void fib_alias_accessed(struct fib_alias *fa) | |||
25 | } | 25 | } |
26 | 26 | ||
27 | /* Exported by fib_semantics.c */ | 27 | /* Exported by fib_semantics.c */ |
28 | extern int fib_semantic_match(struct list_head *head, | ||
29 | const struct flowi *flp, | ||
30 | struct fib_result *res, int prefixlen, int fib_flags); | ||
31 | extern void fib_release_info(struct fib_info *); | 28 | extern void fib_release_info(struct fib_info *); |
32 | extern struct fib_info *fib_create_info(struct fib_config *cfg); | 29 | extern struct fib_info *fib_create_info(struct fib_config *cfg); |
33 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); | 30 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); |
@@ -51,4 +48,11 @@ static inline void fib_result_assign(struct fib_result *res, | |||
51 | res->fi = fi; | 48 | res->fi = fi; |
52 | } | 49 | } |
53 | 50 | ||
51 | struct fib_prop { | ||
52 | int error; | ||
53 | u8 scope; | ||
54 | }; | ||
55 | |||
56 | extern const struct fib_prop fib_props[RTN_MAX + 1]; | ||
57 | |||
54 | #endif /* _FIB_LOOKUP_H */ | 58 | #endif /* _FIB_LOOKUP_H */ |
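The fib_props[] table exported here maps each route type to the error a lookup of that type should return and the minimum scope such a route may carry. A trimmed user-space copy of the first entries, with values as in net/ipv4/fib_semantics.c and scope constants following <linux/rtnetlink.h>:

    #include <errno.h>
    #include <stdio.h>

    enum { SCOPE_UNIVERSE = 0, SCOPE_LINK = 253,
           SCOPE_HOST = 254, SCOPE_NOWHERE = 255 };

    struct fib_prop { int error; unsigned char scope; };

    /* First nine route types (RTN_UNSPEC..RTN_PROHIBIT); the kernel
     * table runs through RTN_MAX. */
    static const struct fib_prop props[] = {
        { 0,             SCOPE_NOWHERE  },  /* RTN_UNSPEC      */
        { 0,             SCOPE_UNIVERSE },  /* RTN_UNICAST     */
        { 0,             SCOPE_HOST     },  /* RTN_LOCAL       */
        { 0,             SCOPE_LINK     },  /* RTN_BROADCAST   */
        { 0,             SCOPE_LINK     },  /* RTN_ANYCAST     */
        { 0,             SCOPE_UNIVERSE },  /* RTN_MULTICAST   */
        { -EINVAL,       SCOPE_UNIVERSE },  /* RTN_BLACKHOLE   */
        { -EHOSTUNREACH, SCOPE_UNIVERSE },  /* RTN_UNREACHABLE */
        { -EACCES,       SCOPE_UNIVERSE },  /* RTN_PROHIBIT    */
    };

    int main(void)
    {
        printf("unreachable -> error %d\n", props[7].error);
        return 0;
    }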
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 7981a24f5c7b..a53bb1b5b118 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -41,19 +41,19 @@ struct fib4_rule { | |||
41 | __be32 srcmask; | 41 | __be32 srcmask; |
42 | __be32 dst; | 42 | __be32 dst; |
43 | __be32 dstmask; | 43 | __be32 dstmask; |
44 | #ifdef CONFIG_NET_CLS_ROUTE | 44 | #ifdef CONFIG_IP_ROUTE_CLASSID |
45 | u32 tclassid; | 45 | u32 tclassid; |
46 | #endif | 46 | #endif |
47 | }; | 47 | }; |
48 | 48 | ||
49 | #ifdef CONFIG_NET_CLS_ROUTE | 49 | #ifdef CONFIG_IP_ROUTE_CLASSID |
50 | u32 fib_rules_tclass(struct fib_result *res) | 50 | u32 fib_rules_tclass(const struct fib_result *res) |
51 | { | 51 | { |
52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; | 52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; |
53 | } | 53 | } |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | 56 | int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) |
57 | { | 57 | { |
58 | struct fib_lookup_arg arg = { | 58 | struct fib_lookup_arg arg = { |
59 | .result = res, | 59 | .result = res, |
@@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | |||
61 | }; | 61 | }; |
62 | int err; | 62 | int err; |
63 | 63 | ||
64 | err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); | 64 | err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); |
65 | res->r = arg.rule; | 65 | res->r = arg.rule; |
66 | 66 | ||
67 | return err; | 67 | return err; |
@@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, | |||
95 | if (!tbl) | 95 | if (!tbl) |
96 | goto errout; | 96 | goto errout; |
97 | 97 | ||
98 | err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); | 98 | err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); |
99 | if (err > 0) | 99 | if (err > 0) |
100 | err = -EAGAIN; | 100 | err = -EAGAIN; |
101 | errout: | 101 | errout: |
@@ -106,14 +106,15 @@ errout: | |||
106 | static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) | 106 | static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) |
107 | { | 107 | { |
108 | struct fib4_rule *r = (struct fib4_rule *) rule; | 108 | struct fib4_rule *r = (struct fib4_rule *) rule; |
109 | __be32 daddr = fl->fl4_dst; | 109 | struct flowi4 *fl4 = &fl->u.ip4; |
110 | __be32 saddr = fl->fl4_src; | 110 | __be32 daddr = fl4->daddr; |
111 | __be32 saddr = fl4->saddr; | ||
111 | 112 | ||
112 | if (((saddr ^ r->src) & r->srcmask) || | 113 | if (((saddr ^ r->src) & r->srcmask) || |
113 | ((daddr ^ r->dst) & r->dstmask)) | 114 | ((daddr ^ r->dst) & r->dstmask)) |
114 | return 0; | 115 | return 0; |
115 | 116 | ||
116 | if (r->tos && (r->tos != fl->fl4_tos)) | 117 | if (r->tos && (r->tos != fl4->flowi4_tos)) |
117 | return 0; | 118 | return 0; |
118 | 119 | ||
119 | return 1; | 120 | return 1; |
@@ -165,7 +166,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | |||
165 | if (frh->dst_len) | 166 | if (frh->dst_len) |
166 | rule4->dst = nla_get_be32(tb[FRA_DST]); | 167 | rule4->dst = nla_get_be32(tb[FRA_DST]); |
167 | 168 | ||
168 | #ifdef CONFIG_NET_CLS_ROUTE | 169 | #ifdef CONFIG_IP_ROUTE_CLASSID |
169 | if (tb[FRA_FLOW]) | 170 | if (tb[FRA_FLOW]) |
170 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); | 171 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); |
171 | #endif | 172 | #endif |
@@ -195,7 +196,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | |||
195 | if (frh->tos && (rule4->tos != frh->tos)) | 196 | if (frh->tos && (rule4->tos != frh->tos)) |
196 | return 0; | 197 | return 0; |
197 | 198 | ||
198 | #ifdef CONFIG_NET_CLS_ROUTE | 199 | #ifdef CONFIG_IP_ROUTE_CLASSID |
199 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) | 200 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) |
200 | return 0; | 201 | return 0; |
201 | #endif | 202 | #endif |
@@ -224,7 +225,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
224 | if (rule4->src_len) | 225 | if (rule4->src_len) |
225 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); | 226 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); |
226 | 227 | ||
227 | #ifdef CONFIG_NET_CLS_ROUTE | 228 | #ifdef CONFIG_IP_ROUTE_CLASSID |
228 | if (rule4->tclassid) | 229 | if (rule4->tclassid) |
229 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); | 230 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); |
230 | #endif | 231 | #endif |
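The fib_rules.c hunks above are part of the flowi reorganization: the IPv4 flow keys move from flat fl4_* fields into flp->u.ip4, and flowi4_to_flowi() recovers the containing struct flowi from its embedded struct flowi4. A condensed sketch of that layout, with the structs trimmed to the fields used here:

    #include <stddef.h>
    #include <stdio.h>

    struct flowi4 {
        int flowi4_oif;
        unsigned char flowi4_tos, flowi4_scope;
        unsigned daddr, saddr;
    };
    struct flowi { union { struct flowi4 ip4; } u; };

    /* Mirrors the kernel's container_of()-based flowi4_to_flowi():
     * since ip4 is the union's first member, the offset is zero and
     * both views share one address. */
    static struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
    {
        return (struct flowi *)((char *)fl4 - offsetof(struct flowi, u.ip4));
    }

    int main(void)
    {
        struct flowi fl = { .u.ip4 = { .daddr = 0x0100A8C0 } };
        printf("%08X\n", flowi4_to_flowi(&fl.u.ip4)->u.ip4.daddr);
        return 0;
    }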
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 12d3dc3df1b7..622ac4c95026 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -49,7 +49,7 @@ | |||
49 | static DEFINE_SPINLOCK(fib_info_lock); | 49 | static DEFINE_SPINLOCK(fib_info_lock); |
50 | static struct hlist_head *fib_info_hash; | 50 | static struct hlist_head *fib_info_hash; |
51 | static struct hlist_head *fib_info_laddrhash; | 51 | static struct hlist_head *fib_info_laddrhash; |
52 | static unsigned int fib_hash_size; | 52 | static unsigned int fib_info_hash_size; |
53 | static unsigned int fib_info_cnt; | 53 | static unsigned int fib_info_cnt; |
54 | 54 | ||
55 | #define DEVINDEX_HASHBITS 8 | 55 | #define DEVINDEX_HASHBITS 8 |
@@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock); | |||
90 | #define endfor_nexthops(fi) } | 90 | #define endfor_nexthops(fi) } |
91 | 91 | ||
92 | 92 | ||
93 | static const struct | 93 | const struct fib_prop fib_props[RTN_MAX + 1] = { |
94 | { | ||
95 | int error; | ||
96 | u8 scope; | ||
97 | } fib_props[RTN_MAX + 1] = { | ||
98 | [RTN_UNSPEC] = { | 94 | [RTN_UNSPEC] = { |
99 | .error = 0, | 95 | .error = 0, |
100 | .scope = RT_SCOPE_NOWHERE, | 96 | .scope = RT_SCOPE_NOWHERE, |
@@ -152,6 +148,8 @@ static void free_fib_info_rcu(struct rcu_head *head) | |||
152 | { | 148 | { |
153 | struct fib_info *fi = container_of(head, struct fib_info, rcu); | 149 | struct fib_info *fi = container_of(head, struct fib_info, rcu); |
154 | 150 | ||
151 | if (fi->fib_metrics != (u32 *) dst_default_metrics) | ||
152 | kfree(fi->fib_metrics); | ||
155 | kfree(fi); | 153 | kfree(fi); |
156 | } | 154 | } |
157 | 155 | ||
@@ -200,7 +198,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | |||
200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 198 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
201 | nh->nh_weight != onh->nh_weight || | 199 | nh->nh_weight != onh->nh_weight || |
202 | #endif | 200 | #endif |
203 | #ifdef CONFIG_NET_CLS_ROUTE | 201 | #ifdef CONFIG_IP_ROUTE_CLASSID |
204 | nh->nh_tclassid != onh->nh_tclassid || | 202 | nh->nh_tclassid != onh->nh_tclassid || |
205 | #endif | 203 | #endif |
206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) | 204 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) |
@@ -221,7 +219,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) | |||
221 | 219 | ||
222 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) | 220 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) |
223 | { | 221 | { |
224 | unsigned int mask = (fib_hash_size - 1); | 222 | unsigned int mask = (fib_info_hash_size - 1); |
225 | unsigned int val = fi->fib_nhs; | 223 | unsigned int val = fi->fib_nhs; |
226 | 224 | ||
227 | val ^= fi->fib_protocol; | 225 | val ^= fi->fib_protocol; |
@@ -422,7 +420,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
422 | 420 | ||
423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 421 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 422 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
425 | #ifdef CONFIG_NET_CLS_ROUTE | 423 | #ifdef CONFIG_IP_ROUTE_CLASSID |
426 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 424 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 425 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
428 | #endif | 426 | #endif |
@@ -476,7 +474,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) | |||
476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 474 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
477 | if (nla && nla_get_be32(nla) != nh->nh_gw) | 475 | if (nla && nla_get_be32(nla) != nh->nh_gw) |
478 | return 1; | 476 | return 1; |
479 | #ifdef CONFIG_NET_CLS_ROUTE | 477 | #ifdef CONFIG_IP_ROUTE_CLASSID |
480 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 478 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) | 479 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) |
482 | return 1; | 480 | return 1; |
@@ -562,16 +560,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
562 | } | 560 | } |
563 | rcu_read_lock(); | 561 | rcu_read_lock(); |
564 | { | 562 | { |
565 | struct flowi fl = { | 563 | struct flowi4 fl4 = { |
566 | .fl4_dst = nh->nh_gw, | 564 | .daddr = nh->nh_gw, |
567 | .fl4_scope = cfg->fc_scope + 1, | 565 | .flowi4_scope = cfg->fc_scope + 1, |
568 | .oif = nh->nh_oif, | 566 | .flowi4_oif = nh->nh_oif, |
569 | }; | 567 | }; |
570 | 568 | ||
571 | /* It is not necessary, but requires a bit of thinking */ | 569 | /* It is not necessary, but requires a bit of thinking */ |
572 | if (fl.fl4_scope < RT_SCOPE_LINK) | 570 | if (fl4.flowi4_scope < RT_SCOPE_LINK) |
573 | fl.fl4_scope = RT_SCOPE_LINK; | 571 | fl4.flowi4_scope = RT_SCOPE_LINK; |
574 | err = fib_lookup(net, &fl, &res); | 572 | err = fib_lookup(net, &fl4, &res); |
575 | if (err) { | 573 | if (err) { |
576 | rcu_read_unlock(); | 574 | rcu_read_unlock(); |
577 | return err; | 575 | return err; |
@@ -613,14 +611,14 @@ out: | |||
613 | 611 | ||
614 | static inline unsigned int fib_laddr_hashfn(__be32 val) | 612 | static inline unsigned int fib_laddr_hashfn(__be32 val) |
615 | { | 613 | { |
616 | unsigned int mask = (fib_hash_size - 1); | 614 | unsigned int mask = (fib_info_hash_size - 1); |
617 | 615 | ||
618 | return ((__force u32)val ^ | 616 | return ((__force u32)val ^ |
619 | ((__force u32)val >> 7) ^ | 617 | ((__force u32)val >> 7) ^ |
620 | ((__force u32)val >> 14)) & mask; | 618 | ((__force u32)val >> 14)) & mask; |
621 | } | 619 | } |
622 | 620 | ||
623 | static struct hlist_head *fib_hash_alloc(int bytes) | 621 | static struct hlist_head *fib_info_hash_alloc(int bytes) |
624 | { | 622 | { |
625 | if (bytes <= PAGE_SIZE) | 623 | if (bytes <= PAGE_SIZE) |
626 | return kzalloc(bytes, GFP_KERNEL); | 624 | return kzalloc(bytes, GFP_KERNEL); |
@@ -630,7 +628,7 @@ static struct hlist_head *fib_hash_alloc(int bytes) | |||
630 | get_order(bytes)); | 628 | get_order(bytes)); |
631 | } | 629 | } |
632 | 630 | ||
633 | static void fib_hash_free(struct hlist_head *hash, int bytes) | 631 | static void fib_info_hash_free(struct hlist_head *hash, int bytes) |
634 | { | 632 | { |
635 | if (!hash) | 633 | if (!hash) |
636 | return; | 634 | return; |
@@ -641,18 +639,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes) | |||
641 | free_pages((unsigned long) hash, get_order(bytes)); | 639 | free_pages((unsigned long) hash, get_order(bytes)); |
642 | } | 640 | } |
643 | 641 | ||
644 | static void fib_hash_move(struct hlist_head *new_info_hash, | 642 | static void fib_info_hash_move(struct hlist_head *new_info_hash, |
645 | struct hlist_head *new_laddrhash, | 643 | struct hlist_head *new_laddrhash, |
646 | unsigned int new_size) | 644 | unsigned int new_size) |
647 | { | 645 | { |
648 | struct hlist_head *old_info_hash, *old_laddrhash; | 646 | struct hlist_head *old_info_hash, *old_laddrhash; |
649 | unsigned int old_size = fib_hash_size; | 647 | unsigned int old_size = fib_info_hash_size; |
650 | unsigned int i, bytes; | 648 | unsigned int i, bytes; |
651 | 649 | ||
652 | spin_lock_bh(&fib_info_lock); | 650 | spin_lock_bh(&fib_info_lock); |
653 | old_info_hash = fib_info_hash; | 651 | old_info_hash = fib_info_hash; |
654 | old_laddrhash = fib_info_laddrhash; | 652 | old_laddrhash = fib_info_laddrhash; |
655 | fib_hash_size = new_size; | 653 | fib_info_hash_size = new_size; |
656 | 654 | ||
657 | for (i = 0; i < old_size; i++) { | 655 | for (i = 0; i < old_size; i++) { |
658 | struct hlist_head *head = &fib_info_hash[i]; | 656 | struct hlist_head *head = &fib_info_hash[i]; |
@@ -693,8 +691,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash, | |||
693 | spin_unlock_bh(&fib_info_lock); | 691 | spin_unlock_bh(&fib_info_lock); |
694 | 692 | ||
695 | bytes = old_size * sizeof(struct hlist_head *); | 693 | bytes = old_size * sizeof(struct hlist_head *); |
696 | fib_hash_free(old_info_hash, bytes); | 694 | fib_info_hash_free(old_info_hash, bytes); |
697 | fib_hash_free(old_laddrhash, bytes); | 695 | fib_info_hash_free(old_laddrhash, bytes); |
698 | } | 696 | } |
699 | 697 | ||
700 | struct fib_info *fib_create_info(struct fib_config *cfg) | 698 | struct fib_info *fib_create_info(struct fib_config *cfg) |
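fib_info_hash_move() grows both info hashes by rehashing every entry into a table of twice the size; because the sizes stay powers of two, the bucket is simply key & (size - 1). A user-space sketch of the grow-and-rehash step (node layout hypothetical; the kernel does this under fib_info_lock):

    #include <stdio.h>

    struct node { unsigned key; struct node *next; };

    static void rehash(struct node **old, unsigned old_size,
                       struct node **tbl, unsigned tbl_size)
    {
        for (unsigned i = 0; i < old_size; i++) {
            struct node *n = old[i], *next;

            for (; n; n = next) {
                next = n->next;                 /* unlink first */
                unsigned h = n->key & (tbl_size - 1);
                n->next = tbl[h];               /* push onto new chain */
                tbl[h] = n;
            }
        }
    }

    int main(void)
    {
        struct node a = { 5, NULL }, b = { 9, NULL };
        struct node *old[4] = { NULL }, *grown[8] = { NULL };

        a.next = &b;            /* keys 5 and 9 collide under mask 3 */
        old[5 & 3] = &a;
        rehash(old, 4, grown, 8);
        /* Under mask 7 they separate: buckets 5 and 1. */
        printf("bucket 5: %u, bucket 1: %u\n",
               grown[5]->key, grown[1]->key);
        return 0;
    }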
@@ -705,6 +703,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
705 | int nhs = 1; | 703 | int nhs = 1; |
706 | struct net *net = cfg->fc_nlinfo.nl_net; | 704 | struct net *net = cfg->fc_nlinfo.nl_net; |
707 | 705 | ||
706 | if (cfg->fc_type > RTN_MAX) | ||
707 | goto err_inval; | ||
708 | |||
708 | /* Fast check to catch the most weird cases */ | 709 | /* Fast check to catch the most weird cases */ |
709 | if (fib_props[cfg->fc_type].scope > cfg->fc_scope) | 710 | if (fib_props[cfg->fc_type].scope > cfg->fc_scope) |
710 | goto err_inval; | 711 | goto err_inval; |
@@ -718,8 +719,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
718 | #endif | 719 | #endif |
719 | 720 | ||
720 | err = -ENOBUFS; | 721 | err = -ENOBUFS; |
721 | if (fib_info_cnt >= fib_hash_size) { | 722 | if (fib_info_cnt >= fib_info_hash_size) { |
722 | unsigned int new_size = fib_hash_size << 1; | 723 | unsigned int new_size = fib_info_hash_size << 1; |
723 | struct hlist_head *new_info_hash; | 724 | struct hlist_head *new_info_hash; |
724 | struct hlist_head *new_laddrhash; | 725 | struct hlist_head *new_laddrhash; |
725 | unsigned int bytes; | 726 | unsigned int bytes; |
@@ -727,21 +728,27 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
727 | if (!new_size) | 728 | if (!new_size) |
728 | new_size = 1; | 729 | new_size = 1; |
729 | bytes = new_size * sizeof(struct hlist_head *); | 730 | bytes = new_size * sizeof(struct hlist_head *); |
730 | new_info_hash = fib_hash_alloc(bytes); | 731 | new_info_hash = fib_info_hash_alloc(bytes); |
731 | new_laddrhash = fib_hash_alloc(bytes); | 732 | new_laddrhash = fib_info_hash_alloc(bytes); |
732 | if (!new_info_hash || !new_laddrhash) { | 733 | if (!new_info_hash || !new_laddrhash) { |
733 | fib_hash_free(new_info_hash, bytes); | 734 | fib_info_hash_free(new_info_hash, bytes); |
734 | fib_hash_free(new_laddrhash, bytes); | 735 | fib_info_hash_free(new_laddrhash, bytes); |
735 | } else | 736 | } else |
736 | fib_hash_move(new_info_hash, new_laddrhash, new_size); | 737 | fib_info_hash_move(new_info_hash, new_laddrhash, new_size); |
737 | 738 | ||
738 | if (!fib_hash_size) | 739 | if (!fib_info_hash_size) |
739 | goto failure; | 740 | goto failure; |
740 | } | 741 | } |
741 | 742 | ||
742 | fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); | 743 | fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); |
743 | if (fi == NULL) | 744 | if (fi == NULL) |
744 | goto failure; | 745 | goto failure; |
746 | if (cfg->fc_mx) { | ||
747 | fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); | ||
748 | if (!fi->fib_metrics) | ||
749 | goto failure; | ||
750 | } else | ||
751 | fi->fib_metrics = (u32 *) dst_default_metrics; | ||
745 | fib_info_cnt++; | 752 | fib_info_cnt++; |
746 | 753 | ||
747 | fi->fib_net = hold_net(net); | 754 | fi->fib_net = hold_net(net); |
@@ -779,7 +786,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
779 | goto err_inval; | 786 | goto err_inval; |
780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) | 787 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) |
781 | goto err_inval; | 788 | goto err_inval; |
782 | #ifdef CONFIG_NET_CLS_ROUTE | 789 | #ifdef CONFIG_IP_ROUTE_CLASSID |
783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) | 790 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) |
784 | goto err_inval; | 791 | goto err_inval; |
785 | #endif | 792 | #endif |
@@ -792,7 +799,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
792 | nh->nh_oif = cfg->fc_oif; | 799 | nh->nh_oif = cfg->fc_oif; |
793 | nh->nh_gw = cfg->fc_gw; | 800 | nh->nh_gw = cfg->fc_gw; |
794 | nh->nh_flags = cfg->fc_flags; | 801 | nh->nh_flags = cfg->fc_flags; |
795 | #ifdef CONFIG_NET_CLS_ROUTE | 802 | #ifdef CONFIG_IP_ROUTE_CLASSID |
796 | nh->nh_tclassid = cfg->fc_flow; | 803 | nh->nh_tclassid = cfg->fc_flow; |
797 | #endif | 804 | #endif |
798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 805 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -804,6 +811,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
804 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) | 811 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) |
805 | goto err_inval; | 812 | goto err_inval; |
806 | goto link_it; | 813 | goto link_it; |
814 | } else { | ||
815 | switch (cfg->fc_type) { | ||
816 | case RTN_UNICAST: | ||
817 | case RTN_LOCAL: | ||
818 | case RTN_BROADCAST: | ||
819 | case RTN_ANYCAST: | ||
820 | case RTN_MULTICAST: | ||
821 | break; | ||
822 | default: | ||
823 | goto err_inval; | ||
824 | } | ||
807 | } | 825 | } |
808 | 826 | ||
809 | if (cfg->fc_scope > RT_SCOPE_HOST) | 827 | if (cfg->fc_scope > RT_SCOPE_HOST) |
@@ -835,6 +853,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
835 | goto err_inval; | 853 | goto err_inval; |
836 | } | 854 | } |
837 | 855 | ||
856 | change_nexthops(fi) { | ||
857 | nexthop_nh->nh_cfg_scope = cfg->fc_scope; | ||
858 | nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, | ||
859 | nexthop_nh->nh_gw, | ||
860 | nexthop_nh->nh_cfg_scope); | ||
861 | } endfor_nexthops(fi) | ||
862 | |||
838 | link_it: | 863 | link_it: |
839 | ofi = fib_find_info(fi); | 864 | ofi = fib_find_info(fi); |
840 | if (ofi) { | 865 | if (ofi) { |
@@ -880,84 +905,6 @@ failure: | |||
880 | return ERR_PTR(err); | 905 | return ERR_PTR(err); |
881 | } | 906 | } |
882 | 907 | ||
883 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ | ||
884 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, | ||
885 | struct fib_result *res, int prefixlen, int fib_flags) | ||
886 | { | ||
887 | struct fib_alias *fa; | ||
888 | int nh_sel = 0; | ||
889 | |||
890 | list_for_each_entry_rcu(fa, head, fa_list) { | ||
891 | int err; | ||
892 | |||
893 | if (fa->fa_tos && | ||
894 | fa->fa_tos != flp->fl4_tos) | ||
895 | continue; | ||
896 | |||
897 | if (fa->fa_scope < flp->fl4_scope) | ||
898 | continue; | ||
899 | |||
900 | fib_alias_accessed(fa); | ||
901 | |||
902 | err = fib_props[fa->fa_type].error; | ||
903 | if (err == 0) { | ||
904 | struct fib_info *fi = fa->fa_info; | ||
905 | |||
906 | if (fi->fib_flags & RTNH_F_DEAD) | ||
907 | continue; | ||
908 | |||
909 | switch (fa->fa_type) { | ||
910 | case RTN_UNICAST: | ||
911 | case RTN_LOCAL: | ||
912 | case RTN_BROADCAST: | ||
913 | case RTN_ANYCAST: | ||
914 | case RTN_MULTICAST: | ||
915 | for_nexthops(fi) { | ||
916 | if (nh->nh_flags & RTNH_F_DEAD) | ||
917 | continue; | ||
918 | if (!flp->oif || flp->oif == nh->nh_oif) | ||
919 | break; | ||
920 | } | ||
921 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
922 | if (nhsel < fi->fib_nhs) { | ||
923 | nh_sel = nhsel; | ||
924 | goto out_fill_res; | ||
925 | } | ||
926 | #else | ||
927 | if (nhsel < 1) | ||
928 | goto out_fill_res; | ||
929 | #endif | ||
930 | endfor_nexthops(fi); | ||
931 | continue; | ||
932 | |||
933 | default: | ||
934 | pr_warning("fib_semantic_match bad type %#x\n", | ||
935 | fa->fa_type); | ||
936 | return -EINVAL; | ||
937 | } | ||
938 | } | ||
939 | return err; | ||
940 | } | ||
941 | return 1; | ||
942 | |||
943 | out_fill_res: | ||
944 | res->prefixlen = prefixlen; | ||
945 | res->nh_sel = nh_sel; | ||
946 | res->type = fa->fa_type; | ||
947 | res->scope = fa->fa_scope; | ||
948 | res->fi = fa->fa_info; | ||
949 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | ||
950 | atomic_inc(&res->fi->fib_clntref); | ||
951 | return 0; | ||
952 | } | ||
953 | |||
954 | /* Find appropriate source address to this destination */ | ||
955 | |||
956 | __be32 __fib_res_prefsrc(struct fib_result *res) | ||
957 | { | ||
958 | return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); | ||
959 | } | ||
960 | |||
961 | int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 908 | int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
962 | u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, | 909 | u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, |
963 | struct fib_info *fi, unsigned int flags) | 910 | struct fib_info *fi, unsigned int flags) |
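The removed fib_semantic_match() filtered each alias on TOS, scope and RTNH_F_DEAD before selecting a nexthop; with this change that loop lives in the trie lookup path in fib_trie.c. A condensed sketch of the filter it applied (struct fields are stand-ins for the fib_alias/fib_info fields used above):

    #include <stdbool.h>
    #include <stdio.h>

    struct alias { unsigned char tos, scope; bool dead; };

    static bool alias_usable(const struct alias *fa,
                             unsigned char flow_tos,
                             unsigned char flow_scope)
    {
        if (fa->tos && fa->tos != flow_tos)   /* TOS 0 acts as wildcard */
            return false;
        if (fa->scope < flow_scope)           /* scope must be wide enough */
            return false;
        return !fa->dead;                     /* skip RTNH_F_DEAD routes */
    }

    int main(void)
    {
        struct alias fa = { .tos = 0, .scope = 253, .dead = false };
        printf("%d\n", alias_usable(&fa, 0x10, 0));  /* 1: wildcard TOS */
        return 0;
    }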
@@ -1002,7 +949,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1002 | 949 | ||
1003 | if (fi->fib_nh->nh_oif) | 950 | if (fi->fib_nh->nh_oif) |
1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 951 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); |
1005 | #ifdef CONFIG_NET_CLS_ROUTE | 952 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1006 | if (fi->fib_nh[0].nh_tclassid) | 953 | if (fi->fib_nh[0].nh_tclassid) |
1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 954 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); |
1008 | #endif | 955 | #endif |
@@ -1027,7 +974,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1027 | 974 | ||
1028 | if (nh->nh_gw) | 975 | if (nh->nh_gw) |
1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 976 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); |
1030 | #ifdef CONFIG_NET_CLS_ROUTE | 977 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1031 | if (nh->nh_tclassid) | 978 | if (nh->nh_tclassid) |
1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 979 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); |
1033 | #endif | 980 | #endif |
@@ -1125,6 +1072,80 @@ int fib_sync_down_dev(struct net_device *dev, int force) | |||
1125 | return ret; | 1072 | return ret; |
1126 | } | 1073 | } |
1127 | 1074 | ||
1075 | /* Must be invoked inside an RCU-protected region. */ | ||
1076 | void fib_select_default(struct fib_result *res) | ||
1077 | { | ||
1078 | struct fib_info *fi = NULL, *last_resort = NULL; | ||
1079 | struct list_head *fa_head = res->fa_head; | ||
1080 | struct fib_table *tb = res->table; | ||
1081 | int order = -1, last_idx = -1; | ||
1082 | struct fib_alias *fa; | ||
1083 | |||
1084 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
1085 | struct fib_info *next_fi = fa->fa_info; | ||
1086 | |||
1087 | if (fa->fa_scope != res->scope || | ||
1088 | fa->fa_type != RTN_UNICAST) | ||
1089 | continue; | ||
1090 | |||
1091 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
1092 | break; | ||
1093 | if (!next_fi->fib_nh[0].nh_gw || | ||
1094 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
1095 | continue; | ||
1096 | |||
1097 | fib_alias_accessed(fa); | ||
1098 | |||
1099 | if (fi == NULL) { | ||
1100 | if (next_fi != res->fi) | ||
1101 | break; | ||
1102 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
1103 | &last_idx, tb->tb_default)) { | ||
1104 | fib_result_assign(res, fi); | ||
1105 | tb->tb_default = order; | ||
1106 | goto out; | ||
1107 | } | ||
1108 | fi = next_fi; | ||
1109 | order++; | ||
1110 | } | ||
1111 | |||
1112 | if (order <= 0 || fi == NULL) { | ||
1113 | tb->tb_default = -1; | ||
1114 | goto out; | ||
1115 | } | ||
1116 | |||
1117 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
1118 | tb->tb_default)) { | ||
1119 | fib_result_assign(res, fi); | ||
1120 | tb->tb_default = order; | ||
1121 | goto out; | ||
1122 | } | ||
1123 | |||
1124 | if (last_idx >= 0) | ||
1125 | fib_result_assign(res, last_resort); | ||
1126 | tb->tb_default = last_idx; | ||
1127 | out: | ||
1128 | return; | ||
1129 | } | ||
1130 | |||
1131 | void fib_update_nh_saddrs(struct net_device *dev) | ||
1132 | { | ||
1133 | struct hlist_head *head; | ||
1134 | struct hlist_node *node; | ||
1135 | struct fib_nh *nh; | ||
1136 | unsigned int hash; | ||
1137 | |||
1138 | hash = fib_devindex_hashfn(dev->ifindex); | ||
1139 | head = &fib_info_devhash[hash]; | ||
1140 | hlist_for_each_entry(nh, node, head, nh_hash) { | ||
1141 | if (nh->nh_dev != dev) | ||
1142 | continue; | ||
1143 | nh->nh_saddr = inet_select_addr(nh->nh_dev, | ||
1144 | nh->nh_gw, | ||
1145 | nh->nh_cfg_scope); | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1128 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1149 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1129 | 1150 | ||
1130 | /* | 1151 | /* |
@@ -1189,7 +1210,7 @@ int fib_sync_up(struct net_device *dev) | |||
1189 | * The algorithm is suboptimal, but it provides really | 1210 | * The algorithm is suboptimal, but it provides really |
1190 | * fair weighted route distribution. | 1211 | * fair weighted route distribution. |
1191 | */ | 1212 | */ |
1192 | void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | 1213 | void fib_select_multipath(struct fib_result *res) |
1193 | { | 1214 | { |
1194 | struct fib_info *fi = res->fi; | 1215 | struct fib_info *fi = res->fi; |
1195 | int w; | 1216 | int w; |
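fib_select_multipath() now takes only the fib_result: the flow no longer influences which nexthop is picked, only the configured weights do. The kernel implements the weighting with per-nexthop power counters under fib_multipath_lock; the sketch below only approximates the same long-run distribution with a weighted random pick:

    #include <stdio.h>
    #include <stdlib.h>

    static int select_nexthop(const int *weight, int n)
    {
        int total = 0, w, i;

        for (i = 0; i < n; i++)
            total += weight[i];
        w = rand() % total;         /* kernel: decrementing power counters */
        for (i = 0; i < n; i++) {
            w -= weight[i];
            if (w < 0)
                return i;
        }
        return n - 1;
    }

    int main(void)
    {
        int weight[2] = { 3, 1 }, hits[2] = { 0, 0 };

        for (int i = 0; i < 100000; i++)
            hits[select_nexthop(weight, 2)]++;
        printf("nh0=%d nh1=%d\n", hits[0], hits[1]);  /* roughly 3:1 */
        return 0;
    }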
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f280348e0fd..3d28a35c2e1a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -95,7 +95,7 @@ typedef unsigned int t_key; | |||
95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | 95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) |
96 | #define IS_LEAF(n) (n->parent & T_LEAF) | 96 | #define IS_LEAF(n) (n->parent & T_LEAF) |
97 | 97 | ||
98 | struct node { | 98 | struct rt_trie_node { |
99 | unsigned long parent; | 99 | unsigned long parent; |
100 | t_key key; | 100 | t_key key; |
101 | }; | 101 | }; |
@@ -126,7 +126,7 @@ struct tnode { | |||
126 | struct work_struct work; | 126 | struct work_struct work; |
127 | struct tnode *tnode_free; | 127 | struct tnode *tnode_free; |
128 | }; | 128 | }; |
129 | struct node *child[0]; | 129 | struct rt_trie_node *child[0]; |
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,16 +151,16 @@ struct trie_stat { | |||
151 | }; | 151 | }; |
152 | 152 | ||
153 | struct trie { | 153 | struct trie { |
154 | struct node *trie; | 154 | struct rt_trie_node *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 156 | struct trie_use_stats stats; |
157 | #endif | 157 | #endif |
158 | }; | 158 | }; |
159 | 159 | ||
160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | 160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); |
161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
162 | int wasfull); | 162 | int wasfull); |
163 | static struct node *resize(struct trie *t, struct tnode *tn); | 163 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); |
164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
165 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
166 | /* tnodes to free after resize(); protected by RTNL */ | 166 | /* tnodes to free after resize(); protected by RTNL */ |
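Throughout this file a node's type is encoded in the low bit of its parent pointer (the tag tested by IS_LEAF/IS_TNODE above), which is why the node -> rt_trie_node rename touches so many casts. A sketch of the tagging scheme, with tag values matching the file's own T_TNODE/T_LEAF defines:

    #include <stdio.h>

    #define T_TNODE        0UL
    #define T_LEAF         1UL
    #define NODE_TYPE_MASK 0x1UL

    struct rt_trie_node { unsigned long parent; unsigned key; };

    /* Strip the type tag to recover the parent pointer bits. */
    static unsigned long node_parent_bits(const struct rt_trie_node *n)
    {
        return n->parent & ~NODE_TYPE_MASK;
    }

    int main(void)
    {
        struct rt_trie_node leaf = { .parent = 0, .key = 42 };

        leaf.parent = 0UL | T_LEAF;     /* root leaf: NULL parent, tagged */
        printf("is_leaf=%lu parent=%#lx\n",
               leaf.parent & T_LEAF, node_parent_bits(&leaf));
        return 0;
    }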
@@ -177,12 +177,12 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 179 | ||
180 | static inline struct tnode *node_parent(struct node *node) | 180 | static inline struct tnode *node_parent(struct rt_trie_node *node) |
181 | { | 181 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); |
183 | } | 183 | } |
184 | 184 | ||
185 | static inline struct tnode *node_parent_rcu(struct node *node) | 185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) |
186 | { | 186 | { |
187 | struct tnode *ret = node_parent(node); | 187 | struct tnode *ret = node_parent(node); |
188 | 188 | ||
@@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node) | |||
192 | /* Same as rcu_assign_pointer | 192 | /* Same as rcu_assign_pointer |
193 |  * but that macro assumes the value is a pointer. | 193 |  * but that macro assumes the value is a pointer. |
194 | */ | 194 | */ |
195 | static inline void node_set_parent(struct node *node, struct tnode *ptr) | 195 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) |
196 | { | 196 | { |
197 | smp_wmb(); | 197 | smp_wmb(); |
198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
199 | } | 199 | } |
200 | 200 | ||
201 | static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) | 201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) |
202 | { | 202 | { |
203 | BUG_ON(i >= 1U << tn->bits); | 203 | BUG_ON(i >= 1U << tn->bits); |
204 | 204 | ||
205 | return tn->child[i]; | 205 | return tn->child[i]; |
206 | } | 206 | } |
207 | 207 | ||
208 | static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) |
209 | { | 209 | { |
210 | struct node *ret = tnode_get_child(tn, i); | 210 | struct rt_trie_node *ret = tnode_get_child(tn, i); |
211 | 211 | ||
212 | return rcu_dereference_rtnl(ret); | 212 | return rcu_dereference_rtnl(ret); |
213 | } | 213 | } |
@@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn) | |||
217 | return 1 << tn->bits; | 217 | return 1 << tn->bits; |
218 | } | 218 | } |
219 | 219 | ||
220 | static inline t_key mask_pfx(t_key k, unsigned short l) | 220 | static inline t_key mask_pfx(t_key k, unsigned int l) |
221 | { | 221 | { |
222 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); | 222 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); |
223 | } | 223 | } |
224 | 224 | ||
225 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) | 225 | static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) |
226 | { | 226 | { |
227 | if (offset < KEYLENGTH) | 227 | if (offset < KEYLENGTH) |
228 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); | 228 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); |
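tkey_extract_bits(), shown in context above, pulls `bits` bits starting `offset` bits from the most significant end of a 32-bit key — the basic indexing step when descending the trie. A standalone copy for experimentation:

    #include <stdio.h>

    typedef unsigned int t_key;
    #define KEYLENGTH 32

    static t_key tkey_extract_bits(t_key a, unsigned int offset,
                                   unsigned int bits)
    {
        /* Shift the wanted field to the top, then down to the bottom. */
        if (offset < KEYLENGTH)
            return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
        return 0;
    }

    int main(void)
    {
        /* Top 8 bits of 192.168.1.0 (0xC0A80100) are 0xC0. */
        printf("%02X\n", tkey_extract_bits(0xC0A80100u, 0, 8));
        return 0;
    }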
@@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head) | |||
378 | { | 378 | { |
379 | struct tnode *tn = container_of(head, struct tnode, rcu); | 379 | struct tnode *tn = container_of(head, struct tnode, rcu); |
380 | size_t size = sizeof(struct tnode) + | 380 | size_t size = sizeof(struct tnode) + |
381 | (sizeof(struct node *) << tn->bits); | 381 | (sizeof(struct rt_trie_node *) << tn->bits); |
382 | 382 | ||
383 | if (size <= PAGE_SIZE) | 383 | if (size <= PAGE_SIZE) |
384 | kfree(tn); | 384 | kfree(tn); |
@@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn) | |||
402 | tn->tnode_free = tnode_free_head; | 402 | tn->tnode_free = tnode_free_head; |
403 | tnode_free_head = tn; | 403 | tnode_free_head = tn; |
404 | tnode_free_size += sizeof(struct tnode) + | 404 | tnode_free_size += sizeof(struct tnode) + |
405 | (sizeof(struct node *) << tn->bits); | 405 | (sizeof(struct rt_trie_node *) << tn->bits); |
406 | } | 406 | } |
407 | 407 | ||
408 | static void tnode_free_flush(void) | 408 | static void tnode_free_flush(void) |
@@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen) | |||
443 | 443 | ||
444 | static struct tnode *tnode_new(t_key key, int pos, int bits) | 444 | static struct tnode *tnode_new(t_key key, int pos, int bits) |
445 | { | 445 | { |
446 | size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); | 446 | size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); |
447 | struct tnode *tn = tnode_alloc(sz); | 447 | struct tnode *tn = tnode_alloc(sz); |
448 | 448 | ||
449 | if (tn) { | 449 | if (tn) { |
@@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
456 | } | 456 | } |
457 | 457 | ||
458 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), | 458 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), |
459 | sizeof(struct node) << bits); | 459 | sizeof(struct rt_trie_node) << bits); |
460 | return tn; | 460 | return tn; |
461 | } | 461 | } |
462 | 462 | ||
@@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
465 | * and no bits are skipped. See discussion in dyntree paper p. 6 | 465 | * and no bits are skipped. See discussion in dyntree paper p. 6 |
466 | */ | 466 | */ |
467 | 467 | ||
468 | static inline int tnode_full(const struct tnode *tn, const struct node *n) | 468 | static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) |
469 | { | 469 | { |
470 | if (n == NULL || IS_LEAF(n)) | 470 | if (n == NULL || IS_LEAF(n)) |
471 | return 0; | 471 | return 0; |
@@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) | |||
474 | } | 474 | } |
475 | 475 | ||
476 | static inline void put_child(struct trie *t, struct tnode *tn, int i, | 476 | static inline void put_child(struct trie *t, struct tnode *tn, int i, |
477 | struct node *n) | 477 | struct rt_trie_node *n) |
478 | { | 478 | { |
479 | tnode_put_child_reorg(tn, i, n, -1); | 479 | tnode_put_child_reorg(tn, i, n, -1); |
480 | } | 480 | } |
@@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
484 | * Update the value of full_children and empty_children. | 484 | * Update the value of full_children and empty_children. |
485 | */ | 485 | */ |
486 | 486 | ||
487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
488 | int wasfull) | 488 | int wasfull) |
489 | { | 489 | { |
490 | struct node *chi = tn->child[i]; | 490 | struct rt_trie_node *chi = tn->child[i]; |
491 | int isfull; | 491 | int isfull; |
492 | 492 | ||
493 | BUG_ON(i >= 1<<tn->bits); | 493 | BUG_ON(i >= 1<<tn->bits); |
@@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
515 | } | 515 | } |
516 | 516 | ||
517 | #define MAX_WORK 10 | 517 | #define MAX_WORK 10 |
518 | static struct node *resize(struct trie *t, struct tnode *tn) | 518 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) |
519 | { | 519 | { |
520 | int i; | 520 | int i; |
521 | struct tnode *old_tn; | 521 | struct tnode *old_tn; |
@@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
605 | 605 | ||
606 | /* Keep root node larger */ | 606 | /* Keep root node larger */ |
607 | 607 | ||
608 | if (!node_parent((struct node *)tn)) { | 608 | if (!node_parent((struct rt_trie_node *)tn)) { |
609 | inflate_threshold_use = inflate_threshold_root; | 609 | inflate_threshold_use = inflate_threshold_root; |
610 | halve_threshold_use = halve_threshold_root; | 610 | halve_threshold_use = halve_threshold_root; |
611 | } else { | 611 | } else { |
@@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
635 | 635 | ||
636 | /* Return if at least one inflate was run */ | 636 | /* Return if at least one inflate was run */ |
637 | if (max_work != MAX_WORK) | 637 | if (max_work != MAX_WORK) |
638 | return (struct node *) tn; | 638 | return (struct rt_trie_node *) tn; |
639 | 639 | ||
640 | /* | 640 | /* |
641 | * Halve as long as the number of empty children in this | 641 | * Halve as long as the number of empty children in this |
@@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
663 | if (tn->empty_children == tnode_child_length(tn) - 1) { | 663 | if (tn->empty_children == tnode_child_length(tn) - 1) { |
664 | one_child: | 664 | one_child: |
665 | for (i = 0; i < tnode_child_length(tn); i++) { | 665 | for (i = 0; i < tnode_child_length(tn); i++) { |
666 | struct node *n; | 666 | struct rt_trie_node *n; |
667 | 667 | ||
668 | n = tn->child[i]; | 668 | n = tn->child[i]; |
669 | if (!n) | 669 | if (!n) |
@@ -676,7 +676,7 @@ one_child: | |||
676 | return n; | 676 | return n; |
677 | } | 677 | } |
678 | } | 678 | } |
679 | return (struct node *) tn; | 679 | return (struct rt_trie_node *) tn; |
680 | } | 680 | } |
681 | 681 | ||
682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
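
Most of the churn through this file is the mechanical rename of struct node to struct rt_trie_node, freeing the overly generic name now that the trie is the only FIB backend. For orientation, a rough sketch of the types involved (fields abbreviated; the low bit of ->parent tags a node as leaf or tnode, which is what IS_LEAF()/IS_TNODE() test):

    struct rt_trie_node {            /* common header */
            unsigned long parent;    /* tagged parent pointer */
            t_key key;
    };

    struct tnode {                   /* internal node */
            unsigned long parent;
            t_key key;
            unsigned char pos;       /* first key bit this node indexes */
            unsigned char bits;      /* log2 of the number of children */
            /* ... occupancy counters, RCU bookkeeping ... */
            struct rt_trie_node __rcu *child[0];    /* 2^bits slots */
    };
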
@@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
723 | goto nomem; | 723 | goto nomem; |
724 | } | 724 | } |
725 | 725 | ||
726 | put_child(t, tn, 2*i, (struct node *) left); | 726 | put_child(t, tn, 2*i, (struct rt_trie_node *) left); |
727 | put_child(t, tn, 2*i+1, (struct node *) right); | 727 | put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); |
728 | } | 728 | } |
729 | } | 729 | } |
730 | 730 | ||
731 | for (i = 0; i < olen; i++) { | 731 | for (i = 0; i < olen; i++) { |
732 | struct tnode *inode; | 732 | struct tnode *inode; |
733 | struct node *node = tnode_get_child(oldtnode, i); | 733 | struct rt_trie_node *node = tnode_get_child(oldtnode, i); |
734 | struct tnode *left, *right; | 734 | struct tnode *left, *right; |
735 | int size, j; | 735 | int size, j; |
736 | 736 | ||
@@ -825,7 +825,7 @@ nomem: | |||
825 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 825 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
826 | { | 826 | { |
827 | struct tnode *oldtnode = tn; | 827 | struct tnode *oldtnode = tn; |
828 | struct node *left, *right; | 828 | struct rt_trie_node *left, *right; |
829 | int i; | 829 | int i; |
830 | int olen = tnode_child_length(tn); | 830 | int olen = tnode_child_length(tn); |
831 | 831 | ||
@@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
856 | if (!newn) | 856 | if (!newn) |
857 | goto nomem; | 857 | goto nomem; |
858 | 858 | ||
859 | put_child(t, tn, i/2, (struct node *)newn); | 859 | put_child(t, tn, i/2, (struct rt_trie_node *)newn); |
860 | } | 860 | } |
861 | 861 | ||
862 | } | 862 | } |
@@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key) | |||
958 | { | 958 | { |
959 | int pos; | 959 | int pos; |
960 | struct tnode *tn; | 960 | struct tnode *tn; |
961 | struct node *n; | 961 | struct rt_trie_node *n; |
962 | 962 | ||
963 | pos = 0; | 963 | pos = 0; |
964 | n = rcu_dereference_rtnl(t->trie); | 964 | n = rcu_dereference_rtnl(t->trie); |
@@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
993 | 993 | ||
994 | key = tn->key; | 994 | key = tn->key; |
995 | 995 | ||
996 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 996 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { |
997 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 997 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
998 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 998 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
999 | tn = (struct tnode *) resize(t, (struct tnode *)tn); | 999 | tn = (struct tnode *) resize(t, (struct tnode *)tn); |
1000 | 1000 | ||
1001 | tnode_put_child_reorg((struct tnode *)tp, cindex, | 1001 | tnode_put_child_reorg((struct tnode *)tp, cindex, |
1002 | (struct node *)tn, wasfull); | 1002 | (struct rt_trie_node *)tn, wasfull); |
1003 | 1003 | ||
1004 | tp = node_parent((struct node *) tn); | 1004 | tp = node_parent((struct rt_trie_node *) tn); |
1005 | if (!tp) | 1005 | if (!tp) |
1006 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1006 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1007 | 1007 | ||
1008 | tnode_free_flush(); | 1008 | tnode_free_flush(); |
1009 | if (!tp) | 1009 | if (!tp) |
@@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
1015 | if (IS_TNODE(tn)) | 1015 | if (IS_TNODE(tn)) |
1016 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1016 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
1017 | 1017 | ||
1018 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1018 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1019 | tnode_free_flush(); | 1019 | tnode_free_flush(); |
1020 | } | 1020 | } |
1021 | 1021 | ||
@@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1025 | { | 1025 | { |
1026 | int pos, newpos; | 1026 | int pos, newpos; |
1027 | struct tnode *tp = NULL, *tn = NULL; | 1027 | struct tnode *tp = NULL, *tn = NULL; |
1028 | struct node *n; | 1028 | struct rt_trie_node *n; |
1029 | struct leaf *l; | 1029 | struct leaf *l; |
1030 | int missbit; | 1030 | int missbit; |
1031 | struct list_head *fa_head = NULL; | 1031 | struct list_head *fa_head = NULL; |
@@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1111 | if (t->trie && n == NULL) { | 1111 | if (t->trie && n == NULL) { |
1112 | /* Case 2: n is NULL, and will just insert a new leaf */ | 1112 | /* Case 2: n is NULL, and will just insert a new leaf */ |
1113 | 1113 | ||
1114 | node_set_parent((struct node *)l, tp); | 1114 | node_set_parent((struct rt_trie_node *)l, tp); |
1115 | 1115 | ||
1116 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1116 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1117 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); | 1117 | put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); |
1118 | } else { | 1118 | } else { |
1119 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | 1119 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
1120 | /* | 1120 | /* |
@@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1141 | return NULL; | 1141 | return NULL; |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | node_set_parent((struct node *)tn, tp); | 1144 | node_set_parent((struct rt_trie_node *)tn, tp); |
1145 | 1145 | ||
1146 | missbit = tkey_extract_bits(key, newpos, 1); | 1146 | missbit = tkey_extract_bits(key, newpos, 1); |
1147 | put_child(t, tn, missbit, (struct node *)l); | 1147 | put_child(t, tn, missbit, (struct rt_trie_node *)l); |
1148 | put_child(t, tn, 1-missbit, n); | 1148 | put_child(t, tn, 1-missbit, n); |
1149 | 1149 | ||
1150 | if (tp) { | 1150 | if (tp) { |
1151 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1151 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1152 | put_child(t, (struct tnode *)tp, cindex, | 1152 | put_child(t, (struct tnode *)tp, cindex, |
1153 | (struct node *)tn); | 1153 | (struct rt_trie_node *)tn); |
1154 | } else { | 1154 | } else { |
1155 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1155 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1156 | tp = tn; | 1156 | tp = tn; |
1157 | } | 1157 | } |
1158 | } | 1158 | } |
@@ -1340,8 +1340,8 @@ err: | |||
1340 | } | 1340 | } |
1341 | 1341 | ||
1342 | /* should be called with rcu_read_lock */ | 1342 | /* should be called with rcu_read_lock */ |
1343 | static int check_leaf(struct trie *t, struct leaf *l, | 1343 | static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, |
1344 | t_key key, const struct flowi *flp, | 1344 | t_key key, const struct flowi4 *flp, |
1345 | struct fib_result *res, int fib_flags) | 1345 | struct fib_result *res, int fib_flags) |
1346 | { | 1346 | { |
1347 | struct leaf_info *li; | 1347 | struct leaf_info *li; |
@@ -1349,40 +1349,75 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
1349 | struct hlist_node *node; | 1349 | struct hlist_node *node; |
1350 | 1350 | ||
1351 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { | 1351 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { |
1352 | int err; | 1352 | struct fib_alias *fa; |
1353 | int plen = li->plen; | 1353 | int plen = li->plen; |
1354 | __be32 mask = inet_make_mask(plen); | 1354 | __be32 mask = inet_make_mask(plen); |
1355 | 1355 | ||
1356 | if (l->key != (key & ntohl(mask))) | 1356 | if (l->key != (key & ntohl(mask))) |
1357 | continue; | 1357 | continue; |
1358 | 1358 | ||
1359 | err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); | 1359 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { |
1360 | struct fib_info *fi = fa->fa_info; | ||
1361 | int nhsel, err; | ||
1360 | 1362 | ||
1363 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) | ||
1364 | continue; | ||
1365 | if (fa->fa_scope < flp->flowi4_scope) | ||
1366 | continue; | ||
1367 | fib_alias_accessed(fa); | ||
1368 | err = fib_props[fa->fa_type].error; | ||
1369 | if (err) { | ||
1361 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1370 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1362 | if (err <= 0) | 1371 | t->stats.semantic_match_miss++; |
1363 | t->stats.semantic_match_passed++; | 1372 | #endif |
1364 | else | 1373 | return 1; |
1365 | t->stats.semantic_match_miss++; | 1374 | } |
1375 | if (fi->fib_flags & RTNH_F_DEAD) | ||
1376 | continue; | ||
1377 | for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { | ||
1378 | const struct fib_nh *nh = &fi->fib_nh[nhsel]; | ||
1379 | |||
1380 | if (nh->nh_flags & RTNH_F_DEAD) | ||
1381 | continue; | ||
1382 | if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) | ||
1383 | continue; | ||
1384 | |||
1385 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
1386 | t->stats.semantic_match_passed++; | ||
1387 | #endif | ||
1388 | res->prefixlen = plen; | ||
1389 | res->nh_sel = nhsel; | ||
1390 | res->type = fa->fa_type; | ||
1391 | res->scope = fa->fa_scope; | ||
1392 | res->fi = fi; | ||
1393 | res->table = tb; | ||
1394 | res->fa_head = &li->falh; | ||
1395 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | ||
1396 | atomic_inc(&res->fi->fib_clntref); | ||
1397 | return 0; | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1401 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
1402 | t->stats.semantic_match_miss++; | ||
1366 | #endif | 1403 | #endif |
1367 | if (err <= 0) | ||
1368 | return err; | ||
1369 | } | 1404 | } |
1370 | 1405 | ||
1371 | return 1; | 1406 | return 1; |
1372 | } | 1407 | } |
1373 | 1408 | ||
1374 | int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | 1409 | int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, |
1375 | struct fib_result *res, int fib_flags) | 1410 | struct fib_result *res, int fib_flags) |
1376 | { | 1411 | { |
1377 | struct trie *t = (struct trie *) tb->tb_data; | 1412 | struct trie *t = (struct trie *) tb->tb_data; |
1378 | int ret; | 1413 | int ret; |
1379 | struct node *n; | 1414 | struct rt_trie_node *n; |
1380 | struct tnode *pn; | 1415 | struct tnode *pn; |
1381 | int pos, bits; | 1416 | unsigned int pos, bits; |
1382 | t_key key = ntohl(flp->fl4_dst); | 1417 | t_key key = ntohl(flp->daddr); |
1383 | int chopped_off; | 1418 | unsigned int chopped_off; |
1384 | t_key cindex = 0; | 1419 | t_key cindex = 0; |
1385 | int current_prefix_length = KEYLENGTH; | 1420 | unsigned int current_prefix_length = KEYLENGTH; |
1386 | struct tnode *cn; | 1421 | struct tnode *cn; |
1387 | t_key pref_mismatch; | 1422 | t_key pref_mismatch; |
1388 | 1423 | ||
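
With this hunk the semantic match formerly done by fib_semantic_match() is inlined into check_leaf(): each matching leaf_info's alias list is scanned for tos/scope, dead nexthops are skipped, and the fib_result is filled in directly, including the new res->table back-pointer, which is why check_leaf() and fib_table_lookup() now take the fib_table. The prefix test on entry leans on inet_make_mask(); its arithmetic, under the usual definition:

    /* sketch of inet_make_mask(): a /plen network mask, big-endian */
    static inline __be32 make_mask(int plen)
    {
            return plen ? htonl(~((1U << (32 - plen)) - 1)) : 0;
    }
    /* plen == 24 gives htonl(0xffffff00), so the leaf matches when
     * l->key equals the top 24 bits of the (host-order) lookup key */
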
@@ -1398,7 +1433,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1398 | 1433 | ||
1399 | /* Just a leaf? */ | 1434 | /* Just a leaf? */ |
1400 | if (IS_LEAF(n)) { | 1435 | if (IS_LEAF(n)) { |
1401 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); | 1436 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
1402 | goto found; | 1437 | goto found; |
1403 | } | 1438 | } |
1404 | 1439 | ||
@@ -1423,7 +1458,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1423 | } | 1458 | } |
1424 | 1459 | ||
1425 | if (IS_LEAF(n)) { | 1460 | if (IS_LEAF(n)) { |
1426 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); | 1461 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
1427 | if (ret > 0) | 1462 | if (ret > 0) |
1428 | goto backtrace; | 1463 | goto backtrace; |
1429 | goto found; | 1464 | goto found; |
@@ -1541,7 +1576,7 @@ backtrace: | |||
1541 | if (chopped_off <= pn->bits) { | 1576 | if (chopped_off <= pn->bits) { |
1542 | cindex &= ~(1 << (chopped_off-1)); | 1577 | cindex &= ~(1 << (chopped_off-1)); |
1543 | } else { | 1578 | } else { |
1544 | struct tnode *parent = node_parent_rcu((struct node *) pn); | 1579 | struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); |
1545 | if (!parent) | 1580 | if (!parent) |
1546 | goto failed; | 1581 | goto failed; |
1547 | 1582 | ||
@@ -1568,7 +1603,7 @@ found: | |||
1568 | */ | 1603 | */ |
1569 | static void trie_leaf_remove(struct trie *t, struct leaf *l) | 1604 | static void trie_leaf_remove(struct trie *t, struct leaf *l) |
1570 | { | 1605 | { |
1571 | struct tnode *tp = node_parent((struct node *) l); | 1606 | struct tnode *tp = node_parent((struct rt_trie_node *) l); |
1572 | 1607 | ||
1573 | pr_debug("entering trie_leaf_remove(%p)\n", l); | 1608 | pr_debug("entering trie_leaf_remove(%p)\n", l); |
1574 | 1609 | ||
@@ -1706,7 +1741,7 @@ static int trie_flush_leaf(struct leaf *l) | |||
1706 | * Scan for the next right leaf starting at node p->child[idx] | 1741 | * Scan for the next right leaf starting at node p->child[idx] |
1707 | * Since we have back pointers, no recursion is necessary. | 1742 | * Since we have back pointers, no recursion is necessary. |
1708 | */ | 1743 | */ |
1709 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | 1744 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) |
1710 | { | 1745 | { |
1711 | do { | 1746 | do { |
1712 | t_key idx; | 1747 | t_key idx; |
@@ -1732,7 +1767,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | |||
1732 | } | 1767 | } |
1733 | 1768 | ||
1734 | /* Node empty, walk back up to parent */ | 1769 | /* Node empty, walk back up to parent */ |
1735 | c = (struct node *) p; | 1770 | c = (struct rt_trie_node *) p; |
1736 | } while ((p = node_parent_rcu(c)) != NULL); | 1771 | } while ((p = node_parent_rcu(c)) != NULL); |
1737 | 1772 | ||
1738 | return NULL; /* Root of trie */ | 1773 | return NULL; /* Root of trie */ |
@@ -1753,7 +1788,7 @@ static struct leaf *trie_firstleaf(struct trie *t) | |||
1753 | 1788 | ||
1754 | static struct leaf *trie_nextleaf(struct leaf *l) | 1789 | static struct leaf *trie_nextleaf(struct leaf *l) |
1755 | { | 1790 | { |
1756 | struct node *c = (struct node *) l; | 1791 | struct rt_trie_node *c = (struct rt_trie_node *) l; |
1757 | struct tnode *p = node_parent_rcu(c); | 1792 | struct tnode *p = node_parent_rcu(c); |
1758 | 1793 | ||
1759 | if (!p) | 1794 | if (!p) |
@@ -1802,80 +1837,6 @@ void fib_free_table(struct fib_table *tb) | |||
1802 | kfree(tb); | 1837 | kfree(tb); |
1803 | } | 1838 | } |
1804 | 1839 | ||
1805 | void fib_table_select_default(struct fib_table *tb, | ||
1806 | const struct flowi *flp, | ||
1807 | struct fib_result *res) | ||
1808 | { | ||
1809 | struct trie *t = (struct trie *) tb->tb_data; | ||
1810 | int order, last_idx; | ||
1811 | struct fib_info *fi = NULL; | ||
1812 | struct fib_info *last_resort; | ||
1813 | struct fib_alias *fa = NULL; | ||
1814 | struct list_head *fa_head; | ||
1815 | struct leaf *l; | ||
1816 | |||
1817 | last_idx = -1; | ||
1818 | last_resort = NULL; | ||
1819 | order = -1; | ||
1820 | |||
1821 | rcu_read_lock(); | ||
1822 | |||
1823 | l = fib_find_node(t, 0); | ||
1824 | if (!l) | ||
1825 | goto out; | ||
1826 | |||
1827 | fa_head = get_fa_head(l, 0); | ||
1828 | if (!fa_head) | ||
1829 | goto out; | ||
1830 | |||
1831 | if (list_empty(fa_head)) | ||
1832 | goto out; | ||
1833 | |||
1834 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
1835 | struct fib_info *next_fi = fa->fa_info; | ||
1836 | |||
1837 | if (fa->fa_scope != res->scope || | ||
1838 | fa->fa_type != RTN_UNICAST) | ||
1839 | continue; | ||
1840 | |||
1841 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
1842 | break; | ||
1843 | if (!next_fi->fib_nh[0].nh_gw || | ||
1844 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
1845 | continue; | ||
1846 | |||
1847 | fib_alias_accessed(fa); | ||
1848 | |||
1849 | if (fi == NULL) { | ||
1850 | if (next_fi != res->fi) | ||
1851 | break; | ||
1852 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
1853 | &last_idx, tb->tb_default)) { | ||
1854 | fib_result_assign(res, fi); | ||
1855 | tb->tb_default = order; | ||
1856 | goto out; | ||
1857 | } | ||
1858 | fi = next_fi; | ||
1859 | order++; | ||
1860 | } | ||
1861 | if (order <= 0 || fi == NULL) { | ||
1862 | tb->tb_default = -1; | ||
1863 | goto out; | ||
1864 | } | ||
1865 | |||
1866 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
1867 | tb->tb_default)) { | ||
1868 | fib_result_assign(res, fi); | ||
1869 | tb->tb_default = order; | ||
1870 | goto out; | ||
1871 | } | ||
1872 | if (last_idx >= 0) | ||
1873 | fib_result_assign(res, last_resort); | ||
1874 | tb->tb_default = last_idx; | ||
1875 | out: | ||
1876 | rcu_read_unlock(); | ||
1877 | } | ||
1878 | |||
1879 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | 1840 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, |
1880 | struct fib_table *tb, | 1841 | struct fib_table *tb, |
1881 | struct sk_buff *skb, struct netlink_callback *cb) | 1842 | struct sk_buff *skb, struct netlink_callback *cb) |
@@ -1990,7 +1951,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, | |||
1990 | return skb->len; | 1951 | return skb->len; |
1991 | } | 1952 | } |
1992 | 1953 | ||
1993 | void __init fib_hash_init(void) | 1954 | void __init fib_trie_init(void) |
1994 | { | 1955 | { |
1995 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", | 1956 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", |
1996 | sizeof(struct fib_alias), | 1957 | sizeof(struct fib_alias), |
@@ -2003,8 +1964,7 @@ void __init fib_hash_init(void) | |||
2003 | } | 1964 | } |
2004 | 1965 | ||
2005 | 1966 | ||
2006 | /* Fix more generic FIB names for init later */ | 1967 | struct fib_table *fib_trie_table(u32 id) |
2007 | struct fib_table *fib_hash_table(u32 id) | ||
2008 | { | 1968 | { |
2009 | struct fib_table *tb; | 1969 | struct fib_table *tb; |
2010 | struct trie *t; | 1970 | struct trie *t; |
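
fib_trie_table() takes over the generic constructor name from the old fib_hash_table() stub now that the trie is the only FIB implementation. The body, cut off by the hunk, presumably keeps the established pattern of carving the trie out of the same allocation as its fib_table; a sketch under that assumption:

    tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
                 GFP_KERNEL);
    if (tb == NULL)
            return NULL;

    tb->tb_id = id;
    tb->tb_default = -1;

    t = (struct trie *) tb->tb_data;    /* trie lives in tb's tail */
    memset(t, 0, sizeof(*t));
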
@@ -2036,7 +1996,7 @@ struct fib_trie_iter { | |||
2036 | unsigned int depth; | 1996 | unsigned int depth; |
2037 | }; | 1997 | }; |
2038 | 1998 | ||
2039 | static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | 1999 | static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) |
2040 | { | 2000 | { |
2041 | struct tnode *tn = iter->tnode; | 2001 | struct tnode *tn = iter->tnode; |
2042 | unsigned int cindex = iter->index; | 2002 | unsigned int cindex = iter->index; |
@@ -2050,7 +2010,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | |||
2050 | iter->tnode, iter->index, iter->depth); | 2010 | iter->tnode, iter->index, iter->depth); |
2051 | rescan: | 2011 | rescan: |
2052 | while (cindex < (1<<tn->bits)) { | 2012 | while (cindex < (1<<tn->bits)) { |
2053 | struct node *n = tnode_get_child_rcu(tn, cindex); | 2013 | struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); |
2054 | 2014 | ||
2055 | if (n) { | 2015 | if (n) { |
2056 | if (IS_LEAF(n)) { | 2016 | if (IS_LEAF(n)) { |
@@ -2069,7 +2029,7 @@ rescan: | |||
2069 | } | 2029 | } |
2070 | 2030 | ||
2071 | /* Current node exhausted, pop back up */ | 2031 | /* Current node exhausted, pop back up */ |
2072 | p = node_parent_rcu((struct node *)tn); | 2032 | p = node_parent_rcu((struct rt_trie_node *)tn); |
2073 | if (p) { | 2033 | if (p) { |
2074 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; | 2034 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; |
2075 | tn = p; | 2035 | tn = p; |
@@ -2081,10 +2041,10 @@ rescan: | |||
2081 | return NULL; | 2041 | return NULL; |
2082 | } | 2042 | } |
2083 | 2043 | ||
2084 | static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | 2044 | static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, |
2085 | struct trie *t) | 2045 | struct trie *t) |
2086 | { | 2046 | { |
2087 | struct node *n; | 2047 | struct rt_trie_node *n; |
2088 | 2048 | ||
2089 | if (!t) | 2049 | if (!t) |
2090 | return NULL; | 2050 | return NULL; |
@@ -2108,7 +2068,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | |||
2108 | 2068 | ||
2109 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) | 2069 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) |
2110 | { | 2070 | { |
2111 | struct node *n; | 2071 | struct rt_trie_node *n; |
2112 | struct fib_trie_iter iter; | 2072 | struct fib_trie_iter iter; |
2113 | 2073 | ||
2114 | memset(s, 0, sizeof(*s)); | 2074 | memset(s, 0, sizeof(*s)); |
@@ -2181,7 +2141,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) | |||
2181 | seq_putc(seq, '\n'); | 2141 | seq_putc(seq, '\n'); |
2182 | seq_printf(seq, "\tPointers: %u\n", pointers); | 2142 | seq_printf(seq, "\tPointers: %u\n", pointers); |
2183 | 2143 | ||
2184 | bytes += sizeof(struct node *) * pointers; | 2144 | bytes += sizeof(struct rt_trie_node *) * pointers; |
2185 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); | 2145 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); |
2186 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); | 2146 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); |
2187 | } | 2147 | } |
@@ -2262,7 +2222,7 @@ static const struct file_operations fib_triestat_fops = { | |||
2262 | .release = single_release_net, | 2222 | .release = single_release_net, |
2263 | }; | 2223 | }; |
2264 | 2224 | ||
2265 | static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | 2225 | static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) |
2266 | { | 2226 | { |
2267 | struct fib_trie_iter *iter = seq->private; | 2227 | struct fib_trie_iter *iter = seq->private; |
2268 | struct net *net = seq_file_net(seq); | 2228 | struct net *net = seq_file_net(seq); |
@@ -2275,7 +2235,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | |||
2275 | struct fib_table *tb; | 2235 | struct fib_table *tb; |
2276 | 2236 | ||
2277 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { | 2237 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { |
2278 | struct node *n; | 2238 | struct rt_trie_node *n; |
2279 | 2239 | ||
2280 | for (n = fib_trie_get_first(iter, | 2240 | for (n = fib_trie_get_first(iter, |
2281 | (struct trie *) tb->tb_data); | 2241 | (struct trie *) tb->tb_data); |
@@ -2304,7 +2264,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2304 | struct fib_table *tb = iter->tb; | 2264 | struct fib_table *tb = iter->tb; |
2305 | struct hlist_node *tb_node; | 2265 | struct hlist_node *tb_node; |
2306 | unsigned int h; | 2266 | unsigned int h; |
2307 | struct node *n; | 2267 | struct rt_trie_node *n; |
2308 | 2268 | ||
2309 | ++*pos; | 2269 | ++*pos; |
2310 | /* next node in same table */ | 2270 | /* next node in same table */ |
@@ -2390,7 +2350,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) | |||
2390 | static int fib_trie_seq_show(struct seq_file *seq, void *v) | 2350 | static int fib_trie_seq_show(struct seq_file *seq, void *v) |
2391 | { | 2351 | { |
2392 | const struct fib_trie_iter *iter = seq->private; | 2352 | const struct fib_trie_iter *iter = seq->private; |
2393 | struct node *n = v; | 2353 | struct rt_trie_node *n = v; |
2394 | 2354 | ||
2395 | if (!node_parent_rcu(n)) | 2355 | if (!node_parent_rcu(n)) |
2396 | fib_table_print(seq, iter->tb); | 2356 | fib_table_print(seq, iter->tb); |
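
The /proc iterator above walks the trie without recursion: it scans the current tnode's child slots, descends into tnodes, and when a tnode is exhausted pops to the parent through the back pointer. The resume slot after popping is recomputed from the child's own key, since tkey_extract_bits(tn->key, p->pos, p->bits) is exactly the index the child occupies in its parent; condensed from fib_trie_get_next():

    /* current tnode exhausted: resume in the parent, one slot later */
    p = node_parent_rcu((struct rt_trie_node *) tn);
    if (p) {
            cindex = tkey_extract_bits(tn->key, p->pos, p->bits) + 1;
            tn = p;
            goto rescan;
    }
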
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f01ea0..a91dc1611081 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) | |||
233 | * Send an ICMP frame. | 233 | * Send an ICMP frame. |
234 | */ | 234 | */ |
235 | 235 | ||
236 | /* | 236 | static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
237 | * Check transmit rate limitation for given message. | ||
238 | * The rate information is held in the destination cache now. | ||
239 | * This function is generic and could be used for other purposes | ||
240 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
241 | * | ||
242 | * Note that the same dst_entry fields are modified by functions in | ||
243 | * route.c too, but these work for packet destinations while xrlim_allow | ||
244 | * works for icmp destinations. This means the rate limiting information | ||
245 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
246 | * by source and by destination. | ||
247 | * | ||
248 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
249 | * SHOULD allow setting of rate limits | ||
250 | * | ||
251 | * Shared between ICMPv4 and ICMPv6. | ||
252 | */ | ||
253 | #define XRLIM_BURST_FACTOR 6 | ||
254 | int xrlim_allow(struct dst_entry *dst, int timeout) | ||
255 | { | ||
256 | unsigned long now, token = dst->rate_tokens; | ||
257 | int rc = 0; | ||
258 | |||
259 | now = jiffies; | ||
260 | token += now - dst->rate_last; | ||
261 | dst->rate_last = now; | ||
262 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
263 | token = XRLIM_BURST_FACTOR * timeout; | ||
264 | if (token >= timeout) { | ||
265 | token -= timeout; | ||
266 | rc = 1; | ||
267 | } | ||
268 | dst->rate_tokens = token; | ||
269 | return rc; | ||
270 | } | ||
271 | EXPORT_SYMBOL(xrlim_allow); | ||
272 | |||
273 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | ||
274 | int type, int code) | 237 | int type, int code) |
275 | { | 238 | { |
276 | struct dst_entry *dst = &rt->dst; | 239 | struct dst_entry *dst = &rt->dst; |
277 | int rc = 1; | 240 | bool rc = true; |
278 | 241 | ||
279 | if (type > NR_ICMP_TYPES) | 242 | if (type > NR_ICMP_TYPES) |
280 | goto out; | 243 | goto out; |
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | |||
288 | goto out; | 251 | goto out; |
289 | 252 | ||
290 | /* Limit if icmp type is enabled in ratemask. */ | 253 | /* Limit if icmp type is enabled in ratemask. */ |
291 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) | 254 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { |
292 | rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); | 255 | if (!rt->peer) |
256 | rt_bind_peer(rt, 1); | ||
257 | rc = inet_peer_xrlim_allow(rt->peer, | ||
258 | net->ipv4.sysctl_icmp_ratelimit); | ||
259 | } | ||
293 | out: | 260 | out: |
294 | return rc; | 261 | return rc; |
295 | } | 262 | } |
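
ICMP rate limiting moves off the dst_entry: the token bucket now lives in the shared inet_peer entry (the helper is added to inetpeer.c later in this patch), so the limiter state survives route cache churn and is naturally per destination host. The caller pattern, as in the hunk above, binds a peer lazily before consulting the bucket:

    if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
            if (!rt->peer)
                    rt_bind_peer(rt, 1);    /* may allocate the peer */
            rc = inet_peer_xrlim_allow(rt->peer,
                                       net->ipv4.sysctl_icmp_ratelimit);
    }

Note that inet_peer_xrlim_allow(NULL, ...) returns true, so a failed peer allocation fails open rather than silencing ICMP.
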
@@ -386,12 +353,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
386 | daddr = icmp_param->replyopts.faddr; | 353 | daddr = icmp_param->replyopts.faddr; |
387 | } | 354 | } |
388 | { | 355 | { |
389 | struct flowi fl = { .fl4_dst= daddr, | 356 | struct flowi4 fl4 = { |
390 | .fl4_src = rt->rt_spec_dst, | 357 | .daddr = daddr, |
391 | .fl4_tos = RT_TOS(ip_hdr(skb)->tos), | 358 | .saddr = rt->rt_spec_dst, |
392 | .proto = IPPROTO_ICMP }; | 359 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), |
393 | security_skb_classify_flow(skb, &fl); | 360 | .flowi4_proto = IPPROTO_ICMP, |
394 | if (ip_route_output_key(net, &rt, &fl)) | 361 | }; |
362 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||
363 | rt = ip_route_output_key(net, &fl4); | ||
364 | if (IS_ERR(rt)) | ||
395 | goto out_unlock; | 365 | goto out_unlock; |
396 | } | 366 | } |
397 | if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, | 367 | if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, |
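
This is the flowi to flowi4 conversion that recurs through the rest of the series: the catch-all struct flowi gives way to the IPv4-specific struct flowi4 (daddr/saddr instead of fl4_dst/fl4_src, flowi4_-prefixed common fields), and route lookups hand back the rtable, or an ERR_PTR, instead of filling a struct rtable ** output argument. Distilled:

    /* old style (removed) */
    struct flowi fl = { .fl4_dst = daddr, .fl4_tos = RT_TOS(tos),
                        .proto = IPPROTO_ICMP };
    if (ip_route_output_key(net, &rt, &fl))
            goto out_unlock;

    /* new style */
    struct flowi4 fl4 = { .daddr = daddr, .flowi4_tos = RT_TOS(tos),
                          .flowi4_proto = IPPROTO_ICMP };
    rt = ip_route_output_key(net, &fl4);
    if (IS_ERR(rt))
            goto out_unlock;
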
@@ -402,6 +372,97 @@ out_unlock: | |||
402 | icmp_xmit_unlock(sk); | 372 | icmp_xmit_unlock(sk); |
403 | } | 373 | } |
404 | 374 | ||
375 | static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | ||
376 | struct iphdr *iph, | ||
377 | __be32 saddr, u8 tos, | ||
378 | int type, int code, | ||
379 | struct icmp_bxm *param) | ||
380 | { | ||
381 | struct flowi4 fl4 = { | ||
382 | .daddr = (param->replyopts.srr ? | ||
383 | param->replyopts.faddr : iph->saddr), | ||
384 | .saddr = saddr, | ||
385 | .flowi4_tos = RT_TOS(tos), | ||
386 | .flowi4_proto = IPPROTO_ICMP, | ||
387 | .fl4_icmp_type = type, | ||
388 | .fl4_icmp_code = code, | ||
389 | }; | ||
390 | struct rtable *rt, *rt2; | ||
391 | int err; | ||
392 | |||
393 | security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); | ||
394 | rt = __ip_route_output_key(net, &fl4); | ||
395 | if (IS_ERR(rt)) | ||
396 | return rt; | ||
397 | |||
398 | /* No need to clone since we're just using its address. */ | ||
399 | rt2 = rt; | ||
400 | |||
401 | if (!fl4.saddr) | ||
402 | fl4.saddr = rt->rt_src; | ||
403 | |||
404 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, | ||
405 | flowi4_to_flowi(&fl4), NULL, 0); | ||
406 | if (!IS_ERR(rt)) { | ||
407 | if (rt != rt2) | ||
408 | return rt; | ||
409 | } else if (PTR_ERR(rt) == -EPERM) { | ||
410 | rt = NULL; | ||
411 | } else | ||
412 | return rt; | ||
413 | |||
414 | err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); | ||
415 | if (err) | ||
416 | goto relookup_failed; | ||
417 | |||
418 | if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { | ||
419 | rt2 = __ip_route_output_key(net, &fl4); | ||
420 | if (IS_ERR(rt2)) | ||
421 | err = PTR_ERR(rt2); | ||
422 | } else { | ||
423 | struct flowi4 fl4_2 = {}; | ||
424 | unsigned long orefdst; | ||
425 | |||
426 | fl4_2.daddr = fl4.saddr; | ||
427 | rt2 = ip_route_output_key(net, &fl4_2); | ||
428 | if (IS_ERR(rt2)) { | ||
429 | err = PTR_ERR(rt2); | ||
430 | goto relookup_failed; | ||
431 | } | ||
432 | /* Ugh! */ | ||
433 | orefdst = skb_in->_skb_refdst; /* save old refdst */ | ||
434 | err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, | ||
435 | RT_TOS(tos), rt2->dst.dev); | ||
436 | |||
437 | dst_release(&rt2->dst); | ||
438 | rt2 = skb_rtable(skb_in); | ||
439 | skb_in->_skb_refdst = orefdst; /* restore old refdst */ | ||
440 | } | ||
441 | |||
442 | if (err) | ||
443 | goto relookup_failed; | ||
444 | |||
445 | rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, | ||
446 | flowi4_to_flowi(&fl4), NULL, | ||
447 | XFRM_LOOKUP_ICMP); | ||
448 | if (!IS_ERR(rt2)) { | ||
449 | dst_release(&rt->dst); | ||
450 | rt = rt2; | ||
451 | } else if (PTR_ERR(rt2) == -EPERM) { | ||
452 | if (rt) | ||
453 | dst_release(&rt->dst); | ||
454 | return rt2; | ||
455 | } else { | ||
456 | err = PTR_ERR(rt2); | ||
457 | goto relookup_failed; | ||
458 | } | ||
459 | return rt; | ||
460 | |||
461 | relookup_failed: | ||
462 | if (rt) | ||
463 | return rt; | ||
464 | return ERR_PTR(err); | ||
465 | } | ||
405 | 466 | ||
406 | /* | 467 | /* |
407 | * Send an ICMP message in response to a situation | 468 | * Send an ICMP message in response to a situation |
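
Factoring the route lookup out of icmp_send() into icmp_route_lookup() also switches error reporting to the pointer-encoded style of <linux/err.h>: failure comes back as ERR_PTR(-errno), and the caller tests IS_ERR(), extracting the code with PTR_ERR() if it cares. A minimal reminder of the idiom:

    rt = icmp_route_lookup(net, skb_in, iph, saddr, tos,
                           type, code, &icmp_param);
    if (IS_ERR(rt))
            goto out_unlock;    /* PTR_ERR(rt) holds the negative errno */
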
@@ -507,7 +568,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
507 | rcu_read_lock(); | 568 | rcu_read_lock(); |
508 | if (rt_is_input_route(rt) && | 569 | if (rt_is_input_route(rt) && |
509 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) | 570 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) |
510 | dev = dev_get_by_index_rcu(net, rt->fl.iif); | 571 | dev = dev_get_by_index_rcu(net, rt->rt_iif); |
511 | 572 | ||
512 | if (dev) | 573 | if (dev) |
513 | saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 574 | saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
@@ -539,86 +600,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
539 | ipc.opt = &icmp_param.replyopts; | 600 | ipc.opt = &icmp_param.replyopts; |
540 | ipc.tx_flags = 0; | 601 | ipc.tx_flags = 0; |
541 | 602 | ||
542 | { | 603 | rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, |
543 | struct flowi fl = { | 604 | type, code, &icmp_param); |
544 | .fl4_dst = icmp_param.replyopts.srr ? | 605 | if (IS_ERR(rt)) |
545 | icmp_param.replyopts.faddr : iph->saddr, | 606 | goto out_unlock; |
546 | .fl4_src = saddr, | ||
547 | .fl4_tos = RT_TOS(tos), | ||
548 | .proto = IPPROTO_ICMP, | ||
549 | .fl_icmp_type = type, | ||
550 | .fl_icmp_code = code, | ||
551 | }; | ||
552 | int err; | ||
553 | struct rtable *rt2; | ||
554 | |||
555 | security_skb_classify_flow(skb_in, &fl); | ||
556 | if (__ip_route_output_key(net, &rt, &fl)) | ||
557 | goto out_unlock; | ||
558 | |||
559 | /* No need to clone since we're just using its address. */ | ||
560 | rt2 = rt; | ||
561 | |||
562 | if (!fl.nl_u.ip4_u.saddr) | ||
563 | fl.nl_u.ip4_u.saddr = rt->rt_src; | ||
564 | |||
565 | err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); | ||
566 | switch (err) { | ||
567 | case 0: | ||
568 | if (rt != rt2) | ||
569 | goto route_done; | ||
570 | break; | ||
571 | case -EPERM: | ||
572 | rt = NULL; | ||
573 | break; | ||
574 | default: | ||
575 | goto out_unlock; | ||
576 | } | ||
577 | |||
578 | if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) | ||
579 | goto relookup_failed; | ||
580 | |||
581 | if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) | ||
582 | err = __ip_route_output_key(net, &rt2, &fl); | ||
583 | else { | ||
584 | struct flowi fl2 = {}; | ||
585 | unsigned long orefdst; | ||
586 | |||
587 | fl2.fl4_dst = fl.fl4_src; | ||
588 | if (ip_route_output_key(net, &rt2, &fl2)) | ||
589 | goto relookup_failed; | ||
590 | |||
591 | /* Ugh! */ | ||
592 | orefdst = skb_in->_skb_refdst; /* save old refdst */ | ||
593 | err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, | ||
594 | RT_TOS(tos), rt2->dst.dev); | ||
595 | |||
596 | dst_release(&rt2->dst); | ||
597 | rt2 = skb_rtable(skb_in); | ||
598 | skb_in->_skb_refdst = orefdst; /* restore old refdst */ | ||
599 | } | ||
600 | |||
601 | if (err) | ||
602 | goto relookup_failed; | ||
603 | |||
604 | err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, | ||
605 | XFRM_LOOKUP_ICMP); | ||
606 | switch (err) { | ||
607 | case 0: | ||
608 | dst_release(&rt->dst); | ||
609 | rt = rt2; | ||
610 | break; | ||
611 | case -EPERM: | ||
612 | goto ende; | ||
613 | default: | ||
614 | relookup_failed: | ||
615 | if (!rt) | ||
616 | goto out_unlock; | ||
617 | break; | ||
618 | } | ||
619 | } | ||
620 | 607 | ||
621 | route_done: | ||
622 | if (!icmpv4_xrlim_allow(net, rt, type, code)) | 608 | if (!icmpv4_xrlim_allow(net, rt, type, code)) |
623 | goto ende; | 609 | goto ende; |
624 | 610 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e0e77e297de3..1fd3d9ce8398 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -321,14 +321,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
321 | } | 321 | } |
322 | igmp_skb_size(skb) = size; | 322 | igmp_skb_size(skb) = size; |
323 | 323 | ||
324 | { | 324 | rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, |
325 | struct flowi fl = { .oif = dev->ifindex, | 325 | 0, 0, |
326 | .fl4_dst = IGMPV3_ALL_MCR, | 326 | IPPROTO_IGMP, 0, dev->ifindex); |
327 | .proto = IPPROTO_IGMP }; | 327 | if (IS_ERR(rt)) { |
328 | if (ip_route_output_key(net, &rt, &fl)) { | 328 | kfree_skb(skb); |
329 | kfree_skb(skb); | 329 | return NULL; |
330 | return NULL; | ||
331 | } | ||
332 | } | 330 | } |
333 | if (rt->rt_src == 0) { | 331 | if (rt->rt_src == 0) { |
334 | kfree_skb(skb); | 332 | kfree_skb(skb); |
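
The open-coded flowi plus ip_route_output_key() pair is replaced by the ip_route_output_ports() convenience wrapper, which builds the flowi4 internally and returns the route or an ERR_PTR. Judging from the call sites in this hunk and the next, its parameter order is:

    rt = ip_route_output_ports(net, sk /* may be NULL */,
                               daddr, saddr,
                               dport, sport,
                               proto, tos, oif);
    if (IS_ERR(rt))
            return -1;
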
@@ -666,13 +664,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
666 | else | 664 | else |
667 | dst = group; | 665 | dst = group; |
668 | 666 | ||
669 | { | 667 | rt = ip_route_output_ports(net, NULL, dst, 0, |
670 | struct flowi fl = { .oif = dev->ifindex, | 668 | 0, 0, |
671 | .fl4_dst = dst, | 669 | IPPROTO_IGMP, 0, dev->ifindex); |
672 | .proto = IPPROTO_IGMP }; | 670 | if (IS_ERR(rt)) |
673 | if (ip_route_output_key(net, &rt, &fl)) | 671 | return -1; |
674 | return -1; | 672 | |
675 | } | ||
676 | if (rt->rt_src == 0) { | 673 | if (rt->rt_src == 0) { |
677 | ip_rt_put(rt); | 674 | ip_rt_put(rt); |
678 | return -1; | 675 | return -1; |
@@ -1439,8 +1436,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) | |||
1439 | /* RTNL is locked */ | 1436 | /* RTNL is locked */ |
1440 | static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) | 1437 | static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) |
1441 | { | 1438 | { |
1442 | struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; | ||
1443 | struct rtable *rt; | ||
1444 | struct net_device *dev = NULL; | 1439 | struct net_device *dev = NULL; |
1445 | struct in_device *idev = NULL; | 1440 | struct in_device *idev = NULL; |
1446 | 1441 | ||
@@ -1454,9 +1449,14 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) | |||
1454 | return NULL; | 1449 | return NULL; |
1455 | } | 1450 | } |
1456 | 1451 | ||
1457 | if (!dev && !ip_route_output_key(net, &rt, &fl)) { | 1452 | if (!dev) { |
1458 | dev = rt->dst.dev; | 1453 | struct rtable *rt = ip_route_output(net, |
1459 | ip_rt_put(rt); | 1454 | imr->imr_multiaddr.s_addr, |
1455 | 0, 0, 0); | ||
1456 | if (!IS_ERR(rt)) { | ||
1457 | dev = rt->dst.dev; | ||
1458 | ip_rt_put(rt); | ||
1459 | } | ||
1460 | } | 1460 | } |
1461 | if (dev) { | 1461 | if (dev) { |
1462 | imr->imr_ifindex = dev->ifindex; | 1462 | imr->imr_ifindex = dev->ifindex; |
@@ -2329,13 +2329,13 @@ void ip_mc_drop_socket(struct sock *sk) | |||
2329 | rtnl_unlock(); | 2329 | rtnl_unlock(); |
2330 | } | 2330 | } |
2331 | 2331 | ||
2332 | int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | 2332 | /* called with rcu_read_lock() */ |
2333 | int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | ||
2333 | { | 2334 | { |
2334 | struct ip_mc_list *im; | 2335 | struct ip_mc_list *im; |
2335 | struct ip_sf_list *psf; | 2336 | struct ip_sf_list *psf; |
2336 | int rv = 0; | 2337 | int rv = 0; |
2337 | 2338 | ||
2338 | rcu_read_lock(); | ||
2339 | for_each_pmc_rcu(in_dev, im) { | 2339 | for_each_pmc_rcu(in_dev, im) { |
2340 | if (im->multiaddr == mc_addr) | 2340 | if (im->multiaddr == mc_addr) |
2341 | break; | 2341 | break; |
@@ -2357,7 +2357,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p | |||
2357 | } else | 2357 | } else |
2358 | rv = 1; /* unspecified source; tentatively allow */ | 2358 | rv = 1; /* unspecified source; tentatively allow */ |
2359 | } | 2359 | } |
2360 | rcu_read_unlock(); | ||
2361 | return rv; | 2360 | return rv; |
2362 | } | 2361 | } |
2363 | 2362 | ||
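
The _rcu suffix documents the new locking contract: ip_check_mc_rcu() no longer takes the RCU read lock itself, the caller must already hold it, which is what makes the inner lock pair above redundant. Assumed caller shape:

    rcu_read_lock();
    in_dev = __in_dev_get_rcu(dev);
    if (in_dev)
            rv = ip_check_mc_rcu(in_dev, mc_addr, src_addr, proto);
    rcu_read_unlock();
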
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 97e5fb765265..6c0b7f4a3d7d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -356,20 +356,23 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
356 | struct rtable *rt; | 356 | struct rtable *rt; |
357 | const struct inet_request_sock *ireq = inet_rsk(req); | 357 | const struct inet_request_sock *ireq = inet_rsk(req); |
358 | struct ip_options *opt = inet_rsk(req)->opt; | 358 | struct ip_options *opt = inet_rsk(req)->opt; |
359 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 359 | struct flowi4 fl4 = { |
360 | .mark = sk->sk_mark, | 360 | .flowi4_oif = sk->sk_bound_dev_if, |
361 | .fl4_dst = ((opt && opt->srr) ? | 361 | .flowi4_mark = sk->sk_mark, |
362 | opt->faddr : ireq->rmt_addr), | 362 | .daddr = ((opt && opt->srr) ? |
363 | .fl4_src = ireq->loc_addr, | 363 | opt->faddr : ireq->rmt_addr), |
364 | .fl4_tos = RT_CONN_FLAGS(sk), | 364 | .saddr = ireq->loc_addr, |
365 | .proto = sk->sk_protocol, | 365 | .flowi4_tos = RT_CONN_FLAGS(sk), |
366 | .flags = inet_sk_flowi_flags(sk), | 366 | .flowi4_proto = sk->sk_protocol, |
367 | .fl_ip_sport = inet_sk(sk)->inet_sport, | 367 | .flowi4_flags = inet_sk_flowi_flags(sk), |
368 | .fl_ip_dport = ireq->rmt_port }; | 368 | .fl4_sport = inet_sk(sk)->inet_sport, |
369 | .fl4_dport = ireq->rmt_port, | ||
370 | }; | ||
369 | struct net *net = sock_net(sk); | 371 | struct net *net = sock_net(sk); |
370 | 372 | ||
371 | security_req_classify_flow(req, &fl); | 373 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
372 | if (ip_route_output_flow(net, &rt, &fl, sk, 0)) | 374 | rt = ip_route_output_flow(net, &fl4, sk); |
375 | if (IS_ERR(rt)) | ||
373 | goto no_route; | 376 | goto no_route; |
374 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 377 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
375 | goto route_err; | 378 | goto route_err; |
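
The LSM hooks (security_req_classify_flow() and friends) still take the generic struct flowi, so flowi4 users pass through flowi4_to_flowi(). Presumably this is just a container_of-style upcast, with struct flowi keeping the per-family keys in a union; a sketch under that assumption:

    static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
    {
            return container_of(fl4, struct flowi, u.ip4);
    }
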
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a96e65674ac3..dd1b20eca1a2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = { | |||
81 | 81 | ||
82 | struct inet_peer_base { | 82 | struct inet_peer_base { |
83 | struct inet_peer __rcu *root; | 83 | struct inet_peer __rcu *root; |
84 | spinlock_t lock; | 84 | seqlock_t lock; |
85 | int total; | 85 | int total; |
86 | }; | 86 | }; |
87 | 87 | ||
88 | static struct inet_peer_base v4_peers = { | 88 | static struct inet_peer_base v4_peers = { |
89 | .root = peer_avl_empty_rcu, | 89 | .root = peer_avl_empty_rcu, |
90 | .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), | 90 | .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), |
91 | .total = 0, | 91 | .total = 0, |
92 | }; | 92 | }; |
93 | 93 | ||
94 | static struct inet_peer_base v6_peers = { | 94 | static struct inet_peer_base v6_peers = { |
95 | .root = peer_avl_empty_rcu, | 95 | .root = peer_avl_empty_rcu, |
96 | .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), | 96 | .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), |
97 | .total = 0, | 97 | .total = 0, |
98 | }; | 98 | }; |
99 | 99 | ||
@@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
167 | int i, n = (a->family == AF_INET ? 1 : 4); | 167 | int i, n = (a->family == AF_INET ? 1 : 4); |
168 | 168 | ||
169 | for (i = 0; i < n; i++) { | 169 | for (i = 0; i < n; i++) { |
170 | if (a->a6[i] == b->a6[i]) | 170 | if (a->addr.a6[i] == b->addr.a6[i]) |
171 | continue; | 171 | continue; |
172 | if (a->a6[i] < b->a6[i]) | 172 | if (a->addr.a6[i] < b->addr.a6[i]) |
173 | return -1; | 173 | return -1; |
174 | return 1; | 174 | return 1; |
175 | } | 175 | } |
@@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
177 | return 0; | 177 | return 0; |
178 | } | 178 | } |
179 | 179 | ||
180 | #define rcu_deref_locked(X, BASE) \ | ||
181 | rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) | ||
182 | |||
180 | /* | 183 | /* |
181 | * Called with local BH disabled and the pool lock held. | 184 | * Called with local BH disabled and the pool lock held. |
182 | */ | 185 | */ |
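
The doubled ->lock.lock in rcu_deref_locked() is because base->lock is now a seqlock_t and lockdep needs the spinlock that serializes writers, which a seqlock embeds. Roughly, from <linux/seqlock.h>:

    typedef struct {
            unsigned sequence;      /* bumped on each write section */
            spinlock_t lock;        /* serializes writers */
    } seqlock_t;
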
@@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
187 | \ | 190 | \ |
188 | stackptr = _stack; \ | 191 | stackptr = _stack; \ |
189 | *stackptr++ = &_base->root; \ | 192 | *stackptr++ = &_base->root; \ |
190 | for (u = rcu_dereference_protected(_base->root, \ | 193 | for (u = rcu_deref_locked(_base->root, _base); \ |
191 | lockdep_is_held(&_base->lock)); \ | ||
192 | u != peer_avl_empty; ) { \ | 194 | u != peer_avl_empty; ) { \ |
193 | int cmp = addr_compare(_daddr, &u->daddr); \ | 195 | int cmp = addr_compare(_daddr, &u->daddr); \ |
194 | if (cmp == 0) \ | 196 | if (cmp == 0) \ |
@@ -198,23 +200,22 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
198 | else \ | 200 | else \ |
199 | v = &u->avl_right; \ | 201 | v = &u->avl_right; \ |
200 | *stackptr++ = v; \ | 202 | *stackptr++ = v; \ |
201 | u = rcu_dereference_protected(*v, \ | 203 | u = rcu_deref_locked(*v, _base); \ |
202 | lockdep_is_held(&_base->lock)); \ | ||
203 | } \ | 204 | } \ |
204 | u; \ | 205 | u; \ |
205 | }) | 206 | }) |
206 | 207 | ||
207 | /* | 208 | /* |
208 | * Called with rcu_read_lock_bh() | 209 | * Called with rcu_read_lock() |
209 | * Because we hold no lock against a writer, it's quite possible we fall | 210 | * Because we hold no lock against a writer, it's quite possible we fall |
210 | * in an endless loop. | 211 | * in an endless loop. |
211 | * But every pointer we follow is guaranteed to be valid thanks to RCU. | 212 | * But every pointer we follow is guaranteed to be valid thanks to RCU. |
212 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH | 213 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH |
213 | */ | 214 | */ |
214 | static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, | 215 | static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, |
215 | struct inet_peer_base *base) | 216 | struct inet_peer_base *base) |
216 | { | 217 | { |
217 | struct inet_peer *u = rcu_dereference_bh(base->root); | 218 | struct inet_peer *u = rcu_dereference(base->root); |
218 | int count = 0; | 219 | int count = 0; |
219 | 220 | ||
220 | while (u != peer_avl_empty) { | 221 | while (u != peer_avl_empty) { |
@@ -230,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, | |||
230 | return u; | 231 | return u; |
231 | } | 232 | } |
232 | if (cmp == -1) | 233 | if (cmp == -1) |
233 | u = rcu_dereference_bh(u->avl_left); | 234 | u = rcu_dereference(u->avl_left); |
234 | else | 235 | else |
235 | u = rcu_dereference_bh(u->avl_right); | 236 | u = rcu_dereference(u->avl_right); |
236 | if (unlikely(++count == PEER_MAXDEPTH)) | 237 | if (unlikely(++count == PEER_MAXDEPTH)) |
237 | break; | 238 | break; |
238 | } | 239 | } |
@@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, | |||
246 | struct inet_peer __rcu **v; \ | 247 | struct inet_peer __rcu **v; \ |
247 | *stackptr++ = &start->avl_left; \ | 248 | *stackptr++ = &start->avl_left; \ |
248 | v = &start->avl_left; \ | 249 | v = &start->avl_left; \ |
249 | for (u = rcu_dereference_protected(*v, \ | 250 | for (u = rcu_deref_locked(*v, base); \ |
250 | lockdep_is_held(&base->lock)); \ | ||
251 | u->avl_right != peer_avl_empty_rcu; ) { \ | 251 | u->avl_right != peer_avl_empty_rcu; ) { \ |
252 | v = &u->avl_right; \ | 252 | v = &u->avl_right; \ |
253 | *stackptr++ = v; \ | 253 | *stackptr++ = v; \ |
254 | u = rcu_dereference_protected(*v, \ | 254 | u = rcu_deref_locked(*v, base); \ |
255 | lockdep_is_held(&base->lock)); \ | ||
256 | } \ | 255 | } \ |
257 | u; \ | 256 | u; \ |
258 | }) | 257 | }) |
@@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
271 | 270 | ||
272 | while (stackend > stack) { | 271 | while (stackend > stack) { |
273 | nodep = *--stackend; | 272 | nodep = *--stackend; |
274 | node = rcu_dereference_protected(*nodep, | 273 | node = rcu_deref_locked(*nodep, base); |
275 | lockdep_is_held(&base->lock)); | 274 | l = rcu_deref_locked(node->avl_left, base); |
276 | l = rcu_dereference_protected(node->avl_left, | 275 | r = rcu_deref_locked(node->avl_right, base); |
277 | lockdep_is_held(&base->lock)); | ||
278 | r = rcu_dereference_protected(node->avl_right, | ||
279 | lockdep_is_held(&base->lock)); | ||
280 | lh = node_height(l); | 276 | lh = node_height(l); |
281 | rh = node_height(r); | 277 | rh = node_height(r); |
282 | if (lh > rh + 1) { /* l: RH+2 */ | 278 | if (lh > rh + 1) { /* l: RH+2 */ |
283 | struct inet_peer *ll, *lr, *lrl, *lrr; | 279 | struct inet_peer *ll, *lr, *lrl, *lrr; |
284 | int lrh; | 280 | int lrh; |
285 | ll = rcu_dereference_protected(l->avl_left, | 281 | ll = rcu_deref_locked(l->avl_left, base); |
286 | lockdep_is_held(&base->lock)); | 282 | lr = rcu_deref_locked(l->avl_right, base); |
287 | lr = rcu_dereference_protected(l->avl_right, | ||
288 | lockdep_is_held(&base->lock)); | ||
289 | lrh = node_height(lr); | 283 | lrh = node_height(lr); |
290 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ | 284 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ |
291 | RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ | 285 | RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ |
@@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
296 | l->avl_height = node->avl_height + 1; | 290 | l->avl_height = node->avl_height + 1; |
297 | RCU_INIT_POINTER(*nodep, l); | 291 | RCU_INIT_POINTER(*nodep, l); |
298 | } else { /* ll: RH, lr: RH+1 */ | 292 | } else { /* ll: RH, lr: RH+1 */ |
299 | lrl = rcu_dereference_protected(lr->avl_left, | 293 | lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ |
300 | lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ | 294 | lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ |
301 | lrr = rcu_dereference_protected(lr->avl_right, | ||
302 | lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ | ||
303 | RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ | 295 | RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ |
304 | RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ | 296 | RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ |
305 | node->avl_height = rh + 1; /* node: RH+1 */ | 297 | node->avl_height = rh + 1; /* node: RH+1 */ |
@@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
314 | } else if (rh > lh + 1) { /* r: LH+2 */ | 306 | } else if (rh > lh + 1) { /* r: LH+2 */ |
315 | struct inet_peer *rr, *rl, *rlr, *rll; | 307 | struct inet_peer *rr, *rl, *rlr, *rll; |
316 | int rlh; | 308 | int rlh; |
317 | rr = rcu_dereference_protected(r->avl_right, | 309 | rr = rcu_deref_locked(r->avl_right, base); |
318 | lockdep_is_held(&base->lock)); | 310 | rl = rcu_deref_locked(r->avl_left, base); |
319 | rl = rcu_dereference_protected(r->avl_left, | ||
320 | lockdep_is_held(&base->lock)); | ||
321 | rlh = node_height(rl); | 311 | rlh = node_height(rl); |
322 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ | 312 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ |
323 | RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ | 313 | RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ |
@@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
328 | r->avl_height = node->avl_height + 1; | 318 | r->avl_height = node->avl_height + 1; |
329 | RCU_INIT_POINTER(*nodep, r); | 319 | RCU_INIT_POINTER(*nodep, r); |
330 | } else { /* rr: RH, rl: RH+1 */ | 320 | } else { /* rr: RH, rl: RH+1 */ |
331 | rlr = rcu_dereference_protected(rl->avl_right, | 321 | rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ |
332 | lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ | 322 | rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ |
333 | rll = rcu_dereference_protected(rl->avl_left, | ||
334 | lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ | ||
335 | RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ | 323 | RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ |
336 | RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ | 324 | RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ |
337 | node->avl_height = lh + 1; /* node: LH+1 */ | 325 | node->avl_height = lh + 1; /* node: LH+1 */ |
@@ -372,7 +360,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) | |||
372 | 360 | ||
373 | do_free = 0; | 361 | do_free = 0; |
374 | 362 | ||
375 | spin_lock_bh(&base->lock); | 363 | write_seqlock_bh(&base->lock); |
376 | /* Check the reference counter. It was artificially incremented by 1 | 364 | /* Check the reference counter. It was artificially incremented by 1 |
377 | * in cleanup() function to prevent sudden disappearing. If we can | 365 | * in cleanup() function to prevent sudden disappearing. If we can |
378 | * atomically (because of lockless readers) take this last reference, | 366 | * atomically (because of lockless readers) take this last reference, |
@@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) | |||
392 | /* look for a node to insert instead of p */ | 380 | /* look for a node to insert instead of p */ |
393 | struct inet_peer *t; | 381 | struct inet_peer *t; |
394 | t = lookup_rightempty(p, base); | 382 | t = lookup_rightempty(p, base); |
395 | BUG_ON(rcu_dereference_protected(*stackptr[-1], | 383 | BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); |
396 | lockdep_is_held(&base->lock)) != t); | ||
397 | **--stackptr = t->avl_left; | 384 | **--stackptr = t->avl_left; |
398 | /* t is removed, t->daddr > x->daddr for any | 385 | /* t is removed, t->daddr > x->daddr for any |
399 | * x in p->avl_left subtree. | 386 | * x in p->avl_left subtree. |
@@ -409,10 +396,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) | |||
409 | base->total--; | 396 | base->total--; |
410 | do_free = 1; | 397 | do_free = 1; |
411 | } | 398 | } |
412 | spin_unlock_bh(&base->lock); | 399 | write_sequnlock_bh(&base->lock); |
413 | 400 | ||
414 | if (do_free) | 401 | if (do_free) |
415 | call_rcu_bh(&p->rcu, inetpeer_free_rcu); | 402 | call_rcu(&p->rcu, inetpeer_free_rcu); |
416 | else | 403 | else |
417 | /* The node is used again. Decrease the reference counter | 404 | /* The node is used again. Decrease the reference counter |
418 | * back. The loop "cleanup -> unlink_from_unused | 405 | * back. The loop "cleanup -> unlink_from_unused |
@@ -477,13 +464,17 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
477 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; | 464 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; |
478 | struct inet_peer_base *base = family_to_base(daddr->family); | 465 | struct inet_peer_base *base = family_to_base(daddr->family); |
479 | struct inet_peer *p; | 466 | struct inet_peer *p; |
467 | unsigned int sequence; | ||
468 | int invalidated; | ||
480 | 469 | ||
481 | /* Look up the address quickly, lockless. | 470 | /* Look up the address quickly, lockless. |
482 | * Because of a concurrent writer, we might not find an existing entry. | 471 | * Because of a concurrent writer, we might not find an existing entry. |
483 | */ | 472 | */ |
484 | rcu_read_lock_bh(); | 473 | rcu_read_lock(); |
485 | p = lookup_rcu_bh(daddr, base); | 474 | sequence = read_seqbegin(&base->lock); |
486 | rcu_read_unlock_bh(); | 475 | p = lookup_rcu(daddr, base); |
476 | invalidated = read_seqretry(&base->lock, sequence); | ||
477 | rcu_read_unlock(); | ||
487 | 478 | ||
488 | if (p) { | 479 | if (p) { |
489 | /* The existing node has been found. | 480 | /* The existing node has been found. |
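
The lookup is now fully lockless, with the seqlock used only as a change detector: a hit is always trustworthy (RCU keeps the node alive and the refcount is taken atomically), but a miss is ambiguous if a writer ran concurrently, which is what read_seqretry() reports. The pattern, distilled from the hunk above and the one that follows:

    rcu_read_lock();
    seq = read_seqbegin(&base->lock);
    p = lookup_rcu(daddr, base);
    invalidated = read_seqretry(&base->lock, seq);
    rcu_read_unlock();

    if (p)
            return p;               /* hit: always valid */
    if (!create && !invalidated)
            return NULL;            /* clean miss, no writer interfered */
    /* otherwise fall back to the locked lookup under
     * write_seqlock_bh(&base->lock) */
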
@@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
493 | return p; | 484 | return p; |
494 | } | 485 | } |
495 | 486 | ||
487 | /* If no writer did a change during our lookup, we can return early. */ | ||
488 | if (!create && !invalidated) | ||
489 | return NULL; | ||
490 | |||
496 | /* retry an exact lookup, taking the lock before. | 491 | /* retry an exact lookup, taking the lock before. |
497 | * At least, nodes should be hot in our cache. | 492 | * At least, nodes should be hot in our cache. |
498 | */ | 493 | */ |
499 | spin_lock_bh(&base->lock); | 494 | write_seqlock_bh(&base->lock); |
500 | p = lookup(daddr, stack, base); | 495 | p = lookup(daddr, stack, base); |
501 | if (p != peer_avl_empty) { | 496 | if (p != peer_avl_empty) { |
502 | atomic_inc(&p->refcnt); | 497 | atomic_inc(&p->refcnt); |
503 | spin_unlock_bh(&base->lock); | 498 | write_sequnlock_bh(&base->lock); |
504 | /* Remove the entry from unused list if it was there. */ | 499 | /* Remove the entry from unused list if it was there. */ |
505 | unlink_from_unused(p); | 500 | unlink_from_unused(p); |
506 | return p; | 501 | return p; |
@@ -510,8 +505,14 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
510 | p->daddr = *daddr; | 505 | p->daddr = *daddr; |
511 | atomic_set(&p->refcnt, 1); | 506 | atomic_set(&p->refcnt, 1); |
512 | atomic_set(&p->rid, 0); | 507 | atomic_set(&p->rid, 0); |
513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); | 508 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); |
514 | p->tcp_ts_stamp = 0; | 509 | p->tcp_ts_stamp = 0; |
510 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | ||
511 | p->rate_tokens = 0; | ||
512 | p->rate_last = 0; | ||
513 | p->pmtu_expires = 0; | ||
514 | p->pmtu_orig = 0; | ||
515 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); | ||
515 | INIT_LIST_HEAD(&p->unused); | 516 | INIT_LIST_HEAD(&p->unused); |
516 | 517 | ||
517 | 518 | ||
@@ -519,7 +520,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
519 | link_to_pool(p, base); | 520 | link_to_pool(p, base); |
520 | base->total++; | 521 | base->total++; |
521 | } | 522 | } |
522 | spin_unlock_bh(&base->lock); | 523 | write_sequnlock_bh(&base->lock); |
523 | 524 | ||
524 | if (base->total >= inet_peer_threshold) | 525 | if (base->total >= inet_peer_threshold) |
525 | /* Remove one less-recently-used entry. */ | 526 | /* Remove one less-recently-used entry. */ |
@@ -579,3 +580,44 @@ void inet_putpeer(struct inet_peer *p) | |||
579 | local_bh_enable(); | 580 | local_bh_enable(); |
580 | } | 581 | } |
581 | EXPORT_SYMBOL_GPL(inet_putpeer); | 582 | EXPORT_SYMBOL_GPL(inet_putpeer); |
583 | |||
584 | /* | ||
585 | * Check transmit rate limitation for given message. | ||
586 | * The rate information is held in the inet_peer entries now. | ||
587 | * This function is generic and could be used for other purposes | ||
588 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
589 | * | ||
590 | * Note that the same inet_peer fields are modified by functions in | ||
591 | * route.c too, but these work for packet destinations while xrlim_allow | ||
592 | * works for icmp destinations. This means the rate limiting information | ||
593 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
594 | * by source and by destination. | ||
595 | * | ||
596 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
597 | * SHOULD allow setting of rate limits | ||
598 | * | ||
599 | * Shared between ICMPv4 and ICMPv6. | ||
600 | */ | ||
601 | #define XRLIM_BURST_FACTOR 6 | ||
602 | bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) | ||
603 | { | ||
604 | unsigned long now, token; | ||
605 | bool rc = false; | ||
606 | |||
607 | if (!peer) | ||
608 | return true; | ||
609 | |||
610 | token = peer->rate_tokens; | ||
611 | now = jiffies; | ||
612 | token += now - peer->rate_last; | ||
613 | peer->rate_last = now; | ||
614 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
615 | token = XRLIM_BURST_FACTOR * timeout; | ||
616 | if (token >= timeout) { | ||
617 | token -= timeout; | ||
618 | rc = true; | ||
619 | } | ||
620 | peer->rate_tokens = token; | ||
621 | return rc; | ||
622 | } | ||
623 | EXPORT_SYMBOL(inet_peer_xrlim_allow); | ||
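inet_peer_xrlim_allow() is a plain token bucket: tokens accrue one per jiffy since rate_last, are capped at XRLIM_BURST_FACTOR * timeout, and each permitted message costs timeout tokens; a NULL peer fails open. A hedged sketch of a caller on the ICMP path (the sysctl name is an assumption, not taken from this diff):

	struct inet_peer *peer;
	bool allowed = true;

	peer = inet_getpeer(&daddr, 1);		/* create the entry if missing */
	if (peer) {
		allowed = inet_peer_xrlim_allow(peer,
						net->ipv4.sysctl_icmp_ratelimit);
		inet_putpeer(peer);
	}
	if (!allowed)
		return;		/* rate limited: drop the ICMP silently */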
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d1d0e2c256fc..da5941f18c3c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -769,19 +769,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
769 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | 769 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); |
770 | } | 770 | } |
771 | 771 | ||
772 | { | 772 | rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, |
773 | struct flowi fl = { | 773 | tunnel->parms.o_key, RT_TOS(tos), |
774 | .oif = tunnel->parms.link, | 774 | tunnel->parms.link); |
775 | .fl4_dst = dst, | 775 | if (IS_ERR(rt)) { |
776 | .fl4_src = tiph->saddr, | 776 | dev->stats.tx_carrier_errors++; |
777 | .fl4_tos = RT_TOS(tos), | 777 | goto tx_error; |
778 | .proto = IPPROTO_GRE, | ||
779 | .fl_gre_key = tunnel->parms.o_key | ||
780 | }; | ||
781 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
782 | dev->stats.tx_carrier_errors++; | ||
783 | goto tx_error; | ||
784 | } | ||
785 | } | 778 | } |
786 | tdev = rt->dst.dev; | 779 | tdev = rt->dst.dev; |
787 | 780 | ||
@@ -945,17 +938,13 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
945 | /* Guess output device to choose reasonable mtu and needed_headroom */ | 938 | /* Guess output device to choose reasonable mtu and needed_headroom */ |
946 | 939 | ||
947 | if (iph->daddr) { | 940 | if (iph->daddr) { |
948 | struct flowi fl = { | 941 | struct rtable *rt = ip_route_output_gre(dev_net(dev), |
949 | .oif = tunnel->parms.link, | 942 | iph->daddr, iph->saddr, |
950 | .fl4_dst = iph->daddr, | 943 | tunnel->parms.o_key, |
951 | .fl4_src = iph->saddr, | 944 | RT_TOS(iph->tos), |
952 | .fl4_tos = RT_TOS(iph->tos), | 945 | tunnel->parms.link); |
953 | .proto = IPPROTO_GRE, | 946 | |
954 | .fl_gre_key = tunnel->parms.o_key | 947 | if (!IS_ERR(rt)) { |
955 | }; | ||
956 | struct rtable *rt; | ||
957 | |||
958 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
959 | tdev = rt->dst.dev; | 948 | tdev = rt->dst.dev; |
960 | ip_rt_put(rt); | 949 | ip_rt_put(rt); |
961 | } | 950 | } |
@@ -1207,17 +1196,14 @@ static int ipgre_open(struct net_device *dev) | |||
1207 | struct ip_tunnel *t = netdev_priv(dev); | 1196 | struct ip_tunnel *t = netdev_priv(dev); |
1208 | 1197 | ||
1209 | if (ipv4_is_multicast(t->parms.iph.daddr)) { | 1198 | if (ipv4_is_multicast(t->parms.iph.daddr)) { |
1210 | struct flowi fl = { | 1199 | struct rtable *rt = ip_route_output_gre(dev_net(dev), |
1211 | .oif = t->parms.link, | 1200 | t->parms.iph.daddr, |
1212 | .fl4_dst = t->parms.iph.daddr, | 1201 | t->parms.iph.saddr, |
1213 | .fl4_src = t->parms.iph.saddr, | 1202 | t->parms.o_key, |
1214 | .fl4_tos = RT_TOS(t->parms.iph.tos), | 1203 | RT_TOS(t->parms.iph.tos), |
1215 | .proto = IPPROTO_GRE, | 1204 | t->parms.link); |
1216 | .fl_gre_key = t->parms.o_key | 1205 | |
1217 | }; | 1206 | if (IS_ERR(rt)) |
1218 | struct rtable *rt; | ||
1219 | |||
1220 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) | ||
1221 | return -EADDRNOTAVAIL; | 1207 | return -EADDRNOTAVAIL; |
1222 | dev = rt->dst.dev; | 1208 | dev = rt->dst.dev; |
1223 | ip_rt_put(rt); | 1209 | ip_rt_put(rt); |
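All three ip_gre.c call sites above collapse onto ip_route_output_gre(). Its definition is outside this diff; presumably it is a small inline in include/net/route.h along these lines, with the GRE key carried in the flow's uli field:

static inline struct rtable *ip_route_output_gre(struct net *net,
						 __be32 daddr, __be32 saddr,
						 __be32 gre_key, __u8 tos, int oif)
{
	struct flowi4 fl4 = {
		.flowi4_oif	= oif,
		.daddr		= daddr,
		.saddr		= saddr,
		.flowi4_tos	= tos,
		.flowi4_proto	= IPPROTO_GRE,
		.fl4_gre_key	= gre_key,
	};

	return ip_route_output_key(net, &fl4);
}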
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d859bcc26cb7..d7b2b0987a3b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
340 | } | 340 | } |
341 | } | 341 | } |
342 | 342 | ||
343 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_IP_ROUTE_CLASSID |
344 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
346 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
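Only the guard changes here (CONFIG_NET_CLS_ROUTE becomes the new CONFIG_IP_ROUTE_CLASSID); the accounting body is untouched. For context, the remainder of that block updates per-CPU counters indexed by the route classid, roughly:

		st[idx & 0xFF].o_packets++;
		st[idx & 0xFF].o_bytes += skb->len;
		st[(idx >> 16) & 0xFF].i_packets++;
		st[(idx >> 16) & 0xFF].i_bytes += skb->len;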
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04c7b3ba6b39..67f241b97649 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -339,25 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
339 | if(opt && opt->srr) | 339 | if(opt && opt->srr) |
340 | daddr = opt->faddr; | 340 | daddr = opt->faddr; |
341 | 341 | ||
342 | { | 342 | /* If this fails, retransmit mechanism of transport layer will |
343 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 343 | * keep trying until route appears or the connection times |
344 | .mark = sk->sk_mark, | 344 | * itself out. |
345 | .fl4_dst = daddr, | 345 | */ |
346 | .fl4_src = inet->inet_saddr, | 346 | rt = ip_route_output_ports(sock_net(sk), sk, |
347 | .fl4_tos = RT_CONN_FLAGS(sk), | 347 | daddr, inet->inet_saddr, |
348 | .proto = sk->sk_protocol, | 348 | inet->inet_dport, |
349 | .flags = inet_sk_flowi_flags(sk), | 349 | inet->inet_sport, |
350 | .fl_ip_sport = inet->inet_sport, | 350 | sk->sk_protocol, |
351 | .fl_ip_dport = inet->inet_dport }; | 351 | RT_CONN_FLAGS(sk), |
352 | 352 | sk->sk_bound_dev_if); | |
353 | /* If this fails, retransmit mechanism of transport layer will | 353 | if (IS_ERR(rt)) |
354 | * keep trying until route appears or the connection times | 354 | goto no_route; |
355 | * itself out. | ||
356 | */ | ||
357 | security_sk_classify_flow(sk, &fl); | ||
358 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) | ||
359 | goto no_route; | ||
360 | } | ||
361 | sk_setup_caps(sk, &rt->dst); | 355 | sk_setup_caps(sk, &rt->dst); |
362 | } | 356 | } |
363 | skb_dst_set_noref(skb, &rt->dst); | 357 | skb_dst_set_noref(skb, &rt->dst); |
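This hunk shows the conversion pattern repeated throughout the series: routing lookups stop filling a struct rtable ** out-parameter and instead return the rtable directly, with failures encoded via ERR_PTR(). Side by side:

	/* old convention: int return plus out-parameter */
	if (ip_route_output_key(net, &rt, &fl))
		goto no_route;

	/* new convention: pointer return, errno in the pointer */
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		goto no_route;	/* PTR_ERR(rt) carries the errno */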
@@ -733,6 +727,7 @@ csum_page(struct page *page, int offset, int copy) | |||
733 | } | 727 | } |
734 | 728 | ||
735 | static inline int ip_ufo_append_data(struct sock *sk, | 729 | static inline int ip_ufo_append_data(struct sock *sk, |
730 | struct sk_buff_head *queue, | ||
736 | int getfrag(void *from, char *to, int offset, int len, | 731 | int getfrag(void *from, char *to, int offset, int len, |
737 | int odd, struct sk_buff *skb), | 732 | int odd, struct sk_buff *skb), |
738 | void *from, int length, int hh_len, int fragheaderlen, | 733 | void *from, int length, int hh_len, int fragheaderlen, |
@@ -745,7 +740,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
745 | * device, so create one single skb packet containing complete | 740 | * device, so create one single skb packet containing complete |
746 | * udp datagram | 741 | * udp datagram |
747 | */ | 742 | */ |
748 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { | 743 | if ((skb = skb_peek_tail(queue)) == NULL) { |
749 | skb = sock_alloc_send_skb(sk, | 744 | skb = sock_alloc_send_skb(sk, |
750 | hh_len + fragheaderlen + transhdrlen + 20, | 745 | hh_len + fragheaderlen + transhdrlen + 20, |
751 | (flags & MSG_DONTWAIT), &err); | 746 | (flags & MSG_DONTWAIT), &err); |
@@ -767,40 +762,28 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
767 | 762 | ||
768 | skb->ip_summed = CHECKSUM_PARTIAL; | 763 | skb->ip_summed = CHECKSUM_PARTIAL; |
769 | skb->csum = 0; | 764 | skb->csum = 0; |
770 | sk->sk_sndmsg_off = 0; | ||
771 | 765 | ||
772 | /* specify the length of each IP datagram fragment */ | 766 | /* specify the length of each IP datagram fragment */ |
773 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 767 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; |
774 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 768 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
775 | __skb_queue_tail(&sk->sk_write_queue, skb); | 769 | __skb_queue_tail(queue, skb); |
776 | } | 770 | } |
777 | 771 | ||
778 | return skb_append_datato_frags(sk, skb, getfrag, from, | 772 | return skb_append_datato_frags(sk, skb, getfrag, from, |
779 | (length - transhdrlen)); | 773 | (length - transhdrlen)); |
780 | } | 774 | } |
781 | 775 | ||
782 | /* | 776 | static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, |
783 | * ip_append_data() and ip_append_page() can make one large IP datagram | 777 | struct inet_cork *cork, |
784 | * from many pieces of data. Each pieces will be holded on the socket | 778 | int getfrag(void *from, char *to, int offset, |
785 | * until ip_push_pending_frames() is called. Each piece can be a page | 779 | int len, int odd, struct sk_buff *skb), |
786 | * or non-page data. | 780 | void *from, int length, int transhdrlen, |
787 | * | 781 | unsigned int flags) |
788 | * Not only UDP, other transport protocols - e.g. raw sockets - can use | ||
789 | * this interface potentially. | ||
790 | * | ||
791 | * LATER: length must be adjusted by pad at tail, when it is required. | ||
792 | */ | ||
793 | int ip_append_data(struct sock *sk, | ||
794 | int getfrag(void *from, char *to, int offset, int len, | ||
795 | int odd, struct sk_buff *skb), | ||
796 | void *from, int length, int transhdrlen, | ||
797 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
798 | unsigned int flags) | ||
799 | { | 782 | { |
800 | struct inet_sock *inet = inet_sk(sk); | 783 | struct inet_sock *inet = inet_sk(sk); |
801 | struct sk_buff *skb; | 784 | struct sk_buff *skb; |
802 | 785 | ||
803 | struct ip_options *opt = NULL; | 786 | struct ip_options *opt = cork->opt; |
804 | int hh_len; | 787 | int hh_len; |
805 | int exthdrlen; | 788 | int exthdrlen; |
806 | int mtu; | 789 | int mtu; |
@@ -809,58 +792,19 @@ int ip_append_data(struct sock *sk, | |||
809 | int offset = 0; | 792 | int offset = 0; |
810 | unsigned int maxfraglen, fragheaderlen; | 793 | unsigned int maxfraglen, fragheaderlen; |
811 | int csummode = CHECKSUM_NONE; | 794 | int csummode = CHECKSUM_NONE; |
812 | struct rtable *rt; | 795 | struct rtable *rt = (struct rtable *)cork->dst; |
813 | 796 | ||
814 | if (flags&MSG_PROBE) | 797 | exthdrlen = transhdrlen ? rt->dst.header_len : 0; |
815 | return 0; | 798 | length += exthdrlen; |
816 | 799 | transhdrlen += exthdrlen; | |
817 | if (skb_queue_empty(&sk->sk_write_queue)) { | 800 | mtu = cork->fragsize; |
818 | /* | ||
819 | * setup for corking. | ||
820 | */ | ||
821 | opt = ipc->opt; | ||
822 | if (opt) { | ||
823 | if (inet->cork.opt == NULL) { | ||
824 | inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); | ||
825 | if (unlikely(inet->cork.opt == NULL)) | ||
826 | return -ENOBUFS; | ||
827 | } | ||
828 | memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); | ||
829 | inet->cork.flags |= IPCORK_OPT; | ||
830 | inet->cork.addr = ipc->addr; | ||
831 | } | ||
832 | rt = *rtp; | ||
833 | if (unlikely(!rt)) | ||
834 | return -EFAULT; | ||
835 | /* | ||
836 | * We steal reference to this route, caller should not release it | ||
837 | */ | ||
838 | *rtp = NULL; | ||
839 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
840 | rt->dst.dev->mtu : | ||
841 | dst_mtu(rt->dst.path); | ||
842 | inet->cork.dst = &rt->dst; | ||
843 | inet->cork.length = 0; | ||
844 | sk->sk_sndmsg_page = NULL; | ||
845 | sk->sk_sndmsg_off = 0; | ||
846 | exthdrlen = rt->dst.header_len; | ||
847 | length += exthdrlen; | ||
848 | transhdrlen += exthdrlen; | ||
849 | } else { | ||
850 | rt = (struct rtable *)inet->cork.dst; | ||
851 | if (inet->cork.flags & IPCORK_OPT) | ||
852 | opt = inet->cork.opt; | ||
853 | 801 | ||
854 | transhdrlen = 0; | ||
855 | exthdrlen = 0; | ||
856 | mtu = inet->cork.fragsize; | ||
857 | } | ||
858 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 802 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
859 | 803 | ||
860 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 804 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
861 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 805 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
862 | 806 | ||
863 | if (inet->cork.length + length > 0xFFFF - fragheaderlen) { | 807 | if (cork->length + length > 0xFFFF - fragheaderlen) { |
864 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 808 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
865 | mtu-exthdrlen); | 809 | mtu-exthdrlen); |
866 | return -EMSGSIZE; | 810 | return -EMSGSIZE; |
@@ -876,15 +820,15 @@ int ip_append_data(struct sock *sk, | |||
876 | !exthdrlen) | 820 | !exthdrlen) |
877 | csummode = CHECKSUM_PARTIAL; | 821 | csummode = CHECKSUM_PARTIAL; |
878 | 822 | ||
879 | skb = skb_peek_tail(&sk->sk_write_queue); | 823 | skb = skb_peek_tail(queue); |
880 | 824 | ||
881 | inet->cork.length += length; | 825 | cork->length += length; |
882 | if (((length > mtu) || (skb && skb_is_gso(skb))) && | 826 | if (((length > mtu) || (skb && skb_is_gso(skb))) && |
883 | (sk->sk_protocol == IPPROTO_UDP) && | 827 | (sk->sk_protocol == IPPROTO_UDP) && |
884 | (rt->dst.dev->features & NETIF_F_UFO)) { | 828 | (rt->dst.dev->features & NETIF_F_UFO)) { |
885 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, | 829 | err = ip_ufo_append_data(sk, queue, getfrag, from, length, |
886 | fragheaderlen, transhdrlen, mtu, | 830 | hh_len, fragheaderlen, transhdrlen, |
887 | flags); | 831 | mtu, flags); |
888 | if (err) | 832 | if (err) |
889 | goto error; | 833 | goto error; |
890 | return 0; | 834 | return 0; |
@@ -961,7 +905,7 @@ alloc_new_skb: | |||
961 | else | 905 | else |
962 | /* only the initial fragment is | 906 | /* only the initial fragment is |
963 | time stamped */ | 907 | time stamped */ |
964 | ipc->tx_flags = 0; | 908 | cork->tx_flags = 0; |
965 | } | 909 | } |
966 | if (skb == NULL) | 910 | if (skb == NULL) |
967 | goto error; | 911 | goto error; |
@@ -972,7 +916,7 @@ alloc_new_skb: | |||
972 | skb->ip_summed = csummode; | 916 | skb->ip_summed = csummode; |
973 | skb->csum = 0; | 917 | skb->csum = 0; |
974 | skb_reserve(skb, hh_len); | 918 | skb_reserve(skb, hh_len); |
975 | skb_shinfo(skb)->tx_flags = ipc->tx_flags; | 919 | skb_shinfo(skb)->tx_flags = cork->tx_flags; |
976 | 920 | ||
977 | /* | 921 | /* |
978 | * Find where to start putting bytes. | 922 | * Find where to start putting bytes. |
@@ -1009,7 +953,7 @@ alloc_new_skb: | |||
1009 | /* | 953 | /* |
1010 | * Put the packet on the pending queue. | 954 | * Put the packet on the pending queue. |
1011 | */ | 955 | */ |
1012 | __skb_queue_tail(&sk->sk_write_queue, skb); | 956 | __skb_queue_tail(queue, skb); |
1013 | continue; | 957 | continue; |
1014 | } | 958 | } |
1015 | 959 | ||
@@ -1029,8 +973,8 @@ alloc_new_skb: | |||
1029 | } else { | 973 | } else { |
1030 | int i = skb_shinfo(skb)->nr_frags; | 974 | int i = skb_shinfo(skb)->nr_frags; |
1031 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; | 975 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; |
1032 | struct page *page = sk->sk_sndmsg_page; | 976 | struct page *page = cork->page; |
1033 | int off = sk->sk_sndmsg_off; | 977 | int off = cork->off; |
1034 | unsigned int left; | 978 | unsigned int left; |
1035 | 979 | ||
1036 | if (page && (left = PAGE_SIZE - off) > 0) { | 980 | if (page && (left = PAGE_SIZE - off) > 0) { |
@@ -1042,7 +986,7 @@ alloc_new_skb: | |||
1042 | goto error; | 986 | goto error; |
1043 | } | 987 | } |
1044 | get_page(page); | 988 | get_page(page); |
1045 | skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); | 989 | skb_fill_page_desc(skb, i, page, off, 0); |
1046 | frag = &skb_shinfo(skb)->frags[i]; | 990 | frag = &skb_shinfo(skb)->frags[i]; |
1047 | } | 991 | } |
1048 | } else if (i < MAX_SKB_FRAGS) { | 992 | } else if (i < MAX_SKB_FRAGS) { |
@@ -1053,8 +997,8 @@ alloc_new_skb: | |||
1053 | err = -ENOMEM; | 997 | err = -ENOMEM; |
1054 | goto error; | 998 | goto error; |
1055 | } | 999 | } |
1056 | sk->sk_sndmsg_page = page; | 1000 | cork->page = page; |
1057 | sk->sk_sndmsg_off = 0; | 1001 | cork->off = 0; |
1058 | 1002 | ||
1059 | skb_fill_page_desc(skb, i, page, 0, 0); | 1003 | skb_fill_page_desc(skb, i, page, 0, 0); |
1060 | frag = &skb_shinfo(skb)->frags[i]; | 1004 | frag = &skb_shinfo(skb)->frags[i]; |
@@ -1066,7 +1010,7 @@ alloc_new_skb: | |||
1066 | err = -EFAULT; | 1010 | err = -EFAULT; |
1067 | goto error; | 1011 | goto error; |
1068 | } | 1012 | } |
1069 | sk->sk_sndmsg_off += copy; | 1013 | cork->off += copy; |
1070 | frag->size += copy; | 1014 | frag->size += copy; |
1071 | skb->len += copy; | 1015 | skb->len += copy; |
1072 | skb->data_len += copy; | 1016 | skb->data_len += copy; |
@@ -1080,11 +1024,87 @@ alloc_new_skb: | |||
1080 | return 0; | 1024 | return 0; |
1081 | 1025 | ||
1082 | error: | 1026 | error: |
1083 | inet->cork.length -= length; | 1027 | cork->length -= length; |
1084 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1028 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
1085 | return err; | 1029 | return err; |
1086 | } | 1030 | } |
1087 | 1031 | ||
1032 | static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | ||
1033 | struct ipcm_cookie *ipc, struct rtable **rtp) | ||
1034 | { | ||
1035 | struct inet_sock *inet = inet_sk(sk); | ||
1036 | struct ip_options *opt; | ||
1037 | struct rtable *rt; | ||
1038 | |||
1039 | /* | ||
1040 | * setup for corking. | ||
1041 | */ | ||
1042 | opt = ipc->opt; | ||
1043 | if (opt) { | ||
1044 | if (cork->opt == NULL) { | ||
1045 | cork->opt = kmalloc(sizeof(struct ip_options) + 40, | ||
1046 | sk->sk_allocation); | ||
1047 | if (unlikely(cork->opt == NULL)) | ||
1048 | return -ENOBUFS; | ||
1049 | } | ||
1050 | memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); | ||
1051 | cork->flags |= IPCORK_OPT; | ||
1052 | cork->addr = ipc->addr; | ||
1053 | } | ||
1054 | rt = *rtp; | ||
1055 | if (unlikely(!rt)) | ||
1056 | return -EFAULT; | ||
1057 | /* | ||
1058 | * We steal a reference to this route; the caller should not release it | ||
1059 | */ | ||
1060 | *rtp = NULL; | ||
1061 | cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
1062 | rt->dst.dev->mtu : dst_mtu(rt->dst.path); | ||
1063 | cork->dst = &rt->dst; | ||
1064 | cork->length = 0; | ||
1065 | cork->tx_flags = ipc->tx_flags; | ||
1066 | cork->page = NULL; | ||
1067 | cork->off = 0; | ||
1068 | |||
1069 | return 0; | ||
1070 | } | ||
1071 | |||
1072 | /* | ||
1073 | * ip_append_data() and ip_append_page() can make one large IP datagram | ||
1074 | * from many pieces of data. Each piece will be held on the socket | ||
1075 | * until ip_push_pending_frames() is called. Each piece can be a page | ||
1076 | * or non-page data. | ||
1077 | * | ||
1078 | * Not only UDP; other transport protocols - e.g. raw sockets - can | ||
1079 | * potentially use this interface. | ||
1080 | * | ||
1081 | * LATER: length must be adjusted by tail padding when required. | ||
1082 | */ | ||
1083 | int ip_append_data(struct sock *sk, | ||
1084 | int getfrag(void *from, char *to, int offset, int len, | ||
1085 | int odd, struct sk_buff *skb), | ||
1086 | void *from, int length, int transhdrlen, | ||
1087 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
1088 | unsigned int flags) | ||
1089 | { | ||
1090 | struct inet_sock *inet = inet_sk(sk); | ||
1091 | int err; | ||
1092 | |||
1093 | if (flags&MSG_PROBE) | ||
1094 | return 0; | ||
1095 | |||
1096 | if (skb_queue_empty(&sk->sk_write_queue)) { | ||
1097 | err = ip_setup_cork(sk, &inet->cork, ipc, rtp); | ||
1098 | if (err) | ||
1099 | return err; | ||
1100 | } else { | ||
1101 | transhdrlen = 0; | ||
1102 | } | ||
1103 | |||
1104 | return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, | ||
1105 | from, length, transhdrlen, flags); | ||
1106 | } | ||
1107 | |||
1088 | ssize_t ip_append_page(struct sock *sk, struct page *page, | 1108 | ssize_t ip_append_page(struct sock *sk, struct page *page, |
1089 | int offset, size_t size, int flags) | 1109 | int offset, size_t size, int flags) |
1090 | { | 1110 | { |
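ip_setup_cork() writes through a struct inet_cork pointer rather than directly into inet->cork, which is what later allows corking onto a caller-provided cork and private queue (see ip_make_skb() further down). Judging by the fields used here, the structure looks approximately like this; the real definition lives in a header outside this diff:

struct inet_cork {
	unsigned int		flags;		/* IPCORK_OPT etc. */
	__be32			addr;
	struct ip_options	*opt;
	unsigned int		fragsize;
	struct dst_entry	*dst;
	int			length;		/* total length of all frames */
	struct page		*page;		/* partially filled page ... */
	u32			off;		/* ... and the offset into it */
	u8			tx_flags;	/* timestamping flags */
};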
@@ -1228,40 +1248,41 @@ error: | |||
1228 | return err; | 1248 | return err; |
1229 | } | 1249 | } |
1230 | 1250 | ||
1231 | static void ip_cork_release(struct inet_sock *inet) | 1251 | static void ip_cork_release(struct inet_cork *cork) |
1232 | { | 1252 | { |
1233 | inet->cork.flags &= ~IPCORK_OPT; | 1253 | cork->flags &= ~IPCORK_OPT; |
1234 | kfree(inet->cork.opt); | 1254 | kfree(cork->opt); |
1235 | inet->cork.opt = NULL; | 1255 | cork->opt = NULL; |
1236 | dst_release(inet->cork.dst); | 1256 | dst_release(cork->dst); |
1237 | inet->cork.dst = NULL; | 1257 | cork->dst = NULL; |
1238 | } | 1258 | } |
1239 | 1259 | ||
1240 | /* | 1260 | /* |
1241 | * Combine all pending IP fragments on the socket into one IP datagram | 1261 | * Combine all pending IP fragments on the socket into one IP datagram |
1242 | * and push them out. | 1262 | * and push them out. |
1243 | */ | 1263 | */ |
1244 | int ip_push_pending_frames(struct sock *sk) | 1264 | struct sk_buff *__ip_make_skb(struct sock *sk, |
1265 | struct sk_buff_head *queue, | ||
1266 | struct inet_cork *cork) | ||
1245 | { | 1267 | { |
1246 | struct sk_buff *skb, *tmp_skb; | 1268 | struct sk_buff *skb, *tmp_skb; |
1247 | struct sk_buff **tail_skb; | 1269 | struct sk_buff **tail_skb; |
1248 | struct inet_sock *inet = inet_sk(sk); | 1270 | struct inet_sock *inet = inet_sk(sk); |
1249 | struct net *net = sock_net(sk); | 1271 | struct net *net = sock_net(sk); |
1250 | struct ip_options *opt = NULL; | 1272 | struct ip_options *opt = NULL; |
1251 | struct rtable *rt = (struct rtable *)inet->cork.dst; | 1273 | struct rtable *rt = (struct rtable *)cork->dst; |
1252 | struct iphdr *iph; | 1274 | struct iphdr *iph; |
1253 | __be16 df = 0; | 1275 | __be16 df = 0; |
1254 | __u8 ttl; | 1276 | __u8 ttl; |
1255 | int err = 0; | ||
1256 | 1277 | ||
1257 | if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) | 1278 | if ((skb = __skb_dequeue(queue)) == NULL) |
1258 | goto out; | 1279 | goto out; |
1259 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1280 | tail_skb = &(skb_shinfo(skb)->frag_list); |
1260 | 1281 | ||
1261 | /* move skb->data to ip header from ext header */ | 1282 | /* move skb->data to ip header from ext header */ |
1262 | if (skb->data < skb_network_header(skb)) | 1283 | if (skb->data < skb_network_header(skb)) |
1263 | __skb_pull(skb, skb_network_offset(skb)); | 1284 | __skb_pull(skb, skb_network_offset(skb)); |
1264 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1285 | while ((tmp_skb = __skb_dequeue(queue)) != NULL) { |
1265 | __skb_pull(tmp_skb, skb_network_header_len(skb)); | 1286 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
1266 | *tail_skb = tmp_skb; | 1287 | *tail_skb = tmp_skb; |
1267 | tail_skb = &(tmp_skb->next); | 1288 | tail_skb = &(tmp_skb->next); |
@@ -1287,8 +1308,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
1287 | ip_dont_fragment(sk, &rt->dst))) | 1308 | ip_dont_fragment(sk, &rt->dst))) |
1288 | df = htons(IP_DF); | 1309 | df = htons(IP_DF); |
1289 | 1310 | ||
1290 | if (inet->cork.flags & IPCORK_OPT) | 1311 | if (cork->flags & IPCORK_OPT) |
1291 | opt = inet->cork.opt; | 1312 | opt = cork->opt; |
1292 | 1313 | ||
1293 | if (rt->rt_type == RTN_MULTICAST) | 1314 | if (rt->rt_type == RTN_MULTICAST) |
1294 | ttl = inet->mc_ttl; | 1315 | ttl = inet->mc_ttl; |
@@ -1300,7 +1321,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1300 | iph->ihl = 5; | 1321 | iph->ihl = 5; |
1301 | if (opt) { | 1322 | if (opt) { |
1302 | iph->ihl += opt->optlen>>2; | 1323 | iph->ihl += opt->optlen>>2; |
1303 | ip_options_build(skb, opt, inet->cork.addr, rt, 0); | 1324 | ip_options_build(skb, opt, cork->addr, rt, 0); |
1304 | } | 1325 | } |
1305 | iph->tos = inet->tos; | 1326 | iph->tos = inet->tos; |
1306 | iph->frag_off = df; | 1327 | iph->frag_off = df; |
@@ -1316,44 +1337,95 @@ int ip_push_pending_frames(struct sock *sk) | |||
1316 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec | 1337 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec |
1317 | * on dst refcount | 1338 | * on dst refcount |
1318 | */ | 1339 | */ |
1319 | inet->cork.dst = NULL; | 1340 | cork->dst = NULL; |
1320 | skb_dst_set(skb, &rt->dst); | 1341 | skb_dst_set(skb, &rt->dst); |
1321 | 1342 | ||
1322 | if (iph->protocol == IPPROTO_ICMP) | 1343 | if (iph->protocol == IPPROTO_ICMP) |
1323 | icmp_out_count(net, ((struct icmphdr *) | 1344 | icmp_out_count(net, ((struct icmphdr *) |
1324 | skb_transport_header(skb))->type); | 1345 | skb_transport_header(skb))->type); |
1325 | 1346 | ||
1326 | /* Netfilter gets whole the not fragmented skb. */ | 1347 | ip_cork_release(cork); |
1348 | out: | ||
1349 | return skb; | ||
1350 | } | ||
1351 | |||
1352 | int ip_send_skb(struct sk_buff *skb) | ||
1353 | { | ||
1354 | struct net *net = sock_net(skb->sk); | ||
1355 | int err; | ||
1356 | |||
1327 | err = ip_local_out(skb); | 1357 | err = ip_local_out(skb); |
1328 | if (err) { | 1358 | if (err) { |
1329 | if (err > 0) | 1359 | if (err > 0) |
1330 | err = net_xmit_errno(err); | 1360 | err = net_xmit_errno(err); |
1331 | if (err) | 1361 | if (err) |
1332 | goto error; | 1362 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
1333 | } | 1363 | } |
1334 | 1364 | ||
1335 | out: | ||
1336 | ip_cork_release(inet); | ||
1337 | return err; | 1365 | return err; |
1366 | } | ||
1338 | 1367 | ||
1339 | error: | 1368 | int ip_push_pending_frames(struct sock *sk) |
1340 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 1369 | { |
1341 | goto out; | 1370 | struct sk_buff *skb; |
1371 | |||
1372 | skb = ip_finish_skb(sk); | ||
1373 | if (!skb) | ||
1374 | return 0; | ||
1375 | |||
1376 | /* Netfilter gets the whole, not yet fragmented skb. */ | ||
1377 | return ip_send_skb(skb); | ||
1342 | } | 1378 | } |
1343 | 1379 | ||
1344 | /* | 1380 | /* |
1345 | * Throw away all pending data on the socket. | 1381 | * Throw away all pending data on the socket. |
1346 | */ | 1382 | */ |
1347 | void ip_flush_pending_frames(struct sock *sk) | 1383 | static void __ip_flush_pending_frames(struct sock *sk, |
1384 | struct sk_buff_head *queue, | ||
1385 | struct inet_cork *cork) | ||
1348 | { | 1386 | { |
1349 | struct sk_buff *skb; | 1387 | struct sk_buff *skb; |
1350 | 1388 | ||
1351 | while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) | 1389 | while ((skb = __skb_dequeue_tail(queue)) != NULL) |
1352 | kfree_skb(skb); | 1390 | kfree_skb(skb); |
1353 | 1391 | ||
1354 | ip_cork_release(inet_sk(sk)); | 1392 | ip_cork_release(cork); |
1393 | } | ||
1394 | |||
1395 | void ip_flush_pending_frames(struct sock *sk) | ||
1396 | { | ||
1397 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); | ||
1355 | } | 1398 | } |
1356 | 1399 | ||
1400 | struct sk_buff *ip_make_skb(struct sock *sk, | ||
1401 | int getfrag(void *from, char *to, int offset, | ||
1402 | int len, int odd, struct sk_buff *skb), | ||
1403 | void *from, int length, int transhdrlen, | ||
1404 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
1405 | unsigned int flags) | ||
1406 | { | ||
1407 | struct inet_cork cork = {}; | ||
1408 | struct sk_buff_head queue; | ||
1409 | int err; | ||
1410 | |||
1411 | if (flags & MSG_PROBE) | ||
1412 | return NULL; | ||
1413 | |||
1414 | __skb_queue_head_init(&queue); | ||
1415 | |||
1416 | err = ip_setup_cork(sk, &cork, ipc, rtp); | ||
1417 | if (err) | ||
1418 | return ERR_PTR(err); | ||
1419 | |||
1420 | err = __ip_append_data(sk, &queue, &cork, getfrag, | ||
1421 | from, length, transhdrlen, flags); | ||
1422 | if (err) { | ||
1423 | __ip_flush_pending_frames(sk, &queue, &cork); | ||
1424 | return ERR_PTR(err); | ||
1425 | } | ||
1426 | |||
1427 | return __ip_make_skb(sk, &queue, &cork); | ||
1428 | } | ||
1357 | 1429 | ||
1358 | /* | 1430 | /* |
1359 | * Fetch data from kernel space and fill in checksum if needed. | 1431 | * Fetch data from kernel space and fill in checksum if needed. |
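Taken together, ip_make_skb() and ip_send_skb() let a datagram protocol build and transmit one packet without touching sk->sk_write_queue or the socket's persistent cork state. A hedged sketch of a caller (setup of ipc, rt and getfrag is assumed):

	struct sk_buff *skb;
	int err = 0;

	skb = ip_make_skb(sk, getfrag, msg, len, transhdrlen,
			  &ipc, &rt, flags);
	if (IS_ERR(skb))
		return PTR_ERR(skb);
	if (skb)			/* NULL means MSG_PROBE: nothing to send */
		err = ip_send_skb(skb);
	return err;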
@@ -1402,16 +1474,19 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1402 | } | 1474 | } |
1403 | 1475 | ||
1404 | { | 1476 | { |
1405 | struct flowi fl = { .oif = arg->bound_dev_if, | 1477 | struct flowi4 fl4 = { |
1406 | .fl4_dst = daddr, | 1478 | .flowi4_oif = arg->bound_dev_if, |
1407 | .fl4_src = rt->rt_spec_dst, | 1479 | .daddr = daddr, |
1408 | .fl4_tos = RT_TOS(ip_hdr(skb)->tos), | 1480 | .saddr = rt->rt_spec_dst, |
1409 | .fl_ip_sport = tcp_hdr(skb)->dest, | 1481 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), |
1410 | .fl_ip_dport = tcp_hdr(skb)->source, | 1482 | .fl4_sport = tcp_hdr(skb)->dest, |
1411 | .proto = sk->sk_protocol, | 1483 | .fl4_dport = tcp_hdr(skb)->source, |
1412 | .flags = ip_reply_arg_flowi_flags(arg) }; | 1484 | .flowi4_proto = sk->sk_protocol, |
1413 | security_skb_classify_flow(skb, &fl); | 1485 | .flowi4_flags = ip_reply_arg_flowi_flags(arg), |
1414 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) | 1486 | }; |
1487 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||
1488 | rt = ip_route_output_key(sock_net(sk), &fl4); | ||
1489 | if (IS_ERR(rt)) | ||
1415 | return; | 1490 | return; |
1416 | } | 1491 | } |
1417 | 1492 | ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index a5f58e7cbb26..bfc17c5914e7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -460,19 +460,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
460 | goto tx_error_icmp; | 460 | goto tx_error_icmp; |
461 | } | 461 | } |
462 | 462 | ||
463 | { | 463 | rt = ip_route_output_ports(dev_net(dev), NULL, |
464 | struct flowi fl = { | 464 | dst, tiph->saddr, |
465 | .oif = tunnel->parms.link, | 465 | 0, 0, |
466 | .fl4_dst = dst, | 466 | IPPROTO_IPIP, RT_TOS(tos), |
467 | .fl4_src= tiph->saddr, | 467 | tunnel->parms.link); |
468 | .fl4_tos = RT_TOS(tos), | 468 | if (IS_ERR(rt)) { |
469 | .proto = IPPROTO_IPIP | 469 | dev->stats.tx_carrier_errors++; |
470 | }; | 470 | goto tx_error_icmp; |
471 | |||
472 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
473 | dev->stats.tx_carrier_errors++; | ||
474 | goto tx_error_icmp; | ||
475 | } | ||
476 | } | 471 | } |
477 | tdev = rt->dst.dev; | 472 | tdev = rt->dst.dev; |
478 | 473 | ||
@@ -583,16 +578,14 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
583 | iph = &tunnel->parms.iph; | 578 | iph = &tunnel->parms.iph; |
584 | 579 | ||
585 | if (iph->daddr) { | 580 | if (iph->daddr) { |
586 | struct flowi fl = { | 581 | struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, |
587 | .oif = tunnel->parms.link, | 582 | iph->daddr, iph->saddr, |
588 | .fl4_dst = iph->daddr, | 583 | 0, 0, |
589 | .fl4_src = iph->saddr, | 584 | IPPROTO_IPIP, |
590 | .fl4_tos = RT_TOS(iph->tos), | 585 | RT_TOS(iph->tos), |
591 | .proto = IPPROTO_IPIP | 586 | tunnel->parms.link); |
592 | }; | 587 | |
593 | struct rtable *rt; | 588 | if (!IS_ERR(rt)) { |
594 | |||
595 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
596 | tdev = rt->dst.dev; | 589 | tdev = rt->dst.dev; |
597 | ip_rt_put(rt); | 590 | ip_rt_put(rt); |
598 | } | 591 | } |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8b65a12654e7..1f62eaeb6de4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -148,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) | |||
148 | return NULL; | 148 | return NULL; |
149 | } | 149 | } |
150 | 150 | ||
151 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | 151 | static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, |
152 | struct mr_table **mrt) | 152 | struct mr_table **mrt) |
153 | { | 153 | { |
154 | struct ipmr_result res; | 154 | struct ipmr_result res; |
155 | struct fib_lookup_arg arg = { .result = &res, }; | 155 | struct fib_lookup_arg arg = { .result = &res, }; |
156 | int err; | 156 | int err; |
157 | 157 | ||
158 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); | 158 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, |
159 | flowi4_to_flowi(flp4), 0, &arg); | ||
159 | if (err < 0) | 160 | if (err < 0) |
160 | return err; | 161 | return err; |
161 | *mrt = res.mrt; | 162 | *mrt = res.mrt; |
@@ -283,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) | |||
283 | return net->ipv4.mrt; | 284 | return net->ipv4.mrt; |
284 | } | 285 | } |
285 | 286 | ||
286 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | 287 | static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, |
287 | struct mr_table **mrt) | 288 | struct mr_table **mrt) |
288 | { | 289 | { |
289 | *mrt = net->ipv4.mrt; | 290 | *mrt = net->ipv4.mrt; |
@@ -435,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | |||
435 | { | 436 | { |
436 | struct net *net = dev_net(dev); | 437 | struct net *net = dev_net(dev); |
437 | struct mr_table *mrt; | 438 | struct mr_table *mrt; |
438 | struct flowi fl = { | 439 | struct flowi4 fl4 = { |
439 | .oif = dev->ifindex, | 440 | .flowi4_oif = dev->ifindex, |
440 | .iif = skb->skb_iif, | 441 | .flowi4_iif = skb->skb_iif, |
441 | .mark = skb->mark, | 442 | .flowi4_mark = skb->mark, |
442 | }; | 443 | }; |
443 | int err; | 444 | int err; |
444 | 445 | ||
445 | err = ipmr_fib_lookup(net, &fl, &mrt); | 446 | err = ipmr_fib_lookup(net, &fl4, &mrt); |
446 | if (err < 0) { | 447 | if (err < 0) { |
447 | kfree_skb(skb); | 448 | kfree_skb(skb); |
448 | return err; | 449 | return err; |
@@ -1611,26 +1612,20 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1611 | #endif | 1612 | #endif |
1612 | 1613 | ||
1613 | if (vif->flags & VIFF_TUNNEL) { | 1614 | if (vif->flags & VIFF_TUNNEL) { |
1614 | struct flowi fl = { | 1615 | rt = ip_route_output_ports(net, NULL, |
1615 | .oif = vif->link, | 1616 | vif->remote, vif->local, |
1616 | .fl4_dst = vif->remote, | 1617 | 0, 0, |
1617 | .fl4_src = vif->local, | 1618 | IPPROTO_IPIP, |
1618 | .fl4_tos = RT_TOS(iph->tos), | 1619 | RT_TOS(iph->tos), vif->link); |
1619 | .proto = IPPROTO_IPIP | 1620 | if (IS_ERR(rt)) |
1620 | }; | ||
1621 | |||
1622 | if (ip_route_output_key(net, &rt, &fl)) | ||
1623 | goto out_free; | 1621 | goto out_free; |
1624 | encap = sizeof(struct iphdr); | 1622 | encap = sizeof(struct iphdr); |
1625 | } else { | 1623 | } else { |
1626 | struct flowi fl = { | 1624 | rt = ip_route_output_ports(net, NULL, iph->daddr, 0, |
1627 | .oif = vif->link, | 1625 | 0, 0, |
1628 | .fl4_dst = iph->daddr, | 1626 | IPPROTO_IPIP, |
1629 | .fl4_tos = RT_TOS(iph->tos), | 1627 | RT_TOS(iph->tos), vif->link); |
1630 | .proto = IPPROTO_IPIP | 1628 | if (IS_ERR(rt)) |
1631 | }; | ||
1632 | |||
1633 | if (ip_route_output_key(net, &rt, &fl)) | ||
1634 | goto out_free; | 1629 | goto out_free; |
1635 | } | 1630 | } |
1636 | 1631 | ||
@@ -1793,6 +1788,24 @@ dont_forward: | |||
1793 | return 0; | 1788 | return 0; |
1794 | } | 1789 | } |
1795 | 1790 | ||
1791 | static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) | ||
1792 | { | ||
1793 | struct flowi4 fl4 = { | ||
1794 | .daddr = rt->rt_key_dst, | ||
1795 | .saddr = rt->rt_key_src, | ||
1796 | .flowi4_tos = rt->rt_tos, | ||
1797 | .flowi4_oif = rt->rt_oif, | ||
1798 | .flowi4_iif = rt->rt_iif, | ||
1799 | .flowi4_mark = rt->rt_mark, | ||
1800 | }; | ||
1801 | struct mr_table *mrt; | ||
1802 | int err; | ||
1803 | |||
1804 | err = ipmr_fib_lookup(net, &fl4, &mrt); | ||
1805 | if (err) | ||
1806 | return ERR_PTR(err); | ||
1807 | return mrt; | ||
1808 | } | ||
1796 | 1809 | ||
1797 | /* | 1810 | /* |
1798 | * Multicast packets for forwarding arrive here | 1811 | * Multicast packets for forwarding arrive here |
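The new ipmr_rt_fib_lookup() rebuilds a flowi4 from the rt_key_* fields cached on the route, presumably because struct rtable no longer embeds a ready-made flow key (the old code passed &skb_rtable(skb)->fl). As the following hunks show, the call sites reduce to:

	mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
	if (IS_ERR(mrt)) {
		kfree_skb(skb);
		return PTR_ERR(mrt);
	}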
@@ -1805,7 +1818,6 @@ int ip_mr_input(struct sk_buff *skb) | |||
1805 | struct net *net = dev_net(skb->dev); | 1818 | struct net *net = dev_net(skb->dev); |
1806 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; | 1819 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; |
1807 | struct mr_table *mrt; | 1820 | struct mr_table *mrt; |
1808 | int err; | ||
1809 | 1821 | ||
1810 | /* Packet is looped back after forward, it should not be | 1822 | /* Packet is looped back after forward, it should not be |
1811 | * forwarded second time, but still can be delivered locally. | 1823 | * forwarded second time, but still can be delivered locally. |
@@ -1813,12 +1825,11 @@ int ip_mr_input(struct sk_buff *skb) | |||
1813 | if (IPCB(skb)->flags & IPSKB_FORWARDED) | 1825 | if (IPCB(skb)->flags & IPSKB_FORWARDED) |
1814 | goto dont_forward; | 1826 | goto dont_forward; |
1815 | 1827 | ||
1816 | err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); | 1828 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); |
1817 | if (err < 0) { | 1829 | if (IS_ERR(mrt)) { |
1818 | kfree_skb(skb); | 1830 | kfree_skb(skb); |
1819 | return err; | 1831 | return PTR_ERR(mrt); |
1820 | } | 1832 | } |
1821 | |||
1822 | if (!local) { | 1833 | if (!local) { |
1823 | if (IPCB(skb)->opt.router_alert) { | 1834 | if (IPCB(skb)->opt.router_alert) { |
1824 | if (ip_call_ra_chain(skb)) | 1835 | if (ip_call_ra_chain(skb)) |
@@ -1946,9 +1957,9 @@ int pim_rcv_v1(struct sk_buff *skb) | |||
1946 | 1957 | ||
1947 | pim = igmp_hdr(skb); | 1958 | pim = igmp_hdr(skb); |
1948 | 1959 | ||
1949 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) | 1960 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); |
1961 | if (IS_ERR(mrt)) | ||
1950 | goto drop; | 1962 | goto drop; |
1951 | |||
1952 | if (!mrt->mroute_do_pim || | 1963 | if (!mrt->mroute_do_pim || |
1953 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1964 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
1954 | goto drop; | 1965 | goto drop; |
@@ -1978,9 +1989,9 @@ static int pim_rcv(struct sk_buff *skb) | |||
1978 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1989 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
1979 | goto drop; | 1990 | goto drop; |
1980 | 1991 | ||
1981 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) | 1992 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); |
1993 | if (IS_ERR(mrt)) | ||
1982 | goto drop; | 1994 | goto drop; |
1983 | |||
1984 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { | 1995 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { |
1985 | drop: | 1996 | drop: |
1986 | kfree_skb(skb); | 1997 | kfree_skb(skb); |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 994a1f29ebbc..f3c0b549b8e1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
16 | struct net *net = dev_net(skb_dst(skb)->dev); | 16 | struct net *net = dev_net(skb_dst(skb)->dev); |
17 | const struct iphdr *iph = ip_hdr(skb); | 17 | const struct iphdr *iph = ip_hdr(skb); |
18 | struct rtable *rt; | 18 | struct rtable *rt; |
19 | struct flowi fl = {}; | 19 | struct flowi4 fl4 = {}; |
20 | unsigned long orefdst; | 20 | unsigned long orefdst; |
21 | unsigned int hh_len; | 21 | unsigned int hh_len; |
22 | unsigned int type; | 22 | unsigned int type; |
@@ -31,14 +31,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. | 31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. |
32 | */ | 32 | */ |
33 | if (addr_type == RTN_LOCAL) { | 33 | if (addr_type == RTN_LOCAL) { |
34 | fl.fl4_dst = iph->daddr; | 34 | fl4.daddr = iph->daddr; |
35 | if (type == RTN_LOCAL) | 35 | if (type == RTN_LOCAL) |
36 | fl.fl4_src = iph->saddr; | 36 | fl4.saddr = iph->saddr; |
37 | fl.fl4_tos = RT_TOS(iph->tos); | 37 | fl4.flowi4_tos = RT_TOS(iph->tos); |
38 | fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; | 38 | fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; |
39 | fl.mark = skb->mark; | 39 | fl4.flowi4_mark = skb->mark; |
40 | fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; | 40 | fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; |
41 | if (ip_route_output_key(net, &rt, &fl) != 0) | 41 | rt = ip_route_output_key(net, &fl4); |
42 | if (IS_ERR(rt)) | ||
42 | return -1; | 43 | return -1; |
43 | 44 | ||
44 | /* Drop old route. */ | 45 | /* Drop old route. */ |
@@ -47,8 +48,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
47 | } else { | 48 | } else { |
48 | /* non-local src, find valid iif to satisfy | 49 | /* non-local src, find valid iif to satisfy |
49 | * rp-filter when calling ip_route_input. */ | 50 | * rp-filter when calling ip_route_input. */ |
50 | fl.fl4_dst = iph->saddr; | 51 | fl4.daddr = iph->saddr; |
51 | if (ip_route_output_key(net, &rt, &fl) != 0) | 52 | rt = ip_route_output_key(net, &fl4); |
53 | if (IS_ERR(rt)) | ||
52 | return -1; | 54 | return -1; |
53 | 55 | ||
54 | orefdst = skb->_skb_refdst; | 56 | orefdst = skb->_skb_refdst; |
@@ -66,10 +68,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
66 | 68 | ||
67 | #ifdef CONFIG_XFRM | 69 | #ifdef CONFIG_XFRM |
68 | if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && | 70 | if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && |
69 | xfrm_decode_session(skb, &fl, AF_INET) == 0) { | 71 | xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { |
70 | struct dst_entry *dst = skb_dst(skb); | 72 | struct dst_entry *dst = skb_dst(skb); |
71 | skb_dst_set(skb, NULL); | 73 | skb_dst_set(skb, NULL); |
72 | if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) | 74 | dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); |
75 | if (IS_ERR(dst)) | ||
73 | return -1; | 76 | return -1; |
74 | skb_dst_set(skb, dst); | 77 | skb_dst_set(skb, dst); |
75 | } | 78 | } |
@@ -102,7 +105,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb) | |||
102 | dst = ((struct xfrm_dst *)dst)->route; | 105 | dst = ((struct xfrm_dst *)dst)->route; |
103 | dst_hold(dst); | 106 | dst_hold(dst); |
104 | 107 | ||
105 | if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) | 108 | dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); |
109 | if (IS_ERR(dst)) | ||
106 | return -1; | 110 | return -1; |
107 | 111 | ||
108 | skb_dst_drop(skb); | 112 | skb_dst_drop(skb); |
@@ -219,7 +223,11 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, | |||
219 | 223 | ||
220 | static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) | 224 | static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) |
221 | { | 225 | { |
222 | return ip_route_output_key(&init_net, (struct rtable **)dst, fl); | 226 | struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); |
227 | if (IS_ERR(rt)) | ||
228 | return PTR_ERR(rt); | ||
229 | *dst = &rt->dst; | ||
230 | return 0; | ||
223 | } | 231 | } |
224 | 232 | ||
225 | static const struct nf_afinfo nf_ip_afinfo = { | 233 | static const struct nf_afinfo nf_ip_afinfo = { |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index babd1a2bae5f..1dfc18a03fd4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -64,16 +64,6 @@ config IP_NF_IPTABLES | |||
64 | if IP_NF_IPTABLES | 64 | if IP_NF_IPTABLES |
65 | 65 | ||
66 | # The matches. | 66 | # The matches. |
67 | config IP_NF_MATCH_ADDRTYPE | ||
68 | tristate '"addrtype" address type match support' | ||
69 | depends on NETFILTER_ADVANCED | ||
70 | help | ||
71 | This option allows you to match what routing thinks of an address, | ||
72 | eg. UNICAST, LOCAL, BROADCAST, ... | ||
73 | |||
74 | If you want to compile it as a module, say M here and read | ||
75 | <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. | ||
76 | |||
77 | config IP_NF_MATCH_AH | 67 | config IP_NF_MATCH_AH |
78 | tristate '"ah" match support' | 68 | tristate '"ah" match support' |
79 | depends on NETFILTER_ADVANCED | 69 | depends on NETFILTER_ADVANCED |
@@ -206,8 +196,9 @@ config IP_NF_TARGET_REDIRECT | |||
206 | 196 | ||
207 | config NF_NAT_SNMP_BASIC | 197 | config NF_NAT_SNMP_BASIC |
208 | tristate "Basic SNMP-ALG support" | 198 | tristate "Basic SNMP-ALG support" |
209 | depends on NF_NAT | 199 | depends on NF_CONNTRACK_SNMP && NF_NAT |
210 | depends on NETFILTER_ADVANCED | 200 | depends on NETFILTER_ADVANCED |
201 | default NF_NAT && NF_CONNTRACK_SNMP | ||
211 | ---help--- | 202 | ---help--- |
212 | 203 | ||
213 | This module implements an Application Layer Gateway (ALG) for | 204 | This module implements an Application Layer Gateway (ALG) for |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19eb59d01037..dca2082ec683 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o | |||
48 | obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o | 48 | obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o |
49 | 49 | ||
50 | # matches | 50 | # matches |
51 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | ||
52 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o | 51 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o |
53 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o | 52 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o |
54 | 53 | ||
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e855fffaed95..4b5d457c2d76 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
867 | newinfo->initial_entries = 0; | 867 | newinfo->initial_entries = 0; |
868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
869 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
869 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 870 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
870 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 871 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
871 | if (ret != 0) | 872 | if (ret != 0) |
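xt_compat_init_offsets() is introduced by this series so the compat layer can size its offset-translation table for a known entry count up front instead of growing it per entry; its prototype is presumably:

	void xt_compat_init_offsets(u_int8_t af, unsigned int number);

The same call is added before each entry walk in compat_table_info() and translate_compat_table(), both here and in ip_tables.c below.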
@@ -1065,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user, | |||
1065 | /* overflow check */ | 1066 | /* overflow check */ |
1066 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1067 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
1067 | return -ENOMEM; | 1068 | return -ENOMEM; |
1069 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
1068 | 1070 | ||
1069 | newinfo = xt_alloc_table_info(tmp.size); | 1071 | newinfo = xt_alloc_table_info(tmp.size); |
1070 | if (!newinfo) | 1072 | if (!newinfo) |
@@ -1333,6 +1335,7 @@ static int translate_compat_table(const char *name, | |||
1333 | duprintf("translate_compat_table: size %u\n", info->size); | 1335 | duprintf("translate_compat_table: size %u\n", info->size); |
1334 | j = 0; | 1336 | j = 0; |
1335 | xt_compat_lock(NFPROTO_ARP); | 1337 | xt_compat_lock(NFPROTO_ARP); |
1338 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
1336 | /* Walk through entries, checking offsets. */ | 1339 | /* Walk through entries, checking offsets. */ |
1337 | xt_entry_foreach(iter0, entry0, total_size) { | 1340 | xt_entry_foreach(iter0, entry0, total_size) { |
1338 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1341 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
@@ -1486,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user, | |||
1486 | return -ENOMEM; | 1489 | return -ENOMEM; |
1487 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1490 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
1488 | return -ENOMEM; | 1491 | return -ENOMEM; |
1492 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
1489 | 1493 | ||
1490 | newinfo = xt_alloc_table_info(tmp.size); | 1494 | newinfo = xt_alloc_table_info(tmp.size); |
1491 | if (!newinfo) | 1495 | if (!newinfo) |
@@ -1738,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1738 | ret = -EFAULT; | 1742 | ret = -EFAULT; |
1739 | break; | 1743 | break; |
1740 | } | 1744 | } |
1745 | rev.name[sizeof(rev.name)-1] = 0; | ||
1741 | 1746 | ||
1742 | try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, | 1747 | try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, |
1743 | rev.revision, 1, &ret), | 1748 | rev.revision, 1, &ret), |
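The scattered tmp.name[sizeof(tmp.name)-1] = 0 and rev.name[...] = 0 additions are defensive NUL-termination of fixed-size strings copied from user space, so an unterminated name can never run string handling off the end of the buffer. The pattern in isolation:

	struct arpt_replace tmp;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;
	/* user memory: never trust that the string is terminated */
	tmp.name[sizeof(tmp.name) - 1] = 0;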
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 652efea013dc..b09ed0d080f9 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1064 | newinfo->initial_entries = 0; | 1064 | newinfo->initial_entries = 0; |
1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1066 | xt_compat_init_offsets(AF_INET, info->number); | ||
1066 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1067 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1067 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1068 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1068 | if (ret != 0) | 1069 | if (ret != 0) |
@@ -1261,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) | |||
1261 | /* overflow check */ | 1262 | /* overflow check */ |
1262 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1263 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
1263 | return -ENOMEM; | 1264 | return -ENOMEM; |
1265 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
1264 | 1266 | ||
1265 | newinfo = xt_alloc_table_info(tmp.size); | 1267 | newinfo = xt_alloc_table_info(tmp.size); |
1266 | if (!newinfo) | 1268 | if (!newinfo) |
@@ -1664,6 +1666,7 @@ translate_compat_table(struct net *net, | |||
1664 | duprintf("translate_compat_table: size %u\n", info->size); | 1666 | duprintf("translate_compat_table: size %u\n", info->size); |
1665 | j = 0; | 1667 | j = 0; |
1666 | xt_compat_lock(AF_INET); | 1668 | xt_compat_lock(AF_INET); |
1669 | xt_compat_init_offsets(AF_INET, number); | ||
1667 | /* Walk through entries, checking offsets. */ | 1670 | /* Walk through entries, checking offsets. */ |
1668 | xt_entry_foreach(iter0, entry0, total_size) { | 1671 | xt_entry_foreach(iter0, entry0, total_size) { |
1669 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1672 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
@@ -1805,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
1805 | return -ENOMEM; | 1808 | return -ENOMEM; |
1806 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1809 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
1807 | return -ENOMEM; | 1810 | return -ENOMEM; |
1811 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
1808 | 1812 | ||
1809 | newinfo = xt_alloc_table_info(tmp.size); | 1813 | newinfo = xt_alloc_table_info(tmp.size); |
1810 | if (!newinfo) | 1814 | if (!newinfo) |
@@ -2034,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2034 | ret = -EFAULT; | 2038 | ret = -EFAULT; |
2035 | break; | 2039 | break; |
2036 | } | 2040 | } |
2041 | rev.name[sizeof(rev.name)-1] = 0; | ||
2037 | 2042 | ||
2038 | if (cmd == IPT_SO_GET_REVISION_TARGET) | 2043 | if (cmd == IPT_SO_GET_REVISION_TARGET) |
2039 | target = 1; | 2044 | target = 1; |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1e26a4897655..403ca57f6011 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
300 | * that the ->target() function isn't called after ->destroy() */ | 300 | * that the ->target() function isn't called after ->destroy() */ |
301 | 301 | ||
302 | ct = nf_ct_get(skb, &ctinfo); | 302 | ct = nf_ct_get(skb, &ctinfo); |
303 | if (ct == NULL) { | 303 | if (ct == NULL) |
304 | pr_info("no conntrack!\n"); | ||
305 | /* FIXME: need to drop invalid ones, since replies | ||
306 | * to outgoing connections of other nodes will be | ||
307 | * marked as INVALID */ | ||
308 | return NF_DROP; | 304 | return NF_DROP; |
309 | } | ||
310 | 305 | ||
311 | /* special case: ICMP error handling. conntrack distinguishes between | 306 | /* special case: ICMP error handling. conntrack distinguishes between |
312 | * error messages (RELATED) and information requests (see below) */ | 307 | * error messages (RELATED) and information requests (see below) */ |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 72ffc8fda2e9..d76d6c9ed946 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf, | |||
442 | } | 442 | } |
443 | #endif | 443 | #endif |
444 | 444 | ||
445 | /* MAC logging for input path only. */ | 445 | if (in != NULL) |
446 | if (in && !out) | ||
447 | dump_mac_header(m, loginfo, skb); | 446 | dump_mac_header(m, loginfo, skb); |
448 | 447 | ||
449 | dump_packet(m, loginfo, skb, 0); | 448 | dump_packet(m, loginfo, skb, 0); |
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c deleted file mode 100644 index db8bff0fb86d..000000000000 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ /dev/null | |||
@@ -1,134 +0,0 @@ | |||
1 | /* | ||
2 | * iptables module to match inet_addr_type() of an ip. | ||
3 | * | ||
4 | * Copyright (c) 2004 Patrick McHardy <kaber@trash.net> | ||
5 | * (C) 2007 Laszlo Attila Toth <panther@balabit.hu> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <linux/netdevice.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <net/route.h> | ||
18 | |||
19 | #include <linux/netfilter_ipv4/ipt_addrtype.h> | ||
20 | #include <linux/netfilter/x_tables.h> | ||
21 | |||
22 | MODULE_LICENSE("GPL"); | ||
23 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
24 | MODULE_DESCRIPTION("Xtables: address type match for IPv4"); | ||
25 | |||
26 | static inline bool match_type(struct net *net, const struct net_device *dev, | ||
27 | __be32 addr, u_int16_t mask) | ||
28 | { | ||
29 | return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); | ||
30 | } | ||
31 | |||
32 | static bool | ||
33 | addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) | ||
34 | { | ||
35 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
36 | const struct ipt_addrtype_info *info = par->matchinfo; | ||
37 | const struct iphdr *iph = ip_hdr(skb); | ||
38 | bool ret = true; | ||
39 | |||
40 | if (info->source) | ||
41 | ret &= match_type(net, NULL, iph->saddr, info->source) ^ | ||
42 | info->invert_source; | ||
43 | if (info->dest) | ||
44 | ret &= match_type(net, NULL, iph->daddr, info->dest) ^ | ||
45 | info->invert_dest; | ||
46 | |||
47 | return ret; | ||
48 | } | ||
49 | |||
50 | static bool | ||
51 | addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) | ||
52 | { | ||
53 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
54 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; | ||
55 | const struct iphdr *iph = ip_hdr(skb); | ||
56 | const struct net_device *dev = NULL; | ||
57 | bool ret = true; | ||
58 | |||
59 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) | ||
60 | dev = par->in; | ||
61 | else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) | ||
62 | dev = par->out; | ||
63 | |||
64 | if (info->source) | ||
65 | ret &= match_type(net, dev, iph->saddr, info->source) ^ | ||
66 | (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); | ||
67 | if (ret && info->dest) | ||
68 | ret &= match_type(net, dev, iph->daddr, info->dest) ^ | ||
69 | !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) | ||
74 | { | ||
75 | struct ipt_addrtype_info_v1 *info = par->matchinfo; | ||
76 | |||
77 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && | ||
78 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | ||
79 | pr_info("both incoming and outgoing " | ||
80 | "interface limitation cannot be selected\n"); | ||
81 | return -EINVAL; | ||
82 | } | ||
83 | |||
84 | if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | | ||
85 | (1 << NF_INET_LOCAL_IN)) && | ||
86 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | ||
87 | pr_info("output interface limitation " | ||
88 | "not valid in PREROUTING and INPUT\n"); | ||
89 | return -EINVAL; | ||
90 | } | ||
91 | |||
92 | if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | | ||
93 | (1 << NF_INET_LOCAL_OUT)) && | ||
94 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { | ||
95 | pr_info("input interface limitation " | ||
96 | "not valid in POSTROUTING and OUTPUT\n"); | ||
97 | return -EINVAL; | ||
98 | } | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static struct xt_match addrtype_mt_reg[] __read_mostly = { | ||
104 | { | ||
105 | .name = "addrtype", | ||
106 | .family = NFPROTO_IPV4, | ||
107 | .match = addrtype_mt_v0, | ||
108 | .matchsize = sizeof(struct ipt_addrtype_info), | ||
109 | .me = THIS_MODULE | ||
110 | }, | ||
111 | { | ||
112 | .name = "addrtype", | ||
113 | .family = NFPROTO_IPV4, | ||
114 | .revision = 1, | ||
115 | .match = addrtype_mt_v1, | ||
116 | .checkentry = addrtype_mt_checkentry_v1, | ||
117 | .matchsize = sizeof(struct ipt_addrtype_info_v1), | ||
118 | .me = THIS_MODULE | ||
119 | } | ||
120 | }; | ||
121 | |||
122 | static int __init addrtype_mt_init(void) | ||
123 | { | ||
124 | return xt_register_matches(addrtype_mt_reg, | ||
125 | ARRAY_SIZE(addrtype_mt_reg)); | ||
126 | } | ||
127 | |||
128 | static void __exit addrtype_mt_exit(void) | ||
129 | { | ||
130 | xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); | ||
131 | } | ||
132 | |||
133 | module_init(addrtype_mt_init); | ||
134 | module_exit(addrtype_mt_exit); | ||
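Deleting the file outright suggests the match has moved rather than died; this listing only covers net/ipv4, and the addrtype match plausibly survives as a protocol-independent xt module (xt_addrtype) elsewhere in the tree. One detail of the removed v1 code is worth a note: XOR-ing a boolean match result with a flag word only behaves as an inversion when the flag is collapsed to 0/1 first, which is why the dest test uses `!!` while the source test above does not. A standalone sketch of the pitfall:

	#include <stdbool.h>
	#include <stdio.h>

	#define INVERT_FLAG 0x0008	/* any flag bit wider than bit 0 */

	int main(void)
	{
		bool match = true;
		unsigned flags = INVERT_FLAG;
		bool ret;

		ret = true;
		ret &= match ^ (flags & INVERT_FLAG);	/* 1 ^ 8 = 9; 1 & 9 = 1 */
		printf("without !!: %d\n", ret);	/* inversion silently lost */

		ret = true;
		ret &= match ^ !!(flags & INVERT_FLAG);	/* 1 ^ 1 = 0 */
		printf("with    !!: %d\n", ret);	/* correctly inverted */
		return 0;
	}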
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 294a2a32f293..aef5d1fbe77d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) | |||
60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, | 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, |
61 | dev_net(out)->ipv4.iptable_mangle); | 61 | dev_net(out)->ipv4.iptable_mangle); |
62 | /* Reroute for ANY change. */ | 62 | /* Reroute for ANY change. */ |
63 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 63 | if (ret != NF_DROP && ret != NF_STOLEN) { |
64 | iph = ip_hdr(skb); | 64 | iph = ip_hdr(skb); |
65 | 65 | ||
66 | if (iph->saddr != saddr || | 66 | if (iph->saddr != saddr || |
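Dropping NF_QUEUE from the guard means a packet handed to userspace queueing now also gets the reroute check; only DROP and STOLEN skip it, since those verdicts leave no packet to reroute. Sketch of the verdict test, with the NF_* constants re-declared locally:

	#include <stdbool.h>
	#include <stdio.h>

	enum { NF_DROP, NF_ACCEPT, NF_STOLEN, NF_QUEUE, NF_REPEAT };

	/* before: queued packets skipped the reroute; after: only verdicts
	 * that leave us without a packet do */
	static bool may_reroute(int verdict)
	{
		return verdict != NF_DROP && verdict != NF_STOLEN;
	}

	int main(void)
	{
		printf("NF_QUEUE rerouted? %d\n", may_reroute(NF_QUEUE));	/* now 1 */
		return 0;
	}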
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 63f60fc5d26a..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <net/netfilter/nf_conntrack_l4proto.h> | 20 | #include <net/netfilter/nf_conntrack_l4proto.h> |
21 | #include <net/netfilter/nf_conntrack_expect.h> | 21 | #include <net/netfilter/nf_conntrack_expect.h> |
22 | #include <net/netfilter/nf_conntrack_acct.h> | 22 | #include <net/netfilter/nf_conntrack_acct.h> |
23 | #include <linux/rculist_nulls.h> | ||
23 | 24 | ||
24 | struct ct_iter_state { | 25 | struct ct_iter_state { |
25 | struct seq_net_private p; | 26 | struct seq_net_private p; |
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
35 | for (st->bucket = 0; | 36 | for (st->bucket = 0; |
36 | st->bucket < net->ct.htable_size; | 37 | st->bucket < net->ct.htable_size; |
37 | st->bucket++) { | 38 | st->bucket++) { |
38 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 39 | n = rcu_dereference( |
40 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
39 | if (!is_a_nulls(n)) | 41 | if (!is_a_nulls(n)) |
40 | return n; | 42 | return n; |
41 | } | 43 | } |
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
48 | struct net *net = seq_file_net(seq); | 50 | struct net *net = seq_file_net(seq); |
49 | struct ct_iter_state *st = seq->private; | 51 | struct ct_iter_state *st = seq->private; |
50 | 52 | ||
51 | head = rcu_dereference(head->next); | 53 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
52 | while (is_a_nulls(head)) { | 54 | while (is_a_nulls(head)) { |
53 | if (likely(get_nulls_value(head) == st->bucket)) { | 55 | if (likely(get_nulls_value(head) == st->bucket)) { |
54 | if (++st->bucket >= net->ct.htable_size) | 56 | if (++st->bucket >= net->ct.htable_size) |
55 | return NULL; | 57 | return NULL; |
56 | } | 58 | } |
57 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 59 | head = rcu_dereference( |
60 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
58 | } | 61 | } |
59 | return head; | 62 | return head; |
60 | } | 63 | } |
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
217 | struct hlist_node *n; | 220 | struct hlist_node *n; |
218 | 221 | ||
219 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 222 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
220 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 223 | n = rcu_dereference( |
224 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
221 | if (n) | 225 | if (n) |
222 | return n; | 226 | return n; |
223 | } | 227 | } |
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
230 | struct net *net = seq_file_net(seq); | 234 | struct net *net = seq_file_net(seq); |
231 | struct ct_expect_iter_state *st = seq->private; | 235 | struct ct_expect_iter_state *st = seq->private; |
232 | 236 | ||
233 | head = rcu_dereference(head->next); | 237 | head = rcu_dereference(hlist_next_rcu(head)); |
234 | while (head == NULL) { | 238 | while (head == NULL) { |
235 | if (++st->bucket >= nf_ct_expect_hsize) | 239 | if (++st->bucket >= nf_ct_expect_hsize) |
236 | return NULL; | 240 | return NULL; |
237 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 241 | head = rcu_dereference( |
242 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
238 | } | 243 | } |
239 | return head; | 244 | return head; |
240 | } | 245 | } |
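All four hunks here are mechanical: open-coded `->first`/`->next` loads are routed through the hlist_first_rcu()/hlist_nulls_first_rcu() style helpers, so the __rcu address-space annotation is handled in one place and sparse stops warning at every call site. A userspace analogue of why a typed accessor helps; the consume load stands in for rcu_dereference():

	#include <stdio.h>

	struct node { struct node *next; int val; };
	struct list_head { struct node *first; };

	/* one helper owns the annotated load instead of every call site */
	static struct node *list_first_rcu(struct list_head *h)
	{
		return __atomic_load_n(&h->first, __ATOMIC_CONSUME);
	}

	int main(void)
	{
		struct node n = { .next = NULL, .val = 42 };
		struct list_head head = { .first = &n };
		struct node *p = list_first_rcu(&head);

		printf("%d\n", p ? p->val : -1);
		return 0;
	}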
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f23b3f06df0..703f366fd235 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c | |||
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb, | |||
44 | 44 | ||
45 | /* Try to get same port: if not, try to change it. */ | 45 | /* Try to get same port: if not, try to change it. */ |
46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | 46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { |
47 | int ret; | 47 | int res; |
48 | 48 | ||
49 | exp->tuple.dst.u.tcp.port = htons(port); | 49 | exp->tuple.dst.u.tcp.port = htons(port); |
50 | ret = nf_ct_expect_related(exp); | 50 | res = nf_ct_expect_related(exp); |
51 | if (ret == 0) | 51 | if (res == 0) |
52 | break; | 52 | break; |
53 | else if (ret != -EBUSY) { | 53 | else if (res != -EBUSY) { |
54 | port = 0; | 54 | port = 0; |
55 | break; | 55 | break; |
56 | } | 56 | } |
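A pure rename, presumably to stop the loop-local variable from shadowing another `ret` in the enclosing function (the hunk shows only the loop). The hazard being avoided looks like this in miniature:

	#include <stdio.h>

	int main(void)
	{
		int ret = 42;		/* outer result, like the helper's verdict */
		{
			int ret = -1;	/* shadows the outer one: -Wshadow fires */
			(void)ret;
		}
		printf("%d\n", ret);	/* still 42, but easy to misread */
		return 0;
	}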
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c04787ce1a71..21bcf471b25a 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
221 | manips not an issue. */ | 221 | manips not an issue. */ |
222 | if (maniptype == IP_NAT_MANIP_SRC && | 222 | if (maniptype == IP_NAT_MANIP_SRC && |
223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
224 | if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { | 224 | /* try the original tuple first */ |
225 | if (in_range(orig_tuple, range)) { | ||
226 | if (!nf_nat_used_tuple(orig_tuple, ct)) { | ||
227 | *tuple = *orig_tuple; | ||
228 | return; | ||
229 | } | ||
230 | } else if (find_appropriate_src(net, zone, orig_tuple, tuple, | ||
231 | range)) { | ||
225 | pr_debug("get_unique_tuple: Found current src map\n"); | 232 | pr_debug("get_unique_tuple: Found current src map\n"); |
226 | if (!nf_nat_used_tuple(tuple, ct)) | 233 | if (!nf_nat_used_tuple(tuple, ct)) |
227 | return; | 234 | return; |
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
266 | struct net *net = nf_ct_net(ct); | 273 | struct net *net = nf_ct_net(ct); |
267 | struct nf_conntrack_tuple curr_tuple, new_tuple; | 274 | struct nf_conntrack_tuple curr_tuple, new_tuple; |
268 | struct nf_conn_nat *nat; | 275 | struct nf_conn_nat *nat; |
269 | int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); | ||
270 | 276 | ||
271 | /* nat helper or nfctnetlink also setup binding */ | 277 | /* nat helper or nfctnetlink also setup binding */ |
272 | nat = nfct_nat(ct); | 278 | nat = nfct_nat(ct); |
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
306 | ct->status |= IPS_DST_NAT; | 312 | ct->status |= IPS_DST_NAT; |
307 | } | 313 | } |
308 | 314 | ||
309 | /* Place in source hash if this is the first time. */ | 315 | if (maniptype == IP_NAT_MANIP_SRC) { |
310 | if (have_to_hash) { | ||
311 | unsigned int srchash; | 316 | unsigned int srchash; |
312 | 317 | ||
313 | srchash = hash_by_src(net, nf_ct_zone(ct), | 318 | srchash = hash_by_src(net, nf_ct_zone(ct), |
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
323 | 328 | ||
324 | /* It's done. */ | 329 | /* It's done. */ |
325 | if (maniptype == IP_NAT_MANIP_DST) | 330 | if (maniptype == IP_NAT_MANIP_DST) |
326 | set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 331 | ct->status |= IPS_DST_NAT_DONE; |
327 | else | 332 | else |
328 | set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 333 | ct->status |= IPS_SRC_NAT_DONE; |
329 | 334 | ||
330 | return NF_ACCEPT; | 335 | return NF_ACCEPT; |
331 | } | 336 | } |
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | |||
502 | int ret = 0; | 507 | int ret = 0; |
503 | 508 | ||
504 | spin_lock_bh(&nf_nat_lock); | 509 | spin_lock_bh(&nf_nat_lock); |
505 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { | 510 | if (rcu_dereference_protected( |
511 | nf_nat_protos[proto->protonum], | ||
512 | lockdep_is_held(&nf_nat_lock) | ||
513 | ) != &nf_nat_unknown_protocol) { | ||
506 | ret = -EBUSY; | 514 | ret = -EBUSY; |
507 | goto out; | 515 | goto out; |
508 | } | 516 | } |
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | |||
532 | if (nat == NULL || nat->ct == NULL) | 540 | if (nat == NULL || nat->ct == NULL) |
533 | return; | 541 | return; |
534 | 542 | ||
535 | NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); | 543 | NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); |
536 | 544 | ||
537 | spin_lock_bh(&nf_nat_lock); | 545 | spin_lock_bh(&nf_nat_lock); |
538 | hlist_del_rcu(&nat->bysource); | 546 | hlist_del_rcu(&nat->bysource); |
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old) | |||
545 | struct nf_conn_nat *old_nat = old; | 553 | struct nf_conn_nat *old_nat = old; |
546 | struct nf_conn *ct = old_nat->ct; | 554 | struct nf_conn *ct = old_nat->ct; |
547 | 555 | ||
548 | if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) | 556 | if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) |
549 | return; | 557 | return; |
550 | 558 | ||
551 | spin_lock_bh(&nf_nat_lock); | 559 | spin_lock_bh(&nf_nat_lock); |
552 | new_nat->ct = ct; | ||
553 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); | 560 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); |
554 | spin_unlock_bh(&nf_nat_lock); | 561 | spin_unlock_bh(&nf_nat_lock); |
555 | } | 562 | } |
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net) | |||
679 | { | 686 | { |
680 | /* Leave them the same for the moment. */ | 687 | /* Leave them the same for the moment. */ |
681 | net->ipv4.nat_htable_size = net->ct.htable_size; | 688 | net->ipv4.nat_htable_size = net->ct.htable_size; |
682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | 689 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); |
683 | &net->ipv4.nat_vmalloced, 0); | ||
684 | if (!net->ipv4.nat_bysource) | 690 | if (!net->ipv4.nat_bysource) |
685 | return -ENOMEM; | 691 | return -ENOMEM; |
686 | return 0; | 692 | return 0; |
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
702 | { | 708 | { |
703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 709 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
704 | synchronize_rcu(); | 710 | synchronize_rcu(); |
705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 711 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); |
706 | net->ipv4.nat_htable_size); | ||
707 | } | 712 | } |
708 | 713 | ||
709 | static struct pernet_operations nf_nat_net_ops = { | 714 | static struct pernet_operations nf_nat_net_ops = { |
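The get_unique_tuple() hunk adds a fast path: if the packet's original tuple already falls inside the configured NAT range and is not in use, it is kept verbatim, and only otherwise is the by-source hash consulted. A userspace sketch of the selection order; in_range() and tuple_in_use() are stand-ins for the kernel helpers, and the fallback is modelled as a fixed rewrite:

	#include <stdbool.h>
	#include <stdio.h>

	struct tuple { unsigned addr, port; };

	static bool in_range(const struct tuple *t)     { return t->port >= 1024; }
	static bool tuple_in_use(const struct tuple *t) { (void)t; return false; }

	/* pick a source tuple: keep the original when possible */
	static struct tuple pick_tuple(const struct tuple *orig)
	{
		if (in_range(orig) && !tuple_in_use(orig))
			return *orig;				/* new fast path */
		return (struct tuple){ orig->addr, 40000 };	/* fallback mapping */
	}

	int main(void)
	{
		struct tuple orig = { 0x0a000001, 5555 };
		struct tuple got = pick_tuple(&orig);

		printf("port %u -> %u\n", orig.port, got.port);	/* unchanged: 5555 */
		return 0;
	}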
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ee5f419d0a56..8812a02078ab 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <net/netfilter/nf_conntrack_expect.h> | 54 | #include <net/netfilter/nf_conntrack_expect.h> |
55 | #include <net/netfilter/nf_conntrack_helper.h> | 55 | #include <net/netfilter/nf_conntrack_helper.h> |
56 | #include <net/netfilter/nf_nat_helper.h> | 56 | #include <net/netfilter/nf_nat_helper.h> |
57 | #include <linux/netfilter/nf_conntrack_snmp.h> | ||
57 | 58 | ||
58 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
59 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | 60 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); |
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1310 | { | 1311 | { |
1311 | int ret = 0; | 1312 | int ret = 0; |
1312 | 1313 | ||
1313 | ret = nf_conntrack_helper_register(&snmp_helper); | 1314 | BUG_ON(nf_nat_snmp_hook != NULL); |
1314 | if (ret < 0) | 1315 | rcu_assign_pointer(nf_nat_snmp_hook, help); |
1315 | return ret; | 1316 | |
1316 | ret = nf_conntrack_helper_register(&snmp_trap_helper); | 1317 | ret = nf_conntrack_helper_register(&snmp_trap_helper); |
1317 | if (ret < 0) { | 1318 | if (ret < 0) { |
1318 | nf_conntrack_helper_unregister(&snmp_helper); | 1319 | nf_conntrack_helper_unregister(&snmp_helper); |
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1323 | 1324 | ||
1324 | static void __exit nf_nat_snmp_basic_fini(void) | 1325 | static void __exit nf_nat_snmp_basic_fini(void) |
1325 | { | 1326 | { |
1326 | nf_conntrack_helper_unregister(&snmp_helper); | 1327 | rcu_assign_pointer(nf_nat_snmp_hook, NULL); |
1327 | nf_conntrack_helper_unregister(&snmp_trap_helper); | 1328 | nf_conntrack_helper_unregister(&snmp_trap_helper); |
1328 | } | 1329 | } |
1329 | 1330 | ||
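Instead of registering a second conntrack helper for the SNMP port, the module now publishes its help() routine through the nf_nat_snmp_hook function pointer with rcu_assign_pointer() and clears it on unload. A userspace analogue of the publish/clear pattern, with release/consume atomics standing in for rcu_assign_pointer()/rcu_dereference():

	#include <stdio.h>

	typedef int (*snmp_hook_t)(int pkt);

	static snmp_hook_t snmp_hook;	/* NULL when no NAT helper is loaded */

	static int nat_snmp_help(int pkt) { return pkt + 1; }

	static void module_load(void)
	{
		__atomic_store_n(&snmp_hook, nat_snmp_help, __ATOMIC_RELEASE);
	}

	static void module_unload(void)
	{
		__atomic_store_n(&snmp_hook, (snmp_hook_t)NULL, __ATOMIC_RELEASE);
	}

	int main(void)
	{
		module_load();
		snmp_hook_t h = __atomic_load_n(&snmp_hook, __ATOMIC_CONSUME);
		if (h)
			printf("hooked: %d\n", h(41));
		module_unload();
		return 0;
	}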
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 95481fee8bdb..7317bdf1d457 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #ifdef CONFIG_XFRM | 31 | #ifdef CONFIG_XFRM |
32 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | 32 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) |
33 | { | 33 | { |
34 | struct flowi4 *fl4 = &fl->u.ip4; | ||
34 | const struct nf_conn *ct; | 35 | const struct nf_conn *ct; |
35 | const struct nf_conntrack_tuple *t; | 36 | const struct nf_conntrack_tuple *t; |
36 | enum ip_conntrack_info ctinfo; | 37 | enum ip_conntrack_info ctinfo; |
@@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | |||
49 | statusbit = IPS_SRC_NAT; | 50 | statusbit = IPS_SRC_NAT; |
50 | 51 | ||
51 | if (ct->status & statusbit) { | 52 | if (ct->status & statusbit) { |
52 | fl->fl4_dst = t->dst.u3.ip; | 53 | fl4->daddr = t->dst.u3.ip; |
53 | if (t->dst.protonum == IPPROTO_TCP || | 54 | if (t->dst.protonum == IPPROTO_TCP || |
54 | t->dst.protonum == IPPROTO_UDP || | 55 | t->dst.protonum == IPPROTO_UDP || |
55 | t->dst.protonum == IPPROTO_UDPLITE || | 56 | t->dst.protonum == IPPROTO_UDPLITE || |
56 | t->dst.protonum == IPPROTO_DCCP || | 57 | t->dst.protonum == IPPROTO_DCCP || |
57 | t->dst.protonum == IPPROTO_SCTP) | 58 | t->dst.protonum == IPPROTO_SCTP) |
58 | fl->fl_ip_dport = t->dst.u.tcp.port; | 59 | fl4->fl4_dport = t->dst.u.tcp.port; |
59 | } | 60 | } |
60 | 61 | ||
61 | statusbit ^= IPS_NAT_MASK; | 62 | statusbit ^= IPS_NAT_MASK; |
62 | 63 | ||
63 | if (ct->status & statusbit) { | 64 | if (ct->status & statusbit) { |
64 | fl->fl4_src = t->src.u3.ip; | 65 | fl4->saddr = t->src.u3.ip; |
65 | if (t->dst.protonum == IPPROTO_TCP || | 66 | if (t->dst.protonum == IPPROTO_TCP || |
66 | t->dst.protonum == IPPROTO_UDP || | 67 | t->dst.protonum == IPPROTO_UDP || |
67 | t->dst.protonum == IPPROTO_UDPLITE || | 68 | t->dst.protonum == IPPROTO_UDPLITE || |
68 | t->dst.protonum == IPPROTO_DCCP || | 69 | t->dst.protonum == IPPROTO_DCCP || |
69 | t->dst.protonum == IPPROTO_SCTP) | 70 | t->dst.protonum == IPPROTO_SCTP) |
70 | fl->fl_ip_sport = t->src.u.tcp.port; | 71 | fl4->fl4_sport = t->src.u.tcp.port; |
71 | } | 72 | } |
72 | } | 73 | } |
73 | #endif | 74 | #endif |
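This is part of the tree-wide flowi reorganisation visible throughout the series: the IPv4 keys move out of the flat struct flowi into a struct flowi4 union member, and code takes one typed pointer (`fl4`) up front instead of spelling fl->fl4_* at every use. A toy layout showing the shape of the change (field set reduced; not the kernel's full definition):

	#include <stdio.h>

	struct flowi4 {
		unsigned int   daddr, saddr;
		unsigned char  flowi4_proto;
		unsigned short fl4_dport, fl4_sport;
	};

	struct flowi {
		union {
			struct flowi4 ip4;
		} u;
	};

	int main(void)
	{
		struct flowi fl = { .u.ip4 = { .daddr = 0x01020304 } };
		struct flowi4 *fl4 = &fl.u.ip4;	/* taken once, as in the hunk */

		fl4->fl4_dport = 80;
		printf("daddr=%x dport=%u\n", fl4->daddr, fl4->fl4_dport);
		return 0;
	}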
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6390ba299b3d..e837ffd3edc3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -402,7 +402,7 @@ error: | |||
402 | return err; | 402 | return err; |
403 | } | 403 | } |
404 | 404 | ||
405 | static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | 405 | static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg) |
406 | { | 406 | { |
407 | struct iovec *iov; | 407 | struct iovec *iov; |
408 | u8 __user *type = NULL; | 408 | u8 __user *type = NULL; |
@@ -418,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
418 | if (!iov) | 418 | if (!iov) |
419 | continue; | 419 | continue; |
420 | 420 | ||
421 | switch (fl->proto) { | 421 | switch (fl4->flowi4_proto) { |
422 | case IPPROTO_ICMP: | 422 | case IPPROTO_ICMP: |
423 | /* check if one-byte field is readable or not. */ | 423 | /* check if one-byte field is readable or not. */ |
424 | if (iov->iov_base && iov->iov_len < 1) | 424 | if (iov->iov_base && iov->iov_len < 1) |
@@ -433,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
433 | code = iov->iov_base; | 433 | code = iov->iov_base; |
434 | 434 | ||
435 | if (type && code) { | 435 | if (type && code) { |
436 | if (get_user(fl->fl_icmp_type, type) || | 436 | if (get_user(fl4->fl4_icmp_type, type) || |
437 | get_user(fl->fl_icmp_code, code)) | 437 | get_user(fl4->fl4_icmp_code, code)) |
438 | return -EFAULT; | 438 | return -EFAULT; |
439 | probed = 1; | 439 | probed = 1; |
440 | } | 440 | } |
@@ -548,25 +548,30 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
548 | } | 548 | } |
549 | 549 | ||
550 | { | 550 | { |
551 | struct flowi fl = { .oif = ipc.oif, | 551 | struct flowi4 fl4 = { |
552 | .mark = sk->sk_mark, | 552 | .flowi4_oif = ipc.oif, |
553 | .fl4_dst = daddr, | 553 | .flowi4_mark = sk->sk_mark, |
554 | .fl4_src = saddr, | 554 | .daddr = daddr, |
555 | .fl4_tos = tos, | 555 | .saddr = saddr, |
556 | .proto = inet->hdrincl ? IPPROTO_RAW : | 556 | .flowi4_tos = tos, |
557 | sk->sk_protocol, | 557 | .flowi4_proto = (inet->hdrincl ? |
558 | }; | 558 | IPPROTO_RAW : |
559 | sk->sk_protocol), | ||
560 | .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, | ||
561 | }; | ||
559 | if (!inet->hdrincl) { | 562 | if (!inet->hdrincl) { |
560 | err = raw_probe_proto_opt(&fl, msg); | 563 | err = raw_probe_proto_opt(&fl4, msg); |
561 | if (err) | 564 | if (err) |
562 | goto done; | 565 | goto done; |
563 | } | 566 | } |
564 | 567 | ||
565 | security_sk_classify_flow(sk, &fl); | 568 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); |
566 | err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); | 569 | rt = ip_route_output_flow(sock_net(sk), &fl4, sk); |
570 | if (IS_ERR(rt)) { | ||
571 | err = PTR_ERR(rt); | ||
572 | goto done; | ||
573 | } | ||
567 | } | 574 | } |
568 | if (err) | ||
569 | goto done; | ||
570 | 575 | ||
571 | err = -EACCES; | 576 | err = -EACCES; |
572 | if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) | 577 | if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) |
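Besides the flowi4 conversion, ip_route_output_flow() changes calling convention here: instead of returning an int error through a struct rtable ** out-parameter, it returns the route directly, with errors encoded in the pointer via ERR_PTR(). A minimal self-contained version of the encoding as the new call site uses it:

	#include <errno.h>
	#include <stdio.h>

	#define MAX_ERRNO 4095

	static inline void *ERR_PTR(long error) { return (void *)error; }
	static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
	static inline int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	struct rtable { int id; };

	static struct rtable *route_lookup(int ok)
	{
		static struct rtable rt = { .id = 1 };
		return ok ? &rt : ERR_PTR(-ENETUNREACH);
	}

	int main(void)
	{
		struct rtable *rt = route_lookup(0);

		if (IS_ERR(rt))
			printf("err = %ld\n", PTR_ERR(rt));	/* -ENETUNREACH */
		else
			printf("rt id = %d\n", rt->id);
		return 0;
	}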
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ed6603c2f6d..209989cf7d1b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -109,8 +109,8 @@ | |||
109 | #include <linux/sysctl.h> | 109 | #include <linux/sysctl.h> |
110 | #endif | 110 | #endif |
111 | 111 | ||
112 | #define RT_FL_TOS(oldflp) \ | 112 | #define RT_FL_TOS(oldflp4) \ |
113 | ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) | 113 | ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) |
114 | 114 | ||
115 | #define IP_MAX_MTU 0xFFF0 | 115 | #define IP_MAX_MTU 0xFFF0 |
116 | 116 | ||
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | |||
131 | static int ip_rt_min_advmss __read_mostly = 256; | 131 | static int ip_rt_min_advmss __read_mostly = 256; |
132 | static int rt_chain_length_max __read_mostly = 20; | 132 | static int rt_chain_length_max __read_mostly = 20; |
133 | 133 | ||
134 | static struct delayed_work expires_work; | ||
135 | static unsigned long expires_ljiffies; | ||
136 | |||
137 | /* | 134 | /* |
138 | * Interface to generic destination cache. | 135 | * Interface to generic destination cache. |
139 | */ | 136 | */ |
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
152 | { | 149 | { |
153 | } | 150 | } |
154 | 151 | ||
152 | static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | ||
153 | { | ||
154 | struct rtable *rt = (struct rtable *) dst; | ||
155 | struct inet_peer *peer; | ||
156 | u32 *p = NULL; | ||
157 | |||
158 | if (!rt->peer) | ||
159 | rt_bind_peer(rt, 1); | ||
160 | |||
161 | peer = rt->peer; | ||
162 | if (peer) { | ||
163 | u32 *old_p = __DST_METRICS_PTR(old); | ||
164 | unsigned long prev, new; | ||
165 | |||
166 | p = peer->metrics; | ||
167 | if (inet_metrics_new(peer)) | ||
168 | memcpy(p, old_p, sizeof(u32) * RTAX_MAX); | ||
169 | |||
170 | new = (unsigned long) p; | ||
171 | prev = cmpxchg(&dst->_metrics, old, new); | ||
172 | |||
173 | if (prev != old) { | ||
174 | p = __DST_METRICS_PTR(prev); | ||
175 | if (prev & DST_METRICS_READ_ONLY) | ||
176 | p = NULL; | ||
177 | } else { | ||
178 | if (rt->fi) { | ||
179 | fib_info_put(rt->fi); | ||
180 | rt->fi = NULL; | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | return p; | ||
185 | } | ||
186 | |||
155 | static struct dst_ops ipv4_dst_ops = { | 187 | static struct dst_ops ipv4_dst_ops = { |
156 | .family = AF_INET, | 188 | .family = AF_INET, |
157 | .protocol = cpu_to_be16(ETH_P_IP), | 189 | .protocol = cpu_to_be16(ETH_P_IP), |
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
159 | .check = ipv4_dst_check, | 191 | .check = ipv4_dst_check, |
160 | .default_advmss = ipv4_default_advmss, | 192 | .default_advmss = ipv4_default_advmss, |
161 | .default_mtu = ipv4_default_mtu, | 193 | .default_mtu = ipv4_default_mtu, |
194 | .cow_metrics = ipv4_cow_metrics, | ||
162 | .destroy = ipv4_dst_destroy, | 195 | .destroy = ipv4_dst_destroy, |
163 | .ifdown = ipv4_dst_ifdown, | 196 | .ifdown = ipv4_dst_ifdown, |
164 | .negative_advice = ipv4_negative_advice, | 197 | .negative_advice = ipv4_negative_advice, |
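ipv4_cow_metrics() implements the new copy-on-write metrics scheme: routes start out pointing at read-only metrics, and the first writer copies them into the bound inet_peer and publishes the writable array with a cmpxchg on dst->_metrics, so exactly one copy wins under concurrency (the kernel version also rechecks the read-only bit when it loses the race). A simplified userspace sketch of the publish step, with a GCC builtin standing in for cmpxchg():

	#include <stdio.h>
	#include <string.h>

	#define RTAX_MAX 16

	static unsigned int shared_ro[RTAX_MAX];	/* read-only template metrics */
	static unsigned int *metrics = shared_ro;	/* dst->_metrics stand-in */

	static unsigned int *cow_metrics(unsigned int *private_buf)
	{
		unsigned int *old = metrics;

		memcpy(private_buf, old, RTAX_MAX * sizeof(*old));
		/* only one concurrent writer transitions template -> private */
		if (__sync_bool_compare_and_swap(&metrics, old, private_buf))
			return private_buf;
		return metrics;		/* lost the race: use the winner's copy */
	}

	int main(void)
	{
		unsigned int mine[RTAX_MAX];
		unsigned int *p = cow_metrics(mine);

		printf("writable copy %s\n", p == mine ? "installed" : "raced");
		return 0;
	}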
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
391 | dst_metric(&r->dst, RTAX_WINDOW), | 424 | dst_metric(&r->dst, RTAX_WINDOW), |
392 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 425 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
393 | dst_metric(&r->dst, RTAX_RTTVAR)), | 426 | dst_metric(&r->dst, RTAX_RTTVAR)), |
394 | r->fl.fl4_tos, | 427 | r->rt_tos, |
395 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, | 428 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, |
396 | r->dst.hh ? (r->dst.hh->hh_output == | 429 | r->dst.hh ? (r->dst.hh->hh_output == |
397 | dev_queue_xmit) : 0, | 430 | dev_queue_xmit) : 0, |
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
514 | .release = seq_release, | 547 | .release = seq_release, |
515 | }; | 548 | }; |
516 | 549 | ||
517 | #ifdef CONFIG_NET_CLS_ROUTE | 550 | #ifdef CONFIG_IP_ROUTE_CLASSID |
518 | static int rt_acct_proc_show(struct seq_file *m, void *v) | 551 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
519 | { | 552 | { |
520 | struct ip_rt_acct *dst, *src; | 553 | struct ip_rt_acct *dst, *src; |
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net) | |||
567 | if (!pde) | 600 | if (!pde) |
568 | goto err2; | 601 | goto err2; |
569 | 602 | ||
570 | #ifdef CONFIG_NET_CLS_ROUTE | 603 | #ifdef CONFIG_IP_ROUTE_CLASSID |
571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); | 604 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); |
572 | if (!pde) | 605 | if (!pde) |
573 | goto err3; | 606 | goto err3; |
574 | #endif | 607 | #endif |
575 | return 0; | 608 | return 0; |
576 | 609 | ||
577 | #ifdef CONFIG_NET_CLS_ROUTE | 610 | #ifdef CONFIG_IP_ROUTE_CLASSID |
578 | err3: | 611 | err3: |
579 | remove_proc_entry("rt_cache", net->proc_net_stat); | 612 | remove_proc_entry("rt_cache", net->proc_net_stat); |
580 | #endif | 613 | #endif |
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) | |||
588 | { | 621 | { |
589 | remove_proc_entry("rt_cache", net->proc_net_stat); | 622 | remove_proc_entry("rt_cache", net->proc_net_stat); |
590 | remove_proc_entry("rt_cache", net->proc_net); | 623 | remove_proc_entry("rt_cache", net->proc_net); |
591 | #ifdef CONFIG_NET_CLS_ROUTE | 624 | #ifdef CONFIG_IP_ROUTE_CLASSID |
592 | remove_proc_entry("rt_acct", net->proc_net); | 625 | remove_proc_entry("rt_acct", net->proc_net); |
593 | #endif | 626 | #endif |
594 | } | 627 | } |
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
632 | static inline int rt_valuable(struct rtable *rth) | 665 | static inline int rt_valuable(struct rtable *rth) |
633 | { | 666 | { |
634 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 667 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
635 | rth->dst.expires; | 668 | (rth->peer && rth->peer->pmtu_expires); |
636 | } | 669 | } |
637 | 670 | ||
638 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 671 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t | |||
643 | if (atomic_read(&rth->dst.__refcnt)) | 676 | if (atomic_read(&rth->dst.__refcnt)) |
644 | goto out; | 677 | goto out; |
645 | 678 | ||
646 | ret = 1; | ||
647 | if (rth->dst.expires && | ||
648 | time_after_eq(jiffies, rth->dst.expires)) | ||
649 | goto out; | ||
650 | |||
651 | age = jiffies - rth->dst.lastuse; | 679 | age = jiffies - rth->dst.lastuse; |
652 | ret = 0; | ||
653 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 680 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
654 | (age <= tmo2 && rt_valuable(rth))) | 681 | (age <= tmo2 && rt_valuable(rth))) |
655 | goto out; | 682 | goto out; |
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net) | |||
684 | net->ipv4.sysctl_rt_cache_rebuild_count; | 711 | net->ipv4.sysctl_rt_cache_rebuild_count; |
685 | } | 712 | } |
686 | 713 | ||
687 | static inline bool compare_hash_inputs(const struct flowi *fl1, | 714 | static inline bool compare_hash_inputs(const struct rtable *rt1, |
688 | const struct flowi *fl2) | 715 | const struct rtable *rt2) |
689 | { | 716 | { |
690 | return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | | 717 | return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | |
691 | ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | | 718 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
692 | (fl1->iif ^ fl2->iif)) == 0); | 719 | (rt1->rt_iif ^ rt2->rt_iif)) == 0); |
693 | } | 720 | } |
694 | 721 | ||
695 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 722 | static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) |
696 | { | 723 | { |
697 | return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | | 724 | return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | |
698 | ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | | 725 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
699 | (fl1->mark ^ fl2->mark) | | 726 | (rt1->rt_mark ^ rt2->rt_mark) | |
700 | (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | | 727 | (rt1->rt_tos ^ rt2->rt_tos) | |
701 | (fl1->oif ^ fl2->oif) | | 728 | (rt1->rt_oif ^ rt2->rt_oif) | |
702 | (fl1->iif ^ fl2->iif)) == 0; | 729 | (rt1->rt_iif ^ rt2->rt_iif)) == 0; |
703 | } | 730 | } |
704 | 731 | ||
705 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 732 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
@@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
786 | const struct rtable *aux = head; | 813 | const struct rtable *aux = head; |
787 | 814 | ||
788 | while (aux != rth) { | 815 | while (aux != rth) { |
789 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | 816 | if (compare_hash_inputs(aux, rth)) |
790 | return 0; | 817 | return 0; |
791 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); | 818 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); |
792 | } | 819 | } |
793 | return ONE; | 820 | return ONE; |
794 | } | 821 | } |
795 | 822 | ||
796 | static void rt_check_expire(void) | ||
797 | { | ||
798 | static unsigned int rover; | ||
799 | unsigned int i = rover, goal; | ||
800 | struct rtable *rth; | ||
801 | struct rtable __rcu **rthp; | ||
802 | unsigned long samples = 0; | ||
803 | unsigned long sum = 0, sum2 = 0; | ||
804 | unsigned long delta; | ||
805 | u64 mult; | ||
806 | |||
807 | delta = jiffies - expires_ljiffies; | ||
808 | expires_ljiffies = jiffies; | ||
809 | mult = ((u64)delta) << rt_hash_log; | ||
810 | if (ip_rt_gc_timeout > 1) | ||
811 | do_div(mult, ip_rt_gc_timeout); | ||
812 | goal = (unsigned int)mult; | ||
813 | if (goal > rt_hash_mask) | ||
814 | goal = rt_hash_mask + 1; | ||
815 | for (; goal > 0; goal--) { | ||
816 | unsigned long tmo = ip_rt_gc_timeout; | ||
817 | unsigned long length; | ||
818 | |||
819 | i = (i + 1) & rt_hash_mask; | ||
820 | rthp = &rt_hash_table[i].chain; | ||
821 | |||
822 | if (need_resched()) | ||
823 | cond_resched(); | ||
824 | |||
825 | samples++; | ||
826 | |||
827 | if (rcu_dereference_raw(*rthp) == NULL) | ||
828 | continue; | ||
829 | length = 0; | ||
830 | spin_lock_bh(rt_hash_lock_addr(i)); | ||
831 | while ((rth = rcu_dereference_protected(*rthp, | ||
832 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
833 | prefetch(rth->dst.rt_next); | ||
834 | if (rt_is_expired(rth)) { | ||
835 | *rthp = rth->dst.rt_next; | ||
836 | rt_free(rth); | ||
837 | continue; | ||
838 | } | ||
839 | if (rth->dst.expires) { | ||
840 | /* Entry is expired even if it is in use */ | ||
841 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
842 | nofree: | ||
843 | tmo >>= 1; | ||
844 | rthp = &rth->dst.rt_next; | ||
845 | /* | ||
846 | * We only count entries on | ||
847 | * a chain with equal hash inputs once | ||
848 | * so that entries for different QOS | ||
849 | * levels, and other non-hash input | ||
850 | * attributes don't unfairly skew | ||
851 | * the length computation | ||
852 | */ | ||
853 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
854 | continue; | ||
855 | } | ||
856 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
857 | goto nofree; | ||
858 | |||
859 | /* Cleanup aged off entries. */ | ||
860 | *rthp = rth->dst.rt_next; | ||
861 | rt_free(rth); | ||
862 | } | ||
863 | spin_unlock_bh(rt_hash_lock_addr(i)); | ||
864 | sum += length; | ||
865 | sum2 += length*length; | ||
866 | } | ||
867 | if (samples) { | ||
868 | unsigned long avg = sum / samples; | ||
869 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
870 | rt_chain_length_max = max_t(unsigned long, | ||
871 | ip_rt_gc_elasticity, | ||
872 | (avg + 4*sd) >> FRACT_BITS); | ||
873 | } | ||
874 | rover = i; | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * rt_worker_func() is run in process context. | ||
879 | * we call rt_check_expire() to scan part of the hash table | ||
880 | */ | ||
881 | static void rt_worker_func(struct work_struct *work) | ||
882 | { | ||
883 | rt_check_expire(); | ||
884 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | ||
885 | } | ||
886 | |||
887 | /* | 823 | /* |
888 | * Perturbation of rt_genid by a small quantity [1..256] | 824 | * Perturbation of rt_genid by a small quantity [1..256] |
889 | * Using 8 bits of shuffling ensures we can call rt_cache_invalidate() | 825 | * Using 8 bits of shuffling ensures we can call rt_cache_invalidate() |
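With PMTU and redirect state moving into struct inet_peer, cached routes no longer carry their own dst.expires, so the whole rt_check_expire() scanner and its delayed work item can go: staleness is detected lazily, when an entry is actually dereferenced. (The removed heuristic that sized rt_chain_length_max as roughly mean plus four standard deviations of sampled chain lengths goes with it.) A sketch of the lazy-expiry shape that replaces periodic scanning:

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	struct entry { time_t expires; int data; };

	/* no background scanner: validity is checked at lookup time */
	static bool entry_valid(const struct entry *e, time_t now)
	{
		return e->expires == 0 || now < e->expires;
	}

	int main(void)
	{
		time_t now = time(NULL);
		struct entry e = { .expires = now - 1, .data = 7 };

		printf("valid? %d\n", entry_valid(&e, now));	/* 0: expired lazily */
		return 0;
	}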
@@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head) | |||
1078 | return length >> FRACT_BITS; | 1014 | return length >> FRACT_BITS; |
1079 | } | 1015 | } |
1080 | 1016 | ||
1081 | static int rt_intern_hash(unsigned hash, struct rtable *rt, | 1017 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, |
1082 | struct rtable **rp, struct sk_buff *skb, int ifindex) | 1018 | struct sk_buff *skb, int ifindex) |
1083 | { | 1019 | { |
1084 | struct rtable *rth, *cand; | 1020 | struct rtable *rth, *cand; |
1085 | struct rtable __rcu **rthp, **candp; | 1021 | struct rtable __rcu **rthp, **candp; |
@@ -1120,7 +1056,7 @@ restart: | |||
1120 | printk(KERN_WARNING | 1056 | printk(KERN_WARNING |
1121 | "Neighbour table failure & not caching routes.\n"); | 1057 | "Neighbour table failure & not caching routes.\n"); |
1122 | ip_rt_put(rt); | 1058 | ip_rt_put(rt); |
1123 | return err; | 1059 | return ERR_PTR(err); |
1124 | } | 1060 | } |
1125 | } | 1061 | } |
1126 | 1062 | ||
@@ -1137,7 +1073,7 @@ restart: | |||
1137 | rt_free(rth); | 1073 | rt_free(rth); |
1138 | continue; | 1074 | continue; |
1139 | } | 1075 | } |
1140 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { | 1076 | if (compare_keys(rth, rt) && compare_netns(rth, rt)) { |
1141 | /* Put it first */ | 1077 | /* Put it first */ |
1142 | *rthp = rth->dst.rt_next; | 1078 | *rthp = rth->dst.rt_next; |
1143 | /* | 1079 | /* |
@@ -1157,11 +1093,9 @@ restart: | |||
1157 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1093 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1158 | 1094 | ||
1159 | rt_drop(rt); | 1095 | rt_drop(rt); |
1160 | if (rp) | 1096 | if (skb) |
1161 | *rp = rth; | ||
1162 | else | ||
1163 | skb_dst_set(skb, &rth->dst); | 1097 | skb_dst_set(skb, &rth->dst); |
1164 | return 0; | 1098 | return rth; |
1165 | } | 1099 | } |
1166 | 1100 | ||
1167 | if (!atomic_read(&rth->dst.__refcnt)) { | 1101 | if (!atomic_read(&rth->dst.__refcnt)) { |
@@ -1202,7 +1136,7 @@ restart: | |||
1202 | rt_emergency_hash_rebuild(net); | 1136 | rt_emergency_hash_rebuild(net); |
1203 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1137 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1204 | 1138 | ||
1205 | hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1139 | hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1206 | ifindex, rt_genid(net)); | 1140 | ifindex, rt_genid(net)); |
1207 | goto restart; | 1141 | goto restart; |
1208 | } | 1142 | } |
@@ -1218,7 +1152,7 @@ restart: | |||
1218 | 1152 | ||
1219 | if (err != -ENOBUFS) { | 1153 | if (err != -ENOBUFS) { |
1220 | rt_drop(rt); | 1154 | rt_drop(rt); |
1221 | return err; | 1155 | return ERR_PTR(err); |
1222 | } | 1156 | } |
1223 | 1157 | ||
1224 | /* Neighbour tables are full and nothing | 1158 | /* Neighbour tables are full and nothing |
@@ -1239,7 +1173,7 @@ restart: | |||
1239 | if (net_ratelimit()) | 1173 | if (net_ratelimit()) |
1240 | printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); | 1174 | printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); |
1241 | rt_drop(rt); | 1175 | rt_drop(rt); |
1242 | return -ENOBUFS; | 1176 | return ERR_PTR(-ENOBUFS); |
1243 | } | 1177 | } |
1244 | } | 1178 | } |
1245 | 1179 | ||
@@ -1265,11 +1199,16 @@ restart: | |||
1265 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1199 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1266 | 1200 | ||
1267 | skip_hashing: | 1201 | skip_hashing: |
1268 | if (rp) | 1202 | if (skb) |
1269 | *rp = rt; | ||
1270 | else | ||
1271 | skb_dst_set(skb, &rt->dst); | 1203 | skb_dst_set(skb, &rt->dst); |
1272 | return 0; | 1204 | return rt; |
1205 | } | ||
1206 | |||
1207 | static atomic_t __rt_peer_genid = ATOMIC_INIT(0); | ||
1208 | |||
1209 | static u32 rt_peer_genid(void) | ||
1210 | { | ||
1211 | return atomic_read(&__rt_peer_genid); | ||
1273 | } | 1212 | } |
1274 | 1213 | ||
1275 | void rt_bind_peer(struct rtable *rt, int create) | 1214 | void rt_bind_peer(struct rtable *rt, int create) |
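The new __rt_peer_genid counter is the invalidation glue for the rest of the patch: any code that learns a redirect or a PMTU bumps the global counter, each cached route remembers the value it last saw in rt_peer_genid, and ipv4_dst_check() (further down) revalidates a route exactly once per event instead of anyone walking the hash table. Minimal model of the scheme:

	#include <stdio.h>

	static int global_genid;		/* __rt_peer_genid */

	struct route { int seen_genid; };	/* rt->rt_peer_genid */

	static void learn_event(void) { __sync_fetch_and_add(&global_genid, 1); }

	static int route_needs_check(struct route *rt)
	{
		int cur = __sync_fetch_and_add(&global_genid, 0);	/* atomic read */

		if (rt->seen_genid == cur)
			return 0;
		rt->seen_genid = cur;	/* revalidate once per event */
		return 1;
	}

	int main(void)
	{
		struct route rt = { .seen_genid = 0 };
		int a, b;

		learn_event();	/* e.g. a PMTU update arrived */
		a = route_needs_check(&rt);
		b = route_needs_check(&rt);
		printf("check=%d check=%d\n", a, b);	/* 1 0 */
		return 0;
	}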
@@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create) | |||
1280 | 1219 | ||
1281 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) | 1220 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) |
1282 | inet_putpeer(peer); | 1221 | inet_putpeer(peer); |
1222 | else | ||
1223 | rt->rt_peer_genid = rt_peer_genid(); | ||
1283 | } | 1224 | } |
1284 | 1225 | ||
1285 | /* | 1226 | /* |
@@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1349 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1290 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
1350 | __be32 saddr, struct net_device *dev) | 1291 | __be32 saddr, struct net_device *dev) |
1351 | { | 1292 | { |
1352 | int i, k; | ||
1353 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1293 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1354 | struct rtable *rth; | 1294 | struct inet_peer *peer; |
1355 | struct rtable __rcu **rthp; | ||
1356 | __be32 skeys[2] = { saddr, 0 }; | ||
1357 | int ikeys[2] = { dev->ifindex, 0 }; | ||
1358 | struct netevent_redirect netevent; | ||
1359 | struct net *net; | 1295 | struct net *net; |
1360 | 1296 | ||
1361 | if (!in_dev) | 1297 | if (!in_dev) |
@@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1367 | ipv4_is_zeronet(new_gw)) | 1303 | ipv4_is_zeronet(new_gw)) |
1368 | goto reject_redirect; | 1304 | goto reject_redirect; |
1369 | 1305 | ||
1370 | if (!rt_caching(net)) | ||
1371 | goto reject_redirect; | ||
1372 | |||
1373 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { | 1306 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { |
1374 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) | 1307 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) |
1375 | goto reject_redirect; | 1308 | goto reject_redirect; |
@@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1380 | goto reject_redirect; | 1313 | goto reject_redirect; |
1381 | } | 1314 | } |
1382 | 1315 | ||
1383 | for (i = 0; i < 2; i++) { | 1316 | peer = inet_getpeer_v4(daddr, 1); |
1384 | for (k = 0; k < 2; k++) { | 1317 | if (peer) { |
1385 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1318 | peer->redirect_learned.a4 = new_gw; |
1386 | rt_genid(net)); | ||
1387 | |||
1388 | rthp = &rt_hash_table[hash].chain; | ||
1389 | |||
1390 | while ((rth = rcu_dereference(*rthp)) != NULL) { | ||
1391 | struct rtable *rt; | ||
1392 | |||
1393 | if (rth->fl.fl4_dst != daddr || | ||
1394 | rth->fl.fl4_src != skeys[i] || | ||
1395 | rth->fl.oif != ikeys[k] || | ||
1396 | rt_is_input_route(rth) || | ||
1397 | rt_is_expired(rth) || | ||
1398 | !net_eq(dev_net(rth->dst.dev), net)) { | ||
1399 | rthp = &rth->dst.rt_next; | ||
1400 | continue; | ||
1401 | } | ||
1402 | |||
1403 | if (rth->rt_dst != daddr || | ||
1404 | rth->rt_src != saddr || | ||
1405 | rth->dst.error || | ||
1406 | rth->rt_gateway != old_gw || | ||
1407 | rth->dst.dev != dev) | ||
1408 | break; | ||
1409 | |||
1410 | dst_hold(&rth->dst); | ||
1411 | |||
1412 | rt = dst_alloc(&ipv4_dst_ops); | ||
1413 | if (rt == NULL) { | ||
1414 | ip_rt_put(rth); | ||
1415 | return; | ||
1416 | } | ||
1417 | |||
1418 | /* Copy all the information. */ | ||
1419 | *rt = *rth; | ||
1420 | rt->dst.__use = 1; | ||
1421 | atomic_set(&rt->dst.__refcnt, 1); | ||
1422 | rt->dst.child = NULL; | ||
1423 | if (rt->dst.dev) | ||
1424 | dev_hold(rt->dst.dev); | ||
1425 | rt->dst.obsolete = -1; | ||
1426 | rt->dst.lastuse = jiffies; | ||
1427 | rt->dst.path = &rt->dst; | ||
1428 | rt->dst.neighbour = NULL; | ||
1429 | rt->dst.hh = NULL; | ||
1430 | #ifdef CONFIG_XFRM | ||
1431 | rt->dst.xfrm = NULL; | ||
1432 | #endif | ||
1433 | rt->rt_genid = rt_genid(net); | ||
1434 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1435 | |||
1436 | /* Gateway is different ... */ | ||
1437 | rt->rt_gateway = new_gw; | ||
1438 | |||
1439 | /* Redirect received -> path was valid */ | ||
1440 | dst_confirm(&rth->dst); | ||
1441 | |||
1442 | if (rt->peer) | ||
1443 | atomic_inc(&rt->peer->refcnt); | ||
1444 | |||
1445 | if (arp_bind_neighbour(&rt->dst) || | ||
1446 | !(rt->dst.neighbour->nud_state & | ||
1447 | NUD_VALID)) { | ||
1448 | if (rt->dst.neighbour) | ||
1449 | neigh_event_send(rt->dst.neighbour, NULL); | ||
1450 | ip_rt_put(rth); | ||
1451 | rt_drop(rt); | ||
1452 | goto do_next; | ||
1453 | } | ||
1454 | 1319 | ||
1455 | netevent.old = &rth->dst; | 1320 | inet_putpeer(peer); |
1456 | netevent.new = &rt->dst; | ||
1457 | call_netevent_notifiers(NETEVENT_REDIRECT, | ||
1458 | &netevent); | ||
1459 | 1321 | ||
1460 | rt_del(hash, rth); | 1322 | atomic_inc(&__rt_peer_genid); |
1461 | if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) | ||
1462 | ip_rt_put(rt); | ||
1463 | goto do_next; | ||
1464 | } | ||
1465 | do_next: | ||
1466 | ; | ||
1467 | } | ||
1468 | } | 1323 | } |
1469 | return; | 1324 | return; |
1470 | 1325 | ||
@@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1488 | if (dst->obsolete > 0) { | 1343 | if (dst->obsolete > 0) { |
1489 | ip_rt_put(rt); | 1344 | ip_rt_put(rt); |
1490 | ret = NULL; | 1345 | ret = NULL; |
1491 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1346 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
1492 | (rt->dst.expires && | 1347 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1493 | time_after_eq(jiffies, rt->dst.expires))) { | 1348 | rt->rt_oif, |
1494 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | ||
1495 | rt->fl.oif, | ||
1496 | rt_genid(dev_net(dst->dev))); | 1349 | rt_genid(dev_net(dst->dev))); |
1497 | #if RT_CACHE_DEBUG >= 1 | 1350 | #if RT_CACHE_DEBUG >= 1 |
1498 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", | 1351 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", |
1499 | &rt->rt_dst, rt->fl.fl4_tos); | 1352 | &rt->rt_dst, rt->rt_tos); |
1500 | #endif | 1353 | #endif |
1501 | rt_del(hash, rt); | 1354 | rt_del(hash, rt); |
1502 | ret = NULL; | 1355 | ret = NULL; |
1356 | } else if (rt->peer && | ||
1357 | rt->peer->pmtu_expires && | ||
1358 | time_after_eq(jiffies, rt->peer->pmtu_expires)) { | ||
1359 | unsigned long orig = rt->peer->pmtu_expires; | ||
1360 | |||
1361 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
1362 | dst_metric_set(dst, RTAX_MTU, | ||
1363 | rt->peer->pmtu_orig); | ||
1503 | } | 1364 | } |
1504 | } | 1365 | } |
1505 | return ret; | 1366 | return ret; |
@@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1525 | { | 1386 | { |
1526 | struct rtable *rt = skb_rtable(skb); | 1387 | struct rtable *rt = skb_rtable(skb); |
1527 | struct in_device *in_dev; | 1388 | struct in_device *in_dev; |
1389 | struct inet_peer *peer; | ||
1528 | int log_martians; | 1390 | int log_martians; |
1529 | 1391 | ||
1530 | rcu_read_lock(); | 1392 | rcu_read_lock(); |
@@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1536 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); | 1398 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
1537 | rcu_read_unlock(); | 1399 | rcu_read_unlock(); |
1538 | 1400 | ||
1401 | if (!rt->peer) | ||
1402 | rt_bind_peer(rt, 1); | ||
1403 | peer = rt->peer; | ||
1404 | if (!peer) { | ||
1405 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | ||
1406 | return; | ||
1407 | } | ||
1408 | |||
1539 | /* No redirected packets during ip_rt_redirect_silence; | 1409 | /* No redirected packets during ip_rt_redirect_silence; |
1540 | * reset the algorithm. | 1410 | * reset the algorithm. |
1541 | */ | 1411 | */ |
1542 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) | 1412 | if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) |
1543 | rt->dst.rate_tokens = 0; | 1413 | peer->rate_tokens = 0; |
1544 | 1414 | ||
1545 | /* Too many ignored redirects; do not send anything | 1415 | /* Too many ignored redirects; do not send anything |
1546 | * set dst.rate_last to the last seen redirected packet. | 1416 | * set dst.rate_last to the last seen redirected packet. |
1547 | */ | 1417 | */ |
1548 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { | 1418 | if (peer->rate_tokens >= ip_rt_redirect_number) { |
1549 | rt->dst.rate_last = jiffies; | 1419 | peer->rate_last = jiffies; |
1550 | return; | 1420 | return; |
1551 | } | 1421 | } |
1552 | 1422 | ||
1553 | /* Check for load limit; set rate_last to the latest sent | 1423 | /* Check for load limit; set rate_last to the latest sent |
1554 | * redirect. | 1424 | * redirect. |
1555 | */ | 1425 | */ |
1556 | if (rt->dst.rate_tokens == 0 || | 1426 | if (peer->rate_tokens == 0 || |
1557 | time_after(jiffies, | 1427 | time_after(jiffies, |
1558 | (rt->dst.rate_last + | 1428 | (peer->rate_last + |
1559 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { | 1429 | (ip_rt_redirect_load << peer->rate_tokens)))) { |
1560 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1430 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
1561 | rt->dst.rate_last = jiffies; | 1431 | peer->rate_last = jiffies; |
1562 | ++rt->dst.rate_tokens; | 1432 | ++peer->rate_tokens; |
1563 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1433 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1564 | if (log_martians && | 1434 | if (log_martians && |
1565 | rt->dst.rate_tokens == ip_rt_redirect_number && | 1435 | peer->rate_tokens == ip_rt_redirect_number && |
1566 | net_ratelimit()) | 1436 | net_ratelimit()) |
1567 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1437 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1568 | &rt->rt_src, rt->rt_iif, | 1438 | &rt->rt_src, rt->rt_iif, |
@@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1574 | static int ip_error(struct sk_buff *skb) | 1444 | static int ip_error(struct sk_buff *skb) |
1575 | { | 1445 | { |
1576 | struct rtable *rt = skb_rtable(skb); | 1446 | struct rtable *rt = skb_rtable(skb); |
1447 | struct inet_peer *peer; | ||
1577 | unsigned long now; | 1448 | unsigned long now; |
1449 | bool send; | ||
1578 | int code; | 1450 | int code; |
1579 | 1451 | ||
1580 | switch (rt->dst.error) { | 1452 | switch (rt->dst.error) { |
@@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb) | |||
1594 | break; | 1466 | break; |
1595 | } | 1467 | } |
1596 | 1468 | ||
1597 | now = jiffies; | 1469 | if (!rt->peer) |
1598 | rt->dst.rate_tokens += now - rt->dst.rate_last; | 1470 | rt_bind_peer(rt, 1); |
1599 | if (rt->dst.rate_tokens > ip_rt_error_burst) | 1471 | peer = rt->peer; |
1600 | rt->dst.rate_tokens = ip_rt_error_burst; | 1472 | |
1601 | rt->dst.rate_last = now; | 1473 | send = true; |
1602 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { | 1474 | if (peer) { |
1603 | rt->dst.rate_tokens -= ip_rt_error_cost; | 1475 | now = jiffies; |
1604 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1476 | peer->rate_tokens += now - peer->rate_last; |
1477 | if (peer->rate_tokens > ip_rt_error_burst) | ||
1478 | peer->rate_tokens = ip_rt_error_burst; | ||
1479 | peer->rate_last = now; | ||
1480 | if (peer->rate_tokens >= ip_rt_error_cost) | ||
1481 | peer->rate_tokens -= ip_rt_error_cost; | ||
1482 | else | ||
1483 | send = false; | ||
1605 | } | 1484 | } |
1485 | if (send) | ||
1486 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | ||
1606 | 1487 | ||
1607 | out: kfree_skb(skb); | 1488 | out: kfree_skb(skb); |
1608 | return 0; | 1489 | return 0; |
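The restructured ip_error() keeps the same token-bucket rate limit for ICMP unreachables but moves the bucket from the dst into the peer, and `send` starts true so the error is always sent when no peer can be allocated. A compact standalone version of the bucket arithmetic; BURST and COST stand in for ip_rt_error_burst and ip_rt_error_cost:

	#include <stdbool.h>
	#include <stdio.h>

	#define BURST 500	/* token units model jiffies */
	#define COST  100

	struct peer { unsigned long tokens, last; };

	static bool may_send(struct peer *p, unsigned long now)
	{
		p->tokens += now - p->last;	/* refill by elapsed time */
		if (p->tokens > BURST)
			p->tokens = BURST;
		p->last = now;
		if (p->tokens >= COST) {
			p->tokens -= COST;
			return true;
		}
		return false;
	}

	int main(void)
	{
		struct peer p = { .tokens = 0, .last = 0 };
		unsigned long t;

		for (t = 150; t <= 170; t += 10)
			printf("t=%lu send=%d\n", t, may_send(&p, t));	/* 1 0 0 */
		return 0;
	}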
@@ -1630,88 +1511,142 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1630 | unsigned short new_mtu, | 1511 | unsigned short new_mtu, |
1631 | struct net_device *dev) | 1512 | struct net_device *dev) |
1632 | { | 1513 | { |
1633 | int i, k; | ||
1634 | unsigned short old_mtu = ntohs(iph->tot_len); | 1514 | unsigned short old_mtu = ntohs(iph->tot_len); |
1635 | struct rtable *rth; | ||
1636 | int ikeys[2] = { dev->ifindex, 0 }; | ||
1637 | __be32 skeys[2] = { iph->saddr, 0, }; | ||
1638 | __be32 daddr = iph->daddr; | ||
1639 | unsigned short est_mtu = 0; | 1515 | unsigned short est_mtu = 0; |
1516 | struct inet_peer *peer; | ||
1640 | 1517 | ||
1641 | for (k = 0; k < 2; k++) { | 1518 | peer = inet_getpeer_v4(iph->daddr, 1); |
1642 | for (i = 0; i < 2; i++) { | 1519 | if (peer) { |
1643 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1520 | unsigned short mtu = new_mtu; |
1644 | rt_genid(net)); | ||
1645 | |||
1646 | rcu_read_lock(); | ||
1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | ||
1648 | rth = rcu_dereference(rth->dst.rt_next)) { | ||
1649 | unsigned short mtu = new_mtu; | ||
1650 | |||
1651 | if (rth->fl.fl4_dst != daddr || | ||
1652 | rth->fl.fl4_src != skeys[i] || | ||
1653 | rth->rt_dst != daddr || | ||
1654 | rth->rt_src != iph->saddr || | ||
1655 | rth->fl.oif != ikeys[k] || | ||
1656 | rt_is_input_route(rth) || | ||
1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || | ||
1658 | !net_eq(dev_net(rth->dst.dev), net) || | ||
1659 | rt_is_expired(rth)) | ||
1660 | continue; | ||
1661 | 1521 | ||
1662 | if (new_mtu < 68 || new_mtu >= old_mtu) { | 1522 | if (new_mtu < 68 || new_mtu >= old_mtu) { |
1523 | /* BSD 4.2 derived systems incorrectly adjust | ||
1524 | * tot_len by the IP header length, and report | ||
1525 | * a zero MTU in the ICMP message. | ||
1526 | */ | ||
1527 | if (mtu == 0 && | ||
1528 | old_mtu >= 68 + (iph->ihl << 2)) | ||
1529 | old_mtu -= iph->ihl << 2; | ||
1530 | mtu = guess_mtu(old_mtu); | ||
1531 | } | ||
1663 | 1532 | ||
1664 | /* BSD 4.2 compatibility hack :-( */ | 1533 | if (mtu < ip_rt_min_pmtu) |
1665 | if (mtu == 0 && | 1534 | mtu = ip_rt_min_pmtu; |
1666 | old_mtu >= dst_mtu(&rth->dst) && | 1535 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { |
1667 | old_mtu >= 68 + (iph->ihl << 2)) | 1536 | unsigned long pmtu_expires; |
1668 | old_mtu -= iph->ihl << 2; | ||
1669 | 1537 | ||
1670 | mtu = guess_mtu(old_mtu); | 1538 | pmtu_expires = jiffies + ip_rt_mtu_expires; |
1671 | } | 1539 | if (!pmtu_expires) |
1672 | if (mtu <= dst_mtu(&rth->dst)) { | 1540 | pmtu_expires = 1UL; |
1673 | if (mtu < dst_mtu(&rth->dst)) { | 1541 | |
1674 | dst_confirm(&rth->dst); | 1542 | est_mtu = mtu; |
1675 | if (mtu < ip_rt_min_pmtu) { | 1543 | peer->pmtu_learned = mtu; |
1676 | u32 lock = dst_metric(&rth->dst, | 1544 | peer->pmtu_expires = pmtu_expires; |
1677 | RTAX_LOCK); | ||
1678 | mtu = ip_rt_min_pmtu; | ||
1679 | lock |= (1 << RTAX_MTU); | ||
1680 | dst_metric_set(&rth->dst, RTAX_LOCK, | ||
1681 | lock); | ||
1682 | } | ||
1683 | dst_metric_set(&rth->dst, RTAX_MTU, mtu); | ||
1684 | dst_set_expires(&rth->dst, | ||
1685 | ip_rt_mtu_expires); | ||
1686 | } | ||
1687 | est_mtu = mtu; | ||
1688 | } | ||
1689 | } | ||
1690 | rcu_read_unlock(); | ||
1691 | } | 1545 | } |
1546 | |||
1547 | inet_putpeer(peer); | ||
1548 | |||
1549 | atomic_inc(&__rt_peer_genid); | ||
1692 | } | 1550 | } |
1693 | return est_mtu ? : new_mtu; | 1551 | return est_mtu ? : new_mtu; |
1694 | } | 1552 | } |
1695 | 1553 | ||
1554 | static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) | ||
1555 | { | ||
1556 | unsigned long expires = peer->pmtu_expires; | ||
1557 | |||
1558 | if (time_before(jiffies, expires)) { | ||
1559 | u32 orig_dst_mtu = dst_mtu(dst); | ||
1560 | if (peer->pmtu_learned < orig_dst_mtu) { | ||
1561 | if (!peer->pmtu_orig) | ||
1562 | peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); | ||
1563 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); | ||
1564 | } | ||
1565 | } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) | ||
1566 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); | ||
1567 | } | ||
1568 | |||
1696 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | 1569 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) |
1697 | { | 1570 | { |
1698 | if (dst_mtu(dst) > mtu && mtu >= 68 && | 1571 | struct rtable *rt = (struct rtable *) dst; |
1699 | !(dst_metric_locked(dst, RTAX_MTU))) { | 1572 | struct inet_peer *peer; |
1700 | if (mtu < ip_rt_min_pmtu) { | 1573 | |
1701 | u32 lock = dst_metric(dst, RTAX_LOCK); | 1574 | dst_confirm(dst); |
1575 | |||
1576 | if (!rt->peer) | ||
1577 | rt_bind_peer(rt, 1); | ||
1578 | peer = rt->peer; | ||
1579 | if (peer) { | ||
1580 | if (mtu < ip_rt_min_pmtu) | ||
1702 | mtu = ip_rt_min_pmtu; | 1581 | mtu = ip_rt_min_pmtu; |
1703 | dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); | 1582 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { |
1583 | unsigned long pmtu_expires; | ||
1584 | |||
1585 | pmtu_expires = jiffies + ip_rt_mtu_expires; | ||
1586 | if (!pmtu_expires) | ||
1587 | pmtu_expires = 1UL; | ||
1588 | |||
1589 | peer->pmtu_learned = mtu; | ||
1590 | peer->pmtu_expires = pmtu_expires; | ||
1591 | |||
1592 | atomic_inc(&__rt_peer_genid); | ||
1593 | rt->rt_peer_genid = rt_peer_genid(); | ||
1704 | } | 1594 | } |
1705 | dst_metric_set(dst, RTAX_MTU, mtu); | 1595 | check_peer_pmtu(dst, peer); |
1706 | dst_set_expires(dst, ip_rt_mtu_expires); | 1596 | |
1707 | call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); | 1597 | inet_putpeer(peer); |
1598 | } | ||
1599 | } | ||
1600 | |||
1601 | static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | ||
1602 | { | ||
1603 | struct rtable *rt = (struct rtable *) dst; | ||
1604 | __be32 orig_gw = rt->rt_gateway; | ||
1605 | |||
1606 | dst_confirm(&rt->dst); | ||
1607 | |||
1608 | neigh_release(rt->dst.neighbour); | ||
1609 | rt->dst.neighbour = NULL; | ||
1610 | |||
1611 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1612 | if (arp_bind_neighbour(&rt->dst) || | ||
1613 | !(rt->dst.neighbour->nud_state & NUD_VALID)) { | ||
1614 | if (rt->dst.neighbour) | ||
1615 | neigh_event_send(rt->dst.neighbour, NULL); | ||
1616 | rt->rt_gateway = orig_gw; | ||
1617 | return -EAGAIN; | ||
1618 | } else { | ||
1619 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1620 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, | ||
1621 | rt->dst.neighbour); | ||
1708 | } | 1622 | } |
1623 | return 0; | ||
1709 | } | 1624 | } |
1710 | 1625 | ||
1711 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1626 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
1712 | { | 1627 | { |
1713 | if (rt_is_expired((struct rtable *)dst)) | 1628 | struct rtable *rt = (struct rtable *) dst; |
1629 | |||
1630 | if (rt_is_expired(rt)) | ||
1714 | return NULL; | 1631 | return NULL; |
1632 | if (rt->rt_peer_genid != rt_peer_genid()) { | ||
1633 | struct inet_peer *peer; | ||
1634 | |||
1635 | if (!rt->peer) | ||
1636 | rt_bind_peer(rt, 0); | ||
1637 | |||
1638 | peer = rt->peer; | ||
1639 | if (peer && peer->pmtu_expires) | ||
1640 | check_peer_pmtu(dst, peer); | ||
1641 | |||
1642 | if (peer && peer->redirect_learned.a4 && | ||
1643 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1644 | if (check_peer_redir(dst, peer)) | ||
1645 | return NULL; | ||
1646 | } | ||
1647 | |||
1648 | rt->rt_peer_genid = rt_peer_genid(); | ||
1649 | } | ||
1715 | return dst; | 1650 | return dst; |
1716 | } | 1651 | } |
1717 | 1652 | ||
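ipv4_dst_check() above revalidates cached routes lazily through a generation counter: every PMTU or redirect learning event bumps the global __rt_peer_genid, and any rtable whose snapshot (rt_peer_genid) no longer matches re-checks its peer state before being handed out. A compact single-threaded sketch of the idiom, with plain ints for illustration:

#include <stdio.h>

/* Generation-counter invalidation: writers bump a global counter,
 * readers compare their snapshot and refresh lazily on mismatch. */
static unsigned int peer_genid;             /* bumped on learning */

struct route {
    unsigned int genid;                     /* snapshot at creation */
};

static int route_is_current(const struct route *r)
{
    return r->genid == peer_genid;
}

int main(void)
{
    struct route r = { peer_genid };
    printf("fresh: %d\n", route_is_current(&r));
    peer_genid++;                           /* e.g. a new PMTU was learned */
    printf("after learning: %d\n", route_is_current(&r));
    r.genid = peer_genid;                   /* re-sync after revalidation */
    printf("resynced: %d\n", route_is_current(&r));
    return 0;
}
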
@@ -1720,6 +1655,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst) | |||
1720 | struct rtable *rt = (struct rtable *) dst; | 1655 | struct rtable *rt = (struct rtable *) dst; |
1721 | struct inet_peer *peer = rt->peer; | 1656 | struct inet_peer *peer = rt->peer; |
1722 | 1657 | ||
1658 | if (rt->fi) { | ||
1659 | fib_info_put(rt->fi); | ||
1660 | rt->fi = NULL; | ||
1661 | } | ||
1723 | if (peer) { | 1662 | if (peer) { |
1724 | rt->peer = NULL; | 1663 | rt->peer = NULL; |
1725 | inet_putpeer(peer); | 1664 | inet_putpeer(peer); |
@@ -1734,8 +1673,14 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1734 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1673 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1735 | 1674 | ||
1736 | rt = skb_rtable(skb); | 1675 | rt = skb_rtable(skb); |
1737 | if (rt) | 1676 | if (rt && |
1738 | dst_set_expires(&rt->dst, 0); | 1677 | rt->peer && |
1678 | rt->peer->pmtu_expires) { | ||
1679 | unsigned long orig = rt->peer->pmtu_expires; | ||
1680 | |||
1681 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
1682 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); | ||
1683 | } | ||
1739 | } | 1684 | } |
1740 | 1685 | ||
1741 | static int ip_rt_bug(struct sk_buff *skb) | 1686 | static int ip_rt_bug(struct sk_buff *skb) |
@@ -1764,8 +1709,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1764 | if (rt_is_output_route(rt)) | 1709 | if (rt_is_output_route(rt)) |
1765 | src = rt->rt_src; | 1710 | src = rt->rt_src; |
1766 | else { | 1711 | else { |
1712 | struct flowi4 fl4 = { | ||
1713 | .daddr = rt->rt_key_dst, | ||
1714 | .saddr = rt->rt_key_src, | ||
1715 | .flowi4_tos = rt->rt_tos, | ||
1716 | .flowi4_oif = rt->rt_oif, | ||
1717 | .flowi4_iif = rt->rt_iif, | ||
1718 | .flowi4_mark = rt->rt_mark, | ||
1719 | }; | ||
1720 | |||
1767 | rcu_read_lock(); | 1721 | rcu_read_lock(); |
1768 | if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) | 1722 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) |
1769 | src = FIB_RES_PREFSRC(res); | 1723 | src = FIB_RES_PREFSRC(res); |
1770 | else | 1724 | else |
1771 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | 1725 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, |
@@ -1775,7 +1729,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1775 | memcpy(addr, &src, 4); | 1729 | memcpy(addr, &src, 4); |
1776 | } | 1730 | } |
1777 | 1731 | ||
1778 | #ifdef CONFIG_NET_CLS_ROUTE | 1732 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1779 | static void set_class_tag(struct rtable *rt, u32 tag) | 1733 | static void set_class_tag(struct rtable *rt, u32 tag) |
1780 | { | 1734 | { |
1781 | if (!(rt->dst.tclassid & 0xFFFF)) | 1735 | if (!(rt->dst.tclassid & 0xFFFF)) |
@@ -1815,17 +1769,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | |||
1815 | return mtu; | 1769 | return mtu; |
1816 | } | 1770 | } |
1817 | 1771 | ||
1818 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | 1772 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, |
1773 | struct fib_info *fi) | ||
1774 | { | ||
1775 | struct inet_peer *peer; | ||
1776 | int create = 0; | ||
1777 | |||
1778 | /* If a peer entry exists for this destination, we must hook | ||
1779 | * it up in order to get at cached metrics. | ||
1780 | */ | ||
1781 | if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) | ||
1782 | create = 1; | ||
1783 | |||
1784 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); | ||
1785 | if (peer) { | ||
1786 | rt->rt_peer_genid = rt_peer_genid(); | ||
1787 | if (inet_metrics_new(peer)) | ||
1788 | memcpy(peer->metrics, fi->fib_metrics, | ||
1789 | sizeof(u32) * RTAX_MAX); | ||
1790 | dst_init_metrics(&rt->dst, peer->metrics, false); | ||
1791 | |||
1792 | if (peer->pmtu_expires) | ||
1793 | check_peer_pmtu(&rt->dst, peer); | ||
1794 | if (peer->redirect_learned.a4 && | ||
1795 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1796 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1797 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1798 | } | ||
1799 | } else { | ||
1800 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | ||
1801 | rt->fi = fi; | ||
1802 | atomic_inc(&fi->fib_clntref); | ||
1803 | } | ||
1804 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | ||
1805 | } | ||
1806 | } | ||
1807 | |||
1808 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, | ||
1809 | const struct fib_result *res, | ||
1810 | struct fib_info *fi, u16 type, u32 itag) | ||
1819 | { | 1811 | { |
1820 | struct dst_entry *dst = &rt->dst; | 1812 | struct dst_entry *dst = &rt->dst; |
1821 | struct fib_info *fi = res->fi; | ||
1822 | 1813 | ||
1823 | if (fi) { | 1814 | if (fi) { |
1824 | if (FIB_RES_GW(*res) && | 1815 | if (FIB_RES_GW(*res) && |
1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1816 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1826 | rt->rt_gateway = FIB_RES_GW(*res); | 1817 | rt->rt_gateway = FIB_RES_GW(*res); |
1827 | dst_import_metrics(dst, fi->fib_metrics); | 1818 | rt_init_metrics(rt, oldflp4, fi); |
1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1819 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1820 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1830 | #endif | 1821 | #endif |
1831 | } | 1822 | } |
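
rt_set_nexthop() now delegates to rt_init_metrics() (earlier in this hunk), which picks where the metrics array lives: if an inet_peer is available, or FLOWI_FLAG_PRECOW_METRICS forces one into existence, the peer receives a private writable copy seeded from the fib_info; otherwise the route references the fib_info metrics read-only and pins them via fib_clntref. A hedged sketch of that copy-on-demand decision, with the structures reduced to the essentials:

#include <stdio.h>
#include <string.h>

#define RTAX_MAX 16   /* illustrative size, not the kernel's exact value */

/* Sketch: metrics are shared read-only by default and copied into a
 * per-peer buffer only when a writable copy is actually wanted. */
struct peer  { unsigned int metrics[RTAX_MAX]; int has_metrics; };
struct route { const unsigned int *metrics; int writable; };

static void init_metrics(struct route *rt, struct peer *peer,
                         const unsigned int *fib_metrics)
{
    if (peer) {
        if (!peer->has_metrics) {          /* first user seeds the copy */
            memcpy(peer->metrics, fib_metrics, sizeof(peer->metrics));
            peer->has_metrics = 1;
        }
        rt->metrics = peer->metrics;
        rt->writable = 1;
    } else {
        rt->metrics = fib_metrics;         /* shared, read-only */
        rt->writable = 0;
    }
}

int main(void)
{
    unsigned int fib_metrics[RTAX_MAX] = { 1500 };
    struct peer p = { { 0 }, 0 };
    struct route rt;

    init_metrics(&rt, &p, fib_metrics);
    printf("writable=%d mtu=%u\n", rt.writable, rt.metrics[0]);
    return 0;
}
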
@@ -1835,13 +1826,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | 1826 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | 1827 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1837 | 1828 | ||
1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1829 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1830 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1840 | set_class_tag(rt, fib_rules_tclass(res)); | 1831 | set_class_tag(rt, fib_rules_tclass(res)); |
1841 | #endif | 1832 | #endif |
1842 | set_class_tag(rt, itag); | 1833 | set_class_tag(rt, itag); |
1843 | #endif | 1834 | #endif |
1844 | rt->rt_type = res->type; | 1835 | rt->rt_type = type; |
1836 | } | ||
1837 | |||
1838 | static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) | ||
1839 | { | ||
1840 | struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); | ||
1841 | if (rt) { | ||
1842 | rt->dst.obsolete = -1; | ||
1843 | |||
1844 | rt->dst.flags = DST_HOST | | ||
1845 | (nopolicy ? DST_NOPOLICY : 0) | | ||
1846 | (noxfrm ? DST_NOXFRM : 0); | ||
1847 | } | ||
1848 | return rt; | ||
1845 | } | 1849 | } |
1846 | 1850 | ||
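The new rt_dst_alloc() helper above folds boilerplate that the three allocation sites below previously repeated: the obsolete = -1 marker and the DST_HOST / DST_NOPOLICY / DST_NOXFRM flag combination, with dst_alloc()'s new second argument supplying the initial refcount that used to be an explicit atomic_set(). A toy version showing the consolidation, with flag values invented for the example:

#include <stdio.h>
#include <stdlib.h>

/* Sketch of folding repeated flag setup into one constructor. */
#define DST_HOST     0x1
#define DST_NOPOLICY 0x2
#define DST_NOXFRM   0x4

struct rtable { int obsolete; unsigned int flags; };

static struct rtable *rt_dst_alloc(int nopolicy, int noxfrm)
{
    struct rtable *rt = calloc(1, sizeof(*rt));

    if (rt) {
        rt->obsolete = -1;
        rt->flags = DST_HOST |
                    (nopolicy ? DST_NOPOLICY : 0) |
                    (noxfrm ? DST_NOXFRM : 0);
    }
    return rt;
}

int main(void)
{
    struct rtable *rt = rt_dst_alloc(1, 0);
    if (rt)
        printf("flags=%#x\n", rt->flags);
    free(rt);
    return 0;
}
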
1847 | /* called in rcu_read_lock() section */ | 1851 | /* called in rcu_read_lock() section */ |
@@ -1874,31 +1878,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1874 | if (err < 0) | 1878 | if (err < 0) |
1875 | goto e_err; | 1879 | goto e_err; |
1876 | } | 1880 | } |
1877 | rth = dst_alloc(&ipv4_dst_ops); | 1881 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
1878 | if (!rth) | 1882 | if (!rth) |
1879 | goto e_nobufs; | 1883 | goto e_nobufs; |
1880 | 1884 | ||
1881 | rth->dst.output = ip_rt_bug; | 1885 | rth->dst.output = ip_rt_bug; |
1882 | rth->dst.obsolete = -1; | ||
1883 | 1886 | ||
1884 | atomic_set(&rth->dst.__refcnt, 1); | 1887 | rth->rt_key_dst = daddr; |
1885 | rth->dst.flags= DST_HOST; | ||
1886 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
1887 | rth->dst.flags |= DST_NOPOLICY; | ||
1888 | rth->fl.fl4_dst = daddr; | ||
1889 | rth->rt_dst = daddr; | 1888 | rth->rt_dst = daddr; |
1890 | rth->fl.fl4_tos = tos; | 1889 | rth->rt_tos = tos; |
1891 | rth->fl.mark = skb->mark; | 1890 | rth->rt_mark = skb->mark; |
1892 | rth->fl.fl4_src = saddr; | 1891 | rth->rt_key_src = saddr; |
1893 | rth->rt_src = saddr; | 1892 | rth->rt_src = saddr; |
1894 | #ifdef CONFIG_NET_CLS_ROUTE | 1893 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1895 | rth->dst.tclassid = itag; | 1894 | rth->dst.tclassid = itag; |
1896 | #endif | 1895 | #endif |
1897 | rth->rt_iif = | 1896 | rth->rt_iif = dev->ifindex; |
1898 | rth->fl.iif = dev->ifindex; | ||
1899 | rth->dst.dev = init_net.loopback_dev; | 1897 | rth->dst.dev = init_net.loopback_dev; |
1900 | dev_hold(rth->dst.dev); | 1898 | dev_hold(rth->dst.dev); |
1901 | rth->fl.oif = 0; | 1899 | rth->rt_oif = 0; |
1902 | rth->rt_gateway = daddr; | 1900 | rth->rt_gateway = daddr; |
1903 | rth->rt_spec_dst= spec_dst; | 1901 | rth->rt_spec_dst= spec_dst; |
1904 | rth->rt_genid = rt_genid(dev_net(dev)); | 1902 | rth->rt_genid = rt_genid(dev_net(dev)); |
@@ -1916,7 +1914,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1916 | RT_CACHE_STAT_INC(in_slow_mc); | 1914 | RT_CACHE_STAT_INC(in_slow_mc); |
1917 | 1915 | ||
1918 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1916 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
1919 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); | 1917 | rth = rt_intern_hash(hash, rth, skb, dev->ifindex); |
1918 | err = 0; | ||
1919 | if (IS_ERR(rth)) | ||
1920 | err = PTR_ERR(rth); | ||
1920 | 1921 | ||
1921 | e_nobufs: | 1922 | e_nobufs: |
1922 | return -ENOBUFS; | 1923 | return -ENOBUFS; |
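
rt_intern_hash() now returns either the interned rtable or an error encoded in the pointer itself, which is why the callers in this and the following hunks switch to IS_ERR()/PTR_ERR() in place of an int status plus out-parameter. The idiom works because the top MAX_ERRNO bytes of kernel address space are never valid pointers; a self-contained user-space rendering:

#include <stdio.h>

/* Sketch of the kernel's ERR_PTR/IS_ERR/PTR_ERR idiom: small negative
 * errno values are folded into the unused top page of pointer space. */
#define MAX_ERRNO 4095

static void *ERR_PTR(long error)      { return (void *)error; }
static long  PTR_ERR(const void *ptr) { return (long)ptr; }
static int   IS_ERR(const void *ptr)
{
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *lookup(int fail)
{
    static int obj = 42;
    return fail ? ERR_PTR(-105 /* -ENOBUFS */) : (void *)&obj;
}

int main(void)
{
    void *p = lookup(1);
    if (IS_ERR(p))
        printf("error %ld\n", PTR_ERR(p));
    return 0;
}
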
@@ -1959,7 +1960,7 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1959 | 1960 | ||
1960 | /* called in rcu_read_lock() section */ | 1961 | /* called in rcu_read_lock() section */ |
1961 | static int __mkroute_input(struct sk_buff *skb, | 1962 | static int __mkroute_input(struct sk_buff *skb, |
1962 | struct fib_result *res, | 1963 | const struct fib_result *res, |
1963 | struct in_device *in_dev, | 1964 | struct in_device *in_dev, |
1964 | __be32 daddr, __be32 saddr, u32 tos, | 1965 | __be32 daddr, __be32 saddr, u32 tos, |
1965 | struct rtable **result) | 1966 | struct rtable **result) |
@@ -2013,39 +2014,31 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2013 | } | 2014 | } |
2014 | } | 2015 | } |
2015 | 2016 | ||
2016 | 2017 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
2017 | rth = dst_alloc(&ipv4_dst_ops); | 2018 | IN_DEV_CONF_GET(out_dev, NOXFRM)); |
2018 | if (!rth) { | 2019 | if (!rth) { |
2019 | err = -ENOBUFS; | 2020 | err = -ENOBUFS; |
2020 | goto cleanup; | 2021 | goto cleanup; |
2021 | } | 2022 | } |
2022 | 2023 | ||
2023 | atomic_set(&rth->dst.__refcnt, 1); | 2024 | rth->rt_key_dst = daddr; |
2024 | rth->dst.flags= DST_HOST; | ||
2025 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2026 | rth->dst.flags |= DST_NOPOLICY; | ||
2027 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | ||
2028 | rth->dst.flags |= DST_NOXFRM; | ||
2029 | rth->fl.fl4_dst = daddr; | ||
2030 | rth->rt_dst = daddr; | 2025 | rth->rt_dst = daddr; |
2031 | rth->fl.fl4_tos = tos; | 2026 | rth->rt_tos = tos; |
2032 | rth->fl.mark = skb->mark; | 2027 | rth->rt_mark = skb->mark; |
2033 | rth->fl.fl4_src = saddr; | 2028 | rth->rt_key_src = saddr; |
2034 | rth->rt_src = saddr; | 2029 | rth->rt_src = saddr; |
2035 | rth->rt_gateway = daddr; | 2030 | rth->rt_gateway = daddr; |
2036 | rth->rt_iif = | 2031 | rth->rt_iif = in_dev->dev->ifindex; |
2037 | rth->fl.iif = in_dev->dev->ifindex; | ||
2038 | rth->dst.dev = (out_dev)->dev; | 2032 | rth->dst.dev = (out_dev)->dev; |
2039 | dev_hold(rth->dst.dev); | 2033 | dev_hold(rth->dst.dev); |
2040 | rth->fl.oif = 0; | 2034 | rth->rt_oif = 0; |
2041 | rth->rt_spec_dst= spec_dst; | 2035 | rth->rt_spec_dst= spec_dst; |
2042 | 2036 | ||
2043 | rth->dst.obsolete = -1; | ||
2044 | rth->dst.input = ip_forward; | 2037 | rth->dst.input = ip_forward; |
2045 | rth->dst.output = ip_output; | 2038 | rth->dst.output = ip_output; |
2046 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | 2039 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
2047 | 2040 | ||
2048 | rt_set_nexthop(rth, res, itag); | 2041 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); |
2049 | 2042 | ||
2050 | rth->rt_flags = flags; | 2043 | rth->rt_flags = flags; |
2051 | 2044 | ||
@@ -2057,7 +2050,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2057 | 2050 | ||
2058 | static int ip_mkroute_input(struct sk_buff *skb, | 2051 | static int ip_mkroute_input(struct sk_buff *skb, |
2059 | struct fib_result *res, | 2052 | struct fib_result *res, |
2060 | const struct flowi *fl, | 2053 | const struct flowi4 *fl4, |
2061 | struct in_device *in_dev, | 2054 | struct in_device *in_dev, |
2062 | __be32 daddr, __be32 saddr, u32 tos) | 2055 | __be32 daddr, __be32 saddr, u32 tos) |
2063 | { | 2056 | { |
@@ -2066,8 +2059,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2066 | unsigned hash; | 2059 | unsigned hash; |
2067 | 2060 | ||
2068 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2061 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2069 | if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | 2062 | if (res->fi && res->fi->fib_nhs > 1) |
2070 | fib_select_multipath(fl, res); | 2063 | fib_select_multipath(res); |
2071 | #endif | 2064 | #endif |
2072 | 2065 | ||
2073 | /* create a routing cache entry */ | 2066 | /* create a routing cache entry */ |
@@ -2076,9 +2069,12 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2076 | return err; | 2069 | return err; |
2077 | 2070 | ||
2078 | /* put it into the cache */ | 2071 | /* put it into the cache */ |
2079 | hash = rt_hash(daddr, saddr, fl->iif, | 2072 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, |
2080 | rt_genid(dev_net(rth->dst.dev))); | 2073 | rt_genid(dev_net(rth->dst.dev))); |
2081 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); | 2074 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); |
2075 | if (IS_ERR(rth)) | ||
2076 | return PTR_ERR(rth); | ||
2077 | return 0; | ||
2082 | } | 2078 | } |
2083 | 2079 | ||
2084 | /* | 2080 | /* |
@@ -2097,12 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2097 | { | 2093 | { |
2098 | struct fib_result res; | 2094 | struct fib_result res; |
2099 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2095 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2100 | struct flowi fl = { .fl4_dst = daddr, | 2096 | struct flowi4 fl4; |
2101 | .fl4_src = saddr, | ||
2102 | .fl4_tos = tos, | ||
2103 | .fl4_scope = RT_SCOPE_UNIVERSE, | ||
2104 | .mark = skb->mark, | ||
2105 | .iif = dev->ifindex }; | ||
2106 | unsigned flags = 0; | 2097 | unsigned flags = 0; |
2107 | u32 itag = 0; | 2098 | u32 itag = 0; |
2108 | struct rtable * rth; | 2099 | struct rtable * rth; |
@@ -2139,7 +2130,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2139 | /* | 2130 | /* |
2140 | * Now we are ready to route packet. | 2131 | * Now we are ready to route packet. |
2141 | */ | 2132 | */ |
2142 | err = fib_lookup(net, &fl, &res); | 2133 | fl4.flowi4_oif = 0; |
2134 | fl4.flowi4_iif = dev->ifindex; | ||
2135 | fl4.flowi4_mark = skb->mark; | ||
2136 | fl4.flowi4_tos = tos; | ||
2137 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
2138 | fl4.daddr = daddr; | ||
2139 | fl4.saddr = saddr; | ||
2140 | err = fib_lookup(net, &fl4, &res); | ||
2143 | if (err != 0) { | 2141 | if (err != 0) { |
2144 | if (!IN_DEV_FORWARD(in_dev)) | 2142 | if (!IN_DEV_FORWARD(in_dev)) |
2145 | goto e_hostunreach; | 2143 | goto e_hostunreach; |
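
ip_route_input_slow() now fills a struct flowi4 on the stack rather than the old protocol-generic struct flowi, so the FIB lookup key carries only the IPv4-relevant fields. A speculative sketch of such a key, limited to the fields assigned above (the real struct flowi4 has more members and kernel-specific types):

#include <stdio.h>
#include <stdint.h>

/* Sketch of an IPv4-only flow key, mirroring the fields the code above
 * fills in. Values are illustrative. */
struct flow4_key {
    int      oif, iif;
    uint32_t mark;
    uint8_t  tos, scope;
    uint32_t daddr, saddr;    /* network byte order in the kernel */
};

int main(void)
{
    struct flow4_key fl4 = {
        .oif   = 0,
        .iif   = 2,          /* ingress ifindex */
        .mark  = 0,
        .tos   = 0x10,
        .scope = 0,          /* RT_SCOPE_UNIVERSE */
        .daddr = 0x0100000a, /* 10.0.0.1, illustrative */
        .saddr = 0x0200000a, /* 10.0.0.2 */
    };
    printf("key: iif=%d tos=%#x\n", fl4.iif, (unsigned)fl4.tos);
    return 0;
}
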
@@ -2168,7 +2166,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2168 | if (res.type != RTN_UNICAST) | 2166 | if (res.type != RTN_UNICAST) |
2169 | goto martian_destination; | 2167 | goto martian_destination; |
2170 | 2168 | ||
2171 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2169 | err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); |
2172 | out: return err; | 2170 | out: return err; |
2173 | 2171 | ||
2174 | brd_input: | 2172 | brd_input: |
@@ -2190,29 +2188,23 @@ brd_input: | |||
2190 | RT_CACHE_STAT_INC(in_brd); | 2188 | RT_CACHE_STAT_INC(in_brd); |
2191 | 2189 | ||
2192 | local_input: | 2190 | local_input: |
2193 | rth = dst_alloc(&ipv4_dst_ops); | 2191 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
2194 | if (!rth) | 2192 | if (!rth) |
2195 | goto e_nobufs; | 2193 | goto e_nobufs; |
2196 | 2194 | ||
2197 | rth->dst.output= ip_rt_bug; | 2195 | rth->dst.output= ip_rt_bug; |
2198 | rth->dst.obsolete = -1; | ||
2199 | rth->rt_genid = rt_genid(net); | 2196 | rth->rt_genid = rt_genid(net); |
2200 | 2197 | ||
2201 | atomic_set(&rth->dst.__refcnt, 1); | 2198 | rth->rt_key_dst = daddr; |
2202 | rth->dst.flags= DST_HOST; | ||
2203 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2204 | rth->dst.flags |= DST_NOPOLICY; | ||
2205 | rth->fl.fl4_dst = daddr; | ||
2206 | rth->rt_dst = daddr; | 2199 | rth->rt_dst = daddr; |
2207 | rth->fl.fl4_tos = tos; | 2200 | rth->rt_tos = tos; |
2208 | rth->fl.mark = skb->mark; | 2201 | rth->rt_mark = skb->mark; |
2209 | rth->fl.fl4_src = saddr; | 2202 | rth->rt_key_src = saddr; |
2210 | rth->rt_src = saddr; | 2203 | rth->rt_src = saddr; |
2211 | #ifdef CONFIG_NET_CLS_ROUTE | 2204 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2212 | rth->dst.tclassid = itag; | 2205 | rth->dst.tclassid = itag; |
2213 | #endif | 2206 | #endif |
2214 | rth->rt_iif = | 2207 | rth->rt_iif = dev->ifindex; |
2215 | rth->fl.iif = dev->ifindex; | ||
2216 | rth->dst.dev = net->loopback_dev; | 2208 | rth->dst.dev = net->loopback_dev; |
2217 | dev_hold(rth->dst.dev); | 2209 | dev_hold(rth->dst.dev); |
2218 | rth->rt_gateway = daddr; | 2210 | rth->rt_gateway = daddr; |
@@ -2225,8 +2217,11 @@ local_input: | |||
2225 | rth->rt_flags &= ~RTCF_LOCAL; | 2217 | rth->rt_flags &= ~RTCF_LOCAL; |
2226 | } | 2218 | } |
2227 | rth->rt_type = res.type; | 2219 | rth->rt_type = res.type; |
2228 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2220 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); |
2229 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); | 2221 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); |
2222 | err = 0; | ||
2223 | if (IS_ERR(rth)) | ||
2224 | err = PTR_ERR(rth); | ||
2230 | goto out; | 2225 | goto out; |
2231 | 2226 | ||
2232 | no_route: | 2227 | no_route: |
@@ -2288,12 +2283,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2288 | 2283 | ||
2289 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2284 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
2290 | rth = rcu_dereference(rth->dst.rt_next)) { | 2285 | rth = rcu_dereference(rth->dst.rt_next)) { |
2291 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | | 2286 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | |
2292 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | | 2287 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | |
2293 | (rth->fl.iif ^ iif) | | 2288 | (rth->rt_iif ^ iif) | |
2294 | rth->fl.oif | | 2289 | rth->rt_oif | |
2295 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2290 | (rth->rt_tos ^ tos)) == 0 && |
2296 | rth->fl.mark == skb->mark && | 2291 | rth->rt_mark == skb->mark && |
2297 | net_eq(dev_net(rth->dst.dev), net) && | 2292 | net_eq(dev_net(rth->dst.dev), net) && |
2298 | !rt_is_expired(rth)) { | 2293 | !rt_is_expired(rth)) { |
2299 | if (noref) { | 2294 | if (noref) { |
@@ -2326,8 +2321,8 @@ skip_cache: | |||
2326 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2321 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2327 | 2322 | ||
2328 | if (in_dev) { | 2323 | if (in_dev) { |
2329 | int our = ip_check_mc(in_dev, daddr, saddr, | 2324 | int our = ip_check_mc_rcu(in_dev, daddr, saddr, |
2330 | ip_hdr(skb)->protocol); | 2325 | ip_hdr(skb)->protocol); |
2331 | if (our | 2326 | if (our |
2332 | #ifdef CONFIG_IP_MROUTE | 2327 | #ifdef CONFIG_IP_MROUTE |
2333 | || | 2328 | || |
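
The cache walk in ip_route_input_common() above compares the whole key branchlessly: each field pair is XORed (zero exactly when equal), the XORs are ORed together, and a single test against zero decides the match, trading a chain of conditional branches for straight-line arithmetic in this hot loop. A distilled version:

#include <stdio.h>
#include <stdint.h>

/* Branchless multi-field compare: XOR of equal fields is 0, so the OR
 * of all XORs is 0 exactly when every field matches. */
struct key { uint32_t dst, src; int iif, oif; uint8_t tos; };

static int keys_match(const struct key *a, const struct key *b)
{
    return ((a->dst ^ b->dst) |
            (a->src ^ b->src) |
            (uint32_t)(a->iif ^ b->iif) |
            (uint32_t)(a->oif ^ b->oif) |
            (uint32_t)(a->tos ^ b->tos)) == 0;
}

int main(void)
{
    struct key a = { 1, 2, 3, 0, 0x10 };
    struct key b = a;
    printf("match=%d\n", keys_match(&a, &b));
    b.tos ^= 1;
    printf("match=%d\n", keys_match(&a, &b));
    return 0;
}
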
@@ -2351,98 +2346,91 @@ skip_cache: | |||
2351 | EXPORT_SYMBOL(ip_route_input_common); | 2346 | EXPORT_SYMBOL(ip_route_input_common); |
2352 | 2347 | ||
2353 | /* called with rcu_read_lock() */ | 2348 | /* called with rcu_read_lock() */ |
2354 | static int __mkroute_output(struct rtable **result, | 2349 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2355 | struct fib_result *res, | 2350 | const struct flowi4 *fl4, |
2356 | const struct flowi *fl, | 2351 | const struct flowi4 *oldflp4, |
2357 | const struct flowi *oldflp, | 2352 | struct net_device *dev_out, |
2358 | struct net_device *dev_out, | 2353 | unsigned int flags) |
2359 | unsigned flags) | ||
2360 | { | 2354 | { |
2361 | struct rtable *rth; | 2355 | struct fib_info *fi = res->fi; |
2356 | u32 tos = RT_FL_TOS(oldflp4); | ||
2362 | struct in_device *in_dev; | 2357 | struct in_device *in_dev; |
2363 | u32 tos = RT_FL_TOS(oldflp); | 2358 | u16 type = res->type; |
2359 | struct rtable *rth; | ||
2364 | 2360 | ||
2365 | if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) | 2361 | if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) |
2366 | return -EINVAL; | 2362 | return ERR_PTR(-EINVAL); |
2367 | 2363 | ||
2368 | if (ipv4_is_lbcast(fl->fl4_dst)) | 2364 | if (ipv4_is_lbcast(fl4->daddr)) |
2369 | res->type = RTN_BROADCAST; | 2365 | type = RTN_BROADCAST; |
2370 | else if (ipv4_is_multicast(fl->fl4_dst)) | 2366 | else if (ipv4_is_multicast(fl4->daddr)) |
2371 | res->type = RTN_MULTICAST; | 2367 | type = RTN_MULTICAST; |
2372 | else if (ipv4_is_zeronet(fl->fl4_dst)) | 2368 | else if (ipv4_is_zeronet(fl4->daddr)) |
2373 | return -EINVAL; | 2369 | return ERR_PTR(-EINVAL); |
2374 | 2370 | ||
2375 | if (dev_out->flags & IFF_LOOPBACK) | 2371 | if (dev_out->flags & IFF_LOOPBACK) |
2376 | flags |= RTCF_LOCAL; | 2372 | flags |= RTCF_LOCAL; |
2377 | 2373 | ||
2378 | in_dev = __in_dev_get_rcu(dev_out); | 2374 | in_dev = __in_dev_get_rcu(dev_out); |
2379 | if (!in_dev) | 2375 | if (!in_dev) |
2380 | return -EINVAL; | 2376 | return ERR_PTR(-EINVAL); |
2381 | 2377 | ||
2382 | if (res->type == RTN_BROADCAST) { | 2378 | if (type == RTN_BROADCAST) { |
2383 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2379 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
2384 | res->fi = NULL; | 2380 | fi = NULL; |
2385 | } else if (res->type == RTN_MULTICAST) { | 2381 | } else if (type == RTN_MULTICAST) { |
2386 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 2382 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
2387 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, | 2383 | if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, |
2388 | oldflp->proto)) | 2384 | oldflp4->flowi4_proto)) |
2389 | flags &= ~RTCF_LOCAL; | 2385 | flags &= ~RTCF_LOCAL; |
2390 | /* If multicast route do not exist use | 2386 | /* If multicast route do not exist use |
2391 | * default one, but do not gateway in this case. | 2387 | * default one, but do not gateway in this case. |
2392 | * Yes, it is hack. | 2388 | * Yes, it is hack. |
2393 | */ | 2389 | */ |
2394 | if (res->fi && res->prefixlen < 4) | 2390 | if (fi && res->prefixlen < 4) |
2395 | res->fi = NULL; | 2391 | fi = NULL; |
2396 | } | 2392 | } |
2397 | 2393 | ||
2398 | 2394 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
2399 | rth = dst_alloc(&ipv4_dst_ops); | 2395 | IN_DEV_CONF_GET(in_dev, NOXFRM)); |
2400 | if (!rth) | 2396 | if (!rth) |
2401 | return -ENOBUFS; | 2397 | return ERR_PTR(-ENOBUFS); |
2402 | 2398 | ||
2403 | atomic_set(&rth->dst.__refcnt, 1); | 2399 | rth->rt_key_dst = oldflp4->daddr; |
2404 | rth->dst.flags= DST_HOST; | 2400 | rth->rt_tos = tos; |
2405 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2401 | rth->rt_key_src = oldflp4->saddr; |
2406 | rth->dst.flags |= DST_NOXFRM; | 2402 | rth->rt_oif = oldflp4->flowi4_oif; |
2407 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2403 | rth->rt_mark = oldflp4->flowi4_mark; |
2408 | rth->dst.flags |= DST_NOPOLICY; | 2404 | rth->rt_dst = fl4->daddr; |
2409 | 2405 | rth->rt_src = fl4->saddr; | |
2410 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2406 | rth->rt_iif = 0; |
2411 | rth->fl.fl4_tos = tos; | ||
2412 | rth->fl.fl4_src = oldflp->fl4_src; | ||
2413 | rth->fl.oif = oldflp->oif; | ||
2414 | rth->fl.mark = oldflp->mark; | ||
2415 | rth->rt_dst = fl->fl4_dst; | ||
2416 | rth->rt_src = fl->fl4_src; | ||
2417 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | ||
2418 | /* get references to the devices that are to be held by the routing | 2407 | /* get references to the devices that are to be held by the routing |
2419 | cache entry */ | 2408 | cache entry */ |
2420 | rth->dst.dev = dev_out; | 2409 | rth->dst.dev = dev_out; |
2421 | dev_hold(dev_out); | 2410 | dev_hold(dev_out); |
2422 | rth->rt_gateway = fl->fl4_dst; | 2411 | rth->rt_gateway = fl4->daddr; |
2423 | rth->rt_spec_dst= fl->fl4_src; | 2412 | rth->rt_spec_dst= fl4->saddr; |
2424 | 2413 | ||
2425 | rth->dst.output=ip_output; | 2414 | rth->dst.output=ip_output; |
2426 | rth->dst.obsolete = -1; | ||
2427 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2415 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2428 | 2416 | ||
2429 | RT_CACHE_STAT_INC(out_slow_tot); | 2417 | RT_CACHE_STAT_INC(out_slow_tot); |
2430 | 2418 | ||
2431 | if (flags & RTCF_LOCAL) { | 2419 | if (flags & RTCF_LOCAL) { |
2432 | rth->dst.input = ip_local_deliver; | 2420 | rth->dst.input = ip_local_deliver; |
2433 | rth->rt_spec_dst = fl->fl4_dst; | 2421 | rth->rt_spec_dst = fl4->daddr; |
2434 | } | 2422 | } |
2435 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2423 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
2436 | rth->rt_spec_dst = fl->fl4_src; | 2424 | rth->rt_spec_dst = fl4->saddr; |
2437 | if (flags & RTCF_LOCAL && | 2425 | if (flags & RTCF_LOCAL && |
2438 | !(dev_out->flags & IFF_LOOPBACK)) { | 2426 | !(dev_out->flags & IFF_LOOPBACK)) { |
2439 | rth->dst.output = ip_mc_output; | 2427 | rth->dst.output = ip_mc_output; |
2440 | RT_CACHE_STAT_INC(out_slow_mc); | 2428 | RT_CACHE_STAT_INC(out_slow_mc); |
2441 | } | 2429 | } |
2442 | #ifdef CONFIG_IP_MROUTE | 2430 | #ifdef CONFIG_IP_MROUTE |
2443 | if (res->type == RTN_MULTICAST) { | 2431 | if (type == RTN_MULTICAST) { |
2444 | if (IN_DEV_MFORWARD(in_dev) && | 2432 | if (IN_DEV_MFORWARD(in_dev) && |
2445 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2433 | !ipv4_is_local_multicast(oldflp4->daddr)) { |
2446 | rth->dst.input = ip_mr_input; | 2434 | rth->dst.input = ip_mr_input; |
2447 | rth->dst.output = ip_mc_output; | 2435 | rth->dst.output = ip_mc_output; |
2448 | } | 2436 | } |
@@ -2450,31 +2438,10 @@ static int __mkroute_output(struct rtable **result, | |||
2450 | #endif | 2438 | #endif |
2451 | } | 2439 | } |
2452 | 2440 | ||
2453 | rt_set_nexthop(rth, res, 0); | 2441 | rt_set_nexthop(rth, oldflp4, res, fi, type, 0); |
2454 | 2442 | ||
2455 | rth->rt_flags = flags; | 2443 | rth->rt_flags = flags; |
2456 | *result = rth; | 2444 | return rth; |
2457 | return 0; | ||
2458 | } | ||
2459 | |||
2460 | /* called with rcu_read_lock() */ | ||
2461 | static int ip_mkroute_output(struct rtable **rp, | ||
2462 | struct fib_result *res, | ||
2463 | const struct flowi *fl, | ||
2464 | const struct flowi *oldflp, | ||
2465 | struct net_device *dev_out, | ||
2466 | unsigned flags) | ||
2467 | { | ||
2468 | struct rtable *rth = NULL; | ||
2469 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | ||
2470 | unsigned hash; | ||
2471 | if (err == 0) { | ||
2472 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | ||
2473 | rt_genid(dev_net(dev_out))); | ||
2474 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); | ||
2475 | } | ||
2476 | |||
2477 | return err; | ||
2478 | } | 2445 | } |
2479 | 2446 | ||
2480 | /* | 2447 | /* |
@@ -2482,34 +2449,36 @@ static int ip_mkroute_output(struct rtable **rp, | |||
2482 | * called with rcu_read_lock(); | 2449 | * called with rcu_read_lock(); |
2483 | */ | 2450 | */ |
2484 | 2451 | ||
2485 | static int ip_route_output_slow(struct net *net, struct rtable **rp, | 2452 | static struct rtable *ip_route_output_slow(struct net *net, |
2486 | const struct flowi *oldflp) | 2453 | const struct flowi4 *oldflp4) |
2487 | { | 2454 | { |
2488 | u32 tos = RT_FL_TOS(oldflp); | 2455 | u32 tos = RT_FL_TOS(oldflp4); |
2489 | struct flowi fl = { .fl4_dst = oldflp->fl4_dst, | 2456 | struct flowi4 fl4; |
2490 | .fl4_src = oldflp->fl4_src, | ||
2491 | .fl4_tos = tos & IPTOS_RT_MASK, | ||
2492 | .fl4_scope = ((tos & RTO_ONLINK) ? | ||
2493 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), | ||
2494 | .mark = oldflp->mark, | ||
2495 | .iif = net->loopback_dev->ifindex, | ||
2496 | .oif = oldflp->oif }; | ||
2497 | struct fib_result res; | 2457 | struct fib_result res; |
2498 | unsigned int flags = 0; | 2458 | unsigned int flags = 0; |
2499 | struct net_device *dev_out = NULL; | 2459 | struct net_device *dev_out = NULL; |
2500 | int err; | 2460 | struct rtable *rth; |
2501 | |||
2502 | 2461 | ||
2503 | res.fi = NULL; | 2462 | res.fi = NULL; |
2504 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2463 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
2505 | res.r = NULL; | 2464 | res.r = NULL; |
2506 | #endif | 2465 | #endif |
2507 | 2466 | ||
2508 | if (oldflp->fl4_src) { | 2467 | fl4.flowi4_oif = oldflp4->flowi4_oif; |
2509 | err = -EINVAL; | 2468 | fl4.flowi4_iif = net->loopback_dev->ifindex; |
2510 | if (ipv4_is_multicast(oldflp->fl4_src) || | 2469 | fl4.flowi4_mark = oldflp4->flowi4_mark; |
2511 | ipv4_is_lbcast(oldflp->fl4_src) || | 2470 | fl4.daddr = oldflp4->daddr; |
2512 | ipv4_is_zeronet(oldflp->fl4_src)) | 2471 | fl4.saddr = oldflp4->saddr; |
2472 | fl4.flowi4_tos = tos & IPTOS_RT_MASK; | ||
2473 | fl4.flowi4_scope = ((tos & RTO_ONLINK) ? | ||
2474 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | ||
2475 | |||
2476 | rcu_read_lock(); | ||
2477 | if (oldflp4->saddr) { | ||
2478 | rth = ERR_PTR(-EINVAL); | ||
2479 | if (ipv4_is_multicast(oldflp4->saddr) || | ||
2480 | ipv4_is_lbcast(oldflp4->saddr) || | ||
2481 | ipv4_is_zeronet(oldflp4->saddr)) | ||
2513 | goto out; | 2482 | goto out; |
2514 | 2483 | ||
2515 | /* I removed check for oif == dev_out->oif here. | 2484 | /* I removed check for oif == dev_out->oif here. |
@@ -2520,11 +2489,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2520 | of another iface. --ANK | 2489 | of another iface. --ANK |
2521 | */ | 2490 | */ |
2522 | 2491 | ||
2523 | if (oldflp->oif == 0 && | 2492 | if (oldflp4->flowi4_oif == 0 && |
2524 | (ipv4_is_multicast(oldflp->fl4_dst) || | 2493 | (ipv4_is_multicast(oldflp4->daddr) || |
2525 | ipv4_is_lbcast(oldflp->fl4_dst))) { | 2494 | ipv4_is_lbcast(oldflp4->daddr))) { |
2526 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2495 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2527 | dev_out = __ip_dev_find(net, oldflp->fl4_src, false); | 2496 | dev_out = __ip_dev_find(net, oldflp4->saddr, false); |
2528 | if (dev_out == NULL) | 2497 | if (dev_out == NULL) |
2529 | goto out; | 2498 | goto out; |
2530 | 2499 | ||
@@ -2543,60 +2512,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2543 | Luckily, this hack is good workaround. | 2512 | Luckily, this hack is good workaround. |
2544 | */ | 2513 | */ |
2545 | 2514 | ||
2546 | fl.oif = dev_out->ifindex; | 2515 | fl4.flowi4_oif = dev_out->ifindex; |
2547 | goto make_route; | 2516 | goto make_route; |
2548 | } | 2517 | } |
2549 | 2518 | ||
2550 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { | 2519 | if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { |
2551 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2520 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
2552 | if (!__ip_dev_find(net, oldflp->fl4_src, false)) | 2521 | if (!__ip_dev_find(net, oldflp4->saddr, false)) |
2553 | goto out; | 2522 | goto out; |
2554 | } | 2523 | } |
2555 | } | 2524 | } |
2556 | 2525 | ||
2557 | 2526 | ||
2558 | if (oldflp->oif) { | 2527 | if (oldflp4->flowi4_oif) { |
2559 | dev_out = dev_get_by_index_rcu(net, oldflp->oif); | 2528 | dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); |
2560 | err = -ENODEV; | 2529 | rth = ERR_PTR(-ENODEV); |
2561 | if (dev_out == NULL) | 2530 | if (dev_out == NULL) |
2562 | goto out; | 2531 | goto out; |
2563 | 2532 | ||
2564 | /* RACE: Check return value of inet_select_addr instead. */ | 2533 | /* RACE: Check return value of inet_select_addr instead. */ |
2565 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { | 2534 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { |
2566 | err = -ENETUNREACH; | 2535 | rth = ERR_PTR(-ENETUNREACH); |
2567 | goto out; | 2536 | goto out; |
2568 | } | 2537 | } |
2569 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || | 2538 | if (ipv4_is_local_multicast(oldflp4->daddr) || |
2570 | ipv4_is_lbcast(oldflp->fl4_dst)) { | 2539 | ipv4_is_lbcast(oldflp4->daddr)) { |
2571 | if (!fl.fl4_src) | 2540 | if (!fl4.saddr) |
2572 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2541 | fl4.saddr = inet_select_addr(dev_out, 0, |
2573 | RT_SCOPE_LINK); | 2542 | RT_SCOPE_LINK); |
2574 | goto make_route; | 2543 | goto make_route; |
2575 | } | 2544 | } |
2576 | if (!fl.fl4_src) { | 2545 | if (!fl4.saddr) { |
2577 | if (ipv4_is_multicast(oldflp->fl4_dst)) | 2546 | if (ipv4_is_multicast(oldflp4->daddr)) |
2578 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2547 | fl4.saddr = inet_select_addr(dev_out, 0, |
2579 | fl.fl4_scope); | 2548 | fl4.flowi4_scope); |
2580 | else if (!oldflp->fl4_dst) | 2549 | else if (!oldflp4->daddr) |
2581 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2550 | fl4.saddr = inet_select_addr(dev_out, 0, |
2582 | RT_SCOPE_HOST); | 2551 | RT_SCOPE_HOST); |
2583 | } | 2552 | } |
2584 | } | 2553 | } |
2585 | 2554 | ||
2586 | if (!fl.fl4_dst) { | 2555 | if (!fl4.daddr) { |
2587 | fl.fl4_dst = fl.fl4_src; | 2556 | fl4.daddr = fl4.saddr; |
2588 | if (!fl.fl4_dst) | 2557 | if (!fl4.daddr) |
2589 | fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); | 2558 | fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); |
2590 | dev_out = net->loopback_dev; | 2559 | dev_out = net->loopback_dev; |
2591 | fl.oif = net->loopback_dev->ifindex; | 2560 | fl4.flowi4_oif = net->loopback_dev->ifindex; |
2592 | res.type = RTN_LOCAL; | 2561 | res.type = RTN_LOCAL; |
2593 | flags |= RTCF_LOCAL; | 2562 | flags |= RTCF_LOCAL; |
2594 | goto make_route; | 2563 | goto make_route; |
2595 | } | 2564 | } |
2596 | 2565 | ||
2597 | if (fib_lookup(net, &fl, &res)) { | 2566 | if (fib_lookup(net, &fl4, &res)) { |
2598 | res.fi = NULL; | 2567 | res.fi = NULL; |
2599 | if (oldflp->oif) { | 2568 | if (oldflp4->flowi4_oif) { |
2600 | /* Apparently, routing tables are wrong. Assume, | 2569 | /* Apparently, routing tables are wrong. Assume, |
2601 | that the destination is on link. | 2570 | that the destination is on link. |
2602 | 2571 | ||
@@ -2615,90 +2584,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2615 | likely IPv6, but we do not. | 2584 | likely IPv6, but we do not. |
2616 | */ | 2585 | */ |
2617 | 2586 | ||
2618 | if (fl.fl4_src == 0) | 2587 | if (fl4.saddr == 0) |
2619 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2588 | fl4.saddr = inet_select_addr(dev_out, 0, |
2620 | RT_SCOPE_LINK); | 2589 | RT_SCOPE_LINK); |
2621 | res.type = RTN_UNICAST; | 2590 | res.type = RTN_UNICAST; |
2622 | goto make_route; | 2591 | goto make_route; |
2623 | } | 2592 | } |
2624 | err = -ENETUNREACH; | 2593 | rth = ERR_PTR(-ENETUNREACH); |
2625 | goto out; | 2594 | goto out; |
2626 | } | 2595 | } |
2627 | 2596 | ||
2628 | if (res.type == RTN_LOCAL) { | 2597 | if (res.type == RTN_LOCAL) { |
2629 | if (!fl.fl4_src) { | 2598 | if (!fl4.saddr) { |
2630 | if (res.fi->fib_prefsrc) | 2599 | if (res.fi->fib_prefsrc) |
2631 | fl.fl4_src = res.fi->fib_prefsrc; | 2600 | fl4.saddr = res.fi->fib_prefsrc; |
2632 | else | 2601 | else |
2633 | fl.fl4_src = fl.fl4_dst; | 2602 | fl4.saddr = fl4.daddr; |
2634 | } | 2603 | } |
2635 | dev_out = net->loopback_dev; | 2604 | dev_out = net->loopback_dev; |
2636 | fl.oif = dev_out->ifindex; | 2605 | fl4.flowi4_oif = dev_out->ifindex; |
2637 | res.fi = NULL; | 2606 | res.fi = NULL; |
2638 | flags |= RTCF_LOCAL; | 2607 | flags |= RTCF_LOCAL; |
2639 | goto make_route; | 2608 | goto make_route; |
2640 | } | 2609 | } |
2641 | 2610 | ||
2642 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2611 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2643 | if (res.fi->fib_nhs > 1 && fl.oif == 0) | 2612 | if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) |
2644 | fib_select_multipath(&fl, &res); | 2613 | fib_select_multipath(&res); |
2645 | else | 2614 | else |
2646 | #endif | 2615 | #endif |
2647 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | 2616 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) |
2648 | fib_select_default(net, &fl, &res); | 2617 | fib_select_default(&res); |
2649 | 2618 | ||
2650 | if (!fl.fl4_src) | 2619 | if (!fl4.saddr) |
2651 | fl.fl4_src = FIB_RES_PREFSRC(res); | 2620 | fl4.saddr = FIB_RES_PREFSRC(res); |
2652 | 2621 | ||
2653 | dev_out = FIB_RES_DEV(res); | 2622 | dev_out = FIB_RES_DEV(res); |
2654 | fl.oif = dev_out->ifindex; | 2623 | fl4.flowi4_oif = dev_out->ifindex; |
2655 | 2624 | ||
2656 | 2625 | ||
2657 | make_route: | 2626 | make_route: |
2658 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2627 | rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); |
2628 | if (!IS_ERR(rth)) { | ||
2629 | unsigned int hash; | ||
2659 | 2630 | ||
2660 | out: return err; | 2631 | hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, |
2632 | rt_genid(dev_net(dev_out))); | ||
2633 | rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); | ||
2634 | } | ||
2635 | |||
2636 | out: | ||
2637 | rcu_read_unlock(); | ||
2638 | return rth; | ||
2661 | } | 2639 | } |
2662 | 2640 | ||
2663 | int __ip_route_output_key(struct net *net, struct rtable **rp, | 2641 | struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) |
2664 | const struct flowi *flp) | ||
2665 | { | 2642 | { |
2666 | unsigned int hash; | ||
2667 | int res; | ||
2668 | struct rtable *rth; | 2643 | struct rtable *rth; |
2644 | unsigned int hash; | ||
2669 | 2645 | ||
2670 | if (!rt_caching(net)) | 2646 | if (!rt_caching(net)) |
2671 | goto slow_output; | 2647 | goto slow_output; |
2672 | 2648 | ||
2673 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2649 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); |
2674 | 2650 | ||
2675 | rcu_read_lock_bh(); | 2651 | rcu_read_lock_bh(); |
2676 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | 2652 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
2677 | rth = rcu_dereference_bh(rth->dst.rt_next)) { | 2653 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
2678 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2654 | if (rth->rt_key_dst == flp4->daddr && |
2679 | rth->fl.fl4_src == flp->fl4_src && | 2655 | rth->rt_key_src == flp4->saddr && |
2680 | rt_is_output_route(rth) && | 2656 | rt_is_output_route(rth) && |
2681 | rth->fl.oif == flp->oif && | 2657 | rth->rt_oif == flp4->flowi4_oif && |
2682 | rth->fl.mark == flp->mark && | 2658 | rth->rt_mark == flp4->flowi4_mark && |
2683 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2659 | !((rth->rt_tos ^ flp4->flowi4_tos) & |
2684 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2660 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2685 | net_eq(dev_net(rth->dst.dev), net) && | 2661 | net_eq(dev_net(rth->dst.dev), net) && |
2686 | !rt_is_expired(rth)) { | 2662 | !rt_is_expired(rth)) { |
2687 | dst_use(&rth->dst, jiffies); | 2663 | dst_use(&rth->dst, jiffies); |
2688 | RT_CACHE_STAT_INC(out_hit); | 2664 | RT_CACHE_STAT_INC(out_hit); |
2689 | rcu_read_unlock_bh(); | 2665 | rcu_read_unlock_bh(); |
2690 | *rp = rth; | 2666 | return rth; |
2691 | return 0; | ||
2692 | } | 2667 | } |
2693 | RT_CACHE_STAT_INC(out_hlist_search); | 2668 | RT_CACHE_STAT_INC(out_hlist_search); |
2694 | } | 2669 | } |
2695 | rcu_read_unlock_bh(); | 2670 | rcu_read_unlock_bh(); |
2696 | 2671 | ||
2697 | slow_output: | 2672 | slow_output: |
2698 | rcu_read_lock(); | 2673 | return ip_route_output_slow(net, flp4); |
2699 | res = ip_route_output_slow(net, rp, flp); | ||
2700 | rcu_read_unlock(); | ||
2701 | return res; | ||
2702 | } | 2674 | } |
2703 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2675 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2704 | 2676 | ||
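Note the TOS comparison in the cache walk above: the two values are XORed and masked with (IPTOS_RT_MASK | RTO_ONLINK), so only the routing-relevant TOS bits, plus the internal on-link flag that RT_FL_TOS() folds into the same byte, participate in the match, and unrelated TOS bits cannot cause spurious cache misses. A sketch (mask constants approximated, not copied from kernel headers):

#include <stdio.h>
#include <stdint.h>

/* Masked TOS comparison: two TOS values match if they agree on the
 * routing-relevant bits only. */
#define TOS_RT_MASK 0x1C   /* routing-relevant TOS bits, illustrative */
#define ONLINK_FLAG 0x01   /* internal flag folded into the TOS byte */

static int tos_match(uint8_t cached, uint8_t wanted)
{
    return ((cached ^ wanted) & (TOS_RT_MASK | ONLINK_FLAG)) == 0;
}

int main(void)
{
    printf("%d\n", tos_match(0x10, 0x10)); /* 1: identical */
    printf("%d\n", tos_match(0x10, 0x90)); /* 1: differ only outside mask */
    printf("%d\n", tos_match(0x10, 0x14)); /* 0: routing bits differ */
    return 0;
}
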
@@ -2726,17 +2698,14 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2726 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2698 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2727 | }; | 2699 | }; |
2728 | 2700 | ||
2729 | 2701 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | |
2730 | static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) | ||
2731 | { | 2702 | { |
2732 | struct rtable *ort = *rp; | 2703 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); |
2733 | struct rtable *rt = (struct rtable *) | 2704 | struct rtable *ort = (struct rtable *) dst_orig; |
2734 | dst_alloc(&ipv4_dst_blackhole_ops); | ||
2735 | 2705 | ||
2736 | if (rt) { | 2706 | if (rt) { |
2737 | struct dst_entry *new = &rt->dst; | 2707 | struct dst_entry *new = &rt->dst; |
2738 | 2708 | ||
2739 | atomic_set(&new->__refcnt, 1); | ||
2740 | new->__use = 1; | 2709 | new->__use = 1; |
2741 | new->input = dst_discard; | 2710 | new->input = dst_discard; |
2742 | new->output = dst_discard; | 2711 | new->output = dst_discard; |
@@ -2746,7 +2715,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2746 | if (new->dev) | 2715 | if (new->dev) |
2747 | dev_hold(new->dev); | 2716 | dev_hold(new->dev); |
2748 | 2717 | ||
2749 | rt->fl = ort->fl; | 2718 | rt->rt_key_dst = ort->rt_key_dst; |
2719 | rt->rt_key_src = ort->rt_key_src; | ||
2720 | rt->rt_tos = ort->rt_tos; | ||
2721 | rt->rt_iif = ort->rt_iif; | ||
2722 | rt->rt_oif = ort->rt_oif; | ||
2723 | rt->rt_mark = ort->rt_mark; | ||
2750 | 2724 | ||
2751 | rt->rt_genid = rt_genid(net); | 2725 | rt->rt_genid = rt_genid(net); |
2752 | rt->rt_flags = ort->rt_flags; | 2726 | rt->rt_flags = ort->rt_flags; |
@@ -2759,46 +2733,40 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2759 | rt->peer = ort->peer; | 2733 | rt->peer = ort->peer; |
2760 | if (rt->peer) | 2734 | if (rt->peer) |
2761 | atomic_inc(&rt->peer->refcnt); | 2735 | atomic_inc(&rt->peer->refcnt); |
2736 | rt->fi = ort->fi; | ||
2737 | if (rt->fi) | ||
2738 | atomic_inc(&rt->fi->fib_clntref); | ||
2762 | 2739 | ||
2763 | dst_free(new); | 2740 | dst_free(new); |
2764 | } | 2741 | } |
2765 | 2742 | ||
2766 | dst_release(&(*rp)->dst); | 2743 | dst_release(dst_orig); |
2767 | *rp = rt; | 2744 | |
2768 | return rt ? 0 : -ENOMEM; | 2745 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); |
2769 | } | 2746 | } |
2770 | 2747 | ||
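ipv4_blackhole_route() above clones an existing rtable into one whose input and output handlers are dst_discard, keeping the flow identity fields intact; xfrm can park traffic on such a route while IPsec state is still resolving rather than failing the lookup outright. A toy model of the clone-and-discard idea:

#include <stdio.h>

/* Sketch: a "blackhole" clone keeps the original's identity fields but
 * swaps both packet handlers for a discard routine. */
struct pkt { int len; };
struct dst {
    int (*input)(struct pkt *);
    int (*output)(struct pkt *);
    unsigned int key_dst;        /* copied from the original */
};

static int dst_discard(struct pkt *p)
{
    (void)p;
    return 0;                    /* swallow the packet */
}

static struct dst make_blackhole(const struct dst *orig)
{
    struct dst bh = *orig;       /* copy the flow identity */
    bh.input = dst_discard;
    bh.output = dst_discard;
    return bh;
}

int main(void)
{
    struct dst orig = { 0, 0, 42 };
    struct dst bh = make_blackhole(&orig);
    struct pkt p = { 100 };
    printf("dropped, rc=%d key=%u\n", bh.output(&p), bh.key_dst);
    return 0;
}
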
2771 | int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | 2748 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, |
2772 | struct sock *sk, int flags) | 2749 | struct sock *sk) |
2773 | { | 2750 | { |
2774 | int err; | 2751 | struct rtable *rt = __ip_route_output_key(net, flp4); |
2775 | 2752 | ||
2776 | if ((err = __ip_route_output_key(net, rp, flp)) != 0) | 2753 | if (IS_ERR(rt)) |
2777 | return err; | 2754 | return rt; |
2778 | 2755 | ||
2779 | if (flp->proto) { | 2756 | if (flp4->flowi4_proto) { |
2780 | if (!flp->fl4_src) | 2757 | if (!flp4->saddr) |
2781 | flp->fl4_src = (*rp)->rt_src; | 2758 | flp4->saddr = rt->rt_src; |
2782 | if (!flp->fl4_dst) | 2759 | if (!flp4->daddr) |
2783 | flp->fl4_dst = (*rp)->rt_dst; | 2760 | flp4->daddr = rt->rt_dst; |
2784 | err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, | 2761 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
2785 | flags ? XFRM_LOOKUP_WAIT : 0); | 2762 | flowi4_to_flowi(flp4), |
2786 | if (err == -EREMOTE) | 2763 | sk, 0); |
2787 | err = ipv4_dst_blackhole(net, rp, flp); | ||
2788 | |||
2789 | return err; | ||
2790 | } | 2764 | } |
2791 | 2765 | ||
2792 | return 0; | 2766 | return rt; |
2793 | } | 2767 | } |
2794 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2768 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2795 | 2769 | ||
2796 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | ||
2797 | { | ||
2798 | return ip_route_output_flow(net, rp, flp, NULL, 0); | ||
2799 | } | ||
2800 | EXPORT_SYMBOL(ip_route_output_key); | ||
2801 | |||
2802 | static int rt_fill_info(struct net *net, | 2770 | static int rt_fill_info(struct net *net, |
2803 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2771 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
2804 | int nowait, unsigned int flags) | 2772 | int nowait, unsigned int flags) |
@@ -2817,7 +2785,7 @@ static int rt_fill_info(struct net *net, | |||
2817 | r->rtm_family = AF_INET; | 2785 | r->rtm_family = AF_INET; |
2818 | r->rtm_dst_len = 32; | 2786 | r->rtm_dst_len = 32; |
2819 | r->rtm_src_len = 0; | 2787 | r->rtm_src_len = 0; |
2820 | r->rtm_tos = rt->fl.fl4_tos; | 2788 | r->rtm_tos = rt->rt_tos; |
2821 | r->rtm_table = RT_TABLE_MAIN; | 2789 | r->rtm_table = RT_TABLE_MAIN; |
2822 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2790 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); |
2823 | r->rtm_type = rt->rt_type; | 2791 | r->rtm_type = rt->rt_type; |
@@ -2829,19 +2797,19 @@ static int rt_fill_info(struct net *net, | |||
2829 | 2797 | ||
2830 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2798 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); |
2831 | 2799 | ||
2832 | if (rt->fl.fl4_src) { | 2800 | if (rt->rt_key_src) { |
2833 | r->rtm_src_len = 32; | 2801 | r->rtm_src_len = 32; |
2834 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2802 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); |
2835 | } | 2803 | } |
2836 | if (rt->dst.dev) | 2804 | if (rt->dst.dev) |
2837 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2805 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2838 | #ifdef CONFIG_NET_CLS_ROUTE | 2806 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2839 | if (rt->dst.tclassid) | 2807 | if (rt->dst.tclassid) |
2840 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2808 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2841 | #endif | 2809 | #endif |
2842 | if (rt_is_input_route(rt)) | 2810 | if (rt_is_input_route(rt)) |
2843 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2811 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
2844 | else if (rt->rt_src != rt->fl.fl4_src) | 2812 | else if (rt->rt_src != rt->rt_key_src) |
2845 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2813 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); |
2846 | 2814 | ||
2847 | if (rt->rt_dst != rt->rt_gateway) | 2815 | if (rt->rt_dst != rt->rt_gateway) |
@@ -2850,11 +2818,12 @@ static int rt_fill_info(struct net *net, | |||
2850 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 2818 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
2851 | goto nla_put_failure; | 2819 | goto nla_put_failure; |
2852 | 2820 | ||
2853 | if (rt->fl.mark) | 2821 | if (rt->rt_mark) |
2854 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); | 2822 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); |
2855 | 2823 | ||
2856 | error = rt->dst.error; | 2824 | error = rt->dst.error; |
2857 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | 2825 | expires = (rt->peer && rt->peer->pmtu_expires) ? |
2826 | rt->peer->pmtu_expires - jiffies : 0; | ||
2858 | if (rt->peer) { | 2827 | if (rt->peer) { |
2859 | inet_peer_refcheck(rt->peer); | 2828 | inet_peer_refcheck(rt->peer); |
2860 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2829 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
@@ -2884,7 +2853,7 @@ static int rt_fill_info(struct net *net, | |||
2884 | } | 2853 | } |
2885 | } else | 2854 | } else |
2886 | #endif | 2855 | #endif |
2887 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2856 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); |
2888 | } | 2857 | } |
2889 | 2858 | ||
2890 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 2859 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
@@ -2958,14 +2927,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2958 | if (err == 0 && rt->dst.error) | 2927 | if (err == 0 && rt->dst.error) |
2959 | err = -rt->dst.error; | 2928 | err = -rt->dst.error; |
2960 | } else { | 2929 | } else { |
2961 | struct flowi fl = { | 2930 | struct flowi4 fl4 = { |
2962 | .fl4_dst = dst, | 2931 | .daddr = dst, |
2963 | .fl4_src = src, | 2932 | .saddr = src, |
2964 | .fl4_tos = rtm->rtm_tos, | 2933 | .flowi4_tos = rtm->rtm_tos, |
2965 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2934 | .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2966 | .mark = mark, | 2935 | .flowi4_mark = mark, |
2967 | }; | 2936 | }; |
2968 | err = ip_route_output_key(net, &rt, &fl); | 2937 | rt = ip_route_output_key(net, &fl4); |
2938 | |||
2939 | err = 0; | ||
2940 | if (IS_ERR(rt)) | ||
2941 | err = PTR_ERR(rt); | ||
2969 | } | 2942 | } |
2970 | 2943 | ||
2971 | if (err) | 2944 | if (err) |
@@ -3256,9 +3229,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3256 | }; | 3229 | }; |
3257 | 3230 | ||
3258 | 3231 | ||
3259 | #ifdef CONFIG_NET_CLS_ROUTE | 3232 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3260 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3233 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3261 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3234 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3262 | 3235 | ||
3263 | static __initdata unsigned long rhash_entries; | 3236 | static __initdata unsigned long rhash_entries; |
3264 | static int __init set_rhash_entries(char *str) | 3237 | static int __init set_rhash_entries(char *str) |
@@ -3274,7 +3247,7 @@ int __init ip_rt_init(void) | |||
3274 | { | 3247 | { |
3275 | int rc = 0; | 3248 | int rc = 0; |
3276 | 3249 | ||
3277 | #ifdef CONFIG_NET_CLS_ROUTE | 3250 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3278 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3251 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3279 | if (!ip_rt_acct) | 3252 | if (!ip_rt_acct) |
3280 | panic("IP: failed to allocate ip_rt_acct\n"); | 3253 | panic("IP: failed to allocate ip_rt_acct\n"); |
@@ -3311,14 +3284,6 @@ int __init ip_rt_init(void) | |||
3311 | devinet_init(); | 3284 | devinet_init(); |
3312 | ip_fib_init(); | 3285 | ip_fib_init(); |
3313 | 3286 | ||
3314 | /* All the timers, started at system startup tend | ||
3315 | to synchronize. Perturb it a bit. | ||
3316 | */ | ||
3317 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3318 | expires_ljiffies = jiffies; | ||
3319 | schedule_delayed_work(&expires_work, | ||
3320 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3321 | |||
3322 | if (ip_rt_proc_init()) | 3287 | if (ip_rt_proc_init()) |
3323 | printk(KERN_ERR "Unable to create route proc files\n"); | 3288 | printk(KERN_ERR "Unable to create route proc files\n"); |
3324 | #ifdef CONFIG_XFRM | 3289 | #ifdef CONFIG_XFRM |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 47519205a014..8b44c6d2a79b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -345,17 +345,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
345 | * no easy way to do this. | 345 | * no easy way to do this. |
346 | */ | 346 | */ |
347 | { | 347 | { |
348 | struct flowi fl = { .mark = sk->sk_mark, | 348 | struct flowi4 fl4 = { |
349 | .fl4_dst = ((opt && opt->srr) ? | 349 | .flowi4_mark = sk->sk_mark, |
350 | opt->faddr : ireq->rmt_addr), | 350 | .daddr = ((opt && opt->srr) ? |
351 | .fl4_src = ireq->loc_addr, | 351 | opt->faddr : ireq->rmt_addr), |
352 | .fl4_tos = RT_CONN_FLAGS(sk), | 352 | .saddr = ireq->loc_addr, |
353 | .proto = IPPROTO_TCP, | 353 | .flowi4_tos = RT_CONN_FLAGS(sk), |
354 | .flags = inet_sk_flowi_flags(sk), | 354 | .flowi4_proto = IPPROTO_TCP, |
355 | .fl_ip_sport = th->dest, | 355 | .flowi4_flags = inet_sk_flowi_flags(sk), |
356 | .fl_ip_dport = th->source }; | 356 | .fl4_sport = th->dest, |
357 | security_req_classify_flow(req, &fl); | 357 | .fl4_dport = th->source, |
358 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) { | 358 | }; |
359 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | ||
360 | rt = ip_route_output_key(sock_net(sk), &fl4); | ||
361 | if (IS_ERR(rt)) { | ||
359 | reqsk_free(req); | 362 | reqsk_free(req); |
360 | goto out; | 363 | goto out; |
361 | } | 364 | } |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eece262c..b22d45010545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
505 | else | 505 | else |
506 | answ = tp->write_seq - tp->snd_una; | 506 | answ = tp->write_seq - tp->snd_una; |
507 | break; | 507 | break; |
508 | case SIOCOUTQNSD: | ||
509 | if (sk->sk_state == TCP_LISTEN) | ||
510 | return -EINVAL; | ||
511 | |||
512 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) | ||
513 | answ = 0; | ||
514 | else | ||
515 | answ = tp->write_seq - tp->snd_nxt; | ||
516 | break; | ||
508 | default: | 517 | default: |
509 | return -ENOIOCTLCMD; | 518 | return -ENOIOCTLCMD; |
510 | } | 519 | } |
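
The new SIOCOUTQNSD ioctl complements SIOCOUTQ: SIOCOUTQ reports all unacknowledged bytes (write_seq - snd_una), while SIOCOUTQNSD reports only bytes queued but not yet sent (write_seq - snd_nxt), letting an application separate in-flight data from data still sitting in the send buffer. A hedged usage sketch (the fallback ioctl number is assumed for illustration; on a kernel carrying this patch it would come from linux/sockios.h):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/sockios.h>   /* SIOCOUTQ; SIOCOUTQNSD once this lands */

#ifndef SIOCOUTQNSD
#define SIOCOUTQNSD 0x894B   /* value assumed for illustration */
#endif

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    int unacked = 0, unsent = 0;

    /* On a connected socket these would be meaningful; here they just
     * demonstrate the two calls side by side. */
    if (ioctl(fd, SIOCOUTQ, &unacked) == 0 &&
        ioctl(fd, SIOCOUTQNSD, &unsent) == 0)
        printf("unacked=%d not-yet-sent=%d\n", unacked, unsent);
    return 0;
}
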
@@ -873,9 +882,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, | |||
873 | flags); | 882 | flags); |
874 | 883 | ||
875 | lock_sock(sk); | 884 | lock_sock(sk); |
876 | TCP_CHECK_TIMER(sk); | ||
877 | res = do_tcp_sendpages(sk, &page, offset, size, flags); | 885 | res = do_tcp_sendpages(sk, &page, offset, size, flags); |
878 | TCP_CHECK_TIMER(sk); | ||
879 | release_sock(sk); | 886 | release_sock(sk); |
880 | return res; | 887 | return res; |
881 | } | 888 | } |
@@ -916,7 +923,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
916 | long timeo; | 923 | long timeo; |
917 | 924 | ||
918 | lock_sock(sk); | 925 | lock_sock(sk); |
919 | TCP_CHECK_TIMER(sk); | ||
920 | 926 | ||
921 | flags = msg->msg_flags; | 927 | flags = msg->msg_flags; |
922 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 928 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
@@ -1104,7 +1110,6 @@ wait_for_memory: | |||
1104 | out: | 1110 | out: |
1105 | if (copied) | 1111 | if (copied) |
1106 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1112 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1107 | TCP_CHECK_TIMER(sk); | ||
1108 | release_sock(sk); | 1113 | release_sock(sk); |
1109 | return copied; | 1114 | return copied; |
1110 | 1115 | ||
@@ -1123,7 +1128,6 @@ do_error: | |||
1123 | goto out; | 1128 | goto out; |
1124 | out_err: | 1129 | out_err: |
1125 | err = sk_stream_error(sk, flags, err); | 1130 | err = sk_stream_error(sk, flags, err); |
1126 | TCP_CHECK_TIMER(sk); | ||
1127 | release_sock(sk); | 1131 | release_sock(sk); |
1128 | return err; | 1132 | return err; |
1129 | } | 1133 | } |
@@ -1415,8 +1419,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1415 | 1419 | ||
1416 | lock_sock(sk); | 1420 | lock_sock(sk); |
1417 | 1421 | ||
1418 | TCP_CHECK_TIMER(sk); | ||
1419 | |||
1420 | err = -ENOTCONN; | 1422 | err = -ENOTCONN; |
1421 | if (sk->sk_state == TCP_LISTEN) | 1423 | if (sk->sk_state == TCP_LISTEN) |
1422 | goto out; | 1424 | goto out; |
@@ -1767,12 +1769,10 @@ skip_copy: | |||
1767 | /* Clean up data we have read: This will do ACK frames. */ | 1769 | /* Clean up data we have read: This will do ACK frames. */ |
1768 | tcp_cleanup_rbuf(sk, copied); | 1770 | tcp_cleanup_rbuf(sk, copied); |
1769 | 1771 | ||
1770 | TCP_CHECK_TIMER(sk); | ||
1771 | release_sock(sk); | 1772 | release_sock(sk); |
1772 | return copied; | 1773 | return copied; |
1773 | 1774 | ||
1774 | out: | 1775 | out: |
1775 | TCP_CHECK_TIMER(sk); | ||
1776 | release_sock(sk); | 1776 | release_sock(sk); |
1777 | return err; | 1777 | return err; |
1778 | 1778 | ||
@@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
2654 | #endif | 2654 | #endif |
2655 | 2655 | ||
2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | 2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) |
2657 | { | 2657 | { |
2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2659 | struct tcphdr *th; | 2659 | struct tcphdr *th; |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 3b53fd1af23f..6187eb4d1dcf 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) | |||
209 | } | 209 | } |
210 | 210 | ||
211 | 211 | ||
212 | static struct tcp_congestion_ops bictcp = { | 212 | static struct tcp_congestion_ops bictcp __read_mostly = { |
213 | .init = bictcp_init, | 213 | .init = bictcp_init, |
214 | .ssthresh = bictcp_recalc_ssthresh, | 214 | .ssthresh = bictcp_recalc_ssthresh, |
215 | .cong_avoid = bictcp_cong_avoid, | 215 | .cong_avoid = bictcp_cong_avoid, |
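
This and the analogous hunks below annotate each congestion-ops table with __read_mostly, which places the structure in a separate rarely-written data section so that hot, frequently-dirtied variables cannot share its cache lines. A minimal compilable illustration of the underlying mechanism (the kernel's section is named ".data..read_mostly"; the attribute expansion here is a sketch of include/linux/cache.h, not its exact text):

#include <stdio.h>

#define __read_mostly __attribute__((__section__(".data.read_mostly")))

/* Written once at startup, read on every "packet". */
static int tuning_knob __read_mostly = 42;

int main(void)
{
	printf("%d\n", tuning_knob);
	return 0;
}
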
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 90d92dd4cf13..34340c9c95fa 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -424,7 +424,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
424 | hystart_update(sk, delay); | 424 | hystart_update(sk, delay); |
425 | } | 425 | } |
426 | 426 | ||
427 | static struct tcp_congestion_ops cubictcp = { | 427 | static struct tcp_congestion_ops cubictcp __read_mostly = { |
428 | .init = bictcp_init, | 428 | .init = bictcp_init, |
429 | .ssthresh = bictcp_recalc_ssthresh, | 429 | .ssthresh = bictcp_recalc_ssthresh, |
430 | .cong_avoid = bictcp_cong_avoid, | 430 | .cong_avoid = bictcp_cong_avoid, |
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b6caaf75bb9..30f27f6b3655 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c | |||
@@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk) | |||
158 | } | 158 | } |
159 | 159 | ||
160 | 160 | ||
161 | static struct tcp_congestion_ops tcp_highspeed = { | 161 | static struct tcp_congestion_ops tcp_highspeed __read_mostly = { |
162 | .init = hstcp_init, | 162 | .init = hstcp_init, |
163 | .ssthresh = hstcp_ssthresh, | 163 | .ssthresh = hstcp_ssthresh, |
164 | .cong_avoid = hstcp_cong_avoid, | 164 | .cong_avoid = hstcp_cong_avoid, |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 7c94a4955416..c1a8175361e8 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state) | |||
284 | } | 284 | } |
285 | } | 285 | } |
286 | 286 | ||
287 | static struct tcp_congestion_ops htcp = { | 287 | static struct tcp_congestion_ops htcp __read_mostly = { |
288 | .init = htcp_init, | 288 | .init = htcp_init, |
289 | .ssthresh = htcp_recalc_ssthresh, | 289 | .ssthresh = htcp_recalc_ssthresh, |
290 | .cong_avoid = htcp_cong_avoid, | 290 | .cong_avoid = htcp_cong_avoid, |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 377bc9349371..fe3ecf484b44 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
@@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
162 | tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); | 162 | tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); |
163 | } | 163 | } |
164 | 164 | ||
165 | static struct tcp_congestion_ops tcp_hybla = { | 165 | static struct tcp_congestion_ops tcp_hybla __read_mostly = { |
166 | .init = hybla_init, | 166 | .init = hybla_init, |
167 | .ssthresh = tcp_reno_ssthresh, | 167 | .ssthresh = tcp_reno_ssthresh, |
168 | .min_cwnd = tcp_reno_min_cwnd, | 168 | .min_cwnd = tcp_reno_min_cwnd, |
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00ca688d8964..813b43a76fec 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c | |||
@@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, | |||
322 | } | 322 | } |
323 | } | 323 | } |
324 | 324 | ||
325 | static struct tcp_congestion_ops tcp_illinois = { | 325 | static struct tcp_congestion_ops tcp_illinois __read_mostly = { |
326 | .flags = TCP_CONG_RTT_STAMP, | 326 | .flags = TCP_CONG_RTT_STAMP, |
327 | .init = tcp_illinois_init, | 327 | .init = tcp_illinois_init, |
328 | .ssthresh = tcp_illinois_ssthresh, | 328 | .ssthresh = tcp_illinois_ssthresh, |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e16b17efcf57..da782e7ab16d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | |||
817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); | 817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); |
818 | 818 | ||
819 | if (!cwnd) | 819 | if (!cwnd) |
820 | cwnd = rfc3390_bytes_to_packets(tp->mss_cache); | 820 | cwnd = TCP_INIT_CWND; |
821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); | 821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); |
822 | } | 822 | } |
823 | 823 | ||
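
Here the initial congestion window stops depending on the MSS via RFC 3390 and becomes the fixed TCP_INIT_CWND constant (10 segments, following the IW10 proposal). A worked comparison, with the RFC 3390 rounding thresholds reproduced from the old helper as best understood; treat the exact cutoffs as illustrative:

#include <stdio.h>

#define TCP_INIT_CWND	10

/* RFC 3390: IW = min(4*MSS, max(2*MSS, 4380 bytes)), in whole segments */
static unsigned int rfc3390_packets(unsigned int mss)
{
	if (mss <= 1095)
		return 4;
	return mss > 2190 ? 2 : 3;
}

int main(void)
{
	unsigned int mss = 1460;	/* typical Ethernet-derived MSS */

	printf("old IW: %u segments, new IW: %d segments\n",
	       rfc3390_packets(mss), TCP_INIT_CWND);
	return 0;
}
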
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 02f583b3744a..f7e6c2c2d2bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -149,9 +149,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
149 | struct inet_sock *inet = inet_sk(sk); | 149 | struct inet_sock *inet = inet_sk(sk); |
150 | struct tcp_sock *tp = tcp_sk(sk); | 150 | struct tcp_sock *tp = tcp_sk(sk); |
151 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | 151 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; |
152 | __be16 orig_sport, orig_dport; | ||
152 | struct rtable *rt; | 153 | struct rtable *rt; |
153 | __be32 daddr, nexthop; | 154 | __be32 daddr, nexthop; |
154 | int tmp; | ||
155 | int err; | 155 | int err; |
156 | 156 | ||
157 | if (addr_len < sizeof(struct sockaddr_in)) | 157 | if (addr_len < sizeof(struct sockaddr_in)) |
@@ -167,14 +167,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
167 | nexthop = inet->opt->faddr; | 167 | nexthop = inet->opt->faddr; |
168 | } | 168 | } |
169 | 169 | ||
170 | tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, | 170 | orig_sport = inet->inet_sport; |
171 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | 171 | orig_dport = usin->sin_port; |
172 | IPPROTO_TCP, | 172 | rt = ip_route_connect(nexthop, inet->inet_saddr, |
173 | inet->inet_sport, usin->sin_port, sk, 1); | 173 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
174 | if (tmp < 0) { | 174 | IPPROTO_TCP, |
175 | if (tmp == -ENETUNREACH) | 175 | orig_sport, orig_dport, sk, true); |
176 | if (IS_ERR(rt)) { | ||
177 | err = PTR_ERR(rt); | ||
178 | if (err == -ENETUNREACH) | ||
176 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 179 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
177 | return tmp; | 180 | return err; |
178 | } | 181 | } |
179 | 182 | ||
180 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | 183 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { |
@@ -233,11 +236,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
233 | if (err) | 236 | if (err) |
234 | goto failure; | 237 | goto failure; |
235 | 238 | ||
236 | err = ip_route_newports(&rt, IPPROTO_TCP, | 239 | rt = ip_route_newports(rt, IPPROTO_TCP, |
237 | inet->inet_sport, inet->inet_dport, sk); | 240 | orig_sport, orig_dport, |
238 | if (err) | 241 | inet->inet_sport, inet->inet_dport, sk); |
242 | if (IS_ERR(rt)) { | ||
243 | err = PTR_ERR(rt); | ||
244 | rt = NULL; | ||
239 | goto failure; | 245 | goto failure; |
240 | 246 | } | |
241 | /* OK, now commit destination to socket. */ | 247 | /* OK, now commit destination to socket. */ |
242 | sk->sk_gso_type = SKB_GSO_TCPV4; | 248 | sk->sk_gso_type = SKB_GSO_TCPV4; |
243 | sk_setup_caps(sk, &rt->dst); | 249 | sk_setup_caps(sk, &rt->dst); |
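
tcp_v4_connect() now snapshots orig_sport/orig_dport before the first lookup because inet_hash_connect() may pick an ephemeral source port in between, and ip_route_newports() needs both the ports the cached route was keyed on and the final ones to decide whether a second lookup is required. A toy model of that contract (all names illustrative, not the kernel's):

#include <stdio.h>

struct route { unsigned int sport, dport; };

static struct route *newports(struct route *rt,
			      unsigned int orig_sport, unsigned int orig_dport,
			      unsigned int sport, unsigned int dport)
{
	if (sport == orig_sport && dport == orig_dport)
		return rt;	/* keys unchanged: reuse the route */
	rt->sport = sport;	/* keys changed: redo the lookup */
	rt->dport = dport;
	return rt;
}

int main(void)
{
	struct route rt = { 0, 80 };	/* sport 0: not yet bound */

	/* inet_hash_connect() chose 49152 between the two routing calls */
	newports(&rt, 0, 80, 49152, 80);
	printf("re-keyed to %u -> %u\n", rt.sport, rt.dport);
	return 0;
}

Note also the rt = NULL before goto failure: once ip_route_newports() has returned an error pointer, the common unwind path must not ip_rt_put() it.
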
@@ -1341,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1341 | tcp_death_row.sysctl_tw_recycle && | 1347 | tcp_death_row.sysctl_tw_recycle && |
1342 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1348 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1343 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1349 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1344 | peer->daddr.a4 == saddr) { | 1350 | peer->daddr.addr.a4 == saddr) { |
1345 | inet_peer_refcheck(peer); | 1351 | inet_peer_refcheck(peer); |
1346 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1352 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1347 | (s32)(peer->tcp_ts - req->ts_recent) > | 1353 | (s32)(peer->tcp_ts - req->ts_recent) > |
@@ -1556,12 +1562,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1556 | 1562 | ||
1557 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1563 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
1558 | sock_rps_save_rxhash(sk, skb->rxhash); | 1564 | sock_rps_save_rxhash(sk, skb->rxhash); |
1559 | TCP_CHECK_TIMER(sk); | ||
1560 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { | 1565 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { |
1561 | rsk = sk; | 1566 | rsk = sk; |
1562 | goto reset; | 1567 | goto reset; |
1563 | } | 1568 | } |
1564 | TCP_CHECK_TIMER(sk); | ||
1565 | return 0; | 1569 | return 0; |
1566 | } | 1570 | } |
1567 | 1571 | ||
@@ -1583,13 +1587,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1583 | } else | 1587 | } else |
1584 | sock_rps_save_rxhash(sk, skb->rxhash); | 1588 | sock_rps_save_rxhash(sk, skb->rxhash); |
1585 | 1589 | ||
1586 | |||
1587 | TCP_CHECK_TIMER(sk); | ||
1588 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { | 1590 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { |
1589 | rsk = sk; | 1591 | rsk = sk; |
1590 | goto reset; | 1592 | goto reset; |
1591 | } | 1593 | } |
1592 | TCP_CHECK_TIMER(sk); | ||
1593 | return 0; | 1594 | return 0; |
1594 | 1595 | ||
1595 | reset: | 1596 | reset: |
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index de870377fbba..656d431c99ad 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c | |||
@@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) | |||
313 | lp->last_drop = tcp_time_stamp; | 313 | lp->last_drop = tcp_time_stamp; |
314 | } | 314 | } |
315 | 315 | ||
316 | static struct tcp_congestion_ops tcp_lp = { | 316 | static struct tcp_congestion_ops tcp_lp __read_mostly = { |
317 | .flags = TCP_CONG_RTT_STAMP, | 317 | .flags = TCP_CONG_RTT_STAMP, |
318 | .init = tcp_lp_init, | 318 | .init = tcp_lp_init, |
319 | .ssthresh = tcp_reno_ssthresh, | 319 | .ssthresh = tcp_reno_ssthresh, |
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index a76513779e2b..8ce55b8aaec8 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) | |||
35 | } | 35 | } |
36 | 36 | ||
37 | 37 | ||
38 | static struct tcp_congestion_ops tcp_scalable = { | 38 | static struct tcp_congestion_ops tcp_scalable __read_mostly = { |
39 | .ssthresh = tcp_scalable_ssthresh, | 39 | .ssthresh = tcp_scalable_ssthresh, |
40 | .cong_avoid = tcp_scalable_cong_avoid, | 40 | .cong_avoid = tcp_scalable_cong_avoid, |
41 | .min_cwnd = tcp_reno_min_cwnd, | 41 | .min_cwnd = tcp_reno_min_cwnd, |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74a6aa003657..ecd44b0c45f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data) | |||
259 | tcp_send_ack(sk); | 259 | tcp_send_ack(sk); |
260 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); | 260 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); |
261 | } | 261 | } |
262 | TCP_CHECK_TIMER(sk); | ||
263 | 262 | ||
264 | out: | 263 | out: |
265 | if (tcp_memory_pressure) | 264 | if (tcp_memory_pressure) |
@@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data) | |||
481 | tcp_probe_timer(sk); | 480 | tcp_probe_timer(sk); |
482 | break; | 481 | break; |
483 | } | 482 | } |
484 | TCP_CHECK_TIMER(sk); | ||
485 | 483 | ||
486 | out: | 484 | out: |
487 | sk_mem_reclaim(sk); | 485 | sk_mem_reclaim(sk); |
@@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data) | |||
589 | elapsed = keepalive_time_when(tp) - elapsed; | 587 | elapsed = keepalive_time_when(tp) - elapsed; |
590 | } | 588 | } |
591 | 589 | ||
592 | TCP_CHECK_TIMER(sk); | ||
593 | sk_mem_reclaim(sk); | 590 | sk_mem_reclaim(sk); |
594 | 591 | ||
595 | resched: | 592 | resched: |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c6743eec9b7d..80fa2bfd7ede 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) | |||
304 | } | 304 | } |
305 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); | 305 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); |
306 | 306 | ||
307 | static struct tcp_congestion_ops tcp_vegas = { | 307 | static struct tcp_congestion_ops tcp_vegas __read_mostly = { |
308 | .flags = TCP_CONG_RTT_STAMP, | 308 | .flags = TCP_CONG_RTT_STAMP, |
309 | .init = tcp_vegas_init, | 309 | .init = tcp_vegas_init, |
310 | .ssthresh = tcp_reno_ssthresh, | 310 | .ssthresh = tcp_reno_ssthresh, |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 38bc0b52d745..ac43cd747bce 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
@@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk) | |||
201 | return max(tp->snd_cwnd >> 1U, 2U); | 201 | return max(tp->snd_cwnd >> 1U, 2U); |
202 | } | 202 | } |
203 | 203 | ||
204 | static struct tcp_congestion_ops tcp_veno = { | 204 | static struct tcp_congestion_ops tcp_veno __read_mostly = { |
205 | .flags = TCP_CONG_RTT_STAMP, | 205 | .flags = TCP_CONG_RTT_STAMP, |
206 | .init = tcp_veno_init, | 206 | .init = tcp_veno_init, |
207 | .ssthresh = tcp_veno_ssthresh, | 207 | .ssthresh = tcp_veno_ssthresh, |
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index a534dda5456e..1b91bf48e277 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c | |||
@@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, | |||
272 | } | 272 | } |
273 | 273 | ||
274 | 274 | ||
275 | static struct tcp_congestion_ops tcp_westwood = { | 275 | static struct tcp_congestion_ops tcp_westwood __read_mostly = { |
276 | .init = tcp_westwood_init, | 276 | .init = tcp_westwood_init, |
277 | .ssthresh = tcp_reno_ssthresh, | 277 | .ssthresh = tcp_reno_ssthresh, |
278 | .cong_avoid = tcp_reno_cong_avoid, | 278 | .cong_avoid = tcp_reno_cong_avoid, |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a0f240358892..dc7f43179c9a 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
@@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { | |||
225 | return tp->snd_cwnd - reduction; | 225 | return tp->snd_cwnd - reduction; |
226 | } | 226 | } |
227 | 227 | ||
228 | static struct tcp_congestion_ops tcp_yeah = { | 228 | static struct tcp_congestion_ops tcp_yeah __read_mostly = { |
229 | .flags = TCP_CONG_RTT_STAMP, | 229 | .flags = TCP_CONG_RTT_STAMP, |
230 | .init = tcp_yeah_init, | 230 | .init = tcp_yeah_init, |
231 | .ssthresh = tcp_yeah_ssthresh, | 231 | .ssthresh = tcp_yeah_ssthresh, |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17959ee..588f47af5faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -663,75 +663,72 @@ void udp_flush_pending_frames(struct sock *sk) | |||
663 | EXPORT_SYMBOL(udp_flush_pending_frames); | 663 | EXPORT_SYMBOL(udp_flush_pending_frames); |
664 | 664 | ||
665 | /** | 665 | /** |
666 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | 666 | * udp4_hwcsum - handle outgoing HW checksumming |
667 | * @sk: socket we are sending on | ||
668 | * @skb: sk_buff containing the filled-in UDP header | 667 | * @skb: sk_buff containing the filled-in UDP header |
669 | * (checksum field must be zeroed out) | 668 | * (checksum field must be zeroed out) |
669 | * @src: source IP address | ||
670 | * @dst: destination IP address | ||
670 | */ | 671 | */ |
671 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | 672 | static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) |
672 | __be32 src, __be32 dst, int len) | ||
673 | { | 673 | { |
674 | unsigned int offset; | ||
675 | struct udphdr *uh = udp_hdr(skb); | 674 | struct udphdr *uh = udp_hdr(skb); |
675 | struct sk_buff *frags = skb_shinfo(skb)->frag_list; | ||
676 | int offset = skb_transport_offset(skb); | ||
677 | int len = skb->len - offset; | ||
678 | int hlen = len; | ||
676 | __wsum csum = 0; | 679 | __wsum csum = 0; |
677 | 680 | ||
678 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 681 | if (!frags) { |
679 | /* | 682 | /* |
680 | * Only one fragment on the socket. | 683 | * Only one fragment on the socket. |
681 | */ | 684 | */ |
682 | skb->csum_start = skb_transport_header(skb) - skb->head; | 685 | skb->csum_start = skb_transport_header(skb) - skb->head; |
683 | skb->csum_offset = offsetof(struct udphdr, check); | 686 | skb->csum_offset = offsetof(struct udphdr, check); |
684 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | 687 | uh->check = ~csum_tcpudp_magic(src, dst, len, |
688 | IPPROTO_UDP, 0); | ||
685 | } else { | 689 | } else { |
686 | /* | 690 | /* |
687 | * HW-checksum won't work as there are two or more | 691 | * HW-checksum won't work as there are two or more |
688 | * fragments on the socket so that all csums of sk_buffs | 692 | * fragments on the socket so that all csums of sk_buffs |
689 | * should be together | 693 | * should be together |
690 | */ | 694 | */ |
691 | offset = skb_transport_offset(skb); | 695 | do { |
692 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | 696 | csum = csum_add(csum, frags->csum); |
697 | hlen -= frags->len; | ||
698 | } while ((frags = frags->next)); | ||
693 | 699 | ||
700 | csum = skb_checksum(skb, offset, hlen, csum); | ||
694 | skb->ip_summed = CHECKSUM_NONE; | 701 | skb->ip_summed = CHECKSUM_NONE; |
695 | 702 | ||
696 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
697 | csum = csum_add(csum, skb->csum); | ||
698 | } | ||
699 | |||
700 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | 703 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); |
701 | if (uh->check == 0) | 704 | if (uh->check == 0) |
702 | uh->check = CSUM_MANGLED_0; | 705 | uh->check = CSUM_MANGLED_0; |
703 | } | 706 | } |
704 | } | 707 | } |
705 | 708 | ||
706 | /* | 709 | static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) |
707 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
708 | */ | ||
709 | static int udp_push_pending_frames(struct sock *sk) | ||
710 | { | 710 | { |
711 | struct udp_sock *up = udp_sk(sk); | 711 | struct sock *sk = skb->sk; |
712 | struct inet_sock *inet = inet_sk(sk); | 712 | struct inet_sock *inet = inet_sk(sk); |
713 | struct flowi *fl = &inet->cork.fl; | ||
714 | struct sk_buff *skb; | ||
715 | struct udphdr *uh; | 713 | struct udphdr *uh; |
714 | struct rtable *rt = (struct rtable *)skb_dst(skb); | ||
716 | int err = 0; | 715 | int err = 0; |
717 | int is_udplite = IS_UDPLITE(sk); | 716 | int is_udplite = IS_UDPLITE(sk); |
717 | int offset = skb_transport_offset(skb); | ||
718 | int len = skb->len - offset; | ||
718 | __wsum csum = 0; | 719 | __wsum csum = 0; |
719 | 720 | ||
720 | /* Grab the skbuff where UDP header space exists. */ | ||
721 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | ||
722 | goto out; | ||
723 | |||
724 | /* | 721 | /* |
725 | * Create a UDP header | 722 | * Create a UDP header |
726 | */ | 723 | */ |
727 | uh = udp_hdr(skb); | 724 | uh = udp_hdr(skb); |
728 | uh->source = fl->fl_ip_sport; | 725 | uh->source = inet->inet_sport; |
729 | uh->dest = fl->fl_ip_dport; | 726 | uh->dest = dport; |
730 | uh->len = htons(up->len); | 727 | uh->len = htons(len); |
731 | uh->check = 0; | 728 | uh->check = 0; |
732 | 729 | ||
733 | if (is_udplite) /* UDP-Lite */ | 730 | if (is_udplite) /* UDP-Lite */ |
734 | csum = udplite_csum_outgoing(sk, skb); | 731 | csum = udplite_csum(skb); |
735 | 732 | ||
736 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | 733 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ |
737 | 734 | ||
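
udp4_hwcsum() now folds the checksum of each entry on skb_shinfo(skb)->frag_list into one accumulator before adding the IPv4 pseudo-header, instead of walking the socket write queue. The fold works because the one's-complement sum is associative; a self-contained sketch with a plain 16-bit one's-complement sum standing in for the kernel's csum_* helpers:

#include <stdio.h>
#include <stdint.h>

/* Fold a 32-bit accumulator down to the final 16-bit checksum. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Accumulate one fragment's bytes, big-endian 16 bits at a time. */
static uint32_t csum_add_bytes(uint32_t sum, const uint8_t *p, int len)
{
	while (len > 1) {
		sum += (uint32_t)(p[0] << 8 | p[1]);
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)(p[0] << 8);
	return sum;
}

int main(void)
{
	/* Two "fragments", checksummed in sequence the way the new
	 * frag_list walk chains csum_add() over per-fragment sums. */
	const uint8_t frag1[] = "hello ";
	const uint8_t frag2[] = "world!";
	uint32_t sum = 0;

	sum = csum_add_bytes(sum, frag1, sizeof(frag1) - 1);
	sum = csum_add_bytes(sum, frag2, sizeof(frag2) - 1);
	/* A real datagram would fold in the pseudo-header (saddr, daddr,
	 * protocol, length) here, as csum_tcpudp_magic() does. */
	printf("0x%04x\n", csum_fold(sum));
	return 0;
}
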
@@ -740,20 +737,20 @@ static int udp_push_pending_frames(struct sock *sk) | |||
740 | 737 | ||
741 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 738 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
742 | 739 | ||
743 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); | 740 | udp4_hwcsum(skb, rt->rt_src, daddr); |
744 | goto send; | 741 | goto send; |
745 | 742 | ||
746 | } else /* `normal' UDP */ | 743 | } else |
747 | csum = udp_csum_outgoing(sk, skb); | 744 | csum = udp_csum(skb); |
748 | 745 | ||
749 | /* add protocol-dependent pseudo-header */ | 746 | /* add protocol-dependent pseudo-header */ |
750 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | 747 | uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, |
751 | sk->sk_protocol, csum); | 748 | sk->sk_protocol, csum); |
752 | if (uh->check == 0) | 749 | if (uh->check == 0) |
753 | uh->check = CSUM_MANGLED_0; | 750 | uh->check = CSUM_MANGLED_0; |
754 | 751 | ||
755 | send: | 752 | send: |
756 | err = ip_push_pending_frames(sk); | 753 | err = ip_send_skb(skb); |
757 | if (err) { | 754 | if (err) { |
758 | if (err == -ENOBUFS && !inet->recverr) { | 755 | if (err == -ENOBUFS && !inet->recverr) { |
759 | UDP_INC_STATS_USER(sock_net(sk), | 756 | UDP_INC_STATS_USER(sock_net(sk), |
@@ -763,6 +760,26 @@ send: | |||
763 | } else | 760 | } else |
764 | UDP_INC_STATS_USER(sock_net(sk), | 761 | UDP_INC_STATS_USER(sock_net(sk), |
765 | UDP_MIB_OUTDATAGRAMS, is_udplite); | 762 | UDP_MIB_OUTDATAGRAMS, is_udplite); |
763 | return err; | ||
764 | } | ||
765 | |||
766 | /* | ||
767 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
768 | */ | ||
769 | static int udp_push_pending_frames(struct sock *sk) | ||
770 | { | ||
771 | struct udp_sock *up = udp_sk(sk); | ||
772 | struct inet_sock *inet = inet_sk(sk); | ||
773 | struct flowi4 *fl4 = &inet->cork.fl.u.ip4; | ||
774 | struct sk_buff *skb; | ||
775 | int err = 0; | ||
776 | |||
777 | skb = ip_finish_skb(sk); | ||
778 | if (!skb) | ||
779 | goto out; | ||
780 | |||
781 | err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); | ||
782 | |||
766 | out: | 783 | out: |
767 | up->len = 0; | 784 | up->len = 0; |
768 | up->pending = 0; | 785 | up->pending = 0; |
@@ -774,6 +791,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
774 | { | 791 | { |
775 | struct inet_sock *inet = inet_sk(sk); | 792 | struct inet_sock *inet = inet_sk(sk); |
776 | struct udp_sock *up = udp_sk(sk); | 793 | struct udp_sock *up = udp_sk(sk); |
794 | struct flowi4 *fl4; | ||
777 | int ulen = len; | 795 | int ulen = len; |
778 | struct ipcm_cookie ipc; | 796 | struct ipcm_cookie ipc; |
779 | struct rtable *rt = NULL; | 797 | struct rtable *rt = NULL; |
@@ -785,6 +803,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
785 | int err, is_udplite = IS_UDPLITE(sk); | 803 | int err, is_udplite = IS_UDPLITE(sk); |
786 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 804 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
787 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | 805 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); |
806 | struct sk_buff *skb; | ||
788 | 807 | ||
789 | if (len > 0xFFFF) | 808 | if (len > 0xFFFF) |
790 | return -EMSGSIZE; | 809 | return -EMSGSIZE; |
@@ -799,6 +818,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
799 | ipc.opt = NULL; | 818 | ipc.opt = NULL; |
800 | ipc.tx_flags = 0; | 819 | ipc.tx_flags = 0; |
801 | 820 | ||
821 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
822 | |||
802 | if (up->pending) { | 823 | if (up->pending) { |
803 | /* | 824 | /* |
804 | * There are pending frames. | 825 | * There are pending frames. |
@@ -888,20 +909,25 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
888 | rt = (struct rtable *)sk_dst_check(sk, 0); | 909 | rt = (struct rtable *)sk_dst_check(sk, 0); |
889 | 910 | ||
890 | if (rt == NULL) { | 911 | if (rt == NULL) { |
891 | struct flowi fl = { .oif = ipc.oif, | 912 | struct flowi4 fl4 = { |
892 | .mark = sk->sk_mark, | 913 | .flowi4_oif = ipc.oif, |
893 | .fl4_dst = faddr, | 914 | .flowi4_mark = sk->sk_mark, |
894 | .fl4_src = saddr, | 915 | .daddr = faddr, |
895 | .fl4_tos = tos, | 916 | .saddr = saddr, |
896 | .proto = sk->sk_protocol, | 917 | .flowi4_tos = tos, |
897 | .flags = inet_sk_flowi_flags(sk), | 918 | .flowi4_proto = sk->sk_protocol, |
898 | .fl_ip_sport = inet->inet_sport, | 919 | .flowi4_flags = (inet_sk_flowi_flags(sk) | |
899 | .fl_ip_dport = dport }; | 920 | FLOWI_FLAG_CAN_SLEEP), |
921 | .fl4_sport = inet->inet_sport, | ||
922 | .fl4_dport = dport, | ||
923 | }; | ||
900 | struct net *net = sock_net(sk); | 924 | struct net *net = sock_net(sk); |
901 | 925 | ||
902 | security_sk_classify_flow(sk, &fl); | 926 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); |
903 | err = ip_route_output_flow(net, &rt, &fl, sk, 1); | 927 | rt = ip_route_output_flow(net, &fl4, sk); |
904 | if (err) { | 928 | if (IS_ERR(rt)) { |
929 | err = PTR_ERR(rt); | ||
930 | rt = NULL; | ||
905 | if (err == -ENETUNREACH) | 931 | if (err == -ENETUNREACH) |
906 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | 932 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
907 | goto out; | 933 | goto out; |
@@ -923,6 +949,17 @@ back_from_confirm: | |||
923 | if (!ipc.addr) | 949 | if (!ipc.addr) |
924 | daddr = ipc.addr = rt->rt_dst; | 950 | daddr = ipc.addr = rt->rt_dst; |
925 | 951 | ||
952 | /* Lockless fast path for the non-corking case. */ | ||
953 | if (!corkreq) { | ||
954 | skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, | ||
955 | sizeof(struct udphdr), &ipc, &rt, | ||
956 | msg->msg_flags); | ||
957 | err = PTR_ERR(skb); | ||
958 | if (skb && !IS_ERR(skb)) | ||
959 | err = udp_send_skb(skb, daddr, dport); | ||
960 | goto out; | ||
961 | } | ||
962 | |||
926 | lock_sock(sk); | 963 | lock_sock(sk); |
927 | if (unlikely(up->pending)) { | 964 | if (unlikely(up->pending)) { |
928 | /* The socket is already corked while preparing it. */ | 965 | /* The socket is already corked while preparing it. */ |
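
This is the payoff of the refactor above: a non-corked UDP send now builds the complete datagram with ip_make_skb() and transmits it via udp_send_skb() without ever taking the socket lock; only MSG_MORE/UDP_CORK traffic falls through to the locked ip_append_data() path. From userspace the two paths look like this (a usage sketch; port 9 is the discard service):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* One-shot send: served entirely by the new lockless fast path. */
static ssize_t send_fast(int fd, const struct sockaddr_in *dst)
{
	return sendto(fd, "single datagram", 15, 0,
		      (const struct sockaddr *)dst, sizeof(*dst));
}

/* Corked send: MSG_MORE keeps appending under the socket lock until
 * the final sendto() without it flushes the datagram. */
static ssize_t send_corked(int fd, const struct sockaddr_in *dst)
{
	sendto(fd, "part1-", 6, MSG_MORE,
	       (const struct sockaddr *)dst, sizeof(*dst));
	return sendto(fd, "part2", 5, 0,
		      (const struct sockaddr *)dst, sizeof(*dst));
}

int main(void)
{
	struct sockaddr_in dst = { .sin_family = AF_INET,
				   .sin_port = htons(9) };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	send_fast(fd, &dst);
	send_corked(fd, &dst);
	return 0;
}
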
@@ -936,15 +973,15 @@ back_from_confirm: | |||
936 | /* | 973 | /* |
937 | * Now cork the socket to pend data. | 974 | * Now cork the socket to pend data. |
938 | */ | 975 | */ |
939 | inet->cork.fl.fl4_dst = daddr; | 976 | fl4 = &inet->cork.fl.u.ip4; |
940 | inet->cork.fl.fl_ip_dport = dport; | 977 | fl4->daddr = daddr; |
941 | inet->cork.fl.fl4_src = saddr; | 978 | fl4->saddr = saddr; |
942 | inet->cork.fl.fl_ip_sport = inet->inet_sport; | 979 | fl4->fl4_dport = dport; |
980 | fl4->fl4_sport = inet->inet_sport; | ||
943 | up->pending = AF_INET; | 981 | up->pending = AF_INET; |
944 | 982 | ||
945 | do_append_data: | 983 | do_append_data: |
946 | up->len += ulen; | 984 | up->len += ulen; |
947 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
948 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | 985 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, |
949 | sizeof(struct udphdr), &ipc, &rt, | 986 | sizeof(struct udphdr), &ipc, &rt, |
950 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 987 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
@@ -2199,7 +2236,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) | |||
2199 | return 0; | 2236 | return 0; |
2200 | } | 2237 | } |
2201 | 2238 | ||
2202 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | 2239 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) |
2203 | { | 2240 | { |
2204 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2241 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2205 | unsigned int mss; | 2242 | unsigned int mss; |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40addec..13e0e7f659ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -19,25 +19,23 @@ | |||
19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; | 19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
20 | 20 | ||
21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | 21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, |
22 | xfrm_address_t *saddr, | 22 | const xfrm_address_t *saddr, |
23 | xfrm_address_t *daddr) | 23 | const xfrm_address_t *daddr) |
24 | { | 24 | { |
25 | struct flowi fl = { | 25 | struct flowi4 fl4 = { |
26 | .fl4_dst = daddr->a4, | 26 | .daddr = daddr->a4, |
27 | .fl4_tos = tos, | 27 | .flowi4_tos = tos, |
28 | }; | 28 | }; |
29 | struct dst_entry *dst; | ||
30 | struct rtable *rt; | 29 | struct rtable *rt; |
31 | int err; | ||
32 | 30 | ||
33 | if (saddr) | 31 | if (saddr) |
34 | fl.fl4_src = saddr->a4; | 32 | fl4.saddr = saddr->a4; |
33 | |||
34 | rt = __ip_route_output_key(net, &fl4); | ||
35 | if (!IS_ERR(rt)) | ||
36 | return &rt->dst; | ||
35 | 37 | ||
36 | err = __ip_route_output_key(net, &rt, &fl); | 38 | return ERR_CAST(rt); |
37 | dst = &rt->dst; | ||
38 | if (err) | ||
39 | dst = ERR_PTR(err); | ||
40 | return dst; | ||
41 | } | 39 | } |
42 | 40 | ||
43 | static int xfrm4_get_saddr(struct net *net, | 41 | static int xfrm4_get_saddr(struct net *net, |
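
xfrm4_dst_lookup() now returns either the embedded dst_entry or the route's error pointer re-typed with ERR_CAST(): because an error pointer is just a negative errno in disguise, it survives a change of pointer type unchanged. In miniature, reusing the ERR_PTR()/IS_ERR() helpers from the syncookies sketch above (illustrative, not the kernel's definitions):

static inline void *ERR_CAST(const void *ptr)
{
	return (void *)ptr;	/* same bits, hence same encoded errno */
}

struct dst_entry { int refcnt; };
struct rtable    { struct dst_entry dst; };

static struct dst_entry *dst_lookup(struct rtable *rt)
{
	if (!IS_ERR(rt))
		return &rt->dst;	/* success: hand out the inner dst */
	return ERR_CAST(rt);		/* failure: same errno, new type */
}
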
@@ -56,9 +54,9 @@ static int xfrm4_get_saddr(struct net *net, | |||
56 | return 0; | 54 | return 0; |
57 | } | 55 | } |
58 | 56 | ||
59 | static int xfrm4_get_tos(struct flowi *fl) | 57 | static int xfrm4_get_tos(const struct flowi *fl) |
60 | { | 58 | { |
61 | return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ | 59 | return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ |
62 | } | 60 | } |
63 | 61 | ||
64 | static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | 62 | static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, |
@@ -68,11 +66,17 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | |||
68 | } | 66 | } |
69 | 67 | ||
70 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | 68 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, |
71 | struct flowi *fl) | 69 | const struct flowi *fl) |
72 | { | 70 | { |
73 | struct rtable *rt = (struct rtable *)xdst->route; | 71 | struct rtable *rt = (struct rtable *)xdst->route; |
72 | const struct flowi4 *fl4 = &fl->u.ip4; | ||
74 | 73 | ||
75 | xdst->u.rt.fl = *fl; | 74 | rt->rt_key_dst = fl4->daddr; |
75 | rt->rt_key_src = fl4->saddr; | ||
76 | rt->rt_tos = fl4->flowi4_tos; | ||
77 | rt->rt_iif = fl4->flowi4_iif; | ||
78 | rt->rt_oif = fl4->flowi4_oif; | ||
79 | rt->rt_mark = fl4->flowi4_mark; | ||
76 | 80 | ||
77 | xdst->u.dst.dev = dev; | 81 | xdst->u.dst.dev = dev; |
78 | dev_hold(dev); | 82 | dev_hold(dev); |
@@ -99,9 +103,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
99 | { | 103 | { |
100 | struct iphdr *iph = ip_hdr(skb); | 104 | struct iphdr *iph = ip_hdr(skb); |
101 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; | 105 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
106 | struct flowi4 *fl4 = &fl->u.ip4; | ||
102 | 107 | ||
103 | memset(fl, 0, sizeof(struct flowi)); | 108 | memset(fl4, 0, sizeof(struct flowi4)); |
104 | fl->mark = skb->mark; | 109 | fl4->flowi4_mark = skb->mark; |
105 | 110 | ||
106 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | 111 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { |
107 | switch (iph->protocol) { | 112 | switch (iph->protocol) { |
@@ -114,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
114 | pskb_may_pull(skb, xprth + 4 - skb->data)) { | 119 | pskb_may_pull(skb, xprth + 4 - skb->data)) { |
115 | __be16 *ports = (__be16 *)xprth; | 120 | __be16 *ports = (__be16 *)xprth; |
116 | 121 | ||
117 | fl->fl_ip_sport = ports[!!reverse]; | 122 | fl4->fl4_sport = ports[!!reverse]; |
118 | fl->fl_ip_dport = ports[!reverse]; | 123 | fl4->fl4_dport = ports[!reverse]; |
119 | } | 124 | } |
120 | break; | 125 | break; |
121 | 126 | ||
@@ -123,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
123 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { | 128 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { |
124 | u8 *icmp = xprth; | 129 | u8 *icmp = xprth; |
125 | 130 | ||
126 | fl->fl_icmp_type = icmp[0]; | 131 | fl4->fl4_icmp_type = icmp[0]; |
127 | fl->fl_icmp_code = icmp[1]; | 132 | fl4->fl4_icmp_code = icmp[1]; |
128 | } | 133 | } |
129 | break; | 134 | break; |
130 | 135 | ||
@@ -132,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
132 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | 137 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { |
133 | __be32 *ehdr = (__be32 *)xprth; | 138 | __be32 *ehdr = (__be32 *)xprth; |
134 | 139 | ||
135 | fl->fl_ipsec_spi = ehdr[0]; | 140 | fl4->fl4_ipsec_spi = ehdr[0]; |
136 | } | 141 | } |
137 | break; | 142 | break; |
138 | 143 | ||
@@ -140,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
140 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | 145 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { |
141 | __be32 *ah_hdr = (__be32*)xprth; | 146 | __be32 *ah_hdr = (__be32*)xprth; |
142 | 147 | ||
143 | fl->fl_ipsec_spi = ah_hdr[1]; | 148 | fl4->fl4_ipsec_spi = ah_hdr[1]; |
144 | } | 149 | } |
145 | break; | 150 | break; |
146 | 151 | ||
@@ -148,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
148 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | 153 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { |
149 | __be16 *ipcomp_hdr = (__be16 *)xprth; | 154 | __be16 *ipcomp_hdr = (__be16 *)xprth; |
150 | 155 | ||
151 | fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); | 156 | fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); |
152 | } | 157 | } |
153 | break; | 158 | break; |
154 | 159 | ||
@@ -160,20 +165,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
160 | if (greflags[0] & GRE_KEY) { | 165 | if (greflags[0] & GRE_KEY) { |
161 | if (greflags[0] & GRE_CSUM) | 166 | if (greflags[0] & GRE_CSUM) |
162 | gre_hdr++; | 167 | gre_hdr++; |
163 | fl->fl_gre_key = gre_hdr[1]; | 168 | fl4->fl4_gre_key = gre_hdr[1]; |
164 | } | 169 | } |
165 | } | 170 | } |
166 | break; | 171 | break; |
167 | 172 | ||
168 | default: | 173 | default: |
169 | fl->fl_ipsec_spi = 0; | 174 | fl4->fl4_ipsec_spi = 0; |
170 | break; | 175 | break; |
171 | } | 176 | } |
172 | } | 177 | } |
173 | fl->proto = iph->protocol; | 178 | fl4->flowi4_proto = iph->protocol; |
174 | fl->fl4_dst = reverse ? iph->saddr : iph->daddr; | 179 | fl4->daddr = reverse ? iph->saddr : iph->daddr; |
175 | fl->fl4_src = reverse ? iph->daddr : iph->saddr; | 180 | fl4->saddr = reverse ? iph->daddr : iph->saddr; |
176 | fl->fl4_tos = iph->tos; | 181 | fl4->flowi4_tos = iph->tos; |
177 | } | 182 | } |
178 | 183 | ||
179 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) | 184 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) |
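
_decode_session4() keeps the ports[!!reverse] / ports[!reverse] indexing while moving the results into typed flowi4 fields: the double negation turns any nonzero reverse flag into index 1, so decoding the reverse flow simply swaps which on-wire port lands in fl4_sport versus fl4_dport. A tiny standalone demonstration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint16_t ports[2] = { 5000 /* sport */, 80 /* dport */ };
	int reverse = 1;

	/* Forward (reverse == 0): sport = ports[0], dport = ports[1].
	 * Reverse (reverse != 0): the indices flip. */
	printf("sport=%u dport=%u\n", ports[!!reverse], ports[!reverse]);
	return 0;
}
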
@@ -196,8 +201,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) | |||
196 | { | 201 | { |
197 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | 202 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; |
198 | 203 | ||
204 | dst_destroy_metrics_generic(dst); | ||
205 | |||
199 | if (likely(xdst->u.rt.peer)) | 206 | if (likely(xdst->u.rt.peer)) |
200 | inet_putpeer(xdst->u.rt.peer); | 207 | inet_putpeer(xdst->u.rt.peer); |
208 | |||
201 | xfrm_dst_destroy(xdst); | 209 | xfrm_dst_destroy(xdst); |
202 | } | 210 | } |
203 | 211 | ||
@@ -215,6 +223,7 @@ static struct dst_ops xfrm4_dst_ops = { | |||
215 | .protocol = cpu_to_be16(ETH_P_IP), | 223 | .protocol = cpu_to_be16(ETH_P_IP), |
216 | .gc = xfrm4_garbage_collect, | 224 | .gc = xfrm4_garbage_collect, |
217 | .update_pmtu = xfrm4_update_pmtu, | 225 | .update_pmtu = xfrm4_update_pmtu, |
226 | .cow_metrics = dst_cow_metrics_generic, | ||
218 | .destroy = xfrm4_dst_destroy, | 227 | .destroy = xfrm4_dst_destroy, |
219 | .ifdown = xfrm4_dst_ifdown, | 228 | .ifdown = xfrm4_dst_ifdown, |
220 | .local_out = __ip_local_out, | 229 | .local_out = __ip_local_out, |
@@ -230,6 +239,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
230 | .get_tos = xfrm4_get_tos, | 239 | .get_tos = xfrm4_get_tos, |
231 | .init_path = xfrm4_init_path, | 240 | .init_path = xfrm4_init_path, |
232 | .fill_dst = xfrm4_fill_dst, | 241 | .fill_dst = xfrm4_fill_dst, |
242 | .blackhole_route = ipv4_blackhole_route, | ||
233 | }; | 243 | }; |
234 | 244 | ||
235 | #ifdef CONFIG_SYSCTL | 245 | #ifdef CONFIG_SYSCTL |
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 47947624eccc..1717c64628d1 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
@@ -21,24 +21,26 @@ static int xfrm4_init_flags(struct xfrm_state *x) | |||
21 | } | 21 | } |
22 | 22 | ||
23 | static void | 23 | static void |
24 | __xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) | 24 | __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) |
25 | { | 25 | { |
26 | sel->daddr.a4 = fl->fl4_dst; | 26 | const struct flowi4 *fl4 = &fl->u.ip4; |
27 | sel->saddr.a4 = fl->fl4_src; | 27 | |
28 | sel->dport = xfrm_flowi_dport(fl); | 28 | sel->daddr.a4 = fl4->daddr; |
29 | sel->saddr.a4 = fl4->saddr; | ||
30 | sel->dport = xfrm_flowi_dport(fl, &fl4->uli); | ||
29 | sel->dport_mask = htons(0xffff); | 31 | sel->dport_mask = htons(0xffff); |
30 | sel->sport = xfrm_flowi_sport(fl); | 32 | sel->sport = xfrm_flowi_sport(fl, &fl4->uli); |
31 | sel->sport_mask = htons(0xffff); | 33 | sel->sport_mask = htons(0xffff); |
32 | sel->family = AF_INET; | 34 | sel->family = AF_INET; |
33 | sel->prefixlen_d = 32; | 35 | sel->prefixlen_d = 32; |
34 | sel->prefixlen_s = 32; | 36 | sel->prefixlen_s = 32; |
35 | sel->proto = fl->proto; | 37 | sel->proto = fl4->flowi4_proto; |
36 | sel->ifindex = fl->oif; | 38 | sel->ifindex = fl4->flowi4_oif; |
37 | } | 39 | } |
38 | 40 | ||
39 | static void | 41 | static void |
40 | xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, | 42 | xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, |
41 | xfrm_address_t *daddr, xfrm_address_t *saddr) | 43 | const xfrm_address_t *daddr, const xfrm_address_t *saddr) |
42 | { | 44 | { |
43 | x->id = tmpl->id; | 45 | x->id = tmpl->id; |
44 | if (x->id.daddr.a4 == 0) | 46 | if (x->id.daddr.a4 == 0) |