Diffstat (limited to 'net/ipv4')
 30 files changed, 730 insertions(+), 1900 deletions(-)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d1..cbb505ba9324 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER
 
 	  If unsure, say N here.
 
-choice
-	prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
-	depends on IP_ADVANCED_ROUTER
-	default ASK_IP_FIB_HASH
-
-config ASK_IP_FIB_HASH
-	bool "FIB_HASH"
-	---help---
-	  Current FIB is very proven and good enough for most users.
-
-config IP_FIB_TRIE
-	bool "FIB_TRIE"
-	---help---
-	  Use new experimental LC-trie as FIB lookup algorithm.
-	  This improves lookup performance if you have a large
-	  number of routes.
-
-	  LC-trie is a longest matching prefix lookup algorithm which
-	  performs better than FIB_HASH for large routing tables.
-	  But, it consumes more memory and is more complex.
-
-	  LC-trie is described in:
-
-	  IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
-	  IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
-	  June 1999
-
-	  An experimental study of compression methods for dynamic tries
-	  Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
-	  <http://www.csc.kth.se/~snilsson/software/dyntrie2/>
-
-endchoice
-
-config IP_FIB_HASH
-	def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
-
 config IP_FIB_TRIE_STATS
 	bool "FIB TRIE statistics"
-	depends on IP_FIB_TRIE
+	depends on IP_ADVANCED_ROUTER
 	---help---
 	  Keep track of statistics on structure of FIB TRIE table.
 	  Useful for testing and measuring TRIE performance.
@@ -140,6 +104,9 @@ config IP_ROUTE_VERBOSE
 	  handled by the klogd daemon which is responsible for kernel messages
 	  ("man klogd").
 
+config IP_ROUTE_CLASSID
+	bool
+
 config IP_PNP
 	bool "IP: kernel level autoconfiguration"
 	help
@@ -657,4 +624,3 @@ config TCP_MD5SIG
 	  on the Internet.
 
 	  If unsure, say N.
-
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4978d22f9a75..0dc772d0d125 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     fib_frontend.o fib_semantics.o \
+	     fib_frontend.o fib_semantics.o fib_trie.o \
 	     inet_fragment.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
-obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
-obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
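With FIB_HASH gone, the Kconfig choice disappears and fib_trie.o joins the unconditional object list: every kernel now does IPv4 route lookup with the LC-trie. The win is that a trie answers longest-prefix match in one descent over the destination bits, where the hash design had to probe a separate table per prefix length. A minimal user-space sketch of that lookup idea follows; it is a plain (uncompressed) binary trie, not the kernel's path-compressed LC-trie, and every name in it is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* One node per key bit; the real fib_trie uses path-compressed
 * multi-bit (LC-trie) nodes, this shows only the uncompressed idea. */
struct node {
	struct node *child[2];
	int have_leaf;		/* a prefix ends at this node */
	uint32_t leaf_val;	/* e.g. a nexthop id */
};

static struct node *node_new(void)
{
	struct node *n = calloc(1, sizeof(*n));
	if (!n)
		abort();
	return n;
}

static void trie_insert(struct node *root, uint32_t prefix, int plen,
			uint32_t val)
{
	struct node *n = root;
	for (int i = 0; i < plen; i++) {
		int bit = (prefix >> (31 - i)) & 1;
		if (!n->child[bit])
			n->child[bit] = node_new();
		n = n->child[bit];
	}
	n->have_leaf = 1;
	n->leaf_val = val;
}

/* Longest-prefix match: remember the last leaf seen on the path. */
static int trie_lookup(struct node *root, uint32_t dst, uint32_t *val)
{
	struct node *n = root;
	int found = 0;

	for (int i = 0; n && i < 32; i++) {
		if (n->have_leaf) {
			*val = n->leaf_val;
			found = 1;
		}
		n = n->child[(dst >> (31 - i)) & 1];
	}
	if (n && n->have_leaf) {
		*val = n->leaf_val;
		found = 1;
	}
	return found;
}

int main(void)
{
	struct node *root = node_new();
	uint32_t nh;

	trie_insert(root, 0x0a000000, 8, 1);	/* 10.0.0.0/8  -> 1 */
	trie_insert(root, 0x0a010000, 16, 2);	/* 10.1.0.0/16 -> 2 */

	if (trie_lookup(root, 0x0a010203, &nh))	/* 10.1.2.3 */
		printf("nexthop %u\n", nh);	/* prints 2, the /16 wins */
	return 0;
}

The LC-trie papers cited in the deleted help text describe how path and level compression shrink this structure while keeping the same single walk.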
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 45b89d7bda5a..7ceb80447631 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1231,7 +1231,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 86961bec70ab..325053df6e70 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -201,7 +201,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->ttl = 0;
 	top_iph->check = 0;
 
-	ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	else
+		ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
@@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;
 
-	if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
-	    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
-		goto out;
+	if (x->props.flags & XFRM_STATE_ALIGN4) {
+		if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	} else {
+		if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	}
 
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
@@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x)
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
 
-	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
-					  ahp->icv_trunc_len);
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
+	else
+		x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	x->data = ahp;
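All three ah4.c hunks make the same substitution: when the xfrm state carries the XFRM_STATE_ALIGN4 flag, AH header sizes are rounded to 32-bit rather than 64-bit boundaries on both output and input validation, matching the 32-bit alignment IPv4 AH uses (the unconditional 64-bit rounding suited IPv6). Since ah->hdrlen is encoded as the header length in 32-bit words minus 2, the rounding choice changes the on-wire value. A standalone sketch of the arithmetic; ALIGN_TO is a local stand-in for the kernel's XFRM_ALIGN4/XFRM_ALIGN8 macros, and the 12-byte fixed header plus 16-byte truncated ICV are assumed example figures:

#include <stdint.h>
#include <stdio.h>

/* Round len up to a power-of-two boundary a, as XFRM_ALIGN4/8 do. */
#define ALIGN_TO(len, a)	(((len) + (a) - 1) & ~((size_t)(a) - 1))

/* ah->hdrlen counts 32-bit words minus 2. */
static uint8_t ah_hdrlen(size_t fixed_hdr, size_t icv_trunc_len, int align4)
{
	size_t total = align4 ? ALIGN_TO(fixed_hdr + icv_trunc_len, 4)
			      : ALIGN_TO(fixed_hdr + icv_trunc_len, 8);
	return (uint8_t)((total >> 2) - 2);
}

int main(void)
{
	/* struct ip_auth_hdr is 12 bytes; assume a 16-byte truncated ICV */
	size_t fixed = 12, icv = 16;

	printf("align8: hdrlen=%u (%zu bytes on the wire)\n",
	       ah_hdrlen(fixed, icv, 0), ALIGN_TO(fixed + icv, 8));
	printf("align4: hdrlen=%u (%zu bytes on the wire)\n",
	       ah_hdrlen(fixed, icv, 1), ALIGN_TO(fixed + icv, 4));
	return 0;
}

With a 16-byte ICV the two roundings really diverge: 28 bytes stays 28 under 4-byte alignment (hdrlen 5) but pads to 32 under 8-byte alignment (hdrlen 6), which is exactly the mismatch the new ah_input() checks accept.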
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce929..90389281d97a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -51,6 +51,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -92,6 +93,71 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
+/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
+ * value.  So if you change this define, make appropriate changes to
+ * inet_addr_hash as well.
+ */
+#define IN4_ADDR_HSIZE	256
+static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
+static DEFINE_SPINLOCK(inet_addr_hash_lock);
+
+static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
+{
+	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
+
+	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
+		(IN4_ADDR_HSIZE - 1));
+}
+
+static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
+{
+	unsigned int hash = inet_addr_hash(net, ifa->ifa_address);
+
+	spin_lock(&inet_addr_hash_lock);
+	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
+	spin_unlock(&inet_addr_hash_lock);
+}
+
+static void inet_hash_remove(struct in_ifaddr *ifa)
+{
+	spin_lock(&inet_addr_hash_lock);
+	hlist_del_init_rcu(&ifa->hash);
+	spin_unlock(&inet_addr_hash_lock);
+}
+
+/**
+ * __ip_dev_find - find the first device with a given source address.
+ * @net: the net namespace
+ * @addr: the source address
+ * @devref: if true, take a reference on the found device
+ *
+ * If a caller uses devref=false, it should be protected by RCU, or RTNL
+ */
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
+{
+	unsigned int hash = inet_addr_hash(net, addr);
+	struct net_device *result = NULL;
+	struct in_ifaddr *ifa;
+	struct hlist_node *node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
+		struct net_device *dev = ifa->ifa_dev->dev;
+
+		if (!net_eq(dev_net(dev), net))
+			continue;
+		if (ifa->ifa_address == addr) {
+			result = dev;
+			break;
+		}
+	}
+	if (result && devref)
+		dev_hold(result);
+	rcu_read_unlock();
+	return result;
+}
+EXPORT_SYMBOL(__ip_dev_find);
+
 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -265,6 +331,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	}
 
 	if (!do_promote) {
+		inet_hash_remove(ifa);
 		*ifap1 = ifa->ifa_next;
 
 		rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
@@ -281,6 +348,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	/* 2. Unlink it */
 
 	*ifap = ifa1->ifa_next;
+	inet_hash_remove(ifa1);
 
 	/* 3. Announce address deletion */
 
@@ -368,6 +436,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	ifa->ifa_next = *ifap;
 	*ifap = ifa;
 
+	inet_hash_insert(dev_net(in_dev->dev), ifa);
+
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
@@ -521,6 +591,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_ADDRESS] == NULL)
 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 
+	INIT_HLIST_NODE(&ifa->hash);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 	ifa->ifa_flags = ifm->ifa_flags;
@@ -728,6 +799,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		if (!ifa) {
 			ret = -ENOBUFS;
 			ifa = inet_alloc_ifa();
+			INIT_HLIST_NODE(&ifa->hash);
 			if (!ifa)
 				break;
 			if (colon)
@@ -1084,6 +1156,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		struct in_ifaddr *ifa = inet_alloc_ifa();
 
 		if (ifa) {
+			INIT_HLIST_NODE(&ifa->hash);
 			ifa->ifa_local =
 				ifa->ifa_address = htonl(INADDR_LOOPBACK);
 			ifa->ifa_prefixlen = 8;
@@ -1720,6 +1793,11 @@ static struct rtnl_af_ops inet_af_ops = {
 
 void __init devinet_init(void)
 {
+	int i;
+
+	for (i = 0; i < IN4_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet_addr_lst[i]);
+
 	register_pernet_subsys(&devinet_ops);
 
 	register_gifconf(PF_INET, inet_gifconf);
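The devinet.c additions move the "which device owns this local address?" question out of the FIB entirely: in_ifaddr entries are kept in a 256-bucket hash, and __ip_dev_find becomes a straight RCU hash walk (its old FIB-based version is deleted from fib_frontend.c below). The bucket index xor-folds all four address bytes and mixes in hash_ptr(net, 8) so different namespaces spread differently. A user-space sketch of the folding, with a fixed salt standing in for the namespace hash:

#include <stdint.h>
#include <stdio.h>

#define IN4_ADDR_HSIZE	256

/* Same byte-folding as the new inet_addr_hash(); net_salt stands in
 * for hash_ptr(net, 8), which keeps namespaces apart. */
static unsigned int inet_addr_hash(uint32_t addr, uint32_t net_salt)
{
	uint32_t val = addr ^ net_salt;

	return (val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
	       (IN4_ADDR_HSIZE - 1);
}

int main(void)
{
	uint32_t addr = 0xc0a80101;	/* 192.168.1.1 */

	/* Two namespaces (different salts) land in different buckets. */
	printf("bucket in ns A: %u\n", inet_addr_hash(addr, 0x2a));
	printf("bucket in ns B: %u\n", inet_addr_hash(addr, 0x51));
	return 0;
}

Because the fold xors every byte into the low eight bits, the bucket is the same whichever byte order the address arrives in, which is why the kernel can feed the __be32 value straight in.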
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd43a878..ad0778a3fa53 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net)
 {
 	struct fib_table *local_table, *main_table;
 
-	local_table = fib_hash_table(RT_TABLE_LOCAL);
+	local_table = fib_trie_table(RT_TABLE_LOCAL);
 	if (local_table == NULL)
 		return -ENOMEM;
 
-	main_table = fib_hash_table(RT_TABLE_MAIN);
+	main_table = fib_trie_table(RT_TABLE_MAIN);
 	if (main_table == NULL)
 		goto fail;
 
@@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 	if (tb)
 		return tb;
 
-	tb = fib_hash_table(id);
+	tb = fib_trie_table(id);
 	if (!tb)
 		return NULL;
 	h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-void fib_select_default(struct net *net,
-			const struct flowi *flp, struct fib_result *res)
-{
-	struct fib_table *tb;
-	int table = RT_TABLE_MAIN;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
-		return;
-	table = res->r->table;
-#endif
-	tb = fib_get_table(net, table);
-	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
-		fib_table_select_default(tb, flp, res);
-}
-
 static void fib_flush(struct net *net)
 {
 	int flushed = 0;
@@ -147,46 +132,6 @@ static void fib_flush(struct net *net)
 	rt_cache_flush(net, -1);
 }
 
-/**
- * __ip_dev_find - find the first device with a given source address.
- * @net: the net namespace
- * @addr: the source address
- * @devref: if true, take a reference on the found device
- *
- * If a caller uses devref=false, it should be protected by RCU, or RTNL
- */
-struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
-{
-	struct flowi fl = {
-		.fl4_dst = addr,
-	};
-	struct fib_result res = { 0 };
-	struct net_device *dev = NULL;
-	struct fib_table *local_table;
-
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	res.r = NULL;
-#endif
-
-	rcu_read_lock();
-	local_table = fib_get_table(net, RT_TABLE_LOCAL);
-	if (!local_table ||
-	    fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
-		rcu_read_unlock();
-		return NULL;
-	}
-	if (res.type != RTN_LOCAL)
-		goto out;
-	dev = FIB_RES_DEV(res);
-
-	if (dev && devref)
-		dev_hold(dev);
-out:
-	rcu_read_unlock();
-	return dev;
-}
-EXPORT_SYMBOL(__ip_dev_find);
-
 /*
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
@@ -1101,5 +1046,5 @@ void __init ip_fib_init(void)
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-	fib_hash_init();
+	fib_trie_init();
 }
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
deleted file mode 100644
index b3acb0417b21..000000000000
--- a/net/ipv4/fib_hash.c
+++ /dev/null
@@ -1,1133 +0,0 @@
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system. INET is implemented using the BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		IPv4 FIB: lookup engine and maintenance routines.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/tcp.h>
-#include <net/sock.h>
-#include <net/ip_fib.h>
-
-#include "fib_lookup.h"
-
-static struct kmem_cache *fn_hash_kmem __read_mostly;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-
-struct fib_node {
-	struct hlist_node	fn_hash;
-	struct list_head	fn_alias;
-	__be32			fn_key;
-	struct fib_alias	fn_embedded_alias;
-};
-
-#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
-
-struct fn_zone {
-	struct fn_zone __rcu	*fz_next;	/* Next not empty zone */
-	struct hlist_head __rcu	*fz_hash;	/* Hash table pointer */
-	seqlock_t		fz_lock;
-	u32			fz_hashmask;	/* (fz_divisor - 1) */
-
-	u8			fz_order;	/* Zone order (0..32) */
-	u8			fz_revorder;	/* 32 - fz_order */
-	__be32			fz_mask;	/* inet_make_mask(order) */
-#define FZ_MASK(fz)		((fz)->fz_mask)
-
-	struct hlist_head	fz_embedded_hash[EMBEDDED_HASH_SIZE];
-
-	int			fz_nent;	/* Number of entries */
-	int			fz_divisor;	/* Hash size (mask+1) */
-};
-
-struct fn_hash {
-	struct fn_zone		*fn_zones[33];
-	struct fn_zone __rcu	*fn_zone_list;
-};
-
-static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
-{
-	u32 h = ntohl(key) >> fz->fz_revorder;
-	h ^= (h>>20);
-	h ^= (h>>10);
-	h ^= (h>>5);
-	h &= fz->fz_hashmask;
-	return h;
-}
-
-static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
-{
-	return dst & FZ_MASK(fz);
-}
-
-static unsigned int fib_hash_genid;
-
-#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
-
-static struct hlist_head *fz_hash_alloc(int divisor)
-{
-	unsigned long size = divisor * sizeof(struct hlist_head);
-
-	if (size <= PAGE_SIZE)
-		return kzalloc(size, GFP_KERNEL);
-
-	return (struct hlist_head *)
-		__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
-}
-
-/* The fib hash lock must be held when this is called. */
-static inline void fn_rebuild_zone(struct fn_zone *fz,
-				   struct hlist_head *old_ht,
-				   int old_divisor)
-{
-	int i;
-
-	for (i = 0; i < old_divisor; i++) {
-		struct hlist_node *node, *n;
-		struct fib_node *f;
-
-		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
-			struct hlist_head *new_head;
-
-			hlist_del_rcu(&f->fn_hash);
-
-			new_head = rcu_dereference_protected(fz->fz_hash, 1) +
-				   fn_hash(f->fn_key, fz);
-			hlist_add_head_rcu(&f->fn_hash, new_head);
-		}
-	}
-}
-
-static void fz_hash_free(struct hlist_head *hash, int divisor)
-{
-	unsigned long size = divisor * sizeof(struct hlist_head);
-
-	if (size <= PAGE_SIZE)
-		kfree(hash);
-	else
-		free_pages((unsigned long)hash, get_order(size));
-}
-
-static void fn_rehash_zone(struct fn_zone *fz)
-{
-	struct hlist_head *ht, *old_ht;
-	int old_divisor, new_divisor;
-	u32 new_hashmask;
-
-	new_divisor = old_divisor = fz->fz_divisor;
-
-	switch (old_divisor) {
-	case EMBEDDED_HASH_SIZE:
-		new_divisor *= EMBEDDED_HASH_SIZE;
-		break;
-	case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
-		new_divisor *= (EMBEDDED_HASH_SIZE/2);
-		break;
-	default:
-		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
-			printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
-			return;
-		}
-		new_divisor = (old_divisor << 1);
-		break;
-	}
-
-	new_hashmask = (new_divisor - 1);
-
-#if RT_CACHE_DEBUG >= 2
-	printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
-	       fz->fz_order, old_divisor);
-#endif
-
-	ht = fz_hash_alloc(new_divisor);
-
-	if (ht) {
-		struct fn_zone nfz;
-
-		memcpy(&nfz, fz, sizeof(nfz));
-
-		write_seqlock_bh(&fz->fz_lock);
-		old_ht = rcu_dereference_protected(fz->fz_hash, 1);
-		RCU_INIT_POINTER(nfz.fz_hash, ht);
-		nfz.fz_hashmask = new_hashmask;
-		nfz.fz_divisor = new_divisor;
-		fn_rebuild_zone(&nfz, old_ht, old_divisor);
-		fib_hash_genid++;
-		rcu_assign_pointer(fz->fz_hash, ht);
-		fz->fz_hashmask = new_hashmask;
-		fz->fz_divisor = new_divisor;
-		write_sequnlock_bh(&fz->fz_lock);
-
-		if (old_ht != fz->fz_embedded_hash) {
-			synchronize_rcu();
-			fz_hash_free(old_ht, old_divisor);
-		}
-	}
-}
-
-static void fn_free_node_rcu(struct rcu_head *head)
-{
-	struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
-
-	kmem_cache_free(fn_hash_kmem, f);
-}
-
-static inline void fn_free_node(struct fib_node *f)
-{
-	call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
-}
-
-static void fn_free_alias_rcu(struct rcu_head *head)
-{
-	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
-
-	kmem_cache_free(fn_alias_kmem, fa);
-}
-
-static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
-{
-	fib_release_info(fa->fa_info);
-	if (fa == &f->fn_embedded_alias)
-		fa->fa_info = NULL;
-	else
-		call_rcu(&fa->rcu, fn_free_alias_rcu);
-}
-
-static struct fn_zone *
-fn_new_zone(struct fn_hash *table, int z)
-{
-	int i;
-	struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
-	if (!fz)
-		return NULL;
-
-	seqlock_init(&fz->fz_lock);
-	fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
-	fz->fz_hashmask = fz->fz_divisor - 1;
-	RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
-	fz->fz_order = z;
-	fz->fz_revorder = 32 - z;
-	fz->fz_mask = inet_make_mask(z);
-
-	/* Find the first not empty zone with more specific mask */
-	for (i = z + 1; i <= 32; i++)
-		if (table->fn_zones[i])
-			break;
-	if (i > 32) {
-		/* No more specific masks, we are the first. */
-		rcu_assign_pointer(fz->fz_next,
-				   rtnl_dereference(table->fn_zone_list));
-		rcu_assign_pointer(table->fn_zone_list, fz);
-	} else {
-		rcu_assign_pointer(fz->fz_next,
-				   rtnl_dereference(table->fn_zones[i]->fz_next));
-		rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
-	}
-	table->fn_zones[z] = fz;
-	fib_hash_genid++;
-	return fz;
-}
-
-int fib_table_lookup(struct fib_table *tb,
-		     const struct flowi *flp, struct fib_result *res,
-		     int fib_flags)
-{
-	int err;
-	struct fn_zone *fz;
-	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
-
-	rcu_read_lock();
-	for (fz = rcu_dereference(t->fn_zone_list);
-	     fz != NULL;
-	     fz = rcu_dereference(fz->fz_next)) {
-		struct hlist_head *head;
-		struct hlist_node *node;
-		struct fib_node *f;
-		__be32 k;
-		unsigned int seq;
-
-		do {
-			seq = read_seqbegin(&fz->fz_lock);
-			k = fz_key(flp->fl4_dst, fz);
-
-			head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
-			hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-				if (f->fn_key != k)
-					continue;
-
-				err = fib_semantic_match(&f->fn_alias,
-							 flp, res,
-							 fz->fz_order, fib_flags);
-				if (err <= 0)
-					goto out;
-			}
-		} while (read_seqretry(&fz->fz_lock, seq));
-	}
-	err = 1;
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-void fib_table_select_default(struct fib_table *tb,
-			      const struct flowi *flp, struct fib_result *res)
-{
-	int order, last_idx;
-	struct hlist_node *node;
-	struct fib_node *f;
-	struct fib_info *fi = NULL;
-	struct fib_info *last_resort;
-	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
-	struct fn_zone *fz = t->fn_zones[0];
-	struct hlist_head *head;
-
-	if (fz == NULL)
-		return;
-
-	last_idx = -1;
-	last_resort = NULL;
-	order = -1;
-
-	rcu_read_lock();
-	head = rcu_dereference(fz->fz_hash);
-	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-		struct fib_alias *fa;
-
-		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
-			struct fib_info *next_fi = fa->fa_info;
-
-			if (fa->fa_scope != res->scope ||
-			    fa->fa_type != RTN_UNICAST)
-				continue;
-
-			if (next_fi->fib_priority > res->fi->fib_priority)
-				break;
-			if (!next_fi->fib_nh[0].nh_gw ||
-			    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
-				continue;
-
-			fib_alias_accessed(fa);
-
-			if (fi == NULL) {
-				if (next_fi != res->fi)
-					break;
-			} else if (!fib_detect_death(fi, order, &last_resort,
-						     &last_idx, tb->tb_default)) {
-				fib_result_assign(res, fi);
-				tb->tb_default = order;
-				goto out;
-			}
-			fi = next_fi;
-			order++;
-		}
-	}
-
-	if (order <= 0 || fi == NULL) {
-		tb->tb_default = -1;
-		goto out;
-	}
-
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
-			      tb->tb_default)) {
-		fib_result_assign(res, fi);
-		tb->tb_default = order;
-		goto out;
-	}
-
-	if (last_idx >= 0)
-		fib_result_assign(res, last_resort);
-	tb->tb_default = last_idx;
-out:
-	rcu_read_unlock();
-}
-
-/* Insert node F to FZ. */
-static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
-{
-	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
-
-	hlist_add_head_rcu(&f->fn_hash, head);
-}
-
-/* Return the node in FZ matching KEY. */
-static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
-{
-	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
-	struct hlist_node *node;
-	struct fib_node *f;
-
-	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-		if (f->fn_key == key)
-			return f;
-	}
-
-	return NULL;
-}
-
-
-static struct fib_alias *fib_fast_alloc(struct fib_node *f)
-{
-	struct fib_alias *fa = &f->fn_embedded_alias;
-
-	if (fa->fa_info != NULL)
-		fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
-	return fa;
-}
-
-/* Caller must hold RTNL. */
-int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
-{
-	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
-	struct fib_node *new_f = NULL;
-	struct fib_node *f;
-	struct fib_alias *fa, *new_fa;
-	struct fn_zone *fz;
-	struct fib_info *fi;
-	u8 tos = cfg->fc_tos;
-	__be32 key;
-	int err;
-
-	if (cfg->fc_dst_len > 32)
-		return -EINVAL;
-
-	fz = table->fn_zones[cfg->fc_dst_len];
-	if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
-		return -ENOBUFS;
-
-	key = 0;
-	if (cfg->fc_dst) {
-		if (cfg->fc_dst & ~FZ_MASK(fz))
-			return -EINVAL;
-		key = fz_key(cfg->fc_dst, fz);
-	}
-
-	fi = fib_create_info(cfg);
-	if (IS_ERR(fi))
-		return PTR_ERR(fi);
-
-	if (fz->fz_nent > (fz->fz_divisor<<1) &&
-	    fz->fz_divisor < FZ_MAX_DIVISOR &&
-	    (cfg->fc_dst_len == 32 ||
-	     (1 << cfg->fc_dst_len) > fz->fz_divisor))
-		fn_rehash_zone(fz);
-
-	f = fib_find_node(fz, key);
-
-	if (!f)
-		fa = NULL;
-	else
-		fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
-
-	/* Now fa, if non-NULL, points to the first fib alias
-	 * with the same keys [prefix,tos,priority], if such key already
-	 * exists or to the node before which we will insert new one.
-	 *
-	 * If fa is NULL, we will need to allocate a new one and
-	 * insert to the head of f.
-	 *
-	 * If f is NULL, no fib node matched the destination key
-	 * and we need to allocate a new one of those as well.
-	 */
-
-	if (fa && fa->fa_tos == tos &&
-	    fa->fa_info->fib_priority == fi->fib_priority) {
-		struct fib_alias *fa_first, *fa_match;
-
-		err = -EEXIST;
-		if (cfg->fc_nlflags & NLM_F_EXCL)
-			goto out;
-
-		/* We have 2 goals:
-		 * 1. Find exact match for type, scope, fib_info to avoid
-		 * duplicate routes
-		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
-		 */
-		fa_match = NULL;
-		fa_first = fa;
-		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-		list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
-			if (fa->fa_tos != tos)
-				break;
-			if (fa->fa_info->fib_priority != fi->fib_priority)
-				break;
-			if (fa->fa_type == cfg->fc_type &&
-			    fa->fa_scope == cfg->fc_scope &&
-			    fa->fa_info == fi) {
-				fa_match = fa;
-				break;
-			}
-		}
-
-		if (cfg->fc_nlflags & NLM_F_REPLACE) {
-			u8 state;
-
-			fa = fa_first;
-			if (fa_match) {
-				if (fa == fa_match)
-					err = 0;
-				goto out;
-			}
-			err = -ENOBUFS;
-			new_fa = fib_fast_alloc(f);
-			if (new_fa == NULL)
-				goto out;
-
-			new_fa->fa_tos = fa->fa_tos;
-			new_fa->fa_info = fi;
-			new_fa->fa_type = cfg->fc_type;
-			new_fa->fa_scope = cfg->fc_scope;
-			state = fa->fa_state;
-			new_fa->fa_state = state & ~FA_S_ACCESSED;
-			fib_hash_genid++;
-			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
-
-			fn_free_alias(fa, f);
-			if (state & FA_S_ACCESSED)
-				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-			rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
-				  tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
-			return 0;
-		}
-
-		/* Error if we find a perfect match which
-		 * uses the same scope, type, and nexthop
-		 * information.
-		 */
-		if (fa_match)
-			goto out;
-
-		if (!(cfg->fc_nlflags & NLM_F_APPEND))
-			fa = fa_first;
-	}
-
-	err = -ENOENT;
-	if (!(cfg->fc_nlflags & NLM_F_CREATE))
-		goto out;
-
-	err = -ENOBUFS;
-
-	if (!f) {
-		new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
-		if (new_f == NULL)
-			goto out;
-
-		INIT_HLIST_NODE(&new_f->fn_hash);
-		INIT_LIST_HEAD(&new_f->fn_alias);
-		new_f->fn_key = key;
-		f = new_f;
-	}
-
-	new_fa = fib_fast_alloc(f);
-	if (new_fa == NULL)
-		goto out;
-
-	new_fa->fa_info = fi;
-	new_fa->fa_tos = tos;
-	new_fa->fa_type = cfg->fc_type;
-	new_fa->fa_scope = cfg->fc_scope;
-	new_fa->fa_state = 0;
-
-	/*
-	 * Insert new entry to the list.
-	 */
-
-	if (new_f)
-		fib_insert_node(fz, new_f);
-	list_add_tail_rcu(&new_fa->fa_list,
-			  (fa ? &fa->fa_list : &f->fn_alias));
-	fib_hash_genid++;
-
-	if (new_f)
-		fz->fz_nent++;
-	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-
-	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
-		  &cfg->fc_nlinfo, 0);
-	return 0;
-
-out:
-	if (new_f)
-		kmem_cache_free(fn_hash_kmem, new_f);
-	fib_release_info(fi);
-	return err;
-}
-
-int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
-{
-	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
-	struct fib_node *f;
-	struct fib_alias *fa, *fa_to_delete;
-	struct fn_zone *fz;
-	__be32 key;
-
-	if (cfg->fc_dst_len > 32)
-		return -EINVAL;
-
-	if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
-		return -ESRCH;
-
-	key = 0;
-	if (cfg->fc_dst) {
-		if (cfg->fc_dst & ~FZ_MASK(fz))
-			return -EINVAL;
-		key = fz_key(cfg->fc_dst, fz);
-	}
-
-	f = fib_find_node(fz, key);
-
-	if (!f)
-		fa = NULL;
-	else
-		fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
-	if (!fa)
-		return -ESRCH;
-
-	fa_to_delete = NULL;
-	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
-		struct fib_info *fi = fa->fa_info;
-
-		if (fa->fa_tos != cfg->fc_tos)
-			break;
-
-		if ((!cfg->fc_type ||
-		     fa->fa_type == cfg->fc_type) &&
-		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == cfg->fc_scope) &&
-		    (!cfg->fc_protocol ||
-		     fi->fib_protocol == cfg->fc_protocol) &&
-		    fib_nh_match(cfg, fi) == 0) {
-			fa_to_delete = fa;
-			break;
-		}
-	}
-
-	if (fa_to_delete) {
-		int kill_fn;
-
-		fa = fa_to_delete;
-		rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
-			  tb->tb_id, &cfg->fc_nlinfo, 0);
-
-		kill_fn = 0;
-		list_del_rcu(&fa->fa_list);
-		if (list_empty(&f->fn_alias)) {
-			hlist_del_rcu(&f->fn_hash);
-			kill_fn = 1;
-		}
-		fib_hash_genid++;
-
-		if (fa->fa_state & FA_S_ACCESSED)
-			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-		fn_free_alias(fa, f);
-		if (kill_fn) {
-			fn_free_node(f);
-			fz->fz_nent--;
-		}
-
-		return 0;
-	}
-	return -ESRCH;
-}
-
-static int fn_flush_list(struct fn_zone *fz, int idx)
-{
-	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
-	struct hlist_node *node, *n;
-	struct fib_node *f;
-	int found = 0;
-
-	hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
-		struct fib_alias *fa, *fa_node;
-		int kill_f;
-
-		kill_f = 0;
-		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
-			struct fib_info *fi = fa->fa_info;
-
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
-				list_del_rcu(&fa->fa_list);
-				if (list_empty(&f->fn_alias)) {
-					hlist_del_rcu(&f->fn_hash);
-					kill_f = 1;
-				}
-				fib_hash_genid++;
-
-				fn_free_alias(fa, f);
-				found++;
-			}
-		}
-		if (kill_f) {
-			fn_free_node(f);
-			fz->fz_nent--;
-		}
-	}
-	return found;
-}
-
-/* caller must hold RTNL. */
-int fib_table_flush(struct fib_table *tb)
-{
-	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
-	struct fn_zone *fz;
-	int found = 0;
-
-	for (fz = rtnl_dereference(table->fn_zone_list);
-	     fz != NULL;
-	     fz = rtnl_dereference(fz->fz_next)) {
-		int i;
-
-		for (i = fz->fz_divisor - 1; i >= 0; i--)
-			found += fn_flush_list(fz, i);
-	}
-	return found;
-}
-
-void fib_free_table(struct fib_table *tb)
-{
-	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
-	struct fn_zone *fz, *next;
-
-	next = table->fn_zone_list;
-	while (next != NULL) {
-		fz = next;
-		next = fz->fz_next;
-
-		if (fz->fz_hash != fz->fz_embedded_hash)
-			fz_hash_free(fz->fz_hash, fz->fz_divisor);
-
-		kfree(fz);
-	}
-
-	kfree(tb);
-}
-
-static inline int
-fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
-		    struct fib_table *tb,
-		    struct fn_zone *fz,
-		    struct hlist_head *head)
-{
-	struct hlist_node *node;
-	struct fib_node *f;
-	int i, s_i;
-
-	s_i = cb->args[4];
-	i = 0;
-	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-		struct fib_alias *fa;
-
-		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
-			if (i < s_i)
-				goto next;
-
-			if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq,
-					  RTM_NEWROUTE,
-					  tb->tb_id,
-					  fa->fa_type,
-					  fa->fa_scope,
-					  f->fn_key,
-					  fz->fz_order,
-					  fa->fa_tos,
-					  fa->fa_info,
-					  NLM_F_MULTI) < 0) {
-				cb->args[4] = i;
-				return -1;
-			}
-next:
-			i++;
-		}
-	}
-	cb->args[4] = i;
-	return skb->len;
-}
-
-static inline int
-fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
-		  struct fib_table *tb,
-		  struct fn_zone *fz)
-{
-	int h, s_h;
-	struct hlist_head *head = rcu_dereference(fz->fz_hash);
-
-	if (head == NULL)
-		return skb->len;
-	s_h = cb->args[3];
-	for (h = s_h; h < fz->fz_divisor; h++) {
-		if (hlist_empty(head + h))
-			continue;
-		if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
-			cb->args[3] = h;
-			return -1;
-		}
-		memset(&cb->args[4], 0,
-		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
-	}
-	cb->args[3] = h;
-	return skb->len;
-}
-
-int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
-		   struct netlink_callback *cb)
-{
-	int m = 0, s_m;
-	struct fn_zone *fz;
-	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
-
-	s_m = cb->args[2];
-	rcu_read_lock();
-	for (fz = rcu_dereference(table->fn_zone_list);
-	     fz != NULL;
-	     fz = rcu_dereference(fz->fz_next), m++) {
-		if (m < s_m)
-			continue;
-		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
-			cb->args[2] = m;
-			rcu_read_unlock();
-			return -1;
-		}
-		memset(&cb->args[3], 0,
-		       sizeof(cb->args) - 3*sizeof(cb->args[0]));
-	}
-	rcu_read_unlock();
-	cb->args[2] = m;
-	return skb->len;
-}
-
-void __init fib_hash_init(void)
-{
-	fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
-					 0, SLAB_PANIC, NULL);
-
-	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
-					  0, SLAB_PANIC, NULL);
-
-}
-
-struct fib_table *fib_hash_table(u32 id)
-{
-	struct fib_table *tb;
-
-	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
-		     GFP_KERNEL);
-	if (tb == NULL)
-		return NULL;
-
-	tb->tb_id = id;
-	tb->tb_default = -1;
-
-	memset(tb->tb_data, 0, sizeof(struct fn_hash));
-	return tb;
-}
-
-/* ------------------------------------------------------------------------ */
-#ifdef CONFIG_PROC_FS
-
-struct fib_iter_state {
-	struct seq_net_private p;
-	struct fn_zone *zone;
-	int bucket;
-	struct hlist_head *hash_head;
-	struct fib_node *fn;
-	struct fib_alias *fa;
-	loff_t pos;
-	unsigned int genid;
-	int valid;
-};
-
-static struct fib_alias *fib_get_first(struct seq_file *seq)
-{
-	struct fib_iter_state *iter = seq->private;
-	struct fib_table *main_table;
-	struct fn_hash *table;
-
-	main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
-	table = (struct fn_hash *)main_table->tb_data;
-
-	iter->bucket = 0;
-	iter->hash_head = NULL;
-	iter->fn = NULL;
-	iter->fa = NULL;
-	iter->pos = 0;
-	iter->genid = fib_hash_genid;
-	iter->valid = 1;
-
-	for (iter->zone = rcu_dereference(table->fn_zone_list);
-	     iter->zone != NULL;
-	     iter->zone = rcu_dereference(iter->zone->fz_next)) {
-		int maxslot;
-
-		if (!iter->zone->fz_nent)
-			continue;
-
-		iter->hash_head = rcu_dereference(iter->zone->fz_hash);
-		maxslot = iter->zone->fz_divisor;
-
-		for (iter->bucket = 0; iter->bucket < maxslot;
-		     ++iter->bucket, ++iter->hash_head) {
-			struct hlist_node *node;
-			struct fib_node *fn;
-
-			hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
-				struct fib_alias *fa;
-
-				list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-					iter->fn = fn;
-					iter->fa = fa;
-					goto out;
-				}
-			}
-		}
-	}
-out:
-	return iter->fa;
-}
-
-static struct fib_alias *fib_get_next(struct seq_file *seq)
-{
-	struct fib_iter_state *iter = seq->private;
-	struct fib_node *fn;
-	struct fib_alias *fa;
-
-	/* Advance FA, if any. */
-	fn = iter->fn;
-	fa = iter->fa;
-	if (fa) {
-		BUG_ON(!fn);
-		list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
-			iter->fa = fa;
-			goto out;
-		}
-	}
-
-	fa = iter->fa = NULL;
-
-	/* Advance FN. */
-	if (fn) {
-		struct hlist_node *node = &fn->fn_hash;
-		hlist_for_each_entry_continue(fn, node, fn_hash) {
-			iter->fn = fn;
-
-			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-				iter->fa = fa;
-				goto out;
-			}
-		}
-	}
-
-	fn = iter->fn = NULL;
-
-	/* Advance hash chain. */
-	if (!iter->zone)
-		goto out;
-
-	for (;;) {
-		struct hlist_node *node;
-		int maxslot;
-
-		maxslot = iter->zone->fz_divisor;
-
-		while (++iter->bucket < maxslot) {
-			iter->hash_head++;
-
-			hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
-				list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-					iter->fn = fn;
-					iter->fa = fa;
-					goto out;
-				}
-			}
-		}
-
-		iter->zone = rcu_dereference(iter->zone->fz_next);
-
-		if (!iter->zone)
-			goto out;
-
-		iter->bucket = 0;
-		iter->hash_head = rcu_dereference(iter->zone->fz_hash);
-
-		hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
-			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-				iter->fn = fn;
-				iter->fa = fa;
-				goto out;
-			}
-		}
-	}
-out:
-	iter->pos++;
-	return fa;
-}
-
-static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct fib_iter_state *iter = seq->private;
-	struct fib_alias *fa;
-
-	if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
-		fa = iter->fa;
-		pos -= iter->pos;
-	} else
-		fa = fib_get_first(seq);
-
-	if (fa)
-		while (pos && (fa = fib_get_next(seq)))
-			--pos;
-	return pos ? NULL : fa;
-}
-
-static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	void *v = NULL;
-
-	rcu_read_lock();
-	if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
-		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-	return v;
-}
-
-static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
-}
-
-static void fib_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
-{
-	static const unsigned type2flags[RTN_MAX + 1] = {
-		[7] = RTF_REJECT,
-		[8] = RTF_REJECT,
-	};
-	unsigned flags = type2flags[type];
-
-	if (fi && fi->fib_nh->nh_gw)
-		flags |= RTF_GATEWAY;
-	if (mask == htonl(0xFFFFFFFF))
-		flags |= RTF_HOST;
-	flags |= RTF_UP;
-	return flags;
-}
-
-/*
- * This outputs /proc/net/route.
- *
- * It always works in backward compatibility mode.
- * The format of the file is not supposed to be changed.
- */
-static int fib_seq_show(struct seq_file *seq, void *v)
-{
-	struct fib_iter_state *iter;
-	int len;
-	__be32 prefix, mask;
-	unsigned flags;
-	struct fib_node *f;
-	struct fib_alias *fa;
-	struct fib_info *fi;
-
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
-			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
-			   "\tWindow\tIRTT");
-		goto out;
-	}
-
-	iter = seq->private;
-	f = iter->fn;
-	fa = iter->fa;
-	fi = fa->fa_info;
-	prefix = f->fn_key;
-	mask = FZ_MASK(iter->zone);
-	flags = fib_flag_trans(fa->fa_type, mask, fi);
-	if (fi)
-		seq_printf(seq,
-			   "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
-			   fi->fib_dev ? fi->fib_dev->name : "*", prefix,
-			   fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
-			   mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
-			   fi->fib_window,
-			   fi->fib_rtt >> 3, &len);
-	else
-		seq_printf(seq,
-			   "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
-			   prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len);
-
-	seq_printf(seq, "%*s\n", 127 - len, "");
-out:
-	return 0;
-}
-
-static const struct seq_operations fib_seq_ops = {
-	.start = fib_seq_start,
-	.next = fib_seq_next,
-	.stop = fib_seq_stop,
-	.show = fib_seq_show,
-};
-
-static int fib_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &fib_seq_ops,
-			    sizeof(struct fib_iter_state));
-}
-
-static const struct file_operations fib_seq_fops = {
-	.owner = THIS_MODULE,
-	.open = fib_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_net,
-};
-
-int __net_init fib_proc_init(struct net *net)
-{
-	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-void __net_exit fib_proc_exit(struct net *net)
-{
-	proc_net_remove(net, "route");
-}
-#endif /* CONFIG_PROC_FS */
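The deleted file also records why the design lost: one fn_zone per prefix length, each with its own hash table, seqlock, and rehashing, so a lookup could probe up to 33 zones from most to least specific before giving up. The per-zone hash itself was a simple shift-and-fold, sketched below for reference (valid for orders 1 through 32; the /0 zone had a single bucket anyway):

#include <stdint.h>
#include <stdio.h>

/* The per-zone hash the removed code used: keep only the prefix bits
 * of the (host-order) key, then xor-fold down into the bucket mask. */
static uint32_t fn_hash(uint32_t key_host, int fz_order, uint32_t hashmask)
{
	uint32_t h = key_host >> (32 - fz_order);

	h ^= h >> 20;
	h ^= h >> 10;
	h ^= h >> 5;
	return h & hashmask;
}

int main(void)
{
	/* 10.1.0.0/16 hashed into a 16-bucket zone table */
	printf("bucket %u\n", fn_hash(0x0a010000, 16, 15));
	return 0;
}

Contrast with the trie sketch after the Makefile diff: the trie finds the longest match in a single descent instead of restarting a probe for every prefix length present.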
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c079cc0ec651..d5c40d8f6632 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -25,7 +25,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
 }
 
 /* Exported by fib_semantics.c */
-extern int fib_semantic_match(struct list_head *head,
+extern int fib_semantic_match(struct fib_table *tb, struct list_head *head,
 			      const struct flowi *flp,
 			      struct fib_result *res, int prefixlen, int fib_flags);
 extern void fib_release_info(struct fib_info *);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24f5c7b..3018efbaea77 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,13 +41,13 @@ struct fib4_rule {
 	__be32 srcmask;
 	__be32 dst;
 	__be32 dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	u32 tclassid;
 #endif
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
-u32 fib_rules_tclass(struct fib_result *res)
+#ifdef CONFIG_IP_ROUTE_CLASSID
+u32 fib_rules_tclass(const struct fib_result *res)
 {
 	return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
 }
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	if (frh->dst_len)
 		rule4->dst = nla_get_be32(tb[FRA_DST]);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW])
 		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->tos && (rule4->tos != frh->tos))
 		return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
 		return 0;
 #endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	if (rule4->src_len)
 		NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rule4->tclassid)
 		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
 #endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 12d3dc3df1b7..562f34cd9303 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -49,7 +49,7 @@ | |||
49 | static DEFINE_SPINLOCK(fib_info_lock); | 49 | static DEFINE_SPINLOCK(fib_info_lock); |
50 | static struct hlist_head *fib_info_hash; | 50 | static struct hlist_head *fib_info_hash; |
51 | static struct hlist_head *fib_info_laddrhash; | 51 | static struct hlist_head *fib_info_laddrhash; |
52 | static unsigned int fib_hash_size; | 52 | static unsigned int fib_info_hash_size; |
53 | static unsigned int fib_info_cnt; | 53 | static unsigned int fib_info_cnt; |
54 | 54 | ||
55 | #define DEVINDEX_HASHBITS 8 | 55 | #define DEVINDEX_HASHBITS 8 |
@@ -152,6 +152,8 @@ static void free_fib_info_rcu(struct rcu_head *head) | |||
152 | { | 152 | { |
153 | struct fib_info *fi = container_of(head, struct fib_info, rcu); | 153 | struct fib_info *fi = container_of(head, struct fib_info, rcu); |
154 | 154 | ||
155 | if (fi->fib_metrics != (u32 *) dst_default_metrics) | ||
156 | kfree(fi->fib_metrics); | ||
155 | kfree(fi); | 157 | kfree(fi); |
156 | } | 158 | } |
157 | 159 | ||
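
The RCU callback above receives only the embedded rcu_head; container_of() recovers the enclosing fib_info by subtracting the member's offset within the struct. (The added kfree is paired with the allocation further down: metrics are freed only when they are not the shared default array.) A minimal standalone illustration of the container_of() pattern:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { void (*func)(struct rcu_head *); };

struct obj {
    int payload;
    struct rcu_head rcu;    /* embedded, deliberately not the first member */
};

static void obj_free(struct rcu_head *head)
{
    struct obj *o = container_of(head, struct obj, rcu);

    printf("freeing obj, payload %d\n", o->payload);
    free(o);
}

int main(void)
{
    struct obj *o = malloc(sizeof(*o));

    o->payload = 42;
    o->rcu.func = obj_free;
    o->rcu.func(&o->rcu);  /* stands in for the deferred call_rcu() invocation */
    return 0;
}
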
@@ -200,7 +202,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | |||
200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 202 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
201 | nh->nh_weight != onh->nh_weight || | 203 | nh->nh_weight != onh->nh_weight || |
202 | #endif | 204 | #endif |
203 | #ifdef CONFIG_NET_CLS_ROUTE | 205 | #ifdef CONFIG_IP_ROUTE_CLASSID |
204 | nh->nh_tclassid != onh->nh_tclassid || | 206 | nh->nh_tclassid != onh->nh_tclassid || |
205 | #endif | 207 | #endif |
206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) | 208 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) |
@@ -221,7 +223,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) | |||
221 | 223 | ||
222 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) | 224 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) |
223 | { | 225 | { |
224 | unsigned int mask = (fib_hash_size - 1); | 226 | unsigned int mask = (fib_info_hash_size - 1); |
225 | unsigned int val = fi->fib_nhs; | 227 | unsigned int val = fi->fib_nhs; |
226 | 228 | ||
227 | val ^= fi->fib_protocol; | 229 | val ^= fi->fib_protocol; |
@@ -422,7 +424,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
422 | 424 | ||
423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 425 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 426 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
425 | #ifdef CONFIG_NET_CLS_ROUTE | 427 | #ifdef CONFIG_IP_ROUTE_CLASSID |
426 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 428 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 429 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
428 | #endif | 430 | #endif |
@@ -476,7 +478,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) | |||
476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 478 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
477 | if (nla && nla_get_be32(nla) != nh->nh_gw) | 479 | if (nla && nla_get_be32(nla) != nh->nh_gw) |
478 | return 1; | 480 | return 1; |
479 | #ifdef CONFIG_NET_CLS_ROUTE | 481 | #ifdef CONFIG_IP_ROUTE_CLASSID |
480 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 482 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) | 483 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) |
482 | return 1; | 484 | return 1; |
@@ -613,14 +615,14 @@ out: | |||
613 | 615 | ||
614 | static inline unsigned int fib_laddr_hashfn(__be32 val) | 616 | static inline unsigned int fib_laddr_hashfn(__be32 val) |
615 | { | 617 | { |
616 | unsigned int mask = (fib_hash_size - 1); | 618 | unsigned int mask = (fib_info_hash_size - 1); |
617 | 619 | ||
618 | return ((__force u32)val ^ | 620 | return ((__force u32)val ^ |
619 | ((__force u32)val >> 7) ^ | 621 | ((__force u32)val >> 7) ^ |
620 | ((__force u32)val >> 14)) & mask; | 622 | ((__force u32)val >> 14)) & mask; |
621 | } | 623 | } |
622 | 624 | ||
623 | static struct hlist_head *fib_hash_alloc(int bytes) | 625 | static struct hlist_head *fib_info_hash_alloc(int bytes) |
624 | { | 626 | { |
625 | if (bytes <= PAGE_SIZE) | 627 | if (bytes <= PAGE_SIZE) |
626 | return kzalloc(bytes, GFP_KERNEL); | 628 | return kzalloc(bytes, GFP_KERNEL); |
@@ -630,7 +632,7 @@ static struct hlist_head *fib_hash_alloc(int bytes) | |||
630 | get_order(bytes)); | 632 | get_order(bytes)); |
631 | } | 633 | } |
632 | 634 | ||
633 | static void fib_hash_free(struct hlist_head *hash, int bytes) | 635 | static void fib_info_hash_free(struct hlist_head *hash, int bytes) |
634 | { | 636 | { |
635 | if (!hash) | 637 | if (!hash) |
636 | return; | 638 | return; |
@@ -641,18 +643,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes) | |||
641 | free_pages((unsigned long) hash, get_order(bytes)); | 643 | free_pages((unsigned long) hash, get_order(bytes)); |
642 | } | 644 | } |
643 | 645 | ||
644 | static void fib_hash_move(struct hlist_head *new_info_hash, | 646 | static void fib_info_hash_move(struct hlist_head *new_info_hash, |
645 | struct hlist_head *new_laddrhash, | 647 | struct hlist_head *new_laddrhash, |
646 | unsigned int new_size) | 648 | unsigned int new_size) |
647 | { | 649 | { |
648 | struct hlist_head *old_info_hash, *old_laddrhash; | 650 | struct hlist_head *old_info_hash, *old_laddrhash; |
649 | unsigned int old_size = fib_hash_size; | 651 | unsigned int old_size = fib_info_hash_size; |
650 | unsigned int i, bytes; | 652 | unsigned int i, bytes; |
651 | 653 | ||
652 | spin_lock_bh(&fib_info_lock); | 654 | spin_lock_bh(&fib_info_lock); |
653 | old_info_hash = fib_info_hash; | 655 | old_info_hash = fib_info_hash; |
654 | old_laddrhash = fib_info_laddrhash; | 656 | old_laddrhash = fib_info_laddrhash; |
655 | fib_hash_size = new_size; | 657 | fib_info_hash_size = new_size; |
656 | 658 | ||
657 | for (i = 0; i < old_size; i++) { | 659 | for (i = 0; i < old_size; i++) { |
658 | struct hlist_head *head = &fib_info_hash[i]; | 660 | struct hlist_head *head = &fib_info_hash[i]; |
@@ -693,8 +695,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash, | |||
693 | spin_unlock_bh(&fib_info_lock); | 695 | spin_unlock_bh(&fib_info_lock); |
694 | 696 | ||
695 | bytes = old_size * sizeof(struct hlist_head *); | 697 | bytes = old_size * sizeof(struct hlist_head *); |
696 | fib_hash_free(old_info_hash, bytes); | 698 | fib_info_hash_free(old_info_hash, bytes); |
697 | fib_hash_free(old_laddrhash, bytes); | 699 | fib_info_hash_free(old_laddrhash, bytes); |
698 | } | 700 | } |
699 | 701 | ||
700 | struct fib_info *fib_create_info(struct fib_config *cfg) | 702 | struct fib_info *fib_create_info(struct fib_config *cfg) |
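
The renamed fib_info_hash_move() is the rehash half of a grow-by-doubling hash table: sizes stay powers of two so the hash functions can mask rather than divide, and when fib_info_cnt reaches the table size (next hunk) the table doubles and every chain is rehashed. A single-threaded sketch of the scheme (the kernel version does the move under fib_info_lock):

#include <stdlib.h>

struct node { unsigned int key; struct node *next; };

static struct node **table;
static unsigned int table_size;    /* always zero or a power of two */
static unsigned int count;

static unsigned int hashfn(unsigned int key)
{
    return key & (table_size - 1); /* cheap modulo for power-of-two sizes */
}

static void grow(void)
{
    unsigned int new_size = table_size ? table_size << 1 : 1;
    struct node **nt = calloc(new_size, sizeof(*nt));
    struct node **old = table;
    unsigned int i, old_size = table_size;

    if (!nt)
        return;                    /* keep the old table on failure */
    table = nt;
    table_size = new_size;
    for (i = 0; i < old_size; i++) {
        struct node *n = old[i];

        while (n) {                /* rehash the whole chain */
            struct node *next = n->next;
            unsigned int h = hashfn(n->key);

            n->next = table[h];
            table[h] = n;
            n = next;
        }
    }
    free(old);
}

static int insert(struct node *n)
{
    unsigned int h;

    if (count >= table_size)       /* same trigger fib_create_info() uses */
        grow();
    if (!table_size)
        return -1;                 /* initial allocation failed */
    h = hashfn(n->key);
    n->next = table[h];
    table[h] = n;
    count++;
    return 0;
}
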
@@ -718,8 +720,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
718 | #endif | 720 | #endif |
719 | 721 | ||
720 | err = -ENOBUFS; | 722 | err = -ENOBUFS; |
721 | if (fib_info_cnt >= fib_hash_size) { | 723 | if (fib_info_cnt >= fib_info_hash_size) { |
722 | unsigned int new_size = fib_hash_size << 1; | 724 | unsigned int new_size = fib_info_hash_size << 1; |
723 | struct hlist_head *new_info_hash; | 725 | struct hlist_head *new_info_hash; |
724 | struct hlist_head *new_laddrhash; | 726 | struct hlist_head *new_laddrhash; |
725 | unsigned int bytes; | 727 | unsigned int bytes; |
@@ -727,21 +729,27 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
727 | if (!new_size) | 729 | if (!new_size) |
728 | new_size = 1; | 730 | new_size = 1; |
729 | bytes = new_size * sizeof(struct hlist_head *); | 731 | bytes = new_size * sizeof(struct hlist_head *); |
730 | new_info_hash = fib_hash_alloc(bytes); | 732 | new_info_hash = fib_info_hash_alloc(bytes); |
731 | new_laddrhash = fib_hash_alloc(bytes); | 733 | new_laddrhash = fib_info_hash_alloc(bytes); |
732 | if (!new_info_hash || !new_laddrhash) { | 734 | if (!new_info_hash || !new_laddrhash) { |
733 | fib_hash_free(new_info_hash, bytes); | 735 | fib_info_hash_free(new_info_hash, bytes); |
734 | fib_hash_free(new_laddrhash, bytes); | 736 | fib_info_hash_free(new_laddrhash, bytes); |
735 | } else | 737 | } else |
736 | fib_hash_move(new_info_hash, new_laddrhash, new_size); | 738 | fib_info_hash_move(new_info_hash, new_laddrhash, new_size); |
737 | 739 | ||
738 | if (!fib_hash_size) | 740 | if (!fib_info_hash_size) |
739 | goto failure; | 741 | goto failure; |
740 | } | 742 | } |
741 | 743 | ||
742 | fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); | 744 | fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); |
743 | if (fi == NULL) | 745 | if (fi == NULL) |
744 | goto failure; | 746 | goto failure; |
747 | if (cfg->fc_mx) { | ||
748 | fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); | ||
749 | if (!fi->fib_metrics) | ||
750 | goto failure; | ||
751 | } else | ||
752 | fi->fib_metrics = (u32 *) dst_default_metrics; | ||
745 | fib_info_cnt++; | 753 | fib_info_cnt++; |
746 | 754 | ||
747 | fi->fib_net = hold_net(net); | 755 | fi->fib_net = hold_net(net); |
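
The added lines implement copy-on-write for route metrics: a route configured without explicit metrics (no cfg->fc_mx) points at the shared, read-only dst_default_metrics array, and only routes carrying their own values pay for a private RTAX_MAX allocation. The pointer-identity test in free_fib_info_rcu() earlier is what keeps the shared array from ever being kfree'd. Roughly, with placeholder names:

#include <stdlib.h>

#define RTAX_MAX 16
static const unsigned int default_metrics[RTAX_MAX]; /* shared, all zero */

struct route_info {
    unsigned int *metrics;
};

static int route_info_init(struct route_info *ri, int has_private_metrics)
{
    if (has_private_metrics) {
        ri->metrics = calloc(RTAX_MAX, sizeof(unsigned int));
        if (!ri->metrics)
            return -1;
    } else {
        /* the common case shares one read-only array */
        ri->metrics = (unsigned int *)default_metrics;
    }
    return 0;
}

static void route_info_free(struct route_info *ri)
{
    /* free only what we own; the shared default is never freed */
    if (ri->metrics != (unsigned int *)default_metrics)
        free(ri->metrics);
}
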
@@ -779,7 +787,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
779 | goto err_inval; | 787 | goto err_inval; |
780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) | 788 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) |
781 | goto err_inval; | 789 | goto err_inval; |
782 | #ifdef CONFIG_NET_CLS_ROUTE | 790 | #ifdef CONFIG_IP_ROUTE_CLASSID |
783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) | 791 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) |
784 | goto err_inval; | 792 | goto err_inval; |
785 | #endif | 793 | #endif |
@@ -792,7 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
792 | nh->nh_oif = cfg->fc_oif; | 800 | nh->nh_oif = cfg->fc_oif; |
793 | nh->nh_gw = cfg->fc_gw; | 801 | nh->nh_gw = cfg->fc_gw; |
794 | nh->nh_flags = cfg->fc_flags; | 802 | nh->nh_flags = cfg->fc_flags; |
795 | #ifdef CONFIG_NET_CLS_ROUTE | 803 | #ifdef CONFIG_IP_ROUTE_CLASSID |
796 | nh->nh_tclassid = cfg->fc_flow; | 804 | nh->nh_tclassid = cfg->fc_flow; |
797 | #endif | 805 | #endif |
798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 806 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -881,8 +889,9 @@ failure: | |||
881 | } | 889 | } |
882 | 890 | ||
883 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ | 891 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ |
884 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, | 892 | int fib_semantic_match(struct fib_table *tb, struct list_head *head, |
885 | struct fib_result *res, int prefixlen, int fib_flags) | 893 | const struct flowi *flp, struct fib_result *res, |
894 | int prefixlen, int fib_flags) | ||
886 | { | 895 | { |
887 | struct fib_alias *fa; | 896 | struct fib_alias *fa; |
888 | int nh_sel = 0; | 897 | int nh_sel = 0; |
@@ -946,6 +955,8 @@ out_fill_res: | |||
946 | res->type = fa->fa_type; | 955 | res->type = fa->fa_type; |
947 | res->scope = fa->fa_scope; | 956 | res->scope = fa->fa_scope; |
948 | res->fi = fa->fa_info; | 957 | res->fi = fa->fa_info; |
958 | res->table = tb; | ||
959 | res->fa_head = head; | ||
949 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | 960 | if (!(fib_flags & FIB_LOOKUP_NOREF)) |
950 | atomic_inc(&res->fi->fib_clntref); | 961 | atomic_inc(&res->fi->fib_clntref); |
951 | return 0; | 962 | return 0; |
@@ -1002,7 +1013,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1002 | 1013 | ||
1003 | if (fi->fib_nh->nh_oif) | 1014 | if (fi->fib_nh->nh_oif) |
1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 1015 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); |
1005 | #ifdef CONFIG_NET_CLS_ROUTE | 1016 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1006 | if (fi->fib_nh[0].nh_tclassid) | 1017 | if (fi->fib_nh[0].nh_tclassid) |
1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 1018 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); |
1008 | #endif | 1019 | #endif |
@@ -1027,7 +1038,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1027 | 1038 | ||
1028 | if (nh->nh_gw) | 1039 | if (nh->nh_gw) |
1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 1040 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); |
1030 | #ifdef CONFIG_NET_CLS_ROUTE | 1041 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1031 | if (nh->nh_tclassid) | 1042 | if (nh->nh_tclassid) |
1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 1043 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); |
1033 | #endif | 1044 | #endif |
@@ -1125,6 +1136,62 @@ int fib_sync_down_dev(struct net_device *dev, int force) | |||
1125 | return ret; | 1136 | return ret; |
1126 | } | 1137 | } |
1127 | 1138 | ||
1139 | /* Must be invoked inside of an RCU protected region. */ | ||
1140 | void fib_select_default(struct fib_result *res) | ||
1141 | { | ||
1142 | struct fib_info *fi = NULL, *last_resort = NULL; | ||
1143 | struct list_head *fa_head = res->fa_head; | ||
1144 | struct fib_table *tb = res->table; | ||
1145 | int order = -1, last_idx = -1; | ||
1146 | struct fib_alias *fa; | ||
1147 | |||
1148 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
1149 | struct fib_info *next_fi = fa->fa_info; | ||
1150 | |||
1151 | if (fa->fa_scope != res->scope || | ||
1152 | fa->fa_type != RTN_UNICAST) | ||
1153 | continue; | ||
1154 | |||
1155 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
1156 | break; | ||
1157 | if (!next_fi->fib_nh[0].nh_gw || | ||
1158 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
1159 | continue; | ||
1160 | |||
1161 | fib_alias_accessed(fa); | ||
1162 | |||
1163 | if (fi == NULL) { | ||
1164 | if (next_fi != res->fi) | ||
1165 | break; | ||
1166 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
1167 | &last_idx, tb->tb_default)) { | ||
1168 | fib_result_assign(res, fi); | ||
1169 | tb->tb_default = order; | ||
1170 | goto out; | ||
1171 | } | ||
1172 | fi = next_fi; | ||
1173 | order++; | ||
1174 | } | ||
1175 | |||
1176 | if (order <= 0 || fi == NULL) { | ||
1177 | tb->tb_default = -1; | ||
1178 | goto out; | ||
1179 | } | ||
1180 | |||
1181 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
1182 | tb->tb_default)) { | ||
1183 | fib_result_assign(res, fi); | ||
1184 | tb->tb_default = order; | ||
1185 | goto out; | ||
1186 | } | ||
1187 | |||
1188 | if (last_idx >= 0) | ||
1189 | fib_result_assign(res, last_resort); | ||
1190 | tb->tb_default = last_idx; | ||
1191 | out: | ||
1192 | return; | ||
1193 | } | ||
1194 | |||
1128 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1195 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1129 | 1196 | ||
1130 | /* | 1197 | /* |
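
fib_select_default() moves here from fib_trie.c (its removal appears further down) and now works purely from the res->fa_head and res->table pointers that fib_semantic_match() records, so no fresh lookup of the default-route leaf is needed. The loop scans same-scope unicast aliases in priority order, skips gateways whose neighbour looks dead, and remembers a last resort. A deliberately simplified model of that selection, not the kernel logic verbatim:

struct candidate {
    int priority;
    int alive;          /* stand-in for !fib_detect_death() */
};

/* Candidates arrive priority-ordered; take the first live one that is
 * no worse than the current choice, else fall back to the last seen. */
static int select_default(const struct candidate *c, int n, int current)
{
    int last_resort = -1;
    int i;

    for (i = 0; i < n; i++) {
        if (c[i].priority > c[current].priority)
            break;
        if (c[i].alive)
            return i;
        last_resort = i;
    }
    return last_resort >= 0 ? last_resort : current;
}
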
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f280348e0fd..edf3b0997e01 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -95,7 +95,7 @@ typedef unsigned int t_key; | |||
95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | 95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) |
96 | #define IS_LEAF(n) (n->parent & T_LEAF) | 96 | #define IS_LEAF(n) (n->parent & T_LEAF) |
97 | 97 | ||
98 | struct node { | 98 | struct rt_trie_node { |
99 | unsigned long parent; | 99 | unsigned long parent; |
100 | t_key key; | 100 | t_key key; |
101 | }; | 101 | }; |
@@ -126,7 +126,7 @@ struct tnode { | |||
126 | struct work_struct work; | 126 | struct work_struct work; |
127 | struct tnode *tnode_free; | 127 | struct tnode *tnode_free; |
128 | }; | 128 | }; |
129 | struct node *child[0]; | 129 | struct rt_trie_node *child[0]; |
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,16 +151,16 @@ struct trie_stat { | |||
151 | }; | 151 | }; |
152 | 152 | ||
153 | struct trie { | 153 | struct trie { |
154 | struct node *trie; | 154 | struct rt_trie_node *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 156 | struct trie_use_stats stats; |
157 | #endif | 157 | #endif |
158 | }; | 158 | }; |
159 | 159 | ||
160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | 160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); |
161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
162 | int wasfull); | 162 | int wasfull); |
163 | static struct node *resize(struct trie *t, struct tnode *tn); | 163 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); |
164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
165 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
166 | /* tnodes to free after resize(); protected by RTNL */ | 166 | /* tnodes to free after resize(); protected by RTNL */ |
@@ -177,12 +177,12 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 179 | ||
180 | static inline struct tnode *node_parent(struct node *node) | 180 | static inline struct tnode *node_parent(struct rt_trie_node *node) |
181 | { | 181 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); |
183 | } | 183 | } |
184 | 184 | ||
185 | static inline struct tnode *node_parent_rcu(struct node *node) | 185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) |
186 | { | 186 | { |
187 | struct tnode *ret = node_parent(node); | 187 | struct tnode *ret = node_parent(node); |
188 | 188 | ||
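
The struct node to struct rt_trie_node rename is mechanical, but the helpers touched here show the trick the trie depends on: a node's parent field is an unsigned long whose low bit (T_LEAF) tags the node type, and node_parent() masks the tag off before dereferencing. That is safe because the nodes are at least word aligned, leaving bit 0 unused. A runnable illustration of low-bit pointer tagging:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define T_LEAF          1UL
#define NODE_TYPE_MASK  1UL

struct n { uintptr_t parent; };    /* tagged parent pointer plus type bit */

static struct n *node_parent(const struct n *node)
{
    return (struct n *)(node->parent & ~NODE_TYPE_MASK);
}

static void node_set_parent(struct n *node, struct n *parent, uintptr_t type)
{
    /* word-aligned allocations leave bit 0 free for the tag */
    assert(((uintptr_t)parent & NODE_TYPE_MASK) == 0);
    node->parent = (uintptr_t)parent | type;
}

int main(void)
{
    struct n root = { 0 }, leaf = { 0 };

    node_set_parent(&leaf, &root, T_LEAF);
    printf("is leaf: %lu, parent ok: %d\n",
           (unsigned long)(leaf.parent & NODE_TYPE_MASK),
           node_parent(&leaf) == &root);
    return 0;
}
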
@@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node) | |||
192 | /* Same as rcu_assign_pointer | 192 | /* Same as rcu_assign_pointer |
193 | * but that macro() assumes that value is a pointer. | 193 | * but that macro() assumes that value is a pointer. |
194 | */ | 194 | */ |
195 | static inline void node_set_parent(struct node *node, struct tnode *ptr) | 195 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) |
196 | { | 196 | { |
197 | smp_wmb(); | 197 | smp_wmb(); |
198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
199 | } | 199 | } |
200 | 200 | ||
201 | static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) | 201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) |
202 | { | 202 | { |
203 | BUG_ON(i >= 1U << tn->bits); | 203 | BUG_ON(i >= 1U << tn->bits); |
204 | 204 | ||
205 | return tn->child[i]; | 205 | return tn->child[i]; |
206 | } | 206 | } |
207 | 207 | ||
208 | static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) |
209 | { | 209 | { |
210 | struct node *ret = tnode_get_child(tn, i); | 210 | struct rt_trie_node *ret = tnode_get_child(tn, i); |
211 | 211 | ||
212 | return rcu_dereference_rtnl(ret); | 212 | return rcu_dereference_rtnl(ret); |
213 | } | 213 | } |
@@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn) | |||
217 | return 1 << tn->bits; | 217 | return 1 << tn->bits; |
218 | } | 218 | } |
219 | 219 | ||
220 | static inline t_key mask_pfx(t_key k, unsigned short l) | 220 | static inline t_key mask_pfx(t_key k, unsigned int l) |
221 | { | 221 | { |
222 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); | 222 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); |
223 | } | 223 | } |
224 | 224 | ||
225 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) | 225 | static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) |
226 | { | 226 | { |
227 | if (offset < KEYLENGTH) | 227 | if (offset < KEYLENGTH) |
228 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); | 228 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); |
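
These two helpers (now taking unsigned widths) carry the whole addressing scheme of the LC-trie: tkey_extract_bits() pulls a child index out of the key at a node's bit position, and mask_pfx() truncates a key to its leading prefix bits. A worked example, assuming the kernel's KEYLENGTH of 32:

#include <stdint.h>
#include <stdio.h>

#define KEYLENGTH 32

/* Take 'bits' bits of 'a', starting 'offset' bits from the MSB;
 * bits must be nonzero for the shift to be defined, as in the kernel. */
static uint32_t extract_bits(uint32_t a, unsigned int offset, unsigned int bits)
{
    if (offset < KEYLENGTH)
        return ((uint32_t)(a << offset)) >> (KEYLENGTH - bits);
    return 0;
}

/* Keep only the leading 'l' bits of the key: a prefix mask. */
static uint32_t mask_pfx(uint32_t k, unsigned int l)
{
    return (l == 0) ? 0 : k >> (KEYLENGTH - l) << (KEYLENGTH - l);
}

int main(void)
{
    uint32_t key = 0xC0A80101;      /* 192.168.1.1 in host order */

    /* child index for a node at bit 8 with a 2-bit fanout */
    printf("index = %u\n", extract_bits(key, 8, 2));        /* prints 2 */
    printf("/24 prefix = 0x%08X\n", mask_pfx(key, 24));     /* 0xC0A80100 */
    return 0;
}
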
@@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head) | |||
378 | { | 378 | { |
379 | struct tnode *tn = container_of(head, struct tnode, rcu); | 379 | struct tnode *tn = container_of(head, struct tnode, rcu); |
380 | size_t size = sizeof(struct tnode) + | 380 | size_t size = sizeof(struct tnode) + |
381 | (sizeof(struct node *) << tn->bits); | 381 | (sizeof(struct rt_trie_node *) << tn->bits); |
382 | 382 | ||
383 | if (size <= PAGE_SIZE) | 383 | if (size <= PAGE_SIZE) |
384 | kfree(tn); | 384 | kfree(tn); |
@@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn) | |||
402 | tn->tnode_free = tnode_free_head; | 402 | tn->tnode_free = tnode_free_head; |
403 | tnode_free_head = tn; | 403 | tnode_free_head = tn; |
404 | tnode_free_size += sizeof(struct tnode) + | 404 | tnode_free_size += sizeof(struct tnode) + |
405 | (sizeof(struct node *) << tn->bits); | 405 | (sizeof(struct rt_trie_node *) << tn->bits); |
406 | } | 406 | } |
407 | 407 | ||
408 | static void tnode_free_flush(void) | 408 | static void tnode_free_flush(void) |
@@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen) | |||
443 | 443 | ||
444 | static struct tnode *tnode_new(t_key key, int pos, int bits) | 444 | static struct tnode *tnode_new(t_key key, int pos, int bits) |
445 | { | 445 | { |
446 | size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); | 446 | size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); |
447 | struct tnode *tn = tnode_alloc(sz); | 447 | struct tnode *tn = tnode_alloc(sz); |
448 | 448 | ||
449 | if (tn) { | 449 | if (tn) { |
@@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
456 | } | 456 | } |
457 | 457 | ||
458 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), | 458 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), |
459 | sizeof(struct node) << bits); | 459 | sizeof(struct rt_trie_node) << bits); |
460 | return tn; | 460 | return tn; |
461 | } | 461 | } |
462 | 462 | ||
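
tnode_new() sizes a single allocation to hold the header plus 2^bits child pointers, which is why every size computation in these hunks reads sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits). The kernel spells the array child[0]; standard C99 expresses the same layout as a flexible array member, as in this sketch (the kernel additionally falls back to page allocations for large nodes):

#include <stdlib.h>

struct child;                      /* whatever the slots point at */

struct tnode {
    unsigned int bits;             /* log2 of the fanout */
    struct child *slot[];          /* C99 flexible array member */
};

static struct tnode *tnode_new(unsigned int bits)
{
    /* one allocation covers the header and all 2^bits child slots */
    size_t sz = sizeof(struct tnode) + (sizeof(struct child *) << bits);
    struct tnode *tn = calloc(1, sz);

    if (tn)
        tn->bits = bits;
    return tn;
}
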
@@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
465 | * and no bits are skipped. See discussion in dyntree paper p. 6 | 465 | * and no bits are skipped. See discussion in dyntree paper p. 6 |
466 | */ | 466 | */ |
467 | 467 | ||
468 | static inline int tnode_full(const struct tnode *tn, const struct node *n) | 468 | static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) |
469 | { | 469 | { |
470 | if (n == NULL || IS_LEAF(n)) | 470 | if (n == NULL || IS_LEAF(n)) |
471 | return 0; | 471 | return 0; |
@@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) | |||
474 | } | 474 | } |
475 | 475 | ||
476 | static inline void put_child(struct trie *t, struct tnode *tn, int i, | 476 | static inline void put_child(struct trie *t, struct tnode *tn, int i, |
477 | struct node *n) | 477 | struct rt_trie_node *n) |
478 | { | 478 | { |
479 | tnode_put_child_reorg(tn, i, n, -1); | 479 | tnode_put_child_reorg(tn, i, n, -1); |
480 | } | 480 | } |
@@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
484 | * Update the value of full_children and empty_children. | 484 | * Update the value of full_children and empty_children. |
485 | */ | 485 | */ |
486 | 486 | ||
487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
488 | int wasfull) | 488 | int wasfull) |
489 | { | 489 | { |
490 | struct node *chi = tn->child[i]; | 490 | struct rt_trie_node *chi = tn->child[i]; |
491 | int isfull; | 491 | int isfull; |
492 | 492 | ||
493 | BUG_ON(i >= 1<<tn->bits); | 493 | BUG_ON(i >= 1<<tn->bits); |
@@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
515 | } | 515 | } |
516 | 516 | ||
517 | #define MAX_WORK 10 | 517 | #define MAX_WORK 10 |
518 | static struct node *resize(struct trie *t, struct tnode *tn) | 518 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) |
519 | { | 519 | { |
520 | int i; | 520 | int i; |
521 | struct tnode *old_tn; | 521 | struct tnode *old_tn; |
@@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
605 | 605 | ||
606 | /* Keep root node larger */ | 606 | /* Keep root node larger */ |
607 | 607 | ||
608 | if (!node_parent((struct node *)tn)) { | 608 | if (!node_parent((struct rt_trie_node *)tn)) { |
609 | inflate_threshold_use = inflate_threshold_root; | 609 | inflate_threshold_use = inflate_threshold_root; |
610 | halve_threshold_use = halve_threshold_root; | 610 | halve_threshold_use = halve_threshold_root; |
611 | } else { | 611 | } else { |
@@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
635 | 635 | ||
636 | /* Return if at least one inflate is run */ | 636 | /* Return if at least one inflate is run */ |
637 | if (max_work != MAX_WORK) | 637 | if (max_work != MAX_WORK) |
638 | return (struct node *) tn; | 638 | return (struct rt_trie_node *) tn; |
639 | 639 | ||
640 | /* | 640 | /* |
641 | * Halve as long as the number of empty children in this | 641 | * Halve as long as the number of empty children in this |
@@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
663 | if (tn->empty_children == tnode_child_length(tn) - 1) { | 663 | if (tn->empty_children == tnode_child_length(tn) - 1) { |
664 | one_child: | 664 | one_child: |
665 | for (i = 0; i < tnode_child_length(tn); i++) { | 665 | for (i = 0; i < tnode_child_length(tn); i++) { |
666 | struct node *n; | 666 | struct rt_trie_node *n; |
667 | 667 | ||
668 | n = tn->child[i]; | 668 | n = tn->child[i]; |
669 | if (!n) | 669 | if (!n) |
@@ -676,7 +676,7 @@ one_child: | |||
676 | return n; | 676 | return n; |
677 | } | 677 | } |
678 | } | 678 | } |
679 | return (struct node *) tn; | 679 | return (struct rt_trie_node *) tn; |
680 | } | 680 | } |
681 | 681 | ||
682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
@@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
723 | goto nomem; | 723 | goto nomem; |
724 | } | 724 | } |
725 | 725 | ||
726 | put_child(t, tn, 2*i, (struct node *) left); | 726 | put_child(t, tn, 2*i, (struct rt_trie_node *) left); |
727 | put_child(t, tn, 2*i+1, (struct node *) right); | 727 | put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); |
728 | } | 728 | } |
729 | } | 729 | } |
730 | 730 | ||
731 | for (i = 0; i < olen; i++) { | 731 | for (i = 0; i < olen; i++) { |
732 | struct tnode *inode; | 732 | struct tnode *inode; |
733 | struct node *node = tnode_get_child(oldtnode, i); | 733 | struct rt_trie_node *node = tnode_get_child(oldtnode, i); |
734 | struct tnode *left, *right; | 734 | struct tnode *left, *right; |
735 | int size, j; | 735 | int size, j; |
736 | 736 | ||
@@ -825,7 +825,7 @@ nomem: | |||
825 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 825 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
826 | { | 826 | { |
827 | struct tnode *oldtnode = tn; | 827 | struct tnode *oldtnode = tn; |
828 | struct node *left, *right; | 828 | struct rt_trie_node *left, *right; |
829 | int i; | 829 | int i; |
830 | int olen = tnode_child_length(tn); | 830 | int olen = tnode_child_length(tn); |
831 | 831 | ||
@@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
856 | if (!newn) | 856 | if (!newn) |
857 | goto nomem; | 857 | goto nomem; |
858 | 858 | ||
859 | put_child(t, tn, i/2, (struct node *)newn); | 859 | put_child(t, tn, i/2, (struct rt_trie_node *)newn); |
860 | } | 860 | } |
861 | 861 | ||
862 | } | 862 | } |
@@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key) | |||
958 | { | 958 | { |
959 | int pos; | 959 | int pos; |
960 | struct tnode *tn; | 960 | struct tnode *tn; |
961 | struct node *n; | 961 | struct rt_trie_node *n; |
962 | 962 | ||
963 | pos = 0; | 963 | pos = 0; |
964 | n = rcu_dereference_rtnl(t->trie); | 964 | n = rcu_dereference_rtnl(t->trie); |
@@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
993 | 993 | ||
994 | key = tn->key; | 994 | key = tn->key; |
995 | 995 | ||
996 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 996 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { |
997 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 997 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
998 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 998 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
999 | tn = (struct tnode *) resize(t, (struct tnode *)tn); | 999 | tn = (struct tnode *) resize(t, (struct tnode *)tn); |
1000 | 1000 | ||
1001 | tnode_put_child_reorg((struct tnode *)tp, cindex, | 1001 | tnode_put_child_reorg((struct tnode *)tp, cindex, |
1002 | (struct node *)tn, wasfull); | 1002 | (struct rt_trie_node *)tn, wasfull); |
1003 | 1003 | ||
1004 | tp = node_parent((struct node *) tn); | 1004 | tp = node_parent((struct rt_trie_node *) tn); |
1005 | if (!tp) | 1005 | if (!tp) |
1006 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1006 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1007 | 1007 | ||
1008 | tnode_free_flush(); | 1008 | tnode_free_flush(); |
1009 | if (!tp) | 1009 | if (!tp) |
@@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
1015 | if (IS_TNODE(tn)) | 1015 | if (IS_TNODE(tn)) |
1016 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1016 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
1017 | 1017 | ||
1018 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1018 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1019 | tnode_free_flush(); | 1019 | tnode_free_flush(); |
1020 | } | 1020 | } |
1021 | 1021 | ||
@@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1025 | { | 1025 | { |
1026 | int pos, newpos; | 1026 | int pos, newpos; |
1027 | struct tnode *tp = NULL, *tn = NULL; | 1027 | struct tnode *tp = NULL, *tn = NULL; |
1028 | struct node *n; | 1028 | struct rt_trie_node *n; |
1029 | struct leaf *l; | 1029 | struct leaf *l; |
1030 | int missbit; | 1030 | int missbit; |
1031 | struct list_head *fa_head = NULL; | 1031 | struct list_head *fa_head = NULL; |
@@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1111 | if (t->trie && n == NULL) { | 1111 | if (t->trie && n == NULL) { |
1112 | /* Case 2: n is NULL, and will just insert a new leaf */ | 1112 | /* Case 2: n is NULL, and will just insert a new leaf */ |
1113 | 1113 | ||
1114 | node_set_parent((struct node *)l, tp); | 1114 | node_set_parent((struct rt_trie_node *)l, tp); |
1115 | 1115 | ||
1116 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1116 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1117 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); | 1117 | put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); |
1118 | } else { | 1118 | } else { |
1119 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | 1119 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
1120 | /* | 1120 | /* |
@@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1141 | return NULL; | 1141 | return NULL; |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | node_set_parent((struct node *)tn, tp); | 1144 | node_set_parent((struct rt_trie_node *)tn, tp); |
1145 | 1145 | ||
1146 | missbit = tkey_extract_bits(key, newpos, 1); | 1146 | missbit = tkey_extract_bits(key, newpos, 1); |
1147 | put_child(t, tn, missbit, (struct node *)l); | 1147 | put_child(t, tn, missbit, (struct rt_trie_node *)l); |
1148 | put_child(t, tn, 1-missbit, n); | 1148 | put_child(t, tn, 1-missbit, n); |
1149 | 1149 | ||
1150 | if (tp) { | 1150 | if (tp) { |
1151 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1151 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1152 | put_child(t, (struct tnode *)tp, cindex, | 1152 | put_child(t, (struct tnode *)tp, cindex, |
1153 | (struct node *)tn); | 1153 | (struct rt_trie_node *)tn); |
1154 | } else { | 1154 | } else { |
1155 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1155 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1156 | tp = tn; | 1156 | tp = tn; |
1157 | } | 1157 | } |
1158 | } | 1158 | } |
@@ -1340,7 +1340,7 @@ err: | |||
1340 | } | 1340 | } |
1341 | 1341 | ||
1342 | /* should be called with rcu_read_lock */ | 1342 | /* should be called with rcu_read_lock */ |
1343 | static int check_leaf(struct trie *t, struct leaf *l, | 1343 | static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, |
1344 | t_key key, const struct flowi *flp, | 1344 | t_key key, const struct flowi *flp, |
1345 | struct fib_result *res, int fib_flags) | 1345 | struct fib_result *res, int fib_flags) |
1346 | { | 1346 | { |
@@ -1356,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
1356 | if (l->key != (key & ntohl(mask))) | 1356 | if (l->key != (key & ntohl(mask))) |
1357 | continue; | 1357 | continue; |
1358 | 1358 | ||
1359 | err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); | 1359 | err = fib_semantic_match(tb, &li->falh, flp, res, plen, fib_flags); |
1360 | 1360 | ||
1361 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1361 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1362 | if (err <= 0) | 1362 | if (err <= 0) |
@@ -1376,13 +1376,13 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1376 | { | 1376 | { |
1377 | struct trie *t = (struct trie *) tb->tb_data; | 1377 | struct trie *t = (struct trie *) tb->tb_data; |
1378 | int ret; | 1378 | int ret; |
1379 | struct node *n; | 1379 | struct rt_trie_node *n; |
1380 | struct tnode *pn; | 1380 | struct tnode *pn; |
1381 | int pos, bits; | 1381 | unsigned int pos, bits; |
1382 | t_key key = ntohl(flp->fl4_dst); | 1382 | t_key key = ntohl(flp->fl4_dst); |
1383 | int chopped_off; | 1383 | unsigned int chopped_off; |
1384 | t_key cindex = 0; | 1384 | t_key cindex = 0; |
1385 | int current_prefix_length = KEYLENGTH; | 1385 | unsigned int current_prefix_length = KEYLENGTH; |
1386 | struct tnode *cn; | 1386 | struct tnode *cn; |
1387 | t_key pref_mismatch; | 1387 | t_key pref_mismatch; |
1388 | 1388 | ||
@@ -1398,7 +1398,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1398 | 1398 | ||
1399 | /* Just a leaf? */ | 1399 | /* Just a leaf? */ |
1400 | if (IS_LEAF(n)) { | 1400 | if (IS_LEAF(n)) { |
1401 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); | 1401 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
1402 | goto found; | 1402 | goto found; |
1403 | } | 1403 | } |
1404 | 1404 | ||
@@ -1423,7 +1423,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1423 | } | 1423 | } |
1424 | 1424 | ||
1425 | if (IS_LEAF(n)) { | 1425 | if (IS_LEAF(n)) { |
1426 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); | 1426 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
1427 | if (ret > 0) | 1427 | if (ret > 0) |
1428 | goto backtrace; | 1428 | goto backtrace; |
1429 | goto found; | 1429 | goto found; |
@@ -1541,7 +1541,7 @@ backtrace: | |||
1541 | if (chopped_off <= pn->bits) { | 1541 | if (chopped_off <= pn->bits) { |
1542 | cindex &= ~(1 << (chopped_off-1)); | 1542 | cindex &= ~(1 << (chopped_off-1)); |
1543 | } else { | 1543 | } else { |
1544 | struct tnode *parent = node_parent_rcu((struct node *) pn); | 1544 | struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); |
1545 | if (!parent) | 1545 | if (!parent) |
1546 | goto failed; | 1546 | goto failed; |
1547 | 1547 | ||
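
The backtracking path here shortens the matched prefix one bit at a time: cindex &= ~(1 << (chopped_off - 1)) clears the most recently forgiven bit of the child index and retries, and only when chopped_off exceeds the node's bits does the walk pop to the (now rt_trie_node-typed) parent. A toy trace of just the index arithmetic:

#include <stdio.h>

int main(void)
{
    unsigned int cindex = 0x5;     /* 101b: path taken through a 3-bit node */
    unsigned int chopped_off;

    /* retry with progressively shorter prefixes by clearing, one at a
     * time, the lowest bit that was allowed to mismatch */
    for (chopped_off = 1; chopped_off <= 3; chopped_off++) {
        cindex &= ~(1u << (chopped_off - 1));
        printf("chopped_off=%u -> cindex=0x%x\n", chopped_off, cindex);
    }
    return 0;
}
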
@@ -1568,7 +1568,7 @@ found: | |||
1568 | */ | 1568 | */ |
1569 | static void trie_leaf_remove(struct trie *t, struct leaf *l) | 1569 | static void trie_leaf_remove(struct trie *t, struct leaf *l) |
1570 | { | 1570 | { |
1571 | struct tnode *tp = node_parent((struct node *) l); | 1571 | struct tnode *tp = node_parent((struct rt_trie_node *) l); |
1572 | 1572 | ||
1573 | pr_debug("entering trie_leaf_remove(%p)\n", l); | 1573 | pr_debug("entering trie_leaf_remove(%p)\n", l); |
1574 | 1574 | ||
@@ -1706,7 +1706,7 @@ static int trie_flush_leaf(struct leaf *l) | |||
1706 | * Scan for the next right leaf starting at node p->child[idx] | 1706 | * Scan for the next right leaf starting at node p->child[idx] |
1707 | * Since we have back pointer, no recursion necessary. | 1707 | * Since we have back pointer, no recursion necessary. |
1708 | */ | 1708 | */ |
1709 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | 1709 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) |
1710 | { | 1710 | { |
1711 | do { | 1711 | do { |
1712 | t_key idx; | 1712 | t_key idx; |
@@ -1732,7 +1732,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | |||
1732 | } | 1732 | } |
1733 | 1733 | ||
1734 | /* Node empty, walk back up to parent */ | 1734 | /* Node empty, walk back up to parent */ |
1735 | c = (struct node *) p; | 1735 | c = (struct rt_trie_node *) p; |
1736 | } while ((p = node_parent_rcu(c)) != NULL); | 1736 | } while ((p = node_parent_rcu(c)) != NULL); |
1737 | 1737 | ||
1738 | return NULL; /* Root of trie */ | 1738 | return NULL; /* Root of trie */ |
@@ -1753,7 +1753,7 @@ static struct leaf *trie_firstleaf(struct trie *t) | |||
1753 | 1753 | ||
1754 | static struct leaf *trie_nextleaf(struct leaf *l) | 1754 | static struct leaf *trie_nextleaf(struct leaf *l) |
1755 | { | 1755 | { |
1756 | struct node *c = (struct node *) l; | 1756 | struct rt_trie_node *c = (struct rt_trie_node *) l; |
1757 | struct tnode *p = node_parent_rcu(c); | 1757 | struct tnode *p = node_parent_rcu(c); |
1758 | 1758 | ||
1759 | if (!p) | 1759 | if (!p) |
@@ -1802,80 +1802,6 @@ void fib_free_table(struct fib_table *tb) | |||
1802 | kfree(tb); | 1802 | kfree(tb); |
1803 | } | 1803 | } |
1804 | 1804 | ||
1805 | void fib_table_select_default(struct fib_table *tb, | ||
1806 | const struct flowi *flp, | ||
1807 | struct fib_result *res) | ||
1808 | { | ||
1809 | struct trie *t = (struct trie *) tb->tb_data; | ||
1810 | int order, last_idx; | ||
1811 | struct fib_info *fi = NULL; | ||
1812 | struct fib_info *last_resort; | ||
1813 | struct fib_alias *fa = NULL; | ||
1814 | struct list_head *fa_head; | ||
1815 | struct leaf *l; | ||
1816 | |||
1817 | last_idx = -1; | ||
1818 | last_resort = NULL; | ||
1819 | order = -1; | ||
1820 | |||
1821 | rcu_read_lock(); | ||
1822 | |||
1823 | l = fib_find_node(t, 0); | ||
1824 | if (!l) | ||
1825 | goto out; | ||
1826 | |||
1827 | fa_head = get_fa_head(l, 0); | ||
1828 | if (!fa_head) | ||
1829 | goto out; | ||
1830 | |||
1831 | if (list_empty(fa_head)) | ||
1832 | goto out; | ||
1833 | |||
1834 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
1835 | struct fib_info *next_fi = fa->fa_info; | ||
1836 | |||
1837 | if (fa->fa_scope != res->scope || | ||
1838 | fa->fa_type != RTN_UNICAST) | ||
1839 | continue; | ||
1840 | |||
1841 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
1842 | break; | ||
1843 | if (!next_fi->fib_nh[0].nh_gw || | ||
1844 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
1845 | continue; | ||
1846 | |||
1847 | fib_alias_accessed(fa); | ||
1848 | |||
1849 | if (fi == NULL) { | ||
1850 | if (next_fi != res->fi) | ||
1851 | break; | ||
1852 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
1853 | &last_idx, tb->tb_default)) { | ||
1854 | fib_result_assign(res, fi); | ||
1855 | tb->tb_default = order; | ||
1856 | goto out; | ||
1857 | } | ||
1858 | fi = next_fi; | ||
1859 | order++; | ||
1860 | } | ||
1861 | if (order <= 0 || fi == NULL) { | ||
1862 | tb->tb_default = -1; | ||
1863 | goto out; | ||
1864 | } | ||
1865 | |||
1866 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
1867 | tb->tb_default)) { | ||
1868 | fib_result_assign(res, fi); | ||
1869 | tb->tb_default = order; | ||
1870 | goto out; | ||
1871 | } | ||
1872 | if (last_idx >= 0) | ||
1873 | fib_result_assign(res, last_resort); | ||
1874 | tb->tb_default = last_idx; | ||
1875 | out: | ||
1876 | rcu_read_unlock(); | ||
1877 | } | ||
1878 | |||
1879 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | 1805 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, |
1880 | struct fib_table *tb, | 1806 | struct fib_table *tb, |
1881 | struct sk_buff *skb, struct netlink_callback *cb) | 1807 | struct sk_buff *skb, struct netlink_callback *cb) |
@@ -1990,7 +1916,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, | |||
1990 | return skb->len; | 1916 | return skb->len; |
1991 | } | 1917 | } |
1992 | 1918 | ||
1993 | void __init fib_hash_init(void) | 1919 | void __init fib_trie_init(void) |
1994 | { | 1920 | { |
1995 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", | 1921 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", |
1996 | sizeof(struct fib_alias), | 1922 | sizeof(struct fib_alias), |
@@ -2003,8 +1929,7 @@ void __init fib_hash_init(void) | |||
2003 | } | 1929 | } |
2004 | 1930 | ||
2005 | 1931 | ||
2006 | /* Fix more generic FIB names for init later */ | 1932 | struct fib_table *fib_trie_table(u32 id) |
2007 | struct fib_table *fib_hash_table(u32 id) | ||
2008 | { | 1933 | { |
2009 | struct fib_table *tb; | 1934 | struct fib_table *tb; |
2010 | struct trie *t; | 1935 | struct trie *t; |
@@ -2036,7 +1961,7 @@ struct fib_trie_iter { | |||
2036 | unsigned int depth; | 1961 | unsigned int depth; |
2037 | }; | 1962 | }; |
2038 | 1963 | ||
2039 | static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | 1964 | static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) |
2040 | { | 1965 | { |
2041 | struct tnode *tn = iter->tnode; | 1966 | struct tnode *tn = iter->tnode; |
2042 | unsigned int cindex = iter->index; | 1967 | unsigned int cindex = iter->index; |
@@ -2050,7 +1975,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | |||
2050 | iter->tnode, iter->index, iter->depth); | 1975 | iter->tnode, iter->index, iter->depth); |
2051 | rescan: | 1976 | rescan: |
2052 | while (cindex < (1<<tn->bits)) { | 1977 | while (cindex < (1<<tn->bits)) { |
2053 | struct node *n = tnode_get_child_rcu(tn, cindex); | 1978 | struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); |
2054 | 1979 | ||
2055 | if (n) { | 1980 | if (n) { |
2056 | if (IS_LEAF(n)) { | 1981 | if (IS_LEAF(n)) { |
@@ -2069,7 +1994,7 @@ rescan: | |||
2069 | } | 1994 | } |
2070 | 1995 | ||
2071 | /* Current node exhausted, pop back up */ | 1996 | /* Current node exhausted, pop back up */ |
2072 | p = node_parent_rcu((struct node *)tn); | 1997 | p = node_parent_rcu((struct rt_trie_node *)tn); |
2073 | if (p) { | 1998 | if (p) { |
2074 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; | 1999 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; |
2075 | tn = p; | 2000 | tn = p; |
@@ -2081,10 +2006,10 @@ rescan: | |||
2081 | return NULL; | 2006 | return NULL; |
2082 | } | 2007 | } |
2083 | 2008 | ||
2084 | static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | 2009 | static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, |
2085 | struct trie *t) | 2010 | struct trie *t) |
2086 | { | 2011 | { |
2087 | struct node *n; | 2012 | struct rt_trie_node *n; |
2088 | 2013 | ||
2089 | if (!t) | 2014 | if (!t) |
2090 | return NULL; | 2015 | return NULL; |
@@ -2108,7 +2033,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | |||
2108 | 2033 | ||
2109 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) | 2034 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) |
2110 | { | 2035 | { |
2111 | struct node *n; | 2036 | struct rt_trie_node *n; |
2112 | struct fib_trie_iter iter; | 2037 | struct fib_trie_iter iter; |
2113 | 2038 | ||
2114 | memset(s, 0, sizeof(*s)); | 2039 | memset(s, 0, sizeof(*s)); |
@@ -2181,7 +2106,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) | |||
2181 | seq_putc(seq, '\n'); | 2106 | seq_putc(seq, '\n'); |
2182 | seq_printf(seq, "\tPointers: %u\n", pointers); | 2107 | seq_printf(seq, "\tPointers: %u\n", pointers); |
2183 | 2108 | ||
2184 | bytes += sizeof(struct node *) * pointers; | 2109 | bytes += sizeof(struct rt_trie_node *) * pointers; |
2185 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); | 2110 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); |
2186 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); | 2111 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); |
2187 | } | 2112 | } |
@@ -2262,7 +2187,7 @@ static const struct file_operations fib_triestat_fops = { | |||
2262 | .release = single_release_net, | 2187 | .release = single_release_net, |
2263 | }; | 2188 | }; |
2264 | 2189 | ||
2265 | static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | 2190 | static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) |
2266 | { | 2191 | { |
2267 | struct fib_trie_iter *iter = seq->private; | 2192 | struct fib_trie_iter *iter = seq->private; |
2268 | struct net *net = seq_file_net(seq); | 2193 | struct net *net = seq_file_net(seq); |
@@ -2275,7 +2200,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | |||
2275 | struct fib_table *tb; | 2200 | struct fib_table *tb; |
2276 | 2201 | ||
2277 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { | 2202 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { |
2278 | struct node *n; | 2203 | struct rt_trie_node *n; |
2279 | 2204 | ||
2280 | for (n = fib_trie_get_first(iter, | 2205 | for (n = fib_trie_get_first(iter, |
2281 | (struct trie *) tb->tb_data); | 2206 | (struct trie *) tb->tb_data); |
@@ -2304,7 +2229,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2304 | struct fib_table *tb = iter->tb; | 2229 | struct fib_table *tb = iter->tb; |
2305 | struct hlist_node *tb_node; | 2230 | struct hlist_node *tb_node; |
2306 | unsigned int h; | 2231 | unsigned int h; |
2307 | struct node *n; | 2232 | struct rt_trie_node *n; |
2308 | 2233 | ||
2309 | ++*pos; | 2234 | ++*pos; |
2310 | /* next node in same table */ | 2235 | /* next node in same table */ |
@@ -2390,7 +2315,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) | |||
2390 | static int fib_trie_seq_show(struct seq_file *seq, void *v) | 2315 | static int fib_trie_seq_show(struct seq_file *seq, void *v) |
2391 | { | 2316 | { |
2392 | const struct fib_trie_iter *iter = seq->private; | 2317 | const struct fib_trie_iter *iter = seq->private; |
2393 | struct node *n = v; | 2318 | struct rt_trie_node *n = v; |
2394 | 2319 | ||
2395 | if (!node_parent_rcu(n)) | 2320 | if (!node_parent_rcu(n)) |
2396 | fib_table_print(seq, iter->tb); | 2321 | fib_table_print(seq, iter->tb); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f01ea0..ad2bcf1b69ae 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) | |||
233 | * Send an ICMP frame. | 233 | * Send an ICMP frame. |
234 | */ | 234 | */ |
235 | 235 | ||
236 | /* | 236 | static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
237 | * Check transmit rate limitation for given message. | ||
238 | * The rate information is held in the destination cache now. | ||
239 | * This function is generic and could be used for other purposes | ||
240 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
241 | * | ||
242 | * Note that the same dst_entry fields are modified by functions in | ||
243 | * route.c too, but these work for packet destinations while xrlim_allow | ||
244 | * works for icmp destinations. This means the rate limiting information | ||
245 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
246 | * by source and by destination. | ||
247 | * | ||
248 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
249 | * SHOULD allow setting of rate limits | ||
250 | * | ||
251 | * Shared between ICMPv4 and ICMPv6. | ||
252 | */ | ||
253 | #define XRLIM_BURST_FACTOR 6 | ||
254 | int xrlim_allow(struct dst_entry *dst, int timeout) | ||
255 | { | ||
256 | unsigned long now, token = dst->rate_tokens; | ||
257 | int rc = 0; | ||
258 | |||
259 | now = jiffies; | ||
260 | token += now - dst->rate_last; | ||
261 | dst->rate_last = now; | ||
262 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
263 | token = XRLIM_BURST_FACTOR * timeout; | ||
264 | if (token >= timeout) { | ||
265 | token -= timeout; | ||
266 | rc = 1; | ||
267 | } | ||
268 | dst->rate_tokens = token; | ||
269 | return rc; | ||
270 | } | ||
271 | EXPORT_SYMBOL(xrlim_allow); | ||
272 | |||
273 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | ||
274 | int type, int code) | 237 | int type, int code) |
275 | { | 238 | { |
276 | struct dst_entry *dst = &rt->dst; | 239 | struct dst_entry *dst = &rt->dst; |
277 | int rc = 1; | 240 | bool rc = true; |
278 | 241 | ||
279 | if (type > NR_ICMP_TYPES) | 242 | if (type > NR_ICMP_TYPES) |
280 | goto out; | 243 | goto out; |
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | |||
288 | goto out; | 251 | goto out; |
289 | 252 | ||
290 | /* Limit if icmp type is enabled in ratemask. */ | 253 | /* Limit if icmp type is enabled in ratemask. */ |
291 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) | 254 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { |
292 | rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); | 255 | if (!rt->peer) |
256 | rt_bind_peer(rt, 1); | ||
257 | rc = inet_peer_xrlim_allow(rt->peer, | ||
258 | net->ipv4.sysctl_icmp_ratelimit); | ||
259 | } | ||
293 | out: | 260 | out: |
294 | return rc; | 261 | return rc; |
295 | } | 262 | } |
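
With xrlim_allow() gone from this file, the limiter consults per-destination inet_peer state rather than dst_entry fields, binding the route to its peer lazily the first time an ICMP needs rate limiting. Reduced to a sketch, with bind_peer() standing in for rt_bind_peer():

#include <stdlib.h>

struct peer { unsigned long rate_tokens, rate_last; };
struct route { struct peer *peer; };

/* attach the shared per-destination state on first use;
 * later callers reuse the cached pointer */
static struct peer *bind_peer(struct route *rt)
{
    if (!rt->peer)
        rt->peer = calloc(1, sizeof(*rt->peer));
    return rt->peer;
}
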
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a96e65674ac3..48f8d4592ccd 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
167 | int i, n = (a->family == AF_INET ? 1 : 4); | 167 | int i, n = (a->family == AF_INET ? 1 : 4); |
168 | 168 | ||
169 | for (i = 0; i < n; i++) { | 169 | for (i = 0; i < n; i++) { |
170 | if (a->a6[i] == b->a6[i]) | 170 | if (a->addr.a6[i] == b->addr.a6[i]) |
171 | continue; | 171 | continue; |
172 | if (a->a6[i] < b->a6[i]) | 172 | if (a->addr.a6[i] < b->addr.a6[i]) |
173 | return -1; | 173 | return -1; |
174 | return 1; | 174 | return 1; |
175 | } | 175 | } |
@@ -510,8 +510,13 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
510 | p->daddr = *daddr; | 510 | p->daddr = *daddr; |
511 | atomic_set(&p->refcnt, 1); | 511 | atomic_set(&p->refcnt, 1); |
512 | atomic_set(&p->rid, 0); | 512 | atomic_set(&p->rid, 0); |
513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); | 513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); |
514 | p->tcp_ts_stamp = 0; | 514 | p->tcp_ts_stamp = 0; |
515 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | ||
516 | p->rate_tokens = 0; | ||
517 | p->rate_last = 0; | ||
518 | p->pmtu_expires = 0; | ||
519 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); | ||
515 | INIT_LIST_HEAD(&p->unused); | 520 | INIT_LIST_HEAD(&p->unused); |
516 | 521 | ||
517 | 522 | ||
@@ -579,3 +584,44 @@ void inet_putpeer(struct inet_peer *p) | |||
579 | local_bh_enable(); | 584 | local_bh_enable(); |
580 | } | 585 | } |
581 | EXPORT_SYMBOL_GPL(inet_putpeer); | 586 | EXPORT_SYMBOL_GPL(inet_putpeer); |
587 | |||
588 | /* | ||
589 | * Check transmit rate limitation for given message. | ||
590 | * The rate information is held in the inet_peer entries now. | ||
591 | * This function is generic and could be used for other purposes | ||
592 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
593 | * | ||
594 | * Note that the same inet_peer fields are modified by functions in | ||
595 | * route.c too, but these work for packet destinations while xrlim_allow | ||
596 | * works for icmp destinations. This means the rate limiting information | ||
597 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
598 | * by source and by destination. | ||
599 | * | ||
600 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
601 | * SHOULD allow setting of rate limits | ||
602 | * | ||
603 | * Shared between ICMPv4 and ICMPv6. | ||
604 | */ | ||
605 | #define XRLIM_BURST_FACTOR 6 | ||
606 | bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) | ||
607 | { | ||
608 | unsigned long now, token; | ||
609 | bool rc = false; | ||
610 | |||
611 | if (!peer) | ||
612 | return true; | ||
613 | |||
614 | token = peer->rate_tokens; | ||
615 | now = jiffies; | ||
616 | token += now - peer->rate_last; | ||
617 | peer->rate_last = now; | ||
618 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
619 | token = XRLIM_BURST_FACTOR * timeout; | ||
620 | if (token >= timeout) { | ||
621 | token -= timeout; | ||
622 | rc = true; | ||
623 | } | ||
624 | peer->rate_tokens = token; | ||
625 | return rc; | ||
626 | } | ||
627 | EXPORT_SYMBOL(inet_peer_xrlim_allow); | ||
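
inet_peer_xrlim_allow() is a textbook token bucket: credit accrues with elapsed jiffies since rate_last, is capped at six timeouts of burst, and each permitted ICMP spends one timeout's worth. A standalone version with an explicit tick counter shows the steady state, one allow per timeout once any burst credit is spent:

#include <stdbool.h>
#include <stdio.h>

#define XRLIM_BURST_FACTOR 6

struct peer {
    unsigned long rate_tokens;     /* accumulated credit, in ticks */
    unsigned long rate_last;       /* last refill time */
};

static bool xrlim_allow(struct peer *peer, unsigned long now,
                        unsigned long timeout)
{
    unsigned long token = peer->rate_tokens + (now - peer->rate_last);
    bool ok = false;

    peer->rate_last = now;
    if (token > XRLIM_BURST_FACTOR * timeout)
        token = XRLIM_BURST_FACTOR * timeout;  /* cap the burst */
    if (token >= timeout) {
        token -= timeout;                      /* spend one send's worth */
        ok = true;
    }
    peer->rate_tokens = token;
    return ok;
}

int main(void)
{
    struct peer p = { 0, 0 };
    unsigned long t;

    /* with timeout=100 ticks, attempts every 50 ticks are halved */
    for (t = 0; t < 600; t += 50)
        printf("t=%lu allow=%d\n", t, xrlim_allow(&p, t, 100));
    return 0;
}
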
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d859bcc26cb7..d7b2b0987a3b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
340 | } | 340 | } |
341 | } | 341 | } |
342 | 342 | ||
343 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_IP_ROUTE_CLASSID |
344 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
346 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index babd1a2bae5f..f926a310075d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT | |||
206 | 206 | ||
207 | config NF_NAT_SNMP_BASIC | 207 | config NF_NAT_SNMP_BASIC |
208 | tristate "Basic SNMP-ALG support" | 208 | tristate "Basic SNMP-ALG support" |
209 | depends on NF_NAT | 209 | depends on NF_CONNTRACK_SNMP && NF_NAT |
210 | depends on NETFILTER_ADVANCED | 210 | depends on NETFILTER_ADVANCED |
211 | default NF_NAT && NF_CONNTRACK_SNMP | ||
211 | ---help--- | 212 | ---help--- |
212 | 213 | ||
213 | This module implements an Application Layer Gateway (ALG) for | 214 | This module implements an Application Layer Gateway (ALG) for |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e855fffaed95..e95054c690c6 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
867 | newinfo->initial_entries = 0; | 867 | newinfo->initial_entries = 0; |
868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
869 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
869 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 870 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
870 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 871 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
871 | if (ret != 0) | 872 | if (ret != 0) |
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name, | |||
1333 | duprintf("translate_compat_table: size %u\n", info->size); | 1334 | duprintf("translate_compat_table: size %u\n", info->size); |
1334 | j = 0; | 1335 | j = 0; |
1335 | xt_compat_lock(NFPROTO_ARP); | 1336 | xt_compat_lock(NFPROTO_ARP); |
1337 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
1336 | /* Walk through entries, checking offsets. */ | 1338 | /* Walk through entries, checking offsets. */ |
1337 | xt_entry_foreach(iter0, entry0, total_size) { | 1339 | xt_entry_foreach(iter0, entry0, total_size) { |
1338 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1340 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 652efea013dc..ef7d7b9680ea 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1064 | newinfo->initial_entries = 0; | 1064 | newinfo->initial_entries = 0; |
1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1066 | xt_compat_init_offsets(AF_INET, info->number); | ||
1066 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1067 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1067 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1068 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1068 | if (ret != 0) | 1069 | if (ret != 0) |
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net, | |||
1664 | duprintf("translate_compat_table: size %u\n", info->size); | 1665 | duprintf("translate_compat_table: size %u\n", info->size); |
1665 | j = 0; | 1666 | j = 0; |
1666 | xt_compat_lock(AF_INET); | 1667 | xt_compat_lock(AF_INET); |
1668 | xt_compat_init_offsets(AF_INET, number); | ||
1667 | /* Walk through entries, checking offsets. */ | 1669 | /* Walk through entries, checking offsets. */ |
1668 | xt_entry_foreach(iter0, entry0, total_size) { | 1670 | xt_entry_foreach(iter0, entry0, total_size) { |
1669 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1671 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
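Both arptables and iptables gain the same preparation step: xt_compat_init_offsets() sizes the compat offset table from the rule count before any entries are walked, instead of growing it one entry at a time. A minimal sketch of the pattern these call sites follow, assuming the xt_compat_* API as used above (error handling elided):

	xt_compat_lock(AF_INET);
	xt_compat_init_offsets(AF_INET, number);  /* reserve room for 'number' rules */
	/* ... walk entries, recording 32-/64-bit offset deltas ... */
	xt_compat_flush_offsets(AF_INET);
	xt_compat_unlock(AF_INET);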
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1e26a4897655..403ca57f6011 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
300 | * that the ->target() function isn't called after ->destroy() */ | 300 | * that the ->target() function isn't called after ->destroy() */ |
301 | 301 | ||
302 | ct = nf_ct_get(skb, &ctinfo); | 302 | ct = nf_ct_get(skb, &ctinfo); |
303 | if (ct == NULL) { | 303 | if (ct == NULL) |
304 | pr_info("no conntrack!\n"); | ||
305 | /* FIXME: need to drop invalid ones, since replies | ||
306 | * to outgoing connections of other nodes will be | ||
307 | * marked as INVALID */ | ||
308 | return NF_DROP; | 304 | return NF_DROP; |
309 | } | ||
310 | 305 | ||
311 | /* special case: ICMP error handling. conntrack distinguishes between | 306 | /* special case: ICMP error handling. conntrack distinguishes between |
312 | * error messages (RELATED) and information requests (see below) */ | 307 | * error messages (RELATED) and information requests (see below) */ |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 72ffc8fda2e9..d76d6c9ed946 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf, | |||
442 | } | 442 | } |
443 | #endif | 443 | #endif |
444 | 444 | ||
445 | /* MAC logging for input path only. */ | 445 | if (in != NULL) |
446 | if (in && !out) | ||
447 | dump_mac_header(m, loginfo, skb); | 446 | dump_mac_header(m, loginfo, skb); |
448 | 447 | ||
449 | dump_packet(m, loginfo, skb, 0); | 448 | dump_packet(m, loginfo, skb, 0); |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 294a2a32f293..aef5d1fbe77d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) | |||
60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, | 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, |
61 | dev_net(out)->ipv4.iptable_mangle); | 61 | dev_net(out)->ipv4.iptable_mangle); |
62 | /* Reroute for ANY change. */ | 62 | /* Reroute for ANY change. */ |
63 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 63 | if (ret != NF_DROP && ret != NF_STOLEN) { |
64 | iph = ip_hdr(skb); | 64 | iph = ip_hdr(skb); |
65 | 65 | ||
66 | if (iph->saddr != saddr || | 66 | if (iph->saddr != saddr || |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 63f60fc5d26a..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <net/netfilter/nf_conntrack_l4proto.h> | 20 | #include <net/netfilter/nf_conntrack_l4proto.h> |
21 | #include <net/netfilter/nf_conntrack_expect.h> | 21 | #include <net/netfilter/nf_conntrack_expect.h> |
22 | #include <net/netfilter/nf_conntrack_acct.h> | 22 | #include <net/netfilter/nf_conntrack_acct.h> |
23 | #include <linux/rculist_nulls.h> | ||
23 | 24 | ||
24 | struct ct_iter_state { | 25 | struct ct_iter_state { |
25 | struct seq_net_private p; | 26 | struct seq_net_private p; |
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
35 | for (st->bucket = 0; | 36 | for (st->bucket = 0; |
36 | st->bucket < net->ct.htable_size; | 37 | st->bucket < net->ct.htable_size; |
37 | st->bucket++) { | 38 | st->bucket++) { |
38 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 39 | n = rcu_dereference( |
40 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
39 | if (!is_a_nulls(n)) | 41 | if (!is_a_nulls(n)) |
40 | return n; | 42 | return n; |
41 | } | 43 | } |
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
48 | struct net *net = seq_file_net(seq); | 50 | struct net *net = seq_file_net(seq); |
49 | struct ct_iter_state *st = seq->private; | 51 | struct ct_iter_state *st = seq->private; |
50 | 52 | ||
51 | head = rcu_dereference(head->next); | 53 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
52 | while (is_a_nulls(head)) { | 54 | while (is_a_nulls(head)) { |
53 | if (likely(get_nulls_value(head) == st->bucket)) { | 55 | if (likely(get_nulls_value(head) == st->bucket)) { |
54 | if (++st->bucket >= net->ct.htable_size) | 56 | if (++st->bucket >= net->ct.htable_size) |
55 | return NULL; | 57 | return NULL; |
56 | } | 58 | } |
57 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 59 | head = rcu_dereference( |
60 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
58 | } | 61 | } |
59 | return head; | 62 | return head; |
60 | } | 63 | } |
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
217 | struct hlist_node *n; | 220 | struct hlist_node *n; |
218 | 221 | ||
219 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 222 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
220 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 223 | n = rcu_dereference( |
224 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
221 | if (n) | 225 | if (n) |
222 | return n; | 226 | return n; |
223 | } | 227 | } |
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
230 | struct net *net = seq_file_net(seq); | 234 | struct net *net = seq_file_net(seq); |
231 | struct ct_expect_iter_state *st = seq->private; | 235 | struct ct_expect_iter_state *st = seq->private; |
232 | 236 | ||
233 | head = rcu_dereference(head->next); | 237 | head = rcu_dereference(hlist_next_rcu(head)); |
234 | while (head == NULL) { | 238 | while (head == NULL) { |
235 | if (++st->bucket >= nf_ct_expect_hsize) | 239 | if (++st->bucket >= nf_ct_expect_hsize) |
236 | return NULL; | 240 | return NULL; |
237 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 241 | head = rcu_dereference( |
242 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
238 | } | 243 | } |
239 | return head; | 244 | return head; |
240 | } | 245 | } |
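The seq-file walkers now fetch list pointers through the hlist_first_rcu()/hlist_nulls_first_rcu() accessors instead of dereferencing the ->first field directly, so the load goes through the __rcu-annotated member and sparse/lockdep-RCU checking stays accurate. A minimal before/after sketch, assuming annotated list heads:

	/* before: raw field access bypasses the __rcu annotation */
	n = rcu_dereference(net->ct.hash[bucket].first);

	/* after: the accessor hands rcu_dereference() the annotated pointer */
	n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[bucket]));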
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f23b3f06df0..703f366fd235 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c | |||
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb, | |||
44 | 44 | ||
45 | /* Try to get same port: if not, try to change it. */ | 45 | /* Try to get same port: if not, try to change it. */ |
46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | 46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { |
47 | int ret; | 47 | int res; |
48 | 48 | ||
49 | exp->tuple.dst.u.tcp.port = htons(port); | 49 | exp->tuple.dst.u.tcp.port = htons(port); |
50 | ret = nf_ct_expect_related(exp); | 50 | res = nf_ct_expect_related(exp); |
51 | if (ret == 0) | 51 | if (res == 0) |
52 | break; | 52 | break; |
53 | else if (ret != -EBUSY) { | 53 | else if (res != -EBUSY) { |
54 | port = 0; | 54 | port = 0; |
55 | break; | 55 | break; |
56 | } | 56 | } |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c04787ce1a71..21bcf471b25a 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
221 | manips not an issue. */ | 221 | manips not an issue. */ |
222 | if (maniptype == IP_NAT_MANIP_SRC && | 222 | if (maniptype == IP_NAT_MANIP_SRC && |
223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
224 | if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { | 224 | /* try the original tuple first */ |
225 | if (in_range(orig_tuple, range)) { | ||
226 | if (!nf_nat_used_tuple(orig_tuple, ct)) { | ||
227 | *tuple = *orig_tuple; | ||
228 | return; | ||
229 | } | ||
230 | } else if (find_appropriate_src(net, zone, orig_tuple, tuple, | ||
231 | range)) { | ||
225 | pr_debug("get_unique_tuple: Found current src map\n"); | 232 | pr_debug("get_unique_tuple: Found current src map\n"); |
226 | if (!nf_nat_used_tuple(tuple, ct)) | 233 | if (!nf_nat_used_tuple(tuple, ct)) |
227 | return; | 234 | return; |
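get_unique_tuple() now prefers the identity mapping for source NAT: if the original tuple already lies inside the requested range and is free, it is used verbatim; only then does it fall back to an existing mapping from the bysource hash, and finally to per-protocol unique-tuple selection. In rough pseudocode (a restatement of the hunk above, not extra logic):

	if (in_range(orig_tuple, range) && !nf_nat_used_tuple(orig_tuple, ct))
		keep orig_tuple unchanged;		/* no rewrite needed */
	else if (find_appropriate_src(net, zone, orig_tuple, tuple, range)
		 && !nf_nat_used_tuple(tuple, ct))
		reuse the recorded source mapping;
	else
		pick a fresh tuple via the protocol's ->unique_tuple();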
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
266 | struct net *net = nf_ct_net(ct); | 273 | struct net *net = nf_ct_net(ct); |
267 | struct nf_conntrack_tuple curr_tuple, new_tuple; | 274 | struct nf_conntrack_tuple curr_tuple, new_tuple; |
268 | struct nf_conn_nat *nat; | 275 | struct nf_conn_nat *nat; |
269 | int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); | ||
270 | 276 | ||
271 | /* nat helper or nfctnetlink also setup binding */ | 277 | /* nat helper or nfctnetlink also setup binding */ |
272 | nat = nfct_nat(ct); | 278 | nat = nfct_nat(ct); |
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
306 | ct->status |= IPS_DST_NAT; | 312 | ct->status |= IPS_DST_NAT; |
307 | } | 313 | } |
308 | 314 | ||
309 | /* Place in source hash if this is the first time. */ | 315 | if (maniptype == IP_NAT_MANIP_SRC) { |
310 | if (have_to_hash) { | ||
311 | unsigned int srchash; | 316 | unsigned int srchash; |
312 | 317 | ||
313 | srchash = hash_by_src(net, nf_ct_zone(ct), | 318 | srchash = hash_by_src(net, nf_ct_zone(ct), |
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
323 | 328 | ||
324 | /* It's done. */ | 329 | /* It's done. */ |
325 | if (maniptype == IP_NAT_MANIP_DST) | 330 | if (maniptype == IP_NAT_MANIP_DST) |
326 | set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 331 | ct->status |= IPS_DST_NAT_DONE; |
327 | else | 332 | else |
328 | set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 333 | ct->status |= IPS_SRC_NAT_DONE; |
329 | 334 | ||
330 | return NF_ACCEPT; | 335 | return NF_ACCEPT; |
331 | } | 336 | } |
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | |||
502 | int ret = 0; | 507 | int ret = 0; |
503 | 508 | ||
504 | spin_lock_bh(&nf_nat_lock); | 509 | spin_lock_bh(&nf_nat_lock); |
505 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { | 510 | if (rcu_dereference_protected( |
511 | nf_nat_protos[proto->protonum], | ||
512 | lockdep_is_held(&nf_nat_lock) | ||
513 | ) != &nf_nat_unknown_protocol) { | ||
506 | ret = -EBUSY; | 514 | ret = -EBUSY; |
507 | goto out; | 515 | goto out; |
508 | } | 516 | } |
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | |||
532 | if (nat == NULL || nat->ct == NULL) | 540 | if (nat == NULL || nat->ct == NULL) |
533 | return; | 541 | return; |
534 | 542 | ||
535 | NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); | 543 | NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); |
536 | 544 | ||
537 | spin_lock_bh(&nf_nat_lock); | 545 | spin_lock_bh(&nf_nat_lock); |
538 | hlist_del_rcu(&nat->bysource); | 546 | hlist_del_rcu(&nat->bysource); |
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old) | |||
545 | struct nf_conn_nat *old_nat = old; | 553 | struct nf_conn_nat *old_nat = old; |
546 | struct nf_conn *ct = old_nat->ct; | 554 | struct nf_conn *ct = old_nat->ct; |
547 | 555 | ||
548 | if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) | 556 | if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) |
549 | return; | 557 | return; |
550 | 558 | ||
551 | spin_lock_bh(&nf_nat_lock); | 559 | spin_lock_bh(&nf_nat_lock); |
552 | new_nat->ct = ct; | ||
553 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); | 560 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); |
554 | spin_unlock_bh(&nf_nat_lock); | 561 | spin_unlock_bh(&nf_nat_lock); |
555 | } | 562 | } |
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net) | |||
679 | { | 686 | { |
680 | /* Leave them the same for the moment. */ | 687 | /* Leave them the same for the moment. */ |
681 | net->ipv4.nat_htable_size = net->ct.htable_size; | 688 | net->ipv4.nat_htable_size = net->ct.htable_size; |
682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | 689 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); |
683 | &net->ipv4.nat_vmalloced, 0); | ||
684 | if (!net->ipv4.nat_bysource) | 690 | if (!net->ipv4.nat_bysource) |
685 | return -ENOMEM; | 691 | return -ENOMEM; |
686 | return 0; | 692 | return 0; |
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
702 | { | 708 | { |
703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 709 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
704 | synchronize_rcu(); | 710 | synchronize_rcu(); |
705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 711 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); |
706 | net->ipv4.nat_htable_size); | ||
707 | } | 712 | } |
708 | 713 | ||
709 | static struct pernet_operations nf_nat_net_ops = { | 714 | static struct pernet_operations nf_nat_net_ops = { |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ee5f419d0a56..8812a02078ab 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <net/netfilter/nf_conntrack_expect.h> | 54 | #include <net/netfilter/nf_conntrack_expect.h> |
55 | #include <net/netfilter/nf_conntrack_helper.h> | 55 | #include <net/netfilter/nf_conntrack_helper.h> |
56 | #include <net/netfilter/nf_nat_helper.h> | 56 | #include <net/netfilter/nf_nat_helper.h> |
57 | #include <linux/netfilter/nf_conntrack_snmp.h> | ||
57 | 58 | ||
58 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
59 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | 60 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); |
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1310 | { | 1311 | { |
1311 | int ret = 0; | 1312 | int ret = 0; |
1312 | 1313 | ||
1313 | ret = nf_conntrack_helper_register(&snmp_helper); | 1314 | BUG_ON(nf_nat_snmp_hook != NULL); |
1314 | if (ret < 0) | 1315 | rcu_assign_pointer(nf_nat_snmp_hook, help); |
1315 | return ret; | 1316 | |
1316 | ret = nf_conntrack_helper_register(&snmp_trap_helper); | 1317 | ret = nf_conntrack_helper_register(&snmp_trap_helper); |
1317 | if (ret < 0) { | 1318 | if (ret < 0) { |
1318 | nf_conntrack_helper_unregister(&snmp_helper); | 1319 | nf_conntrack_helper_unregister(&snmp_helper); |
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1323 | 1324 | ||
1324 | static void __exit nf_nat_snmp_basic_fini(void) | 1325 | static void __exit nf_nat_snmp_basic_fini(void) |
1325 | { | 1326 | { |
1326 | nf_conntrack_helper_unregister(&snmp_helper); | 1327 | rcu_assign_pointer(nf_nat_snmp_hook, NULL); |
1327 | nf_conntrack_helper_unregister(&snmp_trap_helper); | 1328 | nf_conntrack_helper_unregister(&snmp_trap_helper); |
1328 | } | 1329 | } |
1329 | 1330 | ||
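Rather than registering its own snmp_helper, which would contend with the generic NF_CONNTRACK_SNMP helper for the same port, the NAT module now publishes its payload-mangling function through the nf_nat_snmp_hook pointer (matching the new Kconfig dependency above) and keeps only the trap helper registration. A sketch of the assumed consumer side in nf_conntrack_snmp (the callback signature here is an assumption based on the hook's declaration):

	rcu_read_lock();
	nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
	if (nf_nat_snmp)
		ret = nf_nat_snmp(skb, protoff, ct, ctinfo);	/* NAT the SNMP payload */
	rcu_read_unlock();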
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ed6603c2f6d..52b077d45208 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | |||
131 | static int ip_rt_min_advmss __read_mostly = 256; | 131 | static int ip_rt_min_advmss __read_mostly = 256; |
132 | static int rt_chain_length_max __read_mostly = 20; | 132 | static int rt_chain_length_max __read_mostly = 20; |
133 | 133 | ||
134 | static struct delayed_work expires_work; | ||
135 | static unsigned long expires_ljiffies; | ||
136 | |||
137 | /* | 134 | /* |
138 | * Interface to generic destination cache. | 135 | * Interface to generic destination cache. |
139 | */ | 136 | */ |
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
152 | { | 149 | { |
153 | } | 150 | } |
154 | 151 | ||
152 | static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | ||
153 | { | ||
154 | struct rtable *rt = (struct rtable *) dst; | ||
155 | struct inet_peer *peer; | ||
156 | u32 *p = NULL; | ||
157 | |||
158 | if (!rt->peer) | ||
159 | rt_bind_peer(rt, 1); | ||
160 | |||
161 | peer = rt->peer; | ||
162 | if (peer) { | ||
163 | u32 *old_p = __DST_METRICS_PTR(old); | ||
164 | unsigned long prev, new; | ||
165 | |||
166 | p = peer->metrics; | ||
167 | if (inet_metrics_new(peer)) | ||
168 | memcpy(p, old_p, sizeof(u32) * RTAX_MAX); | ||
169 | |||
170 | new = (unsigned long) p; | ||
171 | prev = cmpxchg(&dst->_metrics, old, new); | ||
172 | |||
173 | if (prev != old) { | ||
174 | p = __DST_METRICS_PTR(prev); | ||
175 | if (prev & DST_METRICS_READ_ONLY) | ||
176 | p = NULL; | ||
177 | } else { | ||
178 | if (rt->fi) { | ||
179 | fib_info_put(rt->fi); | ||
180 | rt->fi = NULL; | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | return p; | ||
185 | } | ||
186 | |||
155 | static struct dst_ops ipv4_dst_ops = { | 187 | static struct dst_ops ipv4_dst_ops = { |
156 | .family = AF_INET, | 188 | .family = AF_INET, |
157 | .protocol = cpu_to_be16(ETH_P_IP), | 189 | .protocol = cpu_to_be16(ETH_P_IP), |
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
159 | .check = ipv4_dst_check, | 191 | .check = ipv4_dst_check, |
160 | .default_advmss = ipv4_default_advmss, | 192 | .default_advmss = ipv4_default_advmss, |
161 | .default_mtu = ipv4_default_mtu, | 193 | .default_mtu = ipv4_default_mtu, |
194 | .cow_metrics = ipv4_cow_metrics, | ||
162 | .destroy = ipv4_dst_destroy, | 195 | .destroy = ipv4_dst_destroy, |
163 | .ifdown = ipv4_dst_ifdown, | 196 | .ifdown = ipv4_dst_ifdown, |
164 | .negative_advice = ipv4_negative_advice, | 197 | .negative_advice = ipv4_negative_advice, |
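ipv4_cow_metrics() fulfils the new .cow_metrics contract wired up just above: a route starts out sharing a read-only metrics array, and the first writer copies it into the bound inet_peer and installs the private copy with cmpxchg(), so exactly one of any racing writers wins while losers adopt the winner's array. A sketch of the assumed trigger path in the generic dst layer (names per include/net/dst.h; illustrative):

	/* Setting a metric resolves a writable pointer first; on a
	 * read-only array this ends up in ->cow_metrics(), i.e. the
	 * function above for IPv4 routes. */
	u32 *p = dst_metrics_write_ptr(dst);
	if (p)
		p[RTAX_MTU - 1] = new_mtu;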
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
514 | .release = seq_release, | 547 | .release = seq_release, |
515 | }; | 548 | }; |
516 | 549 | ||
517 | #ifdef CONFIG_NET_CLS_ROUTE | 550 | #ifdef CONFIG_IP_ROUTE_CLASSID |
518 | static int rt_acct_proc_show(struct seq_file *m, void *v) | 551 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
519 | { | 552 | { |
520 | struct ip_rt_acct *dst, *src; | 553 | struct ip_rt_acct *dst, *src; |
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net) | |||
567 | if (!pde) | 600 | if (!pde) |
568 | goto err2; | 601 | goto err2; |
569 | 602 | ||
570 | #ifdef CONFIG_NET_CLS_ROUTE | 603 | #ifdef CONFIG_IP_ROUTE_CLASSID |
571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); | 604 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); |
572 | if (!pde) | 605 | if (!pde) |
573 | goto err3; | 606 | goto err3; |
574 | #endif | 607 | #endif |
575 | return 0; | 608 | return 0; |
576 | 609 | ||
577 | #ifdef CONFIG_NET_CLS_ROUTE | 610 | #ifdef CONFIG_IP_ROUTE_CLASSID |
578 | err3: | 611 | err3: |
579 | remove_proc_entry("rt_cache", net->proc_net_stat); | 612 | remove_proc_entry("rt_cache", net->proc_net_stat); |
580 | #endif | 613 | #endif |
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) | |||
588 | { | 621 | { |
589 | remove_proc_entry("rt_cache", net->proc_net_stat); | 622 | remove_proc_entry("rt_cache", net->proc_net_stat); |
590 | remove_proc_entry("rt_cache", net->proc_net); | 623 | remove_proc_entry("rt_cache", net->proc_net); |
591 | #ifdef CONFIG_NET_CLS_ROUTE | 624 | #ifdef CONFIG_IP_ROUTE_CLASSID |
592 | remove_proc_entry("rt_acct", net->proc_net); | 625 | remove_proc_entry("rt_acct", net->proc_net); |
593 | #endif | 626 | #endif |
594 | } | 627 | } |
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
632 | static inline int rt_valuable(struct rtable *rth) | 665 | static inline int rt_valuable(struct rtable *rth) |
633 | { | 666 | { |
634 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 667 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
635 | rth->dst.expires; | 668 | (rth->peer && rth->peer->pmtu_expires); |
636 | } | 669 | } |
637 | 670 | ||
638 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 671 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t | |||
643 | if (atomic_read(&rth->dst.__refcnt)) | 676 | if (atomic_read(&rth->dst.__refcnt)) |
644 | goto out; | 677 | goto out; |
645 | 678 | ||
646 | ret = 1; | ||
647 | if (rth->dst.expires && | ||
648 | time_after_eq(jiffies, rth->dst.expires)) | ||
649 | goto out; | ||
650 | |||
651 | age = jiffies - rth->dst.lastuse; | 679 | age = jiffies - rth->dst.lastuse; |
652 | ret = 0; | ||
653 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 680 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
654 | (age <= tmo2 && rt_valuable(rth))) | 681 | (age <= tmo2 && rt_valuable(rth))) |
655 | goto out; | 682 | goto out; |
@@ -793,97 +820,6 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
793 | return ONE; | 820 | return ONE; |
794 | } | 821 | } |
795 | 822 | ||
796 | static void rt_check_expire(void) | ||
797 | { | ||
798 | static unsigned int rover; | ||
799 | unsigned int i = rover, goal; | ||
800 | struct rtable *rth; | ||
801 | struct rtable __rcu **rthp; | ||
802 | unsigned long samples = 0; | ||
803 | unsigned long sum = 0, sum2 = 0; | ||
804 | unsigned long delta; | ||
805 | u64 mult; | ||
806 | |||
807 | delta = jiffies - expires_ljiffies; | ||
808 | expires_ljiffies = jiffies; | ||
809 | mult = ((u64)delta) << rt_hash_log; | ||
810 | if (ip_rt_gc_timeout > 1) | ||
811 | do_div(mult, ip_rt_gc_timeout); | ||
812 | goal = (unsigned int)mult; | ||
813 | if (goal > rt_hash_mask) | ||
814 | goal = rt_hash_mask + 1; | ||
815 | for (; goal > 0; goal--) { | ||
816 | unsigned long tmo = ip_rt_gc_timeout; | ||
817 | unsigned long length; | ||
818 | |||
819 | i = (i + 1) & rt_hash_mask; | ||
820 | rthp = &rt_hash_table[i].chain; | ||
821 | |||
822 | if (need_resched()) | ||
823 | cond_resched(); | ||
824 | |||
825 | samples++; | ||
826 | |||
827 | if (rcu_dereference_raw(*rthp) == NULL) | ||
828 | continue; | ||
829 | length = 0; | ||
830 | spin_lock_bh(rt_hash_lock_addr(i)); | ||
831 | while ((rth = rcu_dereference_protected(*rthp, | ||
832 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
833 | prefetch(rth->dst.rt_next); | ||
834 | if (rt_is_expired(rth)) { | ||
835 | *rthp = rth->dst.rt_next; | ||
836 | rt_free(rth); | ||
837 | continue; | ||
838 | } | ||
839 | if (rth->dst.expires) { | ||
840 | /* Entry is expired even if it is in use */ | ||
841 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
842 | nofree: | ||
843 | tmo >>= 1; | ||
844 | rthp = &rth->dst.rt_next; | ||
845 | /* | ||
846 | * We only count entries on | ||
847 | * a chain with equal hash inputs once | ||
848 | * so that entries for different QOS | ||
849 | * levels, and other non-hash input | ||
850 | * attributes don't unfairly skew | ||
851 | * the length computation | ||
852 | */ | ||
853 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
854 | continue; | ||
855 | } | ||
856 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
857 | goto nofree; | ||
858 | |||
859 | /* Cleanup aged off entries. */ | ||
860 | *rthp = rth->dst.rt_next; | ||
861 | rt_free(rth); | ||
862 | } | ||
863 | spin_unlock_bh(rt_hash_lock_addr(i)); | ||
864 | sum += length; | ||
865 | sum2 += length*length; | ||
866 | } | ||
867 | if (samples) { | ||
868 | unsigned long avg = sum / samples; | ||
869 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
870 | rt_chain_length_max = max_t(unsigned long, | ||
871 | ip_rt_gc_elasticity, | ||
872 | (avg + 4*sd) >> FRACT_BITS); | ||
873 | } | ||
874 | rover = i; | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * rt_worker_func() is run in process context. | ||
879 | * we call rt_check_expire() to scan part of the hash table | ||
880 | */ | ||
881 | static void rt_worker_func(struct work_struct *work) | ||
882 | { | ||
883 | rt_check_expire(); | ||
884 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | ||
885 | } | ||
886 | |||
887 | /* | 823 | /* |
888 | * Perturbation of rt_genid by a small quantity [1..256] | 824 |
889 | * Using 8 bits of shuffling ensures we can call rt_cache_invalidate() | 825 |
@@ -1272,6 +1208,13 @@ skip_hashing: | |||
1272 | return 0; | 1208 | return 0; |
1273 | } | 1209 | } |
1274 | 1210 | ||
1211 | static atomic_t __rt_peer_genid = ATOMIC_INIT(0); | ||
1212 | |||
1213 | static u32 rt_peer_genid(void) | ||
1214 | { | ||
1215 | return atomic_read(&__rt_peer_genid); | ||
1216 | } | ||
1217 | |||
1275 | void rt_bind_peer(struct rtable *rt, int create) | 1218 | void rt_bind_peer(struct rtable *rt, int create) |
1276 | { | 1219 | { |
1277 | struct inet_peer *peer; | 1220 | struct inet_peer *peer; |
@@ -1280,6 +1223,8 @@ void rt_bind_peer(struct rtable *rt, int create) | |||
1280 | 1223 | ||
1281 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) | 1224 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) |
1282 | inet_putpeer(peer); | 1225 | inet_putpeer(peer); |
1226 | else | ||
1227 | rt->rt_peer_genid = rt_peer_genid(); | ||
1283 | } | 1228 | } |
1284 | 1229 | ||
1285 | /* | 1230 | /* |
@@ -1349,13 +1294,8 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1349 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1294 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
1350 | __be32 saddr, struct net_device *dev) | 1295 | __be32 saddr, struct net_device *dev) |
1351 | { | 1296 | { |
1352 | int i, k; | ||
1353 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1297 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1354 | struct rtable *rth; | 1298 | struct inet_peer *peer; |
1355 | struct rtable __rcu **rthp; | ||
1356 | __be32 skeys[2] = { saddr, 0 }; | ||
1357 | int ikeys[2] = { dev->ifindex, 0 }; | ||
1358 | struct netevent_redirect netevent; | ||
1359 | struct net *net; | 1299 | struct net *net; |
1360 | 1300 | ||
1361 | if (!in_dev) | 1301 | if (!in_dev) |
@@ -1367,9 +1307,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1367 | ipv4_is_zeronet(new_gw)) | 1307 | ipv4_is_zeronet(new_gw)) |
1368 | goto reject_redirect; | 1308 | goto reject_redirect; |
1369 | 1309 | ||
1370 | if (!rt_caching(net)) | ||
1371 | goto reject_redirect; | ||
1372 | |||
1373 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { | 1310 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { |
1374 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) | 1311 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) |
1375 | goto reject_redirect; | 1312 | goto reject_redirect; |
@@ -1380,91 +1317,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1380 | goto reject_redirect; | 1317 | goto reject_redirect; |
1381 | } | 1318 | } |
1382 | 1319 | ||
1383 | for (i = 0; i < 2; i++) { | 1320 | peer = inet_getpeer_v4(daddr, 1); |
1384 | for (k = 0; k < 2; k++) { | 1321 | if (peer) { |
1385 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1322 | peer->redirect_learned.a4 = new_gw; |
1386 | rt_genid(net)); | ||
1387 | |||
1388 | rthp = &rt_hash_table[hash].chain; | ||
1389 | |||
1390 | while ((rth = rcu_dereference(*rthp)) != NULL) { | ||
1391 | struct rtable *rt; | ||
1392 | |||
1393 | if (rth->fl.fl4_dst != daddr || | ||
1394 | rth->fl.fl4_src != skeys[i] || | ||
1395 | rth->fl.oif != ikeys[k] || | ||
1396 | rt_is_input_route(rth) || | ||
1397 | rt_is_expired(rth) || | ||
1398 | !net_eq(dev_net(rth->dst.dev), net)) { | ||
1399 | rthp = &rth->dst.rt_next; | ||
1400 | continue; | ||
1401 | } | ||
1402 | |||
1403 | if (rth->rt_dst != daddr || | ||
1404 | rth->rt_src != saddr || | ||
1405 | rth->dst.error || | ||
1406 | rth->rt_gateway != old_gw || | ||
1407 | rth->dst.dev != dev) | ||
1408 | break; | ||
1409 | |||
1410 | dst_hold(&rth->dst); | ||
1411 | |||
1412 | rt = dst_alloc(&ipv4_dst_ops); | ||
1413 | if (rt == NULL) { | ||
1414 | ip_rt_put(rth); | ||
1415 | return; | ||
1416 | } | ||
1417 | |||
1418 | /* Copy all the information. */ | ||
1419 | *rt = *rth; | ||
1420 | rt->dst.__use = 1; | ||
1421 | atomic_set(&rt->dst.__refcnt, 1); | ||
1422 | rt->dst.child = NULL; | ||
1423 | if (rt->dst.dev) | ||
1424 | dev_hold(rt->dst.dev); | ||
1425 | rt->dst.obsolete = -1; | ||
1426 | rt->dst.lastuse = jiffies; | ||
1427 | rt->dst.path = &rt->dst; | ||
1428 | rt->dst.neighbour = NULL; | ||
1429 | rt->dst.hh = NULL; | ||
1430 | #ifdef CONFIG_XFRM | ||
1431 | rt->dst.xfrm = NULL; | ||
1432 | #endif | ||
1433 | rt->rt_genid = rt_genid(net); | ||
1434 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1435 | |||
1436 | /* Gateway is different ... */ | ||
1437 | rt->rt_gateway = new_gw; | ||
1438 | |||
1439 | /* Redirect received -> path was valid */ | ||
1440 | dst_confirm(&rth->dst); | ||
1441 | |||
1442 | if (rt->peer) | ||
1443 | atomic_inc(&rt->peer->refcnt); | ||
1444 | |||
1445 | if (arp_bind_neighbour(&rt->dst) || | ||
1446 | !(rt->dst.neighbour->nud_state & | ||
1447 | NUD_VALID)) { | ||
1448 | if (rt->dst.neighbour) | ||
1449 | neigh_event_send(rt->dst.neighbour, NULL); | ||
1450 | ip_rt_put(rth); | ||
1451 | rt_drop(rt); | ||
1452 | goto do_next; | ||
1453 | } | ||
1454 | 1323 | ||
1455 | netevent.old = &rth->dst; | 1324 | inet_putpeer(peer); |
1456 | netevent.new = &rt->dst; | ||
1457 | call_netevent_notifiers(NETEVENT_REDIRECT, | ||
1458 | &netevent); | ||
1459 | 1325 | ||
1460 | rt_del(hash, rth); | 1326 | atomic_inc(&__rt_peer_genid); |
1461 | if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) | ||
1462 | ip_rt_put(rt); | ||
1463 | goto do_next; | ||
1464 | } | ||
1465 | do_next: | ||
1466 | ; | ||
1467 | } | ||
1468 | } | 1327 | } |
1469 | return; | 1328 | return; |
1470 | 1329 | ||
@@ -1488,9 +1347,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1488 | if (dst->obsolete > 0) { | 1347 | if (dst->obsolete > 0) { |
1489 | ip_rt_put(rt); | 1348 | ip_rt_put(rt); |
1490 | ret = NULL; | 1349 | ret = NULL; |
1491 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1350 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
1492 | (rt->dst.expires && | ||
1493 | time_after_eq(jiffies, rt->dst.expires))) { | ||
1494 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1351 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, |
1495 | rt->fl.oif, | 1352 | rt->fl.oif, |
1496 | rt_genid(dev_net(dst->dev))); | 1353 | rt_genid(dev_net(dst->dev))); |
@@ -1500,6 +1357,14 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1500 | #endif | 1357 | #endif |
1501 | rt_del(hash, rt); | 1358 | rt_del(hash, rt); |
1502 | ret = NULL; | 1359 | ret = NULL; |
1360 | } else if (rt->peer && | ||
1361 | rt->peer->pmtu_expires && | ||
1362 | time_after_eq(jiffies, rt->peer->pmtu_expires)) { | ||
1363 | unsigned long orig = rt->peer->pmtu_expires; | ||
1364 | |||
1365 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
1366 | dst_metric_set(dst, RTAX_MTU, | ||
1367 | rt->peer->pmtu_orig); | ||
1503 | } | 1368 | } |
1504 | } | 1369 | } |
1505 | return ret; | 1370 | return ret; |
@@ -1525,6 +1390,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1525 | { | 1390 | { |
1526 | struct rtable *rt = skb_rtable(skb); | 1391 | struct rtable *rt = skb_rtable(skb); |
1527 | struct in_device *in_dev; | 1392 | struct in_device *in_dev; |
1393 | struct inet_peer *peer; | ||
1528 | int log_martians; | 1394 | int log_martians; |
1529 | 1395 | ||
1530 | rcu_read_lock(); | 1396 | rcu_read_lock(); |
@@ -1536,33 +1402,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1536 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); | 1402 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
1537 | rcu_read_unlock(); | 1403 | rcu_read_unlock(); |
1538 | 1404 | ||
1405 | if (!rt->peer) | ||
1406 | rt_bind_peer(rt, 1); | ||
1407 | peer = rt->peer; | ||
1408 | if (!peer) { | ||
1409 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | ||
1410 | return; | ||
1411 | } | ||
1412 | |||
1539 | /* No redirected packets during ip_rt_redirect_silence; | 1413 | /* No redirected packets during ip_rt_redirect_silence; |
1540 | * reset the algorithm. | 1414 | * reset the algorithm. |
1541 | */ | 1415 | */ |
1542 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) | 1416 | if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) |
1543 | rt->dst.rate_tokens = 0; | 1417 | peer->rate_tokens = 0; |
1544 | 1418 | ||
1545 | /* Too many ignored redirects; do not send anything | 1419 | /* Too many ignored redirects; do not send anything |
1546 | * set peer->rate_last to the last seen redirected packet. | 1420 |
1547 | */ | 1421 | */ |
1548 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { | 1422 | if (peer->rate_tokens >= ip_rt_redirect_number) { |
1549 | rt->dst.rate_last = jiffies; | 1423 | peer->rate_last = jiffies; |
1550 | return; | 1424 | return; |
1551 | } | 1425 | } |
1552 | 1426 | ||
1553 | /* Check for load limit; set rate_last to the latest sent | 1427 | /* Check for load limit; set rate_last to the latest sent |
1554 | * redirect. | 1428 | * redirect. |
1555 | */ | 1429 | */ |
1556 | if (rt->dst.rate_tokens == 0 || | 1430 | if (peer->rate_tokens == 0 || |
1557 | time_after(jiffies, | 1431 | time_after(jiffies, |
1558 | (rt->dst.rate_last + | 1432 | (peer->rate_last + |
1559 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { | 1433 | (ip_rt_redirect_load << peer->rate_tokens)))) { |
1560 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1434 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
1561 | rt->dst.rate_last = jiffies; | 1435 | peer->rate_last = jiffies; |
1562 | ++rt->dst.rate_tokens; | 1436 | ++peer->rate_tokens; |
1563 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1437 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1564 | if (log_martians && | 1438 | if (log_martians && |
1565 | rt->dst.rate_tokens == ip_rt_redirect_number && | 1439 | peer->rate_tokens == ip_rt_redirect_number && |
1566 | net_ratelimit()) | 1440 | net_ratelimit()) |
1567 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1441 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1568 | &rt->rt_src, rt->rt_iif, | 1442 | &rt->rt_src, rt->rt_iif, |
@@ -1574,7 +1448,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1574 | static int ip_error(struct sk_buff *skb) | 1448 | static int ip_error(struct sk_buff *skb) |
1575 | { | 1449 | { |
1576 | struct rtable *rt = skb_rtable(skb); | 1450 | struct rtable *rt = skb_rtable(skb); |
1451 | struct inet_peer *peer; | ||
1577 | unsigned long now; | 1452 | unsigned long now; |
1453 | bool send; | ||
1578 | int code; | 1454 | int code; |
1579 | 1455 | ||
1580 | switch (rt->dst.error) { | 1456 | switch (rt->dst.error) { |
@@ -1594,15 +1470,24 @@ static int ip_error(struct sk_buff *skb) | |||
1594 | break; | 1470 | break; |
1595 | } | 1471 | } |
1596 | 1472 | ||
1597 | now = jiffies; | 1473 | if (!rt->peer) |
1598 | rt->dst.rate_tokens += now - rt->dst.rate_last; | 1474 | rt_bind_peer(rt, 1); |
1599 | if (rt->dst.rate_tokens > ip_rt_error_burst) | 1475 | peer = rt->peer; |
1600 | rt->dst.rate_tokens = ip_rt_error_burst; | 1476 | |
1601 | rt->dst.rate_last = now; | 1477 | send = true; |
1602 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { | 1478 | if (peer) { |
1603 | rt->dst.rate_tokens -= ip_rt_error_cost; | 1479 | now = jiffies; |
1604 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1480 | peer->rate_tokens += now - peer->rate_last; |
1481 | if (peer->rate_tokens > ip_rt_error_burst) | ||
1482 | peer->rate_tokens = ip_rt_error_burst; | ||
1483 | peer->rate_last = now; | ||
1484 | if (peer->rate_tokens >= ip_rt_error_cost) | ||
1485 | peer->rate_tokens -= ip_rt_error_cost; | ||
1486 | else | ||
1487 | send = false; | ||
1605 | } | 1488 | } |
1489 | if (send) | ||
1490 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | ||
1606 | 1491 | ||
1607 | out: kfree_skb(skb); | 1492 | out: kfree_skb(skb); |
1608 | return 0; | 1493 | return 0; |
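ip_error() keeps its token-bucket shape but moves the counters onto the inet_peer: idle jiffies accrue as tokens capped at ip_rt_error_burst, and each ICMP_DEST_UNREACH spends ip_rt_error_cost. Assuming the usual route.c defaults (cost HZ, burst 5*HZ), a quiet source may burst five errors toward one destination and then sustain roughly one per second; when no peer can be bound, send stays true and the error goes out unthrottled.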
@@ -1630,88 +1515,130 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1630 | unsigned short new_mtu, | 1515 | unsigned short new_mtu, |
1631 | struct net_device *dev) | 1516 | struct net_device *dev) |
1632 | { | 1517 | { |
1633 | int i, k; | ||
1634 | unsigned short old_mtu = ntohs(iph->tot_len); | 1518 | unsigned short old_mtu = ntohs(iph->tot_len); |
1635 | struct rtable *rth; | ||
1636 | int ikeys[2] = { dev->ifindex, 0 }; | ||
1637 | __be32 skeys[2] = { iph->saddr, 0, }; | ||
1638 | __be32 daddr = iph->daddr; | ||
1639 | unsigned short est_mtu = 0; | 1519 | unsigned short est_mtu = 0; |
1520 | struct inet_peer *peer; | ||
1640 | 1521 | ||
1641 | for (k = 0; k < 2; k++) { | 1522 | peer = inet_getpeer_v4(iph->daddr, 1); |
1642 | for (i = 0; i < 2; i++) { | 1523 | if (peer) { |
1643 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1524 | unsigned short mtu = new_mtu; |
1644 | rt_genid(net)); | ||
1645 | |||
1646 | rcu_read_lock(); | ||
1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | ||
1648 | rth = rcu_dereference(rth->dst.rt_next)) { | ||
1649 | unsigned short mtu = new_mtu; | ||
1650 | |||
1651 | if (rth->fl.fl4_dst != daddr || | ||
1652 | rth->fl.fl4_src != skeys[i] || | ||
1653 | rth->rt_dst != daddr || | ||
1654 | rth->rt_src != iph->saddr || | ||
1655 | rth->fl.oif != ikeys[k] || | ||
1656 | rt_is_input_route(rth) || | ||
1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || | ||
1658 | !net_eq(dev_net(rth->dst.dev), net) || | ||
1659 | rt_is_expired(rth)) | ||
1660 | continue; | ||
1661 | |||
1662 | if (new_mtu < 68 || new_mtu >= old_mtu) { | ||
1663 | 1525 | ||
1664 | /* BSD 4.2 compatibility hack :-( */ | 1526 | if (new_mtu < 68 || new_mtu >= old_mtu) { |
1665 | if (mtu == 0 && | 1527 | /* BSD 4.2 derived systems incorrectly adjust |
1666 | old_mtu >= dst_mtu(&rth->dst) && | 1528 | * tot_len by the IP header length, and report |
1667 | old_mtu >= 68 + (iph->ihl << 2)) | 1529 | * a zero MTU in the ICMP message. |
1668 | old_mtu -= iph->ihl << 2; | 1530 | */ |
1531 | if (mtu == 0 && | ||
1532 | old_mtu >= 68 + (iph->ihl << 2)) | ||
1533 | old_mtu -= iph->ihl << 2; | ||
1534 | mtu = guess_mtu(old_mtu); | ||
1535 | } | ||
1669 | 1536 | ||
1670 | mtu = guess_mtu(old_mtu); | 1537 | if (mtu < ip_rt_min_pmtu) |
1671 | } | 1538 | mtu = ip_rt_min_pmtu; |
1672 | if (mtu <= dst_mtu(&rth->dst)) { | 1539 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { |
1673 | if (mtu < dst_mtu(&rth->dst)) { | 1540 | est_mtu = mtu; |
1674 | dst_confirm(&rth->dst); | 1541 | peer->pmtu_learned = mtu; |
1675 | if (mtu < ip_rt_min_pmtu) { | 1542 | peer->pmtu_expires = jiffies + ip_rt_mtu_expires; |
1676 | u32 lock = dst_metric(&rth->dst, | ||
1677 | RTAX_LOCK); | ||
1678 | mtu = ip_rt_min_pmtu; | ||
1679 | lock |= (1 << RTAX_MTU); | ||
1680 | dst_metric_set(&rth->dst, RTAX_LOCK, | ||
1681 | lock); | ||
1682 | } | ||
1683 | dst_metric_set(&rth->dst, RTAX_MTU, mtu); | ||
1684 | dst_set_expires(&rth->dst, | ||
1685 | ip_rt_mtu_expires); | ||
1686 | } | ||
1687 | est_mtu = mtu; | ||
1688 | } | ||
1689 | } | ||
1690 | rcu_read_unlock(); | ||
1691 | } | 1543 | } |
1544 | |||
1545 | inet_putpeer(peer); | ||
1546 | |||
1547 | atomic_inc(&__rt_peer_genid); | ||
1692 | } | 1548 | } |
1693 | return est_mtu ? : new_mtu; | 1549 | return est_mtu ? : new_mtu; |
1694 | } | 1550 | } |
1695 | 1551 | ||
1552 | static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) | ||
1553 | { | ||
1554 | unsigned long expires = peer->pmtu_expires; | ||
1555 | |||
1556 | 	if (time_before(jiffies, expires)) { | ||
1557 | u32 orig_dst_mtu = dst_mtu(dst); | ||
1558 | if (peer->pmtu_learned < orig_dst_mtu) { | ||
1559 | if (!peer->pmtu_orig) | ||
1560 | peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); | ||
1561 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); | ||
1562 | } | ||
1563 | } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) | ||
1564 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); | ||
1565 | } | ||
1566 | |||
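check_peer_pmtu() clamps the route's MTU down to pmtu_learned while the entry is still valid, and once jiffies pass pmtu_expires exactly one caller, the cmpxchg winner, restores pmtu_orig; the same cmpxchg-to-zero idiom guards the restore paths in ipv4_negative_advice() and ipv4_link_failure() in this patch.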
1696 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | 1567 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) |
1697 | { | 1568 | { |
1698 | if (dst_mtu(dst) > mtu && mtu >= 68 && | 1569 | struct rtable *rt = (struct rtable *) dst; |
1699 | !(dst_metric_locked(dst, RTAX_MTU))) { | 1570 | struct inet_peer *peer; |
1700 | if (mtu < ip_rt_min_pmtu) { | 1571 | |
1701 | u32 lock = dst_metric(dst, RTAX_LOCK); | 1572 | dst_confirm(dst); |
1573 | |||
1574 | if (!rt->peer) | ||
1575 | rt_bind_peer(rt, 1); | ||
1576 | peer = rt->peer; | ||
1577 | if (peer) { | ||
1578 | if (mtu < ip_rt_min_pmtu) | ||
1702 | mtu = ip_rt_min_pmtu; | 1579 | mtu = ip_rt_min_pmtu; |
1703 | dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); | 1580 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { |
1581 | peer->pmtu_learned = mtu; | ||
1582 | peer->pmtu_expires = jiffies + ip_rt_mtu_expires; | ||
1583 | |||
1584 | atomic_inc(&__rt_peer_genid); | ||
1585 | rt->rt_peer_genid = rt_peer_genid(); | ||
1586 | |||
1587 | check_peer_pmtu(dst, peer); | ||
1704 | } | 1588 | } |
1705 | dst_metric_set(dst, RTAX_MTU, mtu); | 1589 | inet_putpeer(peer); |
1706 | dst_set_expires(dst, ip_rt_mtu_expires); | 1590 | } |
1707 | call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); | 1591 | } |
1592 | |||
1593 | static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | ||
1594 | { | ||
1595 | struct rtable *rt = (struct rtable *) dst; | ||
1596 | __be32 orig_gw = rt->rt_gateway; | ||
1597 | |||
1598 | dst_confirm(&rt->dst); | ||
1599 | |||
1600 | neigh_release(rt->dst.neighbour); | ||
1601 | rt->dst.neighbour = NULL; | ||
1602 | |||
1603 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1604 | if (arp_bind_neighbour(&rt->dst) || | ||
1605 | !(rt->dst.neighbour->nud_state & NUD_VALID)) { | ||
1606 | if (rt->dst.neighbour) | ||
1607 | neigh_event_send(rt->dst.neighbour, NULL); | ||
1608 | rt->rt_gateway = orig_gw; | ||
1609 | return -EAGAIN; | ||
1610 | } else { | ||
1611 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1612 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, | ||
1613 | rt->dst.neighbour); | ||
1708 | } | 1614 | } |
1615 | return 0; | ||
1709 | } | 1616 | } |
1710 | 1617 | ||
1711 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1618 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
1712 | { | 1619 | { |
1713 | if (rt_is_expired((struct rtable *)dst)) | 1620 | struct rtable *rt = (struct rtable *) dst; |
1621 | |||
1622 | if (rt_is_expired(rt)) | ||
1714 | return NULL; | 1623 | return NULL; |
1624 | if (rt->rt_peer_genid != rt_peer_genid()) { | ||
1625 | struct inet_peer *peer; | ||
1626 | |||
1627 | if (!rt->peer) | ||
1628 | rt_bind_peer(rt, 0); | ||
1629 | |||
1630 | peer = rt->peer; | ||
1631 | if (peer && peer->pmtu_expires) | ||
1632 | check_peer_pmtu(dst, peer); | ||
1633 | |||
1634 | if (peer && peer->redirect_learned.a4 && | ||
1635 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1636 | if (check_peer_redir(dst, peer)) | ||
1637 | return NULL; | ||
1638 | } | ||
1639 | |||
1640 | rt->rt_peer_genid = rt_peer_genid(); | ||
1641 | } | ||
1715 | return dst; | 1642 | return dst; |
1716 | } | 1643 | } |
1717 | 1644 | ||
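Every site above that learns new peer state (ip_rt_redirect(), ip_rt_frag_needed(), ip_rt_update_pmtu()) bumps the single global __rt_peer_genid rather than walking the route hash; cached routes then revalidate lazily in ipv4_dst_check(). The pattern in isolation (names mirror the diff; the reader body is a paraphrase):

	/* writer: a peer learned a redirect or a new PMTU */
	atomic_inc(&__rt_peer_genid);

	/* reader: on the next dst_check() of any cached route */
	if (rt->rt_peer_genid != rt_peer_genid()) {
		/* re-apply peer PMTU and learned gateway, if any */
		rt->rt_peer_genid = rt_peer_genid();
	}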
@@ -1720,6 +1647,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst) | |||
1720 | struct rtable *rt = (struct rtable *) dst; | 1647 | struct rtable *rt = (struct rtable *) dst; |
1721 | struct inet_peer *peer = rt->peer; | 1648 | struct inet_peer *peer = rt->peer; |
1722 | 1649 | ||
1650 | if (rt->fi) { | ||
1651 | fib_info_put(rt->fi); | ||
1652 | rt->fi = NULL; | ||
1653 | } | ||
1723 | if (peer) { | 1654 | if (peer) { |
1724 | rt->peer = NULL; | 1655 | rt->peer = NULL; |
1725 | inet_putpeer(peer); | 1656 | inet_putpeer(peer); |
@@ -1734,8 +1665,14 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1734 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1665 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1735 | 1666 | ||
1736 | rt = skb_rtable(skb); | 1667 | rt = skb_rtable(skb); |
1737 | if (rt) | 1668 | if (rt && |
1738 | dst_set_expires(&rt->dst, 0); | 1669 | rt->peer && |
1670 | rt->peer->pmtu_expires) { | ||
1671 | unsigned long orig = rt->peer->pmtu_expires; | ||
1672 | |||
1673 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
1674 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); | ||
1675 | } | ||
1739 | } | 1676 | } |
1740 | 1677 | ||
1741 | static int ip_rt_bug(struct sk_buff *skb) | 1678 | static int ip_rt_bug(struct sk_buff *skb) |
@@ -1775,7 +1712,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1775 | memcpy(addr, &src, 4); | 1712 | memcpy(addr, &src, 4); |
1776 | } | 1713 | } |
1777 | 1714 | ||
1778 | #ifdef CONFIG_NET_CLS_ROUTE | 1715 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1779 | static void set_class_tag(struct rtable *rt, u32 tag) | 1716 | static void set_class_tag(struct rtable *rt, u32 tag) |
1780 | { | 1717 | { |
1781 | if (!(rt->dst.tclassid & 0xFFFF)) | 1718 | if (!(rt->dst.tclassid & 0xFFFF)) |
@@ -1815,17 +1752,52 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | |||
1815 | return mtu; | 1752 | return mtu; |
1816 | } | 1753 | } |
1817 | 1754 | ||
1818 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | 1755 | static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) |
1756 | { | ||
1757 | struct inet_peer *peer; | ||
1758 | int create = 0; | ||
1759 | |||
1760 | /* If a peer entry exists for this destination, we must hook | ||
1761 | * it up in order to get at cached metrics. | ||
1762 | */ | ||
1763 | if (rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS) | ||
1764 | create = 1; | ||
1765 | |||
1766 | rt_bind_peer(rt, create); | ||
1767 | peer = rt->peer; | ||
1768 | if (peer) { | ||
1769 | if (inet_metrics_new(peer)) | ||
1770 | memcpy(peer->metrics, fi->fib_metrics, | ||
1771 | sizeof(u32) * RTAX_MAX); | ||
1772 | dst_init_metrics(&rt->dst, peer->metrics, false); | ||
1773 | |||
1774 | if (peer->pmtu_expires) | ||
1775 | check_peer_pmtu(&rt->dst, peer); | ||
1776 | if (peer->redirect_learned.a4 && | ||
1777 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
1778 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1779 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1780 | } | ||
1781 | } else { | ||
1782 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | ||
1783 | rt->fi = fi; | ||
1784 | atomic_inc(&fi->fib_clntref); | ||
1785 | } | ||
1786 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | ||
1787 | } | ||
1788 | } | ||
1789 | |||
1790 | static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res, | ||
1791 | struct fib_info *fi, u16 type, u32 itag) | ||
1819 | { | 1792 | { |
1820 | struct dst_entry *dst = &rt->dst; | 1793 | struct dst_entry *dst = &rt->dst; |
1821 | struct fib_info *fi = res->fi; | ||
1822 | 1794 | ||
1823 | if (fi) { | 1795 | if (fi) { |
1824 | if (FIB_RES_GW(*res) && | 1796 | if (FIB_RES_GW(*res) && |
1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1797 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1826 | rt->rt_gateway = FIB_RES_GW(*res); | 1798 | rt->rt_gateway = FIB_RES_GW(*res); |
1827 | dst_import_metrics(dst, fi->fib_metrics); | 1799 | rt_init_metrics(rt, fi); |
1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1800 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1801 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1830 | #endif | 1802 | #endif |
1831 | } | 1803 | } |
@@ -1835,13 +1807,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | 1807 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | 1808 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1837 | 1809 | ||
1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1810 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1811 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1840 | set_class_tag(rt, fib_rules_tclass(res)); | 1812 | set_class_tag(rt, fib_rules_tclass(res)); |
1841 | #endif | 1813 | #endif |
1842 | set_class_tag(rt, itag); | 1814 | set_class_tag(rt, itag); |
1843 | #endif | 1815 | #endif |
1844 | rt->rt_type = res->type; | 1816 | rt->rt_type = type; |
1817 | } | ||
1818 | |||
1819 | static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) | ||
1820 | { | ||
1821 | struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); | ||
1822 | if (rt) { | ||
1823 | rt->dst.obsolete = -1; | ||
1824 | |||
1825 | rt->dst.flags = DST_HOST | | ||
1826 | (nopolicy ? DST_NOPOLICY : 0) | | ||
1827 | (noxfrm ? DST_NOXFRM : 0); | ||
1828 | } | ||
1829 | return rt; | ||
1845 | } | 1830 | } |
1846 | 1831 | ||
1847 | /* called in rcu_read_lock() section */ | 1832 | /* called in rcu_read_lock() section */ |
@@ -1874,24 +1859,19 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1874 | if (err < 0) | 1859 | if (err < 0) |
1875 | goto e_err; | 1860 | goto e_err; |
1876 | } | 1861 | } |
1877 | rth = dst_alloc(&ipv4_dst_ops); | 1862 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
1878 | if (!rth) | 1863 | if (!rth) |
1879 | goto e_nobufs; | 1864 | goto e_nobufs; |
1880 | 1865 | ||
1881 | rth->dst.output = ip_rt_bug; | 1866 | rth->dst.output = ip_rt_bug; |
1882 | rth->dst.obsolete = -1; | ||
1883 | 1867 | ||
1884 | atomic_set(&rth->dst.__refcnt, 1); | ||
1885 | rth->dst.flags= DST_HOST; | ||
1886 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
1887 | rth->dst.flags |= DST_NOPOLICY; | ||
1888 | rth->fl.fl4_dst = daddr; | 1868 | rth->fl.fl4_dst = daddr; |
1889 | rth->rt_dst = daddr; | 1869 | rth->rt_dst = daddr; |
1890 | rth->fl.fl4_tos = tos; | 1870 | rth->fl.fl4_tos = tos; |
1891 | rth->fl.mark = skb->mark; | 1871 | rth->fl.mark = skb->mark; |
1892 | rth->fl.fl4_src = saddr; | 1872 | rth->fl.fl4_src = saddr; |
1893 | rth->rt_src = saddr; | 1873 | rth->rt_src = saddr; |
1894 | #ifdef CONFIG_NET_CLS_ROUTE | 1874 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1895 | rth->dst.tclassid = itag; | 1875 | rth->dst.tclassid = itag; |
1896 | #endif | 1876 | #endif |
1897 | rth->rt_iif = | 1877 | rth->rt_iif = |
@@ -1959,7 +1939,7 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1959 | 1939 | ||
1960 | /* called in rcu_read_lock() section */ | 1940 | /* called in rcu_read_lock() section */ |
1961 | static int __mkroute_input(struct sk_buff *skb, | 1941 | static int __mkroute_input(struct sk_buff *skb, |
1962 | struct fib_result *res, | 1942 | const struct fib_result *res, |
1963 | struct in_device *in_dev, | 1943 | struct in_device *in_dev, |
1964 | __be32 daddr, __be32 saddr, u32 tos, | 1944 | __be32 daddr, __be32 saddr, u32 tos, |
1965 | struct rtable **result) | 1945 | struct rtable **result) |
@@ -2013,19 +1993,13 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2013 | } | 1993 | } |
2014 | } | 1994 | } |
2015 | 1995 | ||
2016 | 1996 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
2017 | rth = dst_alloc(&ipv4_dst_ops); | 1997 | IN_DEV_CONF_GET(out_dev, NOXFRM)); |
2018 | if (!rth) { | 1998 | if (!rth) { |
2019 | err = -ENOBUFS; | 1999 | err = -ENOBUFS; |
2020 | goto cleanup; | 2000 | goto cleanup; |
2021 | } | 2001 | } |
2022 | 2002 | ||
2023 | atomic_set(&rth->dst.__refcnt, 1); | ||
2024 | rth->dst.flags= DST_HOST; | ||
2025 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2026 | rth->dst.flags |= DST_NOPOLICY; | ||
2027 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | ||
2028 | rth->dst.flags |= DST_NOXFRM; | ||
2029 | rth->fl.fl4_dst = daddr; | 2003 | rth->fl.fl4_dst = daddr; |
2030 | rth->rt_dst = daddr; | 2004 | rth->rt_dst = daddr; |
2031 | rth->fl.fl4_tos = tos; | 2005 | rth->fl.fl4_tos = tos; |
@@ -2040,12 +2014,11 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2040 | rth->fl.oif = 0; | 2014 | rth->fl.oif = 0; |
2041 | rth->rt_spec_dst= spec_dst; | 2015 | rth->rt_spec_dst= spec_dst; |
2042 | 2016 | ||
2043 | rth->dst.obsolete = -1; | ||
2044 | rth->dst.input = ip_forward; | 2017 | rth->dst.input = ip_forward; |
2045 | rth->dst.output = ip_output; | 2018 | rth->dst.output = ip_output; |
2046 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | 2019 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
2047 | 2020 | ||
2048 | rt_set_nexthop(rth, res, itag); | 2021 | rt_set_nexthop(rth, res, res->fi, res->type, itag); |
2049 | 2022 | ||
2050 | rth->rt_flags = flags; | 2023 | rth->rt_flags = flags; |
2051 | 2024 | ||
@@ -2190,25 +2163,20 @@ brd_input: | |||
2190 | RT_CACHE_STAT_INC(in_brd); | 2163 | RT_CACHE_STAT_INC(in_brd); |
2191 | 2164 | ||
2192 | local_input: | 2165 | local_input: |
2193 | rth = dst_alloc(&ipv4_dst_ops); | 2166 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
2194 | if (!rth) | 2167 | if (!rth) |
2195 | goto e_nobufs; | 2168 | goto e_nobufs; |
2196 | 2169 | ||
2197 | rth->dst.output= ip_rt_bug; | 2170 | rth->dst.output= ip_rt_bug; |
2198 | rth->dst.obsolete = -1; | ||
2199 | rth->rt_genid = rt_genid(net); | 2171 | rth->rt_genid = rt_genid(net); |
2200 | 2172 | ||
2201 | atomic_set(&rth->dst.__refcnt, 1); | ||
2202 | rth->dst.flags= DST_HOST; | ||
2203 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2204 | rth->dst.flags |= DST_NOPOLICY; | ||
2205 | rth->fl.fl4_dst = daddr; | 2173 | rth->fl.fl4_dst = daddr; |
2206 | rth->rt_dst = daddr; | 2174 | rth->rt_dst = daddr; |
2207 | rth->fl.fl4_tos = tos; | 2175 | rth->fl.fl4_tos = tos; |
2208 | rth->fl.mark = skb->mark; | 2176 | rth->fl.mark = skb->mark; |
2209 | rth->fl.fl4_src = saddr; | 2177 | rth->fl.fl4_src = saddr; |
2210 | rth->rt_src = saddr; | 2178 | rth->rt_src = saddr; |
2211 | #ifdef CONFIG_NET_CLS_ROUTE | 2179 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2212 | rth->dst.tclassid = itag; | 2180 | rth->dst.tclassid = itag; |
2213 | #endif | 2181 | #endif |
2214 | rth->rt_iif = | 2182 | rth->rt_iif = |
@@ -2351,38 +2319,39 @@ skip_cache: | |||
2351 | EXPORT_SYMBOL(ip_route_input_common); | 2319 | EXPORT_SYMBOL(ip_route_input_common); |
2352 | 2320 | ||
2353 | /* called with rcu_read_lock() */ | 2321 | /* called with rcu_read_lock() */ |
2354 | static int __mkroute_output(struct rtable **result, | 2322 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2355 | struct fib_result *res, | 2323 | const struct flowi *fl, |
2356 | const struct flowi *fl, | 2324 | const struct flowi *oldflp, |
2357 | const struct flowi *oldflp, | 2325 | struct net_device *dev_out, |
2358 | struct net_device *dev_out, | 2326 | unsigned int flags) |
2359 | unsigned flags) | ||
2360 | { | 2327 | { |
2361 | struct rtable *rth; | 2328 | struct fib_info *fi = res->fi; |
2362 | struct in_device *in_dev; | ||
2363 | u32 tos = RT_FL_TOS(oldflp); | 2329 | u32 tos = RT_FL_TOS(oldflp); |
2330 | struct in_device *in_dev; | ||
2331 | u16 type = res->type; | ||
2332 | struct rtable *rth; | ||
2364 | 2333 | ||
2365 | if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) | 2334 | if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) |
2366 | return -EINVAL; | 2335 | return ERR_PTR(-EINVAL); |
2367 | 2336 | ||
2368 | if (ipv4_is_lbcast(fl->fl4_dst)) | 2337 | if (ipv4_is_lbcast(fl->fl4_dst)) |
2369 | res->type = RTN_BROADCAST; | 2338 | type = RTN_BROADCAST; |
2370 | else if (ipv4_is_multicast(fl->fl4_dst)) | 2339 | else if (ipv4_is_multicast(fl->fl4_dst)) |
2371 | res->type = RTN_MULTICAST; | 2340 | type = RTN_MULTICAST; |
2372 | else if (ipv4_is_zeronet(fl->fl4_dst)) | 2341 | else if (ipv4_is_zeronet(fl->fl4_dst)) |
2373 | return -EINVAL; | 2342 | return ERR_PTR(-EINVAL); |
2374 | 2343 | ||
2375 | if (dev_out->flags & IFF_LOOPBACK) | 2344 | if (dev_out->flags & IFF_LOOPBACK) |
2376 | flags |= RTCF_LOCAL; | 2345 | flags |= RTCF_LOCAL; |
2377 | 2346 | ||
2378 | in_dev = __in_dev_get_rcu(dev_out); | 2347 | in_dev = __in_dev_get_rcu(dev_out); |
2379 | if (!in_dev) | 2348 | if (!in_dev) |
2380 | return -EINVAL; | 2349 | return ERR_PTR(-EINVAL); |
2381 | 2350 | ||
2382 | if (res->type == RTN_BROADCAST) { | 2351 | if (type == RTN_BROADCAST) { |
2383 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2352 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
2384 | res->fi = NULL; | 2353 | fi = NULL; |
2385 | } else if (res->type == RTN_MULTICAST) { | 2354 | } else if (type == RTN_MULTICAST) { |
2386 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 2355 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
2387 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, | 2356 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, |
2388 | oldflp->proto)) | 2357 | oldflp->proto)) |
@@ -2391,21 +2360,14 @@ static int __mkroute_output(struct rtable **result, | |||
2391 | * default one, but do not gateway in this case. | 2360 | * default one, but do not gateway in this case. |
2392 | * Yes, it is hack. | 2361 | * Yes, it is hack. |
2393 | */ | 2362 | */ |
2394 | if (res->fi && res->prefixlen < 4) | 2363 | if (fi && res->prefixlen < 4) |
2395 | res->fi = NULL; | 2364 | fi = NULL; |
2396 | } | 2365 | } |
2397 | 2366 | ||
2398 | 2367 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
2399 | rth = dst_alloc(&ipv4_dst_ops); | 2368 | IN_DEV_CONF_GET(in_dev, NOXFRM)); |
2400 | if (!rth) | 2369 | if (!rth) |
2401 | return -ENOBUFS; | 2370 | return ERR_PTR(-ENOBUFS); |
2402 | |||
2403 | atomic_set(&rth->dst.__refcnt, 1); | ||
2404 | rth->dst.flags= DST_HOST; | ||
2405 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | ||
2406 | rth->dst.flags |= DST_NOXFRM; | ||
2407 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
2408 | rth->dst.flags |= DST_NOPOLICY; | ||
2409 | 2371 | ||
2410 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2372 | rth->fl.fl4_dst = oldflp->fl4_dst; |
2411 | rth->fl.fl4_tos = tos; | 2373 | rth->fl.fl4_tos = tos; |
@@ -2423,7 +2385,6 @@ static int __mkroute_output(struct rtable **result, | |||
2423 | rth->rt_spec_dst= fl->fl4_src; | 2385 | rth->rt_spec_dst= fl->fl4_src; |
2424 | 2386 | ||
2425 | rth->dst.output=ip_output; | 2387 | rth->dst.output=ip_output; |
2426 | rth->dst.obsolete = -1; | ||
2427 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2388 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2428 | 2389 | ||
2429 | RT_CACHE_STAT_INC(out_slow_tot); | 2390 | RT_CACHE_STAT_INC(out_slow_tot); |
@@ -2440,7 +2401,7 @@ static int __mkroute_output(struct rtable **result, | |||
2440 | RT_CACHE_STAT_INC(out_slow_mc); | 2401 | RT_CACHE_STAT_INC(out_slow_mc); |
2441 | } | 2402 | } |
2442 | #ifdef CONFIG_IP_MROUTE | 2403 | #ifdef CONFIG_IP_MROUTE |
2443 | if (res->type == RTN_MULTICAST) { | 2404 | if (type == RTN_MULTICAST) { |
2444 | if (IN_DEV_MFORWARD(in_dev) && | 2405 | if (IN_DEV_MFORWARD(in_dev) && |
2445 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2406 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { |
2446 | rth->dst.input = ip_mr_input; | 2407 | rth->dst.input = ip_mr_input; |
@@ -2450,31 +2411,10 @@ static int __mkroute_output(struct rtable **result, | |||
2450 | #endif | 2411 | #endif |
2451 | } | 2412 | } |
2452 | 2413 | ||
2453 | rt_set_nexthop(rth, res, 0); | 2414 | rt_set_nexthop(rth, res, fi, type, 0); |
2454 | 2415 | ||
2455 | rth->rt_flags = flags; | 2416 | rth->rt_flags = flags; |
2456 | *result = rth; | 2417 | return rth; |
2457 | return 0; | ||
2458 | } | ||
2459 | |||
2460 | /* called with rcu_read_lock() */ | ||
2461 | static int ip_mkroute_output(struct rtable **rp, | ||
2462 | struct fib_result *res, | ||
2463 | const struct flowi *fl, | ||
2464 | const struct flowi *oldflp, | ||
2465 | struct net_device *dev_out, | ||
2466 | unsigned flags) | ||
2467 | { | ||
2468 | struct rtable *rth = NULL; | ||
2469 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | ||
2470 | unsigned hash; | ||
2471 | if (err == 0) { | ||
2472 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | ||
2473 | rt_genid(dev_net(dev_out))); | ||
2474 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); | ||
2475 | } | ||
2476 | |||
2477 | return err; | ||
2478 | } | 2418 | } |
2479 | 2419 | ||
2480 | /* | 2420 | /* |
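
__mkroute_output() now returns the rtable directly and encodes failure in the pointer itself, which lets the ip_mkroute_output() wrapper and its output parameter disappear. This is the standard ERR_PTR()/IS_ERR()/PTR_ERR() idiom from <linux/err.h>: a negative errno is packed into the last page of the address space, where no valid pointer can live. A self-contained userspace re-implementation of the idiom:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Errnos -1..-4095 alias the top page of the address space,
 * which no real object pointer can occupy. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical maker in the shape of the new __mkroute_output(). */
static int *make_route(int fail)
{
	static int route = 42;

	if (fail)
		return ERR_PTR(-EINVAL);
	return &route;
}

int main(void)
{
	int *rth = make_route(1);

	if (IS_ERR(rth))
		printf("err = %ld\n", PTR_ERR(rth));	/* prints -22 */
	rth = make_route(0);
	if (!IS_ERR(rth))
		printf("route = %d\n", *rth);
	return 0;
}

Returning the pointer also removes an out-parameter and makes the single caller (the make_route: label in ip_route_output_slow below) the obvious place to do the hash insertion.
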
@@ -2497,6 +2437,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2497 | struct fib_result res; | 2437 | struct fib_result res; |
2498 | unsigned int flags = 0; | 2438 | unsigned int flags = 0; |
2499 | struct net_device *dev_out = NULL; | 2439 | struct net_device *dev_out = NULL; |
2440 | struct rtable *rth; | ||
2500 | int err; | 2441 | int err; |
2501 | 2442 | ||
2502 | 2443 | ||
@@ -2505,6 +2446,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2505 | res.r = NULL; | 2446 | res.r = NULL; |
2506 | #endif | 2447 | #endif |
2507 | 2448 | ||
2449 | rcu_read_lock(); | ||
2508 | if (oldflp->fl4_src) { | 2450 | if (oldflp->fl4_src) { |
2509 | err = -EINVAL; | 2451 | err = -EINVAL; |
2510 | if (ipv4_is_multicast(oldflp->fl4_src) || | 2452 | if (ipv4_is_multicast(oldflp->fl4_src) || |
@@ -2645,7 +2587,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2645 | else | 2587 | else |
2646 | #endif | 2588 | #endif |
2647 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | 2589 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) |
2648 | fib_select_default(net, &fl, &res); | 2590 | fib_select_default(&res); |
2649 | 2591 | ||
2650 | if (!fl.fl4_src) | 2592 | if (!fl.fl4_src) |
2651 | fl.fl4_src = FIB_RES_PREFSRC(res); | 2593 | fl.fl4_src = FIB_RES_PREFSRC(res); |
@@ -2655,17 +2597,27 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2655 | 2597 | ||
2656 | 2598 | ||
2657 | make_route: | 2599 | make_route: |
2658 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2600 | rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags); |
2601 | if (IS_ERR(rth)) | ||
2602 | err = PTR_ERR(rth); | ||
2603 | else { | ||
2604 | unsigned int hash; | ||
2659 | 2605 | ||
2660 | out: return err; | 2606 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, |
2607 | rt_genid(dev_net(dev_out))); | ||
2608 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); | ||
2609 | } | ||
2610 | |||
2611 | out: | ||
2612 | rcu_read_unlock(); | ||
2613 | return err; | ||
2661 | } | 2614 | } |
2662 | 2615 | ||
2663 | int __ip_route_output_key(struct net *net, struct rtable **rp, | 2616 | int __ip_route_output_key(struct net *net, struct rtable **rp, |
2664 | const struct flowi *flp) | 2617 | const struct flowi *flp) |
2665 | { | 2618 | { |
2666 | unsigned int hash; | ||
2667 | int res; | ||
2668 | struct rtable *rth; | 2619 | struct rtable *rth; |
2620 | unsigned int hash; | ||
2669 | 2621 | ||
2670 | if (!rt_caching(net)) | 2622 | if (!rt_caching(net)) |
2671 | goto slow_output; | 2623 | goto slow_output; |
@@ -2695,10 +2647,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2695 | rcu_read_unlock_bh(); | 2647 | rcu_read_unlock_bh(); |
2696 | 2648 | ||
2697 | slow_output: | 2649 | slow_output: |
2698 | rcu_read_lock(); | 2650 | return ip_route_output_slow(net, rp, flp); |
2699 | res = ip_route_output_slow(net, rp, flp); | ||
2700 | rcu_read_unlock(); | ||
2701 | return res; | ||
2702 | } | 2651 | } |
2703 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2652 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2704 | 2653 | ||
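
The rcu_read_lock()/rcu_read_unlock() pair that __ip_route_output_key() used to wrap around ip_route_output_slow() moves into the slow path itself, taken on entry and dropped at the shared out: label. The cached fast path keeps its separate rcu_read_lock_bh() section for the cache walk, and the slow path becomes a plain tail call. A stand-in sketch of the same lock-scope narrowing; the pthread rwlock here is only a userspace proxy for the RCU read-side section:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int cache_hit;		/* pretend route-cache fast path */
static int fib_table = 7;	/* pretend FIB lookup result */

static int output_slow(int *res)
{
	pthread_rwlock_rdlock(&lock);	/* formerly taken by the caller */
	*res = fib_table;		/* lookup under the read-side lock */
	pthread_rwlock_unlock(&lock);	/* dropped at the common exit */
	return 0;
}

static int output_key(int *res)
{
	if (cache_hit) {		/* fast path: no lock traffic here */
		*res = 1;
		return 0;
	}
	return output_slow(res);	/* slow path locks for itself */
}

int main(void)
{
	int res;
	int err = output_key(&res);

	printf("err=%d res=%d\n", err, res);
	return 0;
}
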
@@ -2731,12 +2680,11 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2731 | { | 2680 | { |
2732 | struct rtable *ort = *rp; | 2681 | struct rtable *ort = *rp; |
2733 | struct rtable *rt = (struct rtable *) | 2682 | struct rtable *rt = (struct rtable *) |
2734 | dst_alloc(&ipv4_dst_blackhole_ops); | 2683 | dst_alloc(&ipv4_dst_blackhole_ops, 1); |
2735 | 2684 | ||
2736 | if (rt) { | 2685 | if (rt) { |
2737 | struct dst_entry *new = &rt->dst; | 2686 | struct dst_entry *new = &rt->dst; |
2738 | 2687 | ||
2739 | atomic_set(&new->__refcnt, 1); | ||
2740 | new->__use = 1; | 2688 | new->__use = 1; |
2741 | new->input = dst_discard; | 2689 | new->input = dst_discard; |
2742 | new->output = dst_discard; | 2690 | new->output = dst_discard; |
@@ -2759,6 +2707,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2759 | rt->peer = ort->peer; | 2707 | rt->peer = ort->peer; |
2760 | if (rt->peer) | 2708 | if (rt->peer) |
2761 | atomic_inc(&rt->peer->refcnt); | 2709 | atomic_inc(&rt->peer->refcnt); |
2710 | rt->fi = ort->fi; | ||
2711 | if (rt->fi) | ||
2712 | atomic_inc(&rt->fi->fib_clntref); | ||
2762 | 2713 | ||
2763 | dst_free(new); | 2714 | dst_free(new); |
2764 | } | 2715 | } |
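
Routes now keep a reference on their fib_info (rt->fi) — in this series the dst metrics are shared with the FIB entry — so ipv4_dst_blackhole() must copy fi and take a fib_clntref reference when cloning a route, exactly as it already does for rt->peer. The initial refcount also moves into dst_alloc() itself (the new second argument), replacing the open-coded atomic_set(). A generic sketch of the clone-takes-its-own-reference rule, using C11 atomics and hypothetical names:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fib_info { atomic_int clntref; int metric; };
struct route { struct fib_info *fi; };

/* A clone that shares a refcounted sub-object must take its own
 * reference, or the original's release frees it under the clone. */
static struct route *clone_route(const struct route *ort)
{
	struct route *rt = malloc(sizeof(*rt));

	if (!rt)
		return NULL;
	rt->fi = ort->fi;
	if (rt->fi)
		atomic_fetch_add(&rt->fi->clntref, 1);
	return rt;
}

static void put_fib_info(struct fib_info *fi)
{
	/* free on the 1 -> 0 transition */
	if (fi && atomic_fetch_sub(&fi->clntref, 1) == 1)
		free(fi);
}

int main(void)
{
	struct fib_info *fi = malloc(sizeof(*fi));
	struct route ort = { fi };
	struct route *rt;

	atomic_init(&fi->clntref, 1);
	fi->metric = 1500;

	rt = clone_route(&ort);
	put_fib_info(ort.fi);	/* original drops its reference ...  */
	printf("metric=%d\n", rt->fi->metric);	/* ... clone stays valid */
	put_fib_info(rt->fi);
	free(rt);
	return 0;
}
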
@@ -2835,7 +2786,7 @@ static int rt_fill_info(struct net *net, | |||
2835 | } | 2786 | } |
2836 | if (rt->dst.dev) | 2787 | if (rt->dst.dev) |
2837 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2788 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2838 | #ifdef CONFIG_NET_CLS_ROUTE | 2789 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2839 | if (rt->dst.tclassid) | 2790 | if (rt->dst.tclassid) |
2840 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2791 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2841 | #endif | 2792 | #endif |
@@ -2854,7 +2805,8 @@ static int rt_fill_info(struct net *net, | |||
2854 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); | 2805 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); |
2855 | 2806 | ||
2856 | error = rt->dst.error; | 2807 | error = rt->dst.error; |
2857 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | 2808 | expires = (rt->peer && rt->peer->pmtu_expires) ? |
2809 | rt->peer->pmtu_expires - jiffies : 0; | ||
2858 | if (rt->peer) { | 2810 | if (rt->peer) { |
2859 | inet_peer_refcheck(rt->peer); | 2811 | inet_peer_refcheck(rt->peer); |
2860 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2812 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
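
rt_fill_info() now derives the RTA_EXPIRES value from the inet_peer's pmtu_expires instead of the per-dst expires field, reflecting the migration of learned-PMTU state into the shared peer cache. The arithmetic is the usual jiffies convention: a deadline of 0 means "no expiry", otherwise report deadline - jiffies. A sketch, including the signed subtraction that survives jiffies wraparound:

#include <stdio.h>

typedef unsigned long jiffies_t;

/* deadline == 0 is "never expires"; otherwise remaining ticks.
 * Subtracting in unsigned and casting to signed keeps the result
 * correct even when the deadline sits past a counter wrap (the
 * same trick time_before()/time_after() rely on). */
static long expires_remaining(jiffies_t deadline, jiffies_t now)
{
	if (!deadline)
		return 0;
	return (long)(deadline - now);
}

int main(void)
{
	jiffies_t now = (jiffies_t)-5;	/* counter about to wrap */

	printf("%ld\n", expires_remaining(now + 10, now));	/* 10 */
	printf("%ld\n", expires_remaining(0, now));	/* 0: no expiry */
	return 0;
}
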
@@ -3256,9 +3208,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3256 | }; | 3208 | }; |
3257 | 3209 | ||
3258 | 3210 | ||
3259 | #ifdef CONFIG_NET_CLS_ROUTE | 3211 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3260 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3212 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3261 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3213 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3262 | 3214 | ||
3263 | static __initdata unsigned long rhash_entries; | 3215 | static __initdata unsigned long rhash_entries; |
3264 | static int __init set_rhash_entries(char *str) | 3216 | static int __init set_rhash_entries(char *str) |
@@ -3274,7 +3226,7 @@ int __init ip_rt_init(void) | |||
3274 | { | 3226 | { |
3275 | int rc = 0; | 3227 | int rc = 0; |
3276 | 3228 | ||
3277 | #ifdef CONFIG_NET_CLS_ROUTE | 3229 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3278 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3230 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3279 | if (!ip_rt_acct) | 3231 | if (!ip_rt_acct) |
3280 | panic("IP: failed to allocate ip_rt_acct\n"); | 3232 | panic("IP: failed to allocate ip_rt_acct\n"); |
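
ip_rt_acct (kept, now under the renamed CONFIG_IP_ROUTE_CLASSID) is a per-CPU table of 256 accounting slots, one per classid/realm byte: writers bump only their own CPU's row, so the hot path needs no atomics or shared cache lines, and readers pay instead by summing across CPUs. A userspace analogue of the per-CPU counter pattern (fixed CPU count, hypothetical names):

#include <stdio.h>

#define NR_CPUS  4
#define NR_SLOTS 256

struct acct { unsigned long bytes; unsigned long packets; };

/* One private row per CPU: updates never contend. */
static struct acct percpu_acct[NR_CPUS][NR_SLOTS];

static void account(int cpu, unsigned int classid, unsigned long len)
{
	struct acct *a = &percpu_acct[cpu][classid & 0xFF];

	a->bytes += len;
	a->packets++;
}

/* The read side folds all rows together. */
static struct acct read_total(unsigned int classid)
{
	struct acct sum = { 0, 0 };
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		sum.bytes   += percpu_acct[cpu][classid & 0xFF].bytes;
		sum.packets += percpu_acct[cpu][classid & 0xFF].packets;
	}
	return sum;
}

int main(void)
{
	struct acct t;

	account(0, 7, 1500);
	account(3, 7, 40);
	t = read_total(7);
	printf("class 7: %lu bytes / %lu packets\n", t.bytes, t.packets);
	return 0;
}
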
@@ -3311,14 +3263,6 @@ int __init ip_rt_init(void) | |||
3311 | devinet_init(); | 3263 | devinet_init(); |
3312 | ip_fib_init(); | 3264 | ip_fib_init(); |
3313 | 3265 | ||
3314 | /* All the timers, started at system startup tend | ||
3315 | to synchronize. Perturb it a bit. | ||
3316 | */ | ||
3317 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3318 | expires_ljiffies = jiffies; | ||
3319 | schedule_delayed_work(&expires_work, | ||
3320 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3321 | |||
3322 | if (ip_rt_proc_init()) | 3266 | if (ip_rt_proc_init()) |
3323 | printk(KERN_ERR "Unable to create route proc files\n"); | 3267 | printk(KERN_ERR "Unable to create route proc files\n"); |
3324 | #ifdef CONFIG_XFRM | 3268 | #ifdef CONFIG_XFRM |
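
The block deleted from ip_rt_init() is the periodic cache-expiry worker, which this series drops. Its comment preserved a scheduling trick worth noting: delayed work started at boot tends to fire in lockstep across subsystems, so the first delay was perturbed to a uniformly random value in [interval, 2*interval). The jitter computation, extracted:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* First firing jittered into [interval, 2*interval) so that many
 * workers started at the same instant do not stay synchronized. */
static unsigned long jittered_delay(unsigned long interval)
{
	return (unsigned long)rand() % interval + interval;
}

int main(void)
{
	unsigned long gc_interval = 60;	/* stand-in value, in ticks */
	int i;

	srand((unsigned)time(NULL));
	for (i = 0; i < 4; i++)
		printf("worker %d first fires after %lu ticks\n",
		       i, jittered_delay(gc_interval));
	return 0;
}
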
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eece262c..f9867d2dbef4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
2654 | #endif | 2654 | #endif |
2655 | 2655 | ||
2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | 2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) |
2657 | { | 2657 | { |
2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2659 | struct tcphdr *th; | 2659 | struct tcphdr *th; |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb7f82ebf4a3..2f692cefd3b0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | |||
817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); | 817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); |
818 | 818 | ||
819 | if (!cwnd) | 819 | if (!cwnd) |
820 | cwnd = rfc3390_bytes_to_packets(tp->mss_cache); | 820 | cwnd = TCP_INIT_CWND; |
821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); | 821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); |
822 | } | 822 | } |
823 | 823 | ||
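
When the route carries no explicit RTAX_INITCWND metric, tcp_init_cwnd() now falls back to a fixed TCP_INIT_CWND (10 segments, per the IW10 proposal later standardized as RFC 6928) instead of the RFC 3390 value. RFC 3390 sets the initial window to min(4*MSS, max(2*MSS, 4380 bytes)), which the old helper expressed in whole packets. A side-by-side comparison (the helper mirrors the old rfc3390_bytes_to_packets() from include/net/tcp.h):

#include <stdio.h>

typedef unsigned int u32;

#define TCP_INIT_CWND 10	/* IW10: the new fallback */

/* RFC 3390, min(4*MSS, max(2*MSS, 4380 bytes)), in packets. */
static u32 rfc3390_bytes_to_packets(u32 smss)
{
	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
}

int main(void)
{
	u32 mss[] = { 536, 1460, 4096 };
	int i;

	for (i = 0; i < 3; i++)
		printf("mss %4u: old IW = %u segments, new IW = %u\n",
		       mss[i], rfc3390_bytes_to_packets(mss[i]),
		       (u32)TCP_INIT_CWND);
	return 0;
}

The result is still clamped by snd_cwnd_clamp on the following line, so administratively capped paths are unaffected.
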
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 02f583b3744a..e2b9be27f226 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1341,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1341 | tcp_death_row.sysctl_tw_recycle && | 1341 | tcp_death_row.sysctl_tw_recycle && |
1342 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1342 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1343 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1343 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1344 | peer->daddr.a4 == saddr) { | 1344 | peer->daddr.addr.a4 == saddr) { |
1345 | inet_peer_refcheck(peer); | 1345 | inet_peer_refcheck(peer); |
1346 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1346 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1347 | (s32)(peer->tcp_ts - req->ts_recent) > | 1347 | (s32)(peer->tcp_ts - req->ts_recent) > |
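
peer->daddr.a4 becomes peer->daddr.addr.a4: the inetpeer destination grows an extra struct level so the address itself can be a union covering both families, part of making the peer cache usable for IPv6. A sketch of that shape (field names follow the diff; the exact layout and family handling here are assumptions):

#include <stdio.h>
#include <string.h>

typedef unsigned int __be32;

/* One keyed address type for both families; only one union
 * member is live, selected by `family`. */
struct inetpeer_addr {
	union {
		__be32 a4;
		__be32 a6[4];
	} addr;
	unsigned short family;
};

int main(void)
{
	struct inetpeer_addr daddr;

	memset(&daddr, 0, sizeof(daddr));
	daddr.family = 2;		/* AF_INET */
	daddr.addr.a4 = 0x0100007f;	/* 127.0.0.1 as seen little-endian */
	printf("v4 key %#x (family %u)\n", daddr.addr.a4, daddr.family);
	return 0;
}
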
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17959ee..d37baaa1dbe3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -2199,7 +2199,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) | |||
2199 | return 0; | 2199 | return 0; |
2200 | } | 2200 | } |
2201 | 2201 | ||
2202 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | 2202 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) |
2203 | { | 2203 | { |
2204 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2204 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2205 | unsigned int mss; | 2205 | unsigned int mss; |
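
tcp_tso_segment() above and udp4_ufo_fragment() here both switch their features argument from int to u32. The argument is a NETIF_F_* bitmask, and an unsigned type removes sign-extension hazards when the mask is shifted or compared, leaving bit 31 cleanly usable as the flag space fills up. Testing such a mask is plain bit arithmetic (flag values below are illustrative, not the real NETIF_F_* constants):

#include <stdio.h>

typedef unsigned int u32;

/* Stand-in flag bits; the real ones live in linux/netdevice.h. */
#define F_SG    (1u << 0)
#define F_TSO   (1u << 1)
#define F_BIT31 (1u << 31)	/* fine in a u32, awkward in an int */

static int can_offload(u32 features)
{
	return (features & (F_SG | F_TSO)) == (F_SG | F_TSO);
}

int main(void)
{
	u32 features = F_SG | F_TSO | F_BIT31;

	printf("can offload: %d\n", can_offload(features));
	printf("bit 31 set:  %d\n", !!(features & F_BIT31));
	return 0;
}
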
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40addec..19fbdec6baaa 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -196,8 +196,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) | |||
196 | { | 196 | { |
197 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | 197 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; |
198 | 198 | ||
199 | dst_destroy_metrics_generic(dst); | ||
200 | |||
199 | if (likely(xdst->u.rt.peer)) | 201 | if (likely(xdst->u.rt.peer)) |
200 | inet_putpeer(xdst->u.rt.peer); | 202 | inet_putpeer(xdst->u.rt.peer); |
203 | |||
201 | xfrm_dst_destroy(xdst); | 204 | xfrm_dst_destroy(xdst); |
202 | } | 205 | } |
203 | 206 | ||
@@ -215,6 +218,7 @@ static struct dst_ops xfrm4_dst_ops = { | |||
215 | .protocol = cpu_to_be16(ETH_P_IP), | 218 | .protocol = cpu_to_be16(ETH_P_IP), |
216 | .gc = xfrm4_garbage_collect, | 219 | .gc = xfrm4_garbage_collect, |
217 | .update_pmtu = xfrm4_update_pmtu, | 220 | .update_pmtu = xfrm4_update_pmtu, |
221 | .cow_metrics = dst_cow_metrics_generic, | ||
218 | .destroy = xfrm4_dst_destroy, | 222 | .destroy = xfrm4_dst_destroy, |
219 | .ifdown = xfrm4_dst_ifdown, | 223 | .ifdown = xfrm4_dst_ifdown, |
220 | .local_out = __ip_local_out, | 224 | .local_out = __ip_local_out, |
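
The two xfrm4 hunks wire the bundle dst into the new generic copy-on-write metrics: xfrm4_dst_ops gains a .cow_metrics hook (dst_cow_metrics_generic) and xfrm4_dst_destroy() calls dst_destroy_metrics_generic() so a privately cloned metrics array is freed at teardown. The generic scheme lets many routes share one read-only metrics array (e.g. the fib_info's) and clones it to a private writable copy only on first write, with the read-only state carried in the low tag bit of the metrics pointer. A simplified, single-threaded sketch of that tagged-pointer COW (names and the exact tagging are assumptions modeled on the idea, not the kernel definitions):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define RTAX_MAX 4
#define METRICS_RDONLY 0x1UL	/* low bit: array is shared/read-only */

struct dst { unsigned long _metrics; };

static unsigned long *metrics_ptr(struct dst *d)
{
	return (unsigned long *)(d->_metrics & ~METRICS_RDONLY);
}

/* Write path: clone the shared array into a private one on the
 * first write; later writes see the tag bit clear and go direct. */
static unsigned long *cow_metrics(struct dst *d)
{
	if (d->_metrics & METRICS_RDONLY) {
		unsigned long *p = malloc(RTAX_MAX * sizeof(*p));

		if (!p)
			return NULL;
		memcpy(p, metrics_ptr(d), RTAX_MAX * sizeof(*p));
		d->_metrics = (unsigned long)p;	/* tag now clear */
	}
	return metrics_ptr(d);
}

/* Teardown frees only a private copy, never the shared array. */
static void destroy_metrics(struct dst *d)
{
	if (!(d->_metrics & METRICS_RDONLY))
		free(metrics_ptr(d));
}

int main(void)
{
	static unsigned long shared[RTAX_MAX] = { 1500, 0, 0, 0 };
	struct dst d = { (unsigned long)shared | METRICS_RDONLY };
	unsigned long *m = cow_metrics(&d);	/* triggers the clone */

	if (m)
		m[0] = 1400;
	printf("shared mtu=%lu, private mtu=%lu\n",
	       shared[0], metrics_ptr(&d)[0]);
	destroy_metrics(&d);
	return 0;
}
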