diff options
author | David S. Miller <davem@davemloft.net> | 2011-02-04 18:55:25 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-02-04 18:59:53 -0500 |
commit | 92d8682926342d2b6aa5b2ecc02221e00e1573a0 (patch) | |
tree | 7f70b9cc2975716ab60ddd632b9fecf0a51b828d | |
parent | 0131ba451e20239c5dc701027c1a2edef95e1a6e (diff) |
inetpeer: Move ICMP rate limiting state into inet_peer entries.
Like metrics, the ICMP rate limiting bits are cached state about
a destination, so move them into the inet_peer entries.
If an inet_peer cannot be bound (because of memory allocation
failure or similar), the policy is to allow the ICMP message to be sent without rate limiting.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/dst.h | 2 | ||||
-rw-r--r-- | include/net/icmp.h | 3 | ||||
-rw-r--r-- | include/net/inetpeer.h | 3 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 49 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 43 | ||||
-rw-r--r-- | net/ipv4/route.c | 56 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 16 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 5 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 4 |
9 files changed, 108 insertions, 73 deletions
diff --git a/include/net/dst.h b/include/net/dst.h index 484f80b69ada..e550195d4f86 100644 --- a/include/net/dst.h +++ b/include/net/dst.h | |||
@@ -78,8 +78,6 @@ struct dst_entry { | |||
78 | atomic_t __refcnt; /* client references */ | 78 | atomic_t __refcnt; /* client references */ |
79 | int __use; | 79 | int __use; |
80 | unsigned long lastuse; | 80 | unsigned long lastuse; |
81 | unsigned long rate_last; /* rate limiting for ICMP */ | ||
82 | unsigned int rate_tokens; | ||
83 | int flags; | 81 | int flags; |
84 | #define DST_HOST 0x0001 | 82 | #define DST_HOST 0x0001 |
85 | #define DST_NOXFRM 0x0002 | 83 | #define DST_NOXFRM 0x0002 |
diff --git a/include/net/icmp.h b/include/net/icmp.h index 6e991e0d0d6f..f0698b955b73 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h | |||
@@ -45,7 +45,4 @@ extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); | |||
45 | extern int icmp_init(void); | 45 | extern int icmp_init(void); |
46 | extern void icmp_out_count(struct net *net, unsigned char type); | 46 | extern void icmp_out_count(struct net *net, unsigned char type); |
47 | 47 | ||
48 | /* Move into dst.h ? */ | ||
49 | extern int xrlim_allow(struct dst_entry *dst, int timeout); | ||
50 | |||
51 | #endif /* _ICMP_H */ | 48 | #endif /* _ICMP_H */ |
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 61f2c66edb2a..ead2cb2de18c 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h | |||
@@ -44,6 +44,8 @@ struct inet_peer { | |||
44 | __u32 tcp_ts; | 44 | __u32 tcp_ts; |
45 | __u32 tcp_ts_stamp; | 45 | __u32 tcp_ts_stamp; |
46 | u32 metrics[RTAX_MAX]; | 46 | u32 metrics[RTAX_MAX]; |
47 | u32 rate_tokens; /* rate limiting for ICMP */ | ||
48 | unsigned long rate_last; | ||
47 | }; | 49 | }; |
48 | struct rcu_head rcu; | 50 | struct rcu_head rcu; |
49 | }; | 51 | }; |
@@ -81,6 +83,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct in6_addr *v6daddr, int cr | |||
81 | 83 | ||
82 | /* can be called from BH context or outside */ | 84 | /* can be called from BH context or outside */ |
83 | extern void inet_putpeer(struct inet_peer *p); | 85 | extern void inet_putpeer(struct inet_peer *p); |
86 | extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); | ||
84 | 87 | ||
85 | /* | 88 | /* |
86 | * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, | 89 | * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f01ea0..ad2bcf1b69ae 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) | |||
233 | * Send an ICMP frame. | 233 | * Send an ICMP frame. |
234 | */ | 234 | */ |
235 | 235 | ||
236 | /* | 236 | static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
237 | * Check transmit rate limitation for given message. | ||
238 | * The rate information is held in the destination cache now. | ||
239 | * This function is generic and could be used for other purposes | ||
240 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
241 | * | ||
242 | * Note that the same dst_entry fields are modified by functions in | ||
243 | * route.c too, but these work for packet destinations while xrlim_allow | ||
244 | * works for icmp destinations. This means the rate limiting information | ||
245 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
246 | * by source and by destination. | ||
247 | * | ||
248 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
249 | * SHOULD allow setting of rate limits | ||
250 | * | ||
251 | * Shared between ICMPv4 and ICMPv6. | ||
252 | */ | ||
253 | #define XRLIM_BURST_FACTOR 6 | ||
254 | int xrlim_allow(struct dst_entry *dst, int timeout) | ||
255 | { | ||
256 | unsigned long now, token = dst->rate_tokens; | ||
257 | int rc = 0; | ||
258 | |||
259 | now = jiffies; | ||
260 | token += now - dst->rate_last; | ||
261 | dst->rate_last = now; | ||
262 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
263 | token = XRLIM_BURST_FACTOR * timeout; | ||
264 | if (token >= timeout) { | ||
265 | token -= timeout; | ||
266 | rc = 1; | ||
267 | } | ||
268 | dst->rate_tokens = token; | ||
269 | return rc; | ||
270 | } | ||
271 | EXPORT_SYMBOL(xrlim_allow); | ||
272 | |||
273 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | ||
274 | int type, int code) | 237 | int type, int code) |
275 | { | 238 | { |
276 | struct dst_entry *dst = &rt->dst; | 239 | struct dst_entry *dst = &rt->dst; |
277 | int rc = 1; | 240 | bool rc = true; |
278 | 241 | ||
279 | if (type > NR_ICMP_TYPES) | 242 | if (type > NR_ICMP_TYPES) |
280 | goto out; | 243 | goto out; |
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | |||
288 | goto out; | 251 | goto out; |
289 | 252 | ||
290 | /* Limit if icmp type is enabled in ratemask. */ | 253 | /* Limit if icmp type is enabled in ratemask. */ |
291 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) | 254 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { |
292 | rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); | 255 | if (!rt->peer) |
256 | rt_bind_peer(rt, 1); | ||
257 | rc = inet_peer_xrlim_allow(rt->peer, | ||
258 | net->ipv4.sysctl_icmp_ratelimit); | ||
259 | } | ||
293 | out: | 260 | out: |
294 | return rc; | 261 | return rc; |
295 | } | 262 | } |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b6513b13d729..709fbb4132d7 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -513,6 +513,8 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); | 513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); |
514 | p->tcp_ts_stamp = 0; | 514 | p->tcp_ts_stamp = 0; |
515 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | 515 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; |
516 | p->rate_tokens = 0; | ||
517 | p->rate_last = 0; | ||
516 | INIT_LIST_HEAD(&p->unused); | 518 | INIT_LIST_HEAD(&p->unused); |
517 | 519 | ||
518 | 520 | ||
@@ -580,3 +582,44 @@ void inet_putpeer(struct inet_peer *p) | |||
580 | local_bh_enable(); | 582 | local_bh_enable(); |
581 | } | 583 | } |
582 | EXPORT_SYMBOL_GPL(inet_putpeer); | 584 | EXPORT_SYMBOL_GPL(inet_putpeer); |
585 | |||
586 | /* | ||
587 | * Check transmit rate limitation for given message. | ||
588 | * The rate information is held in the inet_peer entries now. | ||
589 | * This function is generic and could be used for other purposes | ||
590 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
591 | * | ||
592 | * Note that the same inet_peer fields are modified by functions in | ||
593 | * route.c too, but these work for packet destinations while inet_peer_xrlim_allow | ||
594 | * works for icmp destinations. This means the rate limiting information | ||
595 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
596 | * by source and by destination. | ||
597 | * | ||
598 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
599 | * SHOULD allow setting of rate limits | ||
600 | * | ||
601 | * Shared between ICMPv4 and ICMPv6. | ||
602 | */ | ||
603 | #define XRLIM_BURST_FACTOR 6 | ||
604 | bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) | ||
605 | { | ||
606 | unsigned long now, token; | ||
607 | bool rc = false; | ||
608 | |||
609 | if (!peer) | ||
610 | return true; | ||
611 | |||
612 | token = peer->rate_tokens; | ||
613 | now = jiffies; | ||
614 | token += now - peer->rate_last; | ||
615 | peer->rate_last = now; | ||
616 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
617 | token = XRLIM_BURST_FACTOR * timeout; | ||
618 | if (token >= timeout) { | ||
619 | token -= timeout; | ||
620 | rc = true; | ||
621 | } | ||
622 | peer->rate_tokens = token; | ||
623 | return rc; | ||
624 | } | ||
625 | EXPORT_SYMBOL(inet_peer_xrlim_allow); | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0ba6a382b2b4..2e225dafc4f8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1563,6 +1563,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1563 | { | 1563 | { |
1564 | struct rtable *rt = skb_rtable(skb); | 1564 | struct rtable *rt = skb_rtable(skb); |
1565 | struct in_device *in_dev; | 1565 | struct in_device *in_dev; |
1566 | struct inet_peer *peer; | ||
1566 | int log_martians; | 1567 | int log_martians; |
1567 | 1568 | ||
1568 | rcu_read_lock(); | 1569 | rcu_read_lock(); |
@@ -1574,33 +1575,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1574 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); | 1575 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
1575 | rcu_read_unlock(); | 1576 | rcu_read_unlock(); |
1576 | 1577 | ||
1578 | if (!rt->peer) | ||
1579 | rt_bind_peer(rt, 1); | ||
1580 | peer = rt->peer; | ||
1581 | if (!peer) { | ||
1582 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | ||
1583 | return; | ||
1584 | } | ||
1585 | |||
1577 | /* No redirected packets during ip_rt_redirect_silence; | 1586 | /* No redirected packets during ip_rt_redirect_silence; |
1578 | * reset the algorithm. | 1587 | * reset the algorithm. |
1579 | */ | 1588 | */ |
1580 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) | 1589 | if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) |
1581 | rt->dst.rate_tokens = 0; | 1590 | peer->rate_tokens = 0; |
1582 | 1591 | ||
1583 | /* Too many ignored redirects; do not send anything | 1592 | /* Too many ignored redirects; do not send anything |
1584 | * set dst.rate_last to the last seen redirected packet. | 1593 | * set dst.rate_last to the last seen redirected packet. |
1585 | */ | 1594 | */ |
1586 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { | 1595 | if (peer->rate_tokens >= ip_rt_redirect_number) { |
1587 | rt->dst.rate_last = jiffies; | 1596 | peer->rate_last = jiffies; |
1588 | return; | 1597 | return; |
1589 | } | 1598 | } |
1590 | 1599 | ||
1591 | /* Check for load limit; set rate_last to the latest sent | 1600 | /* Check for load limit; set rate_last to the latest sent |
1592 | * redirect. | 1601 | * redirect. |
1593 | */ | 1602 | */ |
1594 | if (rt->dst.rate_tokens == 0 || | 1603 | if (peer->rate_tokens == 0 || |
1595 | time_after(jiffies, | 1604 | time_after(jiffies, |
1596 | (rt->dst.rate_last + | 1605 | (peer->rate_last + |
1597 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { | 1606 | (ip_rt_redirect_load << peer->rate_tokens)))) { |
1598 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1607 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
1599 | rt->dst.rate_last = jiffies; | 1608 | peer->rate_last = jiffies; |
1600 | ++rt->dst.rate_tokens; | 1609 | ++peer->rate_tokens; |
1601 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1610 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1602 | if (log_martians && | 1611 | if (log_martians && |
1603 | rt->dst.rate_tokens == ip_rt_redirect_number && | 1612 | peer->rate_tokens == ip_rt_redirect_number && |
1604 | net_ratelimit()) | 1613 | net_ratelimit()) |
1605 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1614 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1606 | &rt->rt_src, rt->rt_iif, | 1615 | &rt->rt_src, rt->rt_iif, |
@@ -1612,7 +1621,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1612 | static int ip_error(struct sk_buff *skb) | 1621 | static int ip_error(struct sk_buff *skb) |
1613 | { | 1622 | { |
1614 | struct rtable *rt = skb_rtable(skb); | 1623 | struct rtable *rt = skb_rtable(skb); |
1624 | struct inet_peer *peer; | ||
1615 | unsigned long now; | 1625 | unsigned long now; |
1626 | bool send; | ||
1616 | int code; | 1627 | int code; |
1617 | 1628 | ||
1618 | switch (rt->dst.error) { | 1629 | switch (rt->dst.error) { |
@@ -1632,15 +1643,24 @@ static int ip_error(struct sk_buff *skb) | |||
1632 | break; | 1643 | break; |
1633 | } | 1644 | } |
1634 | 1645 | ||
1635 | now = jiffies; | 1646 | if (!rt->peer) |
1636 | rt->dst.rate_tokens += now - rt->dst.rate_last; | 1647 | rt_bind_peer(rt, 1); |
1637 | if (rt->dst.rate_tokens > ip_rt_error_burst) | 1648 | peer = rt->peer; |
1638 | rt->dst.rate_tokens = ip_rt_error_burst; | 1649 | |
1639 | rt->dst.rate_last = now; | 1650 | send = true; |
1640 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { | 1651 | if (peer) { |
1641 | rt->dst.rate_tokens -= ip_rt_error_cost; | 1652 | now = jiffies; |
1642 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1653 | peer->rate_tokens += now - peer->rate_last; |
1654 | if (peer->rate_tokens > ip_rt_error_burst) | ||
1655 | peer->rate_tokens = ip_rt_error_burst; | ||
1656 | peer->rate_last = now; | ||
1657 | if (peer->rate_tokens >= ip_rt_error_cost) | ||
1658 | peer->rate_tokens -= ip_rt_error_cost; | ||
1659 | else | ||
1660 | send = false; | ||
1643 | } | 1661 | } |
1662 | if (send) | ||
1663 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | ||
1644 | 1664 | ||
1645 | out: kfree_skb(skb); | 1665 | out: kfree_skb(skb); |
1646 | return 0; | 1666 | return 0; |
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 03e62f94ff8e..a31d91b04c87 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -157,20 +157,20 @@ static int is_ineligible(struct sk_buff *skb) | |||
157 | /* | 157 | /* |
158 | * Check the ICMP output rate limit | 158 | * Check the ICMP output rate limit |
159 | */ | 159 | */ |
160 | static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, | 160 | static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, |
161 | struct flowi *fl) | 161 | struct flowi *fl) |
162 | { | 162 | { |
163 | struct dst_entry *dst; | 163 | struct dst_entry *dst; |
164 | struct net *net = sock_net(sk); | 164 | struct net *net = sock_net(sk); |
165 | int res = 0; | 165 | bool res = false; |
166 | 166 | ||
167 | /* Informational messages are not limited. */ | 167 | /* Informational messages are not limited. */ |
168 | if (type & ICMPV6_INFOMSG_MASK) | 168 | if (type & ICMPV6_INFOMSG_MASK) |
169 | return 1; | 169 | return true; |
170 | 170 | ||
171 | /* Do not limit pmtu discovery, it would break it. */ | 171 | /* Do not limit pmtu discovery, it would break it. */ |
172 | if (type == ICMPV6_PKT_TOOBIG) | 172 | if (type == ICMPV6_PKT_TOOBIG) |
173 | return 1; | 173 | return true; |
174 | 174 | ||
175 | /* | 175 | /* |
176 | * Look up the output route. | 176 | * Look up the output route. |
@@ -182,7 +182,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, | |||
182 | IP6_INC_STATS(net, ip6_dst_idev(dst), | 182 | IP6_INC_STATS(net, ip6_dst_idev(dst), |
183 | IPSTATS_MIB_OUTNOROUTES); | 183 | IPSTATS_MIB_OUTNOROUTES); |
184 | } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { | 184 | } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { |
185 | res = 1; | 185 | res = true; |
186 | } else { | 186 | } else { |
187 | struct rt6_info *rt = (struct rt6_info *)dst; | 187 | struct rt6_info *rt = (struct rt6_info *)dst; |
188 | int tmo = net->ipv6.sysctl.icmpv6_time; | 188 | int tmo = net->ipv6.sysctl.icmpv6_time; |
@@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, | |||
191 | if (rt->rt6i_dst.plen < 128) | 191 | if (rt->rt6i_dst.plen < 128) |
192 | tmo >>= ((128 - rt->rt6i_dst.plen)>>5); | 192 | tmo >>= ((128 - rt->rt6i_dst.plen)>>5); |
193 | 193 | ||
194 | res = xrlim_allow(dst, tmo); | 194 | if (!rt->rt6i_peer) |
195 | rt6_bind_peer(rt, 1); | ||
196 | res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); | ||
195 | } | 197 | } |
196 | dst_release(dst); | 198 | dst_release(dst); |
197 | return res; | 199 | return res; |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f8d242be3f3..2600e2288724 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -479,10 +479,13 @@ int ip6_forward(struct sk_buff *skb) | |||
479 | else | 479 | else |
480 | target = &hdr->daddr; | 480 | target = &hdr->daddr; |
481 | 481 | ||
482 | if (!rt->rt6i_peer) | ||
483 | rt6_bind_peer(rt, 1); | ||
484 | |||
482 | /* Limit redirects both by destination (here) | 485 | /* Limit redirects both by destination (here) |
483 | and by source (inside ndisc_send_redirect) | 486 | and by source (inside ndisc_send_redirect) |
484 | */ | 487 | */ |
485 | if (xrlim_allow(dst, 1*HZ)) | 488 | if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) |
486 | ndisc_send_redirect(skb, n, target); | 489 | ndisc_send_redirect(skb, n, target); |
487 | } else { | 490 | } else { |
488 | int addrtype = ipv6_addr_type(&hdr->saddr); | 491 | int addrtype = ipv6_addr_type(&hdr->saddr); |
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2342545a5ee9..7254ce364006 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, | |||
1553 | "ICMPv6 Redirect: destination is not a neighbour.\n"); | 1553 | "ICMPv6 Redirect: destination is not a neighbour.\n"); |
1554 | goto release; | 1554 | goto release; |
1555 | } | 1555 | } |
1556 | if (!xrlim_allow(dst, 1*HZ)) | 1556 | if (!rt->rt6i_peer) |
1557 | rt6_bind_peer(rt, 1); | ||
1558 | if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) | ||
1557 | goto release; | 1559 | goto release; |
1558 | 1560 | ||
1559 | if (dev->addr_len) { | 1561 | if (dev->addr_len) { |