author    David S. Miller <davem@davemloft.net>  2011-02-04 18:55:25 -0500
committer David S. Miller <davem@davemloft.net>  2011-02-04 18:59:53 -0500
commit    92d8682926342d2b6aa5b2ecc02221e00e1573a0
tree      7f70b9cc2975716ab60ddd632b9fecf0a51b828d
parent    0131ba451e20239c5dc701027c1a2edef95e1a6e
inetpeer: Move ICMP rate limiting state into inet_peer entries.
Like metrics, the ICMP rate limiting bits are cached state about a
destination.  So move it into the inet_peer entries.

If an inet_peer cannot be bound (the reason is memory allocation
failure or similar), the policy is to allow.

Signed-off-by: David S. Miller <davem@davemloft.net>
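For readers skimming the diff, the sketch below condenses the token-bucket policy the patch moves into inet_peer_xrlim_allow(): credit accrues with elapsed time, is capped at XRLIM_BURST_FACTOR * timeout, and one "timeout" worth of credit is spent per allowed ICMP; a NULL peer always allows, which is the fallback the commit message describes. This is a standalone userspace sketch, not kernel code: the peer_sim struct, the explicit "now" argument standing in for jiffies, and main() are illustrative assumptions.

/*
 * Userspace sketch of the token-bucket check this patch moves into
 * inet_peer_xrlim_allow().  The struct, the simulated clock and main()
 * are illustrative only; in the kernel the state lives in struct
 * inet_peer and "now" is jiffies.
 */
#include <stdbool.h>
#include <stdio.h>

#define XRLIM_BURST_FACTOR 6

struct peer_sim {
	unsigned long rate_tokens;	/* accumulated credit */
	unsigned long rate_last;	/* last time we were consulted */
};

/* Returns true when one more ICMP may be sent; a NULL peer always
 * allows, matching the patch's policy when no inet_peer could be bound. */
static bool xrlim_allow_sim(struct peer_sim *peer, unsigned long timeout,
			    unsigned long now)
{
	unsigned long token;
	bool rc = false;

	if (!peer)
		return true;

	token = peer->rate_tokens + (now - peer->rate_last);
	peer->rate_last = now;
	if (token > XRLIM_BURST_FACTOR * timeout)	/* cap the burst */
		token = XRLIM_BURST_FACTOR * timeout;
	if (token >= timeout) {				/* spend one credit */
		token -= timeout;
		rc = true;
	}
	peer->rate_tokens = token;
	return rc;
}

int main(void)
{
	struct peer_sim peer = { .rate_tokens = 0, .rate_last = 0 };
	unsigned long timeout = 100;	/* "ticks" of credit per allowed ICMP */
	unsigned long now;

	/* Ask every 10 ticks: only roughly one request in ten passes. */
	for (now = 10; now <= 300; now += 10)
		printf("t=%3lu  allow=%d\n", now,
		       xrlim_allow_sim(&peer, timeout, now));
	return 0;
}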
-rw-r--r--  include/net/dst.h       |  2
-rw-r--r--  include/net/icmp.h      |  3
-rw-r--r--  include/net/inetpeer.h  |  3
-rw-r--r--  net/ipv4/icmp.c         | 49
-rw-r--r--  net/ipv4/inetpeer.c     | 43
-rw-r--r--  net/ipv4/route.c        | 56
-rw-r--r--  net/ipv6/icmp.c         | 16
-rw-r--r--  net/ipv6/ip6_output.c   |  5
-rw-r--r--  net/ipv6/ndisc.c        |  4
9 files changed, 108 insertions, 73 deletions
diff --git a/include/net/dst.h b/include/net/dst.h
index 484f80b69ada..e550195d4f86 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -78,8 +78,6 @@ struct dst_entry {
 	atomic_t		__refcnt;	/* client references */
 	int			__use;
 	unsigned long		lastuse;
-	unsigned long		rate_last;	/* rate limiting for ICMP */
-	unsigned int		rate_tokens;
 	int			flags;
 #define DST_HOST		0x0001
 #define DST_NOXFRM		0x0002
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6e991e0d0d6f..f0698b955b73 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -45,7 +45,4 @@ extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int icmp_init(void);
 extern void icmp_out_count(struct net *net, unsigned char type);
 
-/* Move into dst.h ? */
-extern int xrlim_allow(struct dst_entry *dst, int timeout);
-
 #endif /* _ICMP_H */
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 61f2c66edb2a..ead2cb2de18c 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -44,6 +44,8 @@ struct inet_peer {
 		__u32			tcp_ts;
 		__u32			tcp_ts_stamp;
 		u32			metrics[RTAX_MAX];
+		u32			rate_tokens;	/* rate limiting for ICMP */
+		unsigned long		rate_last;
 	};
 	struct rcu_head		rcu;
 };
@@ -81,6 +83,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct in6_addr *v6daddr, int cr
 
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
+extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4aa1b7f01ea0..ad2bcf1b69ae 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk)
  *			Send an ICMP frame.
  */
 
-/*
- *	Check transmit rate limitation for given message.
- *	The rate information is held in the destination cache now.
- *	This function is generic and could be used for other purposes
- *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
- *
- *	Note that the same dst_entry fields are modified by functions in
- *	route.c too, but these work for packet destinations while xrlim_allow
- *	works for icmp destinations. This means the rate limiting information
- *	for one "ip object" is shared - and these ICMPs are twice limited:
- *	by source and by destination.
- *
- *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
- *			  SHOULD allow setting of rate limits
- *
- *	Shared between ICMPv4 and ICMPv6.
- */
-#define XRLIM_BURST_FACTOR 6
-int xrlim_allow(struct dst_entry *dst, int timeout)
-{
-	unsigned long now, token = dst->rate_tokens;
-	int rc = 0;
-
-	now = jiffies;
-	token += now - dst->rate_last;
-	dst->rate_last = now;
-	if (token > XRLIM_BURST_FACTOR * timeout)
-		token = XRLIM_BURST_FACTOR * timeout;
-	if (token >= timeout) {
-		token -= timeout;
-		rc = 1;
-	}
-	dst->rate_tokens = token;
-	return rc;
-}
-EXPORT_SYMBOL(xrlim_allow);
-
-static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 				     int type, int code)
 {
 	struct dst_entry *dst = &rt->dst;
-	int rc = 1;
+	bool rc = true;
 
 	if (type > NR_ICMP_TYPES)
 		goto out;
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		goto out;
 
 	/* Limit if icmp type is enabled in ratemask. */
-	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask)
-		rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit);
+	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
+		if (!rt->peer)
+			rt_bind_peer(rt, 1);
+		rc = inet_peer_xrlim_allow(rt->peer,
+					   net->ipv4.sysctl_icmp_ratelimit);
+	}
 out:
 	return rc;
 }
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index b6513b13d729..709fbb4132d7 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -513,6 +513,8 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 		atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
 		p->tcp_ts_stamp = 0;
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+		p->rate_tokens = 0;
+		p->rate_last = 0;
 		INIT_LIST_HEAD(&p->unused);
 
 
@@ -580,3 +582,44 @@ void inet_putpeer(struct inet_peer *p)
 	local_bh_enable();
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
+
+/*
+ *	Check transmit rate limitation for given message.
+ *	The rate information is held in the inet_peer entries now.
+ *	This function is generic and could be used for other purposes
+ *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
+ *
+ *	Note that the same inet_peer fields are modified by functions in
+ *	route.c too, but these work for packet destinations while xrlim_allow
+ *	works for icmp destinations. This means the rate limiting information
+ *	for one "ip object" is shared - and these ICMPs are twice limited:
+ *	by source and by destination.
+ *
+ *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
+ *			  SHOULD allow setting of rate limits
+ *
+ *	Shared between ICMPv4 and ICMPv6.
+ */
+#define XRLIM_BURST_FACTOR 6
+bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
+{
+	unsigned long now, token;
+	bool rc = false;
+
+	if (!peer)
+		return true;
+
+	token = peer->rate_tokens;
+	now = jiffies;
+	token += now - peer->rate_last;
+	peer->rate_last = now;
+	if (token > XRLIM_BURST_FACTOR * timeout)
+		token = XRLIM_BURST_FACTOR * timeout;
+	if (token >= timeout) {
+		token -= timeout;
+		rc = true;
+	}
+	peer->rate_tokens = token;
+	return rc;
+}
+EXPORT_SYMBOL(inet_peer_xrlim_allow);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0ba6a382b2b4..2e225dafc4f8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1563,6 +1563,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
+	struct inet_peer *peer;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1574,33 +1575,41 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (!peer) {
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+		return;
+	}
+
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
-		rt->dst.rate_tokens = 0;
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+		peer->rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
 	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->dst.rate_last = jiffies;
+	if (peer->rate_tokens >= ip_rt_redirect_number) {
+		peer->rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->dst.rate_tokens == 0 ||
+	if (peer->rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->dst.rate_last +
-			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
+		       (peer->rate_last +
+			(ip_rt_redirect_load << peer->rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->dst.rate_last = jiffies;
-		++rt->dst.rate_tokens;
+		peer->rate_last = jiffies;
+		++peer->rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->dst.rate_tokens == ip_rt_redirect_number &&
+		    peer->rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 			       &rt->rt_src, rt->rt_iif,
@@ -1612,7 +1621,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 static int ip_error(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
+	struct inet_peer *peer;
 	unsigned long now;
+	bool send;
 	int code;
 
 	switch (rt->dst.error) {
@@ -1632,15 +1643,24 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	now = jiffies;
-	rt->dst.rate_tokens += now - rt->dst.rate_last;
-	if (rt->dst.rate_tokens > ip_rt_error_burst)
-		rt->dst.rate_tokens = ip_rt_error_burst;
-	rt->dst.rate_last = now;
-	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
-		rt->dst.rate_tokens -= ip_rt_error_cost;
-		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+
+	send = true;
+	if (peer) {
+		now = jiffies;
+		peer->rate_tokens += now - peer->rate_last;
+		if (peer->rate_tokens > ip_rt_error_burst)
+			peer->rate_tokens = ip_rt_error_burst;
+		peer->rate_last = now;
+		if (peer->rate_tokens >= ip_rt_error_cost)
+			peer->rate_tokens -= ip_rt_error_cost;
+		else
+			send = false;
 	}
+	if (send)
+		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 
 out:	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 03e62f94ff8e..a31d91b04c87 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -157,20 +157,20 @@ static int is_ineligible(struct sk_buff *skb)
 /*
  *	Check the ICMP output rate limit
  */
-static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
+static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 				     struct flowi *fl)
 {
 	struct dst_entry *dst;
 	struct net *net = sock_net(sk);
-	int res = 0;
+	bool res = false;
 
 	/* Informational messages are not limited. */
 	if (type & ICMPV6_INFOMSG_MASK)
-		return 1;
+		return true;
 
 	/* Do not limit pmtu discovery, it would break it. */
 	if (type == ICMPV6_PKT_TOOBIG)
-		return 1;
 
 	/*
 	 * Look up the output route.
@@ -182,7 +182,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		IP6_INC_STATS(net, ip6_dst_idev(dst),
 			      IPSTATS_MIB_OUTNOROUTES);
 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
-		res = 1;
+		res = true;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
 		int tmo = net->ipv6.sysctl.icmpv6_time;
@@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		res = xrlim_allow(dst, tmo);
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
 	}
 	dst_release(dst);
 	return res;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f8d242be3f3..2600e2288724 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -479,10 +479,13 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
-		if (xrlim_allow(dst, 1*HZ))
+		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
 			ndisc_send_redirect(skb, n, target);
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2342545a5ee9..7254ce364006 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 			   "ICMPv6 Redirect: destination is not a neighbour.\n");
 		goto release;
 	}
-	if (!xrlim_allow(dst, 1*HZ))
+	if (!rt->rt6i_peer)
+		rt6_bind_peer(rt, 1);
+	if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
 		goto release;
 
 	if (dev->addr_len) {