aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-10-15 01:44:11 -0400
committer: David S. Miller <davem@davemloft.net> 2010-10-20 06:02:23 -0400
commit: 27b75c95f10d249574d9c4cb9dab878107faede8 (patch)
tree: 466656d86aaa395951e12b50903e730203c5f86f
parent: e6484930d7c73d324bccda7d43d131088da697b9 (diff)
net: avoid RCU for NOCACHE dst
There is no point in using RCU for a dst that we allocate for a very short time (used once). Change dst_release() to take DST_NOCACHE into account, but also change skb_dst_set_noref() to force a refcount increment for such a dst.

This is a _huge_ gain, because we don't waste memory storing thousands of dsts. Instead of queueing them to RCU, we can free them instantly. CPU caches can stay hot, re-using the same memory blocks to hold temporary dsts.

Note: remove the unneeded smp_mb__before_atomic_dec() in dst_release(), since atomic_dec_return() implies a full memory barrier.

Stress test: 160.000.000 udp frames sent, IP route cache disabled (DDOS).

Before:
real 0m38.091s
user 0m13.189s
sys 7m53.018s

After:
real 0m29.946s
user 0m12.157s
sys 7m40.605s

For reference, with the IP route cache enabled:
real 0m32.030s
user 0m10.521s
sys 8m15.243s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/skbuff.h 14
-rw-r--r-- net/core/dst.c 29
-rw-r--r-- net/ipv4/route.c 9
3 files changed, 33 insertions, 19 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 05a358f1ba11..e6ba898de61c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -460,19 +460,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
460 skb->_skb_refdst = (unsigned long)dst; 460 skb->_skb_refdst = (unsigned long)dst;
461} 461}
462 462
463/** 463extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
464 * skb_dst_set_noref - sets skb dst, without a reference
465 * @skb: buffer
466 * @dst: dst entry
467 *
468 * Sets skb dst, assuming a reference was not taken on dst
469 * skb_dst_drop() should not dst_release() this dst
470 */
471static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
472{
473 WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
474 skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
475}
476 464
477/** 465/**
478 * skb_dst_is_noref - Test if skb dst isnt refcounted 466 * skb_dst_is_noref - Test if skb dst isnt refcounted
diff --git a/net/core/dst.c b/net/core/dst.c
index 32e542d7f472..8abe628b79f1 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
271 if (dst) { 271 if (dst) {
272 int newrefcnt; 272 int newrefcnt;
273 273
274 smp_mb__before_atomic_dec();
275 newrefcnt = atomic_dec_return(&dst->__refcnt); 274 newrefcnt = atomic_dec_return(&dst->__refcnt);
276 WARN_ON(newrefcnt < 0); 275 WARN_ON(newrefcnt < 0);
276 if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
277 dst = dst_destroy(dst);
278 if (dst)
279 __dst_free(dst);
280 }
277 } 281 }
278} 282}
279EXPORT_SYMBOL(dst_release); 283EXPORT_SYMBOL(dst_release);
280 284
285/**
286 * skb_dst_set_noref - sets skb dst, without a reference
287 * @skb: buffer
288 * @dst: dst entry
289 *
290 * Sets skb dst, assuming a reference was not taken on dst
291 * skb_dst_drop() should not dst_release() this dst
292 */
293void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
294{
295 WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
296 /* If dst not in cache, we must take a reference, because
297 * dst_release() will destroy dst as soon as its refcount becomes zero
298 */
299 if (unlikely(dst->flags & DST_NOCACHE)) {
300 dst_hold(dst);
301 skb_dst_set(skb, dst);
302 } else {
303 skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
304 }
305}
306EXPORT_SYMBOL(skb_dst_set_noref);
307
281/* Dirty hack. We did it in 2.2 (in __dst_free), 308/* Dirty hack. We did it in 2.2 (in __dst_free),
282 * we have _very_ good reasons not to repeat 309 * we have _very_ good reasons not to repeat
283 * this mistake in 2.3, but we have no choice 310 * this mistake in 2.3, but we have no choice
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff98983d2a45..d6cb2bfcd8e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1105,9 +1105,9 @@ restart:
1105 * Note that we do rt_free on this new route entry, so that 1105 * Note that we do rt_free on this new route entry, so that
1106 * once its refcount hits zero, we are still able to reap it 1106 * once its refcount hits zero, we are still able to reap it
1107 * (Thanks Alexey) 1107 * (Thanks Alexey)
1108 * Note also the rt_free uses call_rcu. We don't actually 1108 * Note: To avoid expensive rcu stuff for this uncached dst,
1109 * need rcu protection here, this is just our path to get 1109 * we set DST_NOCACHE so that dst_release() can free dst without
1110 * on the route gc list. 1110 * waiting a grace period.
1111 */ 1111 */
1112 1112
1113 rt->dst.flags |= DST_NOCACHE; 1113 rt->dst.flags |= DST_NOCACHE;
@@ -1117,12 +1117,11 @@ restart:
1117 if (net_ratelimit()) 1117 if (net_ratelimit())
1118 printk(KERN_WARNING 1118 printk(KERN_WARNING
1119 "Neighbour table failure & not caching routes.\n"); 1119 "Neighbour table failure & not caching routes.\n");
1120 rt_drop(rt); 1120 ip_rt_put(rt);
1121 return err; 1121 return err;
1122 } 1122 }
1123 } 1123 }
1124 1124
1125 rt_free(rt);
1126 goto skip_hashing; 1125 goto skip_hashing;
1127 } 1126 }
1128 1127