author     Eric Dumazet <eric.dumazet@gmail.com>    2010-05-11 19:19:48 -0400
committer  David S. Miller <davem@davemloft.net>    2010-05-17 20:18:50 -0400
commit     7fee226ad2397b635e2fd565a59ca3ae08a164cd (patch)
tree       0bcd26150ad74ec1a237109de87a3d214a07fc22 /net/core
parent     ebda37c27d0c768947e9b058332d7ea798210cf8 (diff)
net: add a noref bit on skb dst
Use the low-order bit of skb->_skb_dst to tell whether the dst is refcounted.
Rename _skb_dst to _skb_refdst to make sure all uses are caught.
skb_dst() returns the dst regardless of whether the noref bit is set, but
with a lockdep check to make sure a noref dst is not handed out when the
current user is not RCU protected.
New skb_dst_set_noref() helper to set a non-refcounted dst on an skb
(with a lockdep check).
skb_dst_drop() drops a reference only if the skb dst was refcounted.
The skb_dst_force() helper forces a refcount on the dst, for use when the
skb is queued and no longer RCU protected.
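As a reading aid, here is a hedged userspace model of the pointer-tagging scheme the message describes. The helper names mirror the commit (skb_dst(), skb_dst_is_noref(), skb_dst_set_noref(), skb_dst_drop(), skb_dst_force()), but the struct sk_buff_model / struct dst_entry stand-ins, the plain int refcount and the assert-based checks are assumptions for illustration only; the actual helpers live in include/linux/skbuff.h (not part of the net/core diff below) and use atomic refcounts plus lockdep/RCU checks.

/*
 * Userspace sketch of the low-order-bit tagging described above.
 * Simplified model, not the in-tree implementation.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define SKB_DST_NOREF   1UL              /* low-order bit: dst not refcounted */
#define SKB_DST_PTRMASK (~SKB_DST_NOREF)

struct dst_entry { int refcnt; };        /* stand-in for the real dst_entry */

struct sk_buff_model {
	unsigned long _skb_refdst;       /* dst pointer, possibly NOREF-tagged */
};

/* Return the dst whether or not the noref bit is set (the kernel version
 * adds a lockdep check that noref users hold rcu_read_lock()). */
static struct dst_entry *skb_dst(const struct sk_buff_model *skb)
{
	return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
}

static bool skb_dst_is_noref(const struct sk_buff_model *skb)
{
	return skb->_skb_refdst & SKB_DST_NOREF;
}

/* Attach a dst without taking a reference: only valid under RCU. */
static void skb_dst_set_noref(struct sk_buff_model *skb, struct dst_entry *dst)
{
	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}

/* Drop a reference only if the dst was actually refcounted. */
static void skb_dst_drop(struct sk_buff_model *skb)
{
	if (skb->_skb_refdst && !skb_dst_is_noref(skb))
		skb_dst(skb)->refcnt--;
	skb->_skb_refdst = 0UL;
}

/* Take a real reference before the skb leaves the RCU-protected section,
 * e.g. when it is queued (backlog, qdisc, receive queue, nf_queue). */
static void skb_dst_force(struct sk_buff_model *skb)
{
	if (skb_dst_is_noref(skb)) {
		struct dst_entry *dst = skb_dst(skb);

		dst->refcnt++;
		skb->_skb_refdst = (unsigned long)dst;
	}
}

int main(void)
{
	struct dst_entry dst = { .refcnt = 1 };
	struct sk_buff_model skb = { 0 };

	skb_dst_set_noref(&skb, &dst);           /* no reference taken */
	assert(skb_dst(&skb) == &dst && skb_dst_is_noref(&skb));

	skb_dst_force(&skb);                     /* skb about to be queued */
	assert(!skb_dst_is_noref(&skb) && dst.refcnt == 2);

	skb_dst_drop(&skb);
	printf("refcnt after drop: %d\n", dst.refcnt);   /* back to 1 */
	return 0;
}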
Use skb_dst_force() in __sk_add_backlog(), in __dev_xmit_skb() (when
!IFF_XMIT_DST_RELEASE or when the skb is enqueued on a qdisc queue), in
sock_queue_rcv_skb(), and in __nf_queue().
Also use skb_dst_force() in dev_requeue_skb().
Note: dst_use_noref() still dirties the dst; we might later change it to
dirty it only once per jiffy (see the sketch below).
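To illustrate the forward-looking note above, a small hedged sketch of what "one dirtying per jiffy" could mean: skip the write when the dst was already touched in the current jiffy. The dst_model struct, the name dst_use_noref_ratelimited() and the local jiffies counter are hypothetical; at this point the in-tree dst_use_noref() still writes on every packet.

/*
 * Hypothetical userspace sketch of rate-limited dst dirtying:
 * only touch the dst's hot fields when the jiffy value has changed.
 * None of these names are kernel APIs.
 */
#include <stdio.h>

struct dst_model {
	unsigned long lastuse;   /* last jiffy this dst was used */
	unsigned long __use;     /* approximate use counter (bumped at most once per jiffy here) */
};

static void dst_use_noref_ratelimited(struct dst_model *dst, unsigned long now)
{
	if (dst->lastuse != now) {   /* dirty the cache line once per jiffy */
		dst->lastuse = now;
		dst->__use++;
	}
}

int main(void)
{
	struct dst_model dst = { 0, 0 };
	unsigned long jiffies = 1000;

	for (int pkt = 0; pkt < 5; pkt++)             /* burst within one jiffy */
		dst_use_noref_ratelimited(&dst, jiffies);
	dst_use_noref_ratelimited(&dst, jiffies + 1); /* next jiffy */

	printf("__use = %lu (expect 2)\n", dst.__use);
	return 0;
}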
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c     3
-rw-r--r--  net/core/skbuff.c  2
-rw-r--r--  net/core/sock.c    6
3 files changed, 10 insertions, 1 deletion
diff --git a/net/core/dev.c b/net/core/dev.c
index cdcb9cbedf41..6c820650b80f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2052,6 +2052,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
@@ -2060,6 +2062,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
+		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a9b0e1f77806..c543dd252433 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -520,7 +520,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header = old->transport_header;
 	new->network_header = old->network_header;
 	new->mac_header = old->mac_header;
-	skb_dst_set(new, dst_clone(skb_dst(old)));
+	skb_dst_copy(new, old);
 	new->rxhash = old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp = secpath_get(old->sp);
diff --git a/net/core/sock.c b/net/core/sock.c
index 63530a03b8c2..bf88a167c8f2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
 	spin_lock_irqsave(&list->lock, flags);
 	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(list, skb);
@@ -1536,6 +1541,7 @@ static void __release_sock(struct sock *sk)
 	do {
 		struct sk_buff *next = skb->next;
 
+		WARN_ON_ONCE(skb_dst_is_noref(skb));
 		skb->next = NULL;
 		sk_backlog_rcv(sk, skb);
 