diff options
| author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-05-11 19:19:48 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2010-05-17 20:18:50 -0400 |
| commit | 7fee226ad2397b635e2fd565a59ca3ae08a164cd (patch) | |
| tree | 0bcd26150ad74ec1a237109de87a3d214a07fc22 /include | |
| parent | ebda37c27d0c768947e9b058332d7ea798210cf8 (diff) | |
net: add a noref bit on skb dst
Use low order bit of skb->_skb_dst to tell dst is not refcounted.
Change _skb_dst to _skb_refdst to make sure all uses are caught.
skb_dst() returns the dst regardless of whether the noref bit is set, but
with a lockdep check to make sure a noref dst is not handed out if the
current user is not RCU protected.
New skb_dst_set_noref() helper to set a non-refcounted dst on a skb.
(with lockdep check)
skb_dst_drop() drops a reference only if skb dst was refcounted.
skb_dst_force() helper is used to force a refcount on dst, when skb
is queued and no longer RCU protected.
Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if
!IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in
sock_queue_rcv_skb(), in __nf_queue().
Use skb_dst_force() in dev_requeue_skb().
Note: dst_use_noref() still dirties dst, we might transform it
later to do one dirtying per jiffies.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/skbuff.h | 58 | ||||
| -rw-r--r-- | include/net/dst.h | 48 | ||||
| -rw-r--r-- | include/net/sock.h | 13 |
3 files changed, 107 insertions, 12 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9525bce80f6..7cdfb4d52847 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
| @@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t; | |||
| 264 | * @transport_header: Transport layer header | 264 | * @transport_header: Transport layer header |
| 265 | * @network_header: Network layer header | 265 | * @network_header: Network layer header |
| 266 | * @mac_header: Link layer header | 266 | * @mac_header: Link layer header |
| 267 | * @_skb_dst: destination entry | 267 | * @_skb_refdst: destination entry (with norefcount bit) |
| 268 | * @sp: the security path, used for xfrm | 268 | * @sp: the security path, used for xfrm |
| 269 | * @cb: Control buffer. Free for use by every layer. Put private vars here | 269 | * @cb: Control buffer. Free for use by every layer. Put private vars here |
| 270 | * @len: Length of actual data | 270 | * @len: Length of actual data |
| @@ -328,7 +328,7 @@ struct sk_buff { | |||
| 328 | */ | 328 | */ |
| 329 | char cb[48] __aligned(8); | 329 | char cb[48] __aligned(8); |
| 330 | 330 | ||
| 331 | unsigned long _skb_dst; | 331 | unsigned long _skb_refdst; |
| 332 | #ifdef CONFIG_XFRM | 332 | #ifdef CONFIG_XFRM |
| 333 | struct sec_path *sp; | 333 | struct sec_path *sp; |
| 334 | #endif | 334 | #endif |
| @@ -419,14 +419,64 @@ struct sk_buff { | |||
| 419 | 419 | ||
| 420 | #include <asm/system.h> | 420 | #include <asm/system.h> |
| 421 | 421 | ||
| 422 | /* | ||
| 423 | * skb might have a dst pointer attached, refcounted or not. | ||
| 424 | * _skb_refdst low order bit is set if refcount was _not_ taken | ||
| 425 | */ | ||
| 426 | #define SKB_DST_NOREF 1UL | ||
| 427 | #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) | ||
| 428 | |||
| 429 | /** | ||
| 430 | * skb_dst - returns skb dst_entry | ||
| 431 | * @skb: buffer | ||
| 432 | * | ||
| 433 | * Returns skb dst_entry, regardless of reference taken or not. | ||
| 434 | */ | ||
| 422 | static inline struct dst_entry *skb_dst(const struct sk_buff *skb) | 435 | static inline struct dst_entry *skb_dst(const struct sk_buff *skb) |
| 423 | { | 436 | { |
| 424 | return (struct dst_entry *)skb->_skb_dst; | 437 | /* If refdst was not refcounted, check we still are in a |
| 438 | * rcu_read_lock section | ||
| 439 | */ | ||
| 440 | WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && | ||
| 441 | !rcu_read_lock_held() && | ||
| 442 | !rcu_read_lock_bh_held()); | ||
| 443 | return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); | ||
| 425 | } | 444 | } |
| 426 | 445 | ||
| 446 | /** | ||
| 447 | * skb_dst_set - sets skb dst | ||
| 448 | * @skb: buffer | ||
| 449 | * @dst: dst entry | ||
| 450 | * | ||
| 451 | * Sets skb dst, assuming a reference was taken on dst and should | ||
| 452 | * be released by skb_dst_drop() | ||
| 453 | */ | ||
| 427 | static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) | 454 | static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) |
| 428 | { | 455 | { |
| 429 | skb->_skb_dst = (unsigned long)dst; | 456 | skb->_skb_refdst = (unsigned long)dst; |
| 457 | } | ||
| 458 | |||
| 459 | /** | ||
| 460 | * skb_dst_set_noref - sets skb dst, without a reference | ||
| 461 | * @skb: buffer | ||
| 462 | * @dst: dst entry | ||
| 463 | * | ||
| 464 | * Sets skb dst, assuming a reference was not taken on dst | ||
| 465 | * skb_dst_drop() should not dst_release() this dst | ||
| 466 | */ | ||
| 467 | static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) | ||
| 468 | { | ||
| 469 | WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); | ||
| 470 | skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; | ||
| 471 | } | ||
| 472 | |||
| 473 | /** | ||
| 474 | * skb_dst_is_noref - Test if skb dst isnt refcounted | ||
| 475 | * @skb: buffer | ||
| 476 | */ | ||
| 477 | static inline bool skb_dst_is_noref(const struct sk_buff *skb) | ||
| 478 | { | ||
| 479 | return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); | ||
| 430 | } | 480 | } |
| 431 | 481 | ||
| 432 | static inline struct rtable *skb_rtable(const struct sk_buff *skb) | 482 | static inline struct rtable *skb_rtable(const struct sk_buff *skb) |
diff --git a/include/net/dst.h b/include/net/dst.h index aac5a5fcfda9..27207a13f2a6 100644 --- a/include/net/dst.h +++ b/include/net/dst.h | |||
| @@ -168,6 +168,12 @@ static inline void dst_use(struct dst_entry *dst, unsigned long time) | |||
| 168 | dst->lastuse = time; | 168 | dst->lastuse = time; |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) | ||
| 172 | { | ||
| 173 | dst->__use++; | ||
| 174 | dst->lastuse = time; | ||
| 175 | } | ||
| 176 | |||
| 171 | static inline | 177 | static inline |
| 172 | struct dst_entry * dst_clone(struct dst_entry * dst) | 178 | struct dst_entry * dst_clone(struct dst_entry * dst) |
| 173 | { | 179 | { |
| @@ -177,11 +183,47 @@ struct dst_entry * dst_clone(struct dst_entry * dst) | |||
| 177 | } | 183 | } |
| 178 | 184 | ||
| 179 | extern void dst_release(struct dst_entry *dst); | 185 | extern void dst_release(struct dst_entry *dst); |
| 186 | |||
| 187 | static inline void refdst_drop(unsigned long refdst) | ||
| 188 | { | ||
| 189 | if (!(refdst & SKB_DST_NOREF)) | ||
| 190 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); | ||
| 191 | } | ||
| 192 | |||
| 193 | /** | ||
| 194 | * skb_dst_drop - drops skb dst | ||
| 195 | * @skb: buffer | ||
| 196 | * | ||
| 197 | * Drops dst reference count if a reference was taken. | ||
| 198 | */ | ||
| 180 | static inline void skb_dst_drop(struct sk_buff *skb) | 199 | static inline void skb_dst_drop(struct sk_buff *skb) |
| 181 | { | 200 | { |
| 182 | if (skb->_skb_dst) | 201 | if (skb->_skb_refdst) { |
| 183 | dst_release(skb_dst(skb)); | 202 | refdst_drop(skb->_skb_refdst); |
| 184 | skb->_skb_dst = 0UL; | 203 | skb->_skb_refdst = 0UL; |
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) | ||
| 208 | { | ||
| 209 | nskb->_skb_refdst = oskb->_skb_refdst; | ||
| 210 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) | ||
| 211 | dst_clone(skb_dst(nskb)); | ||
| 212 | } | ||
| 213 | |||
| 214 | /** | ||
| 215 | * skb_dst_force - makes sure skb dst is refcounted | ||
| 216 | * @skb: buffer | ||
| 217 | * | ||
| 218 | * If dst is not yet refcounted, let's do it | ||
| 219 | */ | ||
| 220 | static inline void skb_dst_force(struct sk_buff *skb) | ||
| 221 | { | ||
| 222 | if (skb_dst_is_noref(skb)) { | ||
| 223 | WARN_ON(!rcu_read_lock_held()); | ||
| 224 | skb->_skb_refdst &= ~SKB_DST_NOREF; | ||
| 225 | dst_clone(skb_dst(skb)); | ||
| 226 | } | ||
| 185 | } | 227 | } |
| 186 | 228 | ||
| 187 | /* Children define the path of the packet through the | 229 | /* Children define the path of the packet through the |
diff --git a/include/net/sock.h b/include/net/sock.h index aed16eb9db4b..5697caf8cc76 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
| @@ -600,12 +600,15 @@ static inline int sk_stream_memory_free(struct sock *sk) | |||
| 600 | /* OOB backlog add */ | 600 | /* OOB backlog add */ |
| 601 | static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) | 601 | static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) |
| 602 | { | 602 | { |
| 603 | if (!sk->sk_backlog.tail) { | 603 | /* dont let skb dst not refcounted, we are going to leave rcu lock */ |
| 604 | sk->sk_backlog.head = sk->sk_backlog.tail = skb; | 604 | skb_dst_force(skb); |
| 605 | } else { | 605 | |
| 606 | if (!sk->sk_backlog.tail) | ||
| 607 | sk->sk_backlog.head = skb; | ||
| 608 | else | ||
| 606 | sk->sk_backlog.tail->next = skb; | 609 | sk->sk_backlog.tail->next = skb; |
| 607 | sk->sk_backlog.tail = skb; | 610 | |
| 608 | } | 611 | sk->sk_backlog.tail = skb; |
| 609 | skb->next = NULL; | 612 | skb->next = NULL; |
| 610 | } | 613 | } |
| 611 | 614 | ||
