diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-05-11 19:19:48 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-17 20:18:50 -0400 |
commit | 7fee226ad2397b635e2fd565a59ca3ae08a164cd (patch) | |
tree | 0bcd26150ad74ec1a237109de87a3d214a07fc22 /include | |
parent | ebda37c27d0c768947e9b058332d7ea798210cf8 (diff) |
net: add a noref bit on skb dst
Use low order bit of skb->_skb_dst to tell dst is not refcounted.
Change _skb_dst to _skb_refdst to make sure all uses are caught.
skb_dst() returns the dst, regardless of noref bit set or not, but
with a lockdep check to make sure a noref dst is not given if current
user is not rcu protected.
New skb_dst_set_noref() helper to set a non-refcounted dst on a skb.
(with lockdep check)
skb_dst_drop() drops a reference only if skb dst was refcounted.
skb_dst_force() helper is used to force a refcount on dst, when skb
is queued and no longer RCU protected.
Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if
!IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in
sock_queue_rcv_skb(), in __nf_queue().
Use skb_dst_force() in dev_requeue_skb().
Note: dst_use_noref() still dirties dst, we might transform it
later to do one dirtying per jiffies.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/skbuff.h | 58 | ||||
-rw-r--r-- | include/net/dst.h | 48 | ||||
-rw-r--r-- | include/net/sock.h | 13 |
3 files changed, 107 insertions, 12 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9525bce80f6..7cdfb4d52847 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t; | |||
264 | * @transport_header: Transport layer header | 264 | * @transport_header: Transport layer header |
265 | * @network_header: Network layer header | 265 | * @network_header: Network layer header |
266 | * @mac_header: Link layer header | 266 | * @mac_header: Link layer header |
267 | * @_skb_dst: destination entry | 267 | * @_skb_refdst: destination entry (with norefcount bit) |
268 | * @sp: the security path, used for xfrm | 268 | * @sp: the security path, used for xfrm |
269 | * @cb: Control buffer. Free for use by every layer. Put private vars here | 269 | * @cb: Control buffer. Free for use by every layer. Put private vars here |
270 | * @len: Length of actual data | 270 | * @len: Length of actual data |
@@ -328,7 +328,7 @@ struct sk_buff { | |||
328 | */ | 328 | */ |
329 | char cb[48] __aligned(8); | 329 | char cb[48] __aligned(8); |
330 | 330 | ||
331 | unsigned long _skb_dst; | 331 | unsigned long _skb_refdst; |
332 | #ifdef CONFIG_XFRM | 332 | #ifdef CONFIG_XFRM |
333 | struct sec_path *sp; | 333 | struct sec_path *sp; |
334 | #endif | 334 | #endif |
@@ -419,14 +419,64 @@ struct sk_buff { | |||
419 | 419 | ||
420 | #include <asm/system.h> | 420 | #include <asm/system.h> |
421 | 421 | ||
422 | /* | ||
423 | * skb might have a dst pointer attached, refcounted or not. | ||
424 | * _skb_refdst low order bit is set if refcount was _not_ taken | ||
425 | */ | ||
426 | #define SKB_DST_NOREF 1UL | ||
427 | #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) | ||
428 | |||
429 | /** | ||
430 | * skb_dst - returns skb dst_entry | ||
431 | * @skb: buffer | ||
432 | * | ||
433 | * Returns skb dst_entry, regardless of reference taken or not. | ||
434 | */ | ||
422 | static inline struct dst_entry *skb_dst(const struct sk_buff *skb) | 435 | static inline struct dst_entry *skb_dst(const struct sk_buff *skb) |
423 | { | 436 | { |
424 | return (struct dst_entry *)skb->_skb_dst; | 437 | /* If refdst was not refcounted, check we still are in a |
438 | * rcu_read_lock section | ||
439 | */ | ||
440 | WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && | ||
441 | !rcu_read_lock_held() && | ||
442 | !rcu_read_lock_bh_held()); | ||
443 | return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); | ||
425 | } | 444 | } |
426 | 445 | ||
446 | /** | ||
447 | * skb_dst_set - sets skb dst | ||
448 | * @skb: buffer | ||
449 | * @dst: dst entry | ||
450 | * | ||
451 | * Sets skb dst, assuming a reference was taken on dst and should | ||
452 | * be released by skb_dst_drop() | ||
453 | */ | ||
427 | static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) | 454 | static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) |
428 | { | 455 | { |
429 | skb->_skb_dst = (unsigned long)dst; | 456 | skb->_skb_refdst = (unsigned long)dst; |
457 | } | ||
458 | |||
459 | /** | ||
460 | * skb_dst_set_noref - sets skb dst, without a reference | ||
461 | * @skb: buffer | ||
462 | * @dst: dst entry | ||
463 | * | ||
464 | * Sets skb dst, assuming a reference was not taken on dst | ||
465 | * skb_dst_drop() should not dst_release() this dst | ||
466 | */ | ||
467 | static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) | ||
468 | { | ||
469 | WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); | ||
470 | skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; | ||
471 | } | ||
472 | |||
473 | /** | ||
474 | * skb_dst_is_noref - Test if skb dst isnt refcounted | ||
475 | * @skb: buffer | ||
476 | */ | ||
477 | static inline bool skb_dst_is_noref(const struct sk_buff *skb) | ||
478 | { | ||
479 | return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); | ||
430 | } | 480 | } |
431 | 481 | ||
432 | static inline struct rtable *skb_rtable(const struct sk_buff *skb) | 482 | static inline struct rtable *skb_rtable(const struct sk_buff *skb) |
diff --git a/include/net/dst.h b/include/net/dst.h index aac5a5fcfda9..27207a13f2a6 100644 --- a/include/net/dst.h +++ b/include/net/dst.h | |||
@@ -168,6 +168,12 @@ static inline void dst_use(struct dst_entry *dst, unsigned long time) | |||
168 | dst->lastuse = time; | 168 | dst->lastuse = time; |
169 | } | 169 | } |
170 | 170 | ||
171 | static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) | ||
172 | { | ||
173 | dst->__use++; | ||
174 | dst->lastuse = time; | ||
175 | } | ||
176 | |||
171 | static inline | 177 | static inline |
172 | struct dst_entry * dst_clone(struct dst_entry * dst) | 178 | struct dst_entry * dst_clone(struct dst_entry * dst) |
173 | { | 179 | { |
@@ -177,11 +183,47 @@ struct dst_entry * dst_clone(struct dst_entry * dst) | |||
177 | } | 183 | } |
178 | 184 | ||
179 | extern void dst_release(struct dst_entry *dst); | 185 | extern void dst_release(struct dst_entry *dst); |
186 | |||
187 | static inline void refdst_drop(unsigned long refdst) | ||
188 | { | ||
189 | if (!(refdst & SKB_DST_NOREF)) | ||
190 | dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); | ||
191 | } | ||
192 | |||
193 | /** | ||
194 | * skb_dst_drop - drops skb dst | ||
195 | * @skb: buffer | ||
196 | * | ||
197 | * Drops dst reference count if a reference was taken. | ||
198 | */ | ||
180 | static inline void skb_dst_drop(struct sk_buff *skb) | 199 | static inline void skb_dst_drop(struct sk_buff *skb) |
181 | { | 200 | { |
182 | if (skb->_skb_dst) | 201 | if (skb->_skb_refdst) { |
183 | dst_release(skb_dst(skb)); | 202 | refdst_drop(skb->_skb_refdst); |
184 | skb->_skb_dst = 0UL; | 203 | skb->_skb_refdst = 0UL; |
204 | } | ||
205 | } | ||
206 | |||
207 | static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) | ||
208 | { | ||
209 | nskb->_skb_refdst = oskb->_skb_refdst; | ||
210 | if (!(nskb->_skb_refdst & SKB_DST_NOREF)) | ||
211 | dst_clone(skb_dst(nskb)); | ||
212 | } | ||
213 | |||
214 | /** | ||
215 | * skb_dst_force - makes sure skb dst is refcounted | ||
216 | * @skb: buffer | ||
217 | * | ||
218 | * If dst is not yet refcounted, let's do it | ||
219 | */ | ||
220 | static inline void skb_dst_force(struct sk_buff *skb) | ||
221 | { | ||
222 | if (skb_dst_is_noref(skb)) { | ||
223 | WARN_ON(!rcu_read_lock_held()); | ||
224 | skb->_skb_refdst &= ~SKB_DST_NOREF; | ||
225 | dst_clone(skb_dst(skb)); | ||
226 | } | ||
185 | } | 227 | } |
186 | 228 | ||
187 | /* Children define the path of the packet through the | 229 | /* Children define the path of the packet through the |
diff --git a/include/net/sock.h b/include/net/sock.h index aed16eb9db4b..5697caf8cc76 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -600,12 +600,15 @@ static inline int sk_stream_memory_free(struct sock *sk) | |||
600 | /* OOB backlog add */ | 600 | /* OOB backlog add */ |
601 | static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) | 601 | static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) |
602 | { | 602 | { |
603 | if (!sk->sk_backlog.tail) { | 603 | /* dont let skb dst not refcounted, we are going to leave rcu lock */ |
604 | sk->sk_backlog.head = sk->sk_backlog.tail = skb; | 604 | skb_dst_force(skb); |
605 | } else { | 605 | |
606 | if (!sk->sk_backlog.tail) | ||
607 | sk->sk_backlog.head = skb; | ||
608 | else | ||
606 | sk->sk_backlog.tail->next = skb; | 609 | sk->sk_backlog.tail->next = skb; |
607 | sk->sk_backlog.tail = skb; | 610 | |
608 | } | 611 | sk->sk_backlog.tail = skb; |
609 | skb->next = NULL; | 612 | skb->next = NULL; |
610 | } | 613 | } |
611 | 614 | ||