diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-11-16 00:56:04 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-11-16 14:17:43 -0500 |
commit | b178bb3dfc30d9555bdd2401e95af98e23e83e10 (patch) | |
tree | ec52d739015de589ceca1d31c5802305d94c9f6d | |
parent | c31504dc0d1dc853dcee509d9999169a9097a717 (diff) |
net: reorder struct sock fields
Right now, fields in struct sock are not optimally ordered, because each
path (RX softirq, TX completion, RX user, TX user) has to touch fields
that are contained in many different cache lines.
The really critical thing is to shrink number of cache lines that are
used at RX softirq time : CPU handling softirqs for a device can receive
many frames per second for many sockets. If load is too big, we can drop
frames at NIC level. RPS or multiqueue cards can help, but it is better to
reduce latency if possible.
This patch starts with UDP protocol, then additional patches will try to
reduce latencies of other ones as well.
At RX softirq time, fields of interest for UDP protocol are :
(not counting ones in inet struct for the lookup)
Read/Written:
sk_refcnt (atomic increment/decrement)
sk_rmem_alloc & sk_backlog.len (to check if there is room in queues)
sk_receive_queue
sk_backlog (if socket locked by user program)
sk_rxhash
sk_forward_alloc
sk_drops
Read only:
sk_rcvbuf (sk_rcvqueues_full())
sk_filter
sk_wq
sk_policy[0]
sk_flags
Additional notes :
- sk_backlog has one hole on 64bit arches. We can fill it to save 8
bytes.
- sk_backlog is used only if RX softirq handler finds the socket while
locked by user.
- sk_rxhash is written only once per flow.
- sk_drops is written only if queues are full
Final layout :
[1] One section grouping all read/write fields, but placing rxhash and
sk_backlog at the end of this section.
[2] One section grouping all read fields in RX handler
(sk_filter, sk_rcvbuf, sk_wq)
[3] Section used by other paths
I'll post a patch on its own to put sk_refcnt at the end of struct
sock_common so that it shares the same cache line as section [1]
New offsets on 64bit arch :
sizeof(struct sock)=0x268
offsetof(struct sock, sk_refcnt) =0x10
offsetof(struct sock, sk_lock) =0x48
offsetof(struct sock, sk_receive_queue)=0x68
offsetof(struct sock, sk_backlog)=0x80
offsetof(struct sock, sk_rmem_alloc)=0x80
offsetof(struct sock, sk_forward_alloc)=0x98
offsetof(struct sock, sk_rxhash)=0x9c
offsetof(struct sock, sk_rcvbuf)=0xa4
offsetof(struct sock, sk_drops) =0xa0
offsetof(struct sock, sk_filter)=0xa8
offsetof(struct sock, sk_wq)=0xb0
offsetof(struct sock, sk_policy)=0xd0
offsetof(struct sock, sk_flags) =0xe0
Instead of :
sizeof(struct sock)=0x270
offsetof(struct sock, sk_refcnt) =0x10
offsetof(struct sock, sk_lock) =0x50
offsetof(struct sock, sk_receive_queue)=0xc0
offsetof(struct sock, sk_backlog)=0x70
offsetof(struct sock, sk_rmem_alloc)=0xac
offsetof(struct sock, sk_forward_alloc)=0x10c
offsetof(struct sock, sk_rxhash)=0x128
offsetof(struct sock, sk_rcvbuf)=0x4c
offsetof(struct sock, sk_drops) =0x16c
offsetof(struct sock, sk_filter)=0x198
offsetof(struct sock, sk_wq)=0x88
offsetof(struct sock, sk_policy)=0x98
offsetof(struct sock, sk_flags) =0x130
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 55 |
1 files changed, 31 insertions, 24 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index eb0c1f504678..5557dfb3dd68 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -241,59 +241,67 @@ struct sock { | |||
241 | #define sk_bind_node __sk_common.skc_bind_node | 241 | #define sk_bind_node __sk_common.skc_bind_node |
242 | #define sk_prot __sk_common.skc_prot | 242 | #define sk_prot __sk_common.skc_prot |
243 | #define sk_net __sk_common.skc_net | 243 | #define sk_net __sk_common.skc_net |
244 | kmemcheck_bitfield_begin(flags); | ||
245 | unsigned int sk_shutdown : 2, | ||
246 | sk_no_check : 2, | ||
247 | sk_userlocks : 4, | ||
248 | sk_protocol : 8, | ||
249 | sk_type : 16; | ||
250 | kmemcheck_bitfield_end(flags); | ||
251 | int sk_rcvbuf; | ||
252 | socket_lock_t sk_lock; | 244 | socket_lock_t sk_lock; |
245 | struct sk_buff_head sk_receive_queue; | ||
253 | /* | 246 | /* |
254 | * The backlog queue is special, it is always used with | 247 | * The backlog queue is special, it is always used with |
255 | * the per-socket spinlock held and requires low latency | 248 | * the per-socket spinlock held and requires low latency |
256 | * access. Therefore we special case it's implementation. | 249 | * access. Therefore we special case it's implementation. |
250 | * Note : rmem_alloc is in this structure to fill a hole | ||
251 | * on 64bit arches, not because its logically part of | ||
252 | * backlog. | ||
257 | */ | 253 | */ |
258 | struct { | 254 | struct { |
259 | struct sk_buff *head; | 255 | atomic_t rmem_alloc; |
260 | struct sk_buff *tail; | 256 | int len; |
261 | int len; | 257 | struct sk_buff *head; |
258 | struct sk_buff *tail; | ||
262 | } sk_backlog; | 259 | } sk_backlog; |
260 | #define sk_rmem_alloc sk_backlog.rmem_alloc | ||
261 | int sk_forward_alloc; | ||
262 | #ifdef CONFIG_RPS | ||
263 | __u32 sk_rxhash; | ||
264 | #endif | ||
265 | atomic_t sk_drops; | ||
266 | int sk_rcvbuf; | ||
267 | |||
268 | struct sk_filter __rcu *sk_filter; | ||
263 | struct socket_wq *sk_wq; | 269 | struct socket_wq *sk_wq; |
264 | struct dst_entry *sk_dst_cache; | 270 | |
271 | #ifdef CONFIG_NET_DMA | ||
272 | struct sk_buff_head sk_async_wait_queue; | ||
273 | #endif | ||
274 | |||
265 | #ifdef CONFIG_XFRM | 275 | #ifdef CONFIG_XFRM |
266 | struct xfrm_policy *sk_policy[2]; | 276 | struct xfrm_policy *sk_policy[2]; |
267 | #endif | 277 | #endif |
278 | unsigned long sk_flags; | ||
279 | struct dst_entry *sk_dst_cache; | ||
268 | spinlock_t sk_dst_lock; | 280 | spinlock_t sk_dst_lock; |
269 | atomic_t sk_rmem_alloc; | ||
270 | atomic_t sk_wmem_alloc; | 281 | atomic_t sk_wmem_alloc; |
271 | atomic_t sk_omem_alloc; | 282 | atomic_t sk_omem_alloc; |
272 | int sk_sndbuf; | 283 | int sk_sndbuf; |
273 | struct sk_buff_head sk_receive_queue; | ||
274 | struct sk_buff_head sk_write_queue; | 284 | struct sk_buff_head sk_write_queue; |
275 | #ifdef CONFIG_NET_DMA | 285 | kmemcheck_bitfield_begin(flags); |
276 | struct sk_buff_head sk_async_wait_queue; | 286 | unsigned int sk_shutdown : 2, |
277 | #endif | 287 | sk_no_check : 2, |
288 | sk_userlocks : 4, | ||
289 | sk_protocol : 8, | ||
290 | sk_type : 16; | ||
291 | kmemcheck_bitfield_end(flags); | ||
278 | int sk_wmem_queued; | 292 | int sk_wmem_queued; |
279 | int sk_forward_alloc; | ||
280 | gfp_t sk_allocation; | 293 | gfp_t sk_allocation; |
281 | int sk_route_caps; | 294 | int sk_route_caps; |
282 | int sk_route_nocaps; | 295 | int sk_route_nocaps; |
283 | int sk_gso_type; | 296 | int sk_gso_type; |
284 | unsigned int sk_gso_max_size; | 297 | unsigned int sk_gso_max_size; |
285 | int sk_rcvlowat; | 298 | int sk_rcvlowat; |
286 | #ifdef CONFIG_RPS | ||
287 | __u32 sk_rxhash; | ||
288 | #endif | ||
289 | unsigned long sk_flags; | ||
290 | unsigned long sk_lingertime; | 299 | unsigned long sk_lingertime; |
291 | struct sk_buff_head sk_error_queue; | 300 | struct sk_buff_head sk_error_queue; |
292 | struct proto *sk_prot_creator; | 301 | struct proto *sk_prot_creator; |
293 | rwlock_t sk_callback_lock; | 302 | rwlock_t sk_callback_lock; |
294 | int sk_err, | 303 | int sk_err, |
295 | sk_err_soft; | 304 | sk_err_soft; |
296 | atomic_t sk_drops; | ||
297 | unsigned short sk_ack_backlog; | 305 | unsigned short sk_ack_backlog; |
298 | unsigned short sk_max_ack_backlog; | 306 | unsigned short sk_max_ack_backlog; |
299 | __u32 sk_priority; | 307 | __u32 sk_priority; |
@@ -301,7 +309,6 @@ struct sock { | |||
301 | const struct cred *sk_peer_cred; | 309 | const struct cred *sk_peer_cred; |
302 | long sk_rcvtimeo; | 310 | long sk_rcvtimeo; |
303 | long sk_sndtimeo; | 311 | long sk_sndtimeo; |
304 | struct sk_filter __rcu *sk_filter; | ||
305 | void *sk_protinfo; | 312 | void *sk_protinfo; |
306 | struct timer_list sk_timer; | 313 | struct timer_list sk_timer; |
307 | ktime_t sk_stamp; | 314 | ktime_t sk_stamp; |