aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-11-16 00:56:04 -0500
committerDavid S. Miller <davem@davemloft.net>2010-11-16 14:17:43 -0500
commitb178bb3dfc30d9555bdd2401e95af98e23e83e10 (patch)
treeec52d739015de589ceca1d31c5802305d94c9f6d
parentc31504dc0d1dc853dcee509d9999169a9097a717 (diff)
net: reorder struct sock fields
Right now, fields in struct sock are not optimally ordered, because each path (RX softirq, TX completion, RX user, TX user) has to touch fields that are contained in many different cache lines. The really critical thing is to shrink number of cache lines that are used at RX softirq time : CPU handling softirqs for a device can receive many frames per second for many sockets. If load is too big, we can drop frames at NIC level. RPS or multiqueue cards can help, but better reduce latency if possible. This patch starts with UDP protocol, then additional patches will try to reduce latencies of other ones as well. At RX softirq time, fields of interest for UDP protocol are : (not counting ones in inet struct for the lookup) Read/Written: sk_refcnt (atomic increment/decrement) sk_rmem_alloc & sk_backlog.len (to check if there is room in queues) sk_receive_queue sk_backlog (if socket locked by user program) sk_rxhash sk_forward_alloc sk_drops Read only: sk_rcvbuf (sk_rcvqueues_full()) sk_filter sk_wq sk_policy[0] sk_flags Additional notes : - sk_backlog has one hole on 64bit arches. We can fill it to save 8 bytes. - sk_backlog is used only if RX softirq handler finds the socket while locked by user. - sk_rxhash is written only once per flow. - sk_drops is written only if queues are full Final layout : [1] One section grouping all read/write fields, but placing rxhash and sk_backlog at the end of this section. 
[2] One section grouping all read fields in RX handler (sk_filter, sk_rcv_buf, sk_wq) [3] Section used by other paths I'll post a patch on its own to put sk_refcnt at the end of struct sock_common so that it shares same cache line than section [1] New offsets on 64bit arch : sizeof(struct sock)=0x268 offsetof(struct sock, sk_refcnt) =0x10 offsetof(struct sock, sk_lock) =0x48 offsetof(struct sock, sk_receive_queue)=0x68 offsetof(struct sock, sk_backlog)=0x80 offsetof(struct sock, sk_rmem_alloc)=0x80 offsetof(struct sock, sk_forward_alloc)=0x98 offsetof(struct sock, sk_rxhash)=0x9c offsetof(struct sock, sk_rcvbuf)=0xa4 offsetof(struct sock, sk_drops) =0xa0 offsetof(struct sock, sk_filter)=0xa8 offsetof(struct sock, sk_wq)=0xb0 offsetof(struct sock, sk_policy)=0xd0 offsetof(struct sock, sk_flags) =0xe0 Instead of : sizeof(struct sock)=0x270 offsetof(struct sock, sk_refcnt) =0x10 offsetof(struct sock, sk_lock) =0x50 offsetof(struct sock, sk_receive_queue)=0xc0 offsetof(struct sock, sk_backlog)=0x70 offsetof(struct sock, sk_rmem_alloc)=0xac offsetof(struct sock, sk_forward_alloc)=0x10c offsetof(struct sock, sk_rxhash)=0x128 offsetof(struct sock, sk_rcvbuf)=0x4c offsetof(struct sock, sk_drops) =0x16c offsetof(struct sock, sk_filter)=0x198 offsetof(struct sock, sk_wq)=0x88 offsetof(struct sock, sk_policy)=0x98 offsetof(struct sock, sk_flags) =0x130 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sock.h55
1 files changed, 31 insertions, 24 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index eb0c1f504678..5557dfb3dd68 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -241,59 +241,67 @@ struct sock {
241#define sk_bind_node __sk_common.skc_bind_node 241#define sk_bind_node __sk_common.skc_bind_node
242#define sk_prot __sk_common.skc_prot 242#define sk_prot __sk_common.skc_prot
243#define sk_net __sk_common.skc_net 243#define sk_net __sk_common.skc_net
244 kmemcheck_bitfield_begin(flags);
245 unsigned int sk_shutdown : 2,
246 sk_no_check : 2,
247 sk_userlocks : 4,
248 sk_protocol : 8,
249 sk_type : 16;
250 kmemcheck_bitfield_end(flags);
251 int sk_rcvbuf;
252 socket_lock_t sk_lock; 244 socket_lock_t sk_lock;
245 struct sk_buff_head sk_receive_queue;
253 /* 246 /*
254 * The backlog queue is special, it is always used with 247 * The backlog queue is special, it is always used with
255 * the per-socket spinlock held and requires low latency 248 * the per-socket spinlock held and requires low latency
256 * access. Therefore we special case it's implementation. 249 * access. Therefore we special case it's implementation.
250 * Note : rmem_alloc is in this structure to fill a hole
251 * on 64bit arches, not because its logically part of
252 * backlog.
257 */ 253 */
258 struct { 254 struct {
259 struct sk_buff *head; 255 atomic_t rmem_alloc;
260 struct sk_buff *tail; 256 int len;
261 int len; 257 struct sk_buff *head;
258 struct sk_buff *tail;
262 } sk_backlog; 259 } sk_backlog;
260#define sk_rmem_alloc sk_backlog.rmem_alloc
261 int sk_forward_alloc;
262#ifdef CONFIG_RPS
263 __u32 sk_rxhash;
264#endif
265 atomic_t sk_drops;
266 int sk_rcvbuf;
267
268 struct sk_filter __rcu *sk_filter;
263 struct socket_wq *sk_wq; 269 struct socket_wq *sk_wq;
264 struct dst_entry *sk_dst_cache; 270
271#ifdef CONFIG_NET_DMA
272 struct sk_buff_head sk_async_wait_queue;
273#endif
274
265#ifdef CONFIG_XFRM 275#ifdef CONFIG_XFRM
266 struct xfrm_policy *sk_policy[2]; 276 struct xfrm_policy *sk_policy[2];
267#endif 277#endif
278 unsigned long sk_flags;
279 struct dst_entry *sk_dst_cache;
268 spinlock_t sk_dst_lock; 280 spinlock_t sk_dst_lock;
269 atomic_t sk_rmem_alloc;
270 atomic_t sk_wmem_alloc; 281 atomic_t sk_wmem_alloc;
271 atomic_t sk_omem_alloc; 282 atomic_t sk_omem_alloc;
272 int sk_sndbuf; 283 int sk_sndbuf;
273 struct sk_buff_head sk_receive_queue;
274 struct sk_buff_head sk_write_queue; 284 struct sk_buff_head sk_write_queue;
275#ifdef CONFIG_NET_DMA 285 kmemcheck_bitfield_begin(flags);
276 struct sk_buff_head sk_async_wait_queue; 286 unsigned int sk_shutdown : 2,
277#endif 287 sk_no_check : 2,
288 sk_userlocks : 4,
289 sk_protocol : 8,
290 sk_type : 16;
291 kmemcheck_bitfield_end(flags);
278 int sk_wmem_queued; 292 int sk_wmem_queued;
279 int sk_forward_alloc;
280 gfp_t sk_allocation; 293 gfp_t sk_allocation;
281 int sk_route_caps; 294 int sk_route_caps;
282 int sk_route_nocaps; 295 int sk_route_nocaps;
283 int sk_gso_type; 296 int sk_gso_type;
284 unsigned int sk_gso_max_size; 297 unsigned int sk_gso_max_size;
285 int sk_rcvlowat; 298 int sk_rcvlowat;
286#ifdef CONFIG_RPS
287 __u32 sk_rxhash;
288#endif
289 unsigned long sk_flags;
290 unsigned long sk_lingertime; 299 unsigned long sk_lingertime;
291 struct sk_buff_head sk_error_queue; 300 struct sk_buff_head sk_error_queue;
292 struct proto *sk_prot_creator; 301 struct proto *sk_prot_creator;
293 rwlock_t sk_callback_lock; 302 rwlock_t sk_callback_lock;
294 int sk_err, 303 int sk_err,
295 sk_err_soft; 304 sk_err_soft;
296 atomic_t sk_drops;
297 unsigned short sk_ack_backlog; 305 unsigned short sk_ack_backlog;
298 unsigned short sk_max_ack_backlog; 306 unsigned short sk_max_ack_backlog;
299 __u32 sk_priority; 307 __u32 sk_priority;
@@ -301,7 +309,6 @@ struct sock {
301 const struct cred *sk_peer_cred; 309 const struct cred *sk_peer_cred;
302 long sk_rcvtimeo; 310 long sk_rcvtimeo;
303 long sk_sndtimeo; 311 long sk_sndtimeo;
304 struct sk_filter __rcu *sk_filter;
305 void *sk_protinfo; 312 void *sk_protinfo;
306 struct timer_list sk_timer; 313 struct timer_list sk_timer;
307 ktime_t sk_stamp; 314 ktime_t sk_stamp;