diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-11-16 00:56:04 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-11-16 14:17:43 -0500 |
commit | b178bb3dfc30d9555bdd2401e95af98e23e83e10 (patch) | |
tree | ec52d739015de589ceca1d31c5802305d94c9f6d | |
parent | c31504dc0d1dc853dcee509d9999169a9097a717 (diff) |
net: reorder struct sock fields
Right now, fields in struct sock are not optimally ordered, because each
path (RX softirq, TX completion, RX user, TX user) has to touch fields
that are contained in many different cache lines.
The really critical thing is to shrink number of cache lines that are
used at RX softirq time : CPU handling softirqs for a device can receive
many frames per second for many sockets. If load is too big, we can drop
frames at NIC level. RPS or multiqueue cards can help, but it is better to
reduce latency if possible.
This patch starts with UDP protocol, then additional patches will try to
reduce latencies of other ones as well.
At RX softirq time, fields of interest for UDP protocol are :
(not counting ones in inet struct for the lookup)
Read/Written:
sk_refcnt (atomic increment/decrement)
sk_rmem_alloc & sk_backlog.len (to check if there is room in queues)
sk_receive_queue
sk_backlog (if socket locked by user program)
sk_rxhash
sk_forward_alloc
sk_drops
Read only:
sk_rcvbuf (sk_rcvqueues_full())
sk_filter
sk_wq
sk_policy[0]
sk_flags
Additional notes :
- sk_backlog has one hole on 64bit arches. We can fill it to save 8
bytes.
- sk_backlog is used only if RX softirq handler finds the socket while
locked by user.
- sk_rxhash is written only once per flow.
- sk_drops is written only if queues are full
Final layout :
[1] One section grouping all read/write fields, but placing rxhash and
sk_backlog at the end of this section.
[2] One section grouping all read fields in RX handler
(sk_filter, sk_rcvbuf, sk_wq)
[3] Section used by other paths
I'll post a patch on its own to put sk_refcnt at the end of struct
sock_common so that it shares the same cache line as section [1]
New offsets on 64bit arch :
sizeof(struct sock)=0x268
offsetof(struct sock, sk_refcnt) =0x10
offsetof(struct sock, sk_lock) =0x48
offsetof(struct sock, sk_receive_queue)=0x68
offsetof(struct sock, sk_backlog)=0x80
offsetof(struct sock, sk_rmem_alloc)=0x80
offsetof(struct sock, sk_forward_alloc)=0x98
offsetof(struct sock, sk_rxhash)=0x9c
offsetof(struct sock, sk_rcvbuf)=0xa4
offsetof(struct sock, sk_drops) =0xa0
offsetof(struct sock, sk_filter)=0xa8
offsetof(struct sock, sk_wq)=0xb0
offsetof(struct sock, sk_policy)=0xd0
offsetof(struct sock, sk_flags) =0xe0
Instead of :
sizeof(struct sock)=0x270
offsetof(struct sock, sk_refcnt) =0x10
offsetof(struct sock, sk_lock) =0x50
offsetof(struct sock, sk_receive_queue)=0xc0
offsetof(struct sock, sk_backlog)=0x70
offsetof(struct sock, sk_rmem_alloc)=0xac
offsetof(struct sock, sk_forward_alloc)=0x10c
offsetof(struct sock, sk_rxhash)=0x128
offsetof(struct sock, sk_rcvbuf)=0x4c
offsetof(struct sock, sk_drops) =0x16c
offsetof(struct sock, sk_filter)=0x198
offsetof(struct sock, sk_wq)=0x88
offsetof(struct sock, sk_policy)=0x98
offsetof(struct sock, sk_flags) =0x130
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 55 |
1 files changed, 31 insertions, 24 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index eb0c1f504678..5557dfb3dd68 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -241,59 +241,67 @@ struct sock { | |||
241 | #define sk_bind_node __sk_common.skc_bind_node | 241 | #define sk_bind_node __sk_common.skc_bind_node |
242 | #define sk_prot __sk_common.skc_prot | 242 | #define sk_prot __sk_common.skc_prot |
243 | #define sk_net __sk_common.skc_net | 243 | #define sk_net __sk_common.skc_net |
244 | kmemcheck_bitfield_begin(flags); | ||
245 | unsigned int sk_shutdown : 2, | ||
246 | sk_no_check : 2, | ||
247 | sk_userlocks : 4, | ||
248 | sk_protocol : 8, | ||
249 | sk_type : 16; | ||
250 | kmemcheck_bitfield_end(flags); | ||
251 | int sk_rcvbuf; | ||
252 | socket_lock_t sk_lock; | 244 | socket_lock_t sk_lock; |
245 | struct sk_buff_head sk_receive_queue; | ||
253 | /* | 246 | /* |
254 | * The backlog queue is special, it is always used with | 247 | * The backlog queue is special, it is always used with |
255 | * the per-socket spinlock held and requires low latency | 248 | * the per-socket spinlock held and requires low latency |
256 | * access. Therefore we special case it's implementation. | 249 | * access. Therefore we special case it's implementation. |
250 | * Note : rmem_alloc is in this structure to fill a hole | ||
251 | * on 64bit arches, not because its logically part of | ||
252 | * backlog. | ||
257 | */ | 253 | */ |
258 | struct { | 254 | struct { |
259 | struct sk_buff *head; | 255 | atomic_t rmem_alloc; |
260 | struct sk_buff *tail; | 256 | int len; |
261 | int len; | 257 | struct sk_buff *head; |
258 | struct sk_buff *tail; | ||
262 | } sk_backlog; | 259 | } sk_backlog; |
260 | #define sk_rmem_alloc sk_backlog.rmem_alloc | ||
261 | int sk_forward_alloc; | ||
262 | #ifdef CONFIG_RPS | ||
263 | __u32 sk_rxhash; | ||
264 | #endif | ||
265 | atomic_t sk_drops; | ||
266 | int sk_rcvbuf; | ||
267 | |||
268 | struct sk_filter __rcu *sk_filter; | ||
263 | struct socket_wq *sk_wq; | 269 | struct socket_wq *sk_wq; |
264 | struct dst_entry *sk_dst_cache; | 270 | |
271 | #ifdef CONFIG_NET_DMA | ||
272 | struct sk_buff_head sk_async_wait_queue; | ||
273 | #endif | ||
274 | |||
265 | #ifdef CONFIG_XFRM | 275 | #ifdef CONFIG_XFRM |
266 | struct xfrm_policy *sk_policy[2]; | 276 | struct xfrm_policy *sk_policy[2]; |
267 | #endif | 277 | #endif |
278 | unsigned long sk_flags; | ||
279 | struct dst_entry *sk_dst_cache; | ||
268 | spinlock_t sk_dst_lock; | 280 | spinlock_t sk_dst_lock; |
269 | atomic_t sk_rmem_alloc; | ||
270 | atomic_t sk_wmem_alloc; | 281 | atomic_t sk_wmem_alloc; |
271 | atomic_t sk_omem_alloc; | 282 | atomic_t sk_omem_alloc; |
272 | int sk_sndbuf; | 283 | int sk_sndbuf; |
273 | struct sk_buff_head sk_receive_queue; | ||
274 | struct sk_buff_head sk_write_queue; | 284 | struct sk_buff_head sk_write_queue; |
275 | #ifdef CONFIG_NET_DMA | 285 | kmemcheck_bitfield_begin(flags); |
276 | struct sk_buff_head sk_async_wait_queue; | 286 | unsigned int sk_shutdown : 2, |
277 | #endif | 287 | sk_no_check : 2, |
288 | sk_userlocks : 4, | ||
289 | sk_protocol : 8, | ||
290 | sk_type : 16; | ||
291 | kmemcheck_bitfield_end(flags); | ||
278 | int sk_wmem_queued; | 292 | int sk_wmem_queued; |
279 | int sk_forward_alloc; | ||
280 | gfp_t sk_allocation; | 293 | gfp_t sk_allocation; |
281 | int sk_route_caps; | 294 | int sk_route_caps; |
282 | int sk_route_nocaps; | 295 | int sk_route_nocaps; |
283 | int sk_gso_type; | 296 | int sk_gso_type; |
284 | unsigned int sk_gso_max_size; | 297 | unsigned int sk_gso_max_size; |
285 | int sk_rcvlowat; | 298 | int sk_rcvlowat; |
286 | #ifdef CONFIG_RPS | ||
287 | __u32 sk_rxhash; | ||
288 | #endif | ||
289 | unsigned long sk_flags; | ||
290 | unsigned long sk_lingertime; | 299 | unsigned long sk_lingertime; |
291 | struct sk_buff_head sk_error_queue; | 300 | struct sk_buff_head sk_error_queue; |
292 | struct proto *sk_prot_creator; | 301 | struct proto *sk_prot_creator; |
293 | rwlock_t sk_callback_lock; | 302 | rwlock_t sk_callback_lock; |
294 | int sk_err, | 303 | int sk_err, |
295 | sk_err_soft; | 304 | sk_err_soft; |
296 | atomic_t sk_drops; | ||
297 | unsigned short sk_ack_backlog; | 305 | unsigned short sk_ack_backlog; |
298 | unsigned short sk_max_ack_backlog; | 306 | unsigned short sk_max_ack_backlog; |
299 | __u32 sk_priority; | 307 | __u32 sk_priority; |
@@ -301,7 +309,6 @@ struct sock { | |||
301 | const struct cred *sk_peer_cred; | 309 | const struct cred *sk_peer_cred; |
302 | long sk_rcvtimeo; | 310 | long sk_rcvtimeo; |
303 | long sk_sndtimeo; | 311 | long sk_sndtimeo; |
304 | struct sk_filter __rcu *sk_filter; | ||
305 | void *sk_protinfo; | 312 | void *sk_protinfo; |
306 | struct timer_list sk_timer; | 313 | struct timer_list sk_timer; |
307 | ktime_t sk_stamp; | 314 | ktime_t sk_stamp; |