diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-11-30 14:04:07 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-12-09 23:05:58 -0500 |
commit | 68835aba4d9b74e2f94106d13b6a4bddc447c4c8 (patch) | |
tree | 468a9465acfa027791facef13fb6ef5129dda257 | |
parent | defb3519a64141608725e2dac5a5aa9a3c644bae (diff) |
net: optimize INET input path further
Followup of commit b178bb3dfc30 (net: reorder struct sock fields)
Optimize the INET input path a bit further by:
1) moving sk_refcnt close to sk_lock.
This reduces the number of dirtied cache lines by one on 64-bit arches
(with a 64-byte cache line size).
2) moving inet_daddr & inet_rcv_saddr to the beginning of sk
(same cache line as hash / family / bound_dev_if / nulls_node)
This reduces the number of cache lines accessed in lookups by one, and
doesn't increase the size of inet and timewait socks.
inet and tw sockets now share the same placeholder for these fields.
Before patch :
offsetof(struct sock, sk_refcnt) = 0x10
offsetof(struct sock, sk_lock) = 0x40
offsetof(struct sock, sk_receive_queue) = 0x60
offsetof(struct inet_sock, inet_daddr) = 0x270
offsetof(struct inet_sock, inet_rcv_saddr) = 0x274
After patch :
offsetof(struct sock, sk_refcnt) = 0x44
offsetof(struct sock, sk_lock) = 0x48
offsetof(struct sock, sk_receive_queue) = 0x68
offsetof(struct inet_sock, inet_daddr) = 0x0
offsetof(struct inet_sock, inet_rcv_saddr) = 0x4
compute_score() (udp or tcp) now uses a single cache line per ignored
item, instead of two.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_sock.h | 5 | ||||
-rw-r--r-- | include/net/inet_timewait_sock.h | 20 | ||||
-rw-r--r-- | include/net/sock.h | 37 | ||||
-rw-r--r-- | net/core/sock.c | 11 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 7 | ||||
-rw-r--r-- | net/ipv6/udp.c | 4 |
6 files changed, 45 insertions, 39 deletions
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 8945f9fb192a..8181498fa96c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h | |||
@@ -116,8 +116,9 @@ struct inet_sock { | |||
116 | struct ipv6_pinfo *pinet6; | 116 | struct ipv6_pinfo *pinet6; |
117 | #endif | 117 | #endif |
118 | /* Socket demultiplex comparisons on incoming packets. */ | 118 | /* Socket demultiplex comparisons on incoming packets. */ |
119 | __be32 inet_daddr; | 119 | #define inet_daddr sk.__sk_common.skc_daddr |
120 | __be32 inet_rcv_saddr; | 120 | #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr |
121 | |||
121 | __be16 inet_dport; | 122 | __be16 inet_dport; |
122 | __u16 inet_num; | 123 | __u16 inet_num; |
123 | __be32 inet_saddr; | 124 | __be32 inet_saddr; |
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index a066fdd50da6..17404b5388a7 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h | |||
@@ -88,12 +88,6 @@ extern void inet_twdr_hangman(unsigned long data); | |||
88 | extern void inet_twdr_twkill_work(struct work_struct *work); | 88 | extern void inet_twdr_twkill_work(struct work_struct *work); |
89 | extern void inet_twdr_twcal_tick(unsigned long data); | 89 | extern void inet_twdr_twcal_tick(unsigned long data); |
90 | 90 | ||
91 | #if (BITS_PER_LONG == 64) | ||
92 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 | ||
93 | #else | ||
94 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 | ||
95 | #endif | ||
96 | |||
97 | struct inet_bind_bucket; | 91 | struct inet_bind_bucket; |
98 | 92 | ||
99 | /* | 93 | /* |
@@ -117,15 +111,15 @@ struct inet_timewait_sock { | |||
117 | #define tw_hash __tw_common.skc_hash | 111 | #define tw_hash __tw_common.skc_hash |
118 | #define tw_prot __tw_common.skc_prot | 112 | #define tw_prot __tw_common.skc_prot |
119 | #define tw_net __tw_common.skc_net | 113 | #define tw_net __tw_common.skc_net |
114 | #define tw_daddr __tw_common.skc_daddr | ||
115 | #define tw_rcv_saddr __tw_common.skc_rcv_saddr | ||
120 | int tw_timeout; | 116 | int tw_timeout; |
121 | volatile unsigned char tw_substate; | 117 | volatile unsigned char tw_substate; |
122 | /* 3 bits hole, try to pack */ | ||
123 | unsigned char tw_rcv_wscale; | 118 | unsigned char tw_rcv_wscale; |
119 | |||
124 | /* Socket demultiplex comparisons on incoming packets. */ | 120 | /* Socket demultiplex comparisons on incoming packets. */ |
125 | /* these five are in inet_sock */ | 121 | /* these three are in inet_sock */ |
126 | __be16 tw_sport; | 122 | __be16 tw_sport; |
127 | __be32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); | ||
128 | __be32 tw_rcv_saddr; | ||
129 | __be16 tw_dport; | 123 | __be16 tw_dport; |
130 | __u16 tw_num; | 124 | __u16 tw_num; |
131 | kmemcheck_bitfield_begin(flags); | 125 | kmemcheck_bitfield_begin(flags); |
@@ -191,10 +185,10 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) | |||
191 | return (struct inet_timewait_sock *)sk; | 185 | return (struct inet_timewait_sock *)sk; |
192 | } | 186 | } |
193 | 187 | ||
194 | static inline __be32 inet_rcv_saddr(const struct sock *sk) | 188 | static inline __be32 sk_rcv_saddr(const struct sock *sk) |
195 | { | 189 | { |
196 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | 190 | /* both inet_sk() and inet_twsk() store rcv_saddr in skc_rcv_saddr */ |
197 | inet_sk(sk)->inet_rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; | 191 | return sk->__sk_common.skc_rcv_saddr; |
198 | } | 192 | } |
199 | 193 | ||
200 | extern void inet_twsk_put(struct inet_timewait_sock *tw); | 194 | extern void inet_twsk_put(struct inet_timewait_sock *tw); |
diff --git a/include/net/sock.h b/include/net/sock.h index 3482004e5c29..82e86034702f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -105,10 +105,8 @@ struct net; | |||
105 | 105 | ||
106 | /** | 106 | /** |
107 | * struct sock_common - minimal network layer representation of sockets | 107 | * struct sock_common - minimal network layer representation of sockets |
108 | * @skc_node: main hash linkage for various protocol lookup tables | 108 | * @skc_daddr: Foreign IPv4 addr |
109 | * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol | 109 | * @skc_rcv_saddr: Bound local IPv4 addr |
110 | * @skc_refcnt: reference count | ||
111 | * @skc_tx_queue_mapping: tx queue number for this connection | ||
112 | * @skc_hash: hash value used with various protocol lookup tables | 110 | * @skc_hash: hash value used with various protocol lookup tables |
113 | * @skc_u16hashes: two u16 hash values used by UDP lookup tables | 111 | * @skc_u16hashes: two u16 hash values used by UDP lookup tables |
114 | * @skc_family: network address family | 112 | * @skc_family: network address family |
@@ -119,20 +117,20 @@ struct net; | |||
119 | * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol | 117 | * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol |
120 | * @skc_prot: protocol handlers inside a network family | 118 | * @skc_prot: protocol handlers inside a network family |
121 | * @skc_net: reference to the network namespace of this socket | 119 | * @skc_net: reference to the network namespace of this socket |
120 | * @skc_node: main hash linkage for various protocol lookup tables | ||
121 | * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol | ||
122 | * @skc_tx_queue_mapping: tx queue number for this connection | ||
123 | * @skc_refcnt: reference count | ||
122 | * | 124 | * |
123 | * This is the minimal network layer representation of sockets, the header | 125 | * This is the minimal network layer representation of sockets, the header |
124 | * for struct sock and struct inet_timewait_sock. | 126 | * for struct sock and struct inet_timewait_sock. |
125 | */ | 127 | */ |
126 | struct sock_common { | 128 | struct sock_common { |
127 | /* | 129 | /* skc_daddr and skc_rcv_saddr must be grouped : |
128 | * first fields are not copied in sock_copy() | 130 | * cf INET_MATCH() and INET_TW_MATCH() |
129 | */ | 131 | */ |
130 | union { | 132 | __be32 skc_daddr; |
131 | struct hlist_node skc_node; | 133 | __be32 skc_rcv_saddr; |
132 | struct hlist_nulls_node skc_nulls_node; | ||
133 | }; | ||
134 | atomic_t skc_refcnt; | ||
135 | int skc_tx_queue_mapping; | ||
136 | 134 | ||
137 | union { | 135 | union { |
138 | unsigned int skc_hash; | 136 | unsigned int skc_hash; |
@@ -150,6 +148,18 @@ struct sock_common { | |||
150 | #ifdef CONFIG_NET_NS | 148 | #ifdef CONFIG_NET_NS |
151 | struct net *skc_net; | 149 | struct net *skc_net; |
152 | #endif | 150 | #endif |
151 | /* | ||
152 | * fields between dontcopy_begin/dontcopy_end | ||
153 | * are not copied in sock_copy() | ||
154 | */ | ||
155 | int skc_dontcopy_begin[0]; | ||
156 | union { | ||
157 | struct hlist_node skc_node; | ||
158 | struct hlist_nulls_node skc_nulls_node; | ||
159 | }; | ||
160 | int skc_tx_queue_mapping; | ||
161 | atomic_t skc_refcnt; | ||
162 | int skc_dontcopy_end[0]; | ||
153 | }; | 163 | }; |
154 | 164 | ||
155 | /** | 165 | /** |
@@ -232,7 +242,8 @@ struct sock { | |||
232 | #define sk_refcnt __sk_common.skc_refcnt | 242 | #define sk_refcnt __sk_common.skc_refcnt |
233 | #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping | 243 | #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping |
234 | 244 | ||
235 | #define sk_copy_start __sk_common.skc_hash | 245 | #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin |
246 | #define sk_dontcopy_end __sk_common.skc_dontcopy_end | ||
236 | #define sk_hash __sk_common.skc_hash | 247 | #define sk_hash __sk_common.skc_hash |
237 | #define sk_family __sk_common.skc_family | 248 | #define sk_family __sk_common.skc_family |
238 | #define sk_state __sk_common.skc_state | 249 | #define sk_state __sk_common.skc_state |
diff --git a/net/core/sock.c b/net/core/sock.c index fb6080111461..bcdb6ff6e621 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -992,17 +992,18 @@ static inline void sock_lock_init(struct sock *sk) | |||
992 | /* | 992 | /* |
993 | * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, | 993 | * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, |
994 | * even temporarly, because of RCU lookups. sk_node should also be left as is. | 994 | * even temporarly, because of RCU lookups. sk_node should also be left as is. |
995 | * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end | ||
995 | */ | 996 | */ |
996 | static void sock_copy(struct sock *nsk, const struct sock *osk) | 997 | static void sock_copy(struct sock *nsk, const struct sock *osk) |
997 | { | 998 | { |
998 | #ifdef CONFIG_SECURITY_NETWORK | 999 | #ifdef CONFIG_SECURITY_NETWORK |
999 | void *sptr = nsk->sk_security; | 1000 | void *sptr = nsk->sk_security; |
1000 | #endif | 1001 | #endif |
1001 | BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != | 1002 | memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); |
1002 | sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + | 1003 | |
1003 | sizeof(osk->sk_tx_queue_mapping)); | 1004 | memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, |
1004 | memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, | 1005 | osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); |
1005 | osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); | 1006 | |
1006 | #ifdef CONFIG_SECURITY_NETWORK | 1007 | #ifdef CONFIG_SECURITY_NETWORK |
1007 | nsk->sk_security = sptr; | 1008 | nsk->sk_security = sptr; |
1008 | security_sk_clone(osk, nsk); | 1009 | security_sk_clone(osk, nsk); |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 06f5f8f482f0..25e318153f14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range); | |||
55 | int inet_csk_bind_conflict(const struct sock *sk, | 55 | int inet_csk_bind_conflict(const struct sock *sk, |
56 | const struct inet_bind_bucket *tb) | 56 | const struct inet_bind_bucket *tb) |
57 | { | 57 | { |
58 | const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); | ||
59 | struct sock *sk2; | 58 | struct sock *sk2; |
60 | struct hlist_node *node; | 59 | struct hlist_node *node; |
61 | int reuse = sk->sk_reuse; | 60 | int reuse = sk->sk_reuse; |
@@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
75 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | 74 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { |
76 | if (!reuse || !sk2->sk_reuse || | 75 | if (!reuse || !sk2->sk_reuse || |
77 | sk2->sk_state == TCP_LISTEN) { | 76 | sk2->sk_state == TCP_LISTEN) { |
78 | const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | 77 | const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); |
79 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | 78 | if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || |
80 | sk2_rcv_saddr == sk_rcv_saddr) | 79 | sk2_rcv_saddr == sk_rcv_saddr(sk)) |
81 | break; | 80 | break; |
82 | } | 81 | } |
83 | } | 82 | } |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b541a4e009fb..7aad12770867 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) | |||
54 | { | 54 | { |
55 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; | 55 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; |
56 | const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); | 56 | const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); |
57 | __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; | 57 | __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); |
58 | __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | 58 | __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); |
59 | int sk_ipv6only = ipv6_only_sock(sk); | 59 | int sk_ipv6only = ipv6_only_sock(sk); |
60 | int sk2_ipv6only = inet_v6_ipv6only(sk2); | 60 | int sk2_ipv6only = inet_v6_ipv6only(sk2); |
61 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); | 61 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); |