diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2005-10-03 17:13:38 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-10-03 17:13:38 -0400 |
commit | 81c3d5470ecc70564eb9209946730fe2be93ad06 (patch) | |
tree | 1efa553c305a6453769dacfaf580bc6ccf146d82 /include/net/sock.h | |
parent | 399de50bbbb2501a6db43daaa8a2dafbc9bcfe0c (diff) |
[INET]: speedup inet (tcp/dccp) lookups
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt in a separate cache line,
so that the SMP/NUMA performance doesnt suffer from cache line ping pongs)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints but only the begining of
"struct sock" is prefetched.
As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far
away from the begining of "struct sock", it has to bring into CPU
cache cold cache line. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store in this 32 bits field the full hash, not masked by (ehash_size -
1) Using this full hash as the first comparison done in INET_MATCH
permits us immediatly skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash))
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 8c48fbecb7cf..b6440805c420 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -99,6 +99,7 @@ struct proto; | |||
99 | * @skc_node: main hash linkage for various protocol lookup tables | 99 | * @skc_node: main hash linkage for various protocol lookup tables |
100 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 100 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
101 | * @skc_refcnt: reference count | 101 | * @skc_refcnt: reference count |
102 | * @skc_hash: hash value used with various protocol lookup tables | ||
102 | * @skc_prot: protocol handlers inside a network family | 103 | * @skc_prot: protocol handlers inside a network family |
103 | * | 104 | * |
104 | * This is the minimal network layer representation of sockets, the header | 105 | * This is the minimal network layer representation of sockets, the header |
@@ -112,6 +113,7 @@ struct sock_common { | |||
112 | struct hlist_node skc_node; | 113 | struct hlist_node skc_node; |
113 | struct hlist_node skc_bind_node; | 114 | struct hlist_node skc_bind_node; |
114 | atomic_t skc_refcnt; | 115 | atomic_t skc_refcnt; |
116 | unsigned int skc_hash; | ||
115 | struct proto *skc_prot; | 117 | struct proto *skc_prot; |
116 | }; | 118 | }; |
117 | 119 | ||
@@ -139,7 +141,6 @@ struct sock_common { | |||
139 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets | 141 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets |
140 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) | 142 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) |
141 | * @sk_lingertime: %SO_LINGER l_linger setting | 143 | * @sk_lingertime: %SO_LINGER l_linger setting |
142 | * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) | ||
143 | * @sk_backlog: always used with the per-socket spinlock held | 144 | * @sk_backlog: always used with the per-socket spinlock held |
144 | * @sk_callback_lock: used with the callbacks in the end of this struct | 145 | * @sk_callback_lock: used with the callbacks in the end of this struct |
145 | * @sk_error_queue: rarely used | 146 | * @sk_error_queue: rarely used |
@@ -186,6 +187,7 @@ struct sock { | |||
186 | #define sk_node __sk_common.skc_node | 187 | #define sk_node __sk_common.skc_node |
187 | #define sk_bind_node __sk_common.skc_bind_node | 188 | #define sk_bind_node __sk_common.skc_bind_node |
188 | #define sk_refcnt __sk_common.skc_refcnt | 189 | #define sk_refcnt __sk_common.skc_refcnt |
190 | #define sk_hash __sk_common.skc_hash | ||
189 | #define sk_prot __sk_common.skc_prot | 191 | #define sk_prot __sk_common.skc_prot |
190 | unsigned char sk_shutdown : 2, | 192 | unsigned char sk_shutdown : 2, |
191 | sk_no_check : 2, | 193 | sk_no_check : 2, |
@@ -208,7 +210,6 @@ struct sock { | |||
208 | unsigned int sk_allocation; | 210 | unsigned int sk_allocation; |
209 | int sk_sndbuf; | 211 | int sk_sndbuf; |
210 | int sk_route_caps; | 212 | int sk_route_caps; |
211 | int sk_hashent; | ||
212 | unsigned long sk_flags; | 213 | unsigned long sk_flags; |
213 | unsigned long sk_lingertime; | 214 | unsigned long sk_lingertime; |
214 | /* | 215 | /* |