diff options
Diffstat (limited to 'include/net/inet_hashtables.h')
-rw-r--r-- | include/net/inet_hashtables.h | 85 |
1 files changed, 42 insertions, 43 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5cc182f9ecae..f44bb5c77a70 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h | |||
@@ -41,8 +41,8 @@ | |||
41 | * I'll experiment with dynamic table growth later. | 41 | * I'll experiment with dynamic table growth later. |
42 | */ | 42 | */ |
43 | struct inet_ehash_bucket { | 43 | struct inet_ehash_bucket { |
44 | struct hlist_head chain; | 44 | struct hlist_nulls_head chain; |
45 | struct hlist_head twchain; | 45 | struct hlist_nulls_head twchain; |
46 | }; | 46 | }; |
47 | 47 | ||
48 | /* There are a few simple rules, which allow for local port reuse by | 48 | /* There are a few simple rules, which allow for local port reuse by |
@@ -77,13 +77,20 @@ struct inet_ehash_bucket { | |||
77 | * ports are created in O(1) time? I thought so. ;-) -DaveM | 77 | * ports are created in O(1) time? I thought so. ;-) -DaveM |
78 | */ | 78 | */ |
79 | struct inet_bind_bucket { | 79 | struct inet_bind_bucket { |
80 | #ifdef CONFIG_NET_NS | ||
80 | struct net *ib_net; | 81 | struct net *ib_net; |
82 | #endif | ||
81 | unsigned short port; | 83 | unsigned short port; |
82 | signed short fastreuse; | 84 | signed short fastreuse; |
83 | struct hlist_node node; | 85 | struct hlist_node node; |
84 | struct hlist_head owners; | 86 | struct hlist_head owners; |
85 | }; | 87 | }; |
86 | 88 | ||
89 | static inline struct net *ib_net(struct inet_bind_bucket *ib) | ||
90 | { | ||
91 | return read_pnet(&ib->ib_net); | ||
92 | } | ||
93 | |||
87 | #define inet_bind_bucket_for_each(tb, node, head) \ | 94 | #define inet_bind_bucket_for_each(tb, node, head) \ |
88 | hlist_for_each_entry(tb, node, head, node) | 95 | hlist_for_each_entry(tb, node, head, node) |
89 | 96 | ||
@@ -92,6 +99,18 @@ struct inet_bind_hashbucket { | |||
92 | struct hlist_head chain; | 99 | struct hlist_head chain; |
93 | }; | 100 | }; |
94 | 101 | ||
102 | /* | ||
103 | * Sockets can be hashed in established or listening table | ||
104 | * We must use different 'nulls' end-of-chain value for listening | ||
105 | * hash table, or we might find a socket that was closed and | ||
106 | * reallocated/inserted into established hash table | ||
107 | */ | ||
108 | #define LISTENING_NULLS_BASE (1U << 29) | ||
109 | struct inet_listen_hashbucket { | ||
110 | spinlock_t lock; | ||
111 | struct hlist_nulls_head head; | ||
112 | }; | ||
113 | |||
95 | /* This is for listening sockets, thus all sockets which possess wildcards. */ | 114 | /* This is for listening sockets, thus all sockets which possess wildcards. */ |
96 | #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ | 115 | #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ |
97 | 116 | ||
@@ -104,7 +123,7 @@ struct inet_hashinfo { | |||
104 | * TIME_WAIT sockets use a separate chain (twchain). | 123 | * TIME_WAIT sockets use a separate chain (twchain). |
105 | */ | 124 | */ |
106 | struct inet_ehash_bucket *ehash; | 125 | struct inet_ehash_bucket *ehash; |
107 | rwlock_t *ehash_locks; | 126 | spinlock_t *ehash_locks; |
108 | unsigned int ehash_size; | 127 | unsigned int ehash_size; |
109 | unsigned int ehash_locks_mask; | 128 | unsigned int ehash_locks_mask; |
110 | 129 | ||
@@ -116,22 +135,21 @@ struct inet_hashinfo { | |||
116 | unsigned int bhash_size; | 135 | unsigned int bhash_size; |
117 | /* Note : 4 bytes padding on 64 bit arches */ | 136 | /* Note : 4 bytes padding on 64 bit arches */ |
118 | 137 | ||
119 | /* All sockets in TCP_LISTEN state will be in here. This is the only | 138 | struct kmem_cache *bind_bucket_cachep; |
120 | * table where wildcard'd TCP sockets can exist. Hash function here | ||
121 | * is just local port number. | ||
122 | */ | ||
123 | struct hlist_head listening_hash[INET_LHTABLE_SIZE]; | ||
124 | 139 | ||
125 | /* All the above members are written once at bootup and | 140 | /* All the above members are written once at bootup and |
126 | * never written again _or_ are predominantly read-access. | 141 | * never written again _or_ are predominantly read-access. |
127 | * | 142 | * |
128 | * Now align to a new cache line as all the following members | 143 | * Now align to a new cache line as all the following members |
129 | * are often dirty. | 144 | * might be often dirty. |
130 | */ | 145 | */ |
131 | rwlock_t lhash_lock ____cacheline_aligned; | 146 | /* All sockets in TCP_LISTEN state will be in here. This is the only |
132 | atomic_t lhash_users; | 147 | * table where wildcard'd TCP sockets can exist. Hash function here |
133 | wait_queue_head_t lhash_wait; | 148 | * is just local port number. |
134 | struct kmem_cache *bind_bucket_cachep; | 149 | */ |
150 | struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] | ||
151 | ____cacheline_aligned_in_smp; | ||
152 | |||
135 | }; | 153 | }; |
136 | 154 | ||
137 | static inline struct inet_ehash_bucket *inet_ehash_bucket( | 155 | static inline struct inet_ehash_bucket *inet_ehash_bucket( |
@@ -141,7 +159,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( | |||
141 | return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; | 159 | return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; |
142 | } | 160 | } |
143 | 161 | ||
144 | static inline rwlock_t *inet_ehash_lockp( | 162 | static inline spinlock_t *inet_ehash_lockp( |
145 | struct inet_hashinfo *hashinfo, | 163 | struct inet_hashinfo *hashinfo, |
146 | unsigned int hash) | 164 | unsigned int hash) |
147 | { | 165 | { |
@@ -166,16 +184,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) | |||
166 | size = 4096; | 184 | size = 4096; |
167 | if (sizeof(rwlock_t) != 0) { | 185 | if (sizeof(rwlock_t) != 0) { |
168 | #ifdef CONFIG_NUMA | 186 | #ifdef CONFIG_NUMA |
169 | if (size * sizeof(rwlock_t) > PAGE_SIZE) | 187 | if (size * sizeof(spinlock_t) > PAGE_SIZE) |
170 | hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); | 188 | hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); |
171 | else | 189 | else |
172 | #endif | 190 | #endif |
173 | hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), | 191 | hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), |
174 | GFP_KERNEL); | 192 | GFP_KERNEL); |
175 | if (!hashinfo->ehash_locks) | 193 | if (!hashinfo->ehash_locks) |
176 | return ENOMEM; | 194 | return ENOMEM; |
177 | for (i = 0; i < size; i++) | 195 | for (i = 0; i < size; i++) |
178 | rwlock_init(&hashinfo->ehash_locks[i]); | 196 | spin_lock_init(&hashinfo->ehash_locks[i]); |
179 | } | 197 | } |
180 | hashinfo->ehash_locks_mask = size - 1; | 198 | hashinfo->ehash_locks_mask = size - 1; |
181 | return 0; | 199 | return 0; |
@@ -186,7 +204,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) | |||
186 | if (hashinfo->ehash_locks) { | 204 | if (hashinfo->ehash_locks) { |
187 | #ifdef CONFIG_NUMA | 205 | #ifdef CONFIG_NUMA |
188 | unsigned int size = (hashinfo->ehash_locks_mask + 1) * | 206 | unsigned int size = (hashinfo->ehash_locks_mask + 1) * |
189 | sizeof(rwlock_t); | 207 | sizeof(spinlock_t); |
190 | if (size > PAGE_SIZE) | 208 | if (size > PAGE_SIZE) |
191 | vfree(hashinfo->ehash_locks); | 209 | vfree(hashinfo->ehash_locks); |
192 | else | 210 | else |
@@ -229,26 +247,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child); | |||
229 | 247 | ||
230 | extern void inet_put_port(struct sock *sk); | 248 | extern void inet_put_port(struct sock *sk); |
231 | 249 | ||
232 | extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); | 250 | void inet_hashinfo_init(struct inet_hashinfo *h); |
233 | |||
234 | /* | ||
235 | * - We may sleep inside this lock. | ||
236 | * - If sleeping is not required (or called from BH), | ||
237 | * use plain read_(un)lock(&inet_hashinfo.lhash_lock). | ||
238 | */ | ||
239 | static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) | ||
240 | { | ||
241 | /* read_lock synchronizes to candidates to writers */ | ||
242 | read_lock(&hashinfo->lhash_lock); | ||
243 | atomic_inc(&hashinfo->lhash_users); | ||
244 | read_unlock(&hashinfo->lhash_lock); | ||
245 | } | ||
246 | |||
247 | static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) | ||
248 | { | ||
249 | if (atomic_dec_and_test(&hashinfo->lhash_users)) | ||
250 | wake_up(&hashinfo->lhash_wait); | ||
251 | } | ||
252 | 251 | ||
253 | extern void __inet_hash_nolisten(struct sock *sk); | 252 | extern void __inet_hash_nolisten(struct sock *sk); |
254 | extern void inet_hash(struct sock *sk); | 253 | extern void inet_hash(struct sock *sk); |
@@ -299,25 +298,25 @@ typedef __u64 __bitwise __addrpair; | |||
299 | ((__force __u64)(__be32)(__saddr))); | 298 | ((__force __u64)(__be32)(__saddr))); |
300 | #endif /* __BIG_ENDIAN */ | 299 | #endif /* __BIG_ENDIAN */ |
301 | #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ | 300 | #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ |
302 | (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ | 301 | (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ |
303 | ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ | 302 | ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ |
304 | ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | 303 | ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ |
305 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | 304 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) |
306 | #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ | 305 | #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ |
307 | (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ | 306 | (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ |
308 | ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ | 307 | ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ |
309 | ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ | 308 | ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ |
310 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | 309 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) |
311 | #else /* 32-bit arch */ | 310 | #else /* 32-bit arch */ |
312 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) | 311 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) |
313 | #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ | 312 | #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ |
314 | (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ | 313 | (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ |
315 | (inet_sk(__sk)->daddr == (__saddr)) && \ | 314 | (inet_sk(__sk)->daddr == (__saddr)) && \ |
316 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | 315 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ |
317 | ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | 316 | ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ |
318 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | 317 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) |
319 | #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ | 318 | #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ |
320 | (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ | 319 | (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ |
321 | (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ | 320 | (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ |
322 | (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ | 321 | (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ |
323 | ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ | 322 | ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ |