aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/inet_hashtables.h
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2008-12-29 16:32:35 -0500
committerRusty Russell <rusty@rustcorp.com.au>2008-12-29 16:32:35 -0500
commit33edcf133ba93ecba2e4b6472e97b689895d805c (patch)
tree327d7a20acef64005e7c5ccbfa1265be28aeb6ac /include/net/inet_hashtables.h
parentbe4d638c1597580ed2294d899d9f1a2cd10e462c (diff)
parent3c92ec8ae91ecf59d88c798301833d7cf83f2179 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'include/net/inet_hashtables.h')
-rw-r--r--include/net/inet_hashtables.h85
1 files changed, 42 insertions, 43 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5cc182f9ecae..f44bb5c77a70 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -41,8 +41,8 @@
41 * I'll experiment with dynamic table growth later. 41 * I'll experiment with dynamic table growth later.
42 */ 42 */
43struct inet_ehash_bucket { 43struct inet_ehash_bucket {
44 struct hlist_head chain; 44 struct hlist_nulls_head chain;
45 struct hlist_head twchain; 45 struct hlist_nulls_head twchain;
46}; 46};
47 47
48/* There are a few simple rules, which allow for local port reuse by 48/* There are a few simple rules, which allow for local port reuse by
@@ -77,13 +77,20 @@ struct inet_ehash_bucket {
77 * ports are created in O(1) time? I thought so. ;-) -DaveM 77 * ports are created in O(1) time? I thought so. ;-) -DaveM
78 */ 78 */
79struct inet_bind_bucket { 79struct inet_bind_bucket {
80#ifdef CONFIG_NET_NS
80 struct net *ib_net; 81 struct net *ib_net;
82#endif
81 unsigned short port; 83 unsigned short port;
82 signed short fastreuse; 84 signed short fastreuse;
83 struct hlist_node node; 85 struct hlist_node node;
84 struct hlist_head owners; 86 struct hlist_head owners;
85}; 87};
86 88
89static inline struct net *ib_net(struct inet_bind_bucket *ib)
90{
91 return read_pnet(&ib->ib_net);
92}
93
87#define inet_bind_bucket_for_each(tb, node, head) \ 94#define inet_bind_bucket_for_each(tb, node, head) \
88 hlist_for_each_entry(tb, node, head, node) 95 hlist_for_each_entry(tb, node, head, node)
89 96
@@ -92,6 +99,18 @@ struct inet_bind_hashbucket {
92 struct hlist_head chain; 99 struct hlist_head chain;
93}; 100};
94 101
102/*
103 * Sockets can be hashed in established or listening table
104 * We must use different 'nulls' end-of-chain value for listening
105 * hash table, or we might find a socket that was closed and
106 * reallocated/inserted into established hash table
107 */
108#define LISTENING_NULLS_BASE (1U << 29)
109struct inet_listen_hashbucket {
110 spinlock_t lock;
111 struct hlist_nulls_head head;
112};
113
95/* This is for listening sockets, thus all sockets which possess wildcards. */ 114/* This is for listening sockets, thus all sockets which possess wildcards. */
96#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ 115#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
97 116
@@ -104,7 +123,7 @@ struct inet_hashinfo {
104 * TIME_WAIT sockets use a separate chain (twchain). 123 * TIME_WAIT sockets use a separate chain (twchain).
105 */ 124 */
106 struct inet_ehash_bucket *ehash; 125 struct inet_ehash_bucket *ehash;
107 rwlock_t *ehash_locks; 126 spinlock_t *ehash_locks;
108 unsigned int ehash_size; 127 unsigned int ehash_size;
109 unsigned int ehash_locks_mask; 128 unsigned int ehash_locks_mask;
110 129
@@ -116,22 +135,21 @@ struct inet_hashinfo {
116 unsigned int bhash_size; 135 unsigned int bhash_size;
117 /* Note : 4 bytes padding on 64 bit arches */ 136 /* Note : 4 bytes padding on 64 bit arches */
118 137
119 /* All sockets in TCP_LISTEN state will be in here. This is the only 138 struct kmem_cache *bind_bucket_cachep;
120 * table where wildcard'd TCP sockets can exist. Hash function here
121 * is just local port number.
122 */
123 struct hlist_head listening_hash[INET_LHTABLE_SIZE];
124 139
125 /* All the above members are written once at bootup and 140 /* All the above members are written once at bootup and
126 * never written again _or_ are predominantly read-access. 141 * never written again _or_ are predominantly read-access.
127 * 142 *
128 * Now align to a new cache line as all the following members 143 * Now align to a new cache line as all the following members
129 * are often dirty. 144 * might be often dirty.
130 */ 145 */
131 rwlock_t lhash_lock ____cacheline_aligned; 146 /* All sockets in TCP_LISTEN state will be in here. This is the only
132 atomic_t lhash_users; 147 * table where wildcard'd TCP sockets can exist. Hash function here
133 wait_queue_head_t lhash_wait; 148 * is just local port number.
134 struct kmem_cache *bind_bucket_cachep; 149 */
150 struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
151 ____cacheline_aligned_in_smp;
152
135}; 153};
136 154
137static inline struct inet_ehash_bucket *inet_ehash_bucket( 155static inline struct inet_ehash_bucket *inet_ehash_bucket(
@@ -141,7 +159,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
141 return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; 159 return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
142} 160}
143 161
144static inline rwlock_t *inet_ehash_lockp( 162static inline spinlock_t *inet_ehash_lockp(
145 struct inet_hashinfo *hashinfo, 163 struct inet_hashinfo *hashinfo,
146 unsigned int hash) 164 unsigned int hash)
147{ 165{
@@ -166,16 +184,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
166 size = 4096; 184 size = 4096;
167 if (sizeof(rwlock_t) != 0) { 185 if (sizeof(rwlock_t) != 0) {
168#ifdef CONFIG_NUMA 186#ifdef CONFIG_NUMA
169 if (size * sizeof(rwlock_t) > PAGE_SIZE) 187 if (size * sizeof(spinlock_t) > PAGE_SIZE)
170 hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); 188 hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
171 else 189 else
172#endif 190#endif
173 hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), 191 hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
174 GFP_KERNEL); 192 GFP_KERNEL);
175 if (!hashinfo->ehash_locks) 193 if (!hashinfo->ehash_locks)
176 return ENOMEM; 194 return ENOMEM;
177 for (i = 0; i < size; i++) 195 for (i = 0; i < size; i++)
178 rwlock_init(&hashinfo->ehash_locks[i]); 196 spin_lock_init(&hashinfo->ehash_locks[i]);
179 } 197 }
180 hashinfo->ehash_locks_mask = size - 1; 198 hashinfo->ehash_locks_mask = size - 1;
181 return 0; 199 return 0;
@@ -186,7 +204,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
186 if (hashinfo->ehash_locks) { 204 if (hashinfo->ehash_locks) {
187#ifdef CONFIG_NUMA 205#ifdef CONFIG_NUMA
188 unsigned int size = (hashinfo->ehash_locks_mask + 1) * 206 unsigned int size = (hashinfo->ehash_locks_mask + 1) *
189 sizeof(rwlock_t); 207 sizeof(spinlock_t);
190 if (size > PAGE_SIZE) 208 if (size > PAGE_SIZE)
191 vfree(hashinfo->ehash_locks); 209 vfree(hashinfo->ehash_locks);
192 else 210 else
@@ -229,26 +247,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child);
229 247
230extern void inet_put_port(struct sock *sk); 248extern void inet_put_port(struct sock *sk);
231 249
232extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); 250void inet_hashinfo_init(struct inet_hashinfo *h);
233
234/*
235 * - We may sleep inside this lock.
236 * - If sleeping is not required (or called from BH),
237 * use plain read_(un)lock(&inet_hashinfo.lhash_lock).
238 */
239static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
240{
241 /* read_lock synchronizes to candidates to writers */
242 read_lock(&hashinfo->lhash_lock);
243 atomic_inc(&hashinfo->lhash_users);
244 read_unlock(&hashinfo->lhash_lock);
245}
246
247static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
248{
249 if (atomic_dec_and_test(&hashinfo->lhash_users))
250 wake_up(&hashinfo->lhash_wait);
251}
252 251
253extern void __inet_hash_nolisten(struct sock *sk); 252extern void __inet_hash_nolisten(struct sock *sk);
254extern void inet_hash(struct sock *sk); 253extern void inet_hash(struct sock *sk);
@@ -299,25 +298,25 @@ typedef __u64 __bitwise __addrpair;
299 ((__force __u64)(__be32)(__saddr))); 298 ((__force __u64)(__be32)(__saddr)));
300#endif /* __BIG_ENDIAN */ 299#endif /* __BIG_ENDIAN */
301#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ 300#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
302 (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ 301 (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
303 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ 302 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
304 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ 303 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \
305 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 304 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
306#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ 305#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
307 (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ 306 (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
308 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ 307 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
309 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ 308 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
310 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 309 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
311#else /* 32-bit arch */ 310#else /* 32-bit arch */
312#define INET_ADDR_COOKIE(__name, __saddr, __daddr) 311#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
313#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ 312#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
314 (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ 313 (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
315 (inet_sk(__sk)->daddr == (__saddr)) && \ 314 (inet_sk(__sk)->daddr == (__saddr)) && \
316 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ 315 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
317 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ 316 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \
318 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 317 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
319#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ 318#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
320 (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ 319 (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \
321 (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ 320 (inet_twsk(__sk)->tw_daddr == (__saddr)) && \
322 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ 321 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
323 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ 322 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \