author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-11-09 18:07:57 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-11-09 18:07:57 -0500
commit	c5b875e354a54e2b5ba24eecae69bf94e025edd5 (patch)
tree	0446a68d99ad50305ab78835456d9faa62be5948 /include/net
parent	eae1920a21b4f87e89cea802e7df39442b119617 (diff)
parent	c3d8d1e30cace31fed6186a4b8c6b1401836d89c (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (44 commits)
  [NETLINK]: Fix unicast timeouts
  [INET]: Remove per bucket rwlock in tcp/dccp ehash table.
  [IPVS]: Synchronize closing of Connections
  [IPVS]: Bind connections on standby if the destination exists
  [NET]: Remove Documentation/networking/pt.txt
  [NET]: Remove Documentation/networking/routing.txt
  [NET]: Remove Documentation/networking/ncsa-telnet
  [NET]: Remove comx driver docs.
  [NET]: Remove Documentation/networking/Configurable
  [NET]: Clean proto_(un)register from in-code ifdefs
  [IPSEC]: Fix crypto_alloc_comp error checking
  [VLAN]: Fix SET_VLAN_INGRESS_PRIORITY_CMD ioctl
  [NETNS]: Fix compiler error in net_namespace.c
  [TTY]: Use tty_mode_ioctl() in network drivers.
  [TTY]: Fix network driver interactions with TCGET/SET calls.
  [PKT_SCHED] CLS_U32: Fix endianness problem with u32 classifier hash masks.
  [NET]: Removing duplicate #includes
  [NET]: Let USB_USBNET always select MII.
  [RRUNNER]: Do not muck with sysctl_{r,w}mem_max
  [DLM] lowcomms: Do not muck with sysctl_rmem_max.
  ...
Diffstat (limited to 'include/net')
-rw-r--r--	include/net/inet_hashtables.h	71
-rw-r--r--	include/net/ip_fib.h	15
-rw-r--r--	include/net/ip_vs.h	8
-rw-r--r--	include/net/sock.h	63
4 files changed, 136 insertions(+), 21 deletions(-)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 4427dcd1e53a..8461cda37490 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -37,7 +37,6 @@
  * I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
-	rwlock_t	  lock;
 	struct hlist_head chain;
 	struct hlist_head twchain;
 };
@@ -100,6 +99,9 @@ struct inet_hashinfo {
 	 * TIME_WAIT sockets use a separate chain (twchain).
 	 */
 	struct inet_ehash_bucket	*ehash;
+	rwlock_t			*ehash_locks;
+	unsigned int			ehash_size;
+	unsigned int			ehash_locks_mask;
 
 	/* Ok, let's try this, I give up, we do need a local binding
 	 * TCP hash as well as the others for fast bind/connect.
@@ -107,7 +109,7 @@ struct inet_hashinfo {
 	struct inet_bind_hashbucket	*bhash;
 
 	unsigned int			bhash_size;
-	unsigned int			ehash_size;
+	/* Note : 4 bytes padding on 64 bit arches */
 
 	/* All sockets in TCP_LISTEN state will be in here. This is the only
 	 * table where wildcard'd TCP sockets can exist. Hash function here
@@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
+static inline rwlock_t *inet_ehash_lockp(
+	struct inet_hashinfo *hashinfo,
+	unsigned int hash)
+{
+	return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
+}
+
+static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
+{
+	unsigned int i, size = 256;
+#if defined(CONFIG_PROVE_LOCKING)
+	unsigned int nr_pcpus = 2;
+#else
+	unsigned int nr_pcpus = num_possible_cpus();
+#endif
+	if (nr_pcpus >= 4)
+		size = 512;
+	if (nr_pcpus >= 8)
+		size = 1024;
+	if (nr_pcpus >= 16)
+		size = 2048;
+	if (nr_pcpus >= 32)
+		size = 4096;
+	if (sizeof(rwlock_t) != 0) {
+#ifdef CONFIG_NUMA
+		if (size * sizeof(rwlock_t) > PAGE_SIZE)
+			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+		else
+#endif
+		hashinfo->ehash_locks =	kmalloc(size * sizeof(rwlock_t),
+						GFP_KERNEL);
+		if (!hashinfo->ehash_locks)
+			return ENOMEM;
+		for (i = 0; i < size; i++)
+			rwlock_init(&hashinfo->ehash_locks[i]);
+	}
+	hashinfo->ehash_locks_mask = size - 1;
+	return 0;
+}
+
+static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+{
+	if (hashinfo->ehash_locks) {
+#ifdef CONFIG_NUMA
+		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
+							sizeof(rwlock_t);
+		if (size > PAGE_SIZE)
+			vfree(hashinfo->ehash_locks);
+		else
+#else
+		kfree(hashinfo->ehash_locks);
+#endif
+		hashinfo->ehash_locks = NULL;
+	}
+}
+
 extern struct inet_bind_bucket *
 		   inet_bind_bucket_create(struct kmem_cache *cachep,
 					   struct inet_bind_hashbucket *head,
@@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo,
 		sk->sk_hash = inet_sk_ehashfn(sk);
 		head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 		list = &head->chain;
-		lock = &head->lock;
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock(lock);
 	}
 	__sk_add_node(sk, list);
@@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
 		inet_listen_wlock(hashinfo);
 		lock = &hashinfo->lhash_lock;
 	} else {
-		lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock_bh(lock);
 	}
 
@@ -354,9 +412,10 @@ static inline struct sock *
 	 */
 	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
 
 	prefetch(head->chain.first);
-	read_lock(&head->lock);
+	read_lock(lock);
 	sk_for_each(sk, node, &head->chain) {
 		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
 			goto hit; /* You sunk my battleship! */
@@ -369,7 +428,7 @@ static inline struct sock *
 	}
 	sk = NULL;
 out:
-	read_unlock(&head->lock);
+	read_unlock(lock);
 	return sk;
 hit:
 	sock_hold(sk);
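
The ehash hunks above replace the embedded per-bucket rwlock with a small shared pool: one hash value indexes the bucket array through (ehash_size - 1) and, independently, the lock array through ehash_locks_mask, so e.g. a 16-CPU machine gets 2048 locks (mask 2047) guarding what may be millions of buckets. Below is a minimal user-space sketch of the same pattern using pthreads; the ht_* names are illustrative, not kernel API.

    #include <pthread.h>
    #include <stdlib.h>

    struct ht_bucket { void *chain; };

    struct ht {
    	struct ht_bucket *buckets;
    	unsigned int      size;        /* bucket count, power of 2 */
    	pthread_rwlock_t *locks;       /* far fewer locks than buckets */
    	unsigned int      locks_mask;  /* lock count - 1, power of 2 */
    };

    /* Same construct as inet_ehash_lockp(): one hash selects both a
     * bucket and, through a smaller mask, a lock shared by many buckets. */
    static pthread_rwlock_t *ht_lockp(struct ht *t, unsigned int hash)
    {
    	return &t->locks[hash & t->locks_mask];
    }

    static int ht_locks_alloc(struct ht *t, unsigned int nlocks)
    {
    	unsigned int i;

    	t->locks = malloc(nlocks * sizeof(*t->locks));
    	if (t->locks == NULL)
    		return -1;
    	for (i = 0; i < nlocks; i++)
    		pthread_rwlock_init(&t->locks[i], NULL);
    	t->locks_mask = nlocks - 1;
    	return 0;
    }

A reader then brackets its chain walk with pthread_rwlock_rdlock(ht_lockp(t, hash)) and pthread_rwlock_unlock() on the same pointer, mirroring the read_lock(lock)/read_unlock(lock) pair the lookup hunk switches to.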
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 8cadc77c7df4..ed514bfb61ba 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -185,6 +185,12 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result
 }
 
 #else /* CONFIG_IP_MULTIPLE_TABLES */
+extern void __init fib4_rules_init(void);
+
+#ifdef CONFIG_NET_CLS_ROUTE
+extern u32 fib_rules_tclass(struct fib_result *res);
+#endif
+
 #define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
 #define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
 
@@ -214,15 +220,6 @@ extern __be32 __fib_res_prefsrc(struct fib_result *res);
 /* Exported by fib_hash.c */
 extern struct fib_table *fib_hash_init(u32 id);
 
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-extern void __init fib4_rules_init(void);
-
-#ifdef CONFIG_NET_CLS_ROUTE
-extern u32 fib_rules_tclass(struct fib_result *res);
-#endif
-
-#endif
-
 static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
 {
 #ifdef CONFIG_NET_CLS_ROUTE
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 41870564df8e..67ea2c0c0ab7 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -520,6 +520,10 @@ struct ip_vs_conn {
 	spinlock_t              lock;           /* lock for state transition */
 	volatile __u16          flags;          /* status flags */
 	volatile __u16          state;          /* state info */
+	volatile __u16          old_state;      /* old state, to be used for
+						 * state transition triggered
+						 * synchronization
+						 */
 
 	/* Control members */
 	struct ip_vs_conn       *control;       /* Master control connection */
@@ -901,6 +905,10 @@ extern int ip_vs_use_count_inc(void);
 extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
+extern struct ip_vs_dest *
+ip_vs_find_dest(__be32 daddr, __be16 dport,
+		__be32 vaddr, __be16 vport, __u16 protocol);
+extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 
 /*
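
The new old_state field gives the sync code an edge trigger: a sync message to the backup needs to go out only when a connection's state actually changes, not on every packet. A hedged sketch of that use; ip_vs_sync_conn() existed in ip_vs at the time, but this helper and its exact call site are illustrative, not code from this merge.

    /* Illustrative only: edge-trigger a sync message off old_state. */
    static void sync_if_state_changed(struct ip_vs_conn *cp)
    {
    	if (cp->old_state != cp->state) {
    		ip_vs_sync_conn(cp);        /* push the update to the standby */
    		cp->old_state = cp->state;  /* arm for the next transition */
    	}
    }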
diff --git a/include/net/sock.h b/include/net/sock.h
index 20de3fa7ae40..5504fb9fa88a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -560,6 +560,14 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
 
+#ifdef CONFIG_SMP
+	/* Keeping track of sockets in use */
+	void			(*inuse_add)(struct proto *prot, int inc);
+	int			(*inuse_getval)(const struct proto *prot);
+	int			*inuse_ptr;
+#else
+	int			inuse;
+#endif
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(void);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
@@ -592,12 +600,38 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
-	struct {
-		int inuse;
-		u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
-	} stats[NR_CPUS];
 };
 
+/*
+ * Special macros to let protos use a fast version of inuse{get|add}
+ * using a static percpu variable per proto instead of an allocated one,
+ * saving one dereference.
+ * This might be changed if/when dynamic percpu vars become fast.
+ */
+#ifdef CONFIG_SMP
+# define DEFINE_PROTO_INUSE(NAME)				\
+static DEFINE_PER_CPU(int, NAME##_inuse);			\
+static void NAME##_inuse_add(struct proto *prot, int inc)	\
+{								\
+	__get_cpu_var(NAME##_inuse) += inc;			\
+}								\
+								\
+static int NAME##_inuse_getval(const struct proto *prot)	\
+{								\
+	int res = 0, cpu;					\
+								\
+	for_each_possible_cpu(cpu)				\
+		res += per_cpu(NAME##_inuse, cpu);		\
+	return res;						\
+}
+# define REF_PROTO_INUSE(NAME)					\
+	.inuse_add = NAME##_inuse_add,				\
+	.inuse_getval = NAME##_inuse_getval,
+#else
+# define DEFINE_PROTO_INUSE(NAME)
+# define REF_PROTO_INUSE(NAME)
+#endif
+
 extern int proto_register(struct proto *prot, int alloc_slab);
 extern void proto_unregister(struct proto *prot);
 
@@ -629,12 +663,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 /* Called with local bh disabled */
 static __inline__ void sock_prot_inc_use(struct proto *prot)
 {
-	prot->stats[smp_processor_id()].inuse++;
+#ifdef CONFIG_SMP
+	prot->inuse_add(prot, 1);
+#else
+	prot->inuse++;
+#endif
 }
 
 static __inline__ void sock_prot_dec_use(struct proto *prot)
 {
-	prot->stats[smp_processor_id()].inuse--;
+#ifdef CONFIG_SMP
+	prot->inuse_add(prot, -1);
+#else
+	prot->inuse--;
+#endif
+}
+
+static __inline__ int sock_prot_inuse(struct proto *proto)
+{
+#ifdef CONFIG_SMP
+	return proto->inuse_getval(proto);
+#else
+	return proto->inuse;
+#endif
 }
 
 /* With per-bucket locks this operation is not-atomic, so that
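
Taken together, the sock.h hunks swap the NR_CPUS-sized stats[] array for an optional per-protocol static percpu counter wired in through two macros. A hedged sketch of how a protocol would use them, modeled on how in-tree protocols adopted these macros around this series; the abbreviated initializer and the tcp_sockets_in_use() helper are illustrative, not the real tcp_prot definition.

    DEFINE_PROTO_INUSE(tcp)         /* on SMP: static percpu counter plus
                                     * tcp_inuse_add()/tcp_inuse_getval() */

    struct proto tcp_prot = {
    	.name		= "TCP",
    	/* ... hash/unhash/get_port and the other ops ... */
    	REF_PROTO_INUSE(tcp)	/* fills in .inuse_add / .inuse_getval */
    };

    /* Illustrative reader: one call sums the per-cpu counters on SMP,
     * or returns the plain int on UP. */
    static int tcp_sockets_in_use(void)
    {
    	return sock_prot_inuse(&tcp_prot);
    }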