aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2007-11-06 02:38:39 -0500
committerDavid S. Miller <davem@sunset.davemloft.net>2007-11-07 07:08:57 -0500
commit286ab3d46058840d68e5d7d52e316c1f7e98c59f (patch)
tree1d70e7895c49d2b148e026aa047efe186697fff9
parent91781004b9c029ee55b7aa9ef950a373ba865dc6 (diff)
[NET]: Define infrastructure to keep 'inuse' changes in an efficent SMP/NUMA way.
"struct proto" currently uses an array stats[NR_CPUS] to track change on 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node. In this patch, I tried to : - Keep a fast !CONFIG_SMP implementation - Keep a fast CONFIG_SMP implementation for often used protocols (tcp,udp,raw,...) - Introduce a NUMA efficient implementation Some helper macros are defined in include/net/sock.h These macros take into account CONFIG_SMP If a "struct proto" is declared without using DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of current alloc_percpu() implementation. However it still should be better than previous implementation based on stats[NR_CPUS] field. When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs, still preserving NUMA efficiency. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sock.h63
-rw-r--r--net/core/sock.c48
-rw-r--r--net/ipv4/proc.c19
-rw-r--r--net/ipv6/proc.c19
4 files changed, 112 insertions, 37 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 20de3fa7ae40..5504fb9fa88a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -560,6 +560,14 @@ struct proto {
560 void (*unhash)(struct sock *sk); 560 void (*unhash)(struct sock *sk);
561 int (*get_port)(struct sock *sk, unsigned short snum); 561 int (*get_port)(struct sock *sk, unsigned short snum);
562 562
563#ifdef CONFIG_SMP
564 /* Keeping track of sockets in use */
565 void (*inuse_add)(struct proto *prot, int inc);
566 int (*inuse_getval)(const struct proto *prot);
567 int *inuse_ptr;
568#else
569 int inuse;
570#endif
563 /* Memory pressure */ 571 /* Memory pressure */
564 void (*enter_memory_pressure)(void); 572 void (*enter_memory_pressure)(void);
565 atomic_t *memory_allocated; /* Current allocated memory. */ 573 atomic_t *memory_allocated; /* Current allocated memory. */
@@ -592,12 +600,38 @@ struct proto {
592#ifdef SOCK_REFCNT_DEBUG 600#ifdef SOCK_REFCNT_DEBUG
593 atomic_t socks; 601 atomic_t socks;
594#endif 602#endif
595 struct {
596 int inuse;
597 u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
598 } stats[NR_CPUS];
599}; 603};
600 604
605/*
606 * Special macros to let protos use a fast version of inuse{get|add}
607 * using a static percpu variable per proto instead of an allocated one,
608 * saving one dereference.
609 * This might be changed if/when dynamic percpu vars become fast.
610 */
611#ifdef CONFIG_SMP
612# define DEFINE_PROTO_INUSE(NAME) \
613static DEFINE_PER_CPU(int, NAME##_inuse); \
614static void NAME##_inuse_add(struct proto *prot, int inc) \
615{ \
616 __get_cpu_var(NAME##_inuse) += inc; \
617} \
618 \
619static int NAME##_inuse_getval(const struct proto *prot)\
620{ \
621 int res = 0, cpu; \
622 \
623 for_each_possible_cpu(cpu) \
624 res += per_cpu(NAME##_inuse, cpu); \
625 return res; \
626}
627# define REF_PROTO_INUSE(NAME) \
628 .inuse_add = NAME##_inuse_add, \
629 .inuse_getval = NAME##_inuse_getval,
630#else
631# define DEFINE_PROTO_INUSE(NAME)
632# define REF_PROTO_INUSE(NAME)
633#endif
634
601extern int proto_register(struct proto *prot, int alloc_slab); 635extern int proto_register(struct proto *prot, int alloc_slab);
602extern void proto_unregister(struct proto *prot); 636extern void proto_unregister(struct proto *prot);
603 637
@@ -629,12 +663,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
629/* Called with local bh disabled */ 663/* Called with local bh disabled */
630static __inline__ void sock_prot_inc_use(struct proto *prot) 664static __inline__ void sock_prot_inc_use(struct proto *prot)
631{ 665{
632 prot->stats[smp_processor_id()].inuse++; 666#ifdef CONFIG_SMP
667 prot->inuse_add(prot, 1);
668#else
669 prot->inuse++;
670#endif
633} 671}
634 672
635static __inline__ void sock_prot_dec_use(struct proto *prot) 673static __inline__ void sock_prot_dec_use(struct proto *prot)
636{ 674{
637 prot->stats[smp_processor_id()].inuse--; 675#ifdef CONFIG_SMP
676 prot->inuse_add(prot, -1);
677#else
678 prot->inuse--;
679#endif
680}
681
682static __inline__ int sock_prot_inuse(struct proto *proto)
683{
684#ifdef CONFIG_SMP
685 return proto->inuse_getval(proto);
686#else
687 return proto->inuse;
688#endif
638} 689}
639 690
640/* With per-bucket locks this operation is not-atomic, so that 691/* With per-bucket locks this operation is not-atomic, so that
diff --git a/net/core/sock.c b/net/core/sock.c
index 12ad2067a988..e077f263b730 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release);
1801static DEFINE_RWLOCK(proto_list_lock); 1801static DEFINE_RWLOCK(proto_list_lock);
1802static LIST_HEAD(proto_list); 1802static LIST_HEAD(proto_list);
1803 1803
1804#ifdef CONFIG_SMP
1805/*
1806 * Define default functions to keep track of inuse sockets per protocol
1807 * Note that often used protocols use dedicated functions to get a speed increase.
1808 * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
1809 */
1810static void inuse_add(struct proto *prot, int inc)
1811{
1812 per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
1813}
1814
1815static int inuse_get(const struct proto *prot)
1816{
1817 int res = 0, cpu;
1818 for_each_possible_cpu(cpu)
1819 res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
1820 return res;
1821}
1822#endif
1823
1804int proto_register(struct proto *prot, int alloc_slab) 1824int proto_register(struct proto *prot, int alloc_slab)
1805{ 1825{
1806 char *request_sock_slab_name = NULL; 1826 char *request_sock_slab_name = NULL;
1807 char *timewait_sock_slab_name; 1827 char *timewait_sock_slab_name;
1808 int rc = -ENOBUFS; 1828 int rc = -ENOBUFS;
1809 1829
1830#ifdef CONFIG_SMP
1831 if (!prot->inuse_getval || !prot->inuse_add) {
1832 prot->inuse_ptr = alloc_percpu(int);
1833 if (prot->inuse_ptr == NULL)
1834 goto out;
1835 prot->inuse_getval = inuse_get;
1836 prot->inuse_add = inuse_add;
1837 }
1838#endif
1810 if (alloc_slab) { 1839 if (alloc_slab) {
1811 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 1840 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1812 SLAB_HWCACHE_ALIGN, NULL); 1841 SLAB_HWCACHE_ALIGN, NULL);
@@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab)
1814 if (prot->slab == NULL) { 1843 if (prot->slab == NULL) {
1815 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", 1844 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1816 prot->name); 1845 prot->name);
1817 goto out; 1846 goto out_free_inuse;
1818 } 1847 }
1819 1848
1820 if (prot->rsk_prot != NULL) { 1849 if (prot->rsk_prot != NULL) {
@@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name:
1873out_free_sock_slab: 1902out_free_sock_slab:
1874 kmem_cache_destroy(prot->slab); 1903 kmem_cache_destroy(prot->slab);
1875 prot->slab = NULL; 1904 prot->slab = NULL;
1905out_free_inuse:
1906#ifdef CONFIG_SMP
1907 if (prot->inuse_ptr != NULL) {
1908 free_percpu(prot->inuse_ptr);
1909 prot->inuse_ptr = NULL;
1910 prot->inuse_getval = NULL;
1911 prot->inuse_add = NULL;
1912 }
1913#endif
1876 goto out; 1914 goto out;
1877} 1915}
1878 1916
@@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot)
1884 list_del(&prot->node); 1922 list_del(&prot->node);
1885 write_unlock(&proto_list_lock); 1923 write_unlock(&proto_list_lock);
1886 1924
1925#ifdef CONFIG_SMP
1926 if (prot->inuse_ptr != NULL) {
1927 free_percpu(prot->inuse_ptr);
1928 prot->inuse_ptr = NULL;
1929 prot->inuse_getval = NULL;
1930 prot->inuse_add = NULL;
1931 }
1932#endif
1887 if (prot->slab != NULL) { 1933 if (prot->slab != NULL) {
1888 kmem_cache_destroy(prot->slab); 1934 kmem_cache_destroy(prot->slab);
1889 prot->slab = NULL; 1935 prot->slab = NULL;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ffdccc0972e0..ce34b281803f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,17 +46,6 @@
46#include <net/sock.h> 46#include <net/sock.h>
47#include <net/raw.h> 47#include <net/raw.h>
48 48
49static int fold_prot_inuse(struct proto *proto)
50{
51 int res = 0;
52 int cpu;
53
54 for_each_possible_cpu(cpu)
55 res += proto->stats[cpu].inuse;
56
57 return res;
58}
59
60/* 49/*
61 * Report socket allocation statistics [mea@utu.fi] 50 * Report socket allocation statistics [mea@utu.fi]
62 */ 51 */
@@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
64{ 53{
65 socket_seq_show(seq); 54 socket_seq_show(seq);
66 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 55 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
67 fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), 56 sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
68 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), 57 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
69 atomic_read(&tcp_memory_allocated)); 58 atomic_read(&tcp_memory_allocated));
70 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); 59 seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
71 seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); 60 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
72 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); 61 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
73 seq_printf(seq, "FRAG: inuse %d memory %d\n", 62 seq_printf(seq, "FRAG: inuse %d memory %d\n",
74 ip_frag_nqueues(), ip_frag_mem()); 63 ip_frag_nqueues(), ip_frag_mem());
75 return 0; 64 return 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index be526ad92543..8631ed7fe8a9 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -32,27 +32,16 @@
32 32
33static struct proc_dir_entry *proc_net_devsnmp6; 33static struct proc_dir_entry *proc_net_devsnmp6;
34 34
35static int fold_prot_inuse(struct proto *proto)
36{
37 int res = 0;
38 int cpu;
39
40 for_each_possible_cpu(cpu)
41 res += proto->stats[cpu].inuse;
42
43 return res;
44}
45
46static int sockstat6_seq_show(struct seq_file *seq, void *v) 35static int sockstat6_seq_show(struct seq_file *seq, void *v)
47{ 36{
48 seq_printf(seq, "TCP6: inuse %d\n", 37 seq_printf(seq, "TCP6: inuse %d\n",
49 fold_prot_inuse(&tcpv6_prot)); 38 sock_prot_inuse(&tcpv6_prot));
50 seq_printf(seq, "UDP6: inuse %d\n", 39 seq_printf(seq, "UDP6: inuse %d\n",
51 fold_prot_inuse(&udpv6_prot)); 40 sock_prot_inuse(&udpv6_prot));
52 seq_printf(seq, "UDPLITE6: inuse %d\n", 41 seq_printf(seq, "UDPLITE6: inuse %d\n",
53 fold_prot_inuse(&udplitev6_prot)); 42 sock_prot_inuse(&udplitev6_prot));
54 seq_printf(seq, "RAW6: inuse %d\n", 43 seq_printf(seq, "RAW6: inuse %d\n",
55 fold_prot_inuse(&rawv6_prot)); 44 sock_prot_inuse(&rawv6_prot));
56 seq_printf(seq, "FRAG6: inuse %d memory %d\n", 45 seq_printf(seq, "FRAG6: inuse %d memory %d\n",
57 ip6_frag_nqueues(), ip6_frag_mem()); 46 ip6_frag_nqueues(), ip6_frag_mem());
58 return 0; 47 return 0;