diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2007-11-06 02:38:39 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-11-07 07:08:57 -0500 |
commit | 286ab3d46058840d68e5d7d52e316c1f7e98c59f (patch) | |
tree | 1d70e7895c49d2b148e026aa047efe186697fff9 /net | |
parent | 91781004b9c029ee55b7aa9ef950a373ba865dc6 (diff) |
[NET]: Define infrastructure to keep 'inuse' changes in an efficient SMP/NUMA way.
"struct proto" currently uses an array stats[NR_CPUS] to track changes to the
'inuse' socket count per protocol.
If NR_CPUS is big, this means we use a big memory area for this.
Moreover, all this memory area is located on a single node on NUMA
machines, increasing memory pressure on the boot node.
In this patch, I tried to:
- Keep a fast !CONFIG_SMP implementation
- Keep a fast CONFIG_SMP implementation for often used protocols
(tcp,udp,raw,...)
- Introduce a NUMA efficient implementation
Some helper macros are defined in include/net/sock.h
These macros take into account CONFIG_SMP
If a "struct proto" is declared without using DEFINE_PROTO_INUSE /
REF_PROTO_INUSE
macros, it will automatically use a default implementation, using a
dynamically allocated percpu zone.
This default implementation will be NUMA efficient, but might use 32/64
bytes per possible cpu
because of the current alloc_percpu() implementation.
However, it should still be better than the previous implementation based on
the stats[NR_CPUS] field.
When a "struct proto" is changed to use the new macros, we use a single
static "int" percpu variable,
lowering the memory and cpu costs, while still preserving NUMA efficiency.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/sock.c | 48 | ||||
-rw-r--r-- | net/ipv4/proc.c | 19 | ||||
-rw-r--r-- | net/ipv6/proc.c | 19 |
3 files changed, 55 insertions, 31 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 12ad2067a988..e077f263b730 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release); | |||
1801 | static DEFINE_RWLOCK(proto_list_lock); | 1801 | static DEFINE_RWLOCK(proto_list_lock); |
1802 | static LIST_HEAD(proto_list); | 1802 | static LIST_HEAD(proto_list); |
1803 | 1803 | ||
1804 | #ifdef CONFIG_SMP | ||
1805 | /* | ||
1806 | * Define default functions to keep track of inuse sockets per protocol | ||
1807 | * Note that often used protocols use dedicated functions to get a speed increase. | ||
1808 | * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE) | ||
1809 | */ | ||
1810 | static void inuse_add(struct proto *prot, int inc) | ||
1811 | { | ||
1812 | per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc; | ||
1813 | } | ||
1814 | |||
1815 | static int inuse_get(const struct proto *prot) | ||
1816 | { | ||
1817 | int res = 0, cpu; | ||
1818 | for_each_possible_cpu(cpu) | ||
1819 | res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; | ||
1820 | return res; | ||
1821 | } | ||
1822 | #endif | ||
1823 | |||
1804 | int proto_register(struct proto *prot, int alloc_slab) | 1824 | int proto_register(struct proto *prot, int alloc_slab) |
1805 | { | 1825 | { |
1806 | char *request_sock_slab_name = NULL; | 1826 | char *request_sock_slab_name = NULL; |
1807 | char *timewait_sock_slab_name; | 1827 | char *timewait_sock_slab_name; |
1808 | int rc = -ENOBUFS; | 1828 | int rc = -ENOBUFS; |
1809 | 1829 | ||
1830 | #ifdef CONFIG_SMP | ||
1831 | if (!prot->inuse_getval || !prot->inuse_add) { | ||
1832 | prot->inuse_ptr = alloc_percpu(int); | ||
1833 | if (prot->inuse_ptr == NULL) | ||
1834 | goto out; | ||
1835 | prot->inuse_getval = inuse_get; | ||
1836 | prot->inuse_add = inuse_add; | ||
1837 | } | ||
1838 | #endif | ||
1810 | if (alloc_slab) { | 1839 | if (alloc_slab) { |
1811 | prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, | 1840 | prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, |
1812 | SLAB_HWCACHE_ALIGN, NULL); | 1841 | SLAB_HWCACHE_ALIGN, NULL); |
@@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
1814 | if (prot->slab == NULL) { | 1843 | if (prot->slab == NULL) { |
1815 | printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", | 1844 | printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", |
1816 | prot->name); | 1845 | prot->name); |
1817 | goto out; | 1846 | goto out_free_inuse; |
1818 | } | 1847 | } |
1819 | 1848 | ||
1820 | if (prot->rsk_prot != NULL) { | 1849 | if (prot->rsk_prot != NULL) { |
@@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name: | |||
1873 | out_free_sock_slab: | 1902 | out_free_sock_slab: |
1874 | kmem_cache_destroy(prot->slab); | 1903 | kmem_cache_destroy(prot->slab); |
1875 | prot->slab = NULL; | 1904 | prot->slab = NULL; |
1905 | out_free_inuse: | ||
1906 | #ifdef CONFIG_SMP | ||
1907 | if (prot->inuse_ptr != NULL) { | ||
1908 | free_percpu(prot->inuse_ptr); | ||
1909 | prot->inuse_ptr = NULL; | ||
1910 | prot->inuse_getval = NULL; | ||
1911 | prot->inuse_add = NULL; | ||
1912 | } | ||
1913 | #endif | ||
1876 | goto out; | 1914 | goto out; |
1877 | } | 1915 | } |
1878 | 1916 | ||
@@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot) | |||
1884 | list_del(&prot->node); | 1922 | list_del(&prot->node); |
1885 | write_unlock(&proto_list_lock); | 1923 | write_unlock(&proto_list_lock); |
1886 | 1924 | ||
1925 | #ifdef CONFIG_SMP | ||
1926 | if (prot->inuse_ptr != NULL) { | ||
1927 | free_percpu(prot->inuse_ptr); | ||
1928 | prot->inuse_ptr = NULL; | ||
1929 | prot->inuse_getval = NULL; | ||
1930 | prot->inuse_add = NULL; | ||
1931 | } | ||
1932 | #endif | ||
1887 | if (prot->slab != NULL) { | 1933 | if (prot->slab != NULL) { |
1888 | kmem_cache_destroy(prot->slab); | 1934 | kmem_cache_destroy(prot->slab); |
1889 | prot->slab = NULL; | 1935 | prot->slab = NULL; |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ffdccc0972e0..ce34b281803f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -46,17 +46,6 @@ | |||
46 | #include <net/sock.h> | 46 | #include <net/sock.h> |
47 | #include <net/raw.h> | 47 | #include <net/raw.h> |
48 | 48 | ||
49 | static int fold_prot_inuse(struct proto *proto) | ||
50 | { | ||
51 | int res = 0; | ||
52 | int cpu; | ||
53 | |||
54 | for_each_possible_cpu(cpu) | ||
55 | res += proto->stats[cpu].inuse; | ||
56 | |||
57 | return res; | ||
58 | } | ||
59 | |||
60 | /* | 49 | /* |
61 | * Report socket allocation statistics [mea@utu.fi] | 50 | * Report socket allocation statistics [mea@utu.fi] |
62 | */ | 51 | */ |
@@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
64 | { | 53 | { |
65 | socket_seq_show(seq); | 54 | socket_seq_show(seq); |
66 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
67 | fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), | 56 | sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), |
68 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), | 57 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), |
69 | atomic_read(&tcp_memory_allocated)); | 58 | atomic_read(&tcp_memory_allocated)); |
70 | seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); | 59 | seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); |
71 | seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); | 60 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); |
72 | seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); | 61 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); |
73 | seq_printf(seq, "FRAG: inuse %d memory %d\n", | 62 | seq_printf(seq, "FRAG: inuse %d memory %d\n", |
74 | ip_frag_nqueues(), ip_frag_mem()); | 63 | ip_frag_nqueues(), ip_frag_mem()); |
75 | return 0; | 64 | return 0; |
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index be526ad92543..8631ed7fe8a9 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c | |||
@@ -32,27 +32,16 @@ | |||
32 | 32 | ||
33 | static struct proc_dir_entry *proc_net_devsnmp6; | 33 | static struct proc_dir_entry *proc_net_devsnmp6; |
34 | 34 | ||
35 | static int fold_prot_inuse(struct proto *proto) | ||
36 | { | ||
37 | int res = 0; | ||
38 | int cpu; | ||
39 | |||
40 | for_each_possible_cpu(cpu) | ||
41 | res += proto->stats[cpu].inuse; | ||
42 | |||
43 | return res; | ||
44 | } | ||
45 | |||
46 | static int sockstat6_seq_show(struct seq_file *seq, void *v) | 35 | static int sockstat6_seq_show(struct seq_file *seq, void *v) |
47 | { | 36 | { |
48 | seq_printf(seq, "TCP6: inuse %d\n", | 37 | seq_printf(seq, "TCP6: inuse %d\n", |
49 | fold_prot_inuse(&tcpv6_prot)); | 38 | sock_prot_inuse(&tcpv6_prot)); |
50 | seq_printf(seq, "UDP6: inuse %d\n", | 39 | seq_printf(seq, "UDP6: inuse %d\n", |
51 | fold_prot_inuse(&udpv6_prot)); | 40 | sock_prot_inuse(&udpv6_prot)); |
52 | seq_printf(seq, "UDPLITE6: inuse %d\n", | 41 | seq_printf(seq, "UDPLITE6: inuse %d\n", |
53 | fold_prot_inuse(&udplitev6_prot)); | 42 | sock_prot_inuse(&udplitev6_prot)); |
54 | seq_printf(seq, "RAW6: inuse %d\n", | 43 | seq_printf(seq, "RAW6: inuse %d\n", |
55 | fold_prot_inuse(&rawv6_prot)); | 44 | sock_prot_inuse(&rawv6_prot)); |
56 | seq_printf(seq, "FRAG6: inuse %d memory %d\n", | 45 | seq_printf(seq, "FRAG6: inuse %d memory %d\n", |
57 | ip6_frag_nqueues(), ip6_frag_mem()); | 46 | ip6_frag_nqueues(), ip6_frag_mem()); |
58 | return 0; | 47 | return 0; |