aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2009-10-06 20:37:59 -0400
committerDavid S. Miller <davem@davemloft.net>2009-10-08 01:00:22 -0400
commitf86dcc5aa8c7908f2c287e7a211228df599e3e71 (patch)
tree1721c94d0254b337b0668b2e331771dfef4b142d
parent8a6dfd43d1891882f8ca05d73aa7735fb0edae3b (diff)
udp: dynamically size hash tables at boot time
UDP_HTABLE_SIZE was initially defined to 128, which is a bit small for several setups. 4000 active UDP sockets -> 32 sockets per chain on average. An incoming frame has to look up all sockets to find the best match, so long chains hurt latency.
Instead of a fixed-size hash table that can't be perfect for every need, let the UDP stack choose its table size at boot time, as tcp and ip route do, using the alloc_large_system_hash() helper.
Add an optional boot parameter, uhash_entries=x, so that an admin can force a size between 256 and 65536 if needed, like thash_entries and rhash_entries.
dmesg logs two new lines: [ 0.647039] UDP hash table entries: 512 (order: 0, 4096 bytes) [ 0.647099] UDP Lite hash table entries: 512 (order: 0, 4096 bytes)
Maximal size on 64bit arches would be 65536 slots, i.e. 1 MByte for non-debugging spinlocks.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/kernel-parameters.txt3
-rw-r--r--include/linux/udp.h6
-rw-r--r--include/net/udp.h13
-rw-r--r--net/ipv4/udp.c91
-rw-r--r--net/ipv4/udplite.c4
-rw-r--r--net/ipv6/udp.c6
6 files changed, 87 insertions, 36 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6fa7292947e..02df20be776 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2589,6 +2589,9 @@ and is between 256 and 4096 characters. It is defined in the file
2589 uart6850= [HW,OSS] 2589 uart6850= [HW,OSS]
2590 Format: <io>,<irq> 2590 Format: <io>,<irq>
2591 2591
2592 uhash_entries= [KNL,NET]
2593 Set number of hash buckets for UDP/UDP-Lite connections
2594
2592 uhci-hcd.ignore_oc= 2595 uhci-hcd.ignore_oc=
2593 [USB] Ignore overcurrent events (default N). 2596 [USB] Ignore overcurrent events (default N).
2594 Some badly-designed motherboards generate lots of 2597 Some badly-designed motherboards generate lots of
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0cf5c4c0ec8..832361e3e59 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -45,11 +45,11 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
45 return (struct udphdr *)skb_transport_header(skb); 45 return (struct udphdr *)skb_transport_header(skb);
46} 46}
47 47
48#define UDP_HTABLE_SIZE 128 48#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
49 49
50static inline int udp_hashfn(struct net *net, const unsigned num) 50static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
51{ 51{
52 return (num + net_hash_mix(net)) & (UDP_HTABLE_SIZE - 1); 52 return (num + net_hash_mix(net)) & mask;
53} 53}
54 54
55struct udp_sock { 55struct udp_sock {
diff --git a/include/net/udp.h b/include/net/udp.h
index f98abd2ce70..22aa2e7eb1d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -54,12 +54,19 @@ struct udp_hslot {
54 struct hlist_nulls_head head; 54 struct hlist_nulls_head head;
55 spinlock_t lock; 55 spinlock_t lock;
56} __attribute__((aligned(2 * sizeof(long)))); 56} __attribute__((aligned(2 * sizeof(long))));
57
57struct udp_table { 58struct udp_table {
58 struct udp_hslot hash[UDP_HTABLE_SIZE]; 59 struct udp_hslot *hash;
60 unsigned int mask;
61 unsigned int log;
59}; 62};
60extern struct udp_table udp_table; 63extern struct udp_table udp_table;
61extern void udp_table_init(struct udp_table *); 64extern void udp_table_init(struct udp_table *, const char *);
62 65static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
66 struct net *net, unsigned num)
67{
68 return &table->hash[udp_hashfn(net, num, table->mask)];
69}
63 70
64/* Note: this must match 'valbool' in sock_setsockopt */ 71/* Note: this must match 'valbool' in sock_setsockopt */
65#define UDP_CSUM_NOXMIT 1 72#define UDP_CSUM_NOXMIT 1
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6ec6a8a4a22..194bcdc6d9f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,7 +106,7 @@
106#include <net/xfrm.h> 106#include <net/xfrm.h>
107#include "udp_impl.h" 107#include "udp_impl.h"
108 108
109struct udp_table udp_table; 109struct udp_table udp_table __read_mostly;
110EXPORT_SYMBOL(udp_table); 110EXPORT_SYMBOL(udp_table);
111 111
112int sysctl_udp_mem[3] __read_mostly; 112int sysctl_udp_mem[3] __read_mostly;
@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
121atomic_t udp_memory_allocated; 121atomic_t udp_memory_allocated;
122EXPORT_SYMBOL(udp_memory_allocated); 122EXPORT_SYMBOL(udp_memory_allocated);
123 123
124#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) 124#define MAX_UDP_PORTS 65536
125#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
125 126
126static int udp_lib_lport_inuse(struct net *net, __u16 num, 127static int udp_lib_lport_inuse(struct net *net, __u16 num,
127 const struct udp_hslot *hslot, 128 const struct udp_hslot *hslot,
128 unsigned long *bitmap, 129 unsigned long *bitmap,
129 struct sock *sk, 130 struct sock *sk,
130 int (*saddr_comp)(const struct sock *sk1, 131 int (*saddr_comp)(const struct sock *sk1,
131 const struct sock *sk2)) 132 const struct sock *sk2),
133 unsigned int log)
132{ 134{
133 struct sock *sk2; 135 struct sock *sk2;
134 struct hlist_nulls_node *node; 136 struct hlist_nulls_node *node;
@@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
142 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 144 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
143 (*saddr_comp)(sk, sk2)) { 145 (*saddr_comp)(sk, sk2)) {
144 if (bitmap) 146 if (bitmap)
145 __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, 147 __set_bit(sk2->sk_hash >> log, bitmap);
146 bitmap);
147 else 148 else
148 return 1; 149 return 1;
149 } 150 }
@@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
180 /* 181 /*
181 * force rand to be an odd multiple of UDP_HTABLE_SIZE 182 * force rand to be an odd multiple of UDP_HTABLE_SIZE
182 */ 183 */
183 rand = (rand | 1) * UDP_HTABLE_SIZE; 184 rand = (rand | 1) * (udptable->mask + 1);
184 for (last = first + UDP_HTABLE_SIZE; first != last; first++) { 185 for (last = first + udptable->mask + 1;
185 hslot = &udptable->hash[udp_hashfn(net, first)]; 186 first != last;
187 first++) {
188 hslot = udp_hashslot(udptable, net, first);
186 bitmap_zero(bitmap, PORTS_PER_CHAIN); 189 bitmap_zero(bitmap, PORTS_PER_CHAIN);
187 spin_lock_bh(&hslot->lock); 190 spin_lock_bh(&hslot->lock);
188 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, 191 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
189 saddr_comp); 192 saddr_comp, udptable->log);
190 193
191 snum = first; 194 snum = first;
192 /* 195 /*
@@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
196 */ 199 */
197 do { 200 do {
198 if (low <= snum && snum <= high && 201 if (low <= snum && snum <= high &&
199 !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) 202 !test_bit(snum >> udptable->log, bitmap))
200 goto found; 203 goto found;
201 snum += rand; 204 snum += rand;
202 } while (snum != first); 205 } while (snum != first);
@@ -204,9 +207,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
204 } 207 }
205 goto fail; 208 goto fail;
206 } else { 209 } else {
207 hslot = &udptable->hash[udp_hashfn(net, snum)]; 210 hslot = udp_hashslot(udptable, net, snum);
208 spin_lock_bh(&hslot->lock); 211 spin_lock_bh(&hslot->lock);
209 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) 212 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
213 saddr_comp, 0))
210 goto fail_unlock; 214 goto fail_unlock;
211 } 215 }
212found: 216found:
@@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
283 struct sock *sk, *result; 287 struct sock *sk, *result;
284 struct hlist_nulls_node *node; 288 struct hlist_nulls_node *node;
285 unsigned short hnum = ntohs(dport); 289 unsigned short hnum = ntohs(dport);
286 unsigned int hash = udp_hashfn(net, hnum); 290 unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
287 struct udp_hslot *hslot = &udptable->hash[hash]; 291 struct udp_hslot *hslot = &udptable->hash[hash];
288 int score, badness; 292 int score, badness;
289 293
@@ -1013,8 +1017,8 @@ void udp_lib_unhash(struct sock *sk)
1013{ 1017{
1014 if (sk_hashed(sk)) { 1018 if (sk_hashed(sk)) {
1015 struct udp_table *udptable = sk->sk_prot->h.udp_table; 1019 struct udp_table *udptable = sk->sk_prot->h.udp_table;
1016 unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); 1020 struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk),
1017 struct udp_hslot *hslot = &udptable->hash[hash]; 1021 sk->sk_hash);
1018 1022
1019 spin_lock_bh(&hslot->lock); 1023 spin_lock_bh(&hslot->lock);
1020 if (sk_nulls_del_node_init_rcu(sk)) { 1024 if (sk_nulls_del_node_init_rcu(sk)) {
@@ -1169,7 +1173,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1169 struct udp_table *udptable) 1173 struct udp_table *udptable)
1170{ 1174{
1171 struct sock *sk; 1175 struct sock *sk;
1172 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; 1176 struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
1173 int dif; 1177 int dif;
1174 1178
1175 spin_lock(&hslot->lock); 1179 spin_lock(&hslot->lock);
@@ -1609,9 +1613,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
1609 struct udp_iter_state *state = seq->private; 1613 struct udp_iter_state *state = seq->private;
1610 struct net *net = seq_file_net(seq); 1614 struct net *net = seq_file_net(seq);
1611 1615
1612 for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1616 for (state->bucket = start; state->bucket <= state->udp_table->mask;
1617 ++state->bucket) {
1613 struct hlist_nulls_node *node; 1618 struct hlist_nulls_node *node;
1614 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 1619 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
1620
1621 if (hlist_nulls_empty(&hslot->head))
1622 continue;
1623
1615 spin_lock_bh(&hslot->lock); 1624 spin_lock_bh(&hslot->lock);
1616 sk_nulls_for_each(sk, node, &hslot->head) { 1625 sk_nulls_for_each(sk, node, &hslot->head) {
1617 if (!net_eq(sock_net(sk), net)) 1626 if (!net_eq(sock_net(sk), net))
@@ -1636,7 +1645,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1636 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 1645 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1637 1646
1638 if (!sk) { 1647 if (!sk) {
1639 if (state->bucket < UDP_HTABLE_SIZE) 1648 if (state->bucket <= state->udp_table->mask)
1640 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1649 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1641 return udp_get_first(seq, state->bucket + 1); 1650 return udp_get_first(seq, state->bucket + 1);
1642 } 1651 }
@@ -1656,7 +1665,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1656static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1665static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1657{ 1666{
1658 struct udp_iter_state *state = seq->private; 1667 struct udp_iter_state *state = seq->private;
1659 state->bucket = UDP_HTABLE_SIZE; 1668 state->bucket = MAX_UDP_PORTS;
1660 1669
1661 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 1670 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1662} 1671}
@@ -1678,7 +1687,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
1678{ 1687{
1679 struct udp_iter_state *state = seq->private; 1688 struct udp_iter_state *state = seq->private;
1680 1689
1681 if (state->bucket < UDP_HTABLE_SIZE) 1690 if (state->bucket <= state->udp_table->mask)
1682 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1691 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1683} 1692}
1684 1693
@@ -1738,7 +1747,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
1738 __u16 destp = ntohs(inet->dport); 1747 __u16 destp = ntohs(inet->dport);
1739 __u16 srcp = ntohs(inet->sport); 1748 __u16 srcp = ntohs(inet->sport);
1740 1749
1741 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 1750 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
1742 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", 1751 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
1743 bucket, src, srcp, dest, destp, sp->sk_state, 1752 bucket, src, srcp, dest, destp, sp->sk_state,
1744 sk_wmem_alloc_get(sp), 1753 sk_wmem_alloc_get(sp),
@@ -1804,11 +1813,43 @@ void udp4_proc_exit(void)
1804} 1813}
1805#endif /* CONFIG_PROC_FS */ 1814#endif /* CONFIG_PROC_FS */
1806 1815
1807void __init udp_table_init(struct udp_table *table) 1816static __initdata unsigned long uhash_entries;
1817static int __init set_uhash_entries(char *str)
1808{ 1818{
1809 int i; 1819 if (!str)
1820 return 0;
1821 uhash_entries = simple_strtoul(str, &str, 0);
1822 if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
1823 uhash_entries = UDP_HTABLE_SIZE_MIN;
1824 return 1;
1825}
1826__setup("uhash_entries=", set_uhash_entries);
1810 1827
1811 for (i = 0; i < UDP_HTABLE_SIZE; i++) { 1828void __init udp_table_init(struct udp_table *table, const char *name)
1829{
1830 unsigned int i;
1831
1832 if (!CONFIG_BASE_SMALL)
1833 table->hash = alloc_large_system_hash(name,
1834 sizeof(struct udp_hslot),
1835 uhash_entries,
1836 21, /* one slot per 2 MB */
1837 0,
1838 &table->log,
1839 &table->mask,
1840 64 * 1024);
1841 /*
1842 * Make sure hash table has the minimum size
1843 */
1844 if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
1845 table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
1846 sizeof(struct udp_hslot), GFP_KERNEL);
1847 if (!table->hash)
1848 panic(name);
1849 table->log = ilog2(UDP_HTABLE_SIZE_MIN);
1850 table->mask = UDP_HTABLE_SIZE_MIN - 1;
1851 }
1852 for (i = 0; i <= table->mask; i++) {
1812 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); 1853 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
1813 spin_lock_init(&table->hash[i].lock); 1854 spin_lock_init(&table->hash[i].lock);
1814 } 1855 }
@@ -1818,7 +1859,7 @@ void __init udp_init(void)
1818{ 1859{
1819 unsigned long nr_pages, limit; 1860 unsigned long nr_pages, limit;
1820 1861
1821 udp_table_init(&udp_table); 1862 udp_table_init(&udp_table, "UDP");
1822 /* Set the pressure threshold up by the same strategy of TCP. It is a 1863 /* Set the pressure threshold up by the same strategy of TCP. It is a
1823 * fraction of global memory that is up to 1/2 at 256 MB, decreasing 1864 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1824 * toward zero with the amount of memory, with a floor of 128 pages. 1865 * toward zero with the amount of memory, with a floor of 128 pages.
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 95248d7f75e..470c504b955 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -12,7 +12,7 @@
12 */ 12 */
13#include "udp_impl.h" 13#include "udp_impl.h"
14 14
15struct udp_table udplite_table; 15struct udp_table udplite_table __read_mostly;
16EXPORT_SYMBOL(udplite_table); 16EXPORT_SYMBOL(udplite_table);
17 17
18static int udplite_rcv(struct sk_buff *skb) 18static int udplite_rcv(struct sk_buff *skb)
@@ -110,7 +110,7 @@ static inline int udplite4_proc_init(void)
110 110
111void __init udplite4_register(void) 111void __init udplite4_register(void)
112{ 112{
113 udp_table_init(&udplite_table); 113 udp_table_init(&udplite_table, "UDP-Lite");
114 if (proto_register(&udplite_prot, 1)) 114 if (proto_register(&udplite_prot, 1))
115 goto out_register_err; 115 goto out_register_err;
116 116
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6a303ec834..ff778c172ef 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -132,7 +132,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
132 struct sock *sk, *result; 132 struct sock *sk, *result;
133 struct hlist_nulls_node *node; 133 struct hlist_nulls_node *node;
134 unsigned short hnum = ntohs(dport); 134 unsigned short hnum = ntohs(dport);
135 unsigned int hash = udp_hashfn(net, hnum); 135 unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
136 struct udp_hslot *hslot = &udptable->hash[hash]; 136 struct udp_hslot *hslot = &udptable->hash[hash];
137 int score, badness; 137 int score, badness;
138 138
@@ -452,7 +452,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
452{ 452{
453 struct sock *sk, *sk2; 453 struct sock *sk, *sk2;
454 const struct udphdr *uh = udp_hdr(skb); 454 const struct udphdr *uh = udp_hdr(skb);
455 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; 455 struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
456 int dif; 456 int dif;
457 457
458 spin_lock(&hslot->lock); 458 spin_lock(&hslot->lock);
@@ -1197,7 +1197,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
1197 destp = ntohs(inet->dport); 1197 destp = ntohs(inet->dport);
1198 srcp = ntohs(inet->sport); 1198 srcp = ntohs(inet->sport);
1199 seq_printf(seq, 1199 seq_printf(seq,
1200 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1200 "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1201 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", 1201 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
1202 bucket, 1202 bucket,
1203 src->s6_addr32[0], src->s6_addr32[1], 1203 src->s6_addr32[0], src->s6_addr32[1],