diff options
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- | net/ipv4/udp.c | 91 |
1 files changed, 66 insertions, 25 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6ec6a8a4a224..194bcdc6d9fc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -106,7 +106,7 @@ | |||
106 | #include <net/xfrm.h> | 106 | #include <net/xfrm.h> |
107 | #include "udp_impl.h" | 107 | #include "udp_impl.h" |
108 | 108 | ||
109 | struct udp_table udp_table; | 109 | struct udp_table udp_table __read_mostly; |
110 | EXPORT_SYMBOL(udp_table); | 110 | EXPORT_SYMBOL(udp_table); |
111 | 111 | ||
112 | int sysctl_udp_mem[3] __read_mostly; | 112 | int sysctl_udp_mem[3] __read_mostly; |
@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); | |||
121 | atomic_t udp_memory_allocated; | 121 | atomic_t udp_memory_allocated; |
122 | EXPORT_SYMBOL(udp_memory_allocated); | 122 | EXPORT_SYMBOL(udp_memory_allocated); |
123 | 123 | ||
124 | #define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) | 124 | #define MAX_UDP_PORTS 65536 |
125 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) | ||
125 | 126 | ||
126 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 127 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
127 | const struct udp_hslot *hslot, | 128 | const struct udp_hslot *hslot, |
128 | unsigned long *bitmap, | 129 | unsigned long *bitmap, |
129 | struct sock *sk, | 130 | struct sock *sk, |
130 | int (*saddr_comp)(const struct sock *sk1, | 131 | int (*saddr_comp)(const struct sock *sk1, |
131 | const struct sock *sk2)) | 132 | const struct sock *sk2), |
133 | unsigned int log) | ||
132 | { | 134 | { |
133 | struct sock *sk2; | 135 | struct sock *sk2; |
134 | struct hlist_nulls_node *node; | 136 | struct hlist_nulls_node *node; |
@@ -142,8 +144,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, | |||
142 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 144 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
143 | (*saddr_comp)(sk, sk2)) { | 145 | (*saddr_comp)(sk, sk2)) { |
144 | if (bitmap) | 146 | if (bitmap) |
145 | __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, | 147 | __set_bit(sk2->sk_hash >> log, bitmap); |
146 | bitmap); | ||
147 | else | 148 | else |
148 | return 1; | 149 | return 1; |
149 | } | 150 | } |
@@ -180,13 +181,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
180 | /* | 181 | /* |
181 | * force rand to be an odd multiple of UDP_HTABLE_SIZE | 182 | * force rand to be an odd multiple of the hash table size (udptable->mask + 1) |
182 | */ | 183 | */ |
183 | rand = (rand | 1) * UDP_HTABLE_SIZE; | 184 | rand = (rand | 1) * (udptable->mask + 1); |
184 | for (last = first + UDP_HTABLE_SIZE; first != last; first++) { | 185 | for (last = first + udptable->mask + 1; |
185 | hslot = &udptable->hash[udp_hashfn(net, first)]; | 186 | first != last; |
187 | first++) { | ||
188 | hslot = udp_hashslot(udptable, net, first); | ||
186 | bitmap_zero(bitmap, PORTS_PER_CHAIN); | 189 | bitmap_zero(bitmap, PORTS_PER_CHAIN); |
187 | spin_lock_bh(&hslot->lock); | 190 | spin_lock_bh(&hslot->lock); |
188 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, | 191 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, |
189 | saddr_comp); | 192 | saddr_comp, udptable->log); |
190 | 193 | ||
191 | snum = first; | 194 | snum = first; |
192 | /* | 195 | /* |
@@ -196,7 +199,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
196 | */ | 199 | */ |
197 | do { | 200 | do { |
198 | if (low <= snum && snum <= high && | 201 | if (low <= snum && snum <= high && |
199 | !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) | 202 | !test_bit(snum >> udptable->log, bitmap)) |
200 | goto found; | 203 | goto found; |
201 | snum += rand; | 204 | snum += rand; |
202 | } while (snum != first); | 205 | } while (snum != first); |
@@ -204,9 +207,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
204 | } | 207 | } |
205 | goto fail; | 208 | goto fail; |
206 | } else { | 209 | } else { |
207 | hslot = &udptable->hash[udp_hashfn(net, snum)]; | 210 | hslot = udp_hashslot(udptable, net, snum); |
208 | spin_lock_bh(&hslot->lock); | 211 | spin_lock_bh(&hslot->lock); |
209 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) | 212 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, |
213 | saddr_comp, 0)) | ||
210 | goto fail_unlock; | 214 | goto fail_unlock; |
211 | } | 215 | } |
212 | found: | 216 | found: |
@@ -283,7 +287,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
283 | struct sock *sk, *result; | 287 | struct sock *sk, *result; |
284 | struct hlist_nulls_node *node; | 288 | struct hlist_nulls_node *node; |
285 | unsigned short hnum = ntohs(dport); | 289 | unsigned short hnum = ntohs(dport); |
286 | unsigned int hash = udp_hashfn(net, hnum); | 290 | unsigned int hash = udp_hashfn(net, hnum, udptable->mask); |
287 | struct udp_hslot *hslot = &udptable->hash[hash]; | 291 | struct udp_hslot *hslot = &udptable->hash[hash]; |
288 | int score, badness; | 292 | int score, badness; |
289 | 293 | ||
@@ -1013,8 +1017,8 @@ void udp_lib_unhash(struct sock *sk) | |||
1013 | { | 1017 | { |
1014 | if (sk_hashed(sk)) { | 1018 | if (sk_hashed(sk)) { |
1015 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 1019 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
1016 | unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); | 1020 | struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), |
1017 | struct udp_hslot *hslot = &udptable->hash[hash]; | 1021 | sk->sk_hash); |
1018 | 1022 | ||
1019 | spin_lock_bh(&hslot->lock); | 1023 | spin_lock_bh(&hslot->lock); |
1020 | if (sk_nulls_del_node_init_rcu(sk)) { | 1024 | if (sk_nulls_del_node_init_rcu(sk)) { |
@@ -1169,7 +1173,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1169 | struct udp_table *udptable) | 1173 | struct udp_table *udptable) |
1170 | { | 1174 | { |
1171 | struct sock *sk; | 1175 | struct sock *sk; |
1172 | struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; | 1176 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); |
1173 | int dif; | 1177 | int dif; |
1174 | 1178 | ||
1175 | spin_lock(&hslot->lock); | 1179 | spin_lock(&hslot->lock); |
@@ -1609,9 +1613,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) | |||
1609 | struct udp_iter_state *state = seq->private; | 1613 | struct udp_iter_state *state = seq->private; |
1610 | struct net *net = seq_file_net(seq); | 1614 | struct net *net = seq_file_net(seq); |
1611 | 1615 | ||
1612 | for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1616 | for (state->bucket = start; state->bucket <= state->udp_table->mask; |
1617 | ++state->bucket) { | ||
1613 | struct hlist_nulls_node *node; | 1618 | struct hlist_nulls_node *node; |
1614 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; | 1619 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; |
1620 | |||
1621 | if (hlist_nulls_empty(&hslot->head)) | ||
1622 | continue; | ||
1623 | |||
1615 | spin_lock_bh(&hslot->lock); | 1624 | spin_lock_bh(&hslot->lock); |
1616 | sk_nulls_for_each(sk, node, &hslot->head) { | 1625 | sk_nulls_for_each(sk, node, &hslot->head) { |
1617 | if (!net_eq(sock_net(sk), net)) | 1626 | if (!net_eq(sock_net(sk), net)) |
@@ -1636,7 +1645,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | |||
1636 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1645 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1637 | 1646 | ||
1638 | if (!sk) { | 1647 | if (!sk) { |
1639 | if (state->bucket < UDP_HTABLE_SIZE) | 1648 | if (state->bucket <= state->udp_table->mask) |
1640 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1649 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1641 | return udp_get_first(seq, state->bucket + 1); | 1650 | return udp_get_first(seq, state->bucket + 1); |
1642 | } | 1651 | } |
@@ -1656,7 +1665,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | |||
1656 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) | 1665 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) |
1657 | { | 1666 | { |
1658 | struct udp_iter_state *state = seq->private; | 1667 | struct udp_iter_state *state = seq->private; |
1659 | state->bucket = UDP_HTABLE_SIZE; | 1668 | state->bucket = MAX_UDP_PORTS; |
1660 | 1669 | ||
1661 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; | 1670 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; |
1662 | } | 1671 | } |
@@ -1678,7 +1687,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) | |||
1678 | { | 1687 | { |
1679 | struct udp_iter_state *state = seq->private; | 1688 | struct udp_iter_state *state = seq->private; |
1680 | 1689 | ||
1681 | if (state->bucket < UDP_HTABLE_SIZE) | 1690 | if (state->bucket <= state->udp_table->mask) |
1682 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1691 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1683 | } | 1692 | } |
1684 | 1693 | ||
@@ -1738,7 +1747,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, | |||
1738 | __u16 destp = ntohs(inet->dport); | 1747 | __u16 destp = ntohs(inet->dport); |
1739 | __u16 srcp = ntohs(inet->sport); | 1748 | __u16 srcp = ntohs(inet->sport); |
1740 | 1749 | ||
1741 | seq_printf(f, "%4d: %08X:%04X %08X:%04X" | 1750 | seq_printf(f, "%5d: %08X:%04X %08X:%04X" |
1742 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", | 1751 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", |
1743 | bucket, src, srcp, dest, destp, sp->sk_state, | 1752 | bucket, src, srcp, dest, destp, sp->sk_state, |
1744 | sk_wmem_alloc_get(sp), | 1753 | sk_wmem_alloc_get(sp), |
@@ -1804,11 +1813,43 @@ void udp4_proc_exit(void) | |||
1804 | } | 1813 | } |
1805 | #endif /* CONFIG_PROC_FS */ | 1814 | #endif /* CONFIG_PROC_FS */ |
1806 | 1815 | ||
1807 | void __init udp_table_init(struct udp_table *table) | 1816 | static __initdata unsigned long uhash_entries; |
1817 | static int __init set_uhash_entries(char *str) | ||
1808 | { | 1818 | { |
1809 | int i; | 1819 | if (!str) |
1820 | return 0; | ||
1821 | uhash_entries = simple_strtoul(str, &str, 0); | ||
1822 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) | ||
1823 | uhash_entries = UDP_HTABLE_SIZE_MIN; | ||
1824 | return 1; | ||
1825 | } | ||
1826 | __setup("uhash_entries=", set_uhash_entries); | ||
1810 | 1827 | ||
1811 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { | 1828 | void __init udp_table_init(struct udp_table *table, const char *name) |
1829 | { | ||
1830 | unsigned int i; | ||
1831 | |||
1832 | if (!CONFIG_BASE_SMALL) | ||
1833 | table->hash = alloc_large_system_hash(name, | ||
1834 | sizeof(struct udp_hslot), | ||
1835 | uhash_entries, | ||
1836 | 21, /* one slot per 2 MB */ | ||
1837 | 0, | ||
1838 | &table->log, | ||
1839 | &table->mask, | ||
1840 | 64 * 1024); | ||
1841 | /* | ||
1842 | * Make sure hash table has the minimum size | ||
1843 | */ | ||
1844 | if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { | ||
1845 | table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * | ||
1846 | sizeof(struct udp_hslot), GFP_KERNEL); | ||
1847 | if (!table->hash) | ||
1848 | panic(name); | ||
1849 | table->log = ilog2(UDP_HTABLE_SIZE_MIN); | ||
1850 | table->mask = UDP_HTABLE_SIZE_MIN - 1; | ||
1851 | } | ||
1852 | for (i = 0; i <= table->mask; i++) { | ||
1812 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); | 1853 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); |
1813 | spin_lock_init(&table->hash[i].lock); | 1854 | spin_lock_init(&table->hash[i].lock); |
1814 | } | 1855 | } |
@@ -1818,7 +1859,7 @@ void __init udp_init(void) | |||
1818 | { | 1859 | { |
1819 | unsigned long nr_pages, limit; | 1860 | unsigned long nr_pages, limit; |
1820 | 1861 | ||
1821 | udp_table_init(&udp_table); | 1862 | udp_table_init(&udp_table, "UDP"); |
1822 | /* Set the pressure threshold up by the same strategy of TCP. It is a | 1863 | /* Set the pressure threshold up by the same strategy of TCP. It is a |
1823 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | 1864 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing |
1824 | * toward zero with the amount of memory, with a floor of 128 pages. | 1865 | * toward zero with the amount of memory, with a floor of 128 pages. |