about summary refs log tree commit diff stats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- net/ipv4/udp.c 207
1 files changed, 123 insertions, 84 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2095abc3caba..2a6c491f97d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -104,12 +104,8 @@
104#include <net/xfrm.h> 104#include <net/xfrm.h>
105#include "udp_impl.h" 105#include "udp_impl.h"
106 106
107/* 107struct udp_table udp_table;
108 * Snmp MIB for the UDP layer 108EXPORT_SYMBOL(udp_table);
109 */
110
111struct hlist_head udp_hash[UDP_HTABLE_SIZE];
112DEFINE_RWLOCK(udp_hash_lock);
113 109
114int sysctl_udp_mem[3] __read_mostly; 110int sysctl_udp_mem[3] __read_mostly;
115int sysctl_udp_rmem_min __read_mostly; 111int sysctl_udp_rmem_min __read_mostly;
@@ -123,7 +119,7 @@ atomic_t udp_memory_allocated;
123EXPORT_SYMBOL(udp_memory_allocated); 119EXPORT_SYMBOL(udp_memory_allocated);
124 120
125static int udp_lib_lport_inuse(struct net *net, __u16 num, 121static int udp_lib_lport_inuse(struct net *net, __u16 num,
126 const struct hlist_head udptable[], 122 const struct udp_hslot *hslot,
127 struct sock *sk, 123 struct sock *sk,
128 int (*saddr_comp)(const struct sock *sk1, 124 int (*saddr_comp)(const struct sock *sk1,
129 const struct sock *sk2)) 125 const struct sock *sk2))
@@ -131,7 +127,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
131 struct sock *sk2; 127 struct sock *sk2;
132 struct hlist_node *node; 128 struct hlist_node *node;
133 129
134 sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) 130 sk_for_each(sk2, node, &hslot->head)
135 if (net_eq(sock_net(sk2), net) && 131 if (net_eq(sock_net(sk2), net) &&
136 sk2 != sk && 132 sk2 != sk &&
137 sk2->sk_hash == num && 133 sk2->sk_hash == num &&
@@ -154,12 +150,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
154 int (*saddr_comp)(const struct sock *sk1, 150 int (*saddr_comp)(const struct sock *sk1,
155 const struct sock *sk2 ) ) 151 const struct sock *sk2 ) )
156{ 152{
157 struct hlist_head *udptable = sk->sk_prot->h.udp_hash; 153 struct udp_hslot *hslot;
154 struct udp_table *udptable = sk->sk_prot->h.udp_table;
158 int error = 1; 155 int error = 1;
159 struct net *net = sock_net(sk); 156 struct net *net = sock_net(sk);
160 157
161 write_lock_bh(&udp_hash_lock);
162
163 if (!snum) { 158 if (!snum) {
164 int low, high, remaining; 159 int low, high, remaining;
165 unsigned rand; 160 unsigned rand;
@@ -171,26 +166,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
171 rand = net_random(); 166 rand = net_random();
172 snum = first = rand % remaining + low; 167 snum = first = rand % remaining + low;
173 rand |= 1; 168 rand |= 1;
174 while (udp_lib_lport_inuse(net, snum, udptable, sk, 169 for (;;) {
175 saddr_comp)) { 170 hslot = &udptable->hash[udp_hashfn(net, snum)];
171 spin_lock_bh(&hslot->lock);
172 if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
173 break;
174 spin_unlock_bh(&hslot->lock);
176 do { 175 do {
177 snum = snum + rand; 176 snum = snum + rand;
178 } while (snum < low || snum > high); 177 } while (snum < low || snum > high);
179 if (snum == first) 178 if (snum == first)
180 goto fail; 179 goto fail;
181 } 180 }
182 } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) 181 } else {
183 goto fail; 182 hslot = &udptable->hash[udp_hashfn(net, snum)];
184 183 spin_lock_bh(&hslot->lock);
184 if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
185 goto fail_unlock;
186 }
185 inet_sk(sk)->num = snum; 187 inet_sk(sk)->num = snum;
186 sk->sk_hash = snum; 188 sk->sk_hash = snum;
187 if (sk_unhashed(sk)) { 189 if (sk_unhashed(sk)) {
188 sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); 190 sk_add_node(sk, &hslot->head);
189 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 191 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
190 } 192 }
191 error = 0; 193 error = 0;
194fail_unlock:
195 spin_unlock_bh(&hslot->lock);
192fail: 196fail:
193 write_unlock_bh(&udp_hash_lock);
194 return error; 197 return error;
195} 198}
196 199
@@ -208,63 +211,73 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
208 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); 211 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
209} 212}
210 213
214static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
215 unsigned short hnum,
216 __be16 sport, __be32 daddr, __be16 dport, int dif)
217{
218 int score = -1;
219
220 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
221 !ipv6_only_sock(sk)) {
222 struct inet_sock *inet = inet_sk(sk);
223
224 score = (sk->sk_family == PF_INET ? 1 : 0);
225 if (inet->rcv_saddr) {
226 if (inet->rcv_saddr != daddr)
227 return -1;
228 score += 2;
229 }
230 if (inet->daddr) {
231 if (inet->daddr != saddr)
232 return -1;
233 score += 2;
234 }
235 if (inet->dport) {
236 if (inet->dport != sport)
237 return -1;
238 score += 2;
239 }
240 if (sk->sk_bound_dev_if) {
241 if (sk->sk_bound_dev_if != dif)
242 return -1;
243 score += 2;
244 }
245 }
246 return score;
247}
248
211/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 249/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
212 * harder than this. -DaveM 250 * harder than this. -DaveM
213 */ 251 */
214static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, 252static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
215 __be16 sport, __be32 daddr, __be16 dport, 253 __be16 sport, __be32 daddr, __be16 dport,
216 int dif, struct hlist_head udptable[]) 254 int dif, struct udp_table *udptable)
217{ 255{
218 struct sock *sk, *result = NULL; 256 struct sock *sk, *result = NULL;
219 struct hlist_node *node; 257 struct hlist_node *node;
220 unsigned short hnum = ntohs(dport); 258 unsigned short hnum = ntohs(dport);
221 int badness = -1; 259 unsigned int hash = udp_hashfn(net, hnum);
222 260 struct udp_hslot *hslot = &udptable->hash[hash];
223 read_lock(&udp_hash_lock); 261 int score, badness = -1;
224 sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { 262
225 struct inet_sock *inet = inet_sk(sk); 263 spin_lock(&hslot->lock);
226 264 sk_for_each(sk, node, &hslot->head) {
227 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 265 score = compute_score(sk, net, saddr, hnum, sport,
228 !ipv6_only_sock(sk)) { 266 daddr, dport, dif);
229 int score = (sk->sk_family == PF_INET ? 1 : 0); 267 if (score > badness) {
230 if (inet->rcv_saddr) { 268 result = sk;
231 if (inet->rcv_saddr != daddr) 269 badness = score;
232 continue;
233 score+=2;
234 }
235 if (inet->daddr) {
236 if (inet->daddr != saddr)
237 continue;
238 score+=2;
239 }
240 if (inet->dport) {
241 if (inet->dport != sport)
242 continue;
243 score+=2;
244 }
245 if (sk->sk_bound_dev_if) {
246 if (sk->sk_bound_dev_if != dif)
247 continue;
248 score+=2;
249 }
250 if (score == 9) {
251 result = sk;
252 break;
253 } else if (score > badness) {
254 result = sk;
255 badness = score;
256 }
257 } 270 }
258 } 271 }
259 if (result) 272 if (result)
260 sock_hold(result); 273 sock_hold(result);
261 read_unlock(&udp_hash_lock); 274 spin_unlock(&hslot->lock);
262 return result; 275 return result;
263} 276}
264 277
265static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, 278static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
266 __be16 sport, __be16 dport, 279 __be16 sport, __be16 dport,
267 struct hlist_head udptable[]) 280 struct udp_table *udptable)
268{ 281{
269 struct sock *sk; 282 struct sock *sk;
270 const struct iphdr *iph = ip_hdr(skb); 283 const struct iphdr *iph = ip_hdr(skb);
@@ -280,7 +293,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
280struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 293struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
281 __be32 daddr, __be16 dport, int dif) 294 __be32 daddr, __be16 dport, int dif)
282{ 295{
283 return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); 296 return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
284} 297}
285EXPORT_SYMBOL_GPL(udp4_lib_lookup); 298EXPORT_SYMBOL_GPL(udp4_lib_lookup);
286 299
@@ -323,7 +336,7 @@ found:
323 * to find the appropriate port. 336 * to find the appropriate port.
324 */ 337 */
325 338
326void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) 339void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
327{ 340{
328 struct inet_sock *inet; 341 struct inet_sock *inet;
329 struct iphdr *iph = (struct iphdr*)skb->data; 342 struct iphdr *iph = (struct iphdr*)skb->data;
@@ -392,7 +405,7 @@ out:
392 405
393void udp_err(struct sk_buff *skb, u32 info) 406void udp_err(struct sk_buff *skb, u32 info)
394{ 407{
395 __udp4_lib_err(skb, info, udp_hash); 408 __udp4_lib_err(skb, info, &udp_table);
396} 409}
397 410
398/* 411/*
@@ -933,6 +946,21 @@ int udp_disconnect(struct sock *sk, int flags)
933 return 0; 946 return 0;
934} 947}
935 948
949void udp_lib_unhash(struct sock *sk)
950{
951 struct udp_table *udptable = sk->sk_prot->h.udp_table;
952 unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
953 struct udp_hslot *hslot = &udptable->hash[hash];
954
955 spin_lock(&hslot->lock);
956 if (sk_del_node_init(sk)) {
957 inet_sk(sk)->num = 0;
958 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
959 }
960 spin_unlock(&hslot->lock);
961}
962EXPORT_SYMBOL(udp_lib_unhash);
963
936static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 964static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
937{ 965{
938 int is_udplite = IS_UDPLITE(sk); 966 int is_udplite = IS_UDPLITE(sk);
@@ -1071,13 +1099,14 @@ drop:
1071static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 1099static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1072 struct udphdr *uh, 1100 struct udphdr *uh,
1073 __be32 saddr, __be32 daddr, 1101 __be32 saddr, __be32 daddr,
1074 struct hlist_head udptable[]) 1102 struct udp_table *udptable)
1075{ 1103{
1076 struct sock *sk; 1104 struct sock *sk;
1105 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
1077 int dif; 1106 int dif;
1078 1107
1079 read_lock(&udp_hash_lock); 1108 spin_lock(&hslot->lock);
1080 sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); 1109 sk = sk_head(&hslot->head);
1081 dif = skb->dev->ifindex; 1110 dif = skb->dev->ifindex;
1082 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); 1111 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1083 if (sk) { 1112 if (sk) {
@@ -1102,7 +1131,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1102 } while (sknext); 1131 } while (sknext);
1103 } else 1132 } else
1104 kfree_skb(skb); 1133 kfree_skb(skb);
1105 read_unlock(&udp_hash_lock); 1134 spin_unlock(&hslot->lock);
1106 return 0; 1135 return 0;
1107} 1136}
1108 1137
@@ -1148,7 +1177,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1148 * All we need to do is get the socket, and then do a checksum. 1177 * All we need to do is get the socket, and then do a checksum.
1149 */ 1178 */
1150 1179
1151int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], 1180int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
1152 int proto) 1181 int proto)
1153{ 1182{
1154 struct sock *sk; 1183 struct sock *sk;
@@ -1246,7 +1275,7 @@ drop:
1246 1275
1247int udp_rcv(struct sk_buff *skb) 1276int udp_rcv(struct sk_buff *skb)
1248{ 1277{
1249 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); 1278 return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
1250} 1279}
1251 1280
1252void udp_destroy_sock(struct sock *sk) 1281void udp_destroy_sock(struct sock *sk)
@@ -1488,7 +1517,7 @@ struct proto udp_prot = {
1488 .sysctl_wmem = &sysctl_udp_wmem_min, 1517 .sysctl_wmem = &sysctl_udp_wmem_min,
1489 .sysctl_rmem = &sysctl_udp_rmem_min, 1518 .sysctl_rmem = &sysctl_udp_rmem_min,
1490 .obj_size = sizeof(struct udp_sock), 1519 .obj_size = sizeof(struct udp_sock),
1491 .h.udp_hash = udp_hash, 1520 .h.udp_table = &udp_table,
1492#ifdef CONFIG_COMPAT 1521#ifdef CONFIG_COMPAT
1493 .compat_setsockopt = compat_udp_setsockopt, 1522 .compat_setsockopt = compat_udp_setsockopt,
1494 .compat_getsockopt = compat_udp_getsockopt, 1523 .compat_getsockopt = compat_udp_getsockopt,
@@ -1498,20 +1527,23 @@ struct proto udp_prot = {
1498/* ------------------------------------------------------------------------ */ 1527/* ------------------------------------------------------------------------ */
1499#ifdef CONFIG_PROC_FS 1528#ifdef CONFIG_PROC_FS
1500 1529
1501static struct sock *udp_get_first(struct seq_file *seq) 1530static struct sock *udp_get_first(struct seq_file *seq, int start)
1502{ 1531{
1503 struct sock *sk; 1532 struct sock *sk;
1504 struct udp_iter_state *state = seq->private; 1533 struct udp_iter_state *state = seq->private;
1505 struct net *net = seq_file_net(seq); 1534 struct net *net = seq_file_net(seq);
1506 1535
1507 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1536 for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
1508 struct hlist_node *node; 1537 struct hlist_node *node;
1509 sk_for_each(sk, node, state->hashtable + state->bucket) { 1538 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
1539 spin_lock_bh(&hslot->lock);
1540 sk_for_each(sk, node, &hslot->head) {
1510 if (!net_eq(sock_net(sk), net)) 1541 if (!net_eq(sock_net(sk), net))
1511 continue; 1542 continue;
1512 if (sk->sk_family == state->family) 1543 if (sk->sk_family == state->family)
1513 goto found; 1544 goto found;
1514 } 1545 }
1546 spin_unlock_bh(&hslot->lock);
1515 } 1547 }
1516 sk = NULL; 1548 sk = NULL;
1517found: 1549found:
@@ -1525,20 +1557,18 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1525 1557
1526 do { 1558 do {
1527 sk = sk_next(sk); 1559 sk = sk_next(sk);
1528try_again:
1529 ;
1530 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 1560 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1531 1561
1532 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { 1562 if (!sk) {
1533 sk = sk_head(state->hashtable + state->bucket); 1563 spin_unlock(&state->udp_table->hash[state->bucket].lock);
1534 goto try_again; 1564 return udp_get_first(seq, state->bucket + 1);
1535 } 1565 }
1536 return sk; 1566 return sk;
1537} 1567}
1538 1568
1539static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) 1569static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1540{ 1570{
1541 struct sock *sk = udp_get_first(seq); 1571 struct sock *sk = udp_get_first(seq, 0);
1542 1572
1543 if (sk) 1573 if (sk)
1544 while (pos && (sk = udp_get_next(seq, sk)) != NULL) 1574 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
@@ -1547,9 +1577,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1547} 1577}
1548 1578
1549static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1579static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1550 __acquires(udp_hash_lock)
1551{ 1580{
1552 read_lock(&udp_hash_lock);
1553 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 1581 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1554} 1582}
1555 1583
@@ -1567,9 +1595,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1567} 1595}
1568 1596
1569static void udp_seq_stop(struct seq_file *seq, void *v) 1597static void udp_seq_stop(struct seq_file *seq, void *v)
1570 __releases(udp_hash_lock)
1571{ 1598{
1572 read_unlock(&udp_hash_lock); 1599 struct udp_iter_state *state = seq->private;
1600
1601 if (state->bucket < UDP_HTABLE_SIZE)
1602 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1573} 1603}
1574 1604
1575static int udp_seq_open(struct inode *inode, struct file *file) 1605static int udp_seq_open(struct inode *inode, struct file *file)
@@ -1585,7 +1615,7 @@ static int udp_seq_open(struct inode *inode, struct file *file)
1585 1615
1586 s = ((struct seq_file *)file->private_data)->private; 1616 s = ((struct seq_file *)file->private_data)->private;
1587 s->family = afinfo->family; 1617 s->family = afinfo->family;
1588 s->hashtable = afinfo->hashtable; 1618 s->udp_table = afinfo->udp_table;
1589 return err; 1619 return err;
1590} 1620}
1591 1621
@@ -1657,7 +1687,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
1657static struct udp_seq_afinfo udp4_seq_afinfo = { 1687static struct udp_seq_afinfo udp4_seq_afinfo = {
1658 .name = "udp", 1688 .name = "udp",
1659 .family = AF_INET, 1689 .family = AF_INET,
1660 .hashtable = udp_hash, 1690 .udp_table = &udp_table,
1661 .seq_fops = { 1691 .seq_fops = {
1662 .owner = THIS_MODULE, 1692 .owner = THIS_MODULE,
1663 }, 1693 },
@@ -1692,10 +1722,21 @@ void udp4_proc_exit(void)
1692} 1722}
1693#endif /* CONFIG_PROC_FS */ 1723#endif /* CONFIG_PROC_FS */
1694 1724
1725void __init udp_table_init(struct udp_table *table)
1726{
1727 int i;
1728
1729 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
1730 INIT_HLIST_HEAD(&table->hash[i].head);
1731 spin_lock_init(&table->hash[i].lock);
1732 }
1733}
1734
1695void __init udp_init(void) 1735void __init udp_init(void)
1696{ 1736{
1697 unsigned long limit; 1737 unsigned long limit;
1698 1738
1739 udp_table_init(&udp_table);
1699 /* Set the pressure threshold up by the same strategy of TCP. It is a 1740 /* Set the pressure threshold up by the same strategy of TCP. It is a
1700 * fraction of global memory that is up to 1/2 at 256 MB, decreasing 1741 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1701 * toward zero with the amount of memory, with a floor of 128 pages. 1742 * toward zero with the amount of memory, with a floor of 128 pages.
@@ -1712,8 +1753,6 @@ void __init udp_init(void)
1712} 1753}
1713 1754
1714EXPORT_SYMBOL(udp_disconnect); 1755EXPORT_SYMBOL(udp_disconnect);
1715EXPORT_SYMBOL(udp_hash);
1716EXPORT_SYMBOL(udp_hash_lock);
1717EXPORT_SYMBOL(udp_ioctl); 1756EXPORT_SYMBOL(udp_ioctl);
1718EXPORT_SYMBOL(udp_prot); 1757EXPORT_SYMBOL(udp_prot);
1719EXPORT_SYMBOL(udp_sendmsg); 1758EXPORT_SYMBOL(udp_sendmsg);