diff options
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- | net/ipv4/udp.c | 267 |
1 files changed, 164 insertions, 103 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 98c1fd09be88..da869ce041d9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -81,6 +81,8 @@ | |||
81 | #include <asm/uaccess.h> | 81 | #include <asm/uaccess.h> |
82 | #include <asm/ioctls.h> | 82 | #include <asm/ioctls.h> |
83 | #include <linux/bootmem.h> | 83 | #include <linux/bootmem.h> |
84 | #include <linux/highmem.h> | ||
85 | #include <linux/swap.h> | ||
84 | #include <linux/types.h> | 86 | #include <linux/types.h> |
85 | #include <linux/fcntl.h> | 87 | #include <linux/fcntl.h> |
86 | #include <linux/module.h> | 88 | #include <linux/module.h> |
@@ -104,12 +106,8 @@ | |||
104 | #include <net/xfrm.h> | 106 | #include <net/xfrm.h> |
105 | #include "udp_impl.h" | 107 | #include "udp_impl.h" |
106 | 108 | ||
107 | /* | 109 | struct udp_table udp_table; |
108 | * Snmp MIB for the UDP layer | 110 | EXPORT_SYMBOL(udp_table); |
109 | */ | ||
110 | |||
111 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; | ||
112 | DEFINE_RWLOCK(udp_hash_lock); | ||
113 | 111 | ||
114 | int sysctl_udp_mem[3] __read_mostly; | 112 | int sysctl_udp_mem[3] __read_mostly; |
115 | int sysctl_udp_rmem_min __read_mostly; | 113 | int sysctl_udp_rmem_min __read_mostly; |
@@ -123,15 +121,15 @@ atomic_t udp_memory_allocated; | |||
123 | EXPORT_SYMBOL(udp_memory_allocated); | 121 | EXPORT_SYMBOL(udp_memory_allocated); |
124 | 122 | ||
125 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 123 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
126 | const struct hlist_head udptable[], | 124 | const struct udp_hslot *hslot, |
127 | struct sock *sk, | 125 | struct sock *sk, |
128 | int (*saddr_comp)(const struct sock *sk1, | 126 | int (*saddr_comp)(const struct sock *sk1, |
129 | const struct sock *sk2)) | 127 | const struct sock *sk2)) |
130 | { | 128 | { |
131 | struct sock *sk2; | 129 | struct sock *sk2; |
132 | struct hlist_node *node; | 130 | struct hlist_nulls_node *node; |
133 | 131 | ||
134 | sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) | 132 | sk_nulls_for_each(sk2, node, &hslot->head) |
135 | if (net_eq(sock_net(sk2), net) && | 133 | if (net_eq(sock_net(sk2), net) && |
136 | sk2 != sk && | 134 | sk2 != sk && |
137 | sk2->sk_hash == num && | 135 | sk2->sk_hash == num && |
@@ -154,12 +152,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
154 | int (*saddr_comp)(const struct sock *sk1, | 152 | int (*saddr_comp)(const struct sock *sk1, |
155 | const struct sock *sk2 ) ) | 153 | const struct sock *sk2 ) ) |
156 | { | 154 | { |
157 | struct hlist_head *udptable = sk->sk_prot->h.udp_hash; | 155 | struct udp_hslot *hslot; |
156 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | ||
158 | int error = 1; | 157 | int error = 1; |
159 | struct net *net = sock_net(sk); | 158 | struct net *net = sock_net(sk); |
160 | 159 | ||
161 | write_lock_bh(&udp_hash_lock); | ||
162 | |||
163 | if (!snum) { | 160 | if (!snum) { |
164 | int low, high, remaining; | 161 | int low, high, remaining; |
165 | unsigned rand; | 162 | unsigned rand; |
@@ -171,26 +168,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
171 | rand = net_random(); | 168 | rand = net_random(); |
172 | snum = first = rand % remaining + low; | 169 | snum = first = rand % remaining + low; |
173 | rand |= 1; | 170 | rand |= 1; |
174 | while (udp_lib_lport_inuse(net, snum, udptable, sk, | 171 | for (;;) { |
175 | saddr_comp)) { | 172 | hslot = &udptable->hash[udp_hashfn(net, snum)]; |
173 | spin_lock_bh(&hslot->lock); | ||
174 | if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) | ||
175 | break; | ||
176 | spin_unlock_bh(&hslot->lock); | ||
176 | do { | 177 | do { |
177 | snum = snum + rand; | 178 | snum = snum + rand; |
178 | } while (snum < low || snum > high); | 179 | } while (snum < low || snum > high); |
179 | if (snum == first) | 180 | if (snum == first) |
180 | goto fail; | 181 | goto fail; |
181 | } | 182 | } |
182 | } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) | 183 | } else { |
183 | goto fail; | 184 | hslot = &udptable->hash[udp_hashfn(net, snum)]; |
184 | 185 | spin_lock_bh(&hslot->lock); | |
186 | if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) | ||
187 | goto fail_unlock; | ||
188 | } | ||
185 | inet_sk(sk)->num = snum; | 189 | inet_sk(sk)->num = snum; |
186 | sk->sk_hash = snum; | 190 | sk->sk_hash = snum; |
187 | if (sk_unhashed(sk)) { | 191 | if (sk_unhashed(sk)) { |
188 | sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); | 192 | sk_nulls_add_node_rcu(sk, &hslot->head); |
189 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 193 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
190 | } | 194 | } |
191 | error = 0; | 195 | error = 0; |
196 | fail_unlock: | ||
197 | spin_unlock_bh(&hslot->lock); | ||
192 | fail: | 198 | fail: |
193 | write_unlock_bh(&udp_hash_lock); | ||
194 | return error; | 199 | return error; |
195 | } | 200 | } |
196 | 201 | ||
@@ -208,63 +213,91 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
208 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); | 213 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); |
209 | } | 214 | } |
210 | 215 | ||
216 | static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, | ||
217 | unsigned short hnum, | ||
218 | __be16 sport, __be32 daddr, __be16 dport, int dif) | ||
219 | { | ||
220 | int score = -1; | ||
221 | |||
222 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | ||
223 | !ipv6_only_sock(sk)) { | ||
224 | struct inet_sock *inet = inet_sk(sk); | ||
225 | |||
226 | score = (sk->sk_family == PF_INET ? 1 : 0); | ||
227 | if (inet->rcv_saddr) { | ||
228 | if (inet->rcv_saddr != daddr) | ||
229 | return -1; | ||
230 | score += 2; | ||
231 | } | ||
232 | if (inet->daddr) { | ||
233 | if (inet->daddr != saddr) | ||
234 | return -1; | ||
235 | score += 2; | ||
236 | } | ||
237 | if (inet->dport) { | ||
238 | if (inet->dport != sport) | ||
239 | return -1; | ||
240 | score += 2; | ||
241 | } | ||
242 | if (sk->sk_bound_dev_if) { | ||
243 | if (sk->sk_bound_dev_if != dif) | ||
244 | return -1; | ||
245 | score += 2; | ||
246 | } | ||
247 | } | ||
248 | return score; | ||
249 | } | ||
250 | |||
211 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 251 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
212 | * harder than this. -DaveM | 252 | * harder than this. -DaveM |
213 | */ | 253 | */ |
214 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | 254 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, |
215 | __be16 sport, __be32 daddr, __be16 dport, | 255 | __be16 sport, __be32 daddr, __be16 dport, |
216 | int dif, struct hlist_head udptable[]) | 256 | int dif, struct udp_table *udptable) |
217 | { | 257 | { |
218 | struct sock *sk, *result = NULL; | 258 | struct sock *sk, *result; |
219 | struct hlist_node *node; | 259 | struct hlist_nulls_node *node; |
220 | unsigned short hnum = ntohs(dport); | 260 | unsigned short hnum = ntohs(dport); |
221 | int badness = -1; | 261 | unsigned int hash = udp_hashfn(net, hnum); |
222 | 262 | struct udp_hslot *hslot = &udptable->hash[hash]; | |
223 | read_lock(&udp_hash_lock); | 263 | int score, badness; |
224 | sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { | 264 | |
225 | struct inet_sock *inet = inet_sk(sk); | 265 | rcu_read_lock(); |
226 | 266 | begin: | |
227 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | 267 | result = NULL; |
228 | !ipv6_only_sock(sk)) { | 268 | badness = -1; |
229 | int score = (sk->sk_family == PF_INET ? 1 : 0); | 269 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { |
230 | if (inet->rcv_saddr) { | 270 | score = compute_score(sk, net, saddr, hnum, sport, |
231 | if (inet->rcv_saddr != daddr) | 271 | daddr, dport, dif); |
232 | continue; | 272 | if (score > badness) { |
233 | score+=2; | 273 | result = sk; |
234 | } | 274 | badness = score; |
235 | if (inet->daddr) { | ||
236 | if (inet->daddr != saddr) | ||
237 | continue; | ||
238 | score+=2; | ||
239 | } | ||
240 | if (inet->dport) { | ||
241 | if (inet->dport != sport) | ||
242 | continue; | ||
243 | score+=2; | ||
244 | } | ||
245 | if (sk->sk_bound_dev_if) { | ||
246 | if (sk->sk_bound_dev_if != dif) | ||
247 | continue; | ||
248 | score+=2; | ||
249 | } | ||
250 | if (score == 9) { | ||
251 | result = sk; | ||
252 | break; | ||
253 | } else if (score > badness) { | ||
254 | result = sk; | ||
255 | badness = score; | ||
256 | } | ||
257 | } | 275 | } |
258 | } | 276 | } |
259 | if (result) | 277 | /* |
260 | sock_hold(result); | 278 | * if the nulls value we got at the end of this lookup is |
261 | read_unlock(&udp_hash_lock); | 279 | * not the expected one, we must restart lookup. |
280 | * We probably met an item that was moved to another chain. | ||
281 | */ | ||
282 | if (get_nulls_value(node) != hash) | ||
283 | goto begin; | ||
284 | |||
285 | if (result) { | ||
286 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | ||
287 | result = NULL; | ||
288 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, | ||
289 | daddr, dport, dif) < badness)) { | ||
290 | sock_put(result); | ||
291 | goto begin; | ||
292 | } | ||
293 | } | ||
294 | rcu_read_unlock(); | ||
262 | return result; | 295 | return result; |
263 | } | 296 | } |
264 | 297 | ||
265 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | 298 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, |
266 | __be16 sport, __be16 dport, | 299 | __be16 sport, __be16 dport, |
267 | struct hlist_head udptable[]) | 300 | struct udp_table *udptable) |
268 | { | 301 | { |
269 | struct sock *sk; | 302 | struct sock *sk; |
270 | const struct iphdr *iph = ip_hdr(skb); | 303 | const struct iphdr *iph = ip_hdr(skb); |
@@ -280,7 +313,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | |||
280 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, | 313 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, |
281 | __be32 daddr, __be16 dport, int dif) | 314 | __be32 daddr, __be16 dport, int dif) |
282 | { | 315 | { |
283 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); | 316 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); |
284 | } | 317 | } |
285 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); | 318 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); |
286 | 319 | ||
@@ -289,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, | |||
289 | __be16 rmt_port, __be32 rmt_addr, | 322 | __be16 rmt_port, __be32 rmt_addr, |
290 | int dif) | 323 | int dif) |
291 | { | 324 | { |
292 | struct hlist_node *node; | 325 | struct hlist_nulls_node *node; |
293 | struct sock *s = sk; | 326 | struct sock *s = sk; |
294 | unsigned short hnum = ntohs(loc_port); | 327 | unsigned short hnum = ntohs(loc_port); |
295 | 328 | ||
296 | sk_for_each_from(s, node) { | 329 | sk_nulls_for_each_from(s, node) { |
297 | struct inet_sock *inet = inet_sk(s); | 330 | struct inet_sock *inet = inet_sk(s); |
298 | 331 | ||
299 | if (!net_eq(sock_net(s), net) || | 332 | if (!net_eq(sock_net(s), net) || |
@@ -324,7 +357,7 @@ found: | |||
324 | * to find the appropriate port. | 357 | * to find the appropriate port. |
325 | */ | 358 | */ |
326 | 359 | ||
327 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | 360 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
328 | { | 361 | { |
329 | struct inet_sock *inet; | 362 | struct inet_sock *inet; |
330 | struct iphdr *iph = (struct iphdr*)skb->data; | 363 | struct iphdr *iph = (struct iphdr*)skb->data; |
@@ -393,7 +426,7 @@ out: | |||
393 | 426 | ||
394 | void udp_err(struct sk_buff *skb, u32 info) | 427 | void udp_err(struct sk_buff *skb, u32 info) |
395 | { | 428 | { |
396 | __udp4_lib_err(skb, info, udp_hash); | 429 | __udp4_lib_err(skb, info, &udp_table); |
397 | } | 430 | } |
398 | 431 | ||
399 | /* | 432 | /* |
@@ -935,6 +968,21 @@ int udp_disconnect(struct sock *sk, int flags) | |||
935 | return 0; | 968 | return 0; |
936 | } | 969 | } |
937 | 970 | ||
971 | void udp_lib_unhash(struct sock *sk) | ||
972 | { | ||
973 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | ||
974 | unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); | ||
975 | struct udp_hslot *hslot = &udptable->hash[hash]; | ||
976 | |||
977 | spin_lock_bh(&hslot->lock); | ||
978 | if (sk_nulls_del_node_init_rcu(sk)) { | ||
979 | inet_sk(sk)->num = 0; | ||
980 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | ||
981 | } | ||
982 | spin_unlock_bh(&hslot->lock); | ||
983 | } | ||
984 | EXPORT_SYMBOL(udp_lib_unhash); | ||
985 | |||
938 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 986 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
939 | { | 987 | { |
940 | int is_udplite = IS_UDPLITE(sk); | 988 | int is_udplite = IS_UDPLITE(sk); |
@@ -1073,13 +1121,14 @@ drop: | |||
1073 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | 1121 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, |
1074 | struct udphdr *uh, | 1122 | struct udphdr *uh, |
1075 | __be32 saddr, __be32 daddr, | 1123 | __be32 saddr, __be32 daddr, |
1076 | struct hlist_head udptable[]) | 1124 | struct udp_table *udptable) |
1077 | { | 1125 | { |
1078 | struct sock *sk; | 1126 | struct sock *sk; |
1127 | struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; | ||
1079 | int dif; | 1128 | int dif; |
1080 | 1129 | ||
1081 | read_lock(&udp_hash_lock); | 1130 | spin_lock(&hslot->lock); |
1082 | sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); | 1131 | sk = sk_nulls_head(&hslot->head); |
1083 | dif = skb->dev->ifindex; | 1132 | dif = skb->dev->ifindex; |
1084 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 1133 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
1085 | if (sk) { | 1134 | if (sk) { |
@@ -1088,7 +1137,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1088 | do { | 1137 | do { |
1089 | struct sk_buff *skb1 = skb; | 1138 | struct sk_buff *skb1 = skb; |
1090 | 1139 | ||
1091 | sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, | 1140 | sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, |
1092 | daddr, uh->source, saddr, | 1141 | daddr, uh->source, saddr, |
1093 | dif); | 1142 | dif); |
1094 | if (sknext) | 1143 | if (sknext) |
@@ -1105,7 +1154,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1105 | } while (sknext); | 1154 | } while (sknext); |
1106 | } else | 1155 | } else |
1107 | kfree_skb(skb); | 1156 | kfree_skb(skb); |
1108 | read_unlock(&udp_hash_lock); | 1157 | spin_unlock(&hslot->lock); |
1109 | return 0; | 1158 | return 0; |
1110 | } | 1159 | } |
1111 | 1160 | ||
@@ -1151,7 +1200,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1151 | * All we need to do is get the socket, and then do a checksum. | 1200 | * All we need to do is get the socket, and then do a checksum. |
1152 | */ | 1201 | */ |
1153 | 1202 | ||
1154 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | 1203 | int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, |
1155 | int proto) | 1204 | int proto) |
1156 | { | 1205 | { |
1157 | struct sock *sk; | 1206 | struct sock *sk; |
@@ -1219,13 +1268,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1219 | return 0; | 1268 | return 0; |
1220 | 1269 | ||
1221 | short_packet: | 1270 | short_packet: |
1222 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n", | 1271 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", |
1223 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1272 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1224 | NIPQUAD(saddr), | 1273 | &saddr, |
1225 | ntohs(uh->source), | 1274 | ntohs(uh->source), |
1226 | ulen, | 1275 | ulen, |
1227 | skb->len, | 1276 | skb->len, |
1228 | NIPQUAD(daddr), | 1277 | &daddr, |
1229 | ntohs(uh->dest)); | 1278 | ntohs(uh->dest)); |
1230 | goto drop; | 1279 | goto drop; |
1231 | 1280 | ||
@@ -1234,11 +1283,11 @@ csum_error: | |||
1234 | * RFC1122: OK. Discards the bad packet silently (as far as | 1283 | * RFC1122: OK. Discards the bad packet silently (as far as |
1235 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1284 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1236 | */ | 1285 | */ |
1237 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n", | 1286 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", |
1238 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1287 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1239 | NIPQUAD(saddr), | 1288 | &saddr, |
1240 | ntohs(uh->source), | 1289 | ntohs(uh->source), |
1241 | NIPQUAD(daddr), | 1290 | &daddr, |
1242 | ntohs(uh->dest), | 1291 | ntohs(uh->dest), |
1243 | ulen); | 1292 | ulen); |
1244 | drop: | 1293 | drop: |
@@ -1249,7 +1298,7 @@ drop: | |||
1249 | 1298 | ||
1250 | int udp_rcv(struct sk_buff *skb) | 1299 | int udp_rcv(struct sk_buff *skb) |
1251 | { | 1300 | { |
1252 | return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); | 1301 | return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); |
1253 | } | 1302 | } |
1254 | 1303 | ||
1255 | void udp_destroy_sock(struct sock *sk) | 1304 | void udp_destroy_sock(struct sock *sk) |
@@ -1491,7 +1540,8 @@ struct proto udp_prot = { | |||
1491 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1540 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1492 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1541 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1493 | .obj_size = sizeof(struct udp_sock), | 1542 | .obj_size = sizeof(struct udp_sock), |
1494 | .h.udp_hash = udp_hash, | 1543 | .slab_flags = SLAB_DESTROY_BY_RCU, |
1544 | .h.udp_table = &udp_table, | ||
1495 | #ifdef CONFIG_COMPAT | 1545 | #ifdef CONFIG_COMPAT |
1496 | .compat_setsockopt = compat_udp_setsockopt, | 1546 | .compat_setsockopt = compat_udp_setsockopt, |
1497 | .compat_getsockopt = compat_udp_getsockopt, | 1547 | .compat_getsockopt = compat_udp_getsockopt, |
@@ -1501,20 +1551,23 @@ struct proto udp_prot = { | |||
1501 | /* ------------------------------------------------------------------------ */ | 1551 | /* ------------------------------------------------------------------------ */ |
1502 | #ifdef CONFIG_PROC_FS | 1552 | #ifdef CONFIG_PROC_FS |
1503 | 1553 | ||
1504 | static struct sock *udp_get_first(struct seq_file *seq) | 1554 | static struct sock *udp_get_first(struct seq_file *seq, int start) |
1505 | { | 1555 | { |
1506 | struct sock *sk; | 1556 | struct sock *sk; |
1507 | struct udp_iter_state *state = seq->private; | 1557 | struct udp_iter_state *state = seq->private; |
1508 | struct net *net = seq_file_net(seq); | 1558 | struct net *net = seq_file_net(seq); |
1509 | 1559 | ||
1510 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1560 | for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { |
1511 | struct hlist_node *node; | 1561 | struct hlist_nulls_node *node; |
1512 | sk_for_each(sk, node, state->hashtable + state->bucket) { | 1562 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; |
1563 | spin_lock_bh(&hslot->lock); | ||
1564 | sk_nulls_for_each(sk, node, &hslot->head) { | ||
1513 | if (!net_eq(sock_net(sk), net)) | 1565 | if (!net_eq(sock_net(sk), net)) |
1514 | continue; | 1566 | continue; |
1515 | if (sk->sk_family == state->family) | 1567 | if (sk->sk_family == state->family) |
1516 | goto found; | 1568 | goto found; |
1517 | } | 1569 | } |
1570 | spin_unlock_bh(&hslot->lock); | ||
1518 | } | 1571 | } |
1519 | sk = NULL; | 1572 | sk = NULL; |
1520 | found: | 1573 | found: |
@@ -1527,21 +1580,19 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | |||
1527 | struct net *net = seq_file_net(seq); | 1580 | struct net *net = seq_file_net(seq); |
1528 | 1581 | ||
1529 | do { | 1582 | do { |
1530 | sk = sk_next(sk); | 1583 | sk = sk_nulls_next(sk); |
1531 | try_again: | ||
1532 | ; | ||
1533 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1584 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1534 | 1585 | ||
1535 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { | 1586 | if (!sk) { |
1536 | sk = sk_head(state->hashtable + state->bucket); | 1587 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1537 | goto try_again; | 1588 | return udp_get_first(seq, state->bucket + 1); |
1538 | } | 1589 | } |
1539 | return sk; | 1590 | return sk; |
1540 | } | 1591 | } |
1541 | 1592 | ||
1542 | static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | 1593 | static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) |
1543 | { | 1594 | { |
1544 | struct sock *sk = udp_get_first(seq); | 1595 | struct sock *sk = udp_get_first(seq, 0); |
1545 | 1596 | ||
1546 | if (sk) | 1597 | if (sk) |
1547 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) | 1598 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) |
@@ -1550,9 +1601,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | |||
1550 | } | 1601 | } |
1551 | 1602 | ||
1552 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) | 1603 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) |
1553 | __acquires(udp_hash_lock) | ||
1554 | { | 1604 | { |
1555 | read_lock(&udp_hash_lock); | ||
1556 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; | 1605 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; |
1557 | } | 1606 | } |
1558 | 1607 | ||
@@ -1570,9 +1619,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1570 | } | 1619 | } |
1571 | 1620 | ||
1572 | static void udp_seq_stop(struct seq_file *seq, void *v) | 1621 | static void udp_seq_stop(struct seq_file *seq, void *v) |
1573 | __releases(udp_hash_lock) | ||
1574 | { | 1622 | { |
1575 | read_unlock(&udp_hash_lock); | 1623 | struct udp_iter_state *state = seq->private; |
1624 | |||
1625 | if (state->bucket < UDP_HTABLE_SIZE) | ||
1626 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | ||
1576 | } | 1627 | } |
1577 | 1628 | ||
1578 | static int udp_seq_open(struct inode *inode, struct file *file) | 1629 | static int udp_seq_open(struct inode *inode, struct file *file) |
@@ -1588,7 +1639,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) | |||
1588 | 1639 | ||
1589 | s = ((struct seq_file *)file->private_data)->private; | 1640 | s = ((struct seq_file *)file->private_data)->private; |
1590 | s->family = afinfo->family; | 1641 | s->family = afinfo->family; |
1591 | s->hashtable = afinfo->hashtable; | 1642 | s->udp_table = afinfo->udp_table; |
1592 | return err; | 1643 | return err; |
1593 | } | 1644 | } |
1594 | 1645 | ||
@@ -1660,7 +1711,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) | |||
1660 | static struct udp_seq_afinfo udp4_seq_afinfo = { | 1711 | static struct udp_seq_afinfo udp4_seq_afinfo = { |
1661 | .name = "udp", | 1712 | .name = "udp", |
1662 | .family = AF_INET, | 1713 | .family = AF_INET, |
1663 | .hashtable = udp_hash, | 1714 | .udp_table = &udp_table, |
1664 | .seq_fops = { | 1715 | .seq_fops = { |
1665 | .owner = THIS_MODULE, | 1716 | .owner = THIS_MODULE, |
1666 | }, | 1717 | }, |
@@ -1695,16 +1746,28 @@ void udp4_proc_exit(void) | |||
1695 | } | 1746 | } |
1696 | #endif /* CONFIG_PROC_FS */ | 1747 | #endif /* CONFIG_PROC_FS */ |
1697 | 1748 | ||
1749 | void __init udp_table_init(struct udp_table *table) | ||
1750 | { | ||
1751 | int i; | ||
1752 | |||
1753 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { | ||
1754 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); | ||
1755 | spin_lock_init(&table->hash[i].lock); | ||
1756 | } | ||
1757 | } | ||
1758 | |||
1698 | void __init udp_init(void) | 1759 | void __init udp_init(void) |
1699 | { | 1760 | { |
1700 | unsigned long limit; | 1761 | unsigned long nr_pages, limit; |
1701 | 1762 | ||
1763 | udp_table_init(&udp_table); | ||
1702 | /* Set the pressure threshold up by the same strategy of TCP. It is a | 1764 | /* Set the pressure threshold up by the same strategy of TCP. It is a |
1703 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | 1765 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing |
1704 | * toward zero with the amount of memory, with a floor of 128 pages. | 1766 | * toward zero with the amount of memory, with a floor of 128 pages. |
1705 | */ | 1767 | */ |
1706 | limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); | 1768 | nr_pages = totalram_pages - totalhigh_pages; |
1707 | limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | 1769 | limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); |
1770 | limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | ||
1708 | limit = max(limit, 128UL); | 1771 | limit = max(limit, 128UL); |
1709 | sysctl_udp_mem[0] = limit / 4 * 3; | 1772 | sysctl_udp_mem[0] = limit / 4 * 3; |
1710 | sysctl_udp_mem[1] = limit; | 1773 | sysctl_udp_mem[1] = limit; |
@@ -1715,8 +1778,6 @@ void __init udp_init(void) | |||
1715 | } | 1778 | } |
1716 | 1779 | ||
1717 | EXPORT_SYMBOL(udp_disconnect); | 1780 | EXPORT_SYMBOL(udp_disconnect); |
1718 | EXPORT_SYMBOL(udp_hash); | ||
1719 | EXPORT_SYMBOL(udp_hash_lock); | ||
1720 | EXPORT_SYMBOL(udp_ioctl); | 1781 | EXPORT_SYMBOL(udp_ioctl); |
1721 | EXPORT_SYMBOL(udp_prot); | 1782 | EXPORT_SYMBOL(udp_prot); |
1722 | EXPORT_SYMBOL(udp_sendmsg); | 1783 | EXPORT_SYMBOL(udp_sendmsg); |