diff options
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- | net/ipv4/udp.c | 558 |
1 files changed, 328 insertions, 230 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 865d75214a9a..035915fc9ed3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -92,22 +92,16 @@ | |||
92 | #include <linux/timer.h> | 92 | #include <linux/timer.h> |
93 | #include <linux/mm.h> | 93 | #include <linux/mm.h> |
94 | #include <linux/inet.h> | 94 | #include <linux/inet.h> |
95 | #include <linux/ipv6.h> | ||
96 | #include <linux/netdevice.h> | 95 | #include <linux/netdevice.h> |
97 | #include <net/snmp.h> | ||
98 | #include <net/ip.h> | ||
99 | #include <net/tcp_states.h> | 96 | #include <net/tcp_states.h> |
100 | #include <net/protocol.h> | ||
101 | #include <linux/skbuff.h> | 97 | #include <linux/skbuff.h> |
102 | #include <linux/proc_fs.h> | 98 | #include <linux/proc_fs.h> |
103 | #include <linux/seq_file.h> | 99 | #include <linux/seq_file.h> |
104 | #include <net/sock.h> | ||
105 | #include <net/udp.h> | ||
106 | #include <net/icmp.h> | 100 | #include <net/icmp.h> |
107 | #include <net/route.h> | 101 | #include <net/route.h> |
108 | #include <net/inet_common.h> | ||
109 | #include <net/checksum.h> | 102 | #include <net/checksum.h> |
110 | #include <net/xfrm.h> | 103 | #include <net/xfrm.h> |
104 | #include "udp_impl.h" | ||
111 | 105 | ||
112 | /* | 106 | /* |
113 | * Snmp MIB for the UDP layer | 107 | * Snmp MIB for the UDP layer |
@@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock); | |||
120 | 114 | ||
121 | static int udp_port_rover; | 115 | static int udp_port_rover; |
122 | 116 | ||
123 | static inline int udp_lport_inuse(u16 num) | 117 | static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) |
124 | { | 118 | { |
125 | struct sock *sk; | 119 | struct sock *sk; |
126 | struct hlist_node *node; | 120 | struct hlist_node *node; |
127 | 121 | ||
128 | sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) | 122 | sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) |
129 | if (inet_sk(sk)->num == num) | 123 | if (inet_sk(sk)->num == num) |
130 | return 1; | 124 | return 1; |
131 | return 0; | 125 | return 0; |
132 | } | 126 | } |
133 | 127 | ||
134 | /** | 128 | /** |
135 | * udp_get_port - common port lookup for IPv4 and IPv6 | 129 | * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 |
136 | * | 130 | * |
137 | * @sk: socket struct in question | 131 | * @sk: socket struct in question |
138 | * @snum: port number to look up | 132 | * @snum: port number to look up |
133 | * @udptable: hash list table, must be of UDP_HTABLE_SIZE | ||
134 | * @port_rover: pointer to record of last unallocated port | ||
139 | * @saddr_comp: AF-dependent comparison of bound local IP addresses | 135 | * @saddr_comp: AF-dependent comparison of bound local IP addresses |
140 | */ | 136 | */ |
141 | int udp_get_port(struct sock *sk, unsigned short snum, | 137 | int __udp_lib_get_port(struct sock *sk, unsigned short snum, |
142 | int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) | 138 | struct hlist_head udptable[], int *port_rover, |
139 | int (*saddr_comp)(const struct sock *sk1, | ||
140 | const struct sock *sk2 ) ) | ||
143 | { | 141 | { |
144 | struct hlist_node *node; | 142 | struct hlist_node *node; |
145 | struct hlist_head *head; | 143 | struct hlist_head *head; |
@@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, | |||
150 | if (snum == 0) { | 148 | if (snum == 0) { |
151 | int best_size_so_far, best, result, i; | 149 | int best_size_so_far, best, result, i; |
152 | 150 | ||
153 | if (udp_port_rover > sysctl_local_port_range[1] || | 151 | if (*port_rover > sysctl_local_port_range[1] || |
154 | udp_port_rover < sysctl_local_port_range[0]) | 152 | *port_rover < sysctl_local_port_range[0]) |
155 | udp_port_rover = sysctl_local_port_range[0]; | 153 | *port_rover = sysctl_local_port_range[0]; |
156 | best_size_so_far = 32767; | 154 | best_size_so_far = 32767; |
157 | best = result = udp_port_rover; | 155 | best = result = *port_rover; |
158 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { | 156 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { |
159 | int size; | 157 | int size; |
160 | 158 | ||
161 | head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; | 159 | head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; |
162 | if (hlist_empty(head)) { | 160 | if (hlist_empty(head)) { |
163 | if (result > sysctl_local_port_range[1]) | 161 | if (result > sysctl_local_port_range[1]) |
164 | result = sysctl_local_port_range[0] + | 162 | result = sysctl_local_port_range[0] + |
@@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, | |||
179 | result = sysctl_local_port_range[0] | 177 | result = sysctl_local_port_range[0] |
180 | + ((result - sysctl_local_port_range[0]) & | 178 | + ((result - sysctl_local_port_range[0]) & |
181 | (UDP_HTABLE_SIZE - 1)); | 179 | (UDP_HTABLE_SIZE - 1)); |
182 | if (!udp_lport_inuse(result)) | 180 | if (! __udp_lib_lport_inuse(result, udptable)) |
183 | break; | 181 | break; |
184 | } | 182 | } |
185 | if (i >= (1 << 16) / UDP_HTABLE_SIZE) | 183 | if (i >= (1 << 16) / UDP_HTABLE_SIZE) |
186 | goto fail; | 184 | goto fail; |
187 | gotit: | 185 | gotit: |
188 | udp_port_rover = snum = result; | 186 | *port_rover = snum = result; |
189 | } else { | 187 | } else { |
190 | head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; | 188 | head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; |
191 | 189 | ||
192 | sk_for_each(sk2, node, head) | 190 | sk_for_each(sk2, node, head) |
193 | if (inet_sk(sk2)->num == snum && | 191 | if (inet_sk(sk2)->num == snum && |
@@ -195,12 +193,12 @@ gotit: | |||
195 | (!sk2->sk_reuse || !sk->sk_reuse) && | 193 | (!sk2->sk_reuse || !sk->sk_reuse) && |
196 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 194 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
197 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 195 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
198 | (*saddr_cmp)(sk, sk2) ) | 196 | (*saddr_comp)(sk, sk2) ) |
199 | goto fail; | 197 | goto fail; |
200 | } | 198 | } |
201 | inet_sk(sk)->num = snum; | 199 | inet_sk(sk)->num = snum; |
202 | if (sk_unhashed(sk)) { | 200 | if (sk_unhashed(sk)) { |
203 | head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; | 201 | head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; |
204 | sk_add_node(sk, head); | 202 | sk_add_node(sk, head); |
205 | sock_prot_inc_use(sk->sk_prot); | 203 | sock_prot_inc_use(sk->sk_prot); |
206 | } | 204 | } |
@@ -210,7 +208,13 @@ fail: | |||
210 | return error; | 208 | return error; |
211 | } | 209 | } |
212 | 210 | ||
213 | static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 211 | __inline__ int udp_get_port(struct sock *sk, unsigned short snum, |
212 | int (*scmp)(const struct sock *, const struct sock *)) | ||
213 | { | ||
214 | return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); | ||
215 | } | ||
216 | |||
217 | inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
214 | { | 218 | { |
215 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 219 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
216 | 220 | ||
@@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
224 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | 228 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); |
225 | } | 229 | } |
226 | 230 | ||
227 | |||
228 | static void udp_v4_hash(struct sock *sk) | ||
229 | { | ||
230 | BUG(); | ||
231 | } | ||
232 | |||
233 | static void udp_v4_unhash(struct sock *sk) | ||
234 | { | ||
235 | write_lock_bh(&udp_hash_lock); | ||
236 | if (sk_del_node_init(sk)) { | ||
237 | inet_sk(sk)->num = 0; | ||
238 | sock_prot_dec_use(sk->sk_prot); | ||
239 | } | ||
240 | write_unlock_bh(&udp_hash_lock); | ||
241 | } | ||
242 | |||
243 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 231 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
244 | * harder than this. -DaveM | 232 | * harder than this. -DaveM |
245 | */ | 233 | */ |
246 | static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, | 234 | static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, |
247 | __be32 daddr, __be16 dport, int dif) | 235 | __be32 daddr, __be16 dport, |
236 | int dif, struct hlist_head udptable[]) | ||
248 | { | 237 | { |
249 | struct sock *sk, *result = NULL; | 238 | struct sock *sk, *result = NULL; |
250 | struct hlist_node *node; | 239 | struct hlist_node *node; |
251 | unsigned short hnum = ntohs(dport); | 240 | unsigned short hnum = ntohs(dport); |
252 | int badness = -1; | 241 | int badness = -1; |
253 | 242 | ||
254 | sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { | 243 | read_lock(&udp_hash_lock); |
244 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | ||
255 | struct inet_sock *inet = inet_sk(sk); | 245 | struct inet_sock *inet = inet_sk(sk); |
256 | 246 | ||
257 | if (inet->num == hnum && !ipv6_only_sock(sk)) { | 247 | if (inet->num == hnum && !ipv6_only_sock(sk)) { |
@@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, | |||
285 | } | 275 | } |
286 | } | 276 | } |
287 | } | 277 | } |
288 | return result; | 278 | if (result) |
289 | } | 279 | sock_hold(result); |
290 | |||
291 | static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport, | ||
292 | __be32 daddr, __be16 dport, int dif) | ||
293 | { | ||
294 | struct sock *sk; | ||
295 | |||
296 | read_lock(&udp_hash_lock); | ||
297 | sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); | ||
298 | if (sk) | ||
299 | sock_hold(sk); | ||
300 | read_unlock(&udp_hash_lock); | 280 | read_unlock(&udp_hash_lock); |
301 | return sk; | 281 | return result; |
302 | } | 282 | } |
303 | 283 | ||
304 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | 284 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, |
@@ -340,7 +320,7 @@ found: | |||
340 | * to find the appropriate port. | 320 | * to find the appropriate port. |
341 | */ | 321 | */ |
342 | 322 | ||
343 | void udp_err(struct sk_buff *skb, u32 info) | 323 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) |
344 | { | 324 | { |
345 | struct inet_sock *inet; | 325 | struct inet_sock *inet; |
346 | struct iphdr *iph = (struct iphdr*)skb->data; | 326 | struct iphdr *iph = (struct iphdr*)skb->data; |
@@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info) | |||
351 | int harderr; | 331 | int harderr; |
352 | int err; | 332 | int err; |
353 | 333 | ||
354 | sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); | 334 | sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, |
335 | skb->dev->ifindex, udptable ); | ||
355 | if (sk == NULL) { | 336 | if (sk == NULL) { |
356 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 337 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
357 | return; /* No socket for error */ | 338 | return; /* No socket for error */ |
@@ -405,6 +386,11 @@ out: | |||
405 | sock_put(sk); | 386 | sock_put(sk); |
406 | } | 387 | } |
407 | 388 | ||
389 | __inline__ void udp_err(struct sk_buff *skb, u32 info) | ||
390 | { | ||
391 | return __udp4_lib_err(skb, info, udp_hash); | ||
392 | } | ||
393 | |||
408 | /* | 394 | /* |
409 | * Throw away all pending data and cancel the corking. Socket is locked. | 395 | * Throw away all pending data and cancel the corking. Socket is locked. |
410 | */ | 396 | */ |
@@ -419,16 +405,58 @@ static void udp_flush_pending_frames(struct sock *sk) | |||
419 | } | 405 | } |
420 | } | 406 | } |
421 | 407 | ||
408 | /** | ||
409 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | ||
410 | * @sk: socket we are sending on | ||
411 | * @skb: sk_buff containing the filled-in UDP header | ||
412 | * (checksum field must be zeroed out) | ||
413 | */ | ||
414 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | ||
415 | __be32 src, __be32 dst, int len ) | ||
416 | { | ||
417 | unsigned int offset; | ||
418 | struct udphdr *uh = skb->h.uh; | ||
419 | __wsum csum = 0; | ||
420 | |||
421 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | ||
422 | /* | ||
423 | * Only one fragment on the socket. | ||
424 | */ | ||
425 | skb->csum_offset = offsetof(struct udphdr, check); | ||
426 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | ||
427 | } else { | ||
428 | /* | ||
429 | * HW-checksum won't work as there are two or more | ||
430 | * fragments on the socket so that all csums of sk_buffs | ||
431 | * should be together | ||
432 | */ | ||
433 | offset = skb->h.raw - skb->data; | ||
434 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
435 | |||
436 | skb->ip_summed = CHECKSUM_NONE; | ||
437 | |||
438 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
439 | csum = csum_add(csum, skb->csum); | ||
440 | } | ||
441 | |||
442 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | ||
443 | if (uh->check == 0) | ||
444 | uh->check = CSUM_MANGLED_0; | ||
445 | } | ||
446 | } | ||
447 | |||
422 | /* | 448 | /* |
423 | * Push out all pending data as one UDP datagram. Socket is locked. | 449 | * Push out all pending data as one UDP datagram. Socket is locked. |
424 | */ | 450 | */ |
425 | static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) | 451 | static int udp_push_pending_frames(struct sock *sk) |
426 | { | 452 | { |
453 | struct udp_sock *up = udp_sk(sk); | ||
427 | struct inet_sock *inet = inet_sk(sk); | 454 | struct inet_sock *inet = inet_sk(sk); |
428 | struct flowi *fl = &inet->cork.fl; | 455 | struct flowi *fl = &inet->cork.fl; |
429 | struct sk_buff *skb; | 456 | struct sk_buff *skb; |
430 | struct udphdr *uh; | 457 | struct udphdr *uh; |
431 | int err = 0; | 458 | int err = 0; |
459 | __wsum csum = 0; | ||
432 | 460 | ||
433 | /* Grab the skbuff where UDP header space exists. */ | 461 | /* Grab the skbuff where UDP header space exists. */ |
434 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | 462 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) |
@@ -443,52 +471,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) | |||
443 | uh->len = htons(up->len); | 471 | uh->len = htons(up->len); |
444 | uh->check = 0; | 472 | uh->check = 0; |
445 | 473 | ||
446 | if (sk->sk_no_check == UDP_CSUM_NOXMIT) { | 474 | if (up->pcflag) /* UDP-Lite */ |
475 | csum = udplite_csum_outgoing(sk, skb); | ||
476 | |||
477 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | ||
478 | |||
447 | skb->ip_summed = CHECKSUM_NONE; | 479 | skb->ip_summed = CHECKSUM_NONE; |
448 | goto send; | 480 | goto send; |
449 | } | ||
450 | 481 | ||
451 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 482 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
452 | /* | ||
453 | * Only one fragment on the socket. | ||
454 | */ | ||
455 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
456 | skb->csum = offsetof(struct udphdr, check); | ||
457 | uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | ||
458 | up->len, IPPROTO_UDP, 0); | ||
459 | } else { | ||
460 | skb->csum = csum_partial((char *)uh, | ||
461 | sizeof(struct udphdr), skb->csum); | ||
462 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | ||
463 | up->len, IPPROTO_UDP, skb->csum); | ||
464 | if (uh->check == 0) | ||
465 | uh->check = -1; | ||
466 | } | ||
467 | } else { | ||
468 | unsigned int csum = 0; | ||
469 | /* | ||
470 | * HW-checksum won't work as there are two or more | ||
471 | * fragments on the socket so that all csums of sk_buffs | ||
472 | * should be together. | ||
473 | */ | ||
474 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
475 | int offset = (unsigned char *)uh - skb->data; | ||
476 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
477 | 483 | ||
478 | skb->ip_summed = CHECKSUM_NONE; | 484 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); |
479 | } else { | 485 | goto send; |
480 | skb->csum = csum_partial((char *)uh, | 486 | |
481 | sizeof(struct udphdr), skb->csum); | 487 | } else /* `normal' UDP */ |
482 | } | 488 | csum = udp_csum_outgoing(sk, skb); |
489 | |||
490 | /* add protocol-dependent pseudo-header */ | ||
491 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | ||
492 | sk->sk_protocol, csum ); | ||
493 | if (uh->check == 0) | ||
494 | uh->check = CSUM_MANGLED_0; | ||
483 | 495 | ||
484 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
485 | csum = csum_add(csum, skb->csum); | ||
486 | } | ||
487 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | ||
488 | up->len, IPPROTO_UDP, csum); | ||
489 | if (uh->check == 0) | ||
490 | uh->check = -1; | ||
491 | } | ||
492 | send: | 496 | send: |
493 | err = ip_push_pending_frames(sk); | 497 | err = ip_push_pending_frames(sk); |
494 | out: | 498 | out: |
@@ -497,12 +501,6 @@ out: | |||
497 | return err; | 501 | return err; |
498 | } | 502 | } |
499 | 503 | ||
500 | |||
501 | static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base) | ||
502 | { | ||
503 | return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); | ||
504 | } | ||
505 | |||
506 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 504 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
507 | size_t len) | 505 | size_t len) |
508 | { | 506 | { |
@@ -516,8 +514,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
516 | __be32 daddr, faddr, saddr; | 514 | __be32 daddr, faddr, saddr; |
517 | __be16 dport; | 515 | __be16 dport; |
518 | u8 tos; | 516 | u8 tos; |
519 | int err; | 517 | int err, is_udplite = up->pcflag; |
520 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 518 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
519 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | ||
521 | 520 | ||
522 | if (len > 0xFFFF) | 521 | if (len > 0xFFFF) |
523 | return -EMSGSIZE; | 522 | return -EMSGSIZE; |
@@ -622,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
622 | { .daddr = faddr, | 621 | { .daddr = faddr, |
623 | .saddr = saddr, | 622 | .saddr = saddr, |
624 | .tos = tos } }, | 623 | .tos = tos } }, |
625 | .proto = IPPROTO_UDP, | 624 | .proto = sk->sk_protocol, |
626 | .uli_u = { .ports = | 625 | .uli_u = { .ports = |
627 | { .sport = inet->sport, | 626 | { .sport = inet->sport, |
628 | .dport = dport } } }; | 627 | .dport = dport } } }; |
@@ -668,13 +667,14 @@ back_from_confirm: | |||
668 | 667 | ||
669 | do_append_data: | 668 | do_append_data: |
670 | up->len += ulen; | 669 | up->len += ulen; |
671 | err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, | 670 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
672 | sizeof(struct udphdr), &ipc, rt, | 671 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, |
672 | sizeof(struct udphdr), &ipc, rt, | ||
673 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 673 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
674 | if (err) | 674 | if (err) |
675 | udp_flush_pending_frames(sk); | 675 | udp_flush_pending_frames(sk); |
676 | else if (!corkreq) | 676 | else if (!corkreq) |
677 | err = udp_push_pending_frames(sk, up); | 677 | err = udp_push_pending_frames(sk); |
678 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | 678 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) |
679 | up->pending = 0; | 679 | up->pending = 0; |
680 | release_sock(sk); | 680 | release_sock(sk); |
@@ -684,7 +684,7 @@ out: | |||
684 | if (free) | 684 | if (free) |
685 | kfree(ipc.opt); | 685 | kfree(ipc.opt); |
686 | if (!err) { | 686 | if (!err) { |
687 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); | 687 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); |
688 | return len; | 688 | return len; |
689 | } | 689 | } |
690 | /* | 690 | /* |
@@ -695,7 +695,7 @@ out: | |||
695 | * seems like overkill. | 695 | * seems like overkill. |
696 | */ | 696 | */ |
697 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | 697 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { |
698 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); | 698 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); |
699 | } | 699 | } |
700 | return err; | 700 | return err; |
701 | 701 | ||
@@ -707,8 +707,8 @@ do_confirm: | |||
707 | goto out; | 707 | goto out; |
708 | } | 708 | } |
709 | 709 | ||
710 | static int udp_sendpage(struct sock *sk, struct page *page, int offset, | 710 | int udp_sendpage(struct sock *sk, struct page *page, int offset, |
711 | size_t size, int flags) | 711 | size_t size, int flags) |
712 | { | 712 | { |
713 | struct udp_sock *up = udp_sk(sk); | 713 | struct udp_sock *up = udp_sk(sk); |
714 | int ret; | 714 | int ret; |
@@ -747,7 +747,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, | |||
747 | 747 | ||
748 | up->len += size; | 748 | up->len += size; |
749 | if (!(up->corkflag || (flags&MSG_MORE))) | 749 | if (!(up->corkflag || (flags&MSG_MORE))) |
750 | ret = udp_push_pending_frames(sk, up); | 750 | ret = udp_push_pending_frames(sk); |
751 | if (!ret) | 751 | if (!ret) |
752 | ret = size; | 752 | ret = size; |
753 | out: | 753 | out: |
@@ -795,29 +795,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
795 | return(0); | 795 | return(0); |
796 | } | 796 | } |
797 | 797 | ||
798 | static __inline__ int __udp_checksum_complete(struct sk_buff *skb) | ||
799 | { | ||
800 | return __skb_checksum_complete(skb); | ||
801 | } | ||
802 | |||
803 | static __inline__ int udp_checksum_complete(struct sk_buff *skb) | ||
804 | { | ||
805 | return skb->ip_summed != CHECKSUM_UNNECESSARY && | ||
806 | __udp_checksum_complete(skb); | ||
807 | } | ||
808 | |||
809 | /* | 798 | /* |
810 | * This should be easy, if there is something there we | 799 | * This should be easy, if there is something there we |
811 | * return it, otherwise we block. | 800 | * return it, otherwise we block. |
812 | */ | 801 | */ |
813 | 802 | ||
814 | static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 803 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
815 | size_t len, int noblock, int flags, int *addr_len) | 804 | size_t len, int noblock, int flags, int *addr_len) |
816 | { | 805 | { |
817 | struct inet_sock *inet = inet_sk(sk); | 806 | struct inet_sock *inet = inet_sk(sk); |
818 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | 807 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; |
819 | struct sk_buff *skb; | 808 | struct sk_buff *skb; |
820 | int copied, err; | 809 | int copied, err, copy_only, is_udplite = IS_UDPLITE(sk); |
821 | 810 | ||
822 | /* | 811 | /* |
823 | * Check any passed addresses | 812 | * Check any passed addresses |
@@ -839,15 +828,25 @@ try_again: | |||
839 | msg->msg_flags |= MSG_TRUNC; | 828 | msg->msg_flags |= MSG_TRUNC; |
840 | } | 829 | } |
841 | 830 | ||
842 | if (skb->ip_summed==CHECKSUM_UNNECESSARY) { | 831 | /* |
843 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, | 832 | * Decide whether to checksum and/or copy data. |
844 | copied); | 833 | * |
845 | } else if (msg->msg_flags&MSG_TRUNC) { | 834 | * UDP: checksum may have been computed in HW, |
846 | if (__udp_checksum_complete(skb)) | 835 | * (re-)compute it if message is truncated. |
836 | * UDP-Lite: always needs to checksum, no HW support. | ||
837 | */ | ||
838 | copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); | ||
839 | |||
840 | if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { | ||
841 | if (__udp_lib_checksum_complete(skb)) | ||
847 | goto csum_copy_err; | 842 | goto csum_copy_err; |
848 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, | 843 | copy_only = 1; |
849 | copied); | 844 | } |
850 | } else { | 845 | |
846 | if (copy_only) | ||
847 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | ||
848 | msg->msg_iov, copied ); | ||
849 | else { | ||
851 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | 850 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); |
852 | 851 | ||
853 | if (err == -EINVAL) | 852 | if (err == -EINVAL) |
@@ -880,7 +879,7 @@ out: | |||
880 | return err; | 879 | return err; |
881 | 880 | ||
882 | csum_copy_err: | 881 | csum_copy_err: |
883 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 882 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); |
884 | 883 | ||
885 | skb_kill_datagram(sk, skb, flags); | 884 | skb_kill_datagram(sk, skb, flags); |
886 | 885 | ||
@@ -912,11 +911,6 @@ int udp_disconnect(struct sock *sk, int flags) | |||
912 | return 0; | 911 | return 0; |
913 | } | 912 | } |
914 | 913 | ||
915 | static void udp_close(struct sock *sk, long timeout) | ||
916 | { | ||
917 | sk_common_release(sk); | ||
918 | } | ||
919 | |||
920 | /* return: | 914 | /* return: |
921 | * 1 if the UDP system should process it | 915 | * 1 if the UDP system should process it |
922 | * 0 if we should drop this packet | 916 | * 0 if we should drop this packet |
@@ -928,23 +922,32 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
928 | return 1; | 922 | return 1; |
929 | #else | 923 | #else |
930 | struct udp_sock *up = udp_sk(sk); | 924 | struct udp_sock *up = udp_sk(sk); |
931 | struct udphdr *uh = skb->h.uh; | 925 | struct udphdr *uh; |
932 | struct iphdr *iph; | 926 | struct iphdr *iph; |
933 | int iphlen, len; | 927 | int iphlen, len; |
934 | 928 | ||
935 | __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); | 929 | __u8 *udpdata; |
936 | __be32 *udpdata32 = (__be32 *)udpdata; | 930 | __be32 *udpdata32; |
937 | __u16 encap_type = up->encap_type; | 931 | __u16 encap_type = up->encap_type; |
938 | 932 | ||
939 | /* if we're overly short, let UDP handle it */ | 933 | /* if we're overly short, let UDP handle it */ |
940 | if (udpdata > skb->tail) | 934 | len = skb->len - sizeof(struct udphdr); |
935 | if (len <= 0) | ||
941 | return 1; | 936 | return 1; |
942 | 937 | ||
943 | /* if this is not encapsulated socket, then just return now */ | 938 | /* if this is not encapsulated socket, then just return now */ |
944 | if (!encap_type) | 939 | if (!encap_type) |
945 | return 1; | 940 | return 1; |
946 | 941 | ||
947 | len = skb->tail - udpdata; | 942 | /* If this is a paged skb, make sure we pull up |
943 | * whatever data we need to look at. */ | ||
944 | if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) | ||
945 | return 1; | ||
946 | |||
947 | /* Now we can get the pointers */ | ||
948 | uh = skb->h.uh; | ||
949 | udpdata = (__u8 *)uh + sizeof(struct udphdr); | ||
950 | udpdata32 = (__be32 *)udpdata; | ||
948 | 951 | ||
949 | switch (encap_type) { | 952 | switch (encap_type) { |
950 | default: | 953 | default: |
@@ -1013,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
1013 | * Note that in the success and error cases, the skb is assumed to | 1016 | * Note that in the success and error cases, the skb is assumed to |
1014 | * have either been requeued or freed. | 1017 | * have either been requeued or freed. |
1015 | */ | 1018 | */ |
1016 | static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 1019 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) |
1017 | { | 1020 | { |
1018 | struct udp_sock *up = udp_sk(sk); | 1021 | struct udp_sock *up = udp_sk(sk); |
1019 | int rc; | 1022 | int rc; |
@@ -1021,10 +1024,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1021 | /* | 1024 | /* |
1022 | * Charge it to the socket, dropping if the queue is full. | 1025 | * Charge it to the socket, dropping if the queue is full. |
1023 | */ | 1026 | */ |
1024 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | 1027 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
1025 | kfree_skb(skb); | 1028 | goto drop; |
1026 | return -1; | ||
1027 | } | ||
1028 | nf_reset(skb); | 1029 | nf_reset(skb); |
1029 | 1030 | ||
1030 | if (up->encap_type) { | 1031 | if (up->encap_type) { |
@@ -1048,31 +1049,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1048 | if (ret < 0) { | 1049 | if (ret < 0) { |
1049 | /* process the ESP packet */ | 1050 | /* process the ESP packet */ |
1050 | ret = xfrm4_rcv_encap(skb, up->encap_type); | 1051 | ret = xfrm4_rcv_encap(skb, up->encap_type); |
1051 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); | 1052 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); |
1052 | return -ret; | 1053 | return -ret; |
1053 | } | 1054 | } |
1054 | /* FALLTHROUGH -- it's a UDP Packet */ | 1055 | /* FALLTHROUGH -- it's a UDP Packet */ |
1055 | } | 1056 | } |
1056 | 1057 | ||
1057 | if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { | 1058 | /* |
1058 | if (__udp_checksum_complete(skb)) { | 1059 | * UDP-Lite specific tests, ignored on UDP sockets |
1059 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1060 | */ |
1060 | kfree_skb(skb); | 1061 | if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { |
1061 | return -1; | 1062 | |
1063 | /* | ||
1064 | * MIB statistics other than incrementing the error count are | ||
1065 | * disabled for the following two types of errors: these depend | ||
1066 | * on the application settings, not on the functioning of the | ||
1067 | * protocol stack as such. | ||
1068 | * | ||
1069 | * RFC 3828 here recommends (sec 3.3): "There should also be a | ||
1070 | * way ... to ... at least let the receiving application block | ||
1071 | * delivery of packets with coverage values less than a value | ||
1072 | * provided by the application." | ||
1073 | */ | ||
1074 | if (up->pcrlen == 0) { /* full coverage was set */ | ||
1075 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | ||
1076 | "%d while full coverage %d requested\n", | ||
1077 | UDP_SKB_CB(skb)->cscov, skb->len); | ||
1078 | goto drop; | ||
1062 | } | 1079 | } |
1080 | /* The next case involves violating the min. coverage requested | ||
1081 | * by the receiver. This is subtle: if receiver wants x and x is | ||
1082 | * greater than the buffersize/MTU then receiver will complain | ||
1083 | * that it wants x while sender emits packets of smaller size y. | ||
1084 | * Therefore the above ...()->partial_cov statement is essential. | ||
1085 | */ | ||
1086 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | ||
1087 | LIMIT_NETDEBUG(KERN_WARNING | ||
1088 | "UDPLITE: coverage %d too small, need min %d\n", | ||
1089 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | ||
1090 | goto drop; | ||
1091 | } | ||
1092 | } | ||
1093 | |||
1094 | if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { | ||
1095 | if (__udp_lib_checksum_complete(skb)) | ||
1096 | goto drop; | ||
1063 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1097 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1064 | } | 1098 | } |
1065 | 1099 | ||
1066 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | 1100 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { |
1067 | /* Note that an ENOMEM error is charged twice */ | 1101 | /* Note that an ENOMEM error is charged twice */ |
1068 | if (rc == -ENOMEM) | 1102 | if (rc == -ENOMEM) |
1069 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); | 1103 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); |
1070 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1104 | goto drop; |
1071 | kfree_skb(skb); | ||
1072 | return -1; | ||
1073 | } | 1105 | } |
1074 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); | 1106 | |
1107 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); | ||
1075 | return 0; | 1108 | return 0; |
1109 | |||
1110 | drop: | ||
1111 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); | ||
1112 | kfree_skb(skb); | ||
1113 | return -1; | ||
1076 | } | 1114 | } |
1077 | 1115 | ||
1078 | /* | 1116 | /* |
@@ -1081,14 +1119,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1081 | * Note: called only from the BH handler context, | 1119 | * Note: called only from the BH handler context, |
1082 | * so we don't need to lock the hashes. | 1120 | * so we don't need to lock the hashes. |
1083 | */ | 1121 | */ |
1084 | static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, | 1122 | static int __udp4_lib_mcast_deliver(struct sk_buff *skb, |
1085 | __be32 saddr, __be32 daddr) | 1123 | struct udphdr *uh, |
1124 | __be32 saddr, __be32 daddr, | ||
1125 | struct hlist_head udptable[]) | ||
1086 | { | 1126 | { |
1087 | struct sock *sk; | 1127 | struct sock *sk; |
1088 | int dif; | 1128 | int dif; |
1089 | 1129 | ||
1090 | read_lock(&udp_hash_lock); | 1130 | read_lock(&udp_hash_lock); |
1091 | sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); | 1131 | sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); |
1092 | dif = skb->dev->ifindex; | 1132 | dif = skb->dev->ifindex; |
1093 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); | 1133 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); |
1094 | if (sk) { | 1134 | if (sk) { |
@@ -1122,65 +1162,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, | |||
1122 | * Otherwise, csum completion requires checksumming packet body, | 1162 | * Otherwise, csum completion requires checksumming packet body, |
1123 | * including udp header and folding it to skb->csum. | 1163 | * including udp header and folding it to skb->csum. |
1124 | */ | 1164 | */ |
1125 | static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | 1165 | static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) |
1126 | unsigned short ulen, __be32 saddr, __be32 daddr) | ||
1127 | { | 1166 | { |
1128 | if (uh->check == 0) { | 1167 | if (uh->check == 0) { |
1129 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1168 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1130 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1169 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1131 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) | 1170 | if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, |
1171 | skb->len, IPPROTO_UDP, skb->csum )) | ||
1132 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1172 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1133 | } | 1173 | } |
1134 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | 1174 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) |
1135 | skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); | 1175 | skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, |
1176 | skb->nh.iph->daddr, | ||
1177 | skb->len, IPPROTO_UDP, 0); | ||
1136 | /* Probably, we should checksum udp header (it should be in cache | 1178 | /* Probably, we should checksum udp header (it should be in cache |
1137 | * in any case) and data in tiny packets (< rx copybreak). | 1179 | * in any case) and data in tiny packets (< rx copybreak). |
1138 | */ | 1180 | */ |
1181 | |||
1182 | /* UDP = UDP-Lite with a non-partial checksum coverage */ | ||
1183 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
1139 | } | 1184 | } |
1140 | 1185 | ||
1141 | /* | 1186 | /* |
1142 | * All we need to do is get the socket, and then do a checksum. | 1187 | * All we need to do is get the socket, and then do a checksum. |
1143 | */ | 1188 | */ |
1144 | 1189 | ||
1145 | int udp_rcv(struct sk_buff *skb) | 1190 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], |
1191 | int is_udplite) | ||
1146 | { | 1192 | { |
1147 | struct sock *sk; | 1193 | struct sock *sk; |
1148 | struct udphdr *uh; | 1194 | struct udphdr *uh = skb->h.uh; |
1149 | unsigned short ulen; | 1195 | unsigned short ulen; |
1150 | struct rtable *rt = (struct rtable*)skb->dst; | 1196 | struct rtable *rt = (struct rtable*)skb->dst; |
1151 | __be32 saddr = skb->nh.iph->saddr; | 1197 | __be32 saddr = skb->nh.iph->saddr; |
1152 | __be32 daddr = skb->nh.iph->daddr; | 1198 | __be32 daddr = skb->nh.iph->daddr; |
1153 | int len = skb->len; | ||
1154 | 1199 | ||
1155 | /* | 1200 | /* |
1156 | * Validate the packet and the UDP length. | 1201 | * Validate the packet. |
1157 | */ | 1202 | */ |
1158 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | 1203 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
1159 | goto no_header; | 1204 | goto drop; /* No space for header. */ |
1160 | |||
1161 | uh = skb->h.uh; | ||
1162 | 1205 | ||
1163 | ulen = ntohs(uh->len); | 1206 | ulen = ntohs(uh->len); |
1164 | 1207 | if (ulen > skb->len) | |
1165 | if (ulen > len || ulen < sizeof(*uh)) | ||
1166 | goto short_packet; | 1208 | goto short_packet; |
1167 | 1209 | ||
1168 | if (pskb_trim_rcsum(skb, ulen)) | 1210 | if(! is_udplite ) { /* UDP validates ulen. */ |
1169 | goto short_packet; | 1211 | |
1212 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | ||
1213 | goto short_packet; | ||
1170 | 1214 | ||
1171 | udp_checksum_init(skb, uh, ulen, saddr, daddr); | 1215 | udp4_csum_init(skb, uh); |
1216 | |||
1217 | } else { /* UDP-Lite validates cscov. */ | ||
1218 | if (udplite4_csum_init(skb, uh)) | ||
1219 | goto csum_error; | ||
1220 | } | ||
1172 | 1221 | ||
1173 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1222 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1174 | return udp_v4_mcast_deliver(skb, uh, saddr, daddr); | 1223 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); |
1175 | 1224 | ||
1176 | sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); | 1225 | sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, |
1226 | skb->dev->ifindex, udptable ); | ||
1177 | 1227 | ||
1178 | if (sk != NULL) { | 1228 | if (sk != NULL) { |
1179 | int ret = udp_queue_rcv_skb(sk, skb); | 1229 | int ret = udp_queue_rcv_skb(sk, skb); |
1180 | sock_put(sk); | 1230 | sock_put(sk); |
1181 | 1231 | ||
1182 | /* a return value > 0 means to resubmit the input, but | 1232 | /* a return value > 0 means to resubmit the input, but |
1183 | * it it wants the return to be -protocol, or 0 | 1233 | * it wants the return to be -protocol, or 0 |
1184 | */ | 1234 | */ |
1185 | if (ret > 0) | 1235 | if (ret > 0) |
1186 | return -ret; | 1236 | return -ret; |
@@ -1192,10 +1242,10 @@ int udp_rcv(struct sk_buff *skb) | |||
1192 | nf_reset(skb); | 1242 | nf_reset(skb); |
1193 | 1243 | ||
1194 | /* No socket. Drop packet silently, if checksum is wrong */ | 1244 | /* No socket. Drop packet silently, if checksum is wrong */ |
1195 | if (udp_checksum_complete(skb)) | 1245 | if (udp_lib_checksum_complete(skb)) |
1196 | goto csum_error; | 1246 | goto csum_error; |
1197 | 1247 | ||
1198 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS); | 1248 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); |
1199 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 1249 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
1200 | 1250 | ||
1201 | /* | 1251 | /* |
@@ -1206,36 +1256,40 @@ int udp_rcv(struct sk_buff *skb) | |||
1206 | return(0); | 1256 | return(0); |
1207 | 1257 | ||
1208 | short_packet: | 1258 | short_packet: |
1209 | LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | 1259 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", |
1260 | is_udplite? "-Lite" : "", | ||
1210 | NIPQUAD(saddr), | 1261 | NIPQUAD(saddr), |
1211 | ntohs(uh->source), | 1262 | ntohs(uh->source), |
1212 | ulen, | 1263 | ulen, |
1213 | len, | 1264 | skb->len, |
1214 | NIPQUAD(daddr), | 1265 | NIPQUAD(daddr), |
1215 | ntohs(uh->dest)); | 1266 | ntohs(uh->dest)); |
1216 | no_header: | 1267 | goto drop; |
1217 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | ||
1218 | kfree_skb(skb); | ||
1219 | return(0); | ||
1220 | 1268 | ||
1221 | csum_error: | 1269 | csum_error: |
1222 | /* | 1270 | /* |
1223 | * RFC1122: OK. Discards the bad packet silently (as far as | 1271 | * RFC1122: OK. Discards the bad packet silently (as far as |
1224 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1272 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1225 | */ | 1273 | */ |
1226 | LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | 1274 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", |
1275 | is_udplite? "-Lite" : "", | ||
1227 | NIPQUAD(saddr), | 1276 | NIPQUAD(saddr), |
1228 | ntohs(uh->source), | 1277 | ntohs(uh->source), |
1229 | NIPQUAD(daddr), | 1278 | NIPQUAD(daddr), |
1230 | ntohs(uh->dest), | 1279 | ntohs(uh->dest), |
1231 | ulen); | 1280 | ulen); |
1232 | drop: | 1281 | drop: |
1233 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1282 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); |
1234 | kfree_skb(skb); | 1283 | kfree_skb(skb); |
1235 | return(0); | 1284 | return(0); |
1236 | } | 1285 | } |
1237 | 1286 | ||
1238 | static int udp_destroy_sock(struct sock *sk) | 1287 | __inline__ int udp_rcv(struct sk_buff *skb) |
1288 | { | ||
1289 | return __udp4_lib_rcv(skb, udp_hash, 0); | ||
1290 | } | ||
1291 | |||
1292 | int udp_destroy_sock(struct sock *sk) | ||
1239 | { | 1293 | { |
1240 | lock_sock(sk); | 1294 | lock_sock(sk); |
1241 | udp_flush_pending_frames(sk); | 1295 | udp_flush_pending_frames(sk); |
@@ -1246,8 +1300,9 @@ static int udp_destroy_sock(struct sock *sk) | |||
1246 | /* | 1300 | /* |
1247 | * Socket option code for UDP | 1301 | * Socket option code for UDP |
1248 | */ | 1302 | */ |
1249 | static int do_udp_setsockopt(struct sock *sk, int level, int optname, | 1303 | int udp_lib_setsockopt(struct sock *sk, int level, int optname, |
1250 | char __user *optval, int optlen) | 1304 | char __user *optval, int optlen, |
1305 | int (*push_pending_frames)(struct sock *)) | ||
1251 | { | 1306 | { |
1252 | struct udp_sock *up = udp_sk(sk); | 1307 | struct udp_sock *up = udp_sk(sk); |
1253 | int val; | 1308 | int val; |
@@ -1266,7 +1321,7 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, | |||
1266 | } else { | 1321 | } else { |
1267 | up->corkflag = 0; | 1322 | up->corkflag = 0; |
1268 | lock_sock(sk); | 1323 | lock_sock(sk); |
1269 | udp_push_pending_frames(sk, up); | 1324 | (*push_pending_frames)(sk); |
1270 | release_sock(sk); | 1325 | release_sock(sk); |
1271 | } | 1326 | } |
1272 | break; | 1327 | break; |
@@ -1284,6 +1339,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, | |||
1284 | } | 1339 | } |
1285 | break; | 1340 | break; |
1286 | 1341 | ||
1342 | /* | ||
1343 | * UDP-Lite's partial checksum coverage (RFC 3828). | ||
1344 | */ | ||
1345 | /* The sender sets actual checksum coverage length via this option. | ||
1346 | * The case coverage > packet length is handled by send module. */ | ||
1347 | case UDPLITE_SEND_CSCOV: | ||
1348 | if (!up->pcflag) /* Disable the option on UDP sockets */ | ||
1349 | return -ENOPROTOOPT; | ||
1350 | if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ | ||
1351 | val = 8; | ||
1352 | up->pcslen = val; | ||
1353 | up->pcflag |= UDPLITE_SEND_CC; | ||
1354 | break; | ||
1355 | |||
1356 | /* The receiver specifies a minimum checksum coverage value. To make | ||
1357 | * sense, this should be set to at least 8 (as done below). If zero is | ||
1358 | * used, this again means full checksum coverage. */ | ||
1359 | case UDPLITE_RECV_CSCOV: | ||
1360 | if (!up->pcflag) /* Disable the option on UDP sockets */ | ||
1361 | return -ENOPROTOOPT; | ||
1362 | if (val != 0 && val < 8) /* Avoid silly minimal values. */ | ||
1363 | val = 8; | ||
1364 | up->pcrlen = val; | ||
1365 | up->pcflag |= UDPLITE_RECV_CC; | ||
1366 | break; | ||
1367 | |||
1287 | default: | 1368 | default: |
1288 | err = -ENOPROTOOPT; | 1369 | err = -ENOPROTOOPT; |
1289 | break; | 1370 | break; |
@@ -1292,26 +1373,28 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, | |||
1292 | return err; | 1373 | return err; |
1293 | } | 1374 | } |
1294 | 1375 | ||
1295 | static int udp_setsockopt(struct sock *sk, int level, int optname, | 1376 | int udp_setsockopt(struct sock *sk, int level, int optname, |
1296 | char __user *optval, int optlen) | 1377 | char __user *optval, int optlen) |
1297 | { | 1378 | { |
1298 | if (level != SOL_UDP) | 1379 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1299 | return ip_setsockopt(sk, level, optname, optval, optlen); | 1380 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
1300 | return do_udp_setsockopt(sk, level, optname, optval, optlen); | 1381 | udp_push_pending_frames); |
1382 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
1301 | } | 1383 | } |
1302 | 1384 | ||
1303 | #ifdef CONFIG_COMPAT | 1385 | #ifdef CONFIG_COMPAT |
1304 | static int compat_udp_setsockopt(struct sock *sk, int level, int optname, | 1386 | int compat_udp_setsockopt(struct sock *sk, int level, int optname, |
1305 | char __user *optval, int optlen) | 1387 | char __user *optval, int optlen) |
1306 | { | 1388 | { |
1307 | if (level != SOL_UDP) | 1389 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1308 | return compat_ip_setsockopt(sk, level, optname, optval, optlen); | 1390 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
1309 | return do_udp_setsockopt(sk, level, optname, optval, optlen); | 1391 | udp_push_pending_frames); |
1392 | return compat_ip_setsockopt(sk, level, optname, optval, optlen); | ||
1310 | } | 1393 | } |
1311 | #endif | 1394 | #endif |
1312 | 1395 | ||
1313 | static int do_udp_getsockopt(struct sock *sk, int level, int optname, | 1396 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, |
1314 | char __user *optval, int __user *optlen) | 1397 | char __user *optval, int __user *optlen) |
1315 | { | 1398 | { |
1316 | struct udp_sock *up = udp_sk(sk); | 1399 | struct udp_sock *up = udp_sk(sk); |
1317 | int val, len; | 1400 | int val, len; |
@@ -1333,6 +1416,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, | |||
1333 | val = up->encap_type; | 1416 | val = up->encap_type; |
1334 | break; | 1417 | break; |
1335 | 1418 | ||
1419 | /* The following two cannot be changed on UDP sockets; the return is | ||
1420 | * always 0 (which corresponds to the full checksum coverage of UDP). */ | ||
1421 | case UDPLITE_SEND_CSCOV: | ||
1422 | val = up->pcslen; | ||
1423 | break; | ||
1424 | |||
1425 | case UDPLITE_RECV_CSCOV: | ||
1426 | val = up->pcrlen; | ||
1427 | break; | ||
1428 | |||
1336 | default: | 1429 | default: |
1337 | return -ENOPROTOOPT; | 1430 | return -ENOPROTOOPT; |
1338 | }; | 1431 | }; |
@@ -1344,21 +1437,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, | |||
1344 | return 0; | 1437 | return 0; |
1345 | } | 1438 | } |
1346 | 1439 | ||
1347 | static int udp_getsockopt(struct sock *sk, int level, int optname, | 1440 | int udp_getsockopt(struct sock *sk, int level, int optname, |
1348 | char __user *optval, int __user *optlen) | 1441 | char __user *optval, int __user *optlen) |
1349 | { | 1442 | { |
1350 | if (level != SOL_UDP) | 1443 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1351 | return ip_getsockopt(sk, level, optname, optval, optlen); | 1444 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
1352 | return do_udp_getsockopt(sk, level, optname, optval, optlen); | 1445 | return ip_getsockopt(sk, level, optname, optval, optlen); |
1353 | } | 1446 | } |
1354 | 1447 | ||
1355 | #ifdef CONFIG_COMPAT | 1448 | #ifdef CONFIG_COMPAT |
1356 | static int compat_udp_getsockopt(struct sock *sk, int level, int optname, | 1449 | int compat_udp_getsockopt(struct sock *sk, int level, int optname, |
1357 | char __user *optval, int __user *optlen) | 1450 | char __user *optval, int __user *optlen) |
1358 | { | 1451 | { |
1359 | if (level != SOL_UDP) | 1452 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1360 | return compat_ip_getsockopt(sk, level, optname, optval, optlen); | 1453 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
1361 | return do_udp_getsockopt(sk, level, optname, optval, optlen); | 1454 | return compat_ip_getsockopt(sk, level, optname, optval, optlen); |
1362 | } | 1455 | } |
1363 | #endif | 1456 | #endif |
1364 | /** | 1457 | /** |
@@ -1378,7 +1471,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1378 | { | 1471 | { |
1379 | unsigned int mask = datagram_poll(file, sock, wait); | 1472 | unsigned int mask = datagram_poll(file, sock, wait); |
1380 | struct sock *sk = sock->sk; | 1473 | struct sock *sk = sock->sk; |
1381 | 1474 | int is_lite = IS_UDPLITE(sk); | |
1475 | |||
1382 | /* Check for false positives due to checksum errors */ | 1476 | /* Check for false positives due to checksum errors */ |
1383 | if ( (mask & POLLRDNORM) && | 1477 | if ( (mask & POLLRDNORM) && |
1384 | !(file->f_flags & O_NONBLOCK) && | 1478 | !(file->f_flags & O_NONBLOCK) && |
@@ -1388,8 +1482,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1388 | 1482 | ||
1389 | spin_lock_bh(&rcvq->lock); | 1483 | spin_lock_bh(&rcvq->lock); |
1390 | while ((skb = skb_peek(rcvq)) != NULL) { | 1484 | while ((skb = skb_peek(rcvq)) != NULL) { |
1391 | if (udp_checksum_complete(skb)) { | 1485 | if (udp_lib_checksum_complete(skb)) { |
1392 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1486 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); |
1393 | __skb_unlink(skb, rcvq); | 1487 | __skb_unlink(skb, rcvq); |
1394 | kfree_skb(skb); | 1488 | kfree_skb(skb); |
1395 | } else { | 1489 | } else { |
@@ -1411,7 +1505,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1411 | struct proto udp_prot = { | 1505 | struct proto udp_prot = { |
1412 | .name = "UDP", | 1506 | .name = "UDP", |
1413 | .owner = THIS_MODULE, | 1507 | .owner = THIS_MODULE, |
1414 | .close = udp_close, | 1508 | .close = udp_lib_close, |
1415 | .connect = ip4_datagram_connect, | 1509 | .connect = ip4_datagram_connect, |
1416 | .disconnect = udp_disconnect, | 1510 | .disconnect = udp_disconnect, |
1417 | .ioctl = udp_ioctl, | 1511 | .ioctl = udp_ioctl, |
@@ -1422,8 +1516,8 @@ struct proto udp_prot = { | |||
1422 | .recvmsg = udp_recvmsg, | 1516 | .recvmsg = udp_recvmsg, |
1423 | .sendpage = udp_sendpage, | 1517 | .sendpage = udp_sendpage, |
1424 | .backlog_rcv = udp_queue_rcv_skb, | 1518 | .backlog_rcv = udp_queue_rcv_skb, |
1425 | .hash = udp_v4_hash, | 1519 | .hash = udp_lib_hash, |
1426 | .unhash = udp_v4_unhash, | 1520 | .unhash = udp_lib_unhash, |
1427 | .get_port = udp_v4_get_port, | 1521 | .get_port = udp_v4_get_port, |
1428 | .obj_size = sizeof(struct udp_sock), | 1522 | .obj_size = sizeof(struct udp_sock), |
1429 | #ifdef CONFIG_COMPAT | 1523 | #ifdef CONFIG_COMPAT |
@@ -1442,7 +1536,7 @@ static struct sock *udp_get_first(struct seq_file *seq) | |||
1442 | 1536 | ||
1443 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1537 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { |
1444 | struct hlist_node *node; | 1538 | struct hlist_node *node; |
1445 | sk_for_each(sk, node, &udp_hash[state->bucket]) { | 1539 | sk_for_each(sk, node, state->hashtable + state->bucket) { |
1446 | if (sk->sk_family == state->family) | 1540 | if (sk->sk_family == state->family) |
1447 | goto found; | 1541 | goto found; |
1448 | } | 1542 | } |
@@ -1463,7 +1557,7 @@ try_again: | |||
1463 | } while (sk && sk->sk_family != state->family); | 1557 | } while (sk && sk->sk_family != state->family); |
1464 | 1558 | ||
1465 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { | 1559 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { |
1466 | sk = sk_head(&udp_hash[state->bucket]); | 1560 | sk = sk_head(state->hashtable + state->bucket); |
1467 | goto try_again; | 1561 | goto try_again; |
1468 | } | 1562 | } |
1469 | return sk; | 1563 | return sk; |
@@ -1513,6 +1607,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) | |||
1513 | if (!s) | 1607 | if (!s) |
1514 | goto out; | 1608 | goto out; |
1515 | s->family = afinfo->family; | 1609 | s->family = afinfo->family; |
1610 | s->hashtable = afinfo->hashtable; | ||
1516 | s->seq_ops.start = udp_seq_start; | 1611 | s->seq_ops.start = udp_seq_start; |
1517 | s->seq_ops.next = udp_seq_next; | 1612 | s->seq_ops.next = udp_seq_next; |
1518 | s->seq_ops.show = afinfo->seq_show; | 1613 | s->seq_ops.show = afinfo->seq_show; |
@@ -1579,7 +1674,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) | |||
1579 | atomic_read(&sp->sk_refcnt), sp); | 1674 | atomic_read(&sp->sk_refcnt), sp); |
1580 | } | 1675 | } |
1581 | 1676 | ||
1582 | static int udp4_seq_show(struct seq_file *seq, void *v) | 1677 | int udp4_seq_show(struct seq_file *seq, void *v) |
1583 | { | 1678 | { |
1584 | if (v == SEQ_START_TOKEN) | 1679 | if (v == SEQ_START_TOKEN) |
1585 | seq_printf(seq, "%-127s\n", | 1680 | seq_printf(seq, "%-127s\n", |
@@ -1602,6 +1697,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { | |||
1602 | .owner = THIS_MODULE, | 1697 | .owner = THIS_MODULE, |
1603 | .name = "udp", | 1698 | .name = "udp", |
1604 | .family = AF_INET, | 1699 | .family = AF_INET, |
1700 | .hashtable = udp_hash, | ||
1605 | .seq_show = udp4_seq_show, | 1701 | .seq_show = udp4_seq_show, |
1606 | .seq_fops = &udp4_seq_fops, | 1702 | .seq_fops = &udp4_seq_fops, |
1607 | }; | 1703 | }; |
@@ -1624,6 +1720,8 @@ EXPORT_SYMBOL(udp_ioctl); | |||
1624 | EXPORT_SYMBOL(udp_get_port); | 1720 | EXPORT_SYMBOL(udp_get_port); |
1625 | EXPORT_SYMBOL(udp_prot); | 1721 | EXPORT_SYMBOL(udp_prot); |
1626 | EXPORT_SYMBOL(udp_sendmsg); | 1722 | EXPORT_SYMBOL(udp_sendmsg); |
1723 | EXPORT_SYMBOL(udp_lib_getsockopt); | ||
1724 | EXPORT_SYMBOL(udp_lib_setsockopt); | ||
1627 | EXPORT_SYMBOL(udp_poll); | 1725 | EXPORT_SYMBOL(udp_poll); |
1628 | 1726 | ||
1629 | #ifdef CONFIG_PROC_FS | 1727 | #ifdef CONFIG_PROC_FS |