diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 10 | ||||
-rw-r--r-- | net/ipv4/Makefile | 3 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 7 | ||||
-rw-r--r-- | net/ipv4/proc.c | 5 | ||||
-rw-r--r-- | net/ipv4/udp.c | 1090 | ||||
-rw-r--r-- | net/ipv4/udp_ipv4.c | 1134 | ||||
-rw-r--r-- | net/ipv4/udplite.c (renamed from net/ipv4/udplite_ipv4.c) | 0 |
7 files changed, 1089 insertions, 1160 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5098fd2ff4d0..9c7e5ffb223d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -632,15 +632,5 @@ config TCP_MD5SIG | |||
632 | 632 | ||
633 | If unsure, say N. | 633 | If unsure, say N. |
634 | 634 | ||
635 | config IP_UDPLITE | ||
636 | bool "IP: UDP-Lite Protocol (RFC 3828)" | ||
637 | default n | ||
638 | ---help--- | ||
639 | UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length | ||
640 | checksum. Read <file:Documentation/networking/udplite.txt> for | ||
641 | details. | ||
642 | |||
643 | If unsure, say N. | ||
644 | |||
645 | source "net/ipv4/ipvs/Kconfig" | 635 | source "net/ipv4/ipvs/Kconfig" |
646 | 636 | ||
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index d5226241d5ed..ad40ef3f9ebc 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ | |||
8 | inet_timewait_sock.o inet_connection_sock.o \ | 8 | inet_timewait_sock.o inet_connection_sock.o \ |
9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ | 9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ |
10 | tcp_minisocks.o tcp_cong.o \ | 10 | tcp_minisocks.o tcp_cong.o \ |
11 | datagram.o raw.o udp.o udp_ipv4.o \ | 11 | datagram.o raw.o udp.o udplite.o \ |
12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ | 12 | arp.o icmp.o devinet.o af_inet.o igmp.o \ |
13 | fib_frontend.o fib_semantics.o \ | 13 | fib_frontend.o fib_semantics.o \ |
14 | inet_fragment.o | 14 | inet_fragment.o |
@@ -49,7 +49,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | |||
49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
50 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o | 50 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o |
51 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o | 51 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o |
52 | obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o | ||
53 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | 52 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o |
54 | 53 | ||
55 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 54 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 25871c6c7444..4cb8a1385539 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -1317,18 +1317,15 @@ static int __init init_ipv4_mibs(void) | |||
1317 | if (snmp_mib_init((void **)udp_statistics, | 1317 | if (snmp_mib_init((void **)udp_statistics, |
1318 | sizeof(struct udp_mib)) < 0) | 1318 | sizeof(struct udp_mib)) < 0) |
1319 | goto err_udp_mib; | 1319 | goto err_udp_mib; |
1320 | #ifdef CONFIG_IP_UDPLITE | ||
1321 | if (snmp_mib_init((void **)udplite_statistics, | 1320 | if (snmp_mib_init((void **)udplite_statistics, |
1322 | sizeof(struct udp_mib)) < 0) | 1321 | sizeof(struct udp_mib)) < 0) |
1323 | goto err_udplite_mib; | 1322 | goto err_udplite_mib; |
1324 | #endif | 1323 | |
1325 | tcp_mib_init(); | 1324 | tcp_mib_init(); |
1326 | 1325 | ||
1327 | return 0; | 1326 | return 0; |
1328 | 1327 | ||
1329 | #ifdef CONFIG_IP_UDPLITE | ||
1330 | err_udplite_mib: | 1328 | err_udplite_mib: |
1331 | #endif | ||
1332 | snmp_mib_free((void **)udp_statistics); | 1329 | snmp_mib_free((void **)udp_statistics); |
1333 | err_udp_mib: | 1330 | err_udp_mib: |
1334 | snmp_mib_free((void **)tcp_statistics); | 1331 | snmp_mib_free((void **)tcp_statistics); |
@@ -1426,10 +1423,8 @@ static int __init inet_init(void) | |||
1426 | /* Setup UDP memory threshold */ | 1423 | /* Setup UDP memory threshold */ |
1427 | udp_init(); | 1424 | udp_init(); |
1428 | 1425 | ||
1429 | #ifdef CONFIG_IP_UDPLITE | ||
1430 | /* Add UDP-Lite (RFC 3828) */ | 1426 | /* Add UDP-Lite (RFC 3828) */ |
1431 | udplite4_register(); | 1427 | udplite4_register(); |
1432 | #endif | ||
1433 | 1428 | ||
1434 | /* | 1429 | /* |
1435 | * Set the ICMP layer up | 1430 | * Set the ICMP layer up |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d75ddb7fa4b8..d63474c6b400 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -59,9 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
59 | atomic_read(&tcp_memory_allocated)); | 59 | atomic_read(&tcp_memory_allocated)); |
60 | seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), | 60 | seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), |
61 | atomic_read(&udp_memory_allocated)); | 61 | atomic_read(&udp_memory_allocated)); |
62 | #ifdef CONFIG_IP_UDPLITE | ||
63 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); | 62 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); |
64 | #endif | ||
65 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); | 63 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); |
66 | seq_printf(seq, "FRAG: inuse %d memory %d\n", | 64 | seq_printf(seq, "FRAG: inuse %d memory %d\n", |
67 | ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); | 65 | ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); |
@@ -351,7 +349,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
351 | snmp_fold_field((void **)udp_statistics, | 349 | snmp_fold_field((void **)udp_statistics, |
352 | snmp4_udp_list[i].entry)); | 350 | snmp4_udp_list[i].entry)); |
353 | 351 | ||
354 | #ifdef CONFIG_IP_UDPLITE | ||
355 | /* the UDP and UDP-Lite MIBs are the same */ | 352 | /* the UDP and UDP-Lite MIBs are the same */ |
356 | seq_puts(seq, "\nUdpLite:"); | 353 | seq_puts(seq, "\nUdpLite:"); |
357 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) | 354 | for (i = 0; snmp4_udp_list[i].name != NULL; i++) |
@@ -362,7 +359,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
362 | seq_printf(seq, " %lu", | 359 | seq_printf(seq, " %lu", |
363 | snmp_fold_field((void **)udplite_statistics, | 360 | snmp_fold_field((void **)udplite_statistics, |
364 | snmp4_udp_list[i].entry)); | 361 | snmp4_udp_list[i].entry)); |
365 | #endif | 362 | |
366 | seq_putc(seq, '\n'); | 363 | seq_putc(seq, '\n'); |
367 | return 0; | 364 | return 0; |
368 | } | 365 | } |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c53d7673b57d..7ea1b67b6de1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum, | |||
246 | return __udp_lib_get_port(sk, snum, udp_hash, scmp); | 246 | return __udp_lib_get_port(sk, snum, udp_hash, scmp); |
247 | } | 247 | } |
248 | 248 | ||
249 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
250 | { | ||
251 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | ||
252 | |||
253 | return ( !ipv6_only_sock(sk2) && | ||
254 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | ||
255 | inet1->rcv_saddr == inet2->rcv_saddr )); | ||
256 | } | ||
257 | |||
258 | static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | ||
259 | { | ||
260 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | ||
261 | } | ||
262 | |||
263 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | ||
264 | * harder than this. -DaveM | ||
265 | */ | ||
266 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | ||
267 | __be16 sport, __be32 daddr, __be16 dport, | ||
268 | int dif, struct hlist_head udptable[]) | ||
269 | { | ||
270 | struct sock *sk, *result = NULL; | ||
271 | struct hlist_node *node; | ||
272 | unsigned short hnum = ntohs(dport); | ||
273 | int badness = -1; | ||
274 | |||
275 | read_lock(&udp_hash_lock); | ||
276 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | ||
277 | struct inet_sock *inet = inet_sk(sk); | ||
278 | |||
279 | if (sk->sk_net == net && sk->sk_hash == hnum && | ||
280 | !ipv6_only_sock(sk)) { | ||
281 | int score = (sk->sk_family == PF_INET ? 1 : 0); | ||
282 | if (inet->rcv_saddr) { | ||
283 | if (inet->rcv_saddr != daddr) | ||
284 | continue; | ||
285 | score+=2; | ||
286 | } | ||
287 | if (inet->daddr) { | ||
288 | if (inet->daddr != saddr) | ||
289 | continue; | ||
290 | score+=2; | ||
291 | } | ||
292 | if (inet->dport) { | ||
293 | if (inet->dport != sport) | ||
294 | continue; | ||
295 | score+=2; | ||
296 | } | ||
297 | if (sk->sk_bound_dev_if) { | ||
298 | if (sk->sk_bound_dev_if != dif) | ||
299 | continue; | ||
300 | score+=2; | ||
301 | } | ||
302 | if (score == 9) { | ||
303 | result = sk; | ||
304 | break; | ||
305 | } else if (score > badness) { | ||
306 | result = sk; | ||
307 | badness = score; | ||
308 | } | ||
309 | } | ||
310 | } | ||
311 | if (result) | ||
312 | sock_hold(result); | ||
313 | read_unlock(&udp_hash_lock); | ||
314 | return result; | ||
315 | } | ||
316 | |||
317 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | ||
318 | __be16 loc_port, __be32 loc_addr, | ||
319 | __be16 rmt_port, __be32 rmt_addr, | ||
320 | int dif) | ||
321 | { | ||
322 | struct hlist_node *node; | ||
323 | struct sock *s = sk; | ||
324 | unsigned short hnum = ntohs(loc_port); | ||
325 | |||
326 | sk_for_each_from(s, node) { | ||
327 | struct inet_sock *inet = inet_sk(s); | ||
328 | |||
329 | if (s->sk_hash != hnum || | ||
330 | (inet->daddr && inet->daddr != rmt_addr) || | ||
331 | (inet->dport != rmt_port && inet->dport) || | ||
332 | (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || | ||
333 | ipv6_only_sock(s) || | ||
334 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) | ||
335 | continue; | ||
336 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) | ||
337 | continue; | ||
338 | goto found; | ||
339 | } | ||
340 | s = NULL; | ||
341 | found: | ||
342 | return s; | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * This routine is called by the ICMP module when it gets some | ||
347 | * sort of error condition. If err < 0 then the socket should | ||
348 | * be closed and the error returned to the user. If err > 0 | ||
349 | * it's just the icmp type << 8 | icmp code. | ||
350 | * Header points to the ip header of the error packet. We move | ||
351 | * on past this. Then (as it used to claim before adjustment) | ||
352 | * header points to the first 8 bytes of the udp header. We need | ||
353 | * to find the appropriate port. | ||
354 | */ | ||
355 | |||
356 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | ||
357 | { | ||
358 | struct inet_sock *inet; | ||
359 | struct iphdr *iph = (struct iphdr*)skb->data; | ||
360 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | ||
361 | const int type = icmp_hdr(skb)->type; | ||
362 | const int code = icmp_hdr(skb)->code; | ||
363 | struct sock *sk; | ||
364 | int harderr; | ||
365 | int err; | ||
366 | |||
367 | sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, | ||
368 | iph->saddr, uh->source, skb->dev->ifindex, udptable); | ||
369 | if (sk == NULL) { | ||
370 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
371 | return; /* No socket for error */ | ||
372 | } | ||
373 | |||
374 | err = 0; | ||
375 | harderr = 0; | ||
376 | inet = inet_sk(sk); | ||
377 | |||
378 | switch (type) { | ||
379 | default: | ||
380 | case ICMP_TIME_EXCEEDED: | ||
381 | err = EHOSTUNREACH; | ||
382 | break; | ||
383 | case ICMP_SOURCE_QUENCH: | ||
384 | goto out; | ||
385 | case ICMP_PARAMETERPROB: | ||
386 | err = EPROTO; | ||
387 | harderr = 1; | ||
388 | break; | ||
389 | case ICMP_DEST_UNREACH: | ||
390 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | ||
391 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { | ||
392 | err = EMSGSIZE; | ||
393 | harderr = 1; | ||
394 | break; | ||
395 | } | ||
396 | goto out; | ||
397 | } | ||
398 | err = EHOSTUNREACH; | ||
399 | if (code <= NR_ICMP_UNREACH) { | ||
400 | harderr = icmp_err_convert[code].fatal; | ||
401 | err = icmp_err_convert[code].errno; | ||
402 | } | ||
403 | break; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * RFC1122: OK. Passes ICMP errors back to application, as per | ||
408 | * 4.1.3.3. | ||
409 | */ | ||
410 | if (!inet->recverr) { | ||
411 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | ||
412 | goto out; | ||
413 | } else { | ||
414 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); | ||
415 | } | ||
416 | sk->sk_err = err; | ||
417 | sk->sk_error_report(sk); | ||
418 | out: | ||
419 | sock_put(sk); | ||
420 | } | ||
421 | |||
422 | void udp_err(struct sk_buff *skb, u32 info) | ||
423 | { | ||
424 | __udp4_lib_err(skb, info, udp_hash); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Throw away all pending data and cancel the corking. Socket is locked. | ||
429 | */ | ||
430 | static void udp_flush_pending_frames(struct sock *sk) | ||
431 | { | ||
432 | struct udp_sock *up = udp_sk(sk); | ||
433 | |||
434 | if (up->pending) { | ||
435 | up->len = 0; | ||
436 | up->pending = 0; | ||
437 | ip_flush_pending_frames(sk); | ||
438 | } | ||
439 | } | ||
440 | |||
441 | /** | ||
442 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | ||
443 | * @sk: socket we are sending on | ||
444 | * @skb: sk_buff containing the filled-in UDP header | ||
445 | * (checksum field must be zeroed out) | ||
446 | */ | ||
447 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | ||
448 | __be32 src, __be32 dst, int len ) | ||
449 | { | ||
450 | unsigned int offset; | ||
451 | struct udphdr *uh = udp_hdr(skb); | ||
452 | __wsum csum = 0; | ||
453 | |||
454 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | ||
455 | /* | ||
456 | * Only one fragment on the socket. | ||
457 | */ | ||
458 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
459 | skb->csum_offset = offsetof(struct udphdr, check); | ||
460 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | ||
461 | } else { | ||
462 | /* | ||
463 | * HW-checksum won't work as there are two or more | ||
464 | * fragments on the socket so that all csums of sk_buffs | ||
465 | * should be together | ||
466 | */ | ||
467 | offset = skb_transport_offset(skb); | ||
468 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
469 | |||
470 | skb->ip_summed = CHECKSUM_NONE; | ||
471 | |||
472 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
473 | csum = csum_add(csum, skb->csum); | ||
474 | } | ||
475 | |||
476 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | ||
477 | if (uh->check == 0) | ||
478 | uh->check = CSUM_MANGLED_0; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
484 | */ | ||
485 | static int udp_push_pending_frames(struct sock *sk) | ||
486 | { | ||
487 | struct udp_sock *up = udp_sk(sk); | ||
488 | struct inet_sock *inet = inet_sk(sk); | ||
489 | struct flowi *fl = &inet->cork.fl; | ||
490 | struct sk_buff *skb; | ||
491 | struct udphdr *uh; | ||
492 | int err = 0; | ||
493 | int is_udplite = IS_UDPLITE(sk); | ||
494 | __wsum csum = 0; | ||
495 | |||
496 | /* Grab the skbuff where UDP header space exists. */ | ||
497 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | ||
498 | goto out; | ||
499 | |||
500 | /* | ||
501 | * Create a UDP header | ||
502 | */ | ||
503 | uh = udp_hdr(skb); | ||
504 | uh->source = fl->fl_ip_sport; | ||
505 | uh->dest = fl->fl_ip_dport; | ||
506 | uh->len = htons(up->len); | ||
507 | uh->check = 0; | ||
508 | |||
509 | if (is_udplite) /* UDP-Lite */ | ||
510 | csum = udplite_csum_outgoing(sk, skb); | ||
511 | |||
512 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | ||
513 | |||
514 | skb->ip_summed = CHECKSUM_NONE; | ||
515 | goto send; | ||
516 | |||
517 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | ||
518 | |||
519 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); | ||
520 | goto send; | ||
521 | |||
522 | } else /* `normal' UDP */ | ||
523 | csum = udp_csum_outgoing(sk, skb); | ||
524 | |||
525 | /* add protocol-dependent pseudo-header */ | ||
526 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | ||
527 | sk->sk_protocol, csum ); | ||
528 | if (uh->check == 0) | ||
529 | uh->check = CSUM_MANGLED_0; | ||
530 | |||
531 | send: | ||
532 | err = ip_push_pending_frames(sk); | ||
533 | out: | ||
534 | up->len = 0; | ||
535 | up->pending = 0; | ||
536 | if (!err) | ||
537 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); | ||
538 | return err; | ||
539 | } | ||
540 | |||
541 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
542 | size_t len) | ||
543 | { | ||
544 | struct inet_sock *inet = inet_sk(sk); | ||
545 | struct udp_sock *up = udp_sk(sk); | ||
546 | int ulen = len; | ||
547 | struct ipcm_cookie ipc; | ||
548 | struct rtable *rt = NULL; | ||
549 | int free = 0; | ||
550 | int connected = 0; | ||
551 | __be32 daddr, faddr, saddr; | ||
552 | __be16 dport; | ||
553 | u8 tos; | ||
554 | int err, is_udplite = IS_UDPLITE(sk); | ||
555 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | ||
556 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | ||
557 | |||
558 | if (len > 0xFFFF) | ||
559 | return -EMSGSIZE; | ||
560 | |||
561 | /* | ||
562 | * Check the flags. | ||
563 | */ | ||
564 | |||
565 | if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ | ||
566 | return -EOPNOTSUPP; | ||
567 | |||
568 | ipc.opt = NULL; | ||
569 | |||
570 | if (up->pending) { | ||
571 | /* | ||
572 | * There are pending frames. | ||
573 | * The socket lock must be held while it's corked. | ||
574 | */ | ||
575 | lock_sock(sk); | ||
576 | if (likely(up->pending)) { | ||
577 | if (unlikely(up->pending != AF_INET)) { | ||
578 | release_sock(sk); | ||
579 | return -EINVAL; | ||
580 | } | ||
581 | goto do_append_data; | ||
582 | } | ||
583 | release_sock(sk); | ||
584 | } | ||
585 | ulen += sizeof(struct udphdr); | ||
586 | |||
587 | /* | ||
588 | * Get and verify the address. | ||
589 | */ | ||
590 | if (msg->msg_name) { | ||
591 | struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; | ||
592 | if (msg->msg_namelen < sizeof(*usin)) | ||
593 | return -EINVAL; | ||
594 | if (usin->sin_family != AF_INET) { | ||
595 | if (usin->sin_family != AF_UNSPEC) | ||
596 | return -EAFNOSUPPORT; | ||
597 | } | ||
598 | |||
599 | daddr = usin->sin_addr.s_addr; | ||
600 | dport = usin->sin_port; | ||
601 | if (dport == 0) | ||
602 | return -EINVAL; | ||
603 | } else { | ||
604 | if (sk->sk_state != TCP_ESTABLISHED) | ||
605 | return -EDESTADDRREQ; | ||
606 | daddr = inet->daddr; | ||
607 | dport = inet->dport; | ||
608 | /* Open fast path for connected socket. | ||
609 | Route will not be used, if at least one option is set. | ||
610 | */ | ||
611 | connected = 1; | ||
612 | } | ||
613 | ipc.addr = inet->saddr; | ||
614 | |||
615 | ipc.oif = sk->sk_bound_dev_if; | ||
616 | if (msg->msg_controllen) { | ||
617 | err = ip_cmsg_send(msg, &ipc); | ||
618 | if (err) | ||
619 | return err; | ||
620 | if (ipc.opt) | ||
621 | free = 1; | ||
622 | connected = 0; | ||
623 | } | ||
624 | if (!ipc.opt) | ||
625 | ipc.opt = inet->opt; | ||
626 | |||
627 | saddr = ipc.addr; | ||
628 | ipc.addr = faddr = daddr; | ||
629 | |||
630 | if (ipc.opt && ipc.opt->srr) { | ||
631 | if (!daddr) | ||
632 | return -EINVAL; | ||
633 | faddr = ipc.opt->faddr; | ||
634 | connected = 0; | ||
635 | } | ||
636 | tos = RT_TOS(inet->tos); | ||
637 | if (sock_flag(sk, SOCK_LOCALROUTE) || | ||
638 | (msg->msg_flags & MSG_DONTROUTE) || | ||
639 | (ipc.opt && ipc.opt->is_strictroute)) { | ||
640 | tos |= RTO_ONLINK; | ||
641 | connected = 0; | ||
642 | } | ||
643 | |||
644 | if (ipv4_is_multicast(daddr)) { | ||
645 | if (!ipc.oif) | ||
646 | ipc.oif = inet->mc_index; | ||
647 | if (!saddr) | ||
648 | saddr = inet->mc_addr; | ||
649 | connected = 0; | ||
650 | } | ||
651 | |||
652 | if (connected) | ||
653 | rt = (struct rtable*)sk_dst_check(sk, 0); | ||
654 | |||
655 | if (rt == NULL) { | ||
656 | struct flowi fl = { .oif = ipc.oif, | ||
657 | .nl_u = { .ip4_u = | ||
658 | { .daddr = faddr, | ||
659 | .saddr = saddr, | ||
660 | .tos = tos } }, | ||
661 | .proto = sk->sk_protocol, | ||
662 | .uli_u = { .ports = | ||
663 | { .sport = inet->sport, | ||
664 | .dport = dport } } }; | ||
665 | security_sk_classify_flow(sk, &fl); | ||
666 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); | ||
667 | if (err) { | ||
668 | if (err == -ENETUNREACH) | ||
669 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
670 | goto out; | ||
671 | } | ||
672 | |||
673 | err = -EACCES; | ||
674 | if ((rt->rt_flags & RTCF_BROADCAST) && | ||
675 | !sock_flag(sk, SOCK_BROADCAST)) | ||
676 | goto out; | ||
677 | if (connected) | ||
678 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | ||
679 | } | ||
680 | |||
681 | if (msg->msg_flags&MSG_CONFIRM) | ||
682 | goto do_confirm; | ||
683 | back_from_confirm: | ||
684 | |||
685 | saddr = rt->rt_src; | ||
686 | if (!ipc.addr) | ||
687 | daddr = ipc.addr = rt->rt_dst; | ||
688 | |||
689 | lock_sock(sk); | ||
690 | if (unlikely(up->pending)) { | ||
691 | /* The socket is already corked while preparing it. */ | ||
692 | /* ... which is an evident application bug. --ANK */ | ||
693 | release_sock(sk); | ||
694 | |||
695 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); | ||
696 | err = -EINVAL; | ||
697 | goto out; | ||
698 | } | ||
699 | /* | ||
700 | * Now cork the socket to pend data. | ||
701 | */ | ||
702 | inet->cork.fl.fl4_dst = daddr; | ||
703 | inet->cork.fl.fl_ip_dport = dport; | ||
704 | inet->cork.fl.fl4_src = saddr; | ||
705 | inet->cork.fl.fl_ip_sport = inet->sport; | ||
706 | up->pending = AF_INET; | ||
707 | |||
708 | do_append_data: | ||
709 | up->len += ulen; | ||
710 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
711 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | ||
712 | sizeof(struct udphdr), &ipc, rt, | ||
713 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | ||
714 | if (err) | ||
715 | udp_flush_pending_frames(sk); | ||
716 | else if (!corkreq) | ||
717 | err = udp_push_pending_frames(sk); | ||
718 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | ||
719 | up->pending = 0; | ||
720 | release_sock(sk); | ||
721 | |||
722 | out: | ||
723 | ip_rt_put(rt); | ||
724 | if (free) | ||
725 | kfree(ipc.opt); | ||
726 | if (!err) | ||
727 | return len; | ||
728 | /* | ||
729 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | ||
730 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | ||
731 | * we don't have a good statistic (IpOutDiscards but it can be too many | ||
732 | * things). We could add another new stat but at least for now that | ||
733 | * seems like overkill. | ||
734 | */ | ||
735 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
736 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); | ||
737 | } | ||
738 | return err; | ||
739 | |||
740 | do_confirm: | ||
741 | dst_confirm(&rt->u.dst); | ||
742 | if (!(msg->msg_flags&MSG_PROBE) || len) | ||
743 | goto back_from_confirm; | ||
744 | err = 0; | ||
745 | goto out; | ||
746 | } | ||
747 | |||
748 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | ||
749 | size_t size, int flags) | ||
750 | { | ||
751 | struct udp_sock *up = udp_sk(sk); | ||
752 | int ret; | ||
753 | |||
754 | if (!up->pending) { | ||
755 | struct msghdr msg = { .msg_flags = flags|MSG_MORE }; | ||
756 | |||
757 | /* Call udp_sendmsg to specify destination address which | ||
758 | * sendpage interface can't pass. | ||
759 | * This will succeed only when the socket is connected. | ||
760 | */ | ||
761 | ret = udp_sendmsg(NULL, sk, &msg, 0); | ||
762 | if (ret < 0) | ||
763 | return ret; | ||
764 | } | ||
765 | |||
766 | lock_sock(sk); | ||
767 | |||
768 | if (unlikely(!up->pending)) { | ||
769 | release_sock(sk); | ||
770 | |||
771 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); | ||
772 | return -EINVAL; | ||
773 | } | ||
774 | |||
775 | ret = ip_append_page(sk, page, offset, size, flags); | ||
776 | if (ret == -EOPNOTSUPP) { | ||
777 | release_sock(sk); | ||
778 | return sock_no_sendpage(sk->sk_socket, page, offset, | ||
779 | size, flags); | ||
780 | } | ||
781 | if (ret < 0) { | ||
782 | udp_flush_pending_frames(sk); | ||
783 | goto out; | ||
784 | } | ||
785 | |||
786 | up->len += size; | ||
787 | if (!(up->corkflag || (flags&MSG_MORE))) | ||
788 | ret = udp_push_pending_frames(sk); | ||
789 | if (!ret) | ||
790 | ret = size; | ||
791 | out: | ||
792 | release_sock(sk); | ||
793 | return ret; | ||
794 | } | ||
795 | |||
249 | /* | 796 | /* |
250 | * IOCTL requests applicable to the UDP protocol | 797 | * IOCTL requests applicable to the UDP protocol |
251 | */ | 798 | */ |
@@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
286 | return 0; | 833 | return 0; |
287 | } | 834 | } |
288 | 835 | ||
836 | /* | ||
837 | * This should be easy, if there is something there we | ||
838 | * return it, otherwise we block. | ||
839 | */ | ||
840 | |||
841 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
842 | size_t len, int noblock, int flags, int *addr_len) | ||
843 | { | ||
844 | struct inet_sock *inet = inet_sk(sk); | ||
845 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | ||
846 | struct sk_buff *skb; | ||
847 | unsigned int ulen, copied; | ||
848 | int peeked; | ||
849 | int err; | ||
850 | int is_udplite = IS_UDPLITE(sk); | ||
851 | |||
852 | /* | ||
853 | * Check any passed addresses | ||
854 | */ | ||
855 | if (addr_len) | ||
856 | *addr_len=sizeof(*sin); | ||
857 | |||
858 | if (flags & MSG_ERRQUEUE) | ||
859 | return ip_recv_error(sk, msg, len); | ||
860 | |||
861 | try_again: | ||
862 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | ||
863 | &peeked, &err); | ||
864 | if (!skb) | ||
865 | goto out; | ||
866 | |||
867 | ulen = skb->len - sizeof(struct udphdr); | ||
868 | copied = len; | ||
869 | if (copied > ulen) | ||
870 | copied = ulen; | ||
871 | else if (copied < ulen) | ||
872 | msg->msg_flags |= MSG_TRUNC; | ||
873 | |||
874 | /* | ||
875 | * If checksum is needed at all, try to do it while copying the | ||
876 | * data. If the data is truncated, or if we only want a partial | ||
877 | * coverage checksum (UDP-Lite), do it before the copy. | ||
878 | */ | ||
879 | |||
880 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | ||
881 | if (udp_lib_checksum_complete(skb)) | ||
882 | goto csum_copy_err; | ||
883 | } | ||
884 | |||
885 | if (skb_csum_unnecessary(skb)) | ||
886 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | ||
887 | msg->msg_iov, copied ); | ||
888 | else { | ||
889 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | ||
890 | |||
891 | if (err == -EINVAL) | ||
892 | goto csum_copy_err; | ||
893 | } | ||
894 | |||
895 | if (err) | ||
896 | goto out_free; | ||
897 | |||
898 | if (!peeked) | ||
899 | UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); | ||
900 | |||
901 | sock_recv_timestamp(msg, sk, skb); | ||
902 | |||
903 | /* Copy the address. */ | ||
904 | if (sin) | ||
905 | { | ||
906 | sin->sin_family = AF_INET; | ||
907 | sin->sin_port = udp_hdr(skb)->source; | ||
908 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | ||
909 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
910 | } | ||
911 | if (inet->cmsg_flags) | ||
912 | ip_cmsg_recv(msg, skb); | ||
913 | |||
914 | err = copied; | ||
915 | if (flags & MSG_TRUNC) | ||
916 | err = ulen; | ||
917 | |||
918 | out_free: | ||
919 | lock_sock(sk); | ||
920 | skb_free_datagram(sk, skb); | ||
921 | release_sock(sk); | ||
922 | out: | ||
923 | return err; | ||
924 | |||
925 | csum_copy_err: | ||
926 | lock_sock(sk); | ||
927 | if (!skb_kill_datagram(sk, skb, flags)) | ||
928 | UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); | ||
929 | release_sock(sk); | ||
930 | |||
931 | if (noblock) | ||
932 | return -EAGAIN; | ||
933 | goto try_again; | ||
934 | } | ||
935 | |||
936 | |||
289 | int udp_disconnect(struct sock *sk, int flags) | 937 | int udp_disconnect(struct sock *sk, int flags) |
290 | { | 938 | { |
291 | struct inet_sock *inet = inet_sk(sk); | 939 | struct inet_sock *inet = inet_sk(sk); |
@@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags) | |||
308 | return 0; | 956 | return 0; |
309 | } | 957 | } |
310 | 958 | ||
959 | /* returns: | ||
960 | * -1: error | ||
961 | * 0: success | ||
962 | * >0: "udp encap" protocol resubmission | ||
963 | * | ||
964 | * Note that in the success and error cases, the skb is assumed to | ||
965 | * have either been requeued or freed. | ||
966 | */ | ||
967 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | ||
968 | { | ||
969 | struct udp_sock *up = udp_sk(sk); | ||
970 | int rc; | ||
971 | int is_udplite = IS_UDPLITE(sk); | ||
972 | |||
973 | /* | ||
974 | * Charge it to the socket, dropping if the queue is full. | ||
975 | */ | ||
976 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | ||
977 | goto drop; | ||
978 | nf_reset(skb); | ||
979 | |||
980 | if (up->encap_type) { | ||
981 | /* | ||
982 | * This is an encapsulation socket so pass the skb to | ||
983 | * the socket's udp_encap_rcv() hook. Otherwise, just | ||
984 | * fall through and pass this up the UDP socket. | ||
985 | * up->encap_rcv() returns the following value: | ||
986 | * =0 if skb was successfully passed to the encap | ||
987 | * handler or was discarded by it. | ||
988 | * >0 if skb should be passed on to UDP. | ||
989 | * <0 if skb should be resubmitted as proto -N | ||
990 | */ | ||
991 | |||
992 | /* if we're overly short, let UDP handle it */ | ||
993 | if (skb->len > sizeof(struct udphdr) && | ||
994 | up->encap_rcv != NULL) { | ||
995 | int ret; | ||
996 | |||
997 | ret = (*up->encap_rcv)(sk, skb); | ||
998 | if (ret <= 0) { | ||
999 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, | ||
1000 | is_udplite); | ||
1001 | return -ret; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | /* FALLTHROUGH -- it's a UDP Packet */ | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * UDP-Lite specific tests, ignored on UDP sockets | ||
1010 | */ | ||
1011 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { | ||
1012 | |||
1013 | /* | ||
1014 | * MIB statistics other than incrementing the error count are | ||
1015 | * disabled for the following two types of errors: these depend | ||
1016 | * on the application settings, not on the functioning of the | ||
1017 | * protocol stack as such. | ||
1018 | * | ||
1019 | * RFC 3828 here recommends (sec 3.3): "There should also be a | ||
1020 | * way ... to ... at least let the receiving application block | ||
1021 | * delivery of packets with coverage values less than a value | ||
1022 | * provided by the application." | ||
1023 | */ | ||
1024 | if (up->pcrlen == 0) { /* full coverage was set */ | ||
1025 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | ||
1026 | "%d while full coverage %d requested\n", | ||
1027 | UDP_SKB_CB(skb)->cscov, skb->len); | ||
1028 | goto drop; | ||
1029 | } | ||
1030 | /* The next case involves violating the min. coverage requested | ||
1031 | * by the receiver. This is subtle: if receiver wants x and x is | ||
1032 | * greater than the buffersize/MTU then receiver will complain | ||
1033 | * that it wants x while sender emits packets of smaller size y. | ||
1034 | * Therefore the above ...()->partial_cov statement is essential. | ||
1035 | */ | ||
1036 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | ||
1037 | LIMIT_NETDEBUG(KERN_WARNING | ||
1038 | "UDPLITE: coverage %d too small, need min %d\n", | ||
1039 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | ||
1040 | goto drop; | ||
1041 | } | ||
1042 | } | ||
1043 | |||
1044 | if (sk->sk_filter) { | ||
1045 | if (udp_lib_checksum_complete(skb)) | ||
1046 | goto drop; | ||
1047 | } | ||
1048 | |||
1049 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | ||
1050 | /* Note that an ENOMEM error is charged twice */ | ||
1051 | if (rc == -ENOMEM) | ||
1052 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); | ||
1053 | goto drop; | ||
1054 | } | ||
1055 | |||
1056 | return 0; | ||
1057 | |||
1058 | drop: | ||
1059 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); | ||
1060 | kfree_skb(skb); | ||
1061 | return -1; | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
1065 | * Multicasts and broadcasts go to each listener. | ||
1066 | * | ||
1067 | * Note: called only from the BH handler context, | ||
1068 | * so we don't need to lock the hashes. | ||
1069 | */ | ||
1070 | static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | ||
1071 | struct udphdr *uh, | ||
1072 | __be32 saddr, __be32 daddr, | ||
1073 | struct hlist_head udptable[]) | ||
1074 | { | ||
1075 | struct sock *sk; | ||
1076 | int dif; | ||
1077 | |||
1078 | read_lock(&udp_hash_lock); | ||
1079 | sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); | ||
1080 | dif = skb->dev->ifindex; | ||
1081 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); | ||
1082 | if (sk) { | ||
1083 | struct sock *sknext = NULL; | ||
1084 | |||
1085 | do { | ||
1086 | struct sk_buff *skb1 = skb; | ||
1087 | |||
1088 | sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, | ||
1089 | uh->source, saddr, dif); | ||
1090 | if (sknext) | ||
1091 | skb1 = skb_clone(skb, GFP_ATOMIC); | ||
1092 | |||
1093 | if (skb1) { | ||
1094 | int ret = 0; | ||
1095 | |||
1096 | bh_lock_sock_nested(sk); | ||
1097 | if (!sock_owned_by_user(sk)) | ||
1098 | ret = udp_queue_rcv_skb(sk, skb1); | ||
1099 | else | ||
1100 | sk_add_backlog(sk, skb1); | ||
1101 | bh_unlock_sock(sk); | ||
1102 | |||
1103 | if (ret > 0) | ||
1104 | /* we should probably re-process instead | ||
1105 | * of dropping packets here. */ | ||
1106 | kfree_skb(skb1); | ||
1107 | } | ||
1108 | sk = sknext; | ||
1109 | } while (sknext); | ||
1110 | } else | ||
1111 | kfree_skb(skb); | ||
1112 | read_unlock(&udp_hash_lock); | ||
1113 | return 0; | ||
1114 | } | ||
1115 | |||
1116 | /* Initialize UDP checksum. If exited with zero value (success), | ||
1117 | * CHECKSUM_UNNECESSARY means, that no more checks are required. | ||
1118 | * Otherwise, csum completion requires chacksumming packet body, | ||
1119 | * including udp header and folding it to skb->csum. | ||
1120 | */ | ||
1121 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | ||
1122 | int proto) | ||
1123 | { | ||
1124 | const struct iphdr *iph; | ||
1125 | int err; | ||
1126 | |||
1127 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
1128 | UDP_SKB_CB(skb)->cscov = skb->len; | ||
1129 | |||
1130 | if (proto == IPPROTO_UDPLITE) { | ||
1131 | err = udplite_checksum_init(skb, uh); | ||
1132 | if (err) | ||
1133 | return err; | ||
1134 | } | ||
1135 | |||
1136 | iph = ip_hdr(skb); | ||
1137 | if (uh->check == 0) { | ||
1138 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1139 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
1140 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
1141 | proto, skb->csum)) | ||
1142 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
1143 | } | ||
1144 | if (!skb_csum_unnecessary(skb)) | ||
1145 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
1146 | skb->len, proto, 0); | ||
1147 | /* Probably, we should checksum udp header (it should be in cache | ||
1148 | * in any case) and data in tiny packets (< rx copybreak). | ||
1149 | */ | ||
1150 | |||
1151 | return 0; | ||
1152 | } | ||
1153 | |||
1154 | /* | ||
1155 | * All we need to do is get the socket, and then do a checksum. | ||
1156 | */ | ||
1157 | |||
1158 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | ||
1159 | int proto) | ||
1160 | { | ||
1161 | struct sock *sk; | ||
1162 | struct udphdr *uh = udp_hdr(skb); | ||
1163 | unsigned short ulen; | ||
1164 | struct rtable *rt = (struct rtable*)skb->dst; | ||
1165 | __be32 saddr = ip_hdr(skb)->saddr; | ||
1166 | __be32 daddr = ip_hdr(skb)->daddr; | ||
1167 | |||
1168 | /* | ||
1169 | * Validate the packet. | ||
1170 | */ | ||
1171 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | ||
1172 | goto drop; /* No space for header. */ | ||
1173 | |||
1174 | ulen = ntohs(uh->len); | ||
1175 | if (ulen > skb->len) | ||
1176 | goto short_packet; | ||
1177 | |||
1178 | if (proto == IPPROTO_UDP) { | ||
1179 | /* UDP validates ulen. */ | ||
1180 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | ||
1181 | goto short_packet; | ||
1182 | uh = udp_hdr(skb); | ||
1183 | } | ||
1184 | |||
1185 | if (udp4_csum_init(skb, uh, proto)) | ||
1186 | goto csum_error; | ||
1187 | |||
1188 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | ||
1189 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | ||
1190 | |||
1191 | sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, | ||
1192 | uh->dest, inet_iif(skb), udptable); | ||
1193 | |||
1194 | if (sk != NULL) { | ||
1195 | int ret = 0; | ||
1196 | bh_lock_sock_nested(sk); | ||
1197 | if (!sock_owned_by_user(sk)) | ||
1198 | ret = udp_queue_rcv_skb(sk, skb); | ||
1199 | else | ||
1200 | sk_add_backlog(sk, skb); | ||
1201 | bh_unlock_sock(sk); | ||
1202 | sock_put(sk); | ||
1203 | |||
1204 | /* a return value > 0 means to resubmit the input, but | ||
1205 | * it wants the return to be -protocol, or 0 | ||
1206 | */ | ||
1207 | if (ret > 0) | ||
1208 | return -ret; | ||
1209 | return 0; | ||
1210 | } | ||
1211 | |||
1212 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
1213 | goto drop; | ||
1214 | nf_reset(skb); | ||
1215 | |||
1216 | /* No socket. Drop packet silently, if checksum is wrong */ | ||
1217 | if (udp_lib_checksum_complete(skb)) | ||
1218 | goto csum_error; | ||
1219 | |||
1220 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); | ||
1221 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
1222 | |||
1223 | /* | ||
1224 | * Hmm. We got an UDP packet to a port to which we | ||
1225 | * don't wanna listen. Ignore it. | ||
1226 | */ | ||
1227 | kfree_skb(skb); | ||
1228 | return 0; | ||
1229 | |||
1230 | short_packet: | ||
1231 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | ||
1232 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | ||
1233 | NIPQUAD(saddr), | ||
1234 | ntohs(uh->source), | ||
1235 | ulen, | ||
1236 | skb->len, | ||
1237 | NIPQUAD(daddr), | ||
1238 | ntohs(uh->dest)); | ||
1239 | goto drop; | ||
1240 | |||
1241 | csum_error: | ||
1242 | /* | ||
1243 | * RFC1122: OK. Discards the bad packet silently (as far as | ||
1244 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | ||
1245 | */ | ||
1246 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | ||
1247 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | ||
1248 | NIPQUAD(saddr), | ||
1249 | ntohs(uh->source), | ||
1250 | NIPQUAD(daddr), | ||
1251 | ntohs(uh->dest), | ||
1252 | ulen); | ||
1253 | drop: | ||
1254 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); | ||
1255 | kfree_skb(skb); | ||
1256 | return 0; | ||
1257 | } | ||
1258 | |||
1259 | int udp_rcv(struct sk_buff *skb) | ||
1260 | { | ||
1261 | return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); | ||
1262 | } | ||
1263 | |||
/* Socket teardown: discard any corked, not yet sent frames while holding
 * the socket lock.  Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);

	return 0;
}
1271 | |||
311 | /* | 1272 | /* |
312 | * Socket option code for UDP | 1273 | * Socket option code for UDP |
313 | */ | 1274 | */ |
@@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
318 | struct udp_sock *up = udp_sk(sk); | 1279 | struct udp_sock *up = udp_sk(sk); |
319 | int val; | 1280 | int val; |
320 | int err = 0; | 1281 | int err = 0; |
321 | #ifdef CONFIG_IP_UDPLITE | ||
322 | int is_udplite = IS_UDPLITE(sk); | 1282 | int is_udplite = IS_UDPLITE(sk); |
323 | #endif | ||
324 | 1283 | ||
325 | if (optlen<sizeof(int)) | 1284 | if (optlen<sizeof(int)) |
326 | return -EINVAL; | 1285 | return -EINVAL; |
@@ -356,7 +1315,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
356 | } | 1315 | } |
357 | break; | 1316 | break; |
358 | 1317 | ||
359 | #ifdef CONFIG_IP_UDPLITE | ||
360 | /* | 1318 | /* |
361 | * UDP-Lite's partial checksum coverage (RFC 3828). | 1319 | * UDP-Lite's partial checksum coverage (RFC 3828). |
362 | */ | 1320 | */ |
@@ -382,7 +1340,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
382 | up->pcrlen = val; | 1340 | up->pcrlen = val; |
383 | up->pcflag |= UDPLITE_RECV_CC; | 1341 | up->pcflag |= UDPLITE_RECV_CC; |
384 | break; | 1342 | break; |
385 | #endif | ||
386 | 1343 | ||
387 | default: | 1344 | default: |
388 | err = -ENOPROTOOPT; | 1345 | err = -ENOPROTOOPT; |
@@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
392 | return err; | 1349 | return err; |
393 | } | 1350 | } |
394 | 1351 | ||
1352 | int udp_setsockopt(struct sock *sk, int level, int optname, | ||
1353 | char __user *optval, int optlen) | ||
1354 | { | ||
1355 | if (level == SOL_UDP || level == SOL_UDPLITE) | ||
1356 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | ||
1357 | udp_push_pending_frames); | ||
1358 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
1359 | } | ||
1360 | |||
#ifdef CONFIG_COMPAT
/* Compat variant of udp_setsockopt(): identical for SOL_UDP/SOL_UDPLITE,
 * but non-UDP levels go to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1371 | |||
395 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, | 1372 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, |
396 | char __user *optval, int __user *optlen) | 1373 | char __user *optval, int __user *optlen) |
397 | { | 1374 | { |
@@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
436 | return 0; | 1413 | return 0; |
437 | } | 1414 | } |
438 | 1415 | ||
1416 | int udp_getsockopt(struct sock *sk, int level, int optname, | ||
1417 | char __user *optval, int __user *optlen) | ||
1418 | { | ||
1419 | if (level == SOL_UDP || level == SOL_UDPLITE) | ||
1420 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | ||
1421 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
1422 | } | ||
1423 | |||
#ifdef CONFIG_COMPAT
/* Compat variant of udp_getsockopt(): identical for SOL_UDP/SOL_UDPLITE,
 * but non-UDP levels go to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
439 | /** | 1433 | /** |
440 | * udp_poll - wait for a UDP event. | 1434 | * udp_poll - wait for a UDP event. |
441 | * @file - file struct | 1435 | * @file - file struct |
@@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
480 | 1474 | ||
481 | } | 1475 | } |
482 | 1476 | ||
1477 | DEFINE_PROTO_INUSE(udp) | ||
1478 | |||
1479 | struct proto udp_prot = { | ||
1480 | .name = "UDP", | ||
1481 | .owner = THIS_MODULE, | ||
1482 | .close = udp_lib_close, | ||
1483 | .connect = ip4_datagram_connect, | ||
1484 | .disconnect = udp_disconnect, | ||
1485 | .ioctl = udp_ioctl, | ||
1486 | .destroy = udp_destroy_sock, | ||
1487 | .setsockopt = udp_setsockopt, | ||
1488 | .getsockopt = udp_getsockopt, | ||
1489 | .sendmsg = udp_sendmsg, | ||
1490 | .recvmsg = udp_recvmsg, | ||
1491 | .sendpage = udp_sendpage, | ||
1492 | .backlog_rcv = udp_queue_rcv_skb, | ||
1493 | .hash = udp_lib_hash, | ||
1494 | .unhash = udp_lib_unhash, | ||
1495 | .get_port = udp_v4_get_port, | ||
1496 | .memory_allocated = &udp_memory_allocated, | ||
1497 | .sysctl_mem = sysctl_udp_mem, | ||
1498 | .sysctl_wmem = &sysctl_udp_wmem_min, | ||
1499 | .sysctl_rmem = &sysctl_udp_rmem_min, | ||
1500 | .obj_size = sizeof(struct udp_sock), | ||
1501 | #ifdef CONFIG_COMPAT | ||
1502 | .compat_setsockopt = compat_udp_setsockopt, | ||
1503 | .compat_getsockopt = compat_udp_getsockopt, | ||
1504 | #endif | ||
1505 | REF_PROTO_INUSE(udp) | ||
1506 | }; | ||
483 | 1507 | ||
484 | /* ------------------------------------------------------------------------ */ | 1508 | /* ------------------------------------------------------------------------ */ |
485 | #ifdef CONFIG_PROC_FS | 1509 | #ifdef CONFIG_PROC_FS |
@@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) | |||
612 | proc_net_remove(&init_net, afinfo->name); | 1636 | proc_net_remove(&init_net, afinfo->name); |
613 | memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); | 1637 | memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); |
614 | } | 1638 | } |
1639 | |||
1640 | /* ------------------------------------------------------------------------ */ | ||
1641 | static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) | ||
1642 | { | ||
1643 | struct inet_sock *inet = inet_sk(sp); | ||
1644 | __be32 dest = inet->daddr; | ||
1645 | __be32 src = inet->rcv_saddr; | ||
1646 | __u16 destp = ntohs(inet->dport); | ||
1647 | __u16 srcp = ntohs(inet->sport); | ||
1648 | |||
1649 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" | ||
1650 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", | ||
1651 | bucket, src, srcp, dest, destp, sp->sk_state, | ||
1652 | atomic_read(&sp->sk_wmem_alloc), | ||
1653 | atomic_read(&sp->sk_rmem_alloc), | ||
1654 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), | ||
1655 | atomic_read(&sp->sk_refcnt), sp); | ||
1656 | } | ||
1657 | |||
1658 | int udp4_seq_show(struct seq_file *seq, void *v) | ||
1659 | { | ||
1660 | if (v == SEQ_START_TOKEN) | ||
1661 | seq_printf(seq, "%-127s\n", | ||
1662 | " sl local_address rem_address st tx_queue " | ||
1663 | "rx_queue tr tm->when retrnsmt uid timeout " | ||
1664 | "inode"); | ||
1665 | else { | ||
1666 | char tmpbuf[129]; | ||
1667 | struct udp_iter_state *state = seq->private; | ||
1668 | |||
1669 | udp4_format_sock(v, tmpbuf, state->bucket); | ||
1670 | seq_printf(seq, "%-127s\n", tmpbuf); | ||
1671 | } | ||
1672 | return 0; | ||
1673 | } | ||
1674 | |||
1675 | /* ------------------------------------------------------------------------ */ | ||
1676 | static struct file_operations udp4_seq_fops; | ||
1677 | static struct udp_seq_afinfo udp4_seq_afinfo = { | ||
1678 | .owner = THIS_MODULE, | ||
1679 | .name = "udp", | ||
1680 | .family = AF_INET, | ||
1681 | .hashtable = udp_hash, | ||
1682 | .seq_show = udp4_seq_show, | ||
1683 | .seq_fops = &udp4_seq_fops, | ||
1684 | }; | ||
1685 | |||
1686 | int __init udp4_proc_init(void) | ||
1687 | { | ||
1688 | return udp_proc_register(&udp4_seq_afinfo); | ||
1689 | } | ||
1690 | |||
1691 | void udp4_proc_exit(void) | ||
1692 | { | ||
1693 | udp_proc_unregister(&udp4_seq_afinfo); | ||
1694 | } | ||
615 | #endif /* CONFIG_PROC_FS */ | 1695 | #endif /* CONFIG_PROC_FS */ |
616 | 1696 | ||
617 | void __init udp_init(void) | 1697 | void __init udp_init(void) |
@@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash); | |||
638 | EXPORT_SYMBOL(udp_hash_lock); | 1718 | EXPORT_SYMBOL(udp_hash_lock); |
639 | EXPORT_SYMBOL(udp_ioctl); | 1719 | EXPORT_SYMBOL(udp_ioctl); |
640 | EXPORT_SYMBOL(udp_get_port); | 1720 | EXPORT_SYMBOL(udp_get_port); |
1721 | EXPORT_SYMBOL(udp_prot); | ||
1722 | EXPORT_SYMBOL(udp_sendmsg); | ||
641 | EXPORT_SYMBOL(udp_lib_getsockopt); | 1723 | EXPORT_SYMBOL(udp_lib_getsockopt); |
642 | EXPORT_SYMBOL(udp_lib_setsockopt); | 1724 | EXPORT_SYMBOL(udp_lib_setsockopt); |
643 | EXPORT_SYMBOL(udp_poll); | 1725 | EXPORT_SYMBOL(udp_poll); |
diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c deleted file mode 100644 index fd14c2c50ed4..000000000000 --- a/net/ipv4/udp_ipv4.c +++ /dev/null | |||
@@ -1,1134 +0,0 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * UDP for IPv4. | ||
7 | * | ||
8 | * For full credits, see net/ipv4/udp.c. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <asm/system.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/ioctls.h> | ||
19 | #include <linux/bootmem.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/fcntl.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/sockios.h> | ||
25 | #include <linux/igmp.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/inet.h> | ||
31 | #include <linux/netdevice.h> | ||
32 | #include <net/tcp_states.h> | ||
33 | #include <linux/skbuff.h> | ||
34 | #include <linux/proc_fs.h> | ||
35 | #include <linux/seq_file.h> | ||
36 | #include <net/net_namespace.h> | ||
37 | #include <net/icmp.h> | ||
38 | #include <net/route.h> | ||
39 | #include <net/checksum.h> | ||
40 | #include <net/xfrm.h> | ||
41 | #include "udp_impl.h" | ||
42 | |||
43 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
44 | { | ||
45 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | ||
46 | |||
47 | return ( !ipv6_only_sock(sk2) && | ||
48 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | ||
49 | inet1->rcv_saddr == inet2->rcv_saddr )); | ||
50 | } | ||
51 | |||
52 | static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | ||
53 | { | ||
54 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | ||
55 | } | ||
56 | |||
57 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | ||
58 | * harder than this. -DaveM | ||
59 | */ | ||
60 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | ||
61 | __be16 sport, __be32 daddr, __be16 dport, | ||
62 | int dif, struct hlist_head udptable[]) | ||
63 | { | ||
64 | struct sock *sk, *result = NULL; | ||
65 | struct hlist_node *node; | ||
66 | unsigned short hnum = ntohs(dport); | ||
67 | int badness = -1; | ||
68 | |||
69 | read_lock(&udp_hash_lock); | ||
70 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | ||
71 | struct inet_sock *inet = inet_sk(sk); | ||
72 | |||
73 | if (sk->sk_net == net && sk->sk_hash == hnum && | ||
74 | !ipv6_only_sock(sk)) { | ||
75 | int score = (sk->sk_family == PF_INET ? 1 : 0); | ||
76 | if (inet->rcv_saddr) { | ||
77 | if (inet->rcv_saddr != daddr) | ||
78 | continue; | ||
79 | score+=2; | ||
80 | } | ||
81 | if (inet->daddr) { | ||
82 | if (inet->daddr != saddr) | ||
83 | continue; | ||
84 | score+=2; | ||
85 | } | ||
86 | if (inet->dport) { | ||
87 | if (inet->dport != sport) | ||
88 | continue; | ||
89 | score+=2; | ||
90 | } | ||
91 | if (sk->sk_bound_dev_if) { | ||
92 | if (sk->sk_bound_dev_if != dif) | ||
93 | continue; | ||
94 | score+=2; | ||
95 | } | ||
96 | if (score == 9) { | ||
97 | result = sk; | ||
98 | break; | ||
99 | } else if (score > badness) { | ||
100 | result = sk; | ||
101 | badness = score; | ||
102 | } | ||
103 | } | ||
104 | } | ||
105 | if (result) | ||
106 | sock_hold(result); | ||
107 | read_unlock(&udp_hash_lock); | ||
108 | return result; | ||
109 | } | ||
110 | |||
111 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | ||
112 | __be16 loc_port, __be32 loc_addr, | ||
113 | __be16 rmt_port, __be32 rmt_addr, | ||
114 | int dif) | ||
115 | { | ||
116 | struct hlist_node *node; | ||
117 | struct sock *s = sk; | ||
118 | unsigned short hnum = ntohs(loc_port); | ||
119 | |||
120 | sk_for_each_from(s, node) { | ||
121 | struct inet_sock *inet = inet_sk(s); | ||
122 | |||
123 | if (s->sk_hash != hnum || | ||
124 | (inet->daddr && inet->daddr != rmt_addr) || | ||
125 | (inet->dport != rmt_port && inet->dport) || | ||
126 | (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || | ||
127 | ipv6_only_sock(s) || | ||
128 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) | ||
129 | continue; | ||
130 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) | ||
131 | continue; | ||
132 | goto found; | ||
133 | } | ||
134 | s = NULL; | ||
135 | found: | ||
136 | return s; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * This routine is called by the ICMP module when it gets some | ||
141 | * sort of error condition. If err < 0 then the socket should | ||
142 | * be closed and the error returned to the user. If err > 0 | ||
143 | * it's just the icmp type << 8 | icmp code. | ||
144 | * Header points to the ip header of the error packet. We move | ||
145 | * on past this. Then (as it used to claim before adjustment) | ||
146 | * header points to the first 8 bytes of the udp header. We need | ||
147 | * to find the appropriate port. | ||
148 | */ | ||
149 | |||
150 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | ||
151 | { | ||
152 | struct inet_sock *inet; | ||
153 | struct iphdr *iph = (struct iphdr*)skb->data; | ||
154 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | ||
155 | const int type = icmp_hdr(skb)->type; | ||
156 | const int code = icmp_hdr(skb)->code; | ||
157 | struct sock *sk; | ||
158 | int harderr; | ||
159 | int err; | ||
160 | |||
161 | sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, | ||
162 | iph->saddr, uh->source, skb->dev->ifindex, udptable); | ||
163 | if (sk == NULL) { | ||
164 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
165 | return; /* No socket for error */ | ||
166 | } | ||
167 | |||
168 | err = 0; | ||
169 | harderr = 0; | ||
170 | inet = inet_sk(sk); | ||
171 | |||
172 | switch (type) { | ||
173 | default: | ||
174 | case ICMP_TIME_EXCEEDED: | ||
175 | err = EHOSTUNREACH; | ||
176 | break; | ||
177 | case ICMP_SOURCE_QUENCH: | ||
178 | goto out; | ||
179 | case ICMP_PARAMETERPROB: | ||
180 | err = EPROTO; | ||
181 | harderr = 1; | ||
182 | break; | ||
183 | case ICMP_DEST_UNREACH: | ||
184 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | ||
185 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { | ||
186 | err = EMSGSIZE; | ||
187 | harderr = 1; | ||
188 | break; | ||
189 | } | ||
190 | goto out; | ||
191 | } | ||
192 | err = EHOSTUNREACH; | ||
193 | if (code <= NR_ICMP_UNREACH) { | ||
194 | harderr = icmp_err_convert[code].fatal; | ||
195 | err = icmp_err_convert[code].errno; | ||
196 | } | ||
197 | break; | ||
198 | } | ||
199 | |||
200 | /* | ||
201 | * RFC1122: OK. Passes ICMP errors back to application, as per | ||
202 | * 4.1.3.3. | ||
203 | */ | ||
204 | if (!inet->recverr) { | ||
205 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | ||
206 | goto out; | ||
207 | } else { | ||
208 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); | ||
209 | } | ||
210 | sk->sk_err = err; | ||
211 | sk->sk_error_report(sk); | ||
212 | out: | ||
213 | sock_put(sk); | ||
214 | } | ||
215 | |||
216 | void udp_err(struct sk_buff *skb, u32 info) | ||
217 | { | ||
218 | __udp4_lib_err(skb, info, udp_hash); | ||
219 | } | ||
220 | |||
221 | /* | ||
222 | * Throw away all pending data and cancel the corking. Socket is locked. | ||
223 | */ | ||
224 | static void udp_flush_pending_frames(struct sock *sk) | ||
225 | { | ||
226 | struct udp_sock *up = udp_sk(sk); | ||
227 | |||
228 | if (up->pending) { | ||
229 | up->len = 0; | ||
230 | up->pending = 0; | ||
231 | ip_flush_pending_frames(sk); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | ||
237 | * @sk: socket we are sending on | ||
238 | * @skb: sk_buff containing the filled-in UDP header | ||
239 | * (checksum field must be zeroed out) | ||
240 | */ | ||
241 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | ||
242 | __be32 src, __be32 dst, int len ) | ||
243 | { | ||
244 | unsigned int offset; | ||
245 | struct udphdr *uh = udp_hdr(skb); | ||
246 | __wsum csum = 0; | ||
247 | |||
248 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | ||
249 | /* | ||
250 | * Only one fragment on the socket. | ||
251 | */ | ||
252 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
253 | skb->csum_offset = offsetof(struct udphdr, check); | ||
254 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | ||
255 | } else { | ||
256 | /* | ||
257 | * HW-checksum won't work as there are two or more | ||
258 | * fragments on the socket so that all csums of sk_buffs | ||
259 | * should be together | ||
260 | */ | ||
261 | offset = skb_transport_offset(skb); | ||
262 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
263 | |||
264 | skb->ip_summed = CHECKSUM_NONE; | ||
265 | |||
266 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
267 | csum = csum_add(csum, skb->csum); | ||
268 | } | ||
269 | |||
270 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | ||
271 | if (uh->check == 0) | ||
272 | uh->check = CSUM_MANGLED_0; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
278 | */ | ||
279 | static int udp_push_pending_frames(struct sock *sk) | ||
280 | { | ||
281 | struct udp_sock *up = udp_sk(sk); | ||
282 | struct inet_sock *inet = inet_sk(sk); | ||
283 | struct flowi *fl = &inet->cork.fl; | ||
284 | struct sk_buff *skb; | ||
285 | struct udphdr *uh; | ||
286 | int err = 0; | ||
287 | int is_udplite = IS_UDPLITE(sk); | ||
288 | __wsum csum = 0; | ||
289 | |||
290 | /* Grab the skbuff where UDP header space exists. */ | ||
291 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | ||
292 | goto out; | ||
293 | |||
294 | /* | ||
295 | * Create a UDP header | ||
296 | */ | ||
297 | uh = udp_hdr(skb); | ||
298 | uh->source = fl->fl_ip_sport; | ||
299 | uh->dest = fl->fl_ip_dport; | ||
300 | uh->len = htons(up->len); | ||
301 | uh->check = 0; | ||
302 | |||
303 | if (is_udplite) /* UDP-Lite */ | ||
304 | csum = udplite_csum_outgoing(sk, skb); | ||
305 | |||
306 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | ||
307 | |||
308 | skb->ip_summed = CHECKSUM_NONE; | ||
309 | goto send; | ||
310 | |||
311 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | ||
312 | |||
313 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); | ||
314 | goto send; | ||
315 | |||
316 | } else /* `normal' UDP */ | ||
317 | csum = udp_csum_outgoing(sk, skb); | ||
318 | |||
319 | /* add protocol-dependent pseudo-header */ | ||
320 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | ||
321 | sk->sk_protocol, csum ); | ||
322 | if (uh->check == 0) | ||
323 | uh->check = CSUM_MANGLED_0; | ||
324 | |||
325 | send: | ||
326 | err = ip_push_pending_frames(sk); | ||
327 | out: | ||
328 | up->len = 0; | ||
329 | up->pending = 0; | ||
330 | if (!err) | ||
331 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); | ||
332 | return err; | ||
333 | } | ||
334 | |||
335 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
336 | size_t len) | ||
337 | { | ||
338 | struct inet_sock *inet = inet_sk(sk); | ||
339 | struct udp_sock *up = udp_sk(sk); | ||
340 | int ulen = len; | ||
341 | struct ipcm_cookie ipc; | ||
342 | struct rtable *rt = NULL; | ||
343 | int free = 0; | ||
344 | int connected = 0; | ||
345 | __be32 daddr, faddr, saddr; | ||
346 | __be16 dport; | ||
347 | u8 tos; | ||
348 | int err, is_udplite = IS_UDPLITE(sk); | ||
349 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | ||
350 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | ||
351 | |||
352 | if (len > 0xFFFF) | ||
353 | return -EMSGSIZE; | ||
354 | |||
355 | /* | ||
356 | * Check the flags. | ||
357 | */ | ||
358 | |||
359 | if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ | ||
360 | return -EOPNOTSUPP; | ||
361 | |||
362 | ipc.opt = NULL; | ||
363 | |||
364 | if (up->pending) { | ||
365 | /* | ||
366 | * There are pending frames. | ||
367 | * The socket lock must be held while it's corked. | ||
368 | */ | ||
369 | lock_sock(sk); | ||
370 | if (likely(up->pending)) { | ||
371 | if (unlikely(up->pending != AF_INET)) { | ||
372 | release_sock(sk); | ||
373 | return -EINVAL; | ||
374 | } | ||
375 | goto do_append_data; | ||
376 | } | ||
377 | release_sock(sk); | ||
378 | } | ||
379 | ulen += sizeof(struct udphdr); | ||
380 | |||
381 | /* | ||
382 | * Get and verify the address. | ||
383 | */ | ||
384 | if (msg->msg_name) { | ||
385 | struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; | ||
386 | if (msg->msg_namelen < sizeof(*usin)) | ||
387 | return -EINVAL; | ||
388 | if (usin->sin_family != AF_INET) { | ||
389 | if (usin->sin_family != AF_UNSPEC) | ||
390 | return -EAFNOSUPPORT; | ||
391 | } | ||
392 | |||
393 | daddr = usin->sin_addr.s_addr; | ||
394 | dport = usin->sin_port; | ||
395 | if (dport == 0) | ||
396 | return -EINVAL; | ||
397 | } else { | ||
398 | if (sk->sk_state != TCP_ESTABLISHED) | ||
399 | return -EDESTADDRREQ; | ||
400 | daddr = inet->daddr; | ||
401 | dport = inet->dport; | ||
402 | /* Open fast path for connected socket. | ||
403 | Route will not be used, if at least one option is set. | ||
404 | */ | ||
405 | connected = 1; | ||
406 | } | ||
407 | ipc.addr = inet->saddr; | ||
408 | |||
409 | ipc.oif = sk->sk_bound_dev_if; | ||
410 | if (msg->msg_controllen) { | ||
411 | err = ip_cmsg_send(msg, &ipc); | ||
412 | if (err) | ||
413 | return err; | ||
414 | if (ipc.opt) | ||
415 | free = 1; | ||
416 | connected = 0; | ||
417 | } | ||
418 | if (!ipc.opt) | ||
419 | ipc.opt = inet->opt; | ||
420 | |||
421 | saddr = ipc.addr; | ||
422 | ipc.addr = faddr = daddr; | ||
423 | |||
424 | if (ipc.opt && ipc.opt->srr) { | ||
425 | if (!daddr) | ||
426 | return -EINVAL; | ||
427 | faddr = ipc.opt->faddr; | ||
428 | connected = 0; | ||
429 | } | ||
430 | tos = RT_TOS(inet->tos); | ||
431 | if (sock_flag(sk, SOCK_LOCALROUTE) || | ||
432 | (msg->msg_flags & MSG_DONTROUTE) || | ||
433 | (ipc.opt && ipc.opt->is_strictroute)) { | ||
434 | tos |= RTO_ONLINK; | ||
435 | connected = 0; | ||
436 | } | ||
437 | |||
438 | if (ipv4_is_multicast(daddr)) { | ||
439 | if (!ipc.oif) | ||
440 | ipc.oif = inet->mc_index; | ||
441 | if (!saddr) | ||
442 | saddr = inet->mc_addr; | ||
443 | connected = 0; | ||
444 | } | ||
445 | |||
446 | if (connected) | ||
447 | rt = (struct rtable*)sk_dst_check(sk, 0); | ||
448 | |||
449 | if (rt == NULL) { | ||
450 | struct flowi fl = { .oif = ipc.oif, | ||
451 | .nl_u = { .ip4_u = | ||
452 | { .daddr = faddr, | ||
453 | .saddr = saddr, | ||
454 | .tos = tos } }, | ||
455 | .proto = sk->sk_protocol, | ||
456 | .uli_u = { .ports = | ||
457 | { .sport = inet->sport, | ||
458 | .dport = dport } } }; | ||
459 | security_sk_classify_flow(sk, &fl); | ||
460 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); | ||
461 | if (err) { | ||
462 | if (err == -ENETUNREACH) | ||
463 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
464 | goto out; | ||
465 | } | ||
466 | |||
467 | err = -EACCES; | ||
468 | if ((rt->rt_flags & RTCF_BROADCAST) && | ||
469 | !sock_flag(sk, SOCK_BROADCAST)) | ||
470 | goto out; | ||
471 | if (connected) | ||
472 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | ||
473 | } | ||
474 | |||
475 | if (msg->msg_flags&MSG_CONFIRM) | ||
476 | goto do_confirm; | ||
477 | back_from_confirm: | ||
478 | |||
479 | saddr = rt->rt_src; | ||
480 | if (!ipc.addr) | ||
481 | daddr = ipc.addr = rt->rt_dst; | ||
482 | |||
483 | lock_sock(sk); | ||
484 | if (unlikely(up->pending)) { | ||
485 | /* The socket is already corked while preparing it. */ | ||
486 | /* ... which is an evident application bug. --ANK */ | ||
487 | release_sock(sk); | ||
488 | |||
489 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); | ||
490 | err = -EINVAL; | ||
491 | goto out; | ||
492 | } | ||
493 | /* | ||
494 | * Now cork the socket to pend data. | ||
495 | */ | ||
496 | inet->cork.fl.fl4_dst = daddr; | ||
497 | inet->cork.fl.fl_ip_dport = dport; | ||
498 | inet->cork.fl.fl4_src = saddr; | ||
499 | inet->cork.fl.fl_ip_sport = inet->sport; | ||
500 | up->pending = AF_INET; | ||
501 | |||
502 | do_append_data: | ||
503 | up->len += ulen; | ||
504 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
505 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | ||
506 | sizeof(struct udphdr), &ipc, rt, | ||
507 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | ||
508 | if (err) | ||
509 | udp_flush_pending_frames(sk); | ||
510 | else if (!corkreq) | ||
511 | err = udp_push_pending_frames(sk); | ||
512 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | ||
513 | up->pending = 0; | ||
514 | release_sock(sk); | ||
515 | |||
516 | out: | ||
517 | ip_rt_put(rt); | ||
518 | if (free) | ||
519 | kfree(ipc.opt); | ||
520 | if (!err) | ||
521 | return len; | ||
522 | /* | ||
523 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | ||
524 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | ||
525 | * we don't have a good statistic (IpOutDiscards but it can be too many | ||
526 | * things). We could add another new stat but at least for now that | ||
527 | * seems like overkill. | ||
528 | */ | ||
529 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
530 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); | ||
531 | } | ||
532 | return err; | ||
533 | |||
534 | do_confirm: | ||
535 | dst_confirm(&rt->u.dst); | ||
536 | if (!(msg->msg_flags&MSG_PROBE) || len) | ||
537 | goto back_from_confirm; | ||
538 | err = 0; | ||
539 | goto out; | ||
540 | } | ||
541 | |||
542 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | ||
543 | size_t size, int flags) | ||
544 | { | ||
545 | struct udp_sock *up = udp_sk(sk); | ||
546 | int ret; | ||
547 | |||
548 | if (!up->pending) { | ||
549 | struct msghdr msg = { .msg_flags = flags|MSG_MORE }; | ||
550 | |||
551 | /* Call udp_sendmsg to specify destination address which | ||
552 | * sendpage interface can't pass. | ||
553 | * This will succeed only when the socket is connected. | ||
554 | */ | ||
555 | ret = udp_sendmsg(NULL, sk, &msg, 0); | ||
556 | if (ret < 0) | ||
557 | return ret; | ||
558 | } | ||
559 | |||
560 | lock_sock(sk); | ||
561 | |||
562 | if (unlikely(!up->pending)) { | ||
563 | release_sock(sk); | ||
564 | |||
565 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); | ||
566 | return -EINVAL; | ||
567 | } | ||
568 | |||
569 | ret = ip_append_page(sk, page, offset, size, flags); | ||
570 | if (ret == -EOPNOTSUPP) { | ||
571 | release_sock(sk); | ||
572 | return sock_no_sendpage(sk->sk_socket, page, offset, | ||
573 | size, flags); | ||
574 | } | ||
575 | if (ret < 0) { | ||
576 | udp_flush_pending_frames(sk); | ||
577 | goto out; | ||
578 | } | ||
579 | |||
580 | up->len += size; | ||
581 | if (!(up->corkflag || (flags&MSG_MORE))) | ||
582 | ret = udp_push_pending_frames(sk); | ||
583 | if (!ret) | ||
584 | ret = size; | ||
585 | out: | ||
586 | release_sock(sk); | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | /* | ||
591 | * This should be easy, if there is something there we | ||
592 | * return it, otherwise we block. | ||
593 | */ | ||
594 | |||
595 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
596 | size_t len, int noblock, int flags, int *addr_len) | ||
597 | { | ||
598 | struct inet_sock *inet = inet_sk(sk); | ||
599 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | ||
600 | struct sk_buff *skb; | ||
601 | unsigned int ulen, copied; | ||
602 | int peeked; | ||
603 | int err; | ||
604 | int is_udplite = IS_UDPLITE(sk); | ||
605 | |||
606 | /* | ||
607 | * Check any passed addresses | ||
608 | */ | ||
609 | if (addr_len) | ||
610 | *addr_len=sizeof(*sin); | ||
611 | |||
612 | if (flags & MSG_ERRQUEUE) | ||
613 | return ip_recv_error(sk, msg, len); | ||
614 | |||
615 | try_again: | ||
616 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | ||
617 | &peeked, &err); | ||
618 | if (!skb) | ||
619 | goto out; | ||
620 | |||
621 | ulen = skb->len - sizeof(struct udphdr); | ||
622 | copied = len; | ||
623 | if (copied > ulen) | ||
624 | copied = ulen; | ||
625 | else if (copied < ulen) | ||
626 | msg->msg_flags |= MSG_TRUNC; | ||
627 | |||
628 | /* | ||
629 | * If checksum is needed at all, try to do it while copying the | ||
630 | * data. If the data is truncated, or if we only want a partial | ||
631 | * coverage checksum (UDP-Lite), do it before the copy. | ||
632 | */ | ||
633 | |||
634 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | ||
635 | if (udp_lib_checksum_complete(skb)) | ||
636 | goto csum_copy_err; | ||
637 | } | ||
638 | |||
639 | if (skb_csum_unnecessary(skb)) | ||
640 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | ||
641 | msg->msg_iov, copied ); | ||
642 | else { | ||
643 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | ||
644 | |||
645 | if (err == -EINVAL) | ||
646 | goto csum_copy_err; | ||
647 | } | ||
648 | |||
649 | if (err) | ||
650 | goto out_free; | ||
651 | |||
652 | if (!peeked) | ||
653 | UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); | ||
654 | |||
655 | sock_recv_timestamp(msg, sk, skb); | ||
656 | |||
657 | /* Copy the address. */ | ||
658 | if (sin) | ||
659 | { | ||
660 | sin->sin_family = AF_INET; | ||
661 | sin->sin_port = udp_hdr(skb)->source; | ||
662 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | ||
663 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
664 | } | ||
665 | if (inet->cmsg_flags) | ||
666 | ip_cmsg_recv(msg, skb); | ||
667 | |||
668 | err = copied; | ||
669 | if (flags & MSG_TRUNC) | ||
670 | err = ulen; | ||
671 | |||
672 | out_free: | ||
673 | lock_sock(sk); | ||
674 | skb_free_datagram(sk, skb); | ||
675 | release_sock(sk); | ||
676 | out: | ||
677 | return err; | ||
678 | |||
679 | csum_copy_err: | ||
680 | lock_sock(sk); | ||
681 | if (!skb_kill_datagram(sk, skb, flags)) | ||
682 | UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); | ||
683 | release_sock(sk); | ||
684 | |||
685 | if (noblock) | ||
686 | return -EAGAIN; | ||
687 | goto try_again; | ||
688 | } | ||
689 | |||
690 | |||
691 | /* returns: | ||
692 | * -1: error | ||
693 | * 0: success | ||
694 | * >0: "udp encap" protocol resubmission | ||
695 | * | ||
696 | * Note that in the success and error cases, the skb is assumed to | ||
697 | * have either been requeued or freed. | ||
698 | */ | ||
699 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | ||
700 | { | ||
701 | struct udp_sock *up = udp_sk(sk); | ||
702 | int rc; | ||
703 | int is_udplite = IS_UDPLITE(sk); | ||
704 | |||
705 | /* | ||
706 | * Charge it to the socket, dropping if the queue is full. | ||
707 | */ | ||
708 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | ||
709 | goto drop; | ||
710 | nf_reset(skb); | ||
711 | |||
712 | if (up->encap_type) { | ||
713 | /* | ||
714 | * This is an encapsulation socket so pass the skb to | ||
715 | * the socket's udp_encap_rcv() hook. Otherwise, just | ||
716 | * fall through and pass this up the UDP socket. | ||
717 | * up->encap_rcv() returns the following value: | ||
718 | * =0 if skb was successfully passed to the encap | ||
719 | * handler or was discarded by it. | ||
720 | * >0 if skb should be passed on to UDP. | ||
721 | * <0 if skb should be resubmitted as proto -N | ||
722 | */ | ||
723 | |||
724 | /* if we're overly short, let UDP handle it */ | ||
725 | if (skb->len > sizeof(struct udphdr) && | ||
726 | up->encap_rcv != NULL) { | ||
727 | int ret; | ||
728 | |||
729 | ret = (*up->encap_rcv)(sk, skb); | ||
730 | if (ret <= 0) { | ||
731 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, | ||
732 | is_udplite); | ||
733 | return -ret; | ||
734 | } | ||
735 | } | ||
736 | |||
737 | /* FALLTHROUGH -- it's a UDP Packet */ | ||
738 | } | ||
739 | |||
740 | /* | ||
741 | * UDP-Lite specific tests, ignored on UDP sockets | ||
742 | */ | ||
743 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { | ||
744 | |||
745 | /* | ||
746 | * MIB statistics other than incrementing the error count are | ||
747 | * disabled for the following two types of errors: these depend | ||
748 | * on the application settings, not on the functioning of the | ||
749 | * protocol stack as such. | ||
750 | * | ||
751 | * RFC 3828 here recommends (sec 3.3): "There should also be a | ||
752 | * way ... to ... at least let the receiving application block | ||
753 | * delivery of packets with coverage values less than a value | ||
754 | * provided by the application." | ||
755 | */ | ||
756 | if (up->pcrlen == 0) { /* full coverage was set */ | ||
757 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | ||
758 | "%d while full coverage %d requested\n", | ||
759 | UDP_SKB_CB(skb)->cscov, skb->len); | ||
760 | goto drop; | ||
761 | } | ||
762 | /* The next case involves violating the min. coverage requested | ||
763 | * by the receiver. This is subtle: if receiver wants x and x is | ||
764 | * greater than the buffersize/MTU then receiver will complain | ||
765 | * that it wants x while sender emits packets of smaller size y. | ||
766 | * Therefore the above ...()->partial_cov statement is essential. | ||
767 | */ | ||
768 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | ||
769 | LIMIT_NETDEBUG(KERN_WARNING | ||
770 | "UDPLITE: coverage %d too small, need min %d\n", | ||
771 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | ||
772 | goto drop; | ||
773 | } | ||
774 | } | ||
775 | |||
776 | if (sk->sk_filter) { | ||
777 | if (udp_lib_checksum_complete(skb)) | ||
778 | goto drop; | ||
779 | } | ||
780 | |||
781 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | ||
782 | /* Note that an ENOMEM error is charged twice */ | ||
783 | if (rc == -ENOMEM) | ||
784 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); | ||
785 | goto drop; | ||
786 | } | ||
787 | |||
788 | return 0; | ||
789 | |||
790 | drop: | ||
791 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); | ||
792 | kfree_skb(skb); | ||
793 | return -1; | ||
794 | } | ||
795 | |||
796 | /* | ||
797 | * Multicasts and broadcasts go to each listener. | ||
798 | * | ||
799 | * Note: called only from the BH handler context, | ||
800 | * so we don't need to lock the hashes. | ||
801 | */ | ||
802 | static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | ||
803 | struct udphdr *uh, | ||
804 | __be32 saddr, __be32 daddr, | ||
805 | struct hlist_head udptable[]) | ||
806 | { | ||
807 | struct sock *sk; | ||
808 | int dif; | ||
809 | |||
810 | read_lock(&udp_hash_lock); | ||
811 | sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); | ||
812 | dif = skb->dev->ifindex; | ||
813 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); | ||
814 | if (sk) { | ||
815 | struct sock *sknext = NULL; | ||
816 | |||
817 | do { | ||
818 | struct sk_buff *skb1 = skb; | ||
819 | |||
820 | sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, | ||
821 | uh->source, saddr, dif); | ||
822 | if (sknext) | ||
823 | skb1 = skb_clone(skb, GFP_ATOMIC); | ||
824 | |||
825 | if (skb1) { | ||
826 | int ret = 0; | ||
827 | |||
828 | bh_lock_sock_nested(sk); | ||
829 | if (!sock_owned_by_user(sk)) | ||
830 | ret = udp_queue_rcv_skb(sk, skb1); | ||
831 | else | ||
832 | sk_add_backlog(sk, skb1); | ||
833 | bh_unlock_sock(sk); | ||
834 | |||
835 | if (ret > 0) | ||
836 | /* we should probably re-process instead | ||
837 | * of dropping packets here. */ | ||
838 | kfree_skb(skb1); | ||
839 | } | ||
840 | sk = sknext; | ||
841 | } while (sknext); | ||
842 | } else | ||
843 | kfree_skb(skb); | ||
844 | read_unlock(&udp_hash_lock); | ||
845 | return 0; | ||
846 | } | ||
847 | |||
848 | /* Initialize UDP checksum. If exited with zero value (success), | ||
849 | * CHECKSUM_UNNECESSARY means, that no more checks are required. | ||
850 | * Otherwise, csum completion requires chacksumming packet body, | ||
851 | * including udp header and folding it to skb->csum. | ||
852 | */ | ||
853 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | ||
854 | int proto) | ||
855 | { | ||
856 | const struct iphdr *iph; | ||
857 | int err; | ||
858 | |||
859 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
860 | UDP_SKB_CB(skb)->cscov = skb->len; | ||
861 | |||
862 | if (IS_PROTO_UDPLITE(proto)) { | ||
863 | err = udplite_checksum_init(skb, uh); | ||
864 | if (err) | ||
865 | return err; | ||
866 | } | ||
867 | |||
868 | iph = ip_hdr(skb); | ||
869 | if (uh->check == 0) { | ||
870 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
871 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
872 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
873 | proto, skb->csum)) | ||
874 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
875 | } | ||
876 | if (!skb_csum_unnecessary(skb)) | ||
877 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | ||
878 | skb->len, proto, 0); | ||
879 | /* Probably, we should checksum udp header (it should be in cache | ||
880 | * in any case) and data in tiny packets (< rx copybreak). | ||
881 | */ | ||
882 | |||
883 | return 0; | ||
884 | } | ||
885 | |||
886 | /* | ||
887 | * All we need to do is get the socket, and then do a checksum. | ||
888 | */ | ||
889 | |||
890 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | ||
891 | int proto) | ||
892 | { | ||
893 | struct sock *sk; | ||
894 | struct udphdr *uh = udp_hdr(skb); | ||
895 | unsigned short ulen; | ||
896 | struct rtable *rt = skb->rtable; | ||
897 | __be32 saddr = ip_hdr(skb)->saddr; | ||
898 | __be32 daddr = ip_hdr(skb)->daddr; | ||
899 | |||
900 | /* | ||
901 | * Validate the packet. | ||
902 | */ | ||
903 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | ||
904 | goto drop; /* No space for header. */ | ||
905 | |||
906 | ulen = ntohs(uh->len); | ||
907 | if (ulen > skb->len) | ||
908 | goto short_packet; | ||
909 | |||
910 | if (IS_PROTO_UDPLITE(proto)) { | ||
911 | /* UDP validates ulen. */ | ||
912 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | ||
913 | goto short_packet; | ||
914 | uh = udp_hdr(skb); | ||
915 | } | ||
916 | |||
917 | if (udp4_csum_init(skb, uh, proto)) | ||
918 | goto csum_error; | ||
919 | |||
920 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | ||
921 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | ||
922 | |||
923 | sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, | ||
924 | uh->dest, inet_iif(skb), udptable); | ||
925 | |||
926 | if (sk != NULL) { | ||
927 | int ret = 0; | ||
928 | bh_lock_sock_nested(sk); | ||
929 | if (!sock_owned_by_user(sk)) | ||
930 | ret = udp_queue_rcv_skb(sk, skb); | ||
931 | else | ||
932 | sk_add_backlog(sk, skb); | ||
933 | bh_unlock_sock(sk); | ||
934 | sock_put(sk); | ||
935 | |||
936 | /* a return value > 0 means to resubmit the input, but | ||
937 | * it wants the return to be -protocol, or 0 | ||
938 | */ | ||
939 | if (ret > 0) | ||
940 | return -ret; | ||
941 | return 0; | ||
942 | } | ||
943 | |||
944 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
945 | goto drop; | ||
946 | nf_reset(skb); | ||
947 | |||
948 | /* No socket. Drop packet silently, if checksum is wrong */ | ||
949 | if (udp_lib_checksum_complete(skb)) | ||
950 | goto csum_error; | ||
951 | |||
952 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); | ||
953 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
954 | |||
955 | /* | ||
956 | * Hmm. We got an UDP packet to a port to which we | ||
957 | * don't wanna listen. Ignore it. | ||
958 | */ | ||
959 | kfree_skb(skb); | ||
960 | return 0; | ||
961 | |||
962 | short_packet: | ||
963 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | ||
964 | IS_PROTO_UDPLITE(proto) ? "-Lite" : "", | ||
965 | NIPQUAD(saddr), | ||
966 | ntohs(uh->source), | ||
967 | ulen, | ||
968 | skb->len, | ||
969 | NIPQUAD(daddr), | ||
970 | ntohs(uh->dest)); | ||
971 | goto drop; | ||
972 | |||
973 | csum_error: | ||
974 | /* | ||
975 | * RFC1122: OK. Discards the bad packet silently (as far as | ||
976 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | ||
977 | */ | ||
978 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | ||
979 | IS_PROTO_UDPLITE(proto) ? "-Lite" : "", | ||
980 | NIPQUAD(saddr), | ||
981 | ntohs(uh->source), | ||
982 | NIPQUAD(daddr), | ||
983 | ntohs(uh->dest), | ||
984 | ulen); | ||
985 | drop: | ||
986 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); | ||
987 | kfree_skb(skb); | ||
988 | return 0; | ||
989 | } | ||
990 | |||
/*
 * Receive entry point for plain UDP: runs the common receive path
 * against the udp_hash table with protocol IPPROTO_UDP.
 */
int udp_rcv(struct sk_buff *skb)
{
	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
995 | |||
/*
 * Socket destroy hook: flushes any pending frames under the socket lock.
 * Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);
	return 0;
}
1003 | |||
1004 | int udp_setsockopt(struct sock *sk, int level, int optname, | ||
1005 | char __user *optval, int optlen) | ||
1006 | { | ||
1007 | if (IS_SOL_UDPFAMILY(level)) | ||
1008 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | ||
1009 | udp_push_pending_frames); | ||
1010 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
1011 | } | ||
1012 | |||
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): levels outside the UDP family are
 * passed to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (!IS_SOL_UDPFAMILY(level))
		return compat_ip_setsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1023 | |||
1024 | int udp_getsockopt(struct sock *sk, int level, int optname, | ||
1025 | char __user *optval, int __user *optlen) | ||
1026 | { | ||
1027 | if (IS_SOL_UDPFAMILY(level)) | ||
1028 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | ||
1029 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
1030 | } | ||
1031 | |||
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): levels outside the UDP family are
 * passed to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (!IS_SOL_UDPFAMILY(level))
		return compat_ip_getsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1041 | |||
1042 | /* ------------------------------------------------------------------------ */ | ||
DEFINE_PROTO_INUSE(udp)

/*
 * Protocol operations table for IPv4 UDP sockets.  Entries point at the
 * handlers in this file where one exists and at the shared udp_lib_*
 * and ip4_* helpers otherwise.
 */
struct proto udp_prot = {
	.name		   = "UDP",
	.owner		   = THIS_MODULE,
	.close		   = udp_lib_close,
	.connect	   = ip4_datagram_connect,
	.disconnect	   = udp_disconnect,
	.ioctl		   = udp_ioctl,
	.destroy	   = udp_destroy_sock,
	.setsockopt	   = udp_setsockopt,
	.getsockopt	   = udp_getsockopt,
	.sendmsg	   = udp_sendmsg,
	.recvmsg	   = udp_recvmsg,
	.sendpage	   = udp_sendpage,
	/* Packets backlogged while the socket was owned by user context. */
	.backlog_rcv	   = udp_queue_rcv_skb,
	.hash		   = udp_lib_hash,
	.unhash		   = udp_lib_unhash,
	.get_port	   = udp_v4_get_port,
	.memory_allocated  = &udp_memory_allocated,
	.sysctl_mem	   = sysctl_udp_mem,
	.sysctl_wmem	   = &sysctl_udp_wmem_min,
	.sysctl_rmem	   = &sysctl_udp_rmem_min,
	.obj_size	   = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_udp_setsockopt,
	.compat_getsockopt = compat_udp_getsockopt,
#endif
	REF_PROTO_INUSE(udp)
};
1073 | |||
1074 | /* ------------------------------------------------------------------------ */ | ||
1075 | static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) | ||
1076 | { | ||
1077 | struct inet_sock *inet = inet_sk(sp); | ||
1078 | __be32 dest = inet->daddr; | ||
1079 | __be32 src = inet->rcv_saddr; | ||
1080 | __u16 destp = ntohs(inet->dport); | ||
1081 | __u16 srcp = ntohs(inet->sport); | ||
1082 | |||
1083 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" | ||
1084 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", | ||
1085 | bucket, src, srcp, dest, destp, sp->sk_state, | ||
1086 | atomic_read(&sp->sk_wmem_alloc), | ||
1087 | atomic_read(&sp->sk_rmem_alloc), | ||
1088 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), | ||
1089 | atomic_read(&sp->sk_refcnt), sp); | ||
1090 | } | ||
1091 | |||
1092 | int udp4_seq_show(struct seq_file *seq, void *v) | ||
1093 | { | ||
1094 | if (v == SEQ_START_TOKEN) | ||
1095 | seq_printf(seq, "%-127s\n", | ||
1096 | " sl local_address rem_address st tx_queue " | ||
1097 | "rx_queue tr tm->when retrnsmt uid timeout " | ||
1098 | "inode"); | ||
1099 | else { | ||
1100 | char tmpbuf[129]; | ||
1101 | struct udp_iter_state *state = seq->private; | ||
1102 | |||
1103 | udp4_format_sock(v, tmpbuf, state->bucket); | ||
1104 | seq_printf(seq, "%-127s\n", tmpbuf); | ||
1105 | } | ||
1106 | return 0; | ||
1107 | } | ||
1108 | |||
1109 | /* ------------------------------------------------------------------------ */ | ||
#ifdef CONFIG_PROC_FS
static struct file_operations udp4_seq_fops;

/* Registration record tying the "udp" proc entry to udp_hash and udp4_seq_show(). */
static struct udp_seq_afinfo udp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "udp",
	.family		= AF_INET,
	.hashtable	= udp_hash,
	.seq_show	= udp4_seq_show,
	.seq_fops	= &udp4_seq_fops,
};

/* Register the IPv4 UDP proc entry; returns udp_proc_register()'s result. */
int __init udp4_proc_init(void)
{
	return udp_proc_register(&udp4_seq_afinfo);
}

/* Unregister the IPv4 UDP proc entry. */
void udp4_proc_exit(void)
{
	udp_proc_unregister(&udp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
1131 | |||
/* Symbols exported for use outside this file. */
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
1134 | |||
diff --git a/net/ipv4/udplite_ipv4.c b/net/ipv4/udplite.c index d49c6d68c8a9..d49c6d68c8a9 100644 --- a/net/ipv4/udplite_ipv4.c +++ b/net/ipv4/udplite.c | |||