aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-03-06 19:22:02 -0500
committerDavid S. Miller <davem@davemloft.net>2008-03-06 19:22:02 -0500
commitdb8dac20d5199307dcfcf4e01dac4bda5edf9e89 (patch)
tree3694d1aee5c0014fb45eec045a67ca150ca1231f /net/ipv4/udp.c
parentba0fa4599484b98dbb21d279fbfdb40e9c07d30d (diff)
[UDP]: Revert udplite and code split.
This reverts commit db1ed684f6c430c4cdad67d058688b8a1b5e607c ("[IPV6] UDP: Rename IPv6 UDP files."), commit 8be8af8fa4405652e6c0797db5465a4be8afb998 ("[IPV4] UDP: Move IPv4-specific bits to other file.") and commit e898d4db2749c6052072e9bc4448e396cbdeb06a ("[UDP]: Allow users to configure UDP-Lite."). First, udplite is of such small cost, and it is a core protocol just like TCP and normal UDP are. We spent enormous amounts of effort to make udplite share as much code with core UDP as possible. All of that work is less valuable if we're just going to slap a config option on udplite support. It is also causing build failures, as reported on linux-next, showing that the changeset was not tested very well. In fact, this is the second build failure resulting from the udplite change. Finally, the config option provided was a bool, instead of a modular option. This means the udplite code does not even get build tested by allmodconfig builds, and furthermore the user is not presented with a reasonable modular build option, which is particularly needed by distribution vendors. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--net/ipv4/udp.c1090
1 files changed, 1086 insertions, 4 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c53d7673b57d..7ea1b67b6de1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum,
246 return __udp_lib_get_port(sk, snum, udp_hash, scmp); 246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
247} 247}
248 248
249int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
250{
251 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
252
253 return ( !ipv6_only_sock(sk2) &&
254 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
255 inet1->rcv_saddr == inet2->rcv_saddr ));
256}
257
258static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
259{
260 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
261}
262
263/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
264 * harder than this. -DaveM
265 */
266static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
267 __be16 sport, __be32 daddr, __be16 dport,
268 int dif, struct hlist_head udptable[])
269{
270 struct sock *sk, *result = NULL;
271 struct hlist_node *node;
272 unsigned short hnum = ntohs(dport);
273 int badness = -1;
274
275 read_lock(&udp_hash_lock);
276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
277 struct inet_sock *inet = inet_sk(sk);
278
279 if (sk->sk_net == net && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) {
281 int score = (sk->sk_family == PF_INET ? 1 : 0);
282 if (inet->rcv_saddr) {
283 if (inet->rcv_saddr != daddr)
284 continue;
285 score+=2;
286 }
287 if (inet->daddr) {
288 if (inet->daddr != saddr)
289 continue;
290 score+=2;
291 }
292 if (inet->dport) {
293 if (inet->dport != sport)
294 continue;
295 score+=2;
296 }
297 if (sk->sk_bound_dev_if) {
298 if (sk->sk_bound_dev_if != dif)
299 continue;
300 score+=2;
301 }
302 if (score == 9) {
303 result = sk;
304 break;
305 } else if (score > badness) {
306 result = sk;
307 badness = score;
308 }
309 }
310 }
311 if (result)
312 sock_hold(result);
313 read_unlock(&udp_hash_lock);
314 return result;
315}
316
317static inline struct sock *udp_v4_mcast_next(struct sock *sk,
318 __be16 loc_port, __be32 loc_addr,
319 __be16 rmt_port, __be32 rmt_addr,
320 int dif)
321{
322 struct hlist_node *node;
323 struct sock *s = sk;
324 unsigned short hnum = ntohs(loc_port);
325
326 sk_for_each_from(s, node) {
327 struct inet_sock *inet = inet_sk(s);
328
329 if (s->sk_hash != hnum ||
330 (inet->daddr && inet->daddr != rmt_addr) ||
331 (inet->dport != rmt_port && inet->dport) ||
332 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
333 ipv6_only_sock(s) ||
334 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
335 continue;
336 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
337 continue;
338 goto found;
339 }
340 s = NULL;
341found:
342 return s;
343}
344
345/*
346 * This routine is called by the ICMP module when it gets some
347 * sort of error condition. If err < 0 then the socket should
348 * be closed and the error returned to the user. If err > 0
349 * it's just the icmp type << 8 | icmp code.
350 * Header points to the ip header of the error packet. We move
351 * on past this. Then (as it used to claim before adjustment)
352 * header points to the first 8 bytes of the udp header. We need
353 * to find the appropriate port.
354 */
355
356void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
357{
358 struct inet_sock *inet;
359 struct iphdr *iph = (struct iphdr*)skb->data;
360 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
361 const int type = icmp_hdr(skb)->type;
362 const int code = icmp_hdr(skb)->code;
363 struct sock *sk;
364 int harderr;
365 int err;
366
367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
368 iph->saddr, uh->source, skb->dev->ifindex, udptable);
369 if (sk == NULL) {
370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
371 return; /* No socket for error */
372 }
373
374 err = 0;
375 harderr = 0;
376 inet = inet_sk(sk);
377
378 switch (type) {
379 default:
380 case ICMP_TIME_EXCEEDED:
381 err = EHOSTUNREACH;
382 break;
383 case ICMP_SOURCE_QUENCH:
384 goto out;
385 case ICMP_PARAMETERPROB:
386 err = EPROTO;
387 harderr = 1;
388 break;
389 case ICMP_DEST_UNREACH:
390 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
391 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
392 err = EMSGSIZE;
393 harderr = 1;
394 break;
395 }
396 goto out;
397 }
398 err = EHOSTUNREACH;
399 if (code <= NR_ICMP_UNREACH) {
400 harderr = icmp_err_convert[code].fatal;
401 err = icmp_err_convert[code].errno;
402 }
403 break;
404 }
405
406 /*
407 * RFC1122: OK. Passes ICMP errors back to application, as per
408 * 4.1.3.3.
409 */
410 if (!inet->recverr) {
411 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
412 goto out;
413 } else {
414 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
415 }
416 sk->sk_err = err;
417 sk->sk_error_report(sk);
418out:
419 sock_put(sk);
420}
421
422void udp_err(struct sk_buff *skb, u32 info)
423{
424 __udp4_lib_err(skb, info, udp_hash);
425}
426
427/*
428 * Throw away all pending data and cancel the corking. Socket is locked.
429 */
430static void udp_flush_pending_frames(struct sock *sk)
431{
432 struct udp_sock *up = udp_sk(sk);
433
434 if (up->pending) {
435 up->len = 0;
436 up->pending = 0;
437 ip_flush_pending_frames(sk);
438 }
439}
440
441/**
442 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
443 * @sk: socket we are sending on
444 * @skb: sk_buff containing the filled-in UDP header
445 * (checksum field must be zeroed out)
446 */
447static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
448 __be32 src, __be32 dst, int len )
449{
450 unsigned int offset;
451 struct udphdr *uh = udp_hdr(skb);
452 __wsum csum = 0;
453
454 if (skb_queue_len(&sk->sk_write_queue) == 1) {
455 /*
456 * Only one fragment on the socket.
457 */
458 skb->csum_start = skb_transport_header(skb) - skb->head;
459 skb->csum_offset = offsetof(struct udphdr, check);
460 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
461 } else {
462 /*
463 * HW-checksum won't work as there are two or more
464 * fragments on the socket so that all csums of sk_buffs
465 * should be together
466 */
467 offset = skb_transport_offset(skb);
468 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
469
470 skb->ip_summed = CHECKSUM_NONE;
471
472 skb_queue_walk(&sk->sk_write_queue, skb) {
473 csum = csum_add(csum, skb->csum);
474 }
475
476 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
477 if (uh->check == 0)
478 uh->check = CSUM_MANGLED_0;
479 }
480}
481
482/*
483 * Push out all pending data as one UDP datagram. Socket is locked.
484 */
485static int udp_push_pending_frames(struct sock *sk)
486{
487 struct udp_sock *up = udp_sk(sk);
488 struct inet_sock *inet = inet_sk(sk);
489 struct flowi *fl = &inet->cork.fl;
490 struct sk_buff *skb;
491 struct udphdr *uh;
492 int err = 0;
493 int is_udplite = IS_UDPLITE(sk);
494 __wsum csum = 0;
495
496 /* Grab the skbuff where UDP header space exists. */
497 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
498 goto out;
499
500 /*
501 * Create a UDP header
502 */
503 uh = udp_hdr(skb);
504 uh->source = fl->fl_ip_sport;
505 uh->dest = fl->fl_ip_dport;
506 uh->len = htons(up->len);
507 uh->check = 0;
508
509 if (is_udplite) /* UDP-Lite */
510 csum = udplite_csum_outgoing(sk, skb);
511
512 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
513
514 skb->ip_summed = CHECKSUM_NONE;
515 goto send;
516
517 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
518
519 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
520 goto send;
521
522 } else /* `normal' UDP */
523 csum = udp_csum_outgoing(sk, skb);
524
525 /* add protocol-dependent pseudo-header */
526 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
527 sk->sk_protocol, csum );
528 if (uh->check == 0)
529 uh->check = CSUM_MANGLED_0;
530
531send:
532 err = ip_push_pending_frames(sk);
533out:
534 up->len = 0;
535 up->pending = 0;
536 if (!err)
537 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
538 return err;
539}
540
541int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
542 size_t len)
543{
544 struct inet_sock *inet = inet_sk(sk);
545 struct udp_sock *up = udp_sk(sk);
546 int ulen = len;
547 struct ipcm_cookie ipc;
548 struct rtable *rt = NULL;
549 int free = 0;
550 int connected = 0;
551 __be32 daddr, faddr, saddr;
552 __be16 dport;
553 u8 tos;
554 int err, is_udplite = IS_UDPLITE(sk);
555 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
556 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
557
558 if (len > 0xFFFF)
559 return -EMSGSIZE;
560
561 /*
562 * Check the flags.
563 */
564
565 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
566 return -EOPNOTSUPP;
567
568 ipc.opt = NULL;
569
570 if (up->pending) {
571 /*
572 * There are pending frames.
573 * The socket lock must be held while it's corked.
574 */
575 lock_sock(sk);
576 if (likely(up->pending)) {
577 if (unlikely(up->pending != AF_INET)) {
578 release_sock(sk);
579 return -EINVAL;
580 }
581 goto do_append_data;
582 }
583 release_sock(sk);
584 }
585 ulen += sizeof(struct udphdr);
586
587 /*
588 * Get and verify the address.
589 */
590 if (msg->msg_name) {
591 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
592 if (msg->msg_namelen < sizeof(*usin))
593 return -EINVAL;
594 if (usin->sin_family != AF_INET) {
595 if (usin->sin_family != AF_UNSPEC)
596 return -EAFNOSUPPORT;
597 }
598
599 daddr = usin->sin_addr.s_addr;
600 dport = usin->sin_port;
601 if (dport == 0)
602 return -EINVAL;
603 } else {
604 if (sk->sk_state != TCP_ESTABLISHED)
605 return -EDESTADDRREQ;
606 daddr = inet->daddr;
607 dport = inet->dport;
608 /* Open fast path for connected socket.
609 Route will not be used, if at least one option is set.
610 */
611 connected = 1;
612 }
613 ipc.addr = inet->saddr;
614
615 ipc.oif = sk->sk_bound_dev_if;
616 if (msg->msg_controllen) {
617 err = ip_cmsg_send(msg, &ipc);
618 if (err)
619 return err;
620 if (ipc.opt)
621 free = 1;
622 connected = 0;
623 }
624 if (!ipc.opt)
625 ipc.opt = inet->opt;
626
627 saddr = ipc.addr;
628 ipc.addr = faddr = daddr;
629
630 if (ipc.opt && ipc.opt->srr) {
631 if (!daddr)
632 return -EINVAL;
633 faddr = ipc.opt->faddr;
634 connected = 0;
635 }
636 tos = RT_TOS(inet->tos);
637 if (sock_flag(sk, SOCK_LOCALROUTE) ||
638 (msg->msg_flags & MSG_DONTROUTE) ||
639 (ipc.opt && ipc.opt->is_strictroute)) {
640 tos |= RTO_ONLINK;
641 connected = 0;
642 }
643
644 if (ipv4_is_multicast(daddr)) {
645 if (!ipc.oif)
646 ipc.oif = inet->mc_index;
647 if (!saddr)
648 saddr = inet->mc_addr;
649 connected = 0;
650 }
651
652 if (connected)
653 rt = (struct rtable*)sk_dst_check(sk, 0);
654
655 if (rt == NULL) {
656 struct flowi fl = { .oif = ipc.oif,
657 .nl_u = { .ip4_u =
658 { .daddr = faddr,
659 .saddr = saddr,
660 .tos = tos } },
661 .proto = sk->sk_protocol,
662 .uli_u = { .ports =
663 { .sport = inet->sport,
664 .dport = dport } } };
665 security_sk_classify_flow(sk, &fl);
666 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
667 if (err) {
668 if (err == -ENETUNREACH)
669 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
670 goto out;
671 }
672
673 err = -EACCES;
674 if ((rt->rt_flags & RTCF_BROADCAST) &&
675 !sock_flag(sk, SOCK_BROADCAST))
676 goto out;
677 if (connected)
678 sk_dst_set(sk, dst_clone(&rt->u.dst));
679 }
680
681 if (msg->msg_flags&MSG_CONFIRM)
682 goto do_confirm;
683back_from_confirm:
684
685 saddr = rt->rt_src;
686 if (!ipc.addr)
687 daddr = ipc.addr = rt->rt_dst;
688
689 lock_sock(sk);
690 if (unlikely(up->pending)) {
691 /* The socket is already corked while preparing it. */
692 /* ... which is an evident application bug. --ANK */
693 release_sock(sk);
694
695 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
696 err = -EINVAL;
697 goto out;
698 }
699 /*
700 * Now cork the socket to pend data.
701 */
702 inet->cork.fl.fl4_dst = daddr;
703 inet->cork.fl.fl_ip_dport = dport;
704 inet->cork.fl.fl4_src = saddr;
705 inet->cork.fl.fl_ip_sport = inet->sport;
706 up->pending = AF_INET;
707
708do_append_data:
709 up->len += ulen;
710 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
711 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
712 sizeof(struct udphdr), &ipc, rt,
713 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
714 if (err)
715 udp_flush_pending_frames(sk);
716 else if (!corkreq)
717 err = udp_push_pending_frames(sk);
718 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
719 up->pending = 0;
720 release_sock(sk);
721
722out:
723 ip_rt_put(rt);
724 if (free)
725 kfree(ipc.opt);
726 if (!err)
727 return len;
728 /*
729 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
730 * ENOBUFS might not be good (it's not tunable per se), but otherwise
731 * we don't have a good statistic (IpOutDiscards but it can be too many
732 * things). We could add another new stat but at least for now that
733 * seems like overkill.
734 */
735 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
736 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
737 }
738 return err;
739
740do_confirm:
741 dst_confirm(&rt->u.dst);
742 if (!(msg->msg_flags&MSG_PROBE) || len)
743 goto back_from_confirm;
744 err = 0;
745 goto out;
746}
747
748int udp_sendpage(struct sock *sk, struct page *page, int offset,
749 size_t size, int flags)
750{
751 struct udp_sock *up = udp_sk(sk);
752 int ret;
753
754 if (!up->pending) {
755 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
756
757 /* Call udp_sendmsg to specify destination address which
758 * sendpage interface can't pass.
759 * This will succeed only when the socket is connected.
760 */
761 ret = udp_sendmsg(NULL, sk, &msg, 0);
762 if (ret < 0)
763 return ret;
764 }
765
766 lock_sock(sk);
767
768 if (unlikely(!up->pending)) {
769 release_sock(sk);
770
771 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
772 return -EINVAL;
773 }
774
775 ret = ip_append_page(sk, page, offset, size, flags);
776 if (ret == -EOPNOTSUPP) {
777 release_sock(sk);
778 return sock_no_sendpage(sk->sk_socket, page, offset,
779 size, flags);
780 }
781 if (ret < 0) {
782 udp_flush_pending_frames(sk);
783 goto out;
784 }
785
786 up->len += size;
787 if (!(up->corkflag || (flags&MSG_MORE)))
788 ret = udp_push_pending_frames(sk);
789 if (!ret)
790 ret = size;
791out:
792 release_sock(sk);
793 return ret;
794}
795
249/* 796/*
250 * IOCTL requests applicable to the UDP protocol 797 * IOCTL requests applicable to the UDP protocol
251 */ 798 */
@@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
286 return 0; 833 return 0;
287} 834}
288 835
836/*
837 * This should be easy, if there is something there we
838 * return it, otherwise we block.
839 */
840
841int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
842 size_t len, int noblock, int flags, int *addr_len)
843{
844 struct inet_sock *inet = inet_sk(sk);
845 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
846 struct sk_buff *skb;
847 unsigned int ulen, copied;
848 int peeked;
849 int err;
850 int is_udplite = IS_UDPLITE(sk);
851
852 /*
853 * Check any passed addresses
854 */
855 if (addr_len)
856 *addr_len=sizeof(*sin);
857
858 if (flags & MSG_ERRQUEUE)
859 return ip_recv_error(sk, msg, len);
860
861try_again:
862 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
863 &peeked, &err);
864 if (!skb)
865 goto out;
866
867 ulen = skb->len - sizeof(struct udphdr);
868 copied = len;
869 if (copied > ulen)
870 copied = ulen;
871 else if (copied < ulen)
872 msg->msg_flags |= MSG_TRUNC;
873
874 /*
875 * If checksum is needed at all, try to do it while copying the
876 * data. If the data is truncated, or if we only want a partial
877 * coverage checksum (UDP-Lite), do it before the copy.
878 */
879
880 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
881 if (udp_lib_checksum_complete(skb))
882 goto csum_copy_err;
883 }
884
885 if (skb_csum_unnecessary(skb))
886 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
887 msg->msg_iov, copied );
888 else {
889 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
890
891 if (err == -EINVAL)
892 goto csum_copy_err;
893 }
894
895 if (err)
896 goto out_free;
897
898 if (!peeked)
899 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
900
901 sock_recv_timestamp(msg, sk, skb);
902
903 /* Copy the address. */
904 if (sin)
905 {
906 sin->sin_family = AF_INET;
907 sin->sin_port = udp_hdr(skb)->source;
908 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
909 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
910 }
911 if (inet->cmsg_flags)
912 ip_cmsg_recv(msg, skb);
913
914 err = copied;
915 if (flags & MSG_TRUNC)
916 err = ulen;
917
918out_free:
919 lock_sock(sk);
920 skb_free_datagram(sk, skb);
921 release_sock(sk);
922out:
923 return err;
924
925csum_copy_err:
926 lock_sock(sk);
927 if (!skb_kill_datagram(sk, skb, flags))
928 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
929 release_sock(sk);
930
931 if (noblock)
932 return -EAGAIN;
933 goto try_again;
934}
935
936
289int udp_disconnect(struct sock *sk, int flags) 937int udp_disconnect(struct sock *sk, int flags)
290{ 938{
291 struct inet_sock *inet = inet_sk(sk); 939 struct inet_sock *inet = inet_sk(sk);
@@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags)
308 return 0; 956 return 0;
309} 957}
310 958
959/* returns:
960 * -1: error
961 * 0: success
962 * >0: "udp encap" protocol resubmission
963 *
964 * Note that in the success and error cases, the skb is assumed to
965 * have either been requeued or freed.
966 */
967int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
968{
969 struct udp_sock *up = udp_sk(sk);
970 int rc;
971 int is_udplite = IS_UDPLITE(sk);
972
973 /*
974 * Charge it to the socket, dropping if the queue is full.
975 */
976 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
977 goto drop;
978 nf_reset(skb);
979
980 if (up->encap_type) {
981 /*
982 * This is an encapsulation socket so pass the skb to
983 * the socket's udp_encap_rcv() hook. Otherwise, just
984 * fall through and pass this up the UDP socket.
985 * up->encap_rcv() returns the following value:
986 * =0 if skb was successfully passed to the encap
987 * handler or was discarded by it.
988 * >0 if skb should be passed on to UDP.
989 * <0 if skb should be resubmitted as proto -N
990 */
991
992 /* if we're overly short, let UDP handle it */
993 if (skb->len > sizeof(struct udphdr) &&
994 up->encap_rcv != NULL) {
995 int ret;
996
997 ret = (*up->encap_rcv)(sk, skb);
998 if (ret <= 0) {
999 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
1000 is_udplite);
1001 return -ret;
1002 }
1003 }
1004
1005 /* FALLTHROUGH -- it's a UDP Packet */
1006 }
1007
1008 /*
1009 * UDP-Lite specific tests, ignored on UDP sockets
1010 */
1011 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
1012
1013 /*
1014 * MIB statistics other than incrementing the error count are
1015 * disabled for the following two types of errors: these depend
1016 * on the application settings, not on the functioning of the
1017 * protocol stack as such.
1018 *
1019 * RFC 3828 here recommends (sec 3.3): "There should also be a
1020 * way ... to ... at least let the receiving application block
1021 * delivery of packets with coverage values less than a value
1022 * provided by the application."
1023 */
1024 if (up->pcrlen == 0) { /* full coverage was set */
1025 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1026 "%d while full coverage %d requested\n",
1027 UDP_SKB_CB(skb)->cscov, skb->len);
1028 goto drop;
1029 }
1030 /* The next case involves violating the min. coverage requested
1031 * by the receiver. This is subtle: if receiver wants x and x is
1032 * greater than the buffersize/MTU then receiver will complain
1033 * that it wants x while sender emits packets of smaller size y.
1034 * Therefore the above ...()->partial_cov statement is essential.
1035 */
1036 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
1037 LIMIT_NETDEBUG(KERN_WARNING
1038 "UDPLITE: coverage %d too small, need min %d\n",
1039 UDP_SKB_CB(skb)->cscov, up->pcrlen);
1040 goto drop;
1041 }
1042 }
1043
1044 if (sk->sk_filter) {
1045 if (udp_lib_checksum_complete(skb))
1046 goto drop;
1047 }
1048
1049 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1050 /* Note that an ENOMEM error is charged twice */
1051 if (rc == -ENOMEM)
1052 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
1053 goto drop;
1054 }
1055
1056 return 0;
1057
1058drop:
1059 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1060 kfree_skb(skb);
1061 return -1;
1062}
1063
1064/*
1065 * Multicasts and broadcasts go to each listener.
1066 *
1067 * Note: called only from the BH handler context,
1068 * so we don't need to lock the hashes.
1069 */
1070static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1071 struct udphdr *uh,
1072 __be32 saddr, __be32 daddr,
1073 struct hlist_head udptable[])
1074{
1075 struct sock *sk;
1076 int dif;
1077
1078 read_lock(&udp_hash_lock);
1079 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
1080 dif = skb->dev->ifindex;
1081 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1082 if (sk) {
1083 struct sock *sknext = NULL;
1084
1085 do {
1086 struct sk_buff *skb1 = skb;
1087
1088 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
1089 uh->source, saddr, dif);
1090 if (sknext)
1091 skb1 = skb_clone(skb, GFP_ATOMIC);
1092
1093 if (skb1) {
1094 int ret = 0;
1095
1096 bh_lock_sock_nested(sk);
1097 if (!sock_owned_by_user(sk))
1098 ret = udp_queue_rcv_skb(sk, skb1);
1099 else
1100 sk_add_backlog(sk, skb1);
1101 bh_unlock_sock(sk);
1102
1103 if (ret > 0)
1104 /* we should probably re-process instead
1105 * of dropping packets here. */
1106 kfree_skb(skb1);
1107 }
1108 sk = sknext;
1109 } while (sknext);
1110 } else
1111 kfree_skb(skb);
1112 read_unlock(&udp_hash_lock);
1113 return 0;
1114}
1115
1116/* Initialize UDP checksum. If exited with zero value (success),
1117 * CHECKSUM_UNNECESSARY means, that no more checks are required.
1118 * Otherwise, csum completion requires chacksumming packet body,
1119 * including udp header and folding it to skb->csum.
1120 */
1121static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1122 int proto)
1123{
1124 const struct iphdr *iph;
1125 int err;
1126
1127 UDP_SKB_CB(skb)->partial_cov = 0;
1128 UDP_SKB_CB(skb)->cscov = skb->len;
1129
1130 if (proto == IPPROTO_UDPLITE) {
1131 err = udplite_checksum_init(skb, uh);
1132 if (err)
1133 return err;
1134 }
1135
1136 iph = ip_hdr(skb);
1137 if (uh->check == 0) {
1138 skb->ip_summed = CHECKSUM_UNNECESSARY;
1139 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1140 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1141 proto, skb->csum))
1142 skb->ip_summed = CHECKSUM_UNNECESSARY;
1143 }
1144 if (!skb_csum_unnecessary(skb))
1145 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1146 skb->len, proto, 0);
1147 /* Probably, we should checksum udp header (it should be in cache
1148 * in any case) and data in tiny packets (< rx copybreak).
1149 */
1150
1151 return 0;
1152}
1153
1154/*
1155 * All we need to do is get the socket, and then do a checksum.
1156 */
1157
1158int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1159 int proto)
1160{
1161 struct sock *sk;
1162 struct udphdr *uh = udp_hdr(skb);
1163 unsigned short ulen;
1164 struct rtable *rt = (struct rtable*)skb->dst;
1165 __be32 saddr = ip_hdr(skb)->saddr;
1166 __be32 daddr = ip_hdr(skb)->daddr;
1167
1168 /*
1169 * Validate the packet.
1170 */
1171 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1172 goto drop; /* No space for header. */
1173
1174 ulen = ntohs(uh->len);
1175 if (ulen > skb->len)
1176 goto short_packet;
1177
1178 if (proto == IPPROTO_UDP) {
1179 /* UDP validates ulen. */
1180 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1181 goto short_packet;
1182 uh = udp_hdr(skb);
1183 }
1184
1185 if (udp4_csum_init(skb, uh, proto))
1186 goto csum_error;
1187
1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1190
1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
1192 uh->dest, inet_iif(skb), udptable);
1193
1194 if (sk != NULL) {
1195 int ret = 0;
1196 bh_lock_sock_nested(sk);
1197 if (!sock_owned_by_user(sk))
1198 ret = udp_queue_rcv_skb(sk, skb);
1199 else
1200 sk_add_backlog(sk, skb);
1201 bh_unlock_sock(sk);
1202 sock_put(sk);
1203
1204 /* a return value > 0 means to resubmit the input, but
1205 * it wants the return to be -protocol, or 0
1206 */
1207 if (ret > 0)
1208 return -ret;
1209 return 0;
1210 }
1211
1212 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1213 goto drop;
1214 nf_reset(skb);
1215
1216 /* No socket. Drop packet silently, if checksum is wrong */
1217 if (udp_lib_checksum_complete(skb))
1218 goto csum_error;
1219
1220 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1221 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1222
1223 /*
1224 * Hmm. We got an UDP packet to a port to which we
1225 * don't wanna listen. Ignore it.
1226 */
1227 kfree_skb(skb);
1228 return 0;
1229
1230short_packet:
1231 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1232 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1233 NIPQUAD(saddr),
1234 ntohs(uh->source),
1235 ulen,
1236 skb->len,
1237 NIPQUAD(daddr),
1238 ntohs(uh->dest));
1239 goto drop;
1240
1241csum_error:
1242 /*
1243 * RFC1122: OK. Discards the bad packet silently (as far as
1244 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1245 */
1246 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1247 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1248 NIPQUAD(saddr),
1249 ntohs(uh->source),
1250 NIPQUAD(daddr),
1251 ntohs(uh->dest),
1252 ulen);
1253drop:
1254 UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1255 kfree_skb(skb);
1256 return 0;
1257}
1258
1259int udp_rcv(struct sk_buff *skb)
1260{
1261 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
1262}
1263
/*
 * Socket destroy hook: discards any still-corked data while holding the
 * socket lock.  Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);	/* no-op when nothing is pending */
	release_sock(sk);

	return 0;
}
1271
311/* 1272/*
312 * Socket option code for UDP 1273 * Socket option code for UDP
313 */ 1274 */
@@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
318 struct udp_sock *up = udp_sk(sk); 1279 struct udp_sock *up = udp_sk(sk);
319 int val; 1280 int val;
320 int err = 0; 1281 int err = 0;
321#ifdef CONFIG_IP_UDPLITE
322 int is_udplite = IS_UDPLITE(sk); 1282 int is_udplite = IS_UDPLITE(sk);
323#endif
324 1283
325 if (optlen<sizeof(int)) 1284 if (optlen<sizeof(int))
326 return -EINVAL; 1285 return -EINVAL;
@@ -356,7 +1315,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
356 } 1315 }
357 break; 1316 break;
358 1317
359#ifdef CONFIG_IP_UDPLITE
360 /* 1318 /*
361 * UDP-Lite's partial checksum coverage (RFC 3828). 1319 * UDP-Lite's partial checksum coverage (RFC 3828).
362 */ 1320 */
@@ -382,7 +1340,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
382 up->pcrlen = val; 1340 up->pcrlen = val;
383 up->pcflag |= UDPLITE_RECV_CC; 1341 up->pcflag |= UDPLITE_RECV_CC;
384 break; 1342 break;
385#endif
386 1343
387 default: 1344 default:
388 err = -ENOPROTOOPT; 1345 err = -ENOPROTOOPT;
@@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
392 return err; 1349 return err;
393} 1350}
394 1351
1352int udp_setsockopt(struct sock *sk, int level, int optname,
1353 char __user *optval, int optlen)
1354{
1355 if (level == SOL_UDP || level == SOL_UDPLITE)
1356 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1357 udp_push_pending_frames);
1358 return ip_setsockopt(sk, level, optname, optval, optlen);
1359}
1360
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt() for UDP sockets: same dispatch as udp_setsockopt(),
 * except that non-UDP levels go to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1371
395int udp_lib_getsockopt(struct sock *sk, int level, int optname, 1372int udp_lib_getsockopt(struct sock *sk, int level, int optname,
396 char __user *optval, int __user *optlen) 1373 char __user *optval, int __user *optlen)
397{ 1374{
@@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
436 return 0; 1413 return 0;
437} 1414}
438 1415
1416int udp_getsockopt(struct sock *sk, int level, int optname,
1417 char __user *optval, int __user *optlen)
1418{
1419 if (level == SOL_UDP || level == SOL_UDPLITE)
1420 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1421 return ip_getsockopt(sk, level, optname, optval, optlen);
1422}
1423
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt() for UDP sockets: same dispatch as udp_getsockopt(),
 * except that non-UDP levels go to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
439/** 1433/**
440 * udp_poll - wait for a UDP event. 1434 * udp_poll - wait for a UDP event.
441 * @file - file struct 1435 * @file - file struct
@@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
480 1474
481} 1475}
482 1476
1477DEFINE_PROTO_INUSE(udp)
1478
1479struct proto udp_prot = {
1480 .name = "UDP",
1481 .owner = THIS_MODULE,
1482 .close = udp_lib_close,
1483 .connect = ip4_datagram_connect,
1484 .disconnect = udp_disconnect,
1485 .ioctl = udp_ioctl,
1486 .destroy = udp_destroy_sock,
1487 .setsockopt = udp_setsockopt,
1488 .getsockopt = udp_getsockopt,
1489 .sendmsg = udp_sendmsg,
1490 .recvmsg = udp_recvmsg,
1491 .sendpage = udp_sendpage,
1492 .backlog_rcv = udp_queue_rcv_skb,
1493 .hash = udp_lib_hash,
1494 .unhash = udp_lib_unhash,
1495 .get_port = udp_v4_get_port,
1496 .memory_allocated = &udp_memory_allocated,
1497 .sysctl_mem = sysctl_udp_mem,
1498 .sysctl_wmem = &sysctl_udp_wmem_min,
1499 .sysctl_rmem = &sysctl_udp_rmem_min,
1500 .obj_size = sizeof(struct udp_sock),
1501#ifdef CONFIG_COMPAT
1502 .compat_setsockopt = compat_udp_setsockopt,
1503 .compat_getsockopt = compat_udp_getsockopt,
1504#endif
1505 REF_PROTO_INUSE(udp)
1506};
483 1507
484/* ------------------------------------------------------------------------ */ 1508/* ------------------------------------------------------------------------ */
485#ifdef CONFIG_PROC_FS 1509#ifdef CONFIG_PROC_FS
@@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
612 proc_net_remove(&init_net, afinfo->name); 1636 proc_net_remove(&init_net, afinfo->name);
613 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 1637 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
614} 1638}
1639
1640/* ------------------------------------------------------------------------ */
1641static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1642{
1643 struct inet_sock *inet = inet_sk(sp);
1644 __be32 dest = inet->daddr;
1645 __be32 src = inet->rcv_saddr;
1646 __u16 destp = ntohs(inet->dport);
1647 __u16 srcp = ntohs(inet->sport);
1648
1649 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1650 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1651 bucket, src, srcp, dest, destp, sp->sk_state,
1652 atomic_read(&sp->sk_wmem_alloc),
1653 atomic_read(&sp->sk_rmem_alloc),
1654 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1655 atomic_read(&sp->sk_refcnt), sp);
1656}
1657
1658int udp4_seq_show(struct seq_file *seq, void *v)
1659{
1660 if (v == SEQ_START_TOKEN)
1661 seq_printf(seq, "%-127s\n",
1662 " sl local_address rem_address st tx_queue "
1663 "rx_queue tr tm->when retrnsmt uid timeout "
1664 "inode");
1665 else {
1666 char tmpbuf[129];
1667 struct udp_iter_state *state = seq->private;
1668
1669 udp4_format_sock(v, tmpbuf, state->bucket);
1670 seq_printf(seq, "%-127s\n", tmpbuf);
1671 }
1672 return 0;
1673}
1674
1675/* ------------------------------------------------------------------------ */
1676static struct file_operations udp4_seq_fops;
1677static struct udp_seq_afinfo udp4_seq_afinfo = {
1678 .owner = THIS_MODULE,
1679 .name = "udp",
1680 .family = AF_INET,
1681 .hashtable = udp_hash,
1682 .seq_show = udp4_seq_show,
1683 .seq_fops = &udp4_seq_fops,
1684};
1685
1686int __init udp4_proc_init(void)
1687{
1688 return udp_proc_register(&udp4_seq_afinfo);
1689}
1690
1691void udp4_proc_exit(void)
1692{
1693 udp_proc_unregister(&udp4_seq_afinfo);
1694}
615#endif /* CONFIG_PROC_FS */ 1695#endif /* CONFIG_PROC_FS */
616 1696
617void __init udp_init(void) 1697void __init udp_init(void)
@@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash);
638EXPORT_SYMBOL(udp_hash_lock); 1718EXPORT_SYMBOL(udp_hash_lock);
639EXPORT_SYMBOL(udp_ioctl); 1719EXPORT_SYMBOL(udp_ioctl);
640EXPORT_SYMBOL(udp_get_port); 1720EXPORT_SYMBOL(udp_get_port);
1721EXPORT_SYMBOL(udp_prot);
1722EXPORT_SYMBOL(udp_sendmsg);
641EXPORT_SYMBOL(udp_lib_getsockopt); 1723EXPORT_SYMBOL(udp_lib_getsockopt);
642EXPORT_SYMBOL(udp_lib_setsockopt); 1724EXPORT_SYMBOL(udp_lib_setsockopt);
643EXPORT_SYMBOL(udp_poll); 1725EXPORT_SYMBOL(udp_poll);