aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-03-06 19:22:02 -0500
committerDavid S. Miller <davem@davemloft.net>2008-03-06 19:22:02 -0500
commitdb8dac20d5199307dcfcf4e01dac4bda5edf9e89 (patch)
tree3694d1aee5c0014fb45eec045a67ca150ca1231f /net/ipv4
parentba0fa4599484b98dbb21d279fbfdb40e9c07d30d (diff)
[UDP]: Revert udplite and code split.
This reverts commit db1ed684f6c430c4cdad67d058688b8a1b5e607c ("[IPV6] UDP: Rename IPv6 UDP files."), commit 8be8af8fa4405652e6c0797db5465a4be8afb998 ("[IPV4] UDP: Move IPv4-specific bits to other file.") and commit e898d4db2749c6052072e9bc4448e396cbdeb06a ("[UDP]: Allow users to configure UDP-Lite."). First, udplite is of such small cost, and it is a core protocol just like TCP and normal UDP are. We spent enormous amounts of effort to make udplite share as much code with core UDP as possible. All of that work is less valuable if we're just going to slap a config option on udplite support. It is also causing build failures, as reported on linux-next, showing that the changeset was not tested very well. In fact, this is the second build failure resulting from the udplite change. Finally, the config option provided was a bool instead of a modular option, meaning the udplite code does not even get build tested by allmodconfig builds, and furthermore the user is not presented with a reasonable modular build option, which is particularly needed by distribution vendors. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/proc.c5
-rw-r--r--net/ipv4/udp.c1090
-rw-r--r--net/ipv4/udp_ipv4.c1134
-rw-r--r--net/ipv4/udplite.c (renamed from net/ipv4/udplite_ipv4.c)0
7 files changed, 1089 insertions, 1160 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 5098fd2ff4d0..9c7e5ffb223d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -632,15 +632,5 @@ config TCP_MD5SIG
632 632
633 If unsure, say N. 633 If unsure, say N.
634 634
635config IP_UDPLITE
636 bool "IP: UDP-Lite Protocol (RFC 3828)"
637 default n
638 ---help---
639 UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length
640 checksum. Read <file:Documentation/networking/udplite.txt> for
641 details.
642
643 If unsure, say N.
644
645source "net/ipv4/ipvs/Kconfig" 635source "net/ipv4/ipvs/Kconfig"
646 636
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index d5226241d5ed..ad40ef3f9ebc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \
8 inet_timewait_sock.o inet_connection_sock.o \ 8 inet_timewait_sock.o inet_connection_sock.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o udp_ipv4.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \ 12 arp.o icmp.o devinet.o af_inet.o igmp.o \
13 fib_frontend.o fib_semantics.o \ 13 fib_frontend.o fib_semantics.o \
14 inet_fragment.o 14 inet_fragment.o
@@ -49,7 +49,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
50obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o 50obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
51obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o 51obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
52obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o
53obj-$(CONFIG_NETLABEL) += cipso_ipv4.o 52obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
54 53
55obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 54obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 25871c6c7444..4cb8a1385539 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1317,18 +1317,15 @@ static int __init init_ipv4_mibs(void)
1317 if (snmp_mib_init((void **)udp_statistics, 1317 if (snmp_mib_init((void **)udp_statistics,
1318 sizeof(struct udp_mib)) < 0) 1318 sizeof(struct udp_mib)) < 0)
1319 goto err_udp_mib; 1319 goto err_udp_mib;
1320#ifdef CONFIG_IP_UDPLITE
1321 if (snmp_mib_init((void **)udplite_statistics, 1320 if (snmp_mib_init((void **)udplite_statistics,
1322 sizeof(struct udp_mib)) < 0) 1321 sizeof(struct udp_mib)) < 0)
1323 goto err_udplite_mib; 1322 goto err_udplite_mib;
1324#endif 1323
1325 tcp_mib_init(); 1324 tcp_mib_init();
1326 1325
1327 return 0; 1326 return 0;
1328 1327
1329#ifdef CONFIG_IP_UDPLITE
1330err_udplite_mib: 1328err_udplite_mib:
1331#endif
1332 snmp_mib_free((void **)udp_statistics); 1329 snmp_mib_free((void **)udp_statistics);
1333err_udp_mib: 1330err_udp_mib:
1334 snmp_mib_free((void **)tcp_statistics); 1331 snmp_mib_free((void **)tcp_statistics);
@@ -1426,10 +1423,8 @@ static int __init inet_init(void)
1426 /* Setup UDP memory threshold */ 1423 /* Setup UDP memory threshold */
1427 udp_init(); 1424 udp_init();
1428 1425
1429#ifdef CONFIG_IP_UDPLITE
1430 /* Add UDP-Lite (RFC 3828) */ 1426 /* Add UDP-Lite (RFC 3828) */
1431 udplite4_register(); 1427 udplite4_register();
1432#endif
1433 1428
1434 /* 1429 /*
1435 * Set the ICMP layer up 1430 * Set the ICMP layer up
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d75ddb7fa4b8..d63474c6b400 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,9 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
59 atomic_read(&tcp_memory_allocated)); 59 atomic_read(&tcp_memory_allocated));
60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), 60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
61 atomic_read(&udp_memory_allocated)); 61 atomic_read(&udp_memory_allocated));
62#ifdef CONFIG_IP_UDPLITE
63 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); 62 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
64#endif
65 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); 63 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
66 seq_printf(seq, "FRAG: inuse %d memory %d\n", 64 seq_printf(seq, "FRAG: inuse %d memory %d\n",
67 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); 65 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
@@ -351,7 +349,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
351 snmp_fold_field((void **)udp_statistics, 349 snmp_fold_field((void **)udp_statistics,
352 snmp4_udp_list[i].entry)); 350 snmp4_udp_list[i].entry));
353 351
354#ifdef CONFIG_IP_UDPLITE
355 /* the UDP and UDP-Lite MIBs are the same */ 352 /* the UDP and UDP-Lite MIBs are the same */
356 seq_puts(seq, "\nUdpLite:"); 353 seq_puts(seq, "\nUdpLite:");
357 for (i = 0; snmp4_udp_list[i].name != NULL; i++) 354 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
@@ -362,7 +359,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
362 seq_printf(seq, " %lu", 359 seq_printf(seq, " %lu",
363 snmp_fold_field((void **)udplite_statistics, 360 snmp_fold_field((void **)udplite_statistics,
364 snmp4_udp_list[i].entry)); 361 snmp4_udp_list[i].entry));
365#endif 362
366 seq_putc(seq, '\n'); 363 seq_putc(seq, '\n');
367 return 0; 364 return 0;
368} 365}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c53d7673b57d..7ea1b67b6de1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum,
246 return __udp_lib_get_port(sk, snum, udp_hash, scmp); 246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
247} 247}
248 248
249int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
250{
251 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
252
253 return ( !ipv6_only_sock(sk2) &&
254 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
255 inet1->rcv_saddr == inet2->rcv_saddr ));
256}
257
258static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
259{
260 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
261}
262
263/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
264 * harder than this. -DaveM
265 */
266static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
267 __be16 sport, __be32 daddr, __be16 dport,
268 int dif, struct hlist_head udptable[])
269{
270 struct sock *sk, *result = NULL;
271 struct hlist_node *node;
272 unsigned short hnum = ntohs(dport);
273 int badness = -1;
274
275 read_lock(&udp_hash_lock);
276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
277 struct inet_sock *inet = inet_sk(sk);
278
279 if (sk->sk_net == net && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) {
281 int score = (sk->sk_family == PF_INET ? 1 : 0);
282 if (inet->rcv_saddr) {
283 if (inet->rcv_saddr != daddr)
284 continue;
285 score+=2;
286 }
287 if (inet->daddr) {
288 if (inet->daddr != saddr)
289 continue;
290 score+=2;
291 }
292 if (inet->dport) {
293 if (inet->dport != sport)
294 continue;
295 score+=2;
296 }
297 if (sk->sk_bound_dev_if) {
298 if (sk->sk_bound_dev_if != dif)
299 continue;
300 score+=2;
301 }
302 if (score == 9) {
303 result = sk;
304 break;
305 } else if (score > badness) {
306 result = sk;
307 badness = score;
308 }
309 }
310 }
311 if (result)
312 sock_hold(result);
313 read_unlock(&udp_hash_lock);
314 return result;
315}
316
317static inline struct sock *udp_v4_mcast_next(struct sock *sk,
318 __be16 loc_port, __be32 loc_addr,
319 __be16 rmt_port, __be32 rmt_addr,
320 int dif)
321{
322 struct hlist_node *node;
323 struct sock *s = sk;
324 unsigned short hnum = ntohs(loc_port);
325
326 sk_for_each_from(s, node) {
327 struct inet_sock *inet = inet_sk(s);
328
329 if (s->sk_hash != hnum ||
330 (inet->daddr && inet->daddr != rmt_addr) ||
331 (inet->dport != rmt_port && inet->dport) ||
332 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
333 ipv6_only_sock(s) ||
334 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
335 continue;
336 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
337 continue;
338 goto found;
339 }
340 s = NULL;
341found:
342 return s;
343}
344
345/*
346 * This routine is called by the ICMP module when it gets some
347 * sort of error condition. If err < 0 then the socket should
348 * be closed and the error returned to the user. If err > 0
349 * it's just the icmp type << 8 | icmp code.
350 * Header points to the ip header of the error packet. We move
351 * on past this. Then (as it used to claim before adjustment)
352 * header points to the first 8 bytes of the udp header. We need
353 * to find the appropriate port.
354 */
355
356void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
357{
358 struct inet_sock *inet;
359 struct iphdr *iph = (struct iphdr*)skb->data;
360 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
361 const int type = icmp_hdr(skb)->type;
362 const int code = icmp_hdr(skb)->code;
363 struct sock *sk;
364 int harderr;
365 int err;
366
367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
368 iph->saddr, uh->source, skb->dev->ifindex, udptable);
369 if (sk == NULL) {
370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
371 return; /* No socket for error */
372 }
373
374 err = 0;
375 harderr = 0;
376 inet = inet_sk(sk);
377
378 switch (type) {
379 default:
380 case ICMP_TIME_EXCEEDED:
381 err = EHOSTUNREACH;
382 break;
383 case ICMP_SOURCE_QUENCH:
384 goto out;
385 case ICMP_PARAMETERPROB:
386 err = EPROTO;
387 harderr = 1;
388 break;
389 case ICMP_DEST_UNREACH:
390 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
391 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
392 err = EMSGSIZE;
393 harderr = 1;
394 break;
395 }
396 goto out;
397 }
398 err = EHOSTUNREACH;
399 if (code <= NR_ICMP_UNREACH) {
400 harderr = icmp_err_convert[code].fatal;
401 err = icmp_err_convert[code].errno;
402 }
403 break;
404 }
405
406 /*
407 * RFC1122: OK. Passes ICMP errors back to application, as per
408 * 4.1.3.3.
409 */
410 if (!inet->recverr) {
411 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
412 goto out;
413 } else {
414 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
415 }
416 sk->sk_err = err;
417 sk->sk_error_report(sk);
418out:
419 sock_put(sk);
420}
421
422void udp_err(struct sk_buff *skb, u32 info)
423{
424 __udp4_lib_err(skb, info, udp_hash);
425}
426
427/*
428 * Throw away all pending data and cancel the corking. Socket is locked.
429 */
430static void udp_flush_pending_frames(struct sock *sk)
431{
432 struct udp_sock *up = udp_sk(sk);
433
434 if (up->pending) {
435 up->len = 0;
436 up->pending = 0;
437 ip_flush_pending_frames(sk);
438 }
439}
440
441/**
442 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
443 * @sk: socket we are sending on
444 * @skb: sk_buff containing the filled-in UDP header
445 * (checksum field must be zeroed out)
446 */
447static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
448 __be32 src, __be32 dst, int len )
449{
450 unsigned int offset;
451 struct udphdr *uh = udp_hdr(skb);
452 __wsum csum = 0;
453
454 if (skb_queue_len(&sk->sk_write_queue) == 1) {
455 /*
456 * Only one fragment on the socket.
457 */
458 skb->csum_start = skb_transport_header(skb) - skb->head;
459 skb->csum_offset = offsetof(struct udphdr, check);
460 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
461 } else {
462 /*
463 * HW-checksum won't work as there are two or more
464 * fragments on the socket so that all csums of sk_buffs
465 * should be together
466 */
467 offset = skb_transport_offset(skb);
468 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
469
470 skb->ip_summed = CHECKSUM_NONE;
471
472 skb_queue_walk(&sk->sk_write_queue, skb) {
473 csum = csum_add(csum, skb->csum);
474 }
475
476 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
477 if (uh->check == 0)
478 uh->check = CSUM_MANGLED_0;
479 }
480}
481
482/*
483 * Push out all pending data as one UDP datagram. Socket is locked.
484 */
485static int udp_push_pending_frames(struct sock *sk)
486{
487 struct udp_sock *up = udp_sk(sk);
488 struct inet_sock *inet = inet_sk(sk);
489 struct flowi *fl = &inet->cork.fl;
490 struct sk_buff *skb;
491 struct udphdr *uh;
492 int err = 0;
493 int is_udplite = IS_UDPLITE(sk);
494 __wsum csum = 0;
495
496 /* Grab the skbuff where UDP header space exists. */
497 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
498 goto out;
499
500 /*
501 * Create a UDP header
502 */
503 uh = udp_hdr(skb);
504 uh->source = fl->fl_ip_sport;
505 uh->dest = fl->fl_ip_dport;
506 uh->len = htons(up->len);
507 uh->check = 0;
508
509 if (is_udplite) /* UDP-Lite */
510 csum = udplite_csum_outgoing(sk, skb);
511
512 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
513
514 skb->ip_summed = CHECKSUM_NONE;
515 goto send;
516
517 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
518
519 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
520 goto send;
521
522 } else /* `normal' UDP */
523 csum = udp_csum_outgoing(sk, skb);
524
525 /* add protocol-dependent pseudo-header */
526 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
527 sk->sk_protocol, csum );
528 if (uh->check == 0)
529 uh->check = CSUM_MANGLED_0;
530
531send:
532 err = ip_push_pending_frames(sk);
533out:
534 up->len = 0;
535 up->pending = 0;
536 if (!err)
537 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
538 return err;
539}
540
541int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
542 size_t len)
543{
544 struct inet_sock *inet = inet_sk(sk);
545 struct udp_sock *up = udp_sk(sk);
546 int ulen = len;
547 struct ipcm_cookie ipc;
548 struct rtable *rt = NULL;
549 int free = 0;
550 int connected = 0;
551 __be32 daddr, faddr, saddr;
552 __be16 dport;
553 u8 tos;
554 int err, is_udplite = IS_UDPLITE(sk);
555 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
556 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
557
558 if (len > 0xFFFF)
559 return -EMSGSIZE;
560
561 /*
562 * Check the flags.
563 */
564
565 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
566 return -EOPNOTSUPP;
567
568 ipc.opt = NULL;
569
570 if (up->pending) {
571 /*
572 * There are pending frames.
573 * The socket lock must be held while it's corked.
574 */
575 lock_sock(sk);
576 if (likely(up->pending)) {
577 if (unlikely(up->pending != AF_INET)) {
578 release_sock(sk);
579 return -EINVAL;
580 }
581 goto do_append_data;
582 }
583 release_sock(sk);
584 }
585 ulen += sizeof(struct udphdr);
586
587 /*
588 * Get and verify the address.
589 */
590 if (msg->msg_name) {
591 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
592 if (msg->msg_namelen < sizeof(*usin))
593 return -EINVAL;
594 if (usin->sin_family != AF_INET) {
595 if (usin->sin_family != AF_UNSPEC)
596 return -EAFNOSUPPORT;
597 }
598
599 daddr = usin->sin_addr.s_addr;
600 dport = usin->sin_port;
601 if (dport == 0)
602 return -EINVAL;
603 } else {
604 if (sk->sk_state != TCP_ESTABLISHED)
605 return -EDESTADDRREQ;
606 daddr = inet->daddr;
607 dport = inet->dport;
608 /* Open fast path for connected socket.
609 Route will not be used, if at least one option is set.
610 */
611 connected = 1;
612 }
613 ipc.addr = inet->saddr;
614
615 ipc.oif = sk->sk_bound_dev_if;
616 if (msg->msg_controllen) {
617 err = ip_cmsg_send(msg, &ipc);
618 if (err)
619 return err;
620 if (ipc.opt)
621 free = 1;
622 connected = 0;
623 }
624 if (!ipc.opt)
625 ipc.opt = inet->opt;
626
627 saddr = ipc.addr;
628 ipc.addr = faddr = daddr;
629
630 if (ipc.opt && ipc.opt->srr) {
631 if (!daddr)
632 return -EINVAL;
633 faddr = ipc.opt->faddr;
634 connected = 0;
635 }
636 tos = RT_TOS(inet->tos);
637 if (sock_flag(sk, SOCK_LOCALROUTE) ||
638 (msg->msg_flags & MSG_DONTROUTE) ||
639 (ipc.opt && ipc.opt->is_strictroute)) {
640 tos |= RTO_ONLINK;
641 connected = 0;
642 }
643
644 if (ipv4_is_multicast(daddr)) {
645 if (!ipc.oif)
646 ipc.oif = inet->mc_index;
647 if (!saddr)
648 saddr = inet->mc_addr;
649 connected = 0;
650 }
651
652 if (connected)
653 rt = (struct rtable*)sk_dst_check(sk, 0);
654
655 if (rt == NULL) {
656 struct flowi fl = { .oif = ipc.oif,
657 .nl_u = { .ip4_u =
658 { .daddr = faddr,
659 .saddr = saddr,
660 .tos = tos } },
661 .proto = sk->sk_protocol,
662 .uli_u = { .ports =
663 { .sport = inet->sport,
664 .dport = dport } } };
665 security_sk_classify_flow(sk, &fl);
666 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
667 if (err) {
668 if (err == -ENETUNREACH)
669 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
670 goto out;
671 }
672
673 err = -EACCES;
674 if ((rt->rt_flags & RTCF_BROADCAST) &&
675 !sock_flag(sk, SOCK_BROADCAST))
676 goto out;
677 if (connected)
678 sk_dst_set(sk, dst_clone(&rt->u.dst));
679 }
680
681 if (msg->msg_flags&MSG_CONFIRM)
682 goto do_confirm;
683back_from_confirm:
684
685 saddr = rt->rt_src;
686 if (!ipc.addr)
687 daddr = ipc.addr = rt->rt_dst;
688
689 lock_sock(sk);
690 if (unlikely(up->pending)) {
691 /* The socket is already corked while preparing it. */
692 /* ... which is an evident application bug. --ANK */
693 release_sock(sk);
694
695 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
696 err = -EINVAL;
697 goto out;
698 }
699 /*
700 * Now cork the socket to pend data.
701 */
702 inet->cork.fl.fl4_dst = daddr;
703 inet->cork.fl.fl_ip_dport = dport;
704 inet->cork.fl.fl4_src = saddr;
705 inet->cork.fl.fl_ip_sport = inet->sport;
706 up->pending = AF_INET;
707
708do_append_data:
709 up->len += ulen;
710 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
711 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
712 sizeof(struct udphdr), &ipc, rt,
713 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
714 if (err)
715 udp_flush_pending_frames(sk);
716 else if (!corkreq)
717 err = udp_push_pending_frames(sk);
718 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
719 up->pending = 0;
720 release_sock(sk);
721
722out:
723 ip_rt_put(rt);
724 if (free)
725 kfree(ipc.opt);
726 if (!err)
727 return len;
728 /*
729 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
730 * ENOBUFS might not be good (it's not tunable per se), but otherwise
731 * we don't have a good statistic (IpOutDiscards but it can be too many
732 * things). We could add another new stat but at least for now that
733 * seems like overkill.
734 */
735 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
736 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
737 }
738 return err;
739
740do_confirm:
741 dst_confirm(&rt->u.dst);
742 if (!(msg->msg_flags&MSG_PROBE) || len)
743 goto back_from_confirm;
744 err = 0;
745 goto out;
746}
747
748int udp_sendpage(struct sock *sk, struct page *page, int offset,
749 size_t size, int flags)
750{
751 struct udp_sock *up = udp_sk(sk);
752 int ret;
753
754 if (!up->pending) {
755 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
756
757 /* Call udp_sendmsg to specify destination address which
758 * sendpage interface can't pass.
759 * This will succeed only when the socket is connected.
760 */
761 ret = udp_sendmsg(NULL, sk, &msg, 0);
762 if (ret < 0)
763 return ret;
764 }
765
766 lock_sock(sk);
767
768 if (unlikely(!up->pending)) {
769 release_sock(sk);
770
771 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
772 return -EINVAL;
773 }
774
775 ret = ip_append_page(sk, page, offset, size, flags);
776 if (ret == -EOPNOTSUPP) {
777 release_sock(sk);
778 return sock_no_sendpage(sk->sk_socket, page, offset,
779 size, flags);
780 }
781 if (ret < 0) {
782 udp_flush_pending_frames(sk);
783 goto out;
784 }
785
786 up->len += size;
787 if (!(up->corkflag || (flags&MSG_MORE)))
788 ret = udp_push_pending_frames(sk);
789 if (!ret)
790 ret = size;
791out:
792 release_sock(sk);
793 return ret;
794}
795
249/* 796/*
250 * IOCTL requests applicable to the UDP protocol 797 * IOCTL requests applicable to the UDP protocol
251 */ 798 */
@@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
286 return 0; 833 return 0;
287} 834}
288 835
836/*
837 * This should be easy, if there is something there we
838 * return it, otherwise we block.
839 */
840
841int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
842 size_t len, int noblock, int flags, int *addr_len)
843{
844 struct inet_sock *inet = inet_sk(sk);
845 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
846 struct sk_buff *skb;
847 unsigned int ulen, copied;
848 int peeked;
849 int err;
850 int is_udplite = IS_UDPLITE(sk);
851
852 /*
853 * Check any passed addresses
854 */
855 if (addr_len)
856 *addr_len=sizeof(*sin);
857
858 if (flags & MSG_ERRQUEUE)
859 return ip_recv_error(sk, msg, len);
860
861try_again:
862 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
863 &peeked, &err);
864 if (!skb)
865 goto out;
866
867 ulen = skb->len - sizeof(struct udphdr);
868 copied = len;
869 if (copied > ulen)
870 copied = ulen;
871 else if (copied < ulen)
872 msg->msg_flags |= MSG_TRUNC;
873
874 /*
875 * If checksum is needed at all, try to do it while copying the
876 * data. If the data is truncated, or if we only want a partial
877 * coverage checksum (UDP-Lite), do it before the copy.
878 */
879
880 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
881 if (udp_lib_checksum_complete(skb))
882 goto csum_copy_err;
883 }
884
885 if (skb_csum_unnecessary(skb))
886 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
887 msg->msg_iov, copied );
888 else {
889 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
890
891 if (err == -EINVAL)
892 goto csum_copy_err;
893 }
894
895 if (err)
896 goto out_free;
897
898 if (!peeked)
899 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
900
901 sock_recv_timestamp(msg, sk, skb);
902
903 /* Copy the address. */
904 if (sin)
905 {
906 sin->sin_family = AF_INET;
907 sin->sin_port = udp_hdr(skb)->source;
908 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
909 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
910 }
911 if (inet->cmsg_flags)
912 ip_cmsg_recv(msg, skb);
913
914 err = copied;
915 if (flags & MSG_TRUNC)
916 err = ulen;
917
918out_free:
919 lock_sock(sk);
920 skb_free_datagram(sk, skb);
921 release_sock(sk);
922out:
923 return err;
924
925csum_copy_err:
926 lock_sock(sk);
927 if (!skb_kill_datagram(sk, skb, flags))
928 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
929 release_sock(sk);
930
931 if (noblock)
932 return -EAGAIN;
933 goto try_again;
934}
935
936
289int udp_disconnect(struct sock *sk, int flags) 937int udp_disconnect(struct sock *sk, int flags)
290{ 938{
291 struct inet_sock *inet = inet_sk(sk); 939 struct inet_sock *inet = inet_sk(sk);
@@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags)
308 return 0; 956 return 0;
309} 957}
310 958
959/* returns:
960 * -1: error
961 * 0: success
962 * >0: "udp encap" protocol resubmission
963 *
964 * Note that in the success and error cases, the skb is assumed to
965 * have either been requeued or freed.
966 */
967int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
968{
969 struct udp_sock *up = udp_sk(sk);
970 int rc;
971 int is_udplite = IS_UDPLITE(sk);
972
973 /*
974 * Charge it to the socket, dropping if the queue is full.
975 */
976 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
977 goto drop;
978 nf_reset(skb);
979
980 if (up->encap_type) {
981 /*
982 * This is an encapsulation socket so pass the skb to
983 * the socket's udp_encap_rcv() hook. Otherwise, just
984 * fall through and pass this up the UDP socket.
985 * up->encap_rcv() returns the following value:
986 * =0 if skb was successfully passed to the encap
987 * handler or was discarded by it.
988 * >0 if skb should be passed on to UDP.
989 * <0 if skb should be resubmitted as proto -N
990 */
991
992 /* if we're overly short, let UDP handle it */
993 if (skb->len > sizeof(struct udphdr) &&
994 up->encap_rcv != NULL) {
995 int ret;
996
997 ret = (*up->encap_rcv)(sk, skb);
998 if (ret <= 0) {
999 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
1000 is_udplite);
1001 return -ret;
1002 }
1003 }
1004
1005 /* FALLTHROUGH -- it's a UDP Packet */
1006 }
1007
1008 /*
1009 * UDP-Lite specific tests, ignored on UDP sockets
1010 */
1011 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
1012
1013 /*
1014 * MIB statistics other than incrementing the error count are
1015 * disabled for the following two types of errors: these depend
1016 * on the application settings, not on the functioning of the
1017 * protocol stack as such.
1018 *
1019 * RFC 3828 here recommends (sec 3.3): "There should also be a
1020 * way ... to ... at least let the receiving application block
1021 * delivery of packets with coverage values less than a value
1022 * provided by the application."
1023 */
1024 if (up->pcrlen == 0) { /* full coverage was set */
1025 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1026 "%d while full coverage %d requested\n",
1027 UDP_SKB_CB(skb)->cscov, skb->len);
1028 goto drop;
1029 }
1030 /* The next case involves violating the min. coverage requested
1031 * by the receiver. This is subtle: if receiver wants x and x is
1032 * greater than the buffersize/MTU then receiver will complain
1033 * that it wants x while sender emits packets of smaller size y.
1034 * Therefore the above ...()->partial_cov statement is essential.
1035 */
1036 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
1037 LIMIT_NETDEBUG(KERN_WARNING
1038 "UDPLITE: coverage %d too small, need min %d\n",
1039 UDP_SKB_CB(skb)->cscov, up->pcrlen);
1040 goto drop;
1041 }
1042 }
1043
1044 if (sk->sk_filter) {
1045 if (udp_lib_checksum_complete(skb))
1046 goto drop;
1047 }
1048
1049 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1050 /* Note that an ENOMEM error is charged twice */
1051 if (rc == -ENOMEM)
1052 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
1053 goto drop;
1054 }
1055
1056 return 0;
1057
1058drop:
1059 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1060 kfree_skb(skb);
1061 return -1;
1062}
1063
1064/*
1065 * Multicasts and broadcasts go to each listener.
1066 *
1067 * Note: called only from the BH handler context,
1068 * so we don't need to lock the hashes.
1069 */
1070static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1071 struct udphdr *uh,
1072 __be32 saddr, __be32 daddr,
1073 struct hlist_head udptable[])
1074{
1075 struct sock *sk;
1076 int dif;
1077
1078 read_lock(&udp_hash_lock);
1079 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
1080 dif = skb->dev->ifindex;
1081 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1082 if (sk) {
1083 struct sock *sknext = NULL;
1084
1085 do {
1086 struct sk_buff *skb1 = skb;
1087
1088 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
1089 uh->source, saddr, dif);
1090 if (sknext)
1091 skb1 = skb_clone(skb, GFP_ATOMIC);
1092
1093 if (skb1) {
1094 int ret = 0;
1095
1096 bh_lock_sock_nested(sk);
1097 if (!sock_owned_by_user(sk))
1098 ret = udp_queue_rcv_skb(sk, skb1);
1099 else
1100 sk_add_backlog(sk, skb1);
1101 bh_unlock_sock(sk);
1102
1103 if (ret > 0)
1104 /* we should probably re-process instead
1105 * of dropping packets here. */
1106 kfree_skb(skb1);
1107 }
1108 sk = sknext;
1109 } while (sknext);
1110 } else
1111 kfree_skb(skb);
1112 read_unlock(&udp_hash_lock);
1113 return 0;
1114}
1115
1116/* Initialize UDP checksum. If exited with zero value (success),
1117 * CHECKSUM_UNNECESSARY means, that no more checks are required.
1118 * Otherwise, csum completion requires checksumming packet body,
1119 * including udp header and folding it to skb->csum.
1120 */
1121static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1122 int proto)
1123{
1124 const struct iphdr *iph;
1125 int err;
1126
1127 UDP_SKB_CB(skb)->partial_cov = 0;
1128 UDP_SKB_CB(skb)->cscov = skb->len;
1129
1130 if (proto == IPPROTO_UDPLITE) {
1131 err = udplite_checksum_init(skb, uh);
1132 if (err)
1133 return err;
1134 }
1135
1136 iph = ip_hdr(skb);
1137 if (uh->check == 0) {
1138 skb->ip_summed = CHECKSUM_UNNECESSARY;
1139 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1140 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1141 proto, skb->csum))
1142 skb->ip_summed = CHECKSUM_UNNECESSARY;
1143 }
1144 if (!skb_csum_unnecessary(skb))
1145 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1146 skb->len, proto, 0);
1147 /* Probably, we should checksum udp header (it should be in cache
1148 * in any case) and data in tiny packets (< rx copybreak).
1149 */
1150
1151 return 0;
1152}
1153
1154/*
1155 * All we need to do is get the socket, and then do a checksum.
1156 */
1157
1158int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1159 int proto)
1160{
1161 struct sock *sk;
1162 struct udphdr *uh = udp_hdr(skb);
1163 unsigned short ulen;
1164 struct rtable *rt = (struct rtable*)skb->dst;
1165 __be32 saddr = ip_hdr(skb)->saddr;
1166 __be32 daddr = ip_hdr(skb)->daddr;
1167
1168 /*
1169 * Validate the packet.
1170 */
1171 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1172 goto drop; /* No space for header. */
1173
1174 ulen = ntohs(uh->len);
1175 if (ulen > skb->len)
1176 goto short_packet;
1177
1178 if (proto == IPPROTO_UDP) {
1179 /* UDP validates ulen. */
1180 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1181 goto short_packet;
1182 uh = udp_hdr(skb);
1183 }
1184
1185 if (udp4_csum_init(skb, uh, proto))
1186 goto csum_error;
1187
1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1190
1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
1192 uh->dest, inet_iif(skb), udptable);
1193
1194 if (sk != NULL) {
1195 int ret = 0;
1196 bh_lock_sock_nested(sk);
1197 if (!sock_owned_by_user(sk))
1198 ret = udp_queue_rcv_skb(sk, skb);
1199 else
1200 sk_add_backlog(sk, skb);
1201 bh_unlock_sock(sk);
1202 sock_put(sk);
1203
1204 /* a return value > 0 means to resubmit the input, but
1205 * it wants the return to be -protocol, or 0
1206 */
1207 if (ret > 0)
1208 return -ret;
1209 return 0;
1210 }
1211
1212 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1213 goto drop;
1214 nf_reset(skb);
1215
1216 /* No socket. Drop packet silently, if checksum is wrong */
1217 if (udp_lib_checksum_complete(skb))
1218 goto csum_error;
1219
1220 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1221 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1222
1223 /*
1224 * Hmm. We got an UDP packet to a port to which we
1225 * don't wanna listen. Ignore it.
1226 */
1227 kfree_skb(skb);
1228 return 0;
1229
1230short_packet:
1231 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1232 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1233 NIPQUAD(saddr),
1234 ntohs(uh->source),
1235 ulen,
1236 skb->len,
1237 NIPQUAD(daddr),
1238 ntohs(uh->dest));
1239 goto drop;
1240
1241csum_error:
1242 /*
1243 * RFC1122: OK. Discards the bad packet silently (as far as
1244 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1245 */
1246 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1247 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1248 NIPQUAD(saddr),
1249 ntohs(uh->source),
1250 NIPQUAD(daddr),
1251 ntohs(uh->dest),
1252 ulen);
1253drop:
1254 UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1255 kfree_skb(skb);
1256 return 0;
1257}
1258
1259int udp_rcv(struct sk_buff *skb)
1260{
1261 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
1262}
1263
1264int udp_destroy_sock(struct sock *sk)
1265{
1266 lock_sock(sk);
1267 udp_flush_pending_frames(sk);
1268 release_sock(sk);
1269 return 0;
1270}
1271
311/* 1272/*
312 * Socket option code for UDP 1273 * Socket option code for UDP
313 */ 1274 */
@@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
318 struct udp_sock *up = udp_sk(sk); 1279 struct udp_sock *up = udp_sk(sk);
319 int val; 1280 int val;
320 int err = 0; 1281 int err = 0;
321#ifdef CONFIG_IP_UDPLITE
322 int is_udplite = IS_UDPLITE(sk); 1282 int is_udplite = IS_UDPLITE(sk);
323#endif
324 1283
325 if (optlen<sizeof(int)) 1284 if (optlen<sizeof(int))
326 return -EINVAL; 1285 return -EINVAL;
@@ -356,7 +1315,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
356 } 1315 }
357 break; 1316 break;
358 1317
359#ifdef CONFIG_IP_UDPLITE
360 /* 1318 /*
361 * UDP-Lite's partial checksum coverage (RFC 3828). 1319 * UDP-Lite's partial checksum coverage (RFC 3828).
362 */ 1320 */
@@ -382,7 +1340,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
382 up->pcrlen = val; 1340 up->pcrlen = val;
383 up->pcflag |= UDPLITE_RECV_CC; 1341 up->pcflag |= UDPLITE_RECV_CC;
384 break; 1342 break;
385#endif
386 1343
387 default: 1344 default:
388 err = -ENOPROTOOPT; 1345 err = -ENOPROTOOPT;
@@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
392 return err; 1349 return err;
393} 1350}
394 1351
1352int udp_setsockopt(struct sock *sk, int level, int optname,
1353 char __user *optval, int optlen)
1354{
1355 if (level == SOL_UDP || level == SOL_UDPLITE)
1356 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1357 udp_push_pending_frames);
1358 return ip_setsockopt(sk, level, optname, optval, optlen);
1359}
1360
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): UDP/UDP-Lite level options go to
 * the shared UDP option code, everything else to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval, optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1371
395int udp_lib_getsockopt(struct sock *sk, int level, int optname, 1372int udp_lib_getsockopt(struct sock *sk, int level, int optname,
396 char __user *optval, int __user *optlen) 1373 char __user *optval, int __user *optlen)
397{ 1374{
@@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
436 return 0; 1413 return 0;
437} 1414}
438 1415
1416int udp_getsockopt(struct sock *sk, int level, int optname,
1417 char __user *optval, int __user *optlen)
1418{
1419 if (level == SOL_UDP || level == SOL_UDPLITE)
1420 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1421 return ip_getsockopt(sk, level, optname, optval, optlen);
1422}
1423
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): UDP/UDP-Lite level options go to
 * the shared UDP option code, everything else to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval, optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
439/** 1433/**
440 * udp_poll - wait for a UDP event. 1434 * udp_poll - wait for a UDP event.
441 * @file - file struct 1435 * @file - file struct
@@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
480 1474
481} 1475}
482 1476
1477DEFINE_PROTO_INUSE(udp)
1478
1479struct proto udp_prot = {
1480 .name = "UDP",
1481 .owner = THIS_MODULE,
1482 .close = udp_lib_close,
1483 .connect = ip4_datagram_connect,
1484 .disconnect = udp_disconnect,
1485 .ioctl = udp_ioctl,
1486 .destroy = udp_destroy_sock,
1487 .setsockopt = udp_setsockopt,
1488 .getsockopt = udp_getsockopt,
1489 .sendmsg = udp_sendmsg,
1490 .recvmsg = udp_recvmsg,
1491 .sendpage = udp_sendpage,
1492 .backlog_rcv = udp_queue_rcv_skb,
1493 .hash = udp_lib_hash,
1494 .unhash = udp_lib_unhash,
1495 .get_port = udp_v4_get_port,
1496 .memory_allocated = &udp_memory_allocated,
1497 .sysctl_mem = sysctl_udp_mem,
1498 .sysctl_wmem = &sysctl_udp_wmem_min,
1499 .sysctl_rmem = &sysctl_udp_rmem_min,
1500 .obj_size = sizeof(struct udp_sock),
1501#ifdef CONFIG_COMPAT
1502 .compat_setsockopt = compat_udp_setsockopt,
1503 .compat_getsockopt = compat_udp_getsockopt,
1504#endif
1505 REF_PROTO_INUSE(udp)
1506};
483 1507
484/* ------------------------------------------------------------------------ */ 1508/* ------------------------------------------------------------------------ */
485#ifdef CONFIG_PROC_FS 1509#ifdef CONFIG_PROC_FS
@@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
612 proc_net_remove(&init_net, afinfo->name); 1636 proc_net_remove(&init_net, afinfo->name);
613 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 1637 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
614} 1638}
1639
1640/* ------------------------------------------------------------------------ */
1641static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1642{
1643 struct inet_sock *inet = inet_sk(sp);
1644 __be32 dest = inet->daddr;
1645 __be32 src = inet->rcv_saddr;
1646 __u16 destp = ntohs(inet->dport);
1647 __u16 srcp = ntohs(inet->sport);
1648
1649 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1650 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1651 bucket, src, srcp, dest, destp, sp->sk_state,
1652 atomic_read(&sp->sk_wmem_alloc),
1653 atomic_read(&sp->sk_rmem_alloc),
1654 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1655 atomic_read(&sp->sk_refcnt), sp);
1656}
1657
1658int udp4_seq_show(struct seq_file *seq, void *v)
1659{
1660 if (v == SEQ_START_TOKEN)
1661 seq_printf(seq, "%-127s\n",
1662 " sl local_address rem_address st tx_queue "
1663 "rx_queue tr tm->when retrnsmt uid timeout "
1664 "inode");
1665 else {
1666 char tmpbuf[129];
1667 struct udp_iter_state *state = seq->private;
1668
1669 udp4_format_sock(v, tmpbuf, state->bucket);
1670 seq_printf(seq, "%-127s\n", tmpbuf);
1671 }
1672 return 0;
1673}
1674
1675/* ------------------------------------------------------------------------ */
1676static struct file_operations udp4_seq_fops;
1677static struct udp_seq_afinfo udp4_seq_afinfo = {
1678 .owner = THIS_MODULE,
1679 .name = "udp",
1680 .family = AF_INET,
1681 .hashtable = udp_hash,
1682 .seq_show = udp4_seq_show,
1683 .seq_fops = &udp4_seq_fops,
1684};
1685
1686int __init udp4_proc_init(void)
1687{
1688 return udp_proc_register(&udp4_seq_afinfo);
1689}
1690
1691void udp4_proc_exit(void)
1692{
1693 udp_proc_unregister(&udp4_seq_afinfo);
1694}
615#endif /* CONFIG_PROC_FS */ 1695#endif /* CONFIG_PROC_FS */
616 1696
617void __init udp_init(void) 1697void __init udp_init(void)
@@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash);
638EXPORT_SYMBOL(udp_hash_lock); 1718EXPORT_SYMBOL(udp_hash_lock);
639EXPORT_SYMBOL(udp_ioctl); 1719EXPORT_SYMBOL(udp_ioctl);
640EXPORT_SYMBOL(udp_get_port); 1720EXPORT_SYMBOL(udp_get_port);
1721EXPORT_SYMBOL(udp_prot);
1722EXPORT_SYMBOL(udp_sendmsg);
641EXPORT_SYMBOL(udp_lib_getsockopt); 1723EXPORT_SYMBOL(udp_lib_getsockopt);
642EXPORT_SYMBOL(udp_lib_setsockopt); 1724EXPORT_SYMBOL(udp_lib_setsockopt);
643EXPORT_SYMBOL(udp_poll); 1725EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c
deleted file mode 100644
index fd14c2c50ed4..000000000000
--- a/net/ipv4/udp_ipv4.c
+++ /dev/null
@@ -1,1134 +0,0 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * UDP for IPv4.
7 *
8 * For full credits, see net/ipv4/udp.c.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <asm/system.h>
17#include <asm/uaccess.h>
18#include <asm/ioctls.h>
19#include <linux/bootmem.h>
20#include <linux/types.h>
21#include <linux/fcntl.h>
22#include <linux/module.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/igmp.h>
26#include <linux/in.h>
27#include <linux/errno.h>
28#include <linux/timer.h>
29#include <linux/mm.h>
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <net/tcp_states.h>
33#include <linux/skbuff.h>
34#include <linux/proc_fs.h>
35#include <linux/seq_file.h>
36#include <net/net_namespace.h>
37#include <net/icmp.h>
38#include <net/route.h>
39#include <net/checksum.h>
40#include <net/xfrm.h>
41#include "udp_impl.h"
42
43int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
44{
45 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
46
47 return ( !ipv6_only_sock(sk2) &&
48 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
49 inet1->rcv_saddr == inet2->rcv_saddr ));
50}
51
52static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
53{
54 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
55}
56
57/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
58 * harder than this. -DaveM
59 */
60static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
61 __be16 sport, __be32 daddr, __be16 dport,
62 int dif, struct hlist_head udptable[])
63{
64 struct sock *sk, *result = NULL;
65 struct hlist_node *node;
66 unsigned short hnum = ntohs(dport);
67 int badness = -1;
68
69 read_lock(&udp_hash_lock);
70 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
71 struct inet_sock *inet = inet_sk(sk);
72
73 if (sk->sk_net == net && sk->sk_hash == hnum &&
74 !ipv6_only_sock(sk)) {
75 int score = (sk->sk_family == PF_INET ? 1 : 0);
76 if (inet->rcv_saddr) {
77 if (inet->rcv_saddr != daddr)
78 continue;
79 score+=2;
80 }
81 if (inet->daddr) {
82 if (inet->daddr != saddr)
83 continue;
84 score+=2;
85 }
86 if (inet->dport) {
87 if (inet->dport != sport)
88 continue;
89 score+=2;
90 }
91 if (sk->sk_bound_dev_if) {
92 if (sk->sk_bound_dev_if != dif)
93 continue;
94 score+=2;
95 }
96 if (score == 9) {
97 result = sk;
98 break;
99 } else if (score > badness) {
100 result = sk;
101 badness = score;
102 }
103 }
104 }
105 if (result)
106 sock_hold(result);
107 read_unlock(&udp_hash_lock);
108 return result;
109}
110
111static inline struct sock *udp_v4_mcast_next(struct sock *sk,
112 __be16 loc_port, __be32 loc_addr,
113 __be16 rmt_port, __be32 rmt_addr,
114 int dif)
115{
116 struct hlist_node *node;
117 struct sock *s = sk;
118 unsigned short hnum = ntohs(loc_port);
119
120 sk_for_each_from(s, node) {
121 struct inet_sock *inet = inet_sk(s);
122
123 if (s->sk_hash != hnum ||
124 (inet->daddr && inet->daddr != rmt_addr) ||
125 (inet->dport != rmt_port && inet->dport) ||
126 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
127 ipv6_only_sock(s) ||
128 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
129 continue;
130 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
131 continue;
132 goto found;
133 }
134 s = NULL;
135found:
136 return s;
137}
138
139/*
140 * This routine is called by the ICMP module when it gets some
141 * sort of error condition. If err < 0 then the socket should
142 * be closed and the error returned to the user. If err > 0
143 * it's just the icmp type << 8 | icmp code.
144 * Header points to the ip header of the error packet. We move
145 * on past this. Then (as it used to claim before adjustment)
146 * header points to the first 8 bytes of the udp header. We need
147 * to find the appropriate port.
148 */
149
150void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
151{
152 struct inet_sock *inet;
153 struct iphdr *iph = (struct iphdr*)skb->data;
154 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
155 const int type = icmp_hdr(skb)->type;
156 const int code = icmp_hdr(skb)->code;
157 struct sock *sk;
158 int harderr;
159 int err;
160
161 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
162 iph->saddr, uh->source, skb->dev->ifindex, udptable);
163 if (sk == NULL) {
164 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
165 return; /* No socket for error */
166 }
167
168 err = 0;
169 harderr = 0;
170 inet = inet_sk(sk);
171
172 switch (type) {
173 default:
174 case ICMP_TIME_EXCEEDED:
175 err = EHOSTUNREACH;
176 break;
177 case ICMP_SOURCE_QUENCH:
178 goto out;
179 case ICMP_PARAMETERPROB:
180 err = EPROTO;
181 harderr = 1;
182 break;
183 case ICMP_DEST_UNREACH:
184 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
185 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
186 err = EMSGSIZE;
187 harderr = 1;
188 break;
189 }
190 goto out;
191 }
192 err = EHOSTUNREACH;
193 if (code <= NR_ICMP_UNREACH) {
194 harderr = icmp_err_convert[code].fatal;
195 err = icmp_err_convert[code].errno;
196 }
197 break;
198 }
199
200 /*
201 * RFC1122: OK. Passes ICMP errors back to application, as per
202 * 4.1.3.3.
203 */
204 if (!inet->recverr) {
205 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
206 goto out;
207 } else {
208 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
209 }
210 sk->sk_err = err;
211 sk->sk_error_report(sk);
212out:
213 sock_put(sk);
214}
215
216void udp_err(struct sk_buff *skb, u32 info)
217{
218 __udp4_lib_err(skb, info, udp_hash);
219}
220
221/*
222 * Throw away all pending data and cancel the corking. Socket is locked.
223 */
224static void udp_flush_pending_frames(struct sock *sk)
225{
226 struct udp_sock *up = udp_sk(sk);
227
228 if (up->pending) {
229 up->len = 0;
230 up->pending = 0;
231 ip_flush_pending_frames(sk);
232 }
233}
234
235/**
236 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
237 * @sk: socket we are sending on
238 * @skb: sk_buff containing the filled-in UDP header
239 * (checksum field must be zeroed out)
240 */
241static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
242 __be32 src, __be32 dst, int len )
243{
244 unsigned int offset;
245 struct udphdr *uh = udp_hdr(skb);
246 __wsum csum = 0;
247
248 if (skb_queue_len(&sk->sk_write_queue) == 1) {
249 /*
250 * Only one fragment on the socket.
251 */
252 skb->csum_start = skb_transport_header(skb) - skb->head;
253 skb->csum_offset = offsetof(struct udphdr, check);
254 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
255 } else {
256 /*
257 * HW-checksum won't work as there are two or more
258 * fragments on the socket so that all csums of sk_buffs
259 * should be together
260 */
261 offset = skb_transport_offset(skb);
262 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
263
264 skb->ip_summed = CHECKSUM_NONE;
265
266 skb_queue_walk(&sk->sk_write_queue, skb) {
267 csum = csum_add(csum, skb->csum);
268 }
269
270 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
271 if (uh->check == 0)
272 uh->check = CSUM_MANGLED_0;
273 }
274}
275
276/*
277 * Push out all pending data as one UDP datagram. Socket is locked.
278 */
279static int udp_push_pending_frames(struct sock *sk)
280{
281 struct udp_sock *up = udp_sk(sk);
282 struct inet_sock *inet = inet_sk(sk);
283 struct flowi *fl = &inet->cork.fl;
284 struct sk_buff *skb;
285 struct udphdr *uh;
286 int err = 0;
287 int is_udplite = IS_UDPLITE(sk);
288 __wsum csum = 0;
289
290 /* Grab the skbuff where UDP header space exists. */
291 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
292 goto out;
293
294 /*
295 * Create a UDP header
296 */
297 uh = udp_hdr(skb);
298 uh->source = fl->fl_ip_sport;
299 uh->dest = fl->fl_ip_dport;
300 uh->len = htons(up->len);
301 uh->check = 0;
302
303 if (is_udplite) /* UDP-Lite */
304 csum = udplite_csum_outgoing(sk, skb);
305
306 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
307
308 skb->ip_summed = CHECKSUM_NONE;
309 goto send;
310
311 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
312
313 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
314 goto send;
315
316 } else /* `normal' UDP */
317 csum = udp_csum_outgoing(sk, skb);
318
319 /* add protocol-dependent pseudo-header */
320 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
321 sk->sk_protocol, csum );
322 if (uh->check == 0)
323 uh->check = CSUM_MANGLED_0;
324
325send:
326 err = ip_push_pending_frames(sk);
327out:
328 up->len = 0;
329 up->pending = 0;
330 if (!err)
331 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
332 return err;
333}
334
/*
 * Send @len bytes described by @msg as a UDP datagram, or append them to
 * a datagram that is already corked on the socket.
 *
 * @iocb: not referenced by this function
 * @sk:   sending socket
 * @msg:  destination address (msg_name), control data, flags and iovec
 * @len:  payload length; anything above 0xFFFF is rejected
 *
 * Returns @len on success or a negative errno.  MSG_OOB is refused with
 * -EOPNOTSUPP.  Without msg_name the socket must be in TCP_ESTABLISHED
 * state, otherwise -EDESTADDRREQ is returned.
 */
335int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
336 size_t len)
337{
338 struct inet_sock *inet = inet_sk(sk);
339 struct udp_sock *up = udp_sk(sk);
340 int ulen = len;
341 struct ipcm_cookie ipc;
342 struct rtable *rt = NULL;
343 int free = 0;
344 int connected = 0;
345 __be32 daddr, faddr, saddr;
346 __be16 dport;
347 u8 tos;
348 int err, is_udplite = IS_UDPLITE(sk);
/* Corking is requested either per socket (corkflag) or per call (MSG_MORE). */
349 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
350 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
351
352 if (len > 0xFFFF)
353 return -EMSGSIZE;
354
355 /*
356 * Check the flags.
357 */
358
359 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
360 return -EOPNOTSUPP;
361
362 ipc.opt = NULL;
363
364 if (up->pending) {
365 /*
366 * There are pending frames.
367 * The socket lock must be held while it's corked.
368 */
369 lock_sock(sk);
/* Re-check under the lock: the unlocked test above may be stale. */
370 if (likely(up->pending)) {
371 if (unlikely(up->pending != AF_INET)) {
372 release_sock(sk);
373 return -EINVAL;
374 }
375 goto do_append_data;
376 }
377 release_sock(sk);
378 }
/* New datagram: count the UDP header too (the corked append path above skips this). */
379 ulen += sizeof(struct udphdr);
380
381 /*
382 * Get and verify the address.
383 */
384 if (msg->msg_name) {
385 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
386 if (msg->msg_namelen < sizeof(*usin))
387 return -EINVAL;
/* AF_UNSPEC is accepted alongside AF_INET; any other family is refused. */
388 if (usin->sin_family != AF_INET) {
389 if (usin->sin_family != AF_UNSPEC)
390 return -EAFNOSUPPORT;
391 }
392
393 daddr = usin->sin_addr.s_addr;
394 dport = usin->sin_port;
395 if (dport == 0)
396 return -EINVAL;
397 } else {
398 if (sk->sk_state != TCP_ESTABLISHED)
399 return -EDESTADDRREQ;
400 daddr = inet->daddr;
401 dport = inet->dport;
402 /* Open fast path for connected socket.
403 Route will not be used, if at least one option is set.
404 */
405 connected = 1;
406 }
407 ipc.addr = inet->saddr;
408
409 ipc.oif = sk->sk_bound_dev_if;
410 if (msg->msg_controllen) {
411 err = ip_cmsg_send(msg, &ipc);
412 if (err)
413 return err;
/* Options produced by ip_cmsg_send() are freed at "out" (see free below). */
414 if (ipc.opt)
415 free = 1;
416 connected = 0;
417 }
418 if (!ipc.opt)
419 ipc.opt = inet->opt;
420
421 saddr = ipc.addr;
422 ipc.addr = faddr = daddr;
423
424 if (ipc.opt && ipc.opt->srr) {
425 if (!daddr)
426 return -EINVAL;
427 faddr = ipc.opt->faddr;
428 connected = 0;
429 }
430 tos = RT_TOS(inet->tos);
431 if (sock_flag(sk, SOCK_LOCALROUTE) ||
432 (msg->msg_flags & MSG_DONTROUTE) ||
433 (ipc.opt && ipc.opt->is_strictroute)) {
434 tos |= RTO_ONLINK;
435 connected = 0;
436 }
437
438 if (ipv4_is_multicast(daddr)) {
439 if (!ipc.oif)
440 ipc.oif = inet->mc_index;
441 if (!saddr)
442 saddr = inet->mc_addr;
443 connected = 0;
444 }
445
/* Only an unmodified connected send may reuse the route cached on the socket. */
446 if (connected)
447 rt = (struct rtable*)sk_dst_check(sk, 0);
448
449 if (rt == NULL) {
450 struct flowi fl = { .oif = ipc.oif,
451 .nl_u = { .ip4_u =
452 { .daddr = faddr,
453 .saddr = saddr,
454 .tos = tos } },
455 .proto = sk->sk_protocol,
456 .uli_u = { .ports =
457 { .sport = inet->sport,
458 .dport = dport } } };
459 security_sk_classify_flow(sk, &fl);
460 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
461 if (err) {
462 if (err == -ENETUNREACH)
463 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
464 goto out;
465 }
466
/* Broadcast routes require SOCK_BROADCAST on the socket, else -EACCES. */
467 err = -EACCES;
468 if ((rt->rt_flags & RTCF_BROADCAST) &&
469 !sock_flag(sk, SOCK_BROADCAST))
470 goto out;
471 if (connected)
472 sk_dst_set(sk, dst_clone(&rt->u.dst));
473 }
474
475 if (msg->msg_flags&MSG_CONFIRM)
476 goto do_confirm;
477back_from_confirm:
478
479 saddr = rt->rt_src;
480 if (!ipc.addr)
481 daddr = ipc.addr = rt->rt_dst;
482
483 lock_sock(sk);
484 if (unlikely(up->pending)) {
485 /* The socket is already corked while preparing it. */
486 /* ... which is an evident application bug. --ANK */
487 release_sock(sk);
488
489 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
490 err = -EINVAL;
491 goto out;
492 }
493 /*
494 * Now cork the socket to pend data.
495 */
496 inet->cork.fl.fl4_dst = daddr;
497 inet->cork.fl.fl_ip_dport = dport;
498 inet->cork.fl.fl4_src = saddr;
499 inet->cork.fl.fl_ip_sport = inet->sport;
500 up->pending = AF_INET;
501
/* Reached with the socket lock held, both from above and from the corked path. */
502do_append_data:
503 up->len += ulen;
504 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
505 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
506 sizeof(struct udphdr), &ipc, rt,
507 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
/* On failure drop everything queued; otherwise send now unless corking was requested. */
508 if (err)
509 udp_flush_pending_frames(sk);
510 else if (!corkreq)
511 err = udp_push_pending_frames(sk);
512 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
513 up->pending = 0;
514 release_sock(sk);
515
/* NOTE(review): rt is still NULL here on the corked append path; presumably ip_rt_put() accepts NULL - confirm. */
516out:
517 ip_rt_put(rt);
518 if (free)
519 kfree(ipc.opt);
520 if (!err)
521 return len;
522 /*
523 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
524 * ENOBUFS might not be good (it's not tunable per se), but otherwise
525 * we don't have a good statistic (IpOutDiscards but it can be too many
526 * things). We could add another new stat but at least for now that
527 * seems like overkill.
528 */
529 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
530 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
531 }
532 return err;
533
/* MSG_CONFIRM: call dst_confirm() on the route; a zero-length MSG_PROBE send ends here without queuing data. */
534do_confirm:
535 dst_confirm(&rt->u.dst);
536 if (!(msg->msg_flags&MSG_PROBE) || len)
537 goto back_from_confirm;
538 err = 0;
539 goto out;
540}
541
542int udp_sendpage(struct sock *sk, struct page *page, int offset,
543 size_t size, int flags)
544{
545 struct udp_sock *up = udp_sk(sk);
546 int ret;
547
548 if (!up->pending) {
549 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
550
551 /* Call udp_sendmsg to specify destination address which
552 * sendpage interface can't pass.
553 * This will succeed only when the socket is connected.
554 */
555 ret = udp_sendmsg(NULL, sk, &msg, 0);
556 if (ret < 0)
557 return ret;
558 }
559
560 lock_sock(sk);
561
562 if (unlikely(!up->pending)) {
563 release_sock(sk);
564
565 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
566 return -EINVAL;
567 }
568
569 ret = ip_append_page(sk, page, offset, size, flags);
570 if (ret == -EOPNOTSUPP) {
571 release_sock(sk);
572 return sock_no_sendpage(sk->sk_socket, page, offset,
573 size, flags);
574 }
575 if (ret < 0) {
576 udp_flush_pending_frames(sk);
577 goto out;
578 }
579
580 up->len += size;
581 if (!(up->corkflag || (flags&MSG_MORE)))
582 ret = udp_push_pending_frames(sk);
583 if (!ret)
584 ret = size;
585out:
586 release_sock(sk);
587 return ret;
588}
589
590/*
591 * This should be easy, if there is something there we
592 * return it, otherwise we block.
593 */
594
/*
 * Receive one datagram from @sk into @msg.
 *
 * @iocb:     not referenced by this function
 * @len:      size of the caller's buffer; a longer datagram is truncated
 *            and MSG_TRUNC is set in msg->msg_flags
 * @noblock:  non-zero adds MSG_DONTWAIT to the dequeue flags
 * @flags:    receive flags; MSG_ERRQUEUE diverts to ip_recv_error()
 * @addr_len: if non-NULL, set to sizeof(struct sockaddr_in) on entry
 *
 * Returns the number of bytes copied, or the full payload length when
 * MSG_TRUNC was passed in @flags, or a negative error.  A datagram that
 * fails its checksum is discarded and the receive is retried, unless
 * @noblock is set, in which case -EAGAIN is returned.
 */
595int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
596 size_t len, int noblock, int flags, int *addr_len)
597{
598 struct inet_sock *inet = inet_sk(sk);
599 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
600 struct sk_buff *skb;
601 unsigned int ulen, copied;
602 int peeked;
603 int err;
604 int is_udplite = IS_UDPLITE(sk);
605
606 /*
607 * Check any passed addresses
608 */
609 if (addr_len)
610 *addr_len=sizeof(*sin);
611
612 if (flags & MSG_ERRQUEUE)
613 return ip_recv_error(sk, msg, len);
614
615try_again:
616 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
617 &peeked, &err);
/* No datagram: err was filled in by __skb_recv_datagram(). */
618 if (!skb)
619 goto out;
620
/* Payload length excludes the UDP header; clamp the copy to it. */
621 ulen = skb->len - sizeof(struct udphdr);
622 copied = len;
623 if (copied > ulen)
624 copied = ulen;
625 else if (copied < ulen)
626 msg->msg_flags |= MSG_TRUNC;
627
628 /*
629 * If checksum is needed at all, try to do it while copying the
630 * data. If the data is truncated, or if we only want a partial
631 * coverage checksum (UDP-Lite), do it before the copy.
632 */
633
634 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
635 if (udp_lib_checksum_complete(skb))
636 goto csum_copy_err;
637 }
638
639 if (skb_csum_unnecessary(skb))
640 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
641 msg->msg_iov, copied );
642 else {
643 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
644
/* -EINVAL from the combined copy+checksum is treated as a checksum failure. */
645 if (err == -EINVAL)
646 goto csum_copy_err;
647 }
648
649 if (err)
650 goto out_free;
651
/* A datagram that was already peeked is not counted again. */
652 if (!peeked)
653 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
654
655 sock_recv_timestamp(msg, sk, skb);
656
657 /* Copy the address. */
658 if (sin)
659 {
660 sin->sin_family = AF_INET;
661 sin->sin_port = udp_hdr(skb)->source;
662 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
663 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
664 }
665 if (inet->cmsg_flags)
666 ip_cmsg_recv(msg, skb);
667
/* With MSG_TRUNC in flags the caller gets the real payload length. */
668 err = copied;
669 if (flags & MSG_TRUNC)
670 err = ulen;
671
672out_free:
673 lock_sock(sk);
674 skb_free_datagram(sk, skb);
675 release_sock(sk);
676out:
677 return err;
678
679csum_copy_err:
680 lock_sock(sk);
/* INERRORS is bumped only when skb_kill_datagram() returns 0. */
681 if (!skb_kill_datagram(sk, skb, flags))
682 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
683 release_sock(sk);
684
685 if (noblock)
686 return -EAGAIN;
687 goto try_again;
688}
689
690
691/* returns:
692 * -1: error
693 * 0: success
694 * >0: "udp encap" protocol resubmission
695 *
696 * Note that in the success and error cases, the skb is assumed to
697 * have either been requeued or freed.
698 */
/*
 * Deliver @skb to socket @sk.  The checks run in this order: xfrm input
 * policy, the optional encapsulation hook, the UDP-Lite coverage
 * requirements, a checksum check (only when a socket filter is attached)
 * and finally queuing via sock_queue_rcv_skb().
 *
 * Every path through "drop" counts UDP_MIB_INERRORS, frees the skb and
 * returns -1.
 */
699int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
700{
701 struct udp_sock *up = udp_sk(sk);
702 int rc;
703 int is_udplite = IS_UDPLITE(sk);
704
705 /*
706 * Charge it to the socket, dropping if the queue is full.
707 */
708 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
709 goto drop;
710 nf_reset(skb);
711
712 if (up->encap_type) {
713 /*
714 * This is an encapsulation socket so pass the skb to
715 * the socket's udp_encap_rcv() hook. Otherwise, just
716 * fall through and pass this up the UDP socket.
717 * up->encap_rcv() returns the following value:
718 * =0 if skb was successfully passed to the encap
719 * handler or was discarded by it.
720 * >0 if skb should be passed on to UDP.
721 * <0 if skb should be resubmitted as proto -N
722 */
723
724 /* if we're overly short, let UDP handle it */
725 if (skb->len > sizeof(struct udphdr) &&
726 up->encap_rcv != NULL) {
727 int ret;
728
729 ret = (*up->encap_rcv)(sk, skb);
/* Hook took the skb: count it and return the negated result (0, or N for a hook result of -N). */
730 if (ret <= 0) {
731 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
732 is_udplite);
733 return -ret;
734 }
735 }
736
737 /* FALLTHROUGH -- it's a UDP Packet */
738 }
739
740 /*
741 * UDP-Lite specific tests, ignored on UDP sockets
742 */
/* is_udplite is tested as a flag word here, not as a plain boolean. */
743 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
744
745 /*
746 * MIB statistics other than incrementing the error count are
747 * disabled for the following two types of errors: these depend
748 * on the application settings, not on the functioning of the
749 * protocol stack as such.
750 *
751 * RFC 3828 here recommends (sec 3.3): "There should also be a
752 * way ... to ... at least let the receiving application block
753 * delivery of packets with coverage values less than a value
754 * provided by the application."
755 */
756 if (up->pcrlen == 0) { /* full coverage was set */
757 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
758 "%d while full coverage %d requested\n",
759 UDP_SKB_CB(skb)->cscov, skb->len);
760 goto drop;
761 }
762 /* The next case involves violating the min. coverage requested
763 * by the receiver. This is subtle: if receiver wants x and x is
764 * greater than the buffersize/MTU then receiver will complain
765 * that it wants x while sender emits packets of smaller size y.
766 * Therefore the above ...()->partial_cov statement is essential.
767 */
768 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
769 LIMIT_NETDEBUG(KERN_WARNING
770 "UDPLITE: coverage %d too small, need min %d\n",
771 UDP_SKB_CB(skb)->cscov, up->pcrlen);
772 goto drop;
773 }
774 }
775
/* With a socket filter attached the checksum is verified before queuing. */
776 if (sk->sk_filter) {
777 if (udp_lib_checksum_complete(skb))
778 goto drop;
779 }
780
781 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
782 /* Note that an ENOMEM error is charged twice */
783 if (rc == -ENOMEM)
784 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
785 goto drop;
786 }
787
788 return 0;
789
790drop:
791 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
792 kfree_skb(skb);
793 return -1;
794}
795
796/*
797 * Multicasts and broadcasts go to each listener.
798 *
799 * Note: called only from the BH handler context,
800 * so we don't need to lock the hashes.
801 */
802static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
803 struct udphdr *uh,
804 __be32 saddr, __be32 daddr,
805 struct hlist_head udptable[])
806{
807 struct sock *sk;
808 int dif;
809
810 read_lock(&udp_hash_lock);
811 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
812 dif = skb->dev->ifindex;
813 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
814 if (sk) {
815 struct sock *sknext = NULL;
816
817 do {
818 struct sk_buff *skb1 = skb;
819
820 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
821 uh->source, saddr, dif);
822 if (sknext)
823 skb1 = skb_clone(skb, GFP_ATOMIC);
824
825 if (skb1) {
826 int ret = 0;
827
828 bh_lock_sock_nested(sk);
829 if (!sock_owned_by_user(sk))
830 ret = udp_queue_rcv_skb(sk, skb1);
831 else
832 sk_add_backlog(sk, skb1);
833 bh_unlock_sock(sk);
834
835 if (ret > 0)
836 /* we should probably re-process instead
837 * of dropping packets here. */
838 kfree_skb(skb1);
839 }
840 sk = sknext;
841 } while (sknext);
842 } else
843 kfree_skb(skb);
844 read_unlock(&udp_hash_lock);
845 return 0;
846}
847
848/* Initialize UDP checksum. If exited with zero value (success),
849 * CHECKSUM_UNNECESSARY means, that no more checks are required.
850 * Otherwise, csum completion requires chacksumming packet body,
851 * including udp header and folding it to skb->csum.
852 */
853static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
854 int proto)
855{
856 const struct iphdr *iph;
857 int err;
858
859 UDP_SKB_CB(skb)->partial_cov = 0;
860 UDP_SKB_CB(skb)->cscov = skb->len;
861
862 if (IS_PROTO_UDPLITE(proto)) {
863 err = udplite_checksum_init(skb, uh);
864 if (err)
865 return err;
866 }
867
868 iph = ip_hdr(skb);
869 if (uh->check == 0) {
870 skb->ip_summed = CHECKSUM_UNNECESSARY;
871 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
872 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
873 proto, skb->csum))
874 skb->ip_summed = CHECKSUM_UNNECESSARY;
875 }
876 if (!skb_csum_unnecessary(skb))
877 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
878 skb->len, proto, 0);
879 /* Probably, we should checksum udp header (it should be in cache
880 * in any case) and data in tiny packets (< rx copybreak).
881 */
882
883 return 0;
884}
885
886/*
887 * All we need to do is get the socket, and then do a checksum.
888 */
889
890int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
891 int proto)
892{
893 struct sock *sk;
894 struct udphdr *uh = udp_hdr(skb);
895 unsigned short ulen;
896 struct rtable *rt = skb->rtable;
897 __be32 saddr = ip_hdr(skb)->saddr;
898 __be32 daddr = ip_hdr(skb)->daddr;
899
900 /*
901 * Validate the packet.
902 */
903 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
904 goto drop; /* No space for header. */
905
906 ulen = ntohs(uh->len);
907 if (ulen > skb->len)
908 goto short_packet;
909
910 if (IS_PROTO_UDPLITE(proto)) {
911 /* UDP validates ulen. */
912 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
913 goto short_packet;
914 uh = udp_hdr(skb);
915 }
916
917 if (udp4_csum_init(skb, uh, proto))
918 goto csum_error;
919
920 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
921 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
922
923 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
924 uh->dest, inet_iif(skb), udptable);
925
926 if (sk != NULL) {
927 int ret = 0;
928 bh_lock_sock_nested(sk);
929 if (!sock_owned_by_user(sk))
930 ret = udp_queue_rcv_skb(sk, skb);
931 else
932 sk_add_backlog(sk, skb);
933 bh_unlock_sock(sk);
934 sock_put(sk);
935
936 /* a return value > 0 means to resubmit the input, but
937 * it wants the return to be -protocol, or 0
938 */
939 if (ret > 0)
940 return -ret;
941 return 0;
942 }
943
944 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
945 goto drop;
946 nf_reset(skb);
947
948 /* No socket. Drop packet silently, if checksum is wrong */
949 if (udp_lib_checksum_complete(skb))
950 goto csum_error;
951
952 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto));
953 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
954
955 /*
956 * Hmm. We got an UDP packet to a port to which we
957 * don't wanna listen. Ignore it.
958 */
959 kfree_skb(skb);
960 return 0;
961
962short_packet:
963 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
964 IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
965 NIPQUAD(saddr),
966 ntohs(uh->source),
967 ulen,
968 skb->len,
969 NIPQUAD(daddr),
970 ntohs(uh->dest));
971 goto drop;
972
973csum_error:
974 /*
975 * RFC1122: OK. Discards the bad packet silently (as far as
976 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
977 */
978 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
979 IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
980 NIPQUAD(saddr),
981 ntohs(uh->source),
982 NIPQUAD(daddr),
983 ntohs(uh->dest),
984 ulen);
985drop:
986 UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto));
987 kfree_skb(skb);
988 return 0;
989}
990
991int udp_rcv(struct sk_buff *skb)
992{
993 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
994}
995
/*
 * Flush the socket's pending frames while holding the socket lock.
 * Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	const int rc = 0;

	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);

	return rc;
}
1003
1004int udp_setsockopt(struct sock *sk, int level, int optname,
1005 char __user *optval, int optlen)
1006{
1007 if (IS_SOL_UDPFAMILY(level))
1008 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1009 udp_push_pending_frames);
1010 return ip_setsockopt(sk, level, optname, optval, optlen);
1011}
1012
#ifdef CONFIG_COMPAT
/*
 * CONFIG_COMPAT variant of udp_setsockopt(): UDP-family levels still go
 * to udp_lib_setsockopt(), all other levels are forwarded to
 * compat_ip_setsockopt().  Returns the chosen handler's result.
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	int err;

	if (IS_SOL_UDPFAMILY(level))
		err = udp_lib_setsockopt(sk, level, optname, optval, optlen,
					 udp_push_pending_frames);
	else
		err = compat_ip_setsockopt(sk, level, optname, optval, optlen);

	return err;
}
#endif
1023
1024int udp_getsockopt(struct sock *sk, int level, int optname,
1025 char __user *optval, int __user *optlen)
1026{
1027 if (IS_SOL_UDPFAMILY(level))
1028 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1029 return ip_getsockopt(sk, level, optname, optval, optlen);
1030}
1031
#ifdef CONFIG_COMPAT
/*
 * CONFIG_COMPAT variant of udp_getsockopt(): UDP-family levels still go
 * to udp_lib_getsockopt(), all other levels are forwarded to
 * compat_ip_getsockopt().  Returns the chosen handler's result.
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	int err;

	if (IS_SOL_UDPFAMILY(level))
		err = udp_lib_getsockopt(sk, level, optname, optval, optlen);
	else
		err = compat_ip_getsockopt(sk, level, optname, optval, optlen);

	return err;
}
#endif
1041
1042/* ------------------------------------------------------------------------ */
1043DEFINE_PROTO_INUSE(udp)
1044
1045struct proto udp_prot = {
1046 .name = "UDP",
1047 .owner = THIS_MODULE,
1048 .close = udp_lib_close,
1049 .connect = ip4_datagram_connect,
1050 .disconnect = udp_disconnect,
1051 .ioctl = udp_ioctl,
1052 .destroy = udp_destroy_sock,
1053 .setsockopt = udp_setsockopt,
1054 .getsockopt = udp_getsockopt,
1055 .sendmsg = udp_sendmsg,
1056 .recvmsg = udp_recvmsg,
1057 .sendpage = udp_sendpage,
1058 .backlog_rcv = udp_queue_rcv_skb,
1059 .hash = udp_lib_hash,
1060 .unhash = udp_lib_unhash,
1061 .get_port = udp_v4_get_port,
1062 .memory_allocated = &udp_memory_allocated,
1063 .sysctl_mem = sysctl_udp_mem,
1064 .sysctl_wmem = &sysctl_udp_wmem_min,
1065 .sysctl_rmem = &sysctl_udp_rmem_min,
1066 .obj_size = sizeof(struct udp_sock),
1067#ifdef CONFIG_COMPAT
1068 .compat_setsockopt = compat_udp_setsockopt,
1069 .compat_getsockopt = compat_udp_getsockopt,
1070#endif
1071 REF_PROTO_INUSE(udp)
1072};
1073
1074/* ------------------------------------------------------------------------ */
1075static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1076{
1077 struct inet_sock *inet = inet_sk(sp);
1078 __be32 dest = inet->daddr;
1079 __be32 src = inet->rcv_saddr;
1080 __u16 destp = ntohs(inet->dport);
1081 __u16 srcp = ntohs(inet->sport);
1082
1083 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1084 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1085 bucket, src, srcp, dest, destp, sp->sk_state,
1086 atomic_read(&sp->sk_wmem_alloc),
1087 atomic_read(&sp->sk_rmem_alloc),
1088 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1089 atomic_read(&sp->sk_refcnt), sp);
1090}
1091
1092int udp4_seq_show(struct seq_file *seq, void *v)
1093{
1094 if (v == SEQ_START_TOKEN)
1095 seq_printf(seq, "%-127s\n",
1096 " sl local_address rem_address st tx_queue "
1097 "rx_queue tr tm->when retrnsmt uid timeout "
1098 "inode");
1099 else {
1100 char tmpbuf[129];
1101 struct udp_iter_state *state = seq->private;
1102
1103 udp4_format_sock(v, tmpbuf, state->bucket);
1104 seq_printf(seq, "%-127s\n", tmpbuf);
1105 }
1106 return 0;
1107}
1108
1109/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
/* Only the address of this object is used here, via .seq_fops below. */
static struct file_operations udp4_seq_fops;

/* Descriptor handed to udp_proc_register()/udp_proc_unregister(). */
static struct udp_seq_afinfo udp4_seq_afinfo = {
	.name		= "udp",
	.family		= AF_INET,
	.owner		= THIS_MODULE,
	.hashtable	= udp_hash,
	.seq_fops	= &udp4_seq_fops,
	.seq_show	= udp4_seq_show,
};

/* Register the descriptor above; returns udp_proc_register()'s result. */
int __init udp4_proc_init(void)
{
	int rc = udp_proc_register(&udp4_seq_afinfo);

	return rc;
}

/* Undo udp4_proc_init(). */
void udp4_proc_exit(void)
{
	udp_proc_unregister(&udp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
1131
/* Export the UDP protocol table and udp_sendmsg() via EXPORT_SYMBOL. */
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
1134
diff --git a/net/ipv4/udplite_ipv4.c b/net/ipv4/udplite.c
index d49c6d68c8a9..d49c6d68c8a9 100644
--- a/net/ipv4/udplite_ipv4.c
+++ b/net/ipv4/udplite.c