aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/udp_ipv4.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-03-06 19:22:02 -0500
committerDavid S. Miller <davem@davemloft.net>2008-03-06 19:22:02 -0500
commitdb8dac20d5199307dcfcf4e01dac4bda5edf9e89 (patch)
tree3694d1aee5c0014fb45eec045a67ca150ca1231f /net/ipv4/udp_ipv4.c
parentba0fa4599484b98dbb21d279fbfdb40e9c07d30d (diff)
[UDP]: Revert udplite and code split.
This reverts commit db1ed684f6c430c4cdad67d058688b8a1b5e607c ("[IPV6] UDP: Rename IPv6 UDP files."), commit 8be8af8fa4405652e6c0797db5465a4be8afb998 ("[IPV4] UDP: Move IPv4-specific bits to other file.") and commit e898d4db2749c6052072e9bc4448e396cbdeb06a ("[UDP]: Allow users to configure UDP-Lite."). First, udplite is of such small cost, and it is a core protocol just like TCP and normal UDP are. We spent enormous amounts of effort to make udplite share as much code with core UDP as possible. All of that work is less valuable if we're just going to slap a config option on udplite support. It is also causing build failures, as reported on linux-next, showing that the changeset was not tested very well. In fact, this is the second build failure resulting from the udplite change. Finally, the config option provided was a bool instead of a modular option, meaning the udplite code does not even get build tested by allmodconfig builds, and furthermore the user is not presented with a reasonable modular build option, which is particularly needed by distribution vendors. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp_ipv4.c')
-rw-r--r--net/ipv4/udp_ipv4.c1134
1 files changed, 0 insertions, 1134 deletions
diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c
deleted file mode 100644
index fd14c2c50ed4..000000000000
--- a/net/ipv4/udp_ipv4.c
+++ /dev/null
@@ -1,1134 +0,0 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * UDP for IPv4.
7 *
8 * For full credits, see net/ipv4/udp.c.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <asm/system.h>
17#include <asm/uaccess.h>
18#include <asm/ioctls.h>
19#include <linux/bootmem.h>
20#include <linux/types.h>
21#include <linux/fcntl.h>
22#include <linux/module.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/igmp.h>
26#include <linux/in.h>
27#include <linux/errno.h>
28#include <linux/timer.h>
29#include <linux/mm.h>
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <net/tcp_states.h>
33#include <linux/skbuff.h>
34#include <linux/proc_fs.h>
35#include <linux/seq_file.h>
36#include <net/net_namespace.h>
37#include <net/icmp.h>
38#include <net/route.h>
39#include <net/checksum.h>
40#include <net/xfrm.h>
41#include "udp_impl.h"
42
43int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
44{
45 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
46
47 return ( !ipv6_only_sock(sk2) &&
48 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
49 inet1->rcv_saddr == inet2->rcv_saddr ));
50}
51
52static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
53{
54 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
55}
56
57/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
58 * harder than this. -DaveM
59 */
60static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
61 __be16 sport, __be32 daddr, __be16 dport,
62 int dif, struct hlist_head udptable[])
63{
64 struct sock *sk, *result = NULL;
65 struct hlist_node *node;
66 unsigned short hnum = ntohs(dport);
67 int badness = -1;
68
69 read_lock(&udp_hash_lock);
70 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
71 struct inet_sock *inet = inet_sk(sk);
72
73 if (sk->sk_net == net && sk->sk_hash == hnum &&
74 !ipv6_only_sock(sk)) {
75 int score = (sk->sk_family == PF_INET ? 1 : 0);
76 if (inet->rcv_saddr) {
77 if (inet->rcv_saddr != daddr)
78 continue;
79 score+=2;
80 }
81 if (inet->daddr) {
82 if (inet->daddr != saddr)
83 continue;
84 score+=2;
85 }
86 if (inet->dport) {
87 if (inet->dport != sport)
88 continue;
89 score+=2;
90 }
91 if (sk->sk_bound_dev_if) {
92 if (sk->sk_bound_dev_if != dif)
93 continue;
94 score+=2;
95 }
96 if (score == 9) {
97 result = sk;
98 break;
99 } else if (score > badness) {
100 result = sk;
101 badness = score;
102 }
103 }
104 }
105 if (result)
106 sock_hold(result);
107 read_unlock(&udp_hash_lock);
108 return result;
109}
110
111static inline struct sock *udp_v4_mcast_next(struct sock *sk,
112 __be16 loc_port, __be32 loc_addr,
113 __be16 rmt_port, __be32 rmt_addr,
114 int dif)
115{
116 struct hlist_node *node;
117 struct sock *s = sk;
118 unsigned short hnum = ntohs(loc_port);
119
120 sk_for_each_from(s, node) {
121 struct inet_sock *inet = inet_sk(s);
122
123 if (s->sk_hash != hnum ||
124 (inet->daddr && inet->daddr != rmt_addr) ||
125 (inet->dport != rmt_port && inet->dport) ||
126 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
127 ipv6_only_sock(s) ||
128 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
129 continue;
130 if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
131 continue;
132 goto found;
133 }
134 s = NULL;
135found:
136 return s;
137}
138
139/*
140 * This routine is called by the ICMP module when it gets some
141 * sort of error condition. If err < 0 then the socket should
142 * be closed and the error returned to the user. If err > 0
143 * it's just the icmp type << 8 | icmp code.
144 * Header points to the ip header of the error packet. We move
145 * on past this. Then (as it used to claim before adjustment)
146 * header points to the first 8 bytes of the udp header. We need
147 * to find the appropriate port.
148 */
149
150void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
151{
152 struct inet_sock *inet;
153 struct iphdr *iph = (struct iphdr*)skb->data;
154 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
155 const int type = icmp_hdr(skb)->type;
156 const int code = icmp_hdr(skb)->code;
157 struct sock *sk;
158 int harderr;
159 int err;
160
161 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
162 iph->saddr, uh->source, skb->dev->ifindex, udptable);
163 if (sk == NULL) {
164 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
165 return; /* No socket for error */
166 }
167
168 err = 0;
169 harderr = 0;
170 inet = inet_sk(sk);
171
172 switch (type) {
173 default:
174 case ICMP_TIME_EXCEEDED:
175 err = EHOSTUNREACH;
176 break;
177 case ICMP_SOURCE_QUENCH:
178 goto out;
179 case ICMP_PARAMETERPROB:
180 err = EPROTO;
181 harderr = 1;
182 break;
183 case ICMP_DEST_UNREACH:
184 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
185 if (inet->pmtudisc != IP_PMTUDISC_DONT) {
186 err = EMSGSIZE;
187 harderr = 1;
188 break;
189 }
190 goto out;
191 }
192 err = EHOSTUNREACH;
193 if (code <= NR_ICMP_UNREACH) {
194 harderr = icmp_err_convert[code].fatal;
195 err = icmp_err_convert[code].errno;
196 }
197 break;
198 }
199
200 /*
201 * RFC1122: OK. Passes ICMP errors back to application, as per
202 * 4.1.3.3.
203 */
204 if (!inet->recverr) {
205 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
206 goto out;
207 } else {
208 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
209 }
210 sk->sk_err = err;
211 sk->sk_error_report(sk);
212out:
213 sock_put(sk);
214}
215
216void udp_err(struct sk_buff *skb, u32 info)
217{
218 __udp4_lib_err(skb, info, udp_hash);
219}
220
221/*
222 * Throw away all pending data and cancel the corking. Socket is locked.
223 */
224static void udp_flush_pending_frames(struct sock *sk)
225{
226 struct udp_sock *up = udp_sk(sk);
227
228 if (up->pending) {
229 up->len = 0;
230 up->pending = 0;
231 ip_flush_pending_frames(sk);
232 }
233}
234
235/**
236 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
237 * @sk: socket we are sending on
238 * @skb: sk_buff containing the filled-in UDP header
239 * (checksum field must be zeroed out)
240 */
241static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
242 __be32 src, __be32 dst, int len )
243{
244 unsigned int offset;
245 struct udphdr *uh = udp_hdr(skb);
246 __wsum csum = 0;
247
248 if (skb_queue_len(&sk->sk_write_queue) == 1) {
249 /*
250 * Only one fragment on the socket.
251 */
252 skb->csum_start = skb_transport_header(skb) - skb->head;
253 skb->csum_offset = offsetof(struct udphdr, check);
254 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
255 } else {
256 /*
257 * HW-checksum won't work as there are two or more
258 * fragments on the socket so that all csums of sk_buffs
259 * should be together
260 */
261 offset = skb_transport_offset(skb);
262 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
263
264 skb->ip_summed = CHECKSUM_NONE;
265
266 skb_queue_walk(&sk->sk_write_queue, skb) {
267 csum = csum_add(csum, skb->csum);
268 }
269
270 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
271 if (uh->check == 0)
272 uh->check = CSUM_MANGLED_0;
273 }
274}
275
276/*
277 * Push out all pending data as one UDP datagram. Socket is locked.
278 */
279static int udp_push_pending_frames(struct sock *sk)
280{
281 struct udp_sock *up = udp_sk(sk);
282 struct inet_sock *inet = inet_sk(sk);
283 struct flowi *fl = &inet->cork.fl;
284 struct sk_buff *skb;
285 struct udphdr *uh;
286 int err = 0;
287 int is_udplite = IS_UDPLITE(sk);
288 __wsum csum = 0;
289
290 /* Grab the skbuff where UDP header space exists. */
291 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
292 goto out;
293
294 /*
295 * Create a UDP header
296 */
297 uh = udp_hdr(skb);
298 uh->source = fl->fl_ip_sport;
299 uh->dest = fl->fl_ip_dport;
300 uh->len = htons(up->len);
301 uh->check = 0;
302
303 if (is_udplite) /* UDP-Lite */
304 csum = udplite_csum_outgoing(sk, skb);
305
306 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
307
308 skb->ip_summed = CHECKSUM_NONE;
309 goto send;
310
311 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
312
313 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
314 goto send;
315
316 } else /* `normal' UDP */
317 csum = udp_csum_outgoing(sk, skb);
318
319 /* add protocol-dependent pseudo-header */
320 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
321 sk->sk_protocol, csum );
322 if (uh->check == 0)
323 uh->check = CSUM_MANGLED_0;
324
325send:
326 err = ip_push_pending_frames(sk);
327out:
328 up->len = 0;
329 up->pending = 0;
330 if (!err)
331 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
332 return err;
333}
334
335int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
336 size_t len)
337{
338 struct inet_sock *inet = inet_sk(sk);
339 struct udp_sock *up = udp_sk(sk);
340 int ulen = len;
341 struct ipcm_cookie ipc;
342 struct rtable *rt = NULL;
343 int free = 0;
344 int connected = 0;
345 __be32 daddr, faddr, saddr;
346 __be16 dport;
347 u8 tos;
348 int err, is_udplite = IS_UDPLITE(sk);
349 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
350 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
351
352 if (len > 0xFFFF)
353 return -EMSGSIZE;
354
355 /*
356 * Check the flags.
357 */
358
359 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
360 return -EOPNOTSUPP;
361
362 ipc.opt = NULL;
363
364 if (up->pending) {
365 /*
366 * There are pending frames.
367 * The socket lock must be held while it's corked.
368 */
369 lock_sock(sk);
370 if (likely(up->pending)) {
371 if (unlikely(up->pending != AF_INET)) {
372 release_sock(sk);
373 return -EINVAL;
374 }
375 goto do_append_data;
376 }
377 release_sock(sk);
378 }
379 ulen += sizeof(struct udphdr);
380
381 /*
382 * Get and verify the address.
383 */
384 if (msg->msg_name) {
385 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
386 if (msg->msg_namelen < sizeof(*usin))
387 return -EINVAL;
388 if (usin->sin_family != AF_INET) {
389 if (usin->sin_family != AF_UNSPEC)
390 return -EAFNOSUPPORT;
391 }
392
393 daddr = usin->sin_addr.s_addr;
394 dport = usin->sin_port;
395 if (dport == 0)
396 return -EINVAL;
397 } else {
398 if (sk->sk_state != TCP_ESTABLISHED)
399 return -EDESTADDRREQ;
400 daddr = inet->daddr;
401 dport = inet->dport;
402 /* Open fast path for connected socket.
403 Route will not be used, if at least one option is set.
404 */
405 connected = 1;
406 }
407 ipc.addr = inet->saddr;
408
409 ipc.oif = sk->sk_bound_dev_if;
410 if (msg->msg_controllen) {
411 err = ip_cmsg_send(msg, &ipc);
412 if (err)
413 return err;
414 if (ipc.opt)
415 free = 1;
416 connected = 0;
417 }
418 if (!ipc.opt)
419 ipc.opt = inet->opt;
420
421 saddr = ipc.addr;
422 ipc.addr = faddr = daddr;
423
424 if (ipc.opt && ipc.opt->srr) {
425 if (!daddr)
426 return -EINVAL;
427 faddr = ipc.opt->faddr;
428 connected = 0;
429 }
430 tos = RT_TOS(inet->tos);
431 if (sock_flag(sk, SOCK_LOCALROUTE) ||
432 (msg->msg_flags & MSG_DONTROUTE) ||
433 (ipc.opt && ipc.opt->is_strictroute)) {
434 tos |= RTO_ONLINK;
435 connected = 0;
436 }
437
438 if (ipv4_is_multicast(daddr)) {
439 if (!ipc.oif)
440 ipc.oif = inet->mc_index;
441 if (!saddr)
442 saddr = inet->mc_addr;
443 connected = 0;
444 }
445
446 if (connected)
447 rt = (struct rtable*)sk_dst_check(sk, 0);
448
449 if (rt == NULL) {
450 struct flowi fl = { .oif = ipc.oif,
451 .nl_u = { .ip4_u =
452 { .daddr = faddr,
453 .saddr = saddr,
454 .tos = tos } },
455 .proto = sk->sk_protocol,
456 .uli_u = { .ports =
457 { .sport = inet->sport,
458 .dport = dport } } };
459 security_sk_classify_flow(sk, &fl);
460 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
461 if (err) {
462 if (err == -ENETUNREACH)
463 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
464 goto out;
465 }
466
467 err = -EACCES;
468 if ((rt->rt_flags & RTCF_BROADCAST) &&
469 !sock_flag(sk, SOCK_BROADCAST))
470 goto out;
471 if (connected)
472 sk_dst_set(sk, dst_clone(&rt->u.dst));
473 }
474
475 if (msg->msg_flags&MSG_CONFIRM)
476 goto do_confirm;
477back_from_confirm:
478
479 saddr = rt->rt_src;
480 if (!ipc.addr)
481 daddr = ipc.addr = rt->rt_dst;
482
483 lock_sock(sk);
484 if (unlikely(up->pending)) {
485 /* The socket is already corked while preparing it. */
486 /* ... which is an evident application bug. --ANK */
487 release_sock(sk);
488
489 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
490 err = -EINVAL;
491 goto out;
492 }
493 /*
494 * Now cork the socket to pend data.
495 */
496 inet->cork.fl.fl4_dst = daddr;
497 inet->cork.fl.fl_ip_dport = dport;
498 inet->cork.fl.fl4_src = saddr;
499 inet->cork.fl.fl_ip_sport = inet->sport;
500 up->pending = AF_INET;
501
502do_append_data:
503 up->len += ulen;
504 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
505 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
506 sizeof(struct udphdr), &ipc, rt,
507 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
508 if (err)
509 udp_flush_pending_frames(sk);
510 else if (!corkreq)
511 err = udp_push_pending_frames(sk);
512 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
513 up->pending = 0;
514 release_sock(sk);
515
516out:
517 ip_rt_put(rt);
518 if (free)
519 kfree(ipc.opt);
520 if (!err)
521 return len;
522 /*
523 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
524 * ENOBUFS might not be good (it's not tunable per se), but otherwise
525 * we don't have a good statistic (IpOutDiscards but it can be too many
526 * things). We could add another new stat but at least for now that
527 * seems like overkill.
528 */
529 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
530 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
531 }
532 return err;
533
534do_confirm:
535 dst_confirm(&rt->u.dst);
536 if (!(msg->msg_flags&MSG_PROBE) || len)
537 goto back_from_confirm;
538 err = 0;
539 goto out;
540}
541
542int udp_sendpage(struct sock *sk, struct page *page, int offset,
543 size_t size, int flags)
544{
545 struct udp_sock *up = udp_sk(sk);
546 int ret;
547
548 if (!up->pending) {
549 struct msghdr msg = { .msg_flags = flags|MSG_MORE };
550
551 /* Call udp_sendmsg to specify destination address which
552 * sendpage interface can't pass.
553 * This will succeed only when the socket is connected.
554 */
555 ret = udp_sendmsg(NULL, sk, &msg, 0);
556 if (ret < 0)
557 return ret;
558 }
559
560 lock_sock(sk);
561
562 if (unlikely(!up->pending)) {
563 release_sock(sk);
564
565 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
566 return -EINVAL;
567 }
568
569 ret = ip_append_page(sk, page, offset, size, flags);
570 if (ret == -EOPNOTSUPP) {
571 release_sock(sk);
572 return sock_no_sendpage(sk->sk_socket, page, offset,
573 size, flags);
574 }
575 if (ret < 0) {
576 udp_flush_pending_frames(sk);
577 goto out;
578 }
579
580 up->len += size;
581 if (!(up->corkflag || (flags&MSG_MORE)))
582 ret = udp_push_pending_frames(sk);
583 if (!ret)
584 ret = size;
585out:
586 release_sock(sk);
587 return ret;
588}
589
590/*
591 * This should be easy, if there is something there we
592 * return it, otherwise we block.
593 */
594
595int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
596 size_t len, int noblock, int flags, int *addr_len)
597{
598 struct inet_sock *inet = inet_sk(sk);
599 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
600 struct sk_buff *skb;
601 unsigned int ulen, copied;
602 int peeked;
603 int err;
604 int is_udplite = IS_UDPLITE(sk);
605
606 /*
607 * Check any passed addresses
608 */
609 if (addr_len)
610 *addr_len=sizeof(*sin);
611
612 if (flags & MSG_ERRQUEUE)
613 return ip_recv_error(sk, msg, len);
614
615try_again:
616 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
617 &peeked, &err);
618 if (!skb)
619 goto out;
620
621 ulen = skb->len - sizeof(struct udphdr);
622 copied = len;
623 if (copied > ulen)
624 copied = ulen;
625 else if (copied < ulen)
626 msg->msg_flags |= MSG_TRUNC;
627
628 /*
629 * If checksum is needed at all, try to do it while copying the
630 * data. If the data is truncated, or if we only want a partial
631 * coverage checksum (UDP-Lite), do it before the copy.
632 */
633
634 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
635 if (udp_lib_checksum_complete(skb))
636 goto csum_copy_err;
637 }
638
639 if (skb_csum_unnecessary(skb))
640 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
641 msg->msg_iov, copied );
642 else {
643 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
644
645 if (err == -EINVAL)
646 goto csum_copy_err;
647 }
648
649 if (err)
650 goto out_free;
651
652 if (!peeked)
653 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
654
655 sock_recv_timestamp(msg, sk, skb);
656
657 /* Copy the address. */
658 if (sin)
659 {
660 sin->sin_family = AF_INET;
661 sin->sin_port = udp_hdr(skb)->source;
662 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
663 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
664 }
665 if (inet->cmsg_flags)
666 ip_cmsg_recv(msg, skb);
667
668 err = copied;
669 if (flags & MSG_TRUNC)
670 err = ulen;
671
672out_free:
673 lock_sock(sk);
674 skb_free_datagram(sk, skb);
675 release_sock(sk);
676out:
677 return err;
678
679csum_copy_err:
680 lock_sock(sk);
681 if (!skb_kill_datagram(sk, skb, flags))
682 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
683 release_sock(sk);
684
685 if (noblock)
686 return -EAGAIN;
687 goto try_again;
688}
689
690
691/* returns:
692 * -1: error
693 * 0: success
694 * >0: "udp encap" protocol resubmission
695 *
696 * Note that in the success and error cases, the skb is assumed to
697 * have either been requeued or freed.
698 */
699int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
700{
701 struct udp_sock *up = udp_sk(sk);
702 int rc;
703 int is_udplite = IS_UDPLITE(sk);
704
705 /*
706 * Charge it to the socket, dropping if the queue is full.
707 */
708 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
709 goto drop;
710 nf_reset(skb);
711
712 if (up->encap_type) {
713 /*
714 * This is an encapsulation socket so pass the skb to
715 * the socket's udp_encap_rcv() hook. Otherwise, just
716 * fall through and pass this up the UDP socket.
717 * up->encap_rcv() returns the following value:
718 * =0 if skb was successfully passed to the encap
719 * handler or was discarded by it.
720 * >0 if skb should be passed on to UDP.
721 * <0 if skb should be resubmitted as proto -N
722 */
723
724 /* if we're overly short, let UDP handle it */
725 if (skb->len > sizeof(struct udphdr) &&
726 up->encap_rcv != NULL) {
727 int ret;
728
729 ret = (*up->encap_rcv)(sk, skb);
730 if (ret <= 0) {
731 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
732 is_udplite);
733 return -ret;
734 }
735 }
736
737 /* FALLTHROUGH -- it's a UDP Packet */
738 }
739
740 /*
741 * UDP-Lite specific tests, ignored on UDP sockets
742 */
743 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
744
745 /*
746 * MIB statistics other than incrementing the error count are
747 * disabled for the following two types of errors: these depend
748 * on the application settings, not on the functioning of the
749 * protocol stack as such.
750 *
751 * RFC 3828 here recommends (sec 3.3): "There should also be a
752 * way ... to ... at least let the receiving application block
753 * delivery of packets with coverage values less than a value
754 * provided by the application."
755 */
756 if (up->pcrlen == 0) { /* full coverage was set */
757 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
758 "%d while full coverage %d requested\n",
759 UDP_SKB_CB(skb)->cscov, skb->len);
760 goto drop;
761 }
762 /* The next case involves violating the min. coverage requested
763 * by the receiver. This is subtle: if receiver wants x and x is
764 * greater than the buffersize/MTU then receiver will complain
765 * that it wants x while sender emits packets of smaller size y.
766 * Therefore the above ...()->partial_cov statement is essential.
767 */
768 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
769 LIMIT_NETDEBUG(KERN_WARNING
770 "UDPLITE: coverage %d too small, need min %d\n",
771 UDP_SKB_CB(skb)->cscov, up->pcrlen);
772 goto drop;
773 }
774 }
775
776 if (sk->sk_filter) {
777 if (udp_lib_checksum_complete(skb))
778 goto drop;
779 }
780
781 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
782 /* Note that an ENOMEM error is charged twice */
783 if (rc == -ENOMEM)
784 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
785 goto drop;
786 }
787
788 return 0;
789
790drop:
791 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
792 kfree_skb(skb);
793 return -1;
794}
795
796/*
797 * Multicasts and broadcasts go to each listener.
798 *
799 * Note: called only from the BH handler context,
800 * so we don't need to lock the hashes.
801 */
802static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
803 struct udphdr *uh,
804 __be32 saddr, __be32 daddr,
805 struct hlist_head udptable[])
806{
807 struct sock *sk;
808 int dif;
809
810 read_lock(&udp_hash_lock);
811 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
812 dif = skb->dev->ifindex;
813 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
814 if (sk) {
815 struct sock *sknext = NULL;
816
817 do {
818 struct sk_buff *skb1 = skb;
819
820 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
821 uh->source, saddr, dif);
822 if (sknext)
823 skb1 = skb_clone(skb, GFP_ATOMIC);
824
825 if (skb1) {
826 int ret = 0;
827
828 bh_lock_sock_nested(sk);
829 if (!sock_owned_by_user(sk))
830 ret = udp_queue_rcv_skb(sk, skb1);
831 else
832 sk_add_backlog(sk, skb1);
833 bh_unlock_sock(sk);
834
835 if (ret > 0)
836 /* we should probably re-process instead
837 * of dropping packets here. */
838 kfree_skb(skb1);
839 }
840 sk = sknext;
841 } while (sknext);
842 } else
843 kfree_skb(skb);
844 read_unlock(&udp_hash_lock);
845 return 0;
846}
847
848/* Initialize UDP checksum. If exited with zero value (success),
849 * CHECKSUM_UNNECESSARY means, that no more checks are required.
850 * Otherwise, csum completion requires chacksumming packet body,
851 * including udp header and folding it to skb->csum.
852 */
853static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
854 int proto)
855{
856 const struct iphdr *iph;
857 int err;
858
859 UDP_SKB_CB(skb)->partial_cov = 0;
860 UDP_SKB_CB(skb)->cscov = skb->len;
861
862 if (IS_PROTO_UDPLITE(proto)) {
863 err = udplite_checksum_init(skb, uh);
864 if (err)
865 return err;
866 }
867
868 iph = ip_hdr(skb);
869 if (uh->check == 0) {
870 skb->ip_summed = CHECKSUM_UNNECESSARY;
871 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
872 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
873 proto, skb->csum))
874 skb->ip_summed = CHECKSUM_UNNECESSARY;
875 }
876 if (!skb_csum_unnecessary(skb))
877 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
878 skb->len, proto, 0);
879 /* Probably, we should checksum udp header (it should be in cache
880 * in any case) and data in tiny packets (< rx copybreak).
881 */
882
883 return 0;
884}
885
886/*
887 * All we need to do is get the socket, and then do a checksum.
888 */
889
890int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
891 int proto)
892{
893 struct sock *sk;
894 struct udphdr *uh = udp_hdr(skb);
895 unsigned short ulen;
896 struct rtable *rt = skb->rtable;
897 __be32 saddr = ip_hdr(skb)->saddr;
898 __be32 daddr = ip_hdr(skb)->daddr;
899
900 /*
901 * Validate the packet.
902 */
903 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
904 goto drop; /* No space for header. */
905
906 ulen = ntohs(uh->len);
907 if (ulen > skb->len)
908 goto short_packet;
909
910 if (IS_PROTO_UDPLITE(proto)) {
911 /* UDP validates ulen. */
912 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
913 goto short_packet;
914 uh = udp_hdr(skb);
915 }
916
917 if (udp4_csum_init(skb, uh, proto))
918 goto csum_error;
919
920 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
921 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
922
923 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
924 uh->dest, inet_iif(skb), udptable);
925
926 if (sk != NULL) {
927 int ret = 0;
928 bh_lock_sock_nested(sk);
929 if (!sock_owned_by_user(sk))
930 ret = udp_queue_rcv_skb(sk, skb);
931 else
932 sk_add_backlog(sk, skb);
933 bh_unlock_sock(sk);
934 sock_put(sk);
935
936 /* a return value > 0 means to resubmit the input, but
937 * it wants the return to be -protocol, or 0
938 */
939 if (ret > 0)
940 return -ret;
941 return 0;
942 }
943
944 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
945 goto drop;
946 nf_reset(skb);
947
948 /* No socket. Drop packet silently, if checksum is wrong */
949 if (udp_lib_checksum_complete(skb))
950 goto csum_error;
951
952 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto));
953 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
954
955 /*
956 * Hmm. We got an UDP packet to a port to which we
957 * don't wanna listen. Ignore it.
958 */
959 kfree_skb(skb);
960 return 0;
961
962short_packet:
963 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
964 IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
965 NIPQUAD(saddr),
966 ntohs(uh->source),
967 ulen,
968 skb->len,
969 NIPQUAD(daddr),
970 ntohs(uh->dest));
971 goto drop;
972
973csum_error:
974 /*
975 * RFC1122: OK. Discards the bad packet silently (as far as
976 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
977 */
978 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
979 IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
980 NIPQUAD(saddr),
981 ntohs(uh->source),
982 NIPQUAD(daddr),
983 ntohs(uh->dest),
984 ulen);
985drop:
986 UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto));
987 kfree_skb(skb);
988 return 0;
989}
990
991int udp_rcv(struct sk_buff *skb)
992{
993 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
994}
995
/*
 * Socket teardown hook: drop any still-corked data while holding the
 * socket lock.  Always returns 0.
 */
int udp_destroy_sock(struct sock *sk)
{
	const int rc = 0;

	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);

	return rc;
}
1003
1004int udp_setsockopt(struct sock *sk, int level, int optname,
1005 char __user *optval, int optlen)
1006{
1007 if (IS_SOL_UDPFAMILY(level))
1008 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1009 udp_push_pending_frames);
1010 return ip_setsockopt(sk, level, optname, optval, optlen);
1011}
1012
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_setsockopt(): levels outside the UDP family are
 * passed to compat_ip_setsockopt() instead of ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (!IS_SOL_UDPFAMILY(level))
		return compat_ip_setsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
1023
1024int udp_getsockopt(struct sock *sk, int level, int optname,
1025 char __user *optval, int __user *optlen)
1026{
1027 if (IS_SOL_UDPFAMILY(level))
1028 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1029 return ip_getsockopt(sk, level, optname, optval, optlen);
1030}
1031
#ifdef CONFIG_COMPAT
/*
 * Compat variant of udp_getsockopt(): levels outside the UDP family are
 * passed to compat_ip_getsockopt() instead of ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (!IS_SOL_UDPFAMILY(level))
		return compat_ip_getsockopt(sk, level, optname, optval,
					    optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
1041
1042/* ------------------------------------------------------------------------ */
1043DEFINE_PROTO_INUSE(udp)
1044
1045struct proto udp_prot = {
1046 .name = "UDP",
1047 .owner = THIS_MODULE,
1048 .close = udp_lib_close,
1049 .connect = ip4_datagram_connect,
1050 .disconnect = udp_disconnect,
1051 .ioctl = udp_ioctl,
1052 .destroy = udp_destroy_sock,
1053 .setsockopt = udp_setsockopt,
1054 .getsockopt = udp_getsockopt,
1055 .sendmsg = udp_sendmsg,
1056 .recvmsg = udp_recvmsg,
1057 .sendpage = udp_sendpage,
1058 .backlog_rcv = udp_queue_rcv_skb,
1059 .hash = udp_lib_hash,
1060 .unhash = udp_lib_unhash,
1061 .get_port = udp_v4_get_port,
1062 .memory_allocated = &udp_memory_allocated,
1063 .sysctl_mem = sysctl_udp_mem,
1064 .sysctl_wmem = &sysctl_udp_wmem_min,
1065 .sysctl_rmem = &sysctl_udp_rmem_min,
1066 .obj_size = sizeof(struct udp_sock),
1067#ifdef CONFIG_COMPAT
1068 .compat_setsockopt = compat_udp_setsockopt,
1069 .compat_getsockopt = compat_udp_getsockopt,
1070#endif
1071 REF_PROTO_INUSE(udp)
1072};
1073
1074/* ------------------------------------------------------------------------ */
1075static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1076{
1077 struct inet_sock *inet = inet_sk(sp);
1078 __be32 dest = inet->daddr;
1079 __be32 src = inet->rcv_saddr;
1080 __u16 destp = ntohs(inet->dport);
1081 __u16 srcp = ntohs(inet->sport);
1082
1083 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1084 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1085 bucket, src, srcp, dest, destp, sp->sk_state,
1086 atomic_read(&sp->sk_wmem_alloc),
1087 atomic_read(&sp->sk_rmem_alloc),
1088 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1089 atomic_read(&sp->sk_refcnt), sp);
1090}
1091
1092int udp4_seq_show(struct seq_file *seq, void *v)
1093{
1094 if (v == SEQ_START_TOKEN)
1095 seq_printf(seq, "%-127s\n",
1096 " sl local_address rem_address st tx_queue "
1097 "rx_queue tr tm->when retrnsmt uid timeout "
1098 "inode");
1099 else {
1100 char tmpbuf[129];
1101 struct udp_iter_state *state = seq->private;
1102
1103 udp4_format_sock(v, tmpbuf, state->bucket);
1104 seq_printf(seq, "%-127s\n", tmpbuf);
1105 }
1106 return 0;
1107}
1108
1109/* ------------------------------------------------------------------------ */
1110#ifdef CONFIG_PROC_FS
1111static struct file_operations udp4_seq_fops;
1112static struct udp_seq_afinfo udp4_seq_afinfo = {
1113 .owner = THIS_MODULE,
1114 .name = "udp",
1115 .family = AF_INET,
1116 .hashtable = udp_hash,
1117 .seq_show = udp4_seq_show,
1118 .seq_fops = &udp4_seq_fops,
1119};
1120
1121int __init udp4_proc_init(void)
1122{
1123 return udp_proc_register(&udp4_seq_afinfo);
1124}
1125
1126void udp4_proc_exit(void)
1127{
1128 udp_proc_unregister(&udp4_seq_afinfo);
1129}
1130#endif /* CONFIG_PROC_FS */
1131
1132EXPORT_SYMBOL(udp_prot);
1133EXPORT_SYMBOL(udp_sendmsg);
1134