about summary refs log tree commit diff stats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- net/ipv4/udp.c 558
1 files changed, 328 insertions, 230 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 865d75214a9a..035915fc9ed3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -92,22 +92,16 @@
92#include <linux/timer.h> 92#include <linux/timer.h>
93#include <linux/mm.h> 93#include <linux/mm.h>
94#include <linux/inet.h> 94#include <linux/inet.h>
95#include <linux/ipv6.h>
96#include <linux/netdevice.h> 95#include <linux/netdevice.h>
97#include <net/snmp.h>
98#include <net/ip.h>
99#include <net/tcp_states.h> 96#include <net/tcp_states.h>
100#include <net/protocol.h>
101#include <linux/skbuff.h> 97#include <linux/skbuff.h>
102#include <linux/proc_fs.h> 98#include <linux/proc_fs.h>
103#include <linux/seq_file.h> 99#include <linux/seq_file.h>
104#include <net/sock.h>
105#include <net/udp.h>
106#include <net/icmp.h> 100#include <net/icmp.h>
107#include <net/route.h> 101#include <net/route.h>
108#include <net/inet_common.h>
109#include <net/checksum.h> 102#include <net/checksum.h>
110#include <net/xfrm.h> 103#include <net/xfrm.h>
104#include "udp_impl.h"
111 105
112/* 106/*
113 * Snmp MIB for the UDP layer 107 * Snmp MIB for the UDP layer
@@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock);
120 114
121static int udp_port_rover; 115static int udp_port_rover;
122 116
123static inline int udp_lport_inuse(u16 num) 117static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
124{ 118{
125 struct sock *sk; 119 struct sock *sk;
126 struct hlist_node *node; 120 struct hlist_node *node;
127 121
128 sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) 122 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
129 if (inet_sk(sk)->num == num) 123 if (inet_sk(sk)->num == num)
130 return 1; 124 return 1;
131 return 0; 125 return 0;
132} 126}
133 127
134/** 128/**
135 * udp_get_port - common port lookup for IPv4 and IPv6 129 * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
136 * 130 *
137 * @sk: socket struct in question 131 * @sk: socket struct in question
138 * @snum: port number to look up 132 * @snum: port number to look up
133 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
134 * @port_rover: pointer to record of last unallocated port
139 * @saddr_comp: AF-dependent comparison of bound local IP addresses 135 * @saddr_comp: AF-dependent comparison of bound local IP addresses
140 */ 136 */
141int udp_get_port(struct sock *sk, unsigned short snum, 137int __udp_lib_get_port(struct sock *sk, unsigned short snum,
142 int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) 138 struct hlist_head udptable[], int *port_rover,
139 int (*saddr_comp)(const struct sock *sk1,
140 const struct sock *sk2 ) )
143{ 141{
144 struct hlist_node *node; 142 struct hlist_node *node;
145 struct hlist_head *head; 143 struct hlist_head *head;
@@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum,
150 if (snum == 0) { 148 if (snum == 0) {
151 int best_size_so_far, best, result, i; 149 int best_size_so_far, best, result, i;
152 150
153 if (udp_port_rover > sysctl_local_port_range[1] || 151 if (*port_rover > sysctl_local_port_range[1] ||
154 udp_port_rover < sysctl_local_port_range[0]) 152 *port_rover < sysctl_local_port_range[0])
155 udp_port_rover = sysctl_local_port_range[0]; 153 *port_rover = sysctl_local_port_range[0];
156 best_size_so_far = 32767; 154 best_size_so_far = 32767;
157 best = result = udp_port_rover; 155 best = result = *port_rover;
158 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { 156 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
159 int size; 157 int size;
160 158
161 head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; 159 head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
162 if (hlist_empty(head)) { 160 if (hlist_empty(head)) {
163 if (result > sysctl_local_port_range[1]) 161 if (result > sysctl_local_port_range[1])
164 result = sysctl_local_port_range[0] + 162 result = sysctl_local_port_range[0] +
@@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum,
179 result = sysctl_local_port_range[0] 177 result = sysctl_local_port_range[0]
180 + ((result - sysctl_local_port_range[0]) & 178 + ((result - sysctl_local_port_range[0]) &
181 (UDP_HTABLE_SIZE - 1)); 179 (UDP_HTABLE_SIZE - 1));
182 if (!udp_lport_inuse(result)) 180 if (! __udp_lib_lport_inuse(result, udptable))
183 break; 181 break;
184 } 182 }
185 if (i >= (1 << 16) / UDP_HTABLE_SIZE) 183 if (i >= (1 << 16) / UDP_HTABLE_SIZE)
186 goto fail; 184 goto fail;
187gotit: 185gotit:
188 udp_port_rover = snum = result; 186 *port_rover = snum = result;
189 } else { 187 } else {
190 head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; 188 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
191 189
192 sk_for_each(sk2, node, head) 190 sk_for_each(sk2, node, head)
193 if (inet_sk(sk2)->num == snum && 191 if (inet_sk(sk2)->num == snum &&
@@ -195,12 +193,12 @@ gotit:
195 (!sk2->sk_reuse || !sk->sk_reuse) && 193 (!sk2->sk_reuse || !sk->sk_reuse) &&
196 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 194 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
197 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 195 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
198 (*saddr_cmp)(sk, sk2) ) 196 (*saddr_comp)(sk, sk2) )
199 goto fail; 197 goto fail;
200 } 198 }
201 inet_sk(sk)->num = snum; 199 inet_sk(sk)->num = snum;
202 if (sk_unhashed(sk)) { 200 if (sk_unhashed(sk)) {
203 head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; 201 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
204 sk_add_node(sk, head); 202 sk_add_node(sk, head);
205 sock_prot_inc_use(sk->sk_prot); 203 sock_prot_inc_use(sk->sk_prot);
206 } 204 }
@@ -210,7 +208,13 @@ fail:
210 return error; 208 return error;
211} 209}
212 210
213static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 211__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
212 int (*scmp)(const struct sock *, const struct sock *))
213{
214 return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
215}
216
217inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
214{ 218{
215 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 219 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
216 220
@@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
224 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); 228 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
225} 229}
226 230
227
228static void udp_v4_hash(struct sock *sk)
229{
230 BUG();
231}
232
233static void udp_v4_unhash(struct sock *sk)
234{
235 write_lock_bh(&udp_hash_lock);
236 if (sk_del_node_init(sk)) {
237 inet_sk(sk)->num = 0;
238 sock_prot_dec_use(sk->sk_prot);
239 }
240 write_unlock_bh(&udp_hash_lock);
241}
242
243/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 231/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
244 * harder than this. -DaveM 232 * harder than this. -DaveM
245 */ 233 */
246static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, 234static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
247 __be32 daddr, __be16 dport, int dif) 235 __be32 daddr, __be16 dport,
236 int dif, struct hlist_head udptable[])
248{ 237{
249 struct sock *sk, *result = NULL; 238 struct sock *sk, *result = NULL;
250 struct hlist_node *node; 239 struct hlist_node *node;
251 unsigned short hnum = ntohs(dport); 240 unsigned short hnum = ntohs(dport);
252 int badness = -1; 241 int badness = -1;
253 242
254 sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { 243 read_lock(&udp_hash_lock);
244 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
255 struct inet_sock *inet = inet_sk(sk); 245 struct inet_sock *inet = inet_sk(sk);
256 246
257 if (inet->num == hnum && !ipv6_only_sock(sk)) { 247 if (inet->num == hnum && !ipv6_only_sock(sk)) {
@@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport,
285 } 275 }
286 } 276 }
287 } 277 }
288 return result; 278 if (result)
289} 279 sock_hold(result);
290
291static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport,
292 __be32 daddr, __be16 dport, int dif)
293{
294 struct sock *sk;
295
296 read_lock(&udp_hash_lock);
297 sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
298 if (sk)
299 sock_hold(sk);
300 read_unlock(&udp_hash_lock); 280 read_unlock(&udp_hash_lock);
301 return sk; 281 return result;
302} 282}
303 283
304static inline struct sock *udp_v4_mcast_next(struct sock *sk, 284static inline struct sock *udp_v4_mcast_next(struct sock *sk,
@@ -340,7 +320,7 @@ found:
340 * to find the appropriate port. 320 * to find the appropriate port.
341 */ 321 */
342 322
343void udp_err(struct sk_buff *skb, u32 info) 323void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
344{ 324{
345 struct inet_sock *inet; 325 struct inet_sock *inet;
346 struct iphdr *iph = (struct iphdr*)skb->data; 326 struct iphdr *iph = (struct iphdr*)skb->data;
@@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info)
351 int harderr; 331 int harderr;
352 int err; 332 int err;
353 333
354 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); 334 sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
335 skb->dev->ifindex, udptable );
355 if (sk == NULL) { 336 if (sk == NULL) {
356 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 337 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
357 return; /* No socket for error */ 338 return; /* No socket for error */
@@ -405,6 +386,11 @@ out:
405 sock_put(sk); 386 sock_put(sk);
406} 387}
407 388
389__inline__ void udp_err(struct sk_buff *skb, u32 info)
390{
391 return __udp4_lib_err(skb, info, udp_hash);
392}
393
408/* 394/*
409 * Throw away all pending data and cancel the corking. Socket is locked. 395 * Throw away all pending data and cancel the corking. Socket is locked.
410 */ 396 */
@@ -419,16 +405,58 @@ static void udp_flush_pending_frames(struct sock *sk)
419 } 405 }
420} 406}
421 407
408/**
409 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
410 * @sk: socket we are sending on
411 * @skb: sk_buff containing the filled-in UDP header
412 * (checksum field must be zeroed out)
413 */
414static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
415 __be32 src, __be32 dst, int len )
416{
417 unsigned int offset;
418 struct udphdr *uh = skb->h.uh;
419 __wsum csum = 0;
420
421 if (skb_queue_len(&sk->sk_write_queue) == 1) {
422 /*
423 * Only one fragment on the socket.
424 */
425 skb->csum_offset = offsetof(struct udphdr, check);
426 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
427 } else {
428 /*
429 * HW-checksum won't work as there are two or more
430 * fragments on the socket so that all csums of sk_buffs
431 * should be together
432 */
433 offset = skb->h.raw - skb->data;
434 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
435
436 skb->ip_summed = CHECKSUM_NONE;
437
438 skb_queue_walk(&sk->sk_write_queue, skb) {
439 csum = csum_add(csum, skb->csum);
440 }
441
442 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
443 if (uh->check == 0)
444 uh->check = CSUM_MANGLED_0;
445 }
446}
447
422/* 448/*
423 * Push out all pending data as one UDP datagram. Socket is locked. 449 * Push out all pending data as one UDP datagram. Socket is locked.
424 */ 450 */
425static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) 451static int udp_push_pending_frames(struct sock *sk)
426{ 452{
453 struct udp_sock *up = udp_sk(sk);
427 struct inet_sock *inet = inet_sk(sk); 454 struct inet_sock *inet = inet_sk(sk);
428 struct flowi *fl = &inet->cork.fl; 455 struct flowi *fl = &inet->cork.fl;
429 struct sk_buff *skb; 456 struct sk_buff *skb;
430 struct udphdr *uh; 457 struct udphdr *uh;
431 int err = 0; 458 int err = 0;
459 __wsum csum = 0;
432 460
433 /* Grab the skbuff where UDP header space exists. */ 461 /* Grab the skbuff where UDP header space exists. */
434 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) 462 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
@@ -443,52 +471,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
443 uh->len = htons(up->len); 471 uh->len = htons(up->len);
444 uh->check = 0; 472 uh->check = 0;
445 473
446 if (sk->sk_no_check == UDP_CSUM_NOXMIT) { 474 if (up->pcflag) /* UDP-Lite */
475 csum = udplite_csum_outgoing(sk, skb);
476
477 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
478
447 skb->ip_summed = CHECKSUM_NONE; 479 skb->ip_summed = CHECKSUM_NONE;
448 goto send; 480 goto send;
449 }
450 481
451 if (skb_queue_len(&sk->sk_write_queue) == 1) { 482 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
452 /*
453 * Only one fragment on the socket.
454 */
455 if (skb->ip_summed == CHECKSUM_PARTIAL) {
456 skb->csum = offsetof(struct udphdr, check);
457 uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
458 up->len, IPPROTO_UDP, 0);
459 } else {
460 skb->csum = csum_partial((char *)uh,
461 sizeof(struct udphdr), skb->csum);
462 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
463 up->len, IPPROTO_UDP, skb->csum);
464 if (uh->check == 0)
465 uh->check = -1;
466 }
467 } else {
468 unsigned int csum = 0;
469 /*
470 * HW-checksum won't work as there are two or more
471 * fragments on the socket so that all csums of sk_buffs
472 * should be together.
473 */
474 if (skb->ip_summed == CHECKSUM_PARTIAL) {
475 int offset = (unsigned char *)uh - skb->data;
476 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
477 483
478 skb->ip_summed = CHECKSUM_NONE; 484 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
479 } else { 485 goto send;
480 skb->csum = csum_partial((char *)uh, 486
481 sizeof(struct udphdr), skb->csum); 487 } else /* `normal' UDP */
482 } 488 csum = udp_csum_outgoing(sk, skb);
489
490 /* add protocol-dependent pseudo-header */
491 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
492 sk->sk_protocol, csum );
493 if (uh->check == 0)
494 uh->check = CSUM_MANGLED_0;
483 495
484 skb_queue_walk(&sk->sk_write_queue, skb) {
485 csum = csum_add(csum, skb->csum);
486 }
487 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
488 up->len, IPPROTO_UDP, csum);
489 if (uh->check == 0)
490 uh->check = -1;
491 }
492send: 496send:
493 err = ip_push_pending_frames(sk); 497 err = ip_push_pending_frames(sk);
494out: 498out:
@@ -497,12 +501,6 @@ out:
497 return err; 501 return err;
498} 502}
499 503
500
501static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base)
502{
503 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
504}
505
506int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 504int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
507 size_t len) 505 size_t len)
508{ 506{
@@ -516,8 +514,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
516 __be32 daddr, faddr, saddr; 514 __be32 daddr, faddr, saddr;
517 __be16 dport; 515 __be16 dport;
518 u8 tos; 516 u8 tos;
519 int err; 517 int err, is_udplite = up->pcflag;
520 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 518 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
519 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
521 520
522 if (len > 0xFFFF) 521 if (len > 0xFFFF)
523 return -EMSGSIZE; 522 return -EMSGSIZE;
@@ -622,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
622 { .daddr = faddr, 621 { .daddr = faddr,
623 .saddr = saddr, 622 .saddr = saddr,
624 .tos = tos } }, 623 .tos = tos } },
625 .proto = IPPROTO_UDP, 624 .proto = sk->sk_protocol,
626 .uli_u = { .ports = 625 .uli_u = { .ports =
627 { .sport = inet->sport, 626 { .sport = inet->sport,
628 .dport = dport } } }; 627 .dport = dport } } };
@@ -668,13 +667,14 @@ back_from_confirm:
668 667
669do_append_data: 668do_append_data:
670 up->len += ulen; 669 up->len += ulen;
671 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 670 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
672 sizeof(struct udphdr), &ipc, rt, 671 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
672 sizeof(struct udphdr), &ipc, rt,
673 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 673 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
674 if (err) 674 if (err)
675 udp_flush_pending_frames(sk); 675 udp_flush_pending_frames(sk);
676 else if (!corkreq) 676 else if (!corkreq)
677 err = udp_push_pending_frames(sk, up); 677 err = udp_push_pending_frames(sk);
678 else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) 678 else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
679 up->pending = 0; 679 up->pending = 0;
680 release_sock(sk); 680 release_sock(sk);
@@ -684,7 +684,7 @@ out:
684 if (free) 684 if (free)
685 kfree(ipc.opt); 685 kfree(ipc.opt);
686 if (!err) { 686 if (!err) {
687 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); 687 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
688 return len; 688 return len;
689 } 689 }
690 /* 690 /*
@@ -695,7 +695,7 @@ out:
695 * seems like overkill. 695 * seems like overkill.
696 */ 696 */
697 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 697 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
698 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); 698 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
699 } 699 }
700 return err; 700 return err;
701 701
@@ -707,8 +707,8 @@ do_confirm:
707 goto out; 707 goto out;
708} 708}
709 709
710static int udp_sendpage(struct sock *sk, struct page *page, int offset, 710int udp_sendpage(struct sock *sk, struct page *page, int offset,
711 size_t size, int flags) 711 size_t size, int flags)
712{ 712{
713 struct udp_sock *up = udp_sk(sk); 713 struct udp_sock *up = udp_sk(sk);
714 int ret; 714 int ret;
@@ -747,7 +747,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset,
747 747
748 up->len += size; 748 up->len += size;
749 if (!(up->corkflag || (flags&MSG_MORE))) 749 if (!(up->corkflag || (flags&MSG_MORE)))
750 ret = udp_push_pending_frames(sk, up); 750 ret = udp_push_pending_frames(sk);
751 if (!ret) 751 if (!ret)
752 ret = size; 752 ret = size;
753out: 753out:
@@ -795,29 +795,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
795 return(0); 795 return(0);
796} 796}
797 797
798static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
799{
800 return __skb_checksum_complete(skb);
801}
802
803static __inline__ int udp_checksum_complete(struct sk_buff *skb)
804{
805 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
806 __udp_checksum_complete(skb);
807}
808
809/* 798/*
810 * This should be easy, if there is something there we 799 * This should be easy, if there is something there we
811 * return it, otherwise we block. 800 * return it, otherwise we block.
812 */ 801 */
813 802
814static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 803int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
815 size_t len, int noblock, int flags, int *addr_len) 804 size_t len, int noblock, int flags, int *addr_len)
816{ 805{
817 struct inet_sock *inet = inet_sk(sk); 806 struct inet_sock *inet = inet_sk(sk);
818 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 807 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
819 struct sk_buff *skb; 808 struct sk_buff *skb;
820 int copied, err; 809 int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
821 810
822 /* 811 /*
823 * Check any passed addresses 812 * Check any passed addresses
@@ -839,15 +828,25 @@ try_again:
839 msg->msg_flags |= MSG_TRUNC; 828 msg->msg_flags |= MSG_TRUNC;
840 } 829 }
841 830
842 if (skb->ip_summed==CHECKSUM_UNNECESSARY) { 831 /*
843 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 832 * Decide whether to checksum and/or copy data.
844 copied); 833 *
845 } else if (msg->msg_flags&MSG_TRUNC) { 834 * UDP: checksum may have been computed in HW,
846 if (__udp_checksum_complete(skb)) 835 * (re-)compute it if message is truncated.
836 * UDP-Lite: always needs to checksum, no HW support.
837 */
838 copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
839
840 if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
841 if (__udp_lib_checksum_complete(skb))
847 goto csum_copy_err; 842 goto csum_copy_err;
848 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 843 copy_only = 1;
849 copied); 844 }
850 } else { 845
846 if (copy_only)
847 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
848 msg->msg_iov, copied );
849 else {
851 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); 850 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
852 851
853 if (err == -EINVAL) 852 if (err == -EINVAL)
@@ -880,7 +879,7 @@ out:
880 return err; 879 return err;
881 880
882csum_copy_err: 881csum_copy_err:
883 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 882 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
884 883
885 skb_kill_datagram(sk, skb, flags); 884 skb_kill_datagram(sk, skb, flags);
886 885
@@ -912,11 +911,6 @@ int udp_disconnect(struct sock *sk, int flags)
912 return 0; 911 return 0;
913} 912}
914 913
915static void udp_close(struct sock *sk, long timeout)
916{
917 sk_common_release(sk);
918}
919
920/* return: 914/* return:
921 * 1 if the UDP system should process it 915 * 1 if the UDP system should process it
922 * 0 if we should drop this packet 916 * 0 if we should drop this packet
@@ -928,23 +922,32 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
928 return 1; 922 return 1;
929#else 923#else
930 struct udp_sock *up = udp_sk(sk); 924 struct udp_sock *up = udp_sk(sk);
931 struct udphdr *uh = skb->h.uh; 925 struct udphdr *uh;
932 struct iphdr *iph; 926 struct iphdr *iph;
933 int iphlen, len; 927 int iphlen, len;
934 928
935 __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); 929 __u8 *udpdata;
936 __be32 *udpdata32 = (__be32 *)udpdata; 930 __be32 *udpdata32;
937 __u16 encap_type = up->encap_type; 931 __u16 encap_type = up->encap_type;
938 932
939 /* if we're overly short, let UDP handle it */ 933 /* if we're overly short, let UDP handle it */
940 if (udpdata > skb->tail) 934 len = skb->len - sizeof(struct udphdr);
935 if (len <= 0)
941 return 1; 936 return 1;
942 937
943 /* if this is not encapsulated socket, then just return now */ 938 /* if this is not encapsulated socket, then just return now */
944 if (!encap_type) 939 if (!encap_type)
945 return 1; 940 return 1;
946 941
947 len = skb->tail - udpdata; 942 /* If this is a paged skb, make sure we pull up
943 * whatever data we need to look at. */
944 if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
945 return 1;
946
947 /* Now we can get the pointers */
948 uh = skb->h.uh;
949 udpdata = (__u8 *)uh + sizeof(struct udphdr);
950 udpdata32 = (__be32 *)udpdata;
948 951
949 switch (encap_type) { 952 switch (encap_type) {
950 default: 953 default:
@@ -1013,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
1013 * Note that in the success and error cases, the skb is assumed to 1016 * Note that in the success and error cases, the skb is assumed to
1014 * have either been requeued or freed. 1017 * have either been requeued or freed.
1015 */ 1018 */
1016static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 1019int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1017{ 1020{
1018 struct udp_sock *up = udp_sk(sk); 1021 struct udp_sock *up = udp_sk(sk);
1019 int rc; 1022 int rc;
@@ -1021,10 +1024,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1021 /* 1024 /*
1022 * Charge it to the socket, dropping if the queue is full. 1025 * Charge it to the socket, dropping if the queue is full.
1023 */ 1026 */
1024 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 1027 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1025 kfree_skb(skb); 1028 goto drop;
1026 return -1;
1027 }
1028 nf_reset(skb); 1029 nf_reset(skb);
1029 1030
1030 if (up->encap_type) { 1031 if (up->encap_type) {
@@ -1048,31 +1049,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1048 if (ret < 0) { 1049 if (ret < 0) {
1049 /* process the ESP packet */ 1050 /* process the ESP packet */
1050 ret = xfrm4_rcv_encap(skb, up->encap_type); 1051 ret = xfrm4_rcv_encap(skb, up->encap_type);
1051 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); 1052 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
1052 return -ret; 1053 return -ret;
1053 } 1054 }
1054 /* FALLTHROUGH -- it's a UDP Packet */ 1055 /* FALLTHROUGH -- it's a UDP Packet */
1055 } 1056 }
1056 1057
1057 if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { 1058 /*
1058 if (__udp_checksum_complete(skb)) { 1059 * UDP-Lite specific tests, ignored on UDP sockets
1059 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1060 */
1060 kfree_skb(skb); 1061 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
1061 return -1; 1062
1063 /*
1064 * MIB statistics other than incrementing the error count are
1065 * disabled for the following two types of errors: these depend
1066 * on the application settings, not on the functioning of the
1067 * protocol stack as such.
1068 *
1069 * RFC 3828 here recommends (sec 3.3): "There should also be a
1070 * way ... to ... at least let the receiving application block
1071 * delivery of packets with coverage values less than a value
1072 * provided by the application."
1073 */
1074 if (up->pcrlen == 0) { /* full coverage was set */
1075 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1076 "%d while full coverage %d requested\n",
1077 UDP_SKB_CB(skb)->cscov, skb->len);
1078 goto drop;
1062 } 1079 }
1080 /* The next case involves violating the min. coverage requested
1081 * by the receiver. This is subtle: if receiver wants x and x is
1082 * greater than the buffersize/MTU then receiver will complain
1083 * that it wants x while sender emits packets of smaller size y.
1084 * Therefore the above ...()->partial_cov statement is essential.
1085 */
1086 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
1087 LIMIT_NETDEBUG(KERN_WARNING
1088 "UDPLITE: coverage %d too small, need min %d\n",
1089 UDP_SKB_CB(skb)->cscov, up->pcrlen);
1090 goto drop;
1091 }
1092 }
1093
1094 if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
1095 if (__udp_lib_checksum_complete(skb))
1096 goto drop;
1063 skb->ip_summed = CHECKSUM_UNNECESSARY; 1097 skb->ip_summed = CHECKSUM_UNNECESSARY;
1064 } 1098 }
1065 1099
1066 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 1100 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1067 /* Note that an ENOMEM error is charged twice */ 1101 /* Note that an ENOMEM error is charged twice */
1068 if (rc == -ENOMEM) 1102 if (rc == -ENOMEM)
1069 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); 1103 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
1070 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1104 goto drop;
1071 kfree_skb(skb);
1072 return -1;
1073 } 1105 }
1074 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); 1106
1107 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
1075 return 0; 1108 return 0;
1109
1110drop:
1111 UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
1112 kfree_skb(skb);
1113 return -1;
1076} 1114}
1077 1115
1078/* 1116/*
@@ -1081,14 +1119,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1081 * Note: called only from the BH handler context, 1119 * Note: called only from the BH handler context,
1082 * so we don't need to lock the hashes. 1120 * so we don't need to lock the hashes.
1083 */ 1121 */
1084static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, 1122static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1085 __be32 saddr, __be32 daddr) 1123 struct udphdr *uh,
1124 __be32 saddr, __be32 daddr,
1125 struct hlist_head udptable[])
1086{ 1126{
1087 struct sock *sk; 1127 struct sock *sk;
1088 int dif; 1128 int dif;
1089 1129
1090 read_lock(&udp_hash_lock); 1130 read_lock(&udp_hash_lock);
1091 sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); 1131 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
1092 dif = skb->dev->ifindex; 1132 dif = skb->dev->ifindex;
1093 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); 1133 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1094 if (sk) { 1134 if (sk) {
@@ -1122,65 +1162,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
1122 * Otherwise, csum completion requires checksumming packet body, 1162 * Otherwise, csum completion requires checksumming packet body,
1123 * including udp header and folding it to skb->csum. 1163 * including udp header and folding it to skb->csum.
1124 */ 1164 */
1125static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, 1165static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
1126 unsigned short ulen, __be32 saddr, __be32 daddr)
1127{ 1166{
1128 if (uh->check == 0) { 1167 if (uh->check == 0) {
1129 skb->ip_summed = CHECKSUM_UNNECESSARY; 1168 skb->ip_summed = CHECKSUM_UNNECESSARY;
1130 } else if (skb->ip_summed == CHECKSUM_COMPLETE) { 1169 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1131 if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) 1170 if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
1171 skb->len, IPPROTO_UDP, skb->csum ))
1132 skb->ip_summed = CHECKSUM_UNNECESSARY; 1172 skb->ip_summed = CHECKSUM_UNNECESSARY;
1133 } 1173 }
1134 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 1174 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
1135 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 1175 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
1176 skb->nh.iph->daddr,
1177 skb->len, IPPROTO_UDP, 0);
1136 /* Probably, we should checksum udp header (it should be in cache 1178 /* Probably, we should checksum udp header (it should be in cache
1137 * in any case) and data in tiny packets (< rx copybreak). 1179 * in any case) and data in tiny packets (< rx copybreak).
1138 */ 1180 */
1181
1182 /* UDP = UDP-Lite with a non-partial checksum coverage */
1183 UDP_SKB_CB(skb)->partial_cov = 0;
1139} 1184}
1140 1185
1141/* 1186/*
1142 * All we need to do is get the socket, and then do a checksum. 1187 * All we need to do is get the socket, and then do a checksum.
1143 */ 1188 */
1144 1189
1145int udp_rcv(struct sk_buff *skb) 1190int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1191 int is_udplite)
1146{ 1192{
1147 struct sock *sk; 1193 struct sock *sk;
1148 struct udphdr *uh; 1194 struct udphdr *uh = skb->h.uh;
1149 unsigned short ulen; 1195 unsigned short ulen;
1150 struct rtable *rt = (struct rtable*)skb->dst; 1196 struct rtable *rt = (struct rtable*)skb->dst;
1151 __be32 saddr = skb->nh.iph->saddr; 1197 __be32 saddr = skb->nh.iph->saddr;
1152 __be32 daddr = skb->nh.iph->daddr; 1198 __be32 daddr = skb->nh.iph->daddr;
1153 int len = skb->len;
1154 1199
1155 /* 1200 /*
1156 * Validate the packet and the UDP length. 1201 * Validate the packet.
1157 */ 1202 */
1158 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 1203 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1159 goto no_header; 1204 goto drop; /* No space for header. */
1160
1161 uh = skb->h.uh;
1162 1205
1163 ulen = ntohs(uh->len); 1206 ulen = ntohs(uh->len);
1164 1207 if (ulen > skb->len)
1165 if (ulen > len || ulen < sizeof(*uh))
1166 goto short_packet; 1208 goto short_packet;
1167 1209
1168 if (pskb_trim_rcsum(skb, ulen)) 1210 if(! is_udplite ) { /* UDP validates ulen. */
1169 goto short_packet; 1211
1212 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1213 goto short_packet;
1170 1214
1171 udp_checksum_init(skb, uh, ulen, saddr, daddr); 1215 udp4_csum_init(skb, uh);
1216
1217 } else { /* UDP-Lite validates cscov. */
1218 if (udplite4_csum_init(skb, uh))
1219 goto csum_error;
1220 }
1172 1221
1173 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1222 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1174 return udp_v4_mcast_deliver(skb, uh, saddr, daddr); 1223 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1175 1224
1176 sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); 1225 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
1226 skb->dev->ifindex, udptable );
1177 1227
1178 if (sk != NULL) { 1228 if (sk != NULL) {
1179 int ret = udp_queue_rcv_skb(sk, skb); 1229 int ret = udp_queue_rcv_skb(sk, skb);
1180 sock_put(sk); 1230 sock_put(sk);
1181 1231
1182 /* a return value > 0 means to resubmit the input, but 1232 /* a return value > 0 means to resubmit the input, but
1183 * it it wants the return to be -protocol, or 0 1233 * it wants the return to be -protocol, or 0
1184 */ 1234 */
1185 if (ret > 0) 1235 if (ret > 0)
1186 return -ret; 1236 return -ret;
@@ -1192,10 +1242,10 @@ int udp_rcv(struct sk_buff *skb)
1192 nf_reset(skb); 1242 nf_reset(skb);
1193 1243
1194 /* No socket. Drop packet silently, if checksum is wrong */ 1244 /* No socket. Drop packet silently, if checksum is wrong */
1195 if (udp_checksum_complete(skb)) 1245 if (udp_lib_checksum_complete(skb))
1196 goto csum_error; 1246 goto csum_error;
1197 1247
1198 UDP_INC_STATS_BH(UDP_MIB_NOPORTS); 1248 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
1199 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1249 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1200 1250
1201 /* 1251 /*
@@ -1206,36 +1256,40 @@ int udp_rcv(struct sk_buff *skb)
1206 return(0); 1256 return(0);
1207 1257
1208short_packet: 1258short_packet:
1209 LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", 1259 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1260 is_udplite? "-Lite" : "",
1210 NIPQUAD(saddr), 1261 NIPQUAD(saddr),
1211 ntohs(uh->source), 1262 ntohs(uh->source),
1212 ulen, 1263 ulen,
1213 len, 1264 skb->len,
1214 NIPQUAD(daddr), 1265 NIPQUAD(daddr),
1215 ntohs(uh->dest)); 1266 ntohs(uh->dest));
1216no_header: 1267 goto drop;
1217 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
1218 kfree_skb(skb);
1219 return(0);
1220 1268
1221csum_error: 1269csum_error:
1222 /* 1270 /*
1223 * RFC1122: OK. Discards the bad packet silently (as far as 1271 * RFC1122: OK. Discards the bad packet silently (as far as
1224 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1272 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1225 */ 1273 */
1226 LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", 1274 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1275 is_udplite? "-Lite" : "",
1227 NIPQUAD(saddr), 1276 NIPQUAD(saddr),
1228 ntohs(uh->source), 1277 ntohs(uh->source),
1229 NIPQUAD(daddr), 1278 NIPQUAD(daddr),
1230 ntohs(uh->dest), 1279 ntohs(uh->dest),
1231 ulen); 1280 ulen);
1232drop: 1281drop:
1233 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1282 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1234 kfree_skb(skb); 1283 kfree_skb(skb);
1235 return(0); 1284 return(0);
1236} 1285}
1237 1286
1238static int udp_destroy_sock(struct sock *sk) 1287__inline__ int udp_rcv(struct sk_buff *skb)
1288{
1289 return __udp4_lib_rcv(skb, udp_hash, 0);
1290}
1291
1292int udp_destroy_sock(struct sock *sk)
1239{ 1293{
1240 lock_sock(sk); 1294 lock_sock(sk);
1241 udp_flush_pending_frames(sk); 1295 udp_flush_pending_frames(sk);
@@ -1246,8 +1300,9 @@ static int udp_destroy_sock(struct sock *sk)
1246/* 1300/*
1247 * Socket option code for UDP 1301 * Socket option code for UDP
1248 */ 1302 */
1249static int do_udp_setsockopt(struct sock *sk, int level, int optname, 1303int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1250 char __user *optval, int optlen) 1304 char __user *optval, int optlen,
1305 int (*push_pending_frames)(struct sock *))
1251{ 1306{
1252 struct udp_sock *up = udp_sk(sk); 1307 struct udp_sock *up = udp_sk(sk);
1253 int val; 1308 int val;
@@ -1266,7 +1321,7 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
1266 } else { 1321 } else {
1267 up->corkflag = 0; 1322 up->corkflag = 0;
1268 lock_sock(sk); 1323 lock_sock(sk);
1269 udp_push_pending_frames(sk, up); 1324 (*push_pending_frames)(sk);
1270 release_sock(sk); 1325 release_sock(sk);
1271 } 1326 }
1272 break; 1327 break;
@@ -1284,6 +1339,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
1284 } 1339 }
1285 break; 1340 break;
1286 1341
1342 /*
1343 * UDP-Lite's partial checksum coverage (RFC 3828).
1344 */
1345 /* The sender sets actual checksum coverage length via this option.
1346 * The case coverage > packet length is handled by send module. */
1347 case UDPLITE_SEND_CSCOV:
1348 if (!up->pcflag) /* Disable the option on UDP sockets */
1349 return -ENOPROTOOPT;
1350 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1351 val = 8;
1352 up->pcslen = val;
1353 up->pcflag |= UDPLITE_SEND_CC;
1354 break;
1355
1356 /* The receiver specifies a minimum checksum coverage value. To make
1357 * sense, this should be set to at least 8 (as done below). If zero is
1358 * used, this again means full checksum coverage. */
1359 case UDPLITE_RECV_CSCOV:
1360 if (!up->pcflag) /* Disable the option on UDP sockets */
1361 return -ENOPROTOOPT;
1362 if (val != 0 && val < 8) /* Avoid silly minimal values. */
1363 val = 8;
1364 up->pcrlen = val;
1365 up->pcflag |= UDPLITE_RECV_CC;
1366 break;
1367
1287 default: 1368 default:
1288 err = -ENOPROTOOPT; 1369 err = -ENOPROTOOPT;
1289 break; 1370 break;
@@ -1292,26 +1373,28 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
1292 return err; 1373 return err;
1293} 1374}
1294 1375
1295static int udp_setsockopt(struct sock *sk, int level, int optname, 1376int udp_setsockopt(struct sock *sk, int level, int optname,
1296 char __user *optval, int optlen) 1377 char __user *optval, int optlen)
1297{ 1378{
1298 if (level != SOL_UDP) 1379 if (level == SOL_UDP || level == SOL_UDPLITE)
1299 return ip_setsockopt(sk, level, optname, optval, optlen); 1380 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1300 return do_udp_setsockopt(sk, level, optname, optval, optlen); 1381 udp_push_pending_frames);
1382 return ip_setsockopt(sk, level, optname, optval, optlen);
1301} 1383}
1302 1384
1303#ifdef CONFIG_COMPAT 1385#ifdef CONFIG_COMPAT
1304static int compat_udp_setsockopt(struct sock *sk, int level, int optname, 1386int compat_udp_setsockopt(struct sock *sk, int level, int optname,
1305 char __user *optval, int optlen) 1387 char __user *optval, int optlen)
1306{ 1388{
1307 if (level != SOL_UDP) 1389 if (level == SOL_UDP || level == SOL_UDPLITE)
1308 return compat_ip_setsockopt(sk, level, optname, optval, optlen); 1390 return udp_lib_setsockopt(sk, level, optname, optval, optlen,
1309 return do_udp_setsockopt(sk, level, optname, optval, optlen); 1391 udp_push_pending_frames);
1392 return compat_ip_setsockopt(sk, level, optname, optval, optlen);
1310} 1393}
1311#endif 1394#endif
1312 1395
1313static int do_udp_getsockopt(struct sock *sk, int level, int optname, 1396int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1314 char __user *optval, int __user *optlen) 1397 char __user *optval, int __user *optlen)
1315{ 1398{
1316 struct udp_sock *up = udp_sk(sk); 1399 struct udp_sock *up = udp_sk(sk);
1317 int val, len; 1400 int val, len;
@@ -1333,6 +1416,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname,
1333 val = up->encap_type; 1416 val = up->encap_type;
1334 break; 1417 break;
1335 1418
1419 /* The following two cannot be changed on UDP sockets, the return is
1420 * always 0 (which corresponds to the full checksum coverage of UDP). */
1421 case UDPLITE_SEND_CSCOV:
1422 val = up->pcslen;
1423 break;
1424
1425 case UDPLITE_RECV_CSCOV:
1426 val = up->pcrlen;
1427 break;
1428
1336 default: 1429 default:
1337 return -ENOPROTOOPT; 1430 return -ENOPROTOOPT;
1338 }; 1431 };
@@ -1344,21 +1437,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname,
1344 return 0; 1437 return 0;
1345} 1438}
1346 1439
1347static int udp_getsockopt(struct sock *sk, int level, int optname, 1440int udp_getsockopt(struct sock *sk, int level, int optname,
1348 char __user *optval, int __user *optlen) 1441 char __user *optval, int __user *optlen)
1349{ 1442{
1350 if (level != SOL_UDP) 1443 if (level == SOL_UDP || level == SOL_UDPLITE)
1351 return ip_getsockopt(sk, level, optname, optval, optlen); 1444 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1352 return do_udp_getsockopt(sk, level, optname, optval, optlen); 1445 return ip_getsockopt(sk, level, optname, optval, optlen);
1353} 1446}
1354 1447
1355#ifdef CONFIG_COMPAT 1448#ifdef CONFIG_COMPAT
1356static int compat_udp_getsockopt(struct sock *sk, int level, int optname, 1449int compat_udp_getsockopt(struct sock *sk, int level, int optname,
1357 char __user *optval, int __user *optlen) 1450 char __user *optval, int __user *optlen)
1358{ 1451{
1359 if (level != SOL_UDP) 1452 if (level == SOL_UDP || level == SOL_UDPLITE)
1360 return compat_ip_getsockopt(sk, level, optname, optval, optlen); 1453 return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1361 return do_udp_getsockopt(sk, level, optname, optval, optlen); 1454 return compat_ip_getsockopt(sk, level, optname, optval, optlen);
1362} 1455}
1363#endif 1456#endif
1364/** 1457/**
@@ -1378,7 +1471,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1378{ 1471{
1379 unsigned int mask = datagram_poll(file, sock, wait); 1472 unsigned int mask = datagram_poll(file, sock, wait);
1380 struct sock *sk = sock->sk; 1473 struct sock *sk = sock->sk;
1381 1474 int is_lite = IS_UDPLITE(sk);
1475
1382 /* Check for false positives due to checksum errors */ 1476 /* Check for false positives due to checksum errors */
1383 if ( (mask & POLLRDNORM) && 1477 if ( (mask & POLLRDNORM) &&
1384 !(file->f_flags & O_NONBLOCK) && 1478 !(file->f_flags & O_NONBLOCK) &&
@@ -1388,8 +1482,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1388 1482
1389 spin_lock_bh(&rcvq->lock); 1483 spin_lock_bh(&rcvq->lock);
1390 while ((skb = skb_peek(rcvq)) != NULL) { 1484 while ((skb = skb_peek(rcvq)) != NULL) {
1391 if (udp_checksum_complete(skb)) { 1485 if (udp_lib_checksum_complete(skb)) {
1392 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1486 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
1393 __skb_unlink(skb, rcvq); 1487 __skb_unlink(skb, rcvq);
1394 kfree_skb(skb); 1488 kfree_skb(skb);
1395 } else { 1489 } else {
@@ -1411,7 +1505,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1411struct proto udp_prot = { 1505struct proto udp_prot = {
1412 .name = "UDP", 1506 .name = "UDP",
1413 .owner = THIS_MODULE, 1507 .owner = THIS_MODULE,
1414 .close = udp_close, 1508 .close = udp_lib_close,
1415 .connect = ip4_datagram_connect, 1509 .connect = ip4_datagram_connect,
1416 .disconnect = udp_disconnect, 1510 .disconnect = udp_disconnect,
1417 .ioctl = udp_ioctl, 1511 .ioctl = udp_ioctl,
@@ -1422,8 +1516,8 @@ struct proto udp_prot = {
1422 .recvmsg = udp_recvmsg, 1516 .recvmsg = udp_recvmsg,
1423 .sendpage = udp_sendpage, 1517 .sendpage = udp_sendpage,
1424 .backlog_rcv = udp_queue_rcv_skb, 1518 .backlog_rcv = udp_queue_rcv_skb,
1425 .hash = udp_v4_hash, 1519 .hash = udp_lib_hash,
1426 .unhash = udp_v4_unhash, 1520 .unhash = udp_lib_unhash,
1427 .get_port = udp_v4_get_port, 1521 .get_port = udp_v4_get_port,
1428 .obj_size = sizeof(struct udp_sock), 1522 .obj_size = sizeof(struct udp_sock),
1429#ifdef CONFIG_COMPAT 1523#ifdef CONFIG_COMPAT
@@ -1442,7 +1536,7 @@ static struct sock *udp_get_first(struct seq_file *seq)
1442 1536
1443 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1537 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
1444 struct hlist_node *node; 1538 struct hlist_node *node;
1445 sk_for_each(sk, node, &udp_hash[state->bucket]) { 1539 sk_for_each(sk, node, state->hashtable + state->bucket) {
1446 if (sk->sk_family == state->family) 1540 if (sk->sk_family == state->family)
1447 goto found; 1541 goto found;
1448 } 1542 }
@@ -1463,7 +1557,7 @@ try_again:
1463 } while (sk && sk->sk_family != state->family); 1557 } while (sk && sk->sk_family != state->family);
1464 1558
1465 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { 1559 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
1466 sk = sk_head(&udp_hash[state->bucket]); 1560 sk = sk_head(state->hashtable + state->bucket);
1467 goto try_again; 1561 goto try_again;
1468 } 1562 }
1469 return sk; 1563 return sk;
@@ -1513,6 +1607,7 @@ static int udp_seq_open(struct inode *inode, struct file *file)
1513 if (!s) 1607 if (!s)
1514 goto out; 1608 goto out;
1515 s->family = afinfo->family; 1609 s->family = afinfo->family;
1610 s->hashtable = afinfo->hashtable;
1516 s->seq_ops.start = udp_seq_start; 1611 s->seq_ops.start = udp_seq_start;
1517 s->seq_ops.next = udp_seq_next; 1612 s->seq_ops.next = udp_seq_next;
1518 s->seq_ops.show = afinfo->seq_show; 1613 s->seq_ops.show = afinfo->seq_show;
@@ -1579,7 +1674,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1579 atomic_read(&sp->sk_refcnt), sp); 1674 atomic_read(&sp->sk_refcnt), sp);
1580} 1675}
1581 1676
1582static int udp4_seq_show(struct seq_file *seq, void *v) 1677int udp4_seq_show(struct seq_file *seq, void *v)
1583{ 1678{
1584 if (v == SEQ_START_TOKEN) 1679 if (v == SEQ_START_TOKEN)
1585 seq_printf(seq, "%-127s\n", 1680 seq_printf(seq, "%-127s\n",
@@ -1602,6 +1697,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = {
1602 .owner = THIS_MODULE, 1697 .owner = THIS_MODULE,
1603 .name = "udp", 1698 .name = "udp",
1604 .family = AF_INET, 1699 .family = AF_INET,
1700 .hashtable = udp_hash,
1605 .seq_show = udp4_seq_show, 1701 .seq_show = udp4_seq_show,
1606 .seq_fops = &udp4_seq_fops, 1702 .seq_fops = &udp4_seq_fops,
1607}; 1703};
@@ -1624,6 +1720,8 @@ EXPORT_SYMBOL(udp_ioctl);
1624EXPORT_SYMBOL(udp_get_port); 1720EXPORT_SYMBOL(udp_get_port);
1625EXPORT_SYMBOL(udp_prot); 1721EXPORT_SYMBOL(udp_prot);
1626EXPORT_SYMBOL(udp_sendmsg); 1722EXPORT_SYMBOL(udp_sendmsg);
1723EXPORT_SYMBOL(udp_lib_getsockopt);
1724EXPORT_SYMBOL(udp_lib_setsockopt);
1627EXPORT_SYMBOL(udp_poll); 1725EXPORT_SYMBOL(udp_poll);
1628 1726
1629#ifdef CONFIG_PROC_FS 1727#ifdef CONFIG_PROC_FS