diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2006-11-27 14:10:57 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-12-03 00:22:46 -0500 |
commit | ba4e58eca8aa9473b44fdfd312f26c4a2e7798b3 (patch) | |
tree | 700f8f989f48da480beb83b983637cfd2b5a3f67 /net/ipv4/udp.c | |
parent | 6051e2f4fb68fc8e5343db58fa680ece376f405c (diff) |
[NET]: Supporting UDP-Lite (RFC 3828) in Linux
This is a revision of the previously submitted patch, which alters
the way files are organized and compiled in the following manner:
* UDP and UDP-Lite now use separate object files
* source file dependencies resolved via header files
net/ipv{4,6}/udp_impl.h
* order of inclusion files in udp.c/udplite.c adapted
accordingly
[NET/IPv4]: Support for the UDP-Lite protocol (RFC 3828)
This patch adds support for UDP-Lite to the IPv4 stack, provided as an
extension to the existing UDPv4 code:
* generic routines are all located in net/ipv4/udp.c
* UDP-Lite specific routines are in net/ipv4/udplite.c
* MIB/statistics support in /proc/net/snmp and /proc/net/udplite
* shared API with extensions for partial checksum coverage
[NET/IPv6]: Extension for UDP-Lite over IPv6
It extends the existing UDPv6 code base with support for UDP-Lite
in the same manner as per UDPv4. In particular,
* UDPv6 generic and shared code is in net/ipv6/udp.c
* UDP-Litev6 specific extensions are in net/ipv6/udplite.c
* MIB/statistics support in /proc/net/snmp6 and /proc/net/udplite6
* support for IPV6_ADDRFORM
* aligned the coding style of protocol initialisation with af_inet6.c
* made the error handling in udpv6_queue_rcv_skb consistent,
so that `-1' is returned in all error cases
* consolidation of shared code
[NET]: UDP-Lite Documentation and basic XFRM/Netfilter support
The UDP-Lite patch further provides
* API documentation for UDP-Lite
* basic xfrm support
* basic netfilter support for IPv4 and IPv6 (LOG target)
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r-- | net/ipv4/udp.c | 518 |
1 files changed, 300 insertions, 218 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9e1bd374875e..98ba75096175 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -92,22 +92,16 @@ | |||
92 | #include <linux/timer.h> | 92 | #include <linux/timer.h> |
93 | #include <linux/mm.h> | 93 | #include <linux/mm.h> |
94 | #include <linux/inet.h> | 94 | #include <linux/inet.h> |
95 | #include <linux/ipv6.h> | ||
96 | #include <linux/netdevice.h> | 95 | #include <linux/netdevice.h> |
97 | #include <net/snmp.h> | ||
98 | #include <net/ip.h> | ||
99 | #include <net/tcp_states.h> | 96 | #include <net/tcp_states.h> |
100 | #include <net/protocol.h> | ||
101 | #include <linux/skbuff.h> | 97 | #include <linux/skbuff.h> |
102 | #include <linux/proc_fs.h> | 98 | #include <linux/proc_fs.h> |
103 | #include <linux/seq_file.h> | 99 | #include <linux/seq_file.h> |
104 | #include <net/sock.h> | ||
105 | #include <net/udp.h> | ||
106 | #include <net/icmp.h> | 100 | #include <net/icmp.h> |
107 | #include <net/route.h> | 101 | #include <net/route.h> |
108 | #include <net/inet_common.h> | ||
109 | #include <net/checksum.h> | 102 | #include <net/checksum.h> |
110 | #include <net/xfrm.h> | 103 | #include <net/xfrm.h> |
104 | #include "udp_impl.h" | ||
111 | 105 | ||
112 | /* | 106 | /* |
113 | * Snmp MIB for the UDP layer | 107 | * Snmp MIB for the UDP layer |
@@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock); | |||
120 | 114 | ||
121 | static int udp_port_rover; | 115 | static int udp_port_rover; |
122 | 116 | ||
123 | static inline int udp_lport_inuse(u16 num) | 117 | static inline int __udp_lib_lport_inuse(__be16 num, struct hlist_head udptable[]) |
124 | { | 118 | { |
125 | struct sock *sk; | 119 | struct sock *sk; |
126 | struct hlist_node *node; | 120 | struct hlist_node *node; |
127 | 121 | ||
128 | sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) | 122 | sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) |
129 | if (inet_sk(sk)->num == num) | 123 | if (inet_sk(sk)->num == num) |
130 | return 1; | 124 | return 1; |
131 | return 0; | 125 | return 0; |
132 | } | 126 | } |
133 | 127 | ||
134 | /** | 128 | /** |
135 | * udp_get_port - common port lookup for IPv4 and IPv6 | 129 | * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 |
136 | * | 130 | * |
137 | * @sk: socket struct in question | 131 | * @sk: socket struct in question |
138 | * @snum: port number to look up | 132 | * @snum: port number to look up |
133 | * @udptable: hash list table, must be of UDP_HTABLE_SIZE | ||
134 | * @port_rover: pointer to record of last unallocated port | ||
139 | * @saddr_comp: AF-dependent comparison of bound local IP addresses | 135 | * @saddr_comp: AF-dependent comparison of bound local IP addresses |
140 | */ | 136 | */ |
141 | int udp_get_port(struct sock *sk, unsigned short snum, | 137 | int __udp_lib_get_port(struct sock *sk, unsigned short snum, |
142 | int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) | 138 | struct hlist_head udptable[], int *port_rover, |
139 | int (*saddr_comp)(const struct sock *sk1, | ||
140 | const struct sock *sk2 ) ) | ||
143 | { | 141 | { |
144 | struct hlist_node *node; | 142 | struct hlist_node *node; |
145 | struct hlist_head *head; | 143 | struct hlist_head *head; |
@@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, | |||
150 | if (snum == 0) { | 148 | if (snum == 0) { |
151 | int best_size_so_far, best, result, i; | 149 | int best_size_so_far, best, result, i; |
152 | 150 | ||
153 | if (udp_port_rover > sysctl_local_port_range[1] || | 151 | if (*port_rover > sysctl_local_port_range[1] || |
154 | udp_port_rover < sysctl_local_port_range[0]) | 152 | *port_rover < sysctl_local_port_range[0]) |
155 | udp_port_rover = sysctl_local_port_range[0]; | 153 | *port_rover = sysctl_local_port_range[0]; |
156 | best_size_so_far = 32767; | 154 | best_size_so_far = 32767; |
157 | best = result = udp_port_rover; | 155 | best = result = *port_rover; |
158 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { | 156 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { |
159 | int size; | 157 | int size; |
160 | 158 | ||
161 | head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; | 159 | head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; |
162 | if (hlist_empty(head)) { | 160 | if (hlist_empty(head)) { |
163 | if (result > sysctl_local_port_range[1]) | 161 | if (result > sysctl_local_port_range[1]) |
164 | result = sysctl_local_port_range[0] + | 162 | result = sysctl_local_port_range[0] + |
@@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum, | |||
179 | result = sysctl_local_port_range[0] | 177 | result = sysctl_local_port_range[0] |
180 | + ((result - sysctl_local_port_range[0]) & | 178 | + ((result - sysctl_local_port_range[0]) & |
181 | (UDP_HTABLE_SIZE - 1)); | 179 | (UDP_HTABLE_SIZE - 1)); |
182 | if (!udp_lport_inuse(result)) | 180 | if (! __udp_lib_lport_inuse(result, udptable)) |
183 | break; | 181 | break; |
184 | } | 182 | } |
185 | if (i >= (1 << 16) / UDP_HTABLE_SIZE) | 183 | if (i >= (1 << 16) / UDP_HTABLE_SIZE) |
186 | goto fail; | 184 | goto fail; |
187 | gotit: | 185 | gotit: |
188 | udp_port_rover = snum = result; | 186 | *port_rover = snum = result; |
189 | } else { | 187 | } else { |
190 | head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; | 188 | head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; |
191 | 189 | ||
192 | sk_for_each(sk2, node, head) | 190 | sk_for_each(sk2, node, head) |
193 | if (inet_sk(sk2)->num == snum && | 191 | if (inet_sk(sk2)->num == snum && |
@@ -195,12 +193,12 @@ gotit: | |||
195 | (!sk2->sk_reuse || !sk->sk_reuse) && | 193 | (!sk2->sk_reuse || !sk->sk_reuse) && |
196 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 194 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
197 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 195 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
198 | (*saddr_cmp)(sk, sk2) ) | 196 | (*saddr_comp)(sk, sk2) ) |
199 | goto fail; | 197 | goto fail; |
200 | } | 198 | } |
201 | inet_sk(sk)->num = snum; | 199 | inet_sk(sk)->num = snum; |
202 | if (sk_unhashed(sk)) { | 200 | if (sk_unhashed(sk)) { |
203 | head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; | 201 | head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; |
204 | sk_add_node(sk, head); | 202 | sk_add_node(sk, head); |
205 | sock_prot_inc_use(sk->sk_prot); | 203 | sock_prot_inc_use(sk->sk_prot); |
206 | } | 204 | } |
@@ -210,7 +208,13 @@ fail: | |||
210 | return error; | 208 | return error; |
211 | } | 209 | } |
212 | 210 | ||
213 | static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 211 | __inline__ int udp_get_port(struct sock *sk, unsigned short snum, |
212 | int (*scmp)(const struct sock *, const struct sock *)) | ||
213 | { | ||
214 | return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); | ||
215 | } | ||
216 | |||
217 | inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
214 | { | 218 | { |
215 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 219 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
216 | 220 | ||
@@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
224 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | 228 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); |
225 | } | 229 | } |
226 | 230 | ||
227 | |||
228 | static void udp_v4_hash(struct sock *sk) | ||
229 | { | ||
230 | BUG(); | ||
231 | } | ||
232 | |||
233 | static void udp_v4_unhash(struct sock *sk) | ||
234 | { | ||
235 | write_lock_bh(&udp_hash_lock); | ||
236 | if (sk_del_node_init(sk)) { | ||
237 | inet_sk(sk)->num = 0; | ||
238 | sock_prot_dec_use(sk->sk_prot); | ||
239 | } | ||
240 | write_unlock_bh(&udp_hash_lock); | ||
241 | } | ||
242 | |||
243 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 231 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
244 | * harder than this. -DaveM | 232 | * harder than this. -DaveM |
245 | */ | 233 | */ |
246 | static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, | 234 | static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, |
247 | __be32 daddr, __be16 dport, int dif) | 235 | __be32 daddr, __be16 dport, |
236 | int dif, struct hlist_head udptable[]) | ||
248 | { | 237 | { |
249 | struct sock *sk, *result = NULL; | 238 | struct sock *sk, *result = NULL; |
250 | struct hlist_node *node; | 239 | struct hlist_node *node; |
251 | unsigned short hnum = ntohs(dport); | 240 | unsigned short hnum = ntohs(dport); |
252 | int badness = -1; | 241 | int badness = -1; |
253 | 242 | ||
254 | sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { | 243 | read_lock(&udp_hash_lock); |
244 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | ||
255 | struct inet_sock *inet = inet_sk(sk); | 245 | struct inet_sock *inet = inet_sk(sk); |
256 | 246 | ||
257 | if (inet->num == hnum && !ipv6_only_sock(sk)) { | 247 | if (inet->num == hnum && !ipv6_only_sock(sk)) { |
@@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, | |||
285 | } | 275 | } |
286 | } | 276 | } |
287 | } | 277 | } |
288 | return result; | 278 | if (result) |
289 | } | 279 | sock_hold(result); |
290 | |||
291 | static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport, | ||
292 | __be32 daddr, __be16 dport, int dif) | ||
293 | { | ||
294 | struct sock *sk; | ||
295 | |||
296 | read_lock(&udp_hash_lock); | ||
297 | sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); | ||
298 | if (sk) | ||
299 | sock_hold(sk); | ||
300 | read_unlock(&udp_hash_lock); | 280 | read_unlock(&udp_hash_lock); |
301 | return sk; | 281 | return result; |
302 | } | 282 | } |
303 | 283 | ||
304 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | 284 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, |
@@ -340,7 +320,7 @@ found: | |||
340 | * to find the appropriate port. | 320 | * to find the appropriate port. |
341 | */ | 321 | */ |
342 | 322 | ||
343 | void udp_err(struct sk_buff *skb, u32 info) | 323 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) |
344 | { | 324 | { |
345 | struct inet_sock *inet; | 325 | struct inet_sock *inet; |
346 | struct iphdr *iph = (struct iphdr*)skb->data; | 326 | struct iphdr *iph = (struct iphdr*)skb->data; |
@@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info) | |||
351 | int harderr; | 331 | int harderr; |
352 | int err; | 332 | int err; |
353 | 333 | ||
354 | sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); | 334 | sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, |
335 | skb->dev->ifindex, udptable ); | ||
355 | if (sk == NULL) { | 336 | if (sk == NULL) { |
356 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 337 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
357 | return; /* No socket for error */ | 338 | return; /* No socket for error */ |
@@ -405,6 +386,11 @@ out: | |||
405 | sock_put(sk); | 386 | sock_put(sk); |
406 | } | 387 | } |
407 | 388 | ||
389 | __inline__ void udp_err(struct sk_buff *skb, u32 info) | ||
390 | { | ||
391 | return __udp4_lib_err(skb, info, udp_hash); | ||
392 | } | ||
393 | |||
408 | /* | 394 | /* |
409 | * Throw away all pending data and cancel the corking. Socket is locked. | 395 | * Throw away all pending data and cancel the corking. Socket is locked. |
410 | */ | 396 | */ |
@@ -419,16 +405,56 @@ static void udp_flush_pending_frames(struct sock *sk) | |||
419 | } | 405 | } |
420 | } | 406 | } |
421 | 407 | ||
408 | /** | ||
409 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | ||
410 | * @sk: socket we are sending on | ||
411 | * @skb: sk_buff containing the filled-in UDP header | ||
412 | * (checksum field must be zeroed out) | ||
413 | */ | ||
414 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | ||
415 | __be32 src, __be32 dst, int len ) | ||
416 | { | ||
417 | unsigned int csum = 0, offset; | ||
418 | struct udphdr *uh = skb->h.uh; | ||
419 | |||
420 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | ||
421 | /* | ||
422 | * Only one fragment on the socket. | ||
423 | */ | ||
424 | skb->csum = offsetof(struct udphdr, check); | ||
425 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | ||
426 | } else { | ||
427 | /* | ||
428 | * HW-checksum won't work as there are two or more | ||
429 | * fragments on the socket so that all csums of sk_buffs | ||
430 | * should be together | ||
431 | */ | ||
432 | offset = skb->h.raw - skb->data; | ||
433 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
434 | |||
435 | skb->ip_summed = CHECKSUM_NONE; | ||
436 | |||
437 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
438 | csum = csum_add(csum, skb->csum); | ||
439 | } | ||
440 | |||
441 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | ||
442 | if (uh->check == 0) | ||
443 | uh->check = -1; | ||
444 | } | ||
445 | } | ||
446 | |||
422 | /* | 447 | /* |
423 | * Push out all pending data as one UDP datagram. Socket is locked. | 448 | * Push out all pending data as one UDP datagram. Socket is locked. |
424 | */ | 449 | */ |
425 | static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) | 450 | int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) |
426 | { | 451 | { |
427 | struct inet_sock *inet = inet_sk(sk); | 452 | struct inet_sock *inet = inet_sk(sk); |
428 | struct flowi *fl = &inet->cork.fl; | 453 | struct flowi *fl = &inet->cork.fl; |
429 | struct sk_buff *skb; | 454 | struct sk_buff *skb; |
430 | struct udphdr *uh; | 455 | struct udphdr *uh; |
431 | int err = 0; | 456 | int err = 0; |
457 | u32 csum = 0; | ||
432 | 458 | ||
433 | /* Grab the skbuff where UDP header space exists. */ | 459 | /* Grab the skbuff where UDP header space exists. */ |
434 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | 460 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) |
@@ -443,52 +469,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) | |||
443 | uh->len = htons(up->len); | 469 | uh->len = htons(up->len); |
444 | uh->check = 0; | 470 | uh->check = 0; |
445 | 471 | ||
446 | if (sk->sk_no_check == UDP_CSUM_NOXMIT) { | 472 | if (up->pcflag) /* UDP-Lite */ |
473 | csum = udplite_csum_outgoing(sk, skb); | ||
474 | |||
475 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | ||
476 | |||
447 | skb->ip_summed = CHECKSUM_NONE; | 477 | skb->ip_summed = CHECKSUM_NONE; |
448 | goto send; | 478 | goto send; |
449 | } | ||
450 | 479 | ||
451 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 480 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
452 | /* | ||
453 | * Only one fragment on the socket. | ||
454 | */ | ||
455 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
456 | skb->csum = offsetof(struct udphdr, check); | ||
457 | uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | ||
458 | up->len, IPPROTO_UDP, 0); | ||
459 | } else { | ||
460 | skb->csum = csum_partial((char *)uh, | ||
461 | sizeof(struct udphdr), skb->csum); | ||
462 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | ||
463 | up->len, IPPROTO_UDP, skb->csum); | ||
464 | if (uh->check == 0) | ||
465 | uh->check = -1; | ||
466 | } | ||
467 | } else { | ||
468 | unsigned int csum = 0; | ||
469 | /* | ||
470 | * HW-checksum won't work as there are two or more | ||
471 | * fragments on the socket so that all csums of sk_buffs | ||
472 | * should be together. | ||
473 | */ | ||
474 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
475 | int offset = (unsigned char *)uh - skb->data; | ||
476 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
477 | 481 | ||
478 | skb->ip_summed = CHECKSUM_NONE; | 482 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); |
479 | } else { | 483 | goto send; |
480 | skb->csum = csum_partial((char *)uh, | 484 | |
481 | sizeof(struct udphdr), skb->csum); | 485 | } else /* `normal' UDP */ |
482 | } | 486 | csum = udp_csum_outgoing(sk, skb); |
487 | |||
488 | /* add protocol-dependent pseudo-header */ | ||
489 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | ||
490 | sk->sk_protocol, csum ); | ||
491 | if (uh->check == 0) | ||
492 | uh->check = -1; | ||
483 | 493 | ||
484 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
485 | csum = csum_add(csum, skb->csum); | ||
486 | } | ||
487 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | ||
488 | up->len, IPPROTO_UDP, csum); | ||
489 | if (uh->check == 0) | ||
490 | uh->check = -1; | ||
491 | } | ||
492 | send: | 494 | send: |
493 | err = ip_push_pending_frames(sk); | 495 | err = ip_push_pending_frames(sk); |
494 | out: | 496 | out: |
@@ -497,12 +499,6 @@ out: | |||
497 | return err; | 499 | return err; |
498 | } | 500 | } |
499 | 501 | ||
500 | |||
501 | static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base) | ||
502 | { | ||
503 | return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); | ||
504 | } | ||
505 | |||
506 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 502 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
507 | size_t len) | 503 | size_t len) |
508 | { | 504 | { |
@@ -516,8 +512,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
516 | __be32 daddr, faddr, saddr; | 512 | __be32 daddr, faddr, saddr; |
517 | __be16 dport; | 513 | __be16 dport; |
518 | u8 tos; | 514 | u8 tos; |
519 | int err; | 515 | int err, is_udplite = up->pcflag; |
520 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 516 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
517 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | ||
521 | 518 | ||
522 | if (len > 0xFFFF) | 519 | if (len > 0xFFFF) |
523 | return -EMSGSIZE; | 520 | return -EMSGSIZE; |
@@ -622,7 +619,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
622 | { .daddr = faddr, | 619 | { .daddr = faddr, |
623 | .saddr = saddr, | 620 | .saddr = saddr, |
624 | .tos = tos } }, | 621 | .tos = tos } }, |
625 | .proto = IPPROTO_UDP, | 622 | .proto = sk->sk_protocol, |
626 | .uli_u = { .ports = | 623 | .uli_u = { .ports = |
627 | { .sport = inet->sport, | 624 | { .sport = inet->sport, |
628 | .dport = dport } } }; | 625 | .dport = dport } } }; |
@@ -668,8 +665,9 @@ back_from_confirm: | |||
668 | 665 | ||
669 | do_append_data: | 666 | do_append_data: |
670 | up->len += ulen; | 667 | up->len += ulen; |
671 | err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, | 668 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
672 | sizeof(struct udphdr), &ipc, rt, | 669 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, |
670 | sizeof(struct udphdr), &ipc, rt, | ||
673 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 671 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
674 | if (err) | 672 | if (err) |
675 | udp_flush_pending_frames(sk); | 673 | udp_flush_pending_frames(sk); |
@@ -684,7 +682,7 @@ out: | |||
684 | if (free) | 682 | if (free) |
685 | kfree(ipc.opt); | 683 | kfree(ipc.opt); |
686 | if (!err) { | 684 | if (!err) { |
687 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); | 685 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); |
688 | return len; | 686 | return len; |
689 | } | 687 | } |
690 | /* | 688 | /* |
@@ -695,7 +693,7 @@ out: | |||
695 | * seems like overkill. | 693 | * seems like overkill. |
696 | */ | 694 | */ |
697 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | 695 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { |
698 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); | 696 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); |
699 | } | 697 | } |
700 | return err; | 698 | return err; |
701 | 699 | ||
@@ -707,8 +705,8 @@ do_confirm: | |||
707 | goto out; | 705 | goto out; |
708 | } | 706 | } |
709 | 707 | ||
710 | static int udp_sendpage(struct sock *sk, struct page *page, int offset, | 708 | int udp_sendpage(struct sock *sk, struct page *page, int offset, |
711 | size_t size, int flags) | 709 | size_t size, int flags) |
712 | { | 710 | { |
713 | struct udp_sock *up = udp_sk(sk); | 711 | struct udp_sock *up = udp_sk(sk); |
714 | int ret; | 712 | int ret; |
@@ -795,29 +793,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
795 | return(0); | 793 | return(0); |
796 | } | 794 | } |
797 | 795 | ||
798 | static __inline__ int __udp_checksum_complete(struct sk_buff *skb) | ||
799 | { | ||
800 | return __skb_checksum_complete(skb); | ||
801 | } | ||
802 | |||
803 | static __inline__ int udp_checksum_complete(struct sk_buff *skb) | ||
804 | { | ||
805 | return skb->ip_summed != CHECKSUM_UNNECESSARY && | ||
806 | __udp_checksum_complete(skb); | ||
807 | } | ||
808 | |||
809 | /* | 796 | /* |
810 | * This should be easy, if there is something there we | 797 | * This should be easy, if there is something there we |
811 | * return it, otherwise we block. | 798 | * return it, otherwise we block. |
812 | */ | 799 | */ |
813 | 800 | ||
814 | static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 801 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
815 | size_t len, int noblock, int flags, int *addr_len) | 802 | size_t len, int noblock, int flags, int *addr_len) |
816 | { | 803 | { |
817 | struct inet_sock *inet = inet_sk(sk); | 804 | struct inet_sock *inet = inet_sk(sk); |
818 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | 805 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; |
819 | struct sk_buff *skb; | 806 | struct sk_buff *skb; |
820 | int copied, err; | 807 | int copied, err, copy_only, is_udplite = IS_UDPLITE(sk); |
821 | 808 | ||
822 | /* | 809 | /* |
823 | * Check any passed addresses | 810 | * Check any passed addresses |
@@ -839,15 +826,25 @@ try_again: | |||
839 | msg->msg_flags |= MSG_TRUNC; | 826 | msg->msg_flags |= MSG_TRUNC; |
840 | } | 827 | } |
841 | 828 | ||
842 | if (skb->ip_summed==CHECKSUM_UNNECESSARY) { | 829 | /* |
843 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, | 830 | * Decide whether to checksum and/or copy data. |
844 | copied); | 831 | * |
845 | } else if (msg->msg_flags&MSG_TRUNC) { | 832 | * UDP: checksum may have been computed in HW, |
846 | if (__udp_checksum_complete(skb)) | 833 | * (re-)compute it if message is truncated. |
834 | * UDP-Lite: always needs to checksum, no HW support. | ||
835 | */ | ||
836 | copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY); | ||
837 | |||
838 | if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { | ||
839 | if (__udp_lib_checksum_complete(skb)) | ||
847 | goto csum_copy_err; | 840 | goto csum_copy_err; |
848 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, | 841 | copy_only = 1; |
849 | copied); | 842 | } |
850 | } else { | 843 | |
844 | if (copy_only) | ||
845 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | ||
846 | msg->msg_iov, copied ); | ||
847 | else { | ||
851 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | 848 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); |
852 | 849 | ||
853 | if (err == -EINVAL) | 850 | if (err == -EINVAL) |
@@ -880,7 +877,7 @@ out: | |||
880 | return err; | 877 | return err; |
881 | 878 | ||
882 | csum_copy_err: | 879 | csum_copy_err: |
883 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 880 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); |
884 | 881 | ||
885 | skb_kill_datagram(sk, skb, flags); | 882 | skb_kill_datagram(sk, skb, flags); |
886 | 883 | ||
@@ -912,11 +909,6 @@ int udp_disconnect(struct sock *sk, int flags) | |||
912 | return 0; | 909 | return 0; |
913 | } | 910 | } |
914 | 911 | ||
915 | static void udp_close(struct sock *sk, long timeout) | ||
916 | { | ||
917 | sk_common_release(sk); | ||
918 | } | ||
919 | |||
920 | /* return: | 912 | /* return: |
921 | * 1 if the UDP system should process it | 913 |
922 | * 0 if we should drop this packet | 914 | * 0 if we should drop this packet |
@@ -1022,7 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
1022 | * Note that in the success and error cases, the skb is assumed to | 1014 | * Note that in the success and error cases, the skb is assumed to |
1023 | * have either been requeued or freed. | 1015 | * have either been requeued or freed. |
1024 | */ | 1016 | */ |
1025 | static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 1017 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) |
1026 | { | 1018 | { |
1027 | struct udp_sock *up = udp_sk(sk); | 1019 | struct udp_sock *up = udp_sk(sk); |
1028 | int rc; | 1020 | int rc; |
@@ -1030,10 +1022,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1030 | /* | 1022 | /* |
1031 | * Charge it to the socket, dropping if the queue is full. | 1023 | * Charge it to the socket, dropping if the queue is full. |
1032 | */ | 1024 | */ |
1033 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | 1025 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
1034 | kfree_skb(skb); | 1026 | goto drop; |
1035 | return -1; | ||
1036 | } | ||
1037 | nf_reset(skb); | 1027 | nf_reset(skb); |
1038 | 1028 | ||
1039 | if (up->encap_type) { | 1029 | if (up->encap_type) { |
@@ -1057,31 +1047,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1057 | if (ret < 0) { | 1047 | if (ret < 0) { |
1058 | /* process the ESP packet */ | 1048 | /* process the ESP packet */ |
1059 | ret = xfrm4_rcv_encap(skb, up->encap_type); | 1049 | ret = xfrm4_rcv_encap(skb, up->encap_type); |
1060 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); | 1050 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); |
1061 | return -ret; | 1051 | return -ret; |
1062 | } | 1052 | } |
1063 | /* FALLTHROUGH -- it's a UDP Packet */ | 1053 | /* FALLTHROUGH -- it's a UDP Packet */ |
1064 | } | 1054 | } |
1065 | 1055 | ||
1066 | if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { | 1056 | /* |
1067 | if (__udp_checksum_complete(skb)) { | 1057 | * UDP-Lite specific tests, ignored on UDP sockets |
1068 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1058 | */ |
1069 | kfree_skb(skb); | 1059 | if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { |
1070 | return -1; | 1060 | |
1061 | /* | ||
1062 | * MIB statistics other than incrementing the error count are | ||
1063 | * disabled for the following two types of errors: these depend | ||
1064 | * on the application settings, not on the functioning of the | ||
1065 | * protocol stack as such. | ||
1066 | * | ||
1067 | * RFC 3828 here recommends (sec 3.3): "There should also be a | ||
1068 | * way ... to ... at least let the receiving application block | ||
1069 | * delivery of packets with coverage values less than a value | ||
1070 | * provided by the application." | ||
1071 | */ | ||
1072 | if (up->pcrlen == 0) { /* full coverage was set */ | ||
1073 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | ||
1074 | "%d while full coverage %d requested\n", | ||
1075 | UDP_SKB_CB(skb)->cscov, skb->len); | ||
1076 | goto drop; | ||
1071 | } | 1077 | } |
1078 | /* The next case involves violating the min. coverage requested | ||
1079 | * by the receiver. This is subtle: if receiver wants x and x is | ||
1080 | * greater than the buffersize/MTU then receiver will complain | ||
1081 | * that it wants x while sender emits packets of smaller size y. | ||
1082 | * Therefore the above ...()->partial_cov statement is essential. | ||
1083 | */ | ||
1084 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | ||
1085 | LIMIT_NETDEBUG(KERN_WARNING | ||
1086 | "UDPLITE: coverage %d too small, need min %d\n", | ||
1087 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | ||
1088 | goto drop; | ||
1089 | } | ||
1090 | } | ||
1091 | |||
1092 | if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { | ||
1093 | if (__udp_lib_checksum_complete(skb)) | ||
1094 | goto drop; | ||
1072 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1095 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1073 | } | 1096 | } |
1074 | 1097 | ||
1075 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { | 1098 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { |
1076 | /* Note that an ENOMEM error is charged twice */ | 1099 | /* Note that an ENOMEM error is charged twice */ |
1077 | if (rc == -ENOMEM) | 1100 | if (rc == -ENOMEM) |
1078 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); | 1101 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); |
1079 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1102 | goto drop; |
1080 | kfree_skb(skb); | ||
1081 | return -1; | ||
1082 | } | 1103 | } |
1083 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); | 1104 | |
1105 | UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); | ||
1084 | return 0; | 1106 | return 0; |
1107 | |||
1108 | drop: | ||
1109 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); | ||
1110 | kfree_skb(skb); | ||
1111 | return -1; | ||
1085 | } | 1112 | } |
1086 | 1113 | ||
1087 | /* | 1114 | /* |
@@ -1090,14 +1117,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1090 | * Note: called only from the BH handler context, | 1117 | * Note: called only from the BH handler context, |
1091 | * so we don't need to lock the hashes. | 1118 | * so we don't need to lock the hashes. |
1092 | */ | 1119 | */ |
1093 | static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, | 1120 | static int __udp4_lib_mcast_deliver(struct sk_buff *skb, |
1094 | __be32 saddr, __be32 daddr) | 1121 | struct udphdr *uh, |
1122 | __be32 saddr, __be32 daddr, | ||
1123 | struct hlist_head udptable[]) | ||
1095 | { | 1124 | { |
1096 | struct sock *sk; | 1125 | struct sock *sk; |
1097 | int dif; | 1126 | int dif; |
1098 | 1127 | ||
1099 | read_lock(&udp_hash_lock); | 1128 | read_lock(&udp_hash_lock); |
1100 | sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); | 1129 | sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); |
1101 | dif = skb->dev->ifindex; | 1130 | dif = skb->dev->ifindex; |
1102 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); | 1131 | sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); |
1103 | if (sk) { | 1132 | if (sk) { |
@@ -1131,65 +1160,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, | |||
1131 | * Otherwise, csum completion requires chacksumming packet body, | 1160 | * Otherwise, csum completion requires chacksumming packet body, |
1132 | * including udp header and folding it to skb->csum. | 1161 | * including udp header and folding it to skb->csum. |
1133 | */ | 1162 | */ |
1134 | static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | 1163 | static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) |
1135 | unsigned short ulen, __be32 saddr, __be32 daddr) | ||
1136 | { | 1164 | { |
1137 | if (uh->check == 0) { | 1165 | if (uh->check == 0) { |
1138 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1166 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1139 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1167 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1140 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) | 1168 | if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, |
1169 | skb->len, IPPROTO_UDP, skb->csum )) | ||
1141 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1170 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1142 | } | 1171 | } |
1143 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | 1172 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) |
1144 | skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); | 1173 | skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, |
1174 | skb->nh.iph->daddr, | ||
1175 | skb->len, IPPROTO_UDP, 0); | ||
1145 | /* Probably, we should checksum udp header (it should be in cache | 1176 | /* Probably, we should checksum udp header (it should be in cache |
1146 | * in any case) and data in tiny packets (< rx copybreak). | 1177 | * in any case) and data in tiny packets (< rx copybreak). |
1147 | */ | 1178 | */ |
1179 | |||
1180 | /* UDP = UDP-Lite with a non-partial checksum coverage */ | ||
1181 | UDP_SKB_CB(skb)->partial_cov = 0; | ||
1148 | } | 1182 | } |
1149 | 1183 | ||
1150 | /* | 1184 | /* |
1151 | * All we need to do is get the socket, and then do a checksum. | 1185 | * All we need to do is get the socket, and then do a checksum. |
1152 | */ | 1186 | */ |
1153 | 1187 | ||
1154 | int udp_rcv(struct sk_buff *skb) | 1188 | int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], |
1189 | int is_udplite) | ||
1155 | { | 1190 | { |
1156 | struct sock *sk; | 1191 | struct sock *sk; |
1157 | struct udphdr *uh; | 1192 | struct udphdr *uh = skb->h.uh; |
1158 | unsigned short ulen; | 1193 | unsigned short ulen; |
1159 | struct rtable *rt = (struct rtable*)skb->dst; | 1194 | struct rtable *rt = (struct rtable*)skb->dst; |
1160 | __be32 saddr = skb->nh.iph->saddr; | 1195 | __be32 saddr = skb->nh.iph->saddr; |
1161 | __be32 daddr = skb->nh.iph->daddr; | 1196 | __be32 daddr = skb->nh.iph->daddr; |
1162 | int len = skb->len; | ||
1163 | 1197 | ||
1164 | /* | 1198 | /* |
1165 | * Validate the packet and the UDP length. | 1199 | * Validate the packet. |
1166 | */ | 1200 | */ |
1167 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | 1201 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
1168 | goto no_header; | 1202 | goto drop; /* No space for header. */ |
1169 | |||
1170 | uh = skb->h.uh; | ||
1171 | 1203 | ||
1172 | ulen = ntohs(uh->len); | 1204 | ulen = ntohs(uh->len); |
1173 | 1205 | if (ulen > skb->len) | |
1174 | if (ulen > len || ulen < sizeof(*uh)) | ||
1175 | goto short_packet; | 1206 | goto short_packet; |
1176 | 1207 | ||
1177 | if (pskb_trim_rcsum(skb, ulen)) | 1208 | if(! is_udplite ) { /* UDP validates ulen. */ |
1178 | goto short_packet; | 1209 | |
1210 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | ||
1211 | goto short_packet; | ||
1179 | 1212 | ||
1180 | udp_checksum_init(skb, uh, ulen, saddr, daddr); | 1213 | udp4_csum_init(skb, uh); |
1214 | |||
1215 | } else { /* UDP-Lite validates cscov. */ | ||
1216 | if (udplite4_csum_init(skb, uh)) | ||
1217 | goto csum_error; | ||
1218 | } | ||
1181 | 1219 | ||
1182 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1220 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1183 | return udp_v4_mcast_deliver(skb, uh, saddr, daddr); | 1221 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); |
1184 | 1222 | ||
1185 | sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); | 1223 | sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, |
1224 | skb->dev->ifindex, udptable ); | ||
1186 | 1225 | ||
1187 | if (sk != NULL) { | 1226 | if (sk != NULL) { |
1188 | int ret = udp_queue_rcv_skb(sk, skb); | 1227 | int ret = udp_queue_rcv_skb(sk, skb); |
1189 | sock_put(sk); | 1228 | sock_put(sk); |
1190 | 1229 | ||
1191 | /* a return value > 0 means to resubmit the input, but | 1230 | /* a return value > 0 means to resubmit the input, but |
1192 | * it it wants the return to be -protocol, or 0 | 1231 | * it wants the return to be -protocol, or 0 |
1193 | */ | 1232 | */ |
1194 | if (ret > 0) | 1233 | if (ret > 0) |
1195 | return -ret; | 1234 | return -ret; |
@@ -1201,10 +1240,10 @@ int udp_rcv(struct sk_buff *skb) | |||
1201 | nf_reset(skb); | 1240 | nf_reset(skb); |
1202 | 1241 | ||
1203 | /* No socket. Drop packet silently, if checksum is wrong */ | 1242 | /* No socket. Drop packet silently, if checksum is wrong */ |
1204 | if (udp_checksum_complete(skb)) | 1243 | if (udp_lib_checksum_complete(skb)) |
1205 | goto csum_error; | 1244 | goto csum_error; |
1206 | 1245 | ||
1207 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS); | 1246 | UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); |
1208 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 1247 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
1209 | 1248 | ||
1210 | /* | 1249 | /* |
@@ -1215,36 +1254,40 @@ int udp_rcv(struct sk_buff *skb) | |||
1215 | return(0); | 1254 | return(0); |
1216 | 1255 | ||
1217 | short_packet: | 1256 | short_packet: |
1218 | LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | 1257 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", |
1258 | is_udplite? "-Lite" : "", | ||
1219 | NIPQUAD(saddr), | 1259 | NIPQUAD(saddr), |
1220 | ntohs(uh->source), | 1260 | ntohs(uh->source), |
1221 | ulen, | 1261 | ulen, |
1222 | len, | 1262 | skb->len, |
1223 | NIPQUAD(daddr), | 1263 | NIPQUAD(daddr), |
1224 | ntohs(uh->dest)); | 1264 | ntohs(uh->dest)); |
1225 | no_header: | 1265 | goto drop; |
1226 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | ||
1227 | kfree_skb(skb); | ||
1228 | return(0); | ||
1229 | 1266 | ||
1230 | csum_error: | 1267 | csum_error: |
1231 | /* | 1268 | /* |
1232 | * RFC1122: OK. Discards the bad packet silently (as far as | 1269 | * RFC1122: OK. Discards the bad packet silently (as far as |
1233 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1270 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1234 | */ | 1271 | */ |
1235 | LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | 1272 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", |
1273 | is_udplite? "-Lite" : "", | ||
1236 | NIPQUAD(saddr), | 1274 | NIPQUAD(saddr), |
1237 | ntohs(uh->source), | 1275 | ntohs(uh->source), |
1238 | NIPQUAD(daddr), | 1276 | NIPQUAD(daddr), |
1239 | ntohs(uh->dest), | 1277 | ntohs(uh->dest), |
1240 | ulen); | 1278 | ulen); |
1241 | drop: | 1279 | drop: |
1242 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1280 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); |
1243 | kfree_skb(skb); | 1281 | kfree_skb(skb); |
1244 | return(0); | 1282 | return(0); |
1245 | } | 1283 | } |
1246 | 1284 | ||
1247 | static int udp_destroy_sock(struct sock *sk) | 1285 | __inline__ int udp_rcv(struct sk_buff *skb) |
1286 | { | ||
1287 | return __udp4_lib_rcv(skb, udp_hash, 0); | ||
1288 | } | ||
1289 | |||
1290 | int udp_destroy_sock(struct sock *sk) | ||
1248 | { | 1291 | { |
1249 | lock_sock(sk); | 1292 | lock_sock(sk); |
1250 | udp_flush_pending_frames(sk); | 1293 | udp_flush_pending_frames(sk); |
@@ -1293,6 +1336,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, | |||
1293 | } | 1336 | } |
1294 | break; | 1337 | break; |
1295 | 1338 | ||
1339 | /* | ||
1340 | * UDP-Lite's partial checksum coverage (RFC 3828). | ||
1341 | */ | ||
1342 | /* The sender sets actual checksum coverage length via this option. | ||
1343 | * The case coverage > packet length is handled by send module. */ | ||
1344 | case UDPLITE_SEND_CSCOV: | ||
1345 | if (!up->pcflag) /* Disable the option on UDP sockets */ | ||
1346 | return -ENOPROTOOPT; | ||
1347 | if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ | ||
1348 | val = 8; | ||
1349 | up->pcslen = val; | ||
1350 | up->pcflag |= UDPLITE_SEND_CC; | ||
1351 | break; | ||
1352 | |||
1353 | /* The receiver specifies a minimum checksum coverage value. To make | ||
1354 | * sense, this should be set to at least 8 (as done below). If zero is | ||
1355 | * used, this again means full checksum coverage. */ | ||
1356 | case UDPLITE_RECV_CSCOV: | ||
1357 | if (!up->pcflag) /* Disable the option on UDP sockets */ | ||
1358 | return -ENOPROTOOPT; | ||
1359 | if (val != 0 && val < 8) /* Avoid silly minimal values. */ | ||
1360 | val = 8; | ||
1361 | up->pcrlen = val; | ||
1362 | up->pcflag |= UDPLITE_RECV_CC; | ||
1363 | break; | ||
1364 | |||
1296 | default: | 1365 | default: |
1297 | err = -ENOPROTOOPT; | 1366 | err = -ENOPROTOOPT; |
1298 | break; | 1367 | break; |
@@ -1301,21 +1370,21 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname, | |||
1301 | return err; | 1370 | return err; |
1302 | } | 1371 | } |
1303 | 1372 | ||
1304 | static int udp_setsockopt(struct sock *sk, int level, int optname, | 1373 | int udp_setsockopt(struct sock *sk, int level, int optname, |
1305 | char __user *optval, int optlen) | 1374 | char __user *optval, int optlen) |
1306 | { | 1375 | { |
1307 | if (level != SOL_UDP) | 1376 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1308 | return ip_setsockopt(sk, level, optname, optval, optlen); | 1377 | return do_udp_setsockopt(sk, level, optname, optval, optlen); |
1309 | return do_udp_setsockopt(sk, level, optname, optval, optlen); | 1378 | return ip_setsockopt(sk, level, optname, optval, optlen); |
1310 | } | 1379 | } |
1311 | 1380 | ||
1312 | #ifdef CONFIG_COMPAT | 1381 | #ifdef CONFIG_COMPAT |
1313 | static int compat_udp_setsockopt(struct sock *sk, int level, int optname, | 1382 | int compat_udp_setsockopt(struct sock *sk, int level, int optname, |
1314 | char __user *optval, int optlen) | 1383 | char __user *optval, int optlen) |
1315 | { | 1384 | { |
1316 | if (level != SOL_UDP) | 1385 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1317 | return compat_ip_setsockopt(sk, level, optname, optval, optlen); | 1386 | return do_udp_setsockopt(sk, level, optname, optval, optlen); |
1318 | return do_udp_setsockopt(sk, level, optname, optval, optlen); | 1387 | return compat_ip_setsockopt(sk, level, optname, optval, optlen); |
1319 | } | 1388 | } |
1320 | #endif | 1389 | #endif |
1321 | 1390 | ||
@@ -1342,6 +1411,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, | |||
1342 | val = up->encap_type; | 1411 | val = up->encap_type; |
1343 | break; | 1412 | break; |
1344 | 1413 | ||
1414 | /* The following two cannot be changed on UDP sockets, the return is | ||
1415 | * always 0 (which corresponds to the full checksum coverage of UDP). */ | ||
1416 | case UDPLITE_SEND_CSCOV: | ||
1417 | val = up->pcslen; | ||
1418 | break; | ||
1419 | |||
1420 | case UDPLITE_RECV_CSCOV: | ||
1421 | val = up->pcrlen; | ||
1422 | break; | ||
1423 | |||
1345 | default: | 1424 | default: |
1346 | return -ENOPROTOOPT; | 1425 | return -ENOPROTOOPT; |
1347 | }; | 1426 | }; |
@@ -1353,21 +1432,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname, | |||
1353 | return 0; | 1432 | return 0; |
1354 | } | 1433 | } |
1355 | 1434 | ||
1356 | static int udp_getsockopt(struct sock *sk, int level, int optname, | 1435 | int udp_getsockopt(struct sock *sk, int level, int optname, |
1357 | char __user *optval, int __user *optlen) | 1436 | char __user *optval, int __user *optlen) |
1358 | { | 1437 | { |
1359 | if (level != SOL_UDP) | 1438 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1360 | return ip_getsockopt(sk, level, optname, optval, optlen); | 1439 | return do_udp_getsockopt(sk, level, optname, optval, optlen); |
1361 | return do_udp_getsockopt(sk, level, optname, optval, optlen); | 1440 | return ip_getsockopt(sk, level, optname, optval, optlen); |
1362 | } | 1441 | } |
1363 | 1442 | ||
1364 | #ifdef CONFIG_COMPAT | 1443 | #ifdef CONFIG_COMPAT |
1365 | static int compat_udp_getsockopt(struct sock *sk, int level, int optname, | 1444 | int compat_udp_getsockopt(struct sock *sk, int level, int optname, |
1366 | char __user *optval, int __user *optlen) | 1445 | char __user *optval, int __user *optlen) |
1367 | { | 1446 | { |
1368 | if (level != SOL_UDP) | 1447 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1369 | return compat_ip_getsockopt(sk, level, optname, optval, optlen); | 1448 | return do_udp_getsockopt(sk, level, optname, optval, optlen); |
1370 | return do_udp_getsockopt(sk, level, optname, optval, optlen); | 1449 | return compat_ip_getsockopt(sk, level, optname, optval, optlen); |
1371 | } | 1450 | } |
1372 | #endif | 1451 | #endif |
1373 | /** | 1452 | /** |
@@ -1387,7 +1466,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1387 | { | 1466 | { |
1388 | unsigned int mask = datagram_poll(file, sock, wait); | 1467 | unsigned int mask = datagram_poll(file, sock, wait); |
1389 | struct sock *sk = sock->sk; | 1468 | struct sock *sk = sock->sk; |
1390 | 1469 | int is_lite = IS_UDPLITE(sk); | |
1470 | |||
1391 | /* Check for false positives due to checksum errors */ | 1471 | /* Check for false positives due to checksum errors */ |
1392 | if ( (mask & POLLRDNORM) && | 1472 | if ( (mask & POLLRDNORM) && |
1393 | !(file->f_flags & O_NONBLOCK) && | 1473 | !(file->f_flags & O_NONBLOCK) && |
@@ -1397,8 +1477,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1397 | 1477 | ||
1398 | spin_lock_bh(&rcvq->lock); | 1478 | spin_lock_bh(&rcvq->lock); |
1399 | while ((skb = skb_peek(rcvq)) != NULL) { | 1479 | while ((skb = skb_peek(rcvq)) != NULL) { |
1400 | if (udp_checksum_complete(skb)) { | 1480 | if (udp_lib_checksum_complete(skb)) { |
1401 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1481 | UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); |
1402 | __skb_unlink(skb, rcvq); | 1482 | __skb_unlink(skb, rcvq); |
1403 | kfree_skb(skb); | 1483 | kfree_skb(skb); |
1404 | } else { | 1484 | } else { |
@@ -1420,7 +1500,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1420 | struct proto udp_prot = { | 1500 | struct proto udp_prot = { |
1421 | .name = "UDP", | 1501 | .name = "UDP", |
1422 | .owner = THIS_MODULE, | 1502 | .owner = THIS_MODULE, |
1423 | .close = udp_close, | 1503 | .close = udp_lib_close, |
1424 | .connect = ip4_datagram_connect, | 1504 | .connect = ip4_datagram_connect, |
1425 | .disconnect = udp_disconnect, | 1505 | .disconnect = udp_disconnect, |
1426 | .ioctl = udp_ioctl, | 1506 | .ioctl = udp_ioctl, |
@@ -1431,8 +1511,8 @@ struct proto udp_prot = { | |||
1431 | .recvmsg = udp_recvmsg, | 1511 | .recvmsg = udp_recvmsg, |
1432 | .sendpage = udp_sendpage, | 1512 | .sendpage = udp_sendpage, |
1433 | .backlog_rcv = udp_queue_rcv_skb, | 1513 | .backlog_rcv = udp_queue_rcv_skb, |
1434 | .hash = udp_v4_hash, | 1514 | .hash = udp_lib_hash, |
1435 | .unhash = udp_v4_unhash, | 1515 | .unhash = udp_lib_unhash, |
1436 | .get_port = udp_v4_get_port, | 1516 | .get_port = udp_v4_get_port, |
1437 | .obj_size = sizeof(struct udp_sock), | 1517 | .obj_size = sizeof(struct udp_sock), |
1438 | #ifdef CONFIG_COMPAT | 1518 | #ifdef CONFIG_COMPAT |
@@ -1451,7 +1531,7 @@ static struct sock *udp_get_first(struct seq_file *seq) | |||
1451 | 1531 | ||
1452 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1532 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { |
1453 | struct hlist_node *node; | 1533 | struct hlist_node *node; |
1454 | sk_for_each(sk, node, &udp_hash[state->bucket]) { | 1534 | sk_for_each(sk, node, state->hashtable + state->bucket) { |
1455 | if (sk->sk_family == state->family) | 1535 | if (sk->sk_family == state->family) |
1456 | goto found; | 1536 | goto found; |
1457 | } | 1537 | } |
@@ -1472,7 +1552,7 @@ try_again: | |||
1472 | } while (sk && sk->sk_family != state->family); | 1552 | } while (sk && sk->sk_family != state->family); |
1473 | 1553 | ||
1474 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { | 1554 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { |
1475 | sk = sk_head(&udp_hash[state->bucket]); | 1555 | sk = sk_head(state->hashtable + state->bucket); |
1476 | goto try_again; | 1556 | goto try_again; |
1477 | } | 1557 | } |
1478 | return sk; | 1558 | return sk; |
@@ -1522,6 +1602,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) | |||
1522 | if (!s) | 1602 | if (!s) |
1523 | goto out; | 1603 | goto out; |
1524 | s->family = afinfo->family; | 1604 | s->family = afinfo->family; |
1605 | s->hashtable = afinfo->hashtable; | ||
1525 | s->seq_ops.start = udp_seq_start; | 1606 | s->seq_ops.start = udp_seq_start; |
1526 | s->seq_ops.next = udp_seq_next; | 1607 | s->seq_ops.next = udp_seq_next; |
1527 | s->seq_ops.show = afinfo->seq_show; | 1608 | s->seq_ops.show = afinfo->seq_show; |
@@ -1588,7 +1669,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) | |||
1588 | atomic_read(&sp->sk_refcnt), sp); | 1669 | atomic_read(&sp->sk_refcnt), sp); |
1589 | } | 1670 | } |
1590 | 1671 | ||
1591 | static int udp4_seq_show(struct seq_file *seq, void *v) | 1672 | int udp4_seq_show(struct seq_file *seq, void *v) |
1592 | { | 1673 | { |
1593 | if (v == SEQ_START_TOKEN) | 1674 | if (v == SEQ_START_TOKEN) |
1594 | seq_printf(seq, "%-127s\n", | 1675 | seq_printf(seq, "%-127s\n", |
@@ -1611,6 +1692,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { | |||
1611 | .owner = THIS_MODULE, | 1692 | .owner = THIS_MODULE, |
1612 | .name = "udp", | 1693 | .name = "udp", |
1613 | .family = AF_INET, | 1694 | .family = AF_INET, |
1695 | .hashtable = udp_hash, | ||
1614 | .seq_show = udp4_seq_show, | 1696 | .seq_show = udp4_seq_show, |
1615 | .seq_fops = &udp4_seq_fops, | 1697 | .seq_fops = &udp4_seq_fops, |
1616 | }; | 1698 | }; |