aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c11
-rw-r--r--net/ipv4/proc.c13
-rw-r--r--net/ipv4/udp.c518
-rw-r--r--net/ipv4/udp_impl.h38
-rw-r--r--net/ipv4/udplite.c119
-rw-r--r--net/ipv4/xfrm4_policy.c1
8 files changed, 488 insertions, 223 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15645c51520c..7a068626feea 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,7 +8,8 @@ obj-y := route.o inetpeer.o protocol.o \
8 inet_timewait_sock.o inet_connection_sock.o \ 8 inet_timewait_sock.o inet_connection_sock.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 11 datagram.o raw.o udp.o udplite.o \
12 arp.o icmp.o devinet.o af_inet.o igmp.o \
12 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o 13 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
13 14
14obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o 15obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4a81d54a7569..8db39f7e3bf0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -104,6 +104,7 @@
104#include <net/inet_connection_sock.h> 104#include <net/inet_connection_sock.h>
105#include <net/tcp.h> 105#include <net/tcp.h>
106#include <net/udp.h> 106#include <net/udp.h>
107#include <net/udplite.h>
107#include <linux/skbuff.h> 108#include <linux/skbuff.h>
108#include <net/sock.h> 109#include <net/sock.h>
109#include <net/raw.h> 110#include <net/raw.h>
@@ -1223,10 +1224,13 @@ static int __init init_ipv4_mibs(void)
1223 tcp_statistics[1] = alloc_percpu(struct tcp_mib); 1224 tcp_statistics[1] = alloc_percpu(struct tcp_mib);
1224 udp_statistics[0] = alloc_percpu(struct udp_mib); 1225 udp_statistics[0] = alloc_percpu(struct udp_mib);
1225 udp_statistics[1] = alloc_percpu(struct udp_mib); 1226 udp_statistics[1] = alloc_percpu(struct udp_mib);
1227 udplite_statistics[0] = alloc_percpu(struct udp_mib);
1228 udplite_statistics[1] = alloc_percpu(struct udp_mib);
1226 if (! 1229 if (!
1227 (net_statistics[0] && net_statistics[1] && ip_statistics[0] 1230 (net_statistics[0] && net_statistics[1] && ip_statistics[0]
1228 && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] 1231 && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
1229 && udp_statistics[0] && udp_statistics[1])) 1232 && udp_statistics[0] && udp_statistics[1]
1233 && udplite_statistics[0] && udplite_statistics[1] ) )
1230 return -ENOMEM; 1234 return -ENOMEM;
1231 1235
1232 (void) tcp_mib_init(); 1236 (void) tcp_mib_init();
@@ -1313,6 +1317,8 @@ static int __init inet_init(void)
1313 /* Setup TCP slab cache for open requests. */ 1317 /* Setup TCP slab cache for open requests. */
1314 tcp_init(); 1318 tcp_init();
1315 1319
1320 /* Add UDP-Lite (RFC 3828) */
1321 udplite4_register();
1316 1322
1317 /* 1323 /*
1318 * Set the ICMP layer up 1324 * Set the ICMP layer up
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 7dc820df8bc5..46eee64a11f6 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -171,11 +171,15 @@ static void dump_packet(const struct nf_loginfo *info,
171 } 171 }
172 break; 172 break;
173 } 173 }
174 case IPPROTO_UDP: { 174 case IPPROTO_UDP:
175 case IPPROTO_UDPLITE: {
175 struct udphdr _udph, *uh; 176 struct udphdr _udph, *uh;
176 177
177 /* Max length: 10 "PROTO=UDP " */ 178 if (ih->protocol == IPPROTO_UDP)
178 printk("PROTO=UDP "); 179 /* Max length: 10 "PROTO=UDP " */
180 printk("PROTO=UDP " );
181 else /* Max length: 14 "PROTO=UDPLITE " */
182 printk("PROTO=UDPLITE ");
179 183
180 if (ntohs(ih->frag_off) & IP_OFFSET) 184 if (ntohs(ih->frag_off) & IP_OFFSET)
181 break; 185 break;
@@ -341,6 +345,7 @@ static void dump_packet(const struct nf_loginfo *info,
341 /* IP: 40+46+6+11+127 = 230 */ 345 /* IP: 40+46+6+11+127 = 230 */
342 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ 346 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
343 /* UDP: 10+max(25,20) = 35 */ 347 /* UDP: 10+max(25,20) = 35 */
348 /* UDPLITE: 14+max(25,20) = 39 */
344 /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ 349 /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
345 /* ESP: 10+max(25)+15 = 50 */ 350 /* ESP: 10+max(25)+15 = 50 */
346 /* AH: 9+max(25)+15 = 49 */ 351 /* AH: 9+max(25)+15 = 49 */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9c6cbe3d9fb8..cd873da54cbe 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
38#include <net/protocol.h> 38#include <net/protocol.h>
39#include <net/tcp.h> 39#include <net/tcp.h>
40#include <net/udp.h> 40#include <net/udp.h>
41#include <net/udplite.h>
41#include <linux/inetdevice.h> 42#include <linux/inetdevice.h>
42#include <linux/proc_fs.h> 43#include <linux/proc_fs.h>
43#include <linux/seq_file.h> 44#include <linux/seq_file.h>
@@ -66,6 +67,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
66 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), 67 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
67 atomic_read(&tcp_memory_allocated)); 68 atomic_read(&tcp_memory_allocated));
68 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); 69 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
70 seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
69 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); 71 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
70 seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, 72 seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
71 atomic_read(&ip_frag_mem)); 73 atomic_read(&ip_frag_mem));
@@ -304,6 +306,17 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
304 fold_field((void **) udp_statistics, 306 fold_field((void **) udp_statistics,
305 snmp4_udp_list[i].entry)); 307 snmp4_udp_list[i].entry));
306 308
309 /* the UDP and UDP-Lite MIBs are the same */
310 seq_puts(seq, "\nUdpLite:");
311 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
312 seq_printf(seq, " %s", snmp4_udp_list[i].name);
313
314 seq_puts(seq, "\nUdpLite:");
315 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
316 seq_printf(seq, " %lu",
317 fold_field((void **) udplite_statistics,
318 snmp4_udp_list[i].entry) );
319
307 seq_putc(seq, '\n'); 320 seq_putc(seq, '\n');
308 return 0; 321 return 0;
309} 322}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9e1bd374875e..98ba75096175 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -92,22 +92,16 @@
92#include <linux/timer.h> 92#include <linux/timer.h>
93#include <linux/mm.h> 93#include <linux/mm.h>
94#include <linux/inet.h> 94#include <linux/inet.h>
95#include <linux/ipv6.h>
96#include <linux/netdevice.h> 95#include <linux/netdevice.h>
97#include <net/snmp.h>
98#include <net/ip.h>
99#include <net/tcp_states.h> 96#include <net/tcp_states.h>
100#include <net/protocol.h>
101#include <linux/skbuff.h> 97#include <linux/skbuff.h>
102#include <linux/proc_fs.h> 98#include <linux/proc_fs.h>
103#include <linux/seq_file.h> 99#include <linux/seq_file.h>
104#include <net/sock.h>
105#include <net/udp.h>
106#include <net/icmp.h> 100#include <net/icmp.h>
107#include <net/route.h> 101#include <net/route.h>
108#include <net/inet_common.h>
109#include <net/checksum.h> 102#include <net/checksum.h>
110#include <net/xfrm.h> 103#include <net/xfrm.h>
104#include "udp_impl.h"
111 105
112/* 106/*
113 * Snmp MIB for the UDP layer 107 * Snmp MIB for the UDP layer
@@ -120,26 +114,30 @@ DEFINE_RWLOCK(udp_hash_lock);
120 114
121static int udp_port_rover; 115static int udp_port_rover;
122 116
123static inline int udp_lport_inuse(u16 num) 117static inline int __udp_lib_lport_inuse(__be16 num, struct hlist_head udptable[])
124{ 118{
125 struct sock *sk; 119 struct sock *sk;
126 struct hlist_node *node; 120 struct hlist_node *node;
127 121
128 sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) 122 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
129 if (inet_sk(sk)->num == num) 123 if (inet_sk(sk)->num == num)
130 return 1; 124 return 1;
131 return 0; 125 return 0;
132} 126}
133 127
134/** 128/**
135 * udp_get_port - common port lookup for IPv4 and IPv6 129 * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
136 * 130 *
137 * @sk: socket struct in question 131 * @sk: socket struct in question
138 * @snum: port number to look up 132 * @snum: port number to look up
133 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
134 * @port_rover: pointer to record of last unallocated port
139 * @saddr_comp: AF-dependent comparison of bound local IP addresses 135 * @saddr_comp: AF-dependent comparison of bound local IP addresses
140 */ 136 */
141int udp_get_port(struct sock *sk, unsigned short snum, 137int __udp_lib_get_port(struct sock *sk, unsigned short snum,
142 int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) 138 struct hlist_head udptable[], int *port_rover,
139 int (*saddr_comp)(const struct sock *sk1,
140 const struct sock *sk2 ) )
143{ 141{
144 struct hlist_node *node; 142 struct hlist_node *node;
145 struct hlist_head *head; 143 struct hlist_head *head;
@@ -150,15 +148,15 @@ int udp_get_port(struct sock *sk, unsigned short snum,
150 if (snum == 0) { 148 if (snum == 0) {
151 int best_size_so_far, best, result, i; 149 int best_size_so_far, best, result, i;
152 150
153 if (udp_port_rover > sysctl_local_port_range[1] || 151 if (*port_rover > sysctl_local_port_range[1] ||
154 udp_port_rover < sysctl_local_port_range[0]) 152 *port_rover < sysctl_local_port_range[0])
155 udp_port_rover = sysctl_local_port_range[0]; 153 *port_rover = sysctl_local_port_range[0];
156 best_size_so_far = 32767; 154 best_size_so_far = 32767;
157 best = result = udp_port_rover; 155 best = result = *port_rover;
158 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { 156 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
159 int size; 157 int size;
160 158
161 head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; 159 head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
162 if (hlist_empty(head)) { 160 if (hlist_empty(head)) {
163 if (result > sysctl_local_port_range[1]) 161 if (result > sysctl_local_port_range[1])
164 result = sysctl_local_port_range[0] + 162 result = sysctl_local_port_range[0] +
@@ -179,15 +177,15 @@ int udp_get_port(struct sock *sk, unsigned short snum,
179 result = sysctl_local_port_range[0] 177 result = sysctl_local_port_range[0]
180 + ((result - sysctl_local_port_range[0]) & 178 + ((result - sysctl_local_port_range[0]) &
181 (UDP_HTABLE_SIZE - 1)); 179 (UDP_HTABLE_SIZE - 1));
182 if (!udp_lport_inuse(result)) 180 if (! __udp_lib_lport_inuse(result, udptable))
183 break; 181 break;
184 } 182 }
185 if (i >= (1 << 16) / UDP_HTABLE_SIZE) 183 if (i >= (1 << 16) / UDP_HTABLE_SIZE)
186 goto fail; 184 goto fail;
187gotit: 185gotit:
188 udp_port_rover = snum = result; 186 *port_rover = snum = result;
189 } else { 187 } else {
190 head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; 188 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
191 189
192 sk_for_each(sk2, node, head) 190 sk_for_each(sk2, node, head)
193 if (inet_sk(sk2)->num == snum && 191 if (inet_sk(sk2)->num == snum &&
@@ -195,12 +193,12 @@ gotit:
195 (!sk2->sk_reuse || !sk->sk_reuse) && 193 (!sk2->sk_reuse || !sk->sk_reuse) &&
196 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 194 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
197 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 195 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
198 (*saddr_cmp)(sk, sk2) ) 196 (*saddr_comp)(sk, sk2) )
199 goto fail; 197 goto fail;
200 } 198 }
201 inet_sk(sk)->num = snum; 199 inet_sk(sk)->num = snum;
202 if (sk_unhashed(sk)) { 200 if (sk_unhashed(sk)) {
203 head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; 201 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
204 sk_add_node(sk, head); 202 sk_add_node(sk, head);
205 sock_prot_inc_use(sk->sk_prot); 203 sock_prot_inc_use(sk->sk_prot);
206 } 204 }
@@ -210,7 +208,13 @@ fail:
210 return error; 208 return error;
211} 209}
212 210
213static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 211__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
212 int (*scmp)(const struct sock *, const struct sock *))
213{
214 return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
215}
216
217inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
214{ 218{
215 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 219 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
216 220
@@ -224,34 +228,20 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
224 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); 228 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
225} 229}
226 230
227
228static void udp_v4_hash(struct sock *sk)
229{
230 BUG();
231}
232
233static void udp_v4_unhash(struct sock *sk)
234{
235 write_lock_bh(&udp_hash_lock);
236 if (sk_del_node_init(sk)) {
237 inet_sk(sk)->num = 0;
238 sock_prot_dec_use(sk->sk_prot);
239 }
240 write_unlock_bh(&udp_hash_lock);
241}
242
243/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 231/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
244 * harder than this. -DaveM 232 * harder than this. -DaveM
245 */ 233 */
246static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, 234static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
247 __be32 daddr, __be16 dport, int dif) 235 __be32 daddr, __be16 dport,
236 int dif, struct hlist_head udptable[])
248{ 237{
249 struct sock *sk, *result = NULL; 238 struct sock *sk, *result = NULL;
250 struct hlist_node *node; 239 struct hlist_node *node;
251 unsigned short hnum = ntohs(dport); 240 unsigned short hnum = ntohs(dport);
252 int badness = -1; 241 int badness = -1;
253 242
254 sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { 243 read_lock(&udp_hash_lock);
244 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
255 struct inet_sock *inet = inet_sk(sk); 245 struct inet_sock *inet = inet_sk(sk);
256 246
257 if (inet->num == hnum && !ipv6_only_sock(sk)) { 247 if (inet->num == hnum && !ipv6_only_sock(sk)) {
@@ -285,20 +275,10 @@ static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport,
285 } 275 }
286 } 276 }
287 } 277 }
288 return result; 278 if (result)
289} 279 sock_hold(result);
290
291static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport,
292 __be32 daddr, __be16 dport, int dif)
293{
294 struct sock *sk;
295
296 read_lock(&udp_hash_lock);
297 sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
298 if (sk)
299 sock_hold(sk);
300 read_unlock(&udp_hash_lock); 280 read_unlock(&udp_hash_lock);
301 return sk; 281 return result;
302} 282}
303 283
304static inline struct sock *udp_v4_mcast_next(struct sock *sk, 284static inline struct sock *udp_v4_mcast_next(struct sock *sk,
@@ -340,7 +320,7 @@ found:
340 * to find the appropriate port. 320 * to find the appropriate port.
341 */ 321 */
342 322
343void udp_err(struct sk_buff *skb, u32 info) 323void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
344{ 324{
345 struct inet_sock *inet; 325 struct inet_sock *inet;
346 struct iphdr *iph = (struct iphdr*)skb->data; 326 struct iphdr *iph = (struct iphdr*)skb->data;
@@ -351,7 +331,8 @@ void udp_err(struct sk_buff *skb, u32 info)
351 int harderr; 331 int harderr;
352 int err; 332 int err;
353 333
354 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); 334 sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
335 skb->dev->ifindex, udptable );
355 if (sk == NULL) { 336 if (sk == NULL) {
356 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 337 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
357 return; /* No socket for error */ 338 return; /* No socket for error */
@@ -405,6 +386,11 @@ out:
405 sock_put(sk); 386 sock_put(sk);
406} 387}
407 388
389__inline__ void udp_err(struct sk_buff *skb, u32 info)
390{
391 return __udp4_lib_err(skb, info, udp_hash);
392}
393
408/* 394/*
409 * Throw away all pending data and cancel the corking. Socket is locked. 395 * Throw away all pending data and cancel the corking. Socket is locked.
410 */ 396 */
@@ -419,16 +405,56 @@ static void udp_flush_pending_frames(struct sock *sk)
419 } 405 }
420} 406}
421 407
408/**
409 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
410 * @sk: socket we are sending on
411 * @skb: sk_buff containing the filled-in UDP header
412 * (checksum field must be zeroed out)
413 */
414static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
415 __be32 src, __be32 dst, int len )
416{
417 unsigned int csum = 0, offset;
418 struct udphdr *uh = skb->h.uh;
419
420 if (skb_queue_len(&sk->sk_write_queue) == 1) {
421 /*
422 * Only one fragment on the socket.
423 */
424 skb->csum = offsetof(struct udphdr, check);
425 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
426 } else {
427 /*
428 * HW-checksum won't work as there are two or more
429 * fragments on the socket so that all csums of sk_buffs
430 * should be together
431 */
432 offset = skb->h.raw - skb->data;
433 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
434
435 skb->ip_summed = CHECKSUM_NONE;
436
437 skb_queue_walk(&sk->sk_write_queue, skb) {
438 csum = csum_add(csum, skb->csum);
439 }
440
441 uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
442 if (uh->check == 0)
443 uh->check = -1;
444 }
445}
446
422/* 447/*
423 * Push out all pending data as one UDP datagram. Socket is locked. 448 * Push out all pending data as one UDP datagram. Socket is locked.
424 */ 449 */
425static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) 450int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
426{ 451{
427 struct inet_sock *inet = inet_sk(sk); 452 struct inet_sock *inet = inet_sk(sk);
428 struct flowi *fl = &inet->cork.fl; 453 struct flowi *fl = &inet->cork.fl;
429 struct sk_buff *skb; 454 struct sk_buff *skb;
430 struct udphdr *uh; 455 struct udphdr *uh;
431 int err = 0; 456 int err = 0;
457 u32 csum = 0;
432 458
433 /* Grab the skbuff where UDP header space exists. */ 459 /* Grab the skbuff where UDP header space exists. */
434 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) 460 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
@@ -443,52 +469,28 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
443 uh->len = htons(up->len); 469 uh->len = htons(up->len);
444 uh->check = 0; 470 uh->check = 0;
445 471
446 if (sk->sk_no_check == UDP_CSUM_NOXMIT) { 472 if (up->pcflag) /* UDP-Lite */
473 csum = udplite_csum_outgoing(sk, skb);
474
475 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
476
447 skb->ip_summed = CHECKSUM_NONE; 477 skb->ip_summed = CHECKSUM_NONE;
448 goto send; 478 goto send;
449 }
450 479
451 if (skb_queue_len(&sk->sk_write_queue) == 1) { 480 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
452 /*
453 * Only one fragment on the socket.
454 */
455 if (skb->ip_summed == CHECKSUM_PARTIAL) {
456 skb->csum = offsetof(struct udphdr, check);
457 uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
458 up->len, IPPROTO_UDP, 0);
459 } else {
460 skb->csum = csum_partial((char *)uh,
461 sizeof(struct udphdr), skb->csum);
462 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
463 up->len, IPPROTO_UDP, skb->csum);
464 if (uh->check == 0)
465 uh->check = -1;
466 }
467 } else {
468 unsigned int csum = 0;
469 /*
470 * HW-checksum won't work as there are two or more
471 * fragments on the socket so that all csums of sk_buffs
472 * should be together.
473 */
474 if (skb->ip_summed == CHECKSUM_PARTIAL) {
475 int offset = (unsigned char *)uh - skb->data;
476 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
477 481
478 skb->ip_summed = CHECKSUM_NONE; 482 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
479 } else { 483 goto send;
480 skb->csum = csum_partial((char *)uh, 484
481 sizeof(struct udphdr), skb->csum); 485 } else /* `normal' UDP */
482 } 486 csum = udp_csum_outgoing(sk, skb);
487
488 /* add protocol-dependent pseudo-header */
489 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
490 sk->sk_protocol, csum );
491 if (uh->check == 0)
492 uh->check = -1;
483 493
484 skb_queue_walk(&sk->sk_write_queue, skb) {
485 csum = csum_add(csum, skb->csum);
486 }
487 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
488 up->len, IPPROTO_UDP, csum);
489 if (uh->check == 0)
490 uh->check = -1;
491 }
492send: 494send:
493 err = ip_push_pending_frames(sk); 495 err = ip_push_pending_frames(sk);
494out: 496out:
@@ -497,12 +499,6 @@ out:
497 return err; 499 return err;
498} 500}
499 501
500
501static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base)
502{
503 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
504}
505
506int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 502int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
507 size_t len) 503 size_t len)
508{ 504{
@@ -516,8 +512,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
516 __be32 daddr, faddr, saddr; 512 __be32 daddr, faddr, saddr;
517 __be16 dport; 513 __be16 dport;
518 u8 tos; 514 u8 tos;
519 int err; 515 int err, is_udplite = up->pcflag;
520 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 516 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
517 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
521 518
522 if (len > 0xFFFF) 519 if (len > 0xFFFF)
523 return -EMSGSIZE; 520 return -EMSGSIZE;
@@ -622,7 +619,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
622 { .daddr = faddr, 619 { .daddr = faddr,
623 .saddr = saddr, 620 .saddr = saddr,
624 .tos = tos } }, 621 .tos = tos } },
625 .proto = IPPROTO_UDP, 622 .proto = sk->sk_protocol,
626 .uli_u = { .ports = 623 .uli_u = { .ports =
627 { .sport = inet->sport, 624 { .sport = inet->sport,
628 .dport = dport } } }; 625 .dport = dport } } };
@@ -668,8 +665,9 @@ back_from_confirm:
668 665
669do_append_data: 666do_append_data:
670 up->len += ulen; 667 up->len += ulen;
671 err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 668 getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
672 sizeof(struct udphdr), &ipc, rt, 669 err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
670 sizeof(struct udphdr), &ipc, rt,
673 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 671 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
674 if (err) 672 if (err)
675 udp_flush_pending_frames(sk); 673 udp_flush_pending_frames(sk);
@@ -684,7 +682,7 @@ out:
684 if (free) 682 if (free)
685 kfree(ipc.opt); 683 kfree(ipc.opt);
686 if (!err) { 684 if (!err) {
687 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); 685 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
688 return len; 686 return len;
689 } 687 }
690 /* 688 /*
@@ -695,7 +693,7 @@ out:
695 * seems like overkill. 693 * seems like overkill.
696 */ 694 */
697 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 695 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
698 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); 696 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
699 } 697 }
700 return err; 698 return err;
701 699
@@ -707,8 +705,8 @@ do_confirm:
707 goto out; 705 goto out;
708} 706}
709 707
710static int udp_sendpage(struct sock *sk, struct page *page, int offset, 708int udp_sendpage(struct sock *sk, struct page *page, int offset,
711 size_t size, int flags) 709 size_t size, int flags)
712{ 710{
713 struct udp_sock *up = udp_sk(sk); 711 struct udp_sock *up = udp_sk(sk);
714 int ret; 712 int ret;
@@ -795,29 +793,18 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
795 return(0); 793 return(0);
796} 794}
797 795
798static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
799{
800 return __skb_checksum_complete(skb);
801}
802
803static __inline__ int udp_checksum_complete(struct sk_buff *skb)
804{
805 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
806 __udp_checksum_complete(skb);
807}
808
809/* 796/*
810 * This should be easy, if there is something there we 797 * This should be easy, if there is something there we
811 * return it, otherwise we block. 798 * return it, otherwise we block.
812 */ 799 */
813 800
814static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 801int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
815 size_t len, int noblock, int flags, int *addr_len) 802 size_t len, int noblock, int flags, int *addr_len)
816{ 803{
817 struct inet_sock *inet = inet_sk(sk); 804 struct inet_sock *inet = inet_sk(sk);
818 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 805 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
819 struct sk_buff *skb; 806 struct sk_buff *skb;
820 int copied, err; 807 int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
821 808
822 /* 809 /*
823 * Check any passed addresses 810 * Check any passed addresses
@@ -839,15 +826,25 @@ try_again:
839 msg->msg_flags |= MSG_TRUNC; 826 msg->msg_flags |= MSG_TRUNC;
840 } 827 }
841 828
842 if (skb->ip_summed==CHECKSUM_UNNECESSARY) { 829 /*
843 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 830 * Decide whether to checksum and/or copy data.
844 copied); 831 *
845 } else if (msg->msg_flags&MSG_TRUNC) { 832 * UDP: checksum may have been computed in HW,
846 if (__udp_checksum_complete(skb)) 833 * (re-)compute it if message is truncated.
834 * UDP-Lite: always needs to checksum, no HW support.
835 */
836 copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
837
838 if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
839 if (__udp_lib_checksum_complete(skb))
847 goto csum_copy_err; 840 goto csum_copy_err;
848 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, 841 copy_only = 1;
849 copied); 842 }
850 } else { 843
844 if (copy_only)
845 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
846 msg->msg_iov, copied );
847 else {
851 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); 848 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
852 849
853 if (err == -EINVAL) 850 if (err == -EINVAL)
@@ -880,7 +877,7 @@ out:
880 return err; 877 return err;
881 878
882csum_copy_err: 879csum_copy_err:
883 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 880 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
884 881
885 skb_kill_datagram(sk, skb, flags); 882 skb_kill_datagram(sk, skb, flags);
886 883
@@ -912,11 +909,6 @@ int udp_disconnect(struct sock *sk, int flags)
912 return 0; 909 return 0;
913} 910}
914 911
915static void udp_close(struct sock *sk, long timeout)
916{
917 sk_common_release(sk);
918}
919
920/* return: 912/* return:
921 * 1 if the the UDP system should process it 913 * 1 if the the UDP system should process it
922 * 0 if we should drop this packet 914 * 0 if we should drop this packet
@@ -1022,7 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
1022 * Note that in the success and error cases, the skb is assumed to 1014 * Note that in the success and error cases, the skb is assumed to
1023 * have either been requeued or freed. 1015 * have either been requeued or freed.
1024 */ 1016 */
1025static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 1017int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1026{ 1018{
1027 struct udp_sock *up = udp_sk(sk); 1019 struct udp_sock *up = udp_sk(sk);
1028 int rc; 1020 int rc;
@@ -1030,10 +1022,8 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1030 /* 1022 /*
1031 * Charge it to the socket, dropping if the queue is full. 1023 * Charge it to the socket, dropping if the queue is full.
1032 */ 1024 */
1033 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 1025 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1034 kfree_skb(skb); 1026 goto drop;
1035 return -1;
1036 }
1037 nf_reset(skb); 1027 nf_reset(skb);
1038 1028
1039 if (up->encap_type) { 1029 if (up->encap_type) {
@@ -1057,31 +1047,68 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1057 if (ret < 0) { 1047 if (ret < 0) {
1058 /* process the ESP packet */ 1048 /* process the ESP packet */
1059 ret = xfrm4_rcv_encap(skb, up->encap_type); 1049 ret = xfrm4_rcv_encap(skb, up->encap_type);
1060 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); 1050 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
1061 return -ret; 1051 return -ret;
1062 } 1052 }
1063 /* FALLTHROUGH -- it's a UDP Packet */ 1053 /* FALLTHROUGH -- it's a UDP Packet */
1064 } 1054 }
1065 1055
1066 if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { 1056 /*
1067 if (__udp_checksum_complete(skb)) { 1057 * UDP-Lite specific tests, ignored on UDP sockets
1068 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1058 */
1069 kfree_skb(skb); 1059 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
1070 return -1; 1060
1061 /*
1062 * MIB statistics other than incrementing the error count are
1063 * disabled for the following two types of errors: these depend
1064 * on the application settings, not on the functioning of the
1065 * protocol stack as such.
1066 *
1067 * RFC 3828 here recommends (sec 3.3): "There should also be a
1068 * way ... to ... at least let the receiving application block
1069 * delivery of packets with coverage values less than a value
1070 * provided by the application."
1071 */
1072 if (up->pcrlen == 0) { /* full coverage was set */
1073 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
1074 "%d while full coverage %d requested\n",
1075 UDP_SKB_CB(skb)->cscov, skb->len);
1076 goto drop;
1071 } 1077 }
1078 /* The next case involves violating the min. coverage requested
1079 * by the receiver. This is subtle: if receiver wants x and x is
1080 * greater than the buffersize/MTU then receiver will complain
1081 * that it wants x while sender emits packets of smaller size y.
1082 * Therefore the above ...()->partial_cov statement is essential.
1083 */
1084 if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
1085 LIMIT_NETDEBUG(KERN_WARNING
1086 "UDPLITE: coverage %d too small, need min %d\n",
1087 UDP_SKB_CB(skb)->cscov, up->pcrlen);
1088 goto drop;
1089 }
1090 }
1091
1092 if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
1093 if (__udp_lib_checksum_complete(skb))
1094 goto drop;
1072 skb->ip_summed = CHECKSUM_UNNECESSARY; 1095 skb->ip_summed = CHECKSUM_UNNECESSARY;
1073 } 1096 }
1074 1097
1075 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 1098 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1076 /* Note that an ENOMEM error is charged twice */ 1099 /* Note that an ENOMEM error is charged twice */
1077 if (rc == -ENOMEM) 1100 if (rc == -ENOMEM)
1078 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); 1101 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
1079 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1102 goto drop;
1080 kfree_skb(skb);
1081 return -1;
1082 } 1103 }
1083 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); 1104
1105 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
1084 return 0; 1106 return 0;
1107
1108drop:
1109 UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
1110 kfree_skb(skb);
1111 return -1;
1085} 1112}
1086 1113
1087/* 1114/*
@@ -1090,14 +1117,16 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1090 * Note: called only from the BH handler context, 1117 * Note: called only from the BH handler context,
1091 * so we don't need to lock the hashes. 1118 * so we don't need to lock the hashes.
1092 */ 1119 */
1093static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, 1120static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1094 __be32 saddr, __be32 daddr) 1121 struct udphdr *uh,
1122 __be32 saddr, __be32 daddr,
1123 struct hlist_head udptable[])
1095{ 1124{
1096 struct sock *sk; 1125 struct sock *sk;
1097 int dif; 1126 int dif;
1098 1127
1099 read_lock(&udp_hash_lock); 1128 read_lock(&udp_hash_lock);
1100 sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); 1129 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
1101 dif = skb->dev->ifindex; 1130 dif = skb->dev->ifindex;
1102 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); 1131 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1103 if (sk) { 1132 if (sk) {
@@ -1131,65 +1160,75 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
1131 * Otherwise, csum completion requires checksumming packet body, 1160 * Otherwise, csum completion requires checksumming packet body,
1132 * including udp header and folding it to skb->csum. 1161 * including udp header and folding it to skb->csum.
1133 */ 1162 */
1134static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, 1163static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
1135 unsigned short ulen, __be32 saddr, __be32 daddr)
1136{ 1164{
1137 if (uh->check == 0) { 1165 if (uh->check == 0) {
1138 skb->ip_summed = CHECKSUM_UNNECESSARY; 1166 skb->ip_summed = CHECKSUM_UNNECESSARY;
1139 } else if (skb->ip_summed == CHECKSUM_COMPLETE) { 1167 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1140 if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) 1168 if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
1169 skb->len, IPPROTO_UDP, skb->csum ))
1141 skb->ip_summed = CHECKSUM_UNNECESSARY; 1170 skb->ip_summed = CHECKSUM_UNNECESSARY;
1142 } 1171 }
1143 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 1172 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
1144 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 1173 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
1174 skb->nh.iph->daddr,
1175 skb->len, IPPROTO_UDP, 0);
1145 /* Probably, we should checksum udp header (it should be in cache 1176 /* Probably, we should checksum udp header (it should be in cache
1146 * in any case) and data in tiny packets (< rx copybreak). 1177 * in any case) and data in tiny packets (< rx copybreak).
1147 */ 1178 */
1179
1180 /* UDP = UDP-Lite with a non-partial checksum coverage */
1181 UDP_SKB_CB(skb)->partial_cov = 0;
1148} 1182}
1149 1183
1150/* 1184/*
1151 * All we need to do is get the socket, and then do a checksum. 1185 * All we need to do is get the socket, and then do a checksum.
1152 */ 1186 */
1153 1187
1154int udp_rcv(struct sk_buff *skb) 1188int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1189 int is_udplite)
1155{ 1190{
1156 struct sock *sk; 1191 struct sock *sk;
1157 struct udphdr *uh; 1192 struct udphdr *uh = skb->h.uh;
1158 unsigned short ulen; 1193 unsigned short ulen;
1159 struct rtable *rt = (struct rtable*)skb->dst; 1194 struct rtable *rt = (struct rtable*)skb->dst;
1160 __be32 saddr = skb->nh.iph->saddr; 1195 __be32 saddr = skb->nh.iph->saddr;
1161 __be32 daddr = skb->nh.iph->daddr; 1196 __be32 daddr = skb->nh.iph->daddr;
1162 int len = skb->len;
1163 1197
1164 /* 1198 /*
1165 * Validate the packet and the UDP length. 1199 * Validate the packet.
1166 */ 1200 */
1167 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 1201 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1168 goto no_header; 1202 goto drop; /* No space for header. */
1169
1170 uh = skb->h.uh;
1171 1203
1172 ulen = ntohs(uh->len); 1204 ulen = ntohs(uh->len);
1173 1205 if (ulen > skb->len)
1174 if (ulen > len || ulen < sizeof(*uh))
1175 goto short_packet; 1206 goto short_packet;
1176 1207
1177 if (pskb_trim_rcsum(skb, ulen)) 1208 if(! is_udplite ) { /* UDP validates ulen. */
1178 goto short_packet; 1209
1210 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1211 goto short_packet;
1179 1212
1180 udp_checksum_init(skb, uh, ulen, saddr, daddr); 1213 udp4_csum_init(skb, uh);
1214
1215 } else { /* UDP-Lite validates cscov. */
1216 if (udplite4_csum_init(skb, uh))
1217 goto csum_error;
1218 }
1181 1219
1182 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1220 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1183 return udp_v4_mcast_deliver(skb, uh, saddr, daddr); 1221 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1184 1222
1185 sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); 1223 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
1224 skb->dev->ifindex, udptable );
1186 1225
1187 if (sk != NULL) { 1226 if (sk != NULL) {
1188 int ret = udp_queue_rcv_skb(sk, skb); 1227 int ret = udp_queue_rcv_skb(sk, skb);
1189 sock_put(sk); 1228 sock_put(sk);
1190 1229
1191 /* a return value > 0 means to resubmit the input, but 1230 /* a return value > 0 means to resubmit the input, but
1192 * it it wants the return to be -protocol, or 0 1231 * it wants the return to be -protocol, or 0
1193 */ 1232 */
1194 if (ret > 0) 1233 if (ret > 0)
1195 return -ret; 1234 return -ret;
@@ -1201,10 +1240,10 @@ int udp_rcv(struct sk_buff *skb)
1201 nf_reset(skb); 1240 nf_reset(skb);
1202 1241
1203 /* No socket. Drop packet silently, if checksum is wrong */ 1242 /* No socket. Drop packet silently, if checksum is wrong */
1204 if (udp_checksum_complete(skb)) 1243 if (udp_lib_checksum_complete(skb))
1205 goto csum_error; 1244 goto csum_error;
1206 1245
1207 UDP_INC_STATS_BH(UDP_MIB_NOPORTS); 1246 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
1208 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1247 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1209 1248
1210 /* 1249 /*
@@ -1215,36 +1254,40 @@ int udp_rcv(struct sk_buff *skb)
1215 return(0); 1254 return(0);
1216 1255
1217short_packet: 1256short_packet:
1218 LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", 1257 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1258 is_udplite? "-Lite" : "",
1219 NIPQUAD(saddr), 1259 NIPQUAD(saddr),
1220 ntohs(uh->source), 1260 ntohs(uh->source),
1221 ulen, 1261 ulen,
1222 len, 1262 skb->len,
1223 NIPQUAD(daddr), 1263 NIPQUAD(daddr),
1224 ntohs(uh->dest)); 1264 ntohs(uh->dest));
1225no_header: 1265 goto drop;
1226 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
1227 kfree_skb(skb);
1228 return(0);
1229 1266
1230csum_error: 1267csum_error:
1231 /* 1268 /*
1232 * RFC1122: OK. Discards the bad packet silently (as far as 1269 * RFC1122: OK. Discards the bad packet silently (as far as
1233 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1270 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1234 */ 1271 */
1235 LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", 1272 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1273 is_udplite? "-Lite" : "",
1236 NIPQUAD(saddr), 1274 NIPQUAD(saddr),
1237 ntohs(uh->source), 1275 ntohs(uh->source),
1238 NIPQUAD(daddr), 1276 NIPQUAD(daddr),
1239 ntohs(uh->dest), 1277 ntohs(uh->dest),
1240 ulen); 1278 ulen);
1241drop: 1279drop:
1242 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1280 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1243 kfree_skb(skb); 1281 kfree_skb(skb);
1244 return(0); 1282 return(0);
1245} 1283}
1246 1284
1247static int udp_destroy_sock(struct sock *sk) 1285__inline__ int udp_rcv(struct sk_buff *skb)
1286{
1287 return __udp4_lib_rcv(skb, udp_hash, 0);
1288}
1289
1290int udp_destroy_sock(struct sock *sk)
1248{ 1291{
1249 lock_sock(sk); 1292 lock_sock(sk);
1250 udp_flush_pending_frames(sk); 1293 udp_flush_pending_frames(sk);
@@ -1293,6 +1336,32 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
1293 } 1336 }
1294 break; 1337 break;
1295 1338
1339 /*
1340 * UDP-Lite's partial checksum coverage (RFC 3828).
1341 */
1342 /* The sender sets actual checksum coverage length via this option.
1343 * The case coverage > packet length is handled by send module. */
1344 case UDPLITE_SEND_CSCOV:
1345 if (!up->pcflag) /* Disable the option on UDP sockets */
1346 return -ENOPROTOOPT;
1347 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1348 val = 8;
1349 up->pcslen = val;
1350 up->pcflag |= UDPLITE_SEND_CC;
1351 break;
1352
1353 /* The receiver specifies a minimum checksum coverage value. To make
1354 * sense, this should be set to at least 8 (as done below). If zero is
1355 * used, this again means full checksum coverage. */
1356 case UDPLITE_RECV_CSCOV:
1357 if (!up->pcflag) /* Disable the option on UDP sockets */
1358 return -ENOPROTOOPT;
1359 if (val != 0 && val < 8) /* Avoid silly minimal values. */
1360 val = 8;
1361 up->pcrlen = val;
1362 up->pcflag |= UDPLITE_RECV_CC;
1363 break;
1364
1296 default: 1365 default:
1297 err = -ENOPROTOOPT; 1366 err = -ENOPROTOOPT;
1298 break; 1367 break;
@@ -1301,21 +1370,21 @@ static int do_udp_setsockopt(struct sock *sk, int level, int optname,
1301 return err; 1370 return err;
1302} 1371}
1303 1372
1304static int udp_setsockopt(struct sock *sk, int level, int optname, 1373int udp_setsockopt(struct sock *sk, int level, int optname,
1305 char __user *optval, int optlen) 1374 char __user *optval, int optlen)
1306{ 1375{
1307 if (level != SOL_UDP) 1376 if (level == SOL_UDP || level == SOL_UDPLITE)
1308 return ip_setsockopt(sk, level, optname, optval, optlen); 1377 return do_udp_setsockopt(sk, level, optname, optval, optlen);
1309 return do_udp_setsockopt(sk, level, optname, optval, optlen); 1378 return ip_setsockopt(sk, level, optname, optval, optlen);
1310} 1379}
1311 1380
1312#ifdef CONFIG_COMPAT 1381#ifdef CONFIG_COMPAT
1313static int compat_udp_setsockopt(struct sock *sk, int level, int optname, 1382int compat_udp_setsockopt(struct sock *sk, int level, int optname,
1314 char __user *optval, int optlen) 1383 char __user *optval, int optlen)
1315{ 1384{
1316 if (level != SOL_UDP) 1385 if (level == SOL_UDP || level == SOL_UDPLITE)
1317 return compat_ip_setsockopt(sk, level, optname, optval, optlen); 1386 return do_udp_setsockopt(sk, level, optname, optval, optlen);
1318 return do_udp_setsockopt(sk, level, optname, optval, optlen); 1387 return compat_ip_setsockopt(sk, level, optname, optval, optlen);
1319} 1388}
1320#endif 1389#endif
1321 1390
@@ -1342,6 +1411,16 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname,
1342 val = up->encap_type; 1411 val = up->encap_type;
1343 break; 1412 break;
1344 1413
1414 /* The following two cannot be changed on UDP sockets; the value returned is
1415 * always 0 (which corresponds to the full checksum coverage of UDP). */
1416 case UDPLITE_SEND_CSCOV:
1417 val = up->pcslen;
1418 break;
1419
1420 case UDPLITE_RECV_CSCOV:
1421 val = up->pcrlen;
1422 break;
1423
1345 default: 1424 default:
1346 return -ENOPROTOOPT; 1425 return -ENOPROTOOPT;
1347 }; 1426 };
@@ -1353,21 +1432,21 @@ static int do_udp_getsockopt(struct sock *sk, int level, int optname,
1353 return 0; 1432 return 0;
1354} 1433}
1355 1434
1356static int udp_getsockopt(struct sock *sk, int level, int optname, 1435int udp_getsockopt(struct sock *sk, int level, int optname,
1357 char __user *optval, int __user *optlen) 1436 char __user *optval, int __user *optlen)
1358{ 1437{
1359 if (level != SOL_UDP) 1438 if (level == SOL_UDP || level == SOL_UDPLITE)
1360 return ip_getsockopt(sk, level, optname, optval, optlen); 1439 return do_udp_getsockopt(sk, level, optname, optval, optlen);
1361 return do_udp_getsockopt(sk, level, optname, optval, optlen); 1440 return ip_getsockopt(sk, level, optname, optval, optlen);
1362} 1441}
1363 1442
1364#ifdef CONFIG_COMPAT 1443#ifdef CONFIG_COMPAT
1365static int compat_udp_getsockopt(struct sock *sk, int level, int optname, 1444int compat_udp_getsockopt(struct sock *sk, int level, int optname,
1366 char __user *optval, int __user *optlen) 1445 char __user *optval, int __user *optlen)
1367{ 1446{
1368 if (level != SOL_UDP) 1447 if (level == SOL_UDP || level == SOL_UDPLITE)
1369 return compat_ip_getsockopt(sk, level, optname, optval, optlen); 1448 return do_udp_getsockopt(sk, level, optname, optval, optlen);
1370 return do_udp_getsockopt(sk, level, optname, optval, optlen); 1449 return compat_ip_getsockopt(sk, level, optname, optval, optlen);
1371} 1450}
1372#endif 1451#endif
1373/** 1452/**
@@ -1387,7 +1466,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1387{ 1466{
1388 unsigned int mask = datagram_poll(file, sock, wait); 1467 unsigned int mask = datagram_poll(file, sock, wait);
1389 struct sock *sk = sock->sk; 1468 struct sock *sk = sock->sk;
1390 1469 int is_lite = IS_UDPLITE(sk);
1470
1391 /* Check for false positives due to checksum errors */ 1471 /* Check for false positives due to checksum errors */
1392 if ( (mask & POLLRDNORM) && 1472 if ( (mask & POLLRDNORM) &&
1393 !(file->f_flags & O_NONBLOCK) && 1473 !(file->f_flags & O_NONBLOCK) &&
@@ -1397,8 +1477,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1397 1477
1398 spin_lock_bh(&rcvq->lock); 1478 spin_lock_bh(&rcvq->lock);
1399 while ((skb = skb_peek(rcvq)) != NULL) { 1479 while ((skb = skb_peek(rcvq)) != NULL) {
1400 if (udp_checksum_complete(skb)) { 1480 if (udp_lib_checksum_complete(skb)) {
1401 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1481 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
1402 __skb_unlink(skb, rcvq); 1482 __skb_unlink(skb, rcvq);
1403 kfree_skb(skb); 1483 kfree_skb(skb);
1404 } else { 1484 } else {
@@ -1420,7 +1500,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1420struct proto udp_prot = { 1500struct proto udp_prot = {
1421 .name = "UDP", 1501 .name = "UDP",
1422 .owner = THIS_MODULE, 1502 .owner = THIS_MODULE,
1423 .close = udp_close, 1503 .close = udp_lib_close,
1424 .connect = ip4_datagram_connect, 1504 .connect = ip4_datagram_connect,
1425 .disconnect = udp_disconnect, 1505 .disconnect = udp_disconnect,
1426 .ioctl = udp_ioctl, 1506 .ioctl = udp_ioctl,
@@ -1431,8 +1511,8 @@ struct proto udp_prot = {
1431 .recvmsg = udp_recvmsg, 1511 .recvmsg = udp_recvmsg,
1432 .sendpage = udp_sendpage, 1512 .sendpage = udp_sendpage,
1433 .backlog_rcv = udp_queue_rcv_skb, 1513 .backlog_rcv = udp_queue_rcv_skb,
1434 .hash = udp_v4_hash, 1514 .hash = udp_lib_hash,
1435 .unhash = udp_v4_unhash, 1515 .unhash = udp_lib_unhash,
1436 .get_port = udp_v4_get_port, 1516 .get_port = udp_v4_get_port,
1437 .obj_size = sizeof(struct udp_sock), 1517 .obj_size = sizeof(struct udp_sock),
1438#ifdef CONFIG_COMPAT 1518#ifdef CONFIG_COMPAT
@@ -1451,7 +1531,7 @@ static struct sock *udp_get_first(struct seq_file *seq)
1451 1531
1452 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1532 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
1453 struct hlist_node *node; 1533 struct hlist_node *node;
1454 sk_for_each(sk, node, &udp_hash[state->bucket]) { 1534 sk_for_each(sk, node, state->hashtable + state->bucket) {
1455 if (sk->sk_family == state->family) 1535 if (sk->sk_family == state->family)
1456 goto found; 1536 goto found;
1457 } 1537 }
@@ -1472,7 +1552,7 @@ try_again:
1472 } while (sk && sk->sk_family != state->family); 1552 } while (sk && sk->sk_family != state->family);
1473 1553
1474 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { 1554 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
1475 sk = sk_head(&udp_hash[state->bucket]); 1555 sk = sk_head(state->hashtable + state->bucket);
1476 goto try_again; 1556 goto try_again;
1477 } 1557 }
1478 return sk; 1558 return sk;
@@ -1522,6 +1602,7 @@ static int udp_seq_open(struct inode *inode, struct file *file)
1522 if (!s) 1602 if (!s)
1523 goto out; 1603 goto out;
1524 s->family = afinfo->family; 1604 s->family = afinfo->family;
1605 s->hashtable = afinfo->hashtable;
1525 s->seq_ops.start = udp_seq_start; 1606 s->seq_ops.start = udp_seq_start;
1526 s->seq_ops.next = udp_seq_next; 1607 s->seq_ops.next = udp_seq_next;
1527 s->seq_ops.show = afinfo->seq_show; 1608 s->seq_ops.show = afinfo->seq_show;
@@ -1588,7 +1669,7 @@ static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
1588 atomic_read(&sp->sk_refcnt), sp); 1669 atomic_read(&sp->sk_refcnt), sp);
1589} 1670}
1590 1671
1591static int udp4_seq_show(struct seq_file *seq, void *v) 1672int udp4_seq_show(struct seq_file *seq, void *v)
1592{ 1673{
1593 if (v == SEQ_START_TOKEN) 1674 if (v == SEQ_START_TOKEN)
1594 seq_printf(seq, "%-127s\n", 1675 seq_printf(seq, "%-127s\n",
@@ -1611,6 +1692,7 @@ static struct udp_seq_afinfo udp4_seq_afinfo = {
1611 .owner = THIS_MODULE, 1692 .owner = THIS_MODULE,
1612 .name = "udp", 1693 .name = "udp",
1613 .family = AF_INET, 1694 .family = AF_INET,
1695 .hashtable = udp_hash,
1614 .seq_show = udp4_seq_show, 1696 .seq_show = udp4_seq_show,
1615 .seq_fops = &udp4_seq_fops, 1697 .seq_fops = &udp4_seq_fops,
1616}; 1698};
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
new file mode 100644
index 000000000000..f6f4277ba6dc
--- /dev/null
+++ b/net/ipv4/udp_impl.h
@@ -0,0 +1,38 @@
1#ifndef _UDP4_IMPL_H
2#define _UDP4_IMPL_H
3#include <net/udp.h>
4#include <net/udplite.h>
5#include <net/protocol.h>
6#include <net/inet_common.h>
7
8extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
9extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
10
11extern int __udp_lib_get_port(struct sock *sk, unsigned short snum,
12 struct hlist_head udptable[], int *port_rover,
13 int (*)(const struct sock*,const struct sock*));
14extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
15
16
17extern int udp_setsockopt(struct sock *sk, int level, int optname,
18 char __user *optval, int optlen);
19extern int udp_getsockopt(struct sock *sk, int level, int optname,
20 char __user *optval, int __user *optlen);
21
22#ifdef CONFIG_COMPAT
23extern int compat_udp_setsockopt(struct sock *sk, int level, int optname,
24 char __user *optval, int optlen);
25extern int compat_udp_getsockopt(struct sock *sk, int level, int optname,
26 char __user *optval, int __user *optlen);
27#endif
28extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
29 size_t len, int noblock, int flags, int *addr_len);
30extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
31 size_t size, int flags);
32extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
33extern int udp_destroy_sock(struct sock *sk);
34
35#ifdef CONFIG_PROC_FS
36extern int udp4_seq_show(struct seq_file *seq, void *v);
37#endif
38#endif /* _UDP4_IMPL_H */
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
new file mode 100644
index 000000000000..561de6d8c734
--- /dev/null
+++ b/net/ipv4/udplite.c
@@ -0,0 +1,119 @@
1/*
2 * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828).
3 *
4 * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $
5 *
6 * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
7 *
8 * Changes:
9 * Fixes:
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15#include "udp_impl.h"
16DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
17
18struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
19static int udplite_port_rover;
20
21__inline__ int udplite_get_port(struct sock *sk, unsigned short p,
22 int (*c)(const struct sock *, const struct sock *))
23{
24 return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c);
25}
26
27static __inline__ int udplite_v4_get_port(struct sock *sk, unsigned short snum)
28{
29 return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
30}
31
32__inline__ int udplite_rcv(struct sk_buff *skb)
33{
34 return __udp4_lib_rcv(skb, udplite_hash, 1);
35}
36
37__inline__ void udplite_err(struct sk_buff *skb, u32 info)
38{
39 return __udp4_lib_err(skb, info, udplite_hash);
40}
41
42static struct net_protocol udplite_protocol = {
43 .handler = udplite_rcv,
44 .err_handler = udplite_err,
45 .no_policy = 1,
46};
47
48struct proto udplite_prot = {
49 .name = "UDP-Lite",
50 .owner = THIS_MODULE,
51 .close = udp_lib_close,
52 .connect = ip4_datagram_connect,
53 .disconnect = udp_disconnect,
54 .ioctl = udp_ioctl,
55 .init = udplite_sk_init,
56 .destroy = udp_destroy_sock,
57 .setsockopt = udp_setsockopt,
58 .getsockopt = udp_getsockopt,
59 .sendmsg = udp_sendmsg,
60 .recvmsg = udp_recvmsg,
61 .sendpage = udp_sendpage,
62 .backlog_rcv = udp_queue_rcv_skb,
63 .hash = udp_lib_hash,
64 .unhash = udp_lib_unhash,
65 .get_port = udplite_v4_get_port,
66 .obj_size = sizeof(struct udp_sock),
67#ifdef CONFIG_COMPAT
68 .compat_setsockopt = compat_udp_setsockopt,
69 .compat_getsockopt = compat_udp_getsockopt,
70#endif
71};
72
73static struct inet_protosw udplite4_protosw = {
74 .type = SOCK_DGRAM,
75 .protocol = IPPROTO_UDPLITE,
76 .prot = &udplite_prot,
77 .ops = &inet_dgram_ops,
78 .capability = -1,
79 .no_check = 0, /* must checksum (RFC 3828) */
80 .flags = INET_PROTOSW_PERMANENT,
81};
82
83#ifdef CONFIG_PROC_FS
84static struct file_operations udplite4_seq_fops;
85static struct udp_seq_afinfo udplite4_seq_afinfo = {
86 .owner = THIS_MODULE,
87 .name = "udplite",
88 .family = AF_INET,
89 .hashtable = udplite_hash,
90 .seq_show = udp4_seq_show,
91 .seq_fops = &udplite4_seq_fops,
92};
93#endif
94
95void __init udplite4_register(void)
96{
97 if (proto_register(&udplite_prot, 1))
98 goto out_register_err;
99
100 if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0)
101 goto out_unregister_proto;
102
103 inet_register_protosw(&udplite4_protosw);
104
105#ifdef CONFIG_PROC_FS
106 if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */
107 printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
108#endif
109 return;
110
111out_unregister_proto:
112 proto_unregister(&udplite_prot);
113out_register_err:
114 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__);
115}
116
117EXPORT_SYMBOL(udplite_hash);
118EXPORT_SYMBOL(udplite_prot);
119EXPORT_SYMBOL(udplite_get_port);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index da766234607b..d4107bb701b5 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -199,6 +199,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
199 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 199 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
200 switch (iph->protocol) { 200 switch (iph->protocol) {
201 case IPPROTO_UDP: 201 case IPPROTO_UDP:
202 case IPPROTO_UDPLITE:
202 case IPPROTO_TCP: 203 case IPPROTO_TCP:
203 case IPPROTO_SCTP: 204 case IPPROTO_SCTP:
204 case IPPROTO_DCCP: 205 case IPPROTO_DCCP: