author    Jeff Garzik <jgarzik@pobox.com>	2005-09-21 22:34:08 -0400
committer Jeff Garzik <jgarzik@pobox.com>	2005-09-21 22:34:08 -0400
commit    a3536c839f04682ed06c84a7f75968c27c6108c8 (patch)
tree      92c26ea74c0ffb9b83a2285ad2539cc271b09856 /net
parent    a33a1982012e9070736e3717231714dc9892303b (diff)
parent    efb0372bbaf5b829ff8c39db372779928af542a7 (diff)

Merge /spare/repo/linux-2.6/
Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan_dev.c  2
-rw-r--r--  net/Kconfig  3
-rw-r--r--  net/bridge/br_netfilter.c  8
-rw-r--r--  net/dccp/Makefile  2
-rw-r--r--  net/dccp/ackvec.c  419
-rw-r--r--  net/dccp/ackvec.h  133
-rw-r--r--  net/dccp/ccid.h  31
-rw-r--r--  net/dccp/ccids/ccid3.c  56
-rw-r--r--  net/dccp/ccids/ccid3.h  23
-rw-r--r--  net/dccp/dccp.h  91
-rw-r--r--  net/dccp/input.c  89
-rw-r--r--  net/dccp/ipv4.c  101
-rw-r--r--  net/dccp/minisocks.c  19
-rw-r--r--  net/dccp/options.c  443
-rw-r--r--  net/dccp/output.c  26
-rw-r--r--  net/dccp/proto.c  94
-rw-r--r--  net/ipv4/fib_trie.c  49
-rw-r--r--  net/ipv4/igmp.c  2
-rw-r--r--  net/ipv4/ipvs/ip_vs_conn.c  43
-rw-r--r--  net/ipv4/ipvs/ip_vs_core.c  16
-rw-r--r--  net/ipv4/ipvs/ip_vs_sync.c  20
-rw-r--r--  net/ipv4/netfilter/Kconfig  36
-rw-r--r--  net/ipv4/netfilter/Makefile  5
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c  7
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_helper_pptp.c  805
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_netlink.c  4
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_gre.c  327
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c  4
-rw-r--r--  net/ipv4/netfilter/ip_nat_core.c  2
-rw-r--r--  net/ipv4/netfilter/ip_nat_helper_pptp.c  401
-rw-r--r--  net/ipv4/netfilter/ip_nat_proto_gre.c  214
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c  223
-rw-r--r--  net/ipv4/raw.c  2
-rw-r--r--  net/ipv4/tcp_input.c  16
-rw-r--r--  net/ipv4/tcp_minisocks.c  2
-rw-r--r--  net/ipv4/tcp_output.c  12
-rw-r--r--  net/ipv6/mcast.c  2
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c  52
-rw-r--r--  net/ipv6/netfilter/ip6t_ah.c  81
-rw-r--r--  net/ipv6/netfilter/ip6t_dst.c  88
-rw-r--r--  net/ipv6/netfilter/ip6t_esp.c  73
-rw-r--r--  net/ipv6/netfilter/ip6t_frag.c  90
-rw-r--r--  net/ipv6/netfilter/ip6t_hbh.c  88
-rw-r--r--  net/ipv6/netfilter/ip6t_rt.c  83
-rw-r--r--  net/ipv6/raw.c  2
-rw-r--r--  net/ipv6/udp.c  15
-rw-r--r--  net/packet/af_packet.c  65
-rw-r--r--  net/socket.c  3
48 files changed, 3037 insertions, 1335 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 145f5cde96cf..b74864889670 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	unsigned short vid;
 	struct net_device_stats *stats;
 	unsigned short vlan_TCI;
-	unsigned short proto;
+	__be16 proto;
 
 	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
 	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
diff --git a/net/Kconfig b/net/Kconfig
index 2bdd5623fdd5..60f6f321bd76 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER
 
 	  If unsure, say N.
 
+source "net/netfilter/Kconfig"
 source "net/ipv4/netfilter/Kconfig"
 source "net/ipv6/netfilter/Kconfig"
 source "net/decnet/netfilter/Kconfig"
@@ -206,8 +207,6 @@ config NET_PKTGEN
 	  To compile this code as a module, choose M here: the
 	  module will be called pktgen.
 
-source "net/netfilter/Kconfig"
-
 endmenu
 
 endmenu
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2d52fee63a8c..d8e36b775125 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -214,9 +214,11 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 					  .tos = RT_TOS(iph->tos)} }, .proto = 0};
 
 		if (!ip_route_output_key(&rt, &fl)) {
-			/* Bridged-and-DNAT'ed traffic doesn't
-			 * require ip_forwarding. */
-			if (((struct dst_entry *)rt)->dev == dev) {
+			/* - Bridged-and-DNAT'ed traffic doesn't
+			 *   require ip_forwarding.
+			 * - Deal with redirected traffic. */
+			if (((struct dst_entry *)rt)->dev == dev ||
+			    rt->rt_type == RTN_LOCAL) {
 				skb->dst = (struct dst_entry *)rt;
 				goto bridged_dnat;
 			}
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index fb97bb042455..344a8da153fc 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o
 dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
 	  timer.o
 
+dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
+
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
 
 dccp_diag-y := diag.o
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
new file mode 100644
index 000000000000..6530283eafca
--- /dev/null
+++ b/net/dccp/ackvec.c
@@ -0,0 +1,419 @@
+/*
+ * net/dccp/ackvec.c
+ *
+ * An implementation of the DCCP protocol
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License;
+ */
+
+#include "ackvec.h"
+#include "dccp.h"
+
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	int len = av->dccpav_vec_len + 2;
+	struct timeval now;
+	u32 elapsed_time;
+	unsigned char *to, *from;
+
+	dccp_timestamp(sk, &now);
+	elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
+
+	if (elapsed_time != 0)
+		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+		return -1;
+
+	/*
+	 * XXX: now we have just one ack vector sent record, so
+	 * we have to wait for it to be cleared.
+	 *
+	 * Of course this is not acceptable, but this is just for
+	 * basic testing now.
+	 */
+	if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1)
+		return -1;
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+	to = skb_push(skb, len);
+	*to++ = DCCPO_ACK_VECTOR_0;
+	*to++ = len;
+
+	len = av->dccpav_vec_len;
+	from = av->dccpav_buf + av->dccpav_buf_head;
+
+	/* Check if buf_head wraps */
+	if (av->dccpav_buf_head + len > av->dccpav_vec_len) {
+		const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head);
+
+		memcpy(to, from, tailsize);
+		to += tailsize;
+		len -= tailsize;
+		from = av->dccpav_buf;
+	}
+
+	memcpy(to, from, len);
+	/*
+	 * From draft-ietf-dccp-spec-11.txt:
+	 *
+	 * For each acknowledgement it sends, the HC-Receiver will add an
+	 * acknowledgement record. ack_seqno will equal the HC-Receiver
+	 * sequence number it used for the ack packet; ack_ptr will equal
+	 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
+	 * equal buf_nonce.
+	 *
+	 * This implemention uses just one ack record for now.
+	 */
+	av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	av->dccpav_ack_ptr = av->dccpav_buf_head;
+	av->dccpav_ack_ackno = av->dccpav_buf_ackno;
+	av->dccpav_ack_nonce = av->dccpav_buf_nonce;
+	av->dccpav_sent_len = av->dccpav_vec_len;
+
+	dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
+		      "ack_ackno=%llu\n",
+		      debug_prefix, av->dccpav_sent_len,
+		      (unsigned long long)av->dccpav_ack_seqno,
+		      (unsigned long long)av->dccpav_ack_ackno);
+	return -1;
+}
+
+struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
+				      const unsigned int __nocast priority)
+{
+	struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority);
+
+	if (av != NULL) {
+		av->dccpav_buf_len = len;
+		av->dccpav_buf_head =
+			av->dccpav_buf_tail = av->dccpav_buf_len - 1;
+		av->dccpav_buf_ackno =
+			av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU;
+		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
+		av->dccpav_ack_ptr = 0;
+		av->dccpav_time.tv_sec = 0;
+		av->dccpav_time.tv_usec = 0;
+		av->dccpav_sent_len = av->dccpav_vec_len = 0;
+	}
+
+	return av;
+}
+
+void dccp_ackvec_free(struct dccp_ackvec *av)
+{
+	kfree(av);
+}
+
+static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
+				   const unsigned int index)
+{
+	return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
+}
+
+static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
+				 const unsigned int index)
+{
+	return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
+}
+
+/*
+ * If several packets are missing, the HC-Receiver may prefer to enter multiple
+ * bytes with run length 0, rather than a single byte with a larger run length;
+ * this simplifies table updates if one of the missing packets arrives.
+ */
+static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
+						 const unsigned int packets,
+						 const unsigned char state)
+{
+	unsigned int gap;
+	signed long new_head;
+
+	if (av->dccpav_vec_len + packets > av->dccpav_buf_len)
+		return -ENOBUFS;
+
+	gap = packets - 1;
+	new_head = av->dccpav_buf_head - packets;
+
+	if (new_head < 0) {
+		if (gap > 0) {
+			memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			       gap + new_head + 1);
+			gap = -new_head;
+		}
+		new_head += av->dccpav_buf_len;
+	}
+
+	av->dccpav_buf_head = new_head;
+
+	if (gap > 0)
+		memset(av->dccpav_buf + av->dccpav_buf_head + 1,
+		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+
+	av->dccpav_buf[av->dccpav_buf_head] = state;
+	av->dccpav_vec_len += packets;
+	return 0;
+}
+
+/*
+ * Implements the draft-ietf-dccp-spec-11.txt Appendix A
+ */
+int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+		    const u64 ackno, const u8 state)
+{
+	/*
+	 * Check at the right places if the buffer is full, if it is, tell the
+	 * caller to start dropping packets till the HC-Sender acks our ACK
+	 * vectors, when we will free up space in dccpav_buf.
+	 *
+	 * We may well decide to do buffer compression, etc, but for now lets
+	 * just drop.
+	 *
+	 * From Appendix A:
+	 *
+	 *	Of course, the circular buffer may overflow, either when the
+	 *	HC-Sender is sending data at a very high rate, when the
+	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
+	 *	or when the HC-Sender is forgetting to acknowledge those acks
+	 *	(so the HC-Receiver is unable to clean up old state). In this
+	 *	case, the HC-Receiver should either compress the buffer (by
+	 *	increasing run lengths when possible), transfer its state to
+	 *	a larger buffer, or, as a last resort, drop all received
+	 *	packets, without processing them whatsoever, until its buffer
+	 *	shrinks again.
+	 */
+
+	/* See if this is the first ackno being inserted */
+	if (av->dccpav_vec_len == 0) {
+		av->dccpav_buf[av->dccpav_buf_head] = state;
+		av->dccpav_vec_len = 1;
+	} else if (after48(ackno, av->dccpav_buf_ackno)) {
+		const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno,
+						   ackno);
+
+		/*
+		 * Look if the state of this packet is the same as the
+		 * previous ackno and if so if we can bump the head len.
+		 */
+		if (delta == 1 &&
+		    dccp_ackvec_state(av, av->dccpav_buf_head) == state &&
+		    (dccp_ackvec_len(av, av->dccpav_buf_head) <
+		     DCCP_ACKVEC_LEN_MASK))
+			av->dccpav_buf[av->dccpav_buf_head]++;
+		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
+			return -ENOBUFS;
+	} else {
+		/*
+		 * A.1.2. Old Packets
+		 *
+		 *	When a packet with Sequence Number S arrives, and
+		 *	S <= buf_ackno, the HC-Receiver will scan the table
+		 *	for the byte corresponding to S. (Indexing structures
+		 *	could reduce the complexity of this scan.)
+		 */
+		u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
+		unsigned int index = av->dccpav_buf_head;
+
+		while (1) {
+			const u8 len = dccp_ackvec_len(av, index);
+			const u8 state = dccp_ackvec_state(av, index);
+			/*
+			 * valid packets not yet in dccpav_buf have a reserved
+			 * entry, with a len equal to 0.
+			 */
+			if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+			    len == 0 && delta == 0) { /* Found our
+							 reserved seat! */
+				dccp_pr_debug("Found %llu reserved seat!\n",
+					      (unsigned long long)ackno);
+				av->dccpav_buf[index] = state;
+				goto out;
+			}
+			/* len == 0 means one packet */
+			if (delta < len + 1)
+				goto out_duplicate;
+
+			delta -= len + 1;
+			if (++index == av->dccpav_buf_len)
+				index = 0;
+		}
+	}
+
+	av->dccpav_buf_ackno = ackno;
+	dccp_timestamp(sk, &av->dccpav_time);
+out:
+	dccp_pr_debug("");
+	return 0;
+
+out_duplicate:
+	/* Duplicate packet */
+	dccp_pr_debug("Received a dup or already considered lost "
+		      "packet: %llu\n", (unsigned long long)ackno);
+	return -EILSEQ;
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
+{
+	if (!dccp_debug)
+		return;
+
+	printk("ACK vector len=%d, ackno=%llu |", len,
+	       (unsigned long long)ackno);
+
+	while (len--) {
+		const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
+		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+
+		printk("%d,%d|", state, rl);
+		++vector;
+	}
+
+	printk("\n");
+}
+
+void dccp_ackvec_print(const struct dccp_ackvec *av)
+{
+	dccp_ackvector_print(av->dccpav_buf_ackno,
+			     av->dccpav_buf + av->dccpav_buf_head,
+			     av->dccpav_vec_len);
+}
+#endif
+
+static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
+{
+	/*
+	 * As we're keeping track of the ack vector size (dccpav_vec_len) and
+	 * the sent ack vector size (dccpav_sent_len) we don't need
+	 * dccpav_buf_tail at all, but keep this code here as in the future
+	 * we'll implement a vector of ack records, as suggested in
+	 * draft-ietf-dccp-spec-11.txt Appendix A. -acme
+	 */
+#if 0
+	av->dccpav_buf_tail = av->dccpav_ack_ptr + 1;
+	if (av->dccpav_buf_tail >= av->dccpav_vec_len)
+		av->dccpav_buf_tail -= av->dccpav_vec_len;
+#endif
+	av->dccpav_vec_len -= av->dccpav_sent_len;
+}
+
+void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
+				 const u64 ackno)
+{
+	/* Check if we actually sent an ACK vector */
+	if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
+		return;
+
+	if (ackno == av->dccpav_ack_seqno) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+		struct dccp_sock *dp = dccp_sk(sk);
+		const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+					   "CLIENT rx ack: " : "server rx ack: ";
+#endif
+		dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+			      "ack_ackno=%llu, ACKED!\n",
+			      debug_prefix, 1,
+			      (unsigned long long)av->dccpav_ack_seqno,
+			      (unsigned long long)av->dccpav_ack_ackno);
+		dccp_ackvec_trow_away_ack_record(av);
+		av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
+	}
+}
+
+static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
+					    struct sock *sk, u64 ackno,
+					    const unsigned char len,
+					    const unsigned char *vector)
+{
+	unsigned char i;
+
+	/* Check if we actually sent an ACK vector */
+	if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
+		return;
+	/*
+	 * We're in the receiver half connection, so if the received an ACK
+	 * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're
+	 * not interested.
+	 *
+	 * Extra explanation with example:
+	 *
+	 * if we received an ACK vector with ackno 50, it can only be acking
+	 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
+	 */
+	/* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */
+	if (before48(ackno, av->dccpav_ack_seqno)) {
+		/* dccp_pr_debug_cat("yes\n"); */
+		return;
+	}
+	/* dccp_pr_debug_cat("no\n"); */
+
+	i = len;
+	while (i--) {
+		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+		u64 ackno_end_rl;
+
+		dccp_set_seqno(&ackno_end_rl, ackno - rl);
+
+		/*
+		 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
+		 *		 av->dccpav_ack_seqno, ackno);
+		 */
+		if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) {
+			const u8 state = (*vector &
+					  DCCP_ACKVEC_STATE_MASK) >> 6;
+			/* dccp_pr_debug_cat("yes\n"); */
+
+			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+				struct dccp_sock *dp = dccp_sk(sk);
+				const char *debug_prefix =
+					dp->dccps_role == DCCP_ROLE_CLIENT ?
+					"CLIENT rx ack: " : "server rx ack: ";
+#endif
+				dccp_pr_debug("%sACK vector 0, len=%d, "
+					      "ack_seqno=%llu, ack_ackno=%llu, "
+					      "ACKED!\n",
+					      debug_prefix, len,
+					      (unsigned long long)
+					      av->dccpav_ack_seqno,
+					      (unsigned long long)
+					      av->dccpav_ack_ackno);
+				dccp_ackvec_trow_away_ack_record(av);
+			}
+			/*
+			 * If dccpav_ack_seqno was not received, no problem
+			 * we'll send another ACK vector.
+			 */
+			av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
+			break;
+		}
+		/* dccp_pr_debug_cat("no\n"); */
+
+		dccp_set_seqno(&ackno, ackno_end_rl - 1);
+		++vector;
+	}
+}
+
+int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+		      const u8 opt, const u8 *value, const u8 len)
+{
+	if (len > DCCP_MAX_ACKVEC_LEN)
+		return -1;
+
+	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
+	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
+					DCCP_SKB_CB(skb)->dccpd_ack_seq,
+					len, value);
+	return 0;
+}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
new file mode 100644
index 000000000000..8ca51c9191f7
--- /dev/null
+++ b/net/dccp/ackvec.h
@@ -0,0 +1,133 @@
+#ifndef _ACKVEC_H
+#define _ACKVEC_H
+/*
+ * net/dccp/ackvec.h
+ *
+ * An implementation of the DCCP protocol
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/compiler.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+/* Read about the ECN nonce to see why it is 253 */
+#define DCCP_MAX_ACKVEC_LEN 253
+
+#define DCCP_ACKVEC_STATE_RECEIVED	0
+#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
+#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+
+#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
+#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+
+/** struct dccp_ackvec - ack vector
+ *
+ * This data structure is the one defined in the DCCP draft
+ * Appendix A.
+ *
+ * @dccpav_buf_head - circular buffer head
+ * @dccpav_buf_tail - circular buffer tail
+ * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
+ *		       buffer (i.e. %dccpav_buf_head)
+ * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ *		       by the buffer with State 0
+ *
+ * Additionally, the HC-Receiver must keep some information about the
+ * Ack Vectors it has recently sent. For each packet sent carrying an
+ * Ack Vector, it remembers four variables:
+ *
+ * @dccpav_ack_seqno - the Sequence Number used for the packet
+ *		       (HC-Receiver seqno)
+ * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
+ * @dccpav_ack_ackno - the Acknowledgement Number used for the packet
+ *		       (HC-Sender seqno)
+ * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ *
+ * @dccpav_buf_len - circular buffer length
+ * @dccpav_time - the time in usecs
+ * @dccpav_buf - circular buffer of acknowledgeable packets
+ */
+struct dccp_ackvec {
+	unsigned int dccpav_buf_head;
+	unsigned int dccpav_buf_tail;
+	u64 dccpav_buf_ackno;
+	u64 dccpav_ack_seqno;
+	u64 dccpav_ack_ackno;
+	unsigned int dccpav_ack_ptr;
+	unsigned int dccpav_sent_len;
+	unsigned int dccpav_vec_len;
+	unsigned int dccpav_buf_len;
+	struct timeval dccpav_time;
+	u8 dccpav_buf_nonce;
+	u8 dccpav_ack_nonce;
+	u8 dccpav_buf[0];
+};
+
+struct sock;
+struct sk_buff;
+
+#ifdef CONFIG_IP_DCCP_ACKVEC
+extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
+					     const unsigned int __nocast priority);
+extern void dccp_ackvec_free(struct dccp_ackvec *av);
+
+extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+			   const u64 ackno, const u8 state);
+
+extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
+					struct sock *sk, const u64 ackno);
+extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+			     const u8 opt, const u8 *value, const u8 len);
+
+extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
+
+static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+{
+	return av->dccpav_sent_len != av->dccpav_vec_len;
+}
+#else /* CONFIG_IP_DCCP_ACKVEC */
+static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
+						    const unsigned int __nocast priority)
+{
+	return NULL;
+}
+
+static inline void dccp_ackvec_free(struct dccp_ackvec *av)
+{
+}
+
+static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+				  const u64 ackno, const u8 state)
+{
+	return -1;
+}
+
+static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
+					       struct sock *sk, const u64 ackno)
+{
+}
+
+static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+				    const u8 opt, const u8 *value, const u8 len)
+{
+	return -1;
+}
+
+static inline int dccp_insert_option_ackvec(const struct sock *sk,
+					    const struct sk_buff *skb)
+{
+	return -1;
+}
+
+static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+{
+	return 0;
+}
+#endif /* CONFIG_IP_DCCP_ACKVEC */
+#endif /* _ACKVEC_H */
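
[Editorial aside — not part of the merged patch. The sketch below illustrates the Ack Vector byte layout that the ackvec.h hunk above defines: the top two bits of each byte carry the state, the low six bits a run length, where run length 0 means "just this packet" (as dccp_ackvec_add assumes). The mask values are copied from the header; the userspace harness around them is hypothetical and only meant as a worked example.]

/* Decode one DCCP Ack Vector cell with the masks from ackvec.h. */
#include <stdio.h>

#define DCCP_ACKVEC_STATE_MASK 0xC0	/* 11000000 */
#define DCCP_ACKVEC_LEN_MASK   0x3F	/* 00111111 */

static void decode_ackvec_byte(unsigned char cell)
{
	const unsigned char state = (cell & DCCP_ACKVEC_STATE_MASK) >> 6;
	const unsigned char run_len = cell & DCCP_ACKVEC_LEN_MASK;

	/* state 0 = received, 1 = received ECN-marked, 3 = not received */
	printf("state=%u, covers %u packet(s)\n", state, run_len + 1);
}

int main(void)
{
	decode_ackvec_byte(0x00);		/* received, run length 0   */
	decode_ackvec_byte((3 << 6) | 4);	/* not received, 5 packets  */
	return 0;
}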
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 962f1e9e2f7e..21e55142dcd3 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -14,6 +14,7 @@
  */
 
 #include <net/sock.h>
+#include <linux/compiler.h>
#include <linux/dccp.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -54,6 +55,14 @@ struct ccid {
 					    struct tcp_info *info);
 	void (*ccid_hc_tx_get_info)(struct sock *sk,
 				    struct tcp_info *info);
+	int (*ccid_hc_rx_getsockopt)(struct sock *sk,
+				     const int optname, int len,
+				     u32 __user *optval,
+				     int __user *optlen);
+	int (*ccid_hc_tx_getsockopt)(struct sock *sk,
+				     const int optname, int len,
+				     u32 __user *optval,
+				     int __user *optlen);
 };
 
 extern int ccid_register(struct ccid *ccid);
@@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
 	if (ccid->ccid_hc_tx_get_info != NULL)
 		ccid->ccid_hc_tx_get_info(sk, info);
 }
+
+static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
+					const int optname, int len,
+					u32 __user *optval, int __user *optlen)
+{
+	int rc = -ENOPROTOOPT;
+	if (ccid->ccid_hc_rx_getsockopt != NULL)
+		rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len,
+						 optval, optlen);
+	return rc;
+}
+
+static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
+					const int optname, int len,
+					u32 __user *optval, int __user *optlen)
+{
+	int rc = -ENOPROTOOPT;
+	if (ccid->ccid_hc_tx_getsockopt != NULL)
+		rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len,
+						 optval, optlen);
+	return rc;
+}
 #endif /* _CCID_H */
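
[Editorial aside — not part of the merged patch. Below is a minimal userspace mock, not kernel code, showing the dispatch pattern the ccid.h hunk above introduces: the inline wrapper returns -ENOPROTOOPT when a CCID leaves the getsockopt hook NULL, and forwards to the hook when one is provided. The simplified types and the demo callback are invented for illustration.]

#include <errno.h>
#include <stdio.h>

struct sock;				/* opaque stand-in for the kernel type */

struct ccid {
	int (*ccid_hc_tx_getsockopt)(struct sock *sk, int optname, int len);
};

/* Mirrors the wrapper added to ccid.h: -ENOPROTOOPT unless a hook exists. */
static int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
				 int optname, int len)
{
	int rc = -ENOPROTOOPT;
	if (ccid->ccid_hc_tx_getsockopt != NULL)
		rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len);
	return rc;
}

static int demo_tx_getsockopt(struct sock *sk, int optname, int len)
{
	return 0;			/* pretend the TX info was copied out */
}

int main(void)
{
	struct ccid without_hook = { .ccid_hc_tx_getsockopt = NULL };
	struct ccid with_hook = { .ccid_hc_tx_getsockopt = demo_tx_getsockopt };

	printf("%d %d\n",
	       ccid_hc_tx_getsockopt(&without_hook, NULL, 0, 0),
	       ccid_hc_tx_getsockopt(&with_hook, NULL, 0, 0));
	return 0;
}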
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 38aa84986118..aa68e0ab274d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rtt = hctx->ccid3hctx_rtt;
 }
 
+static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
+				  u32 __user *optval, int __user *optlen)
+{
+	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	const void *val;
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	switch (optname) {
+	case DCCP_SOCKOPT_CCID_RX_INFO:
+		if (len < sizeof(hcrx->ccid3hcrx_tfrc))
+			return -EINVAL;
+		len = sizeof(hcrx->ccid3hcrx_tfrc);
+		val = &hcrx->ccid3hcrx_tfrc;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
+				  u32 __user *optval, int __user *optlen)
+{
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	const void *val;
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	switch (optname) {
+	case DCCP_SOCKOPT_CCID_TX_INFO:
+		if (len < sizeof(hctx->ccid3hctx_tfrc))
+			return -EINVAL;
+		len = sizeof(hctx->ccid3hctx_tfrc);
+		val = &hctx->ccid3hctx_tfrc;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static struct ccid ccid3 = {
 	.ccid_id = 3,
 	.ccid_name = "ccid3",
@@ -1139,6 +1193,8 @@ static struct ccid ccid3 = {
 	.ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
 	.ccid_hc_rx_get_info = ccid3_hc_rx_get_info,
 	.ccid_hc_tx_get_info = ccid3_hc_tx_get_info,
+	.ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt,
+	.ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt,
 };
 
 module_param(ccid3_debug, int, 0444);
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index eb248778eea3..0bde4583d091 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/time.h>
 #include <linux/types.h>
+#include <linux/tfrc.h>
 
 #define TFRC_MIN_PACKET_SIZE 16
 #define TFRC_STD_PACKET_SIZE 256
@@ -93,12 +94,15 @@ struct ccid3_options_received {
  * @ccid3hctx_hist - Packet history
  */
 struct ccid3_hc_tx_sock {
-	u32 ccid3hctx_x;
-	u32 ccid3hctx_x_recv;
-	u32 ccid3hctx_x_calc;
+	struct tfrc_tx_info ccid3hctx_tfrc;
+#define ccid3hctx_x		ccid3hctx_tfrc.tfrctx_x
+#define ccid3hctx_x_recv	ccid3hctx_tfrc.tfrctx_x_recv
+#define ccid3hctx_x_calc	ccid3hctx_tfrc.tfrctx_x_calc
+#define ccid3hctx_rtt		ccid3hctx_tfrc.tfrctx_rtt
+#define ccid3hctx_p		ccid3hctx_tfrc.tfrctx_p
+#define ccid3hctx_t_rto		ccid3hctx_tfrc.tfrctx_rto
+#define ccid3hctx_t_ipi		ccid3hctx_tfrc.tfrctx_ipi
 	u16 ccid3hctx_s;
-	u32 ccid3hctx_rtt;
-	u32 ccid3hctx_p;
 	u8 ccid3hctx_state;
 	u8 ccid3hctx_last_win_count;
 	u8 ccid3hctx_idle;
@@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock {
 	struct timer_list ccid3hctx_no_feedback_timer;
 	struct timeval ccid3hctx_t_ld;
 	struct timeval ccid3hctx_t_nom;
-	u32 ccid3hctx_t_rto;
-	u32 ccid3hctx_t_ipi;
 	u32 ccid3hctx_delta;
 	struct list_head ccid3hctx_hist;
 	struct ccid3_options_received ccid3hctx_options_received;
 };
 
 struct ccid3_hc_rx_sock {
+	struct tfrc_rx_info ccid3hcrx_tfrc;
+#define ccid3hcrx_x_recv	ccid3hcrx_tfrc.tfrcrx_x_recv
+#define ccid3hcrx_rtt		ccid3hcrx_tfrc.tfrcrx_rtt
+#define ccid3hcrx_p		ccid3hcrx_tfrc.tfrcrx_p
 	u64 ccid3hcrx_seqno_last_counter:48,
 	    ccid3hcrx_state:8,
 	    ccid3hcrx_last_counter:4;
-	u32 ccid3hcrx_rtt;
-	u32 ccid3hcrx_p;
 	u32 ccid3hcrx_bytes_recv;
 	struct timeval ccid3hcrx_tstamp_last_feedback;
 	struct timeval ccid3hcrx_tstamp_last_ack;
@@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock {
 	u16 ccid3hcrx_s;
 	u32 ccid3hcrx_pinv;
 	u32 ccid3hcrx_elapsed_time;
-	u32 ccid3hcrx_x_recv;
 };
 
 static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 95c4630b3b18..5871c027f9dc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -17,6 +17,7 @@
 #include <net/snmp.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include "ackvec.h"
 
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern int dccp_debug;
@@ -258,13 +259,12 @@ extern int dccp_v4_send_reset(struct sock *sk,
 extern void dccp_send_close(struct sock *sk, const int active);
 
 struct dccp_skb_cb {
-	__u8 dccpd_type;
-	__u8 dccpd_reset_code;
-	__u8 dccpd_service;
-	__u8 dccpd_ccval;
+	__u8 dccpd_type:4;
+	__u8 dccpd_ccval:4;
+	__u8 dccpd_reset_code;
+	__u16 dccpd_opt_len;
 	__u64 dccpd_seq;
 	__u64 dccpd_ack_seq;
-	int dccpd_opt_len;
 };
 
 #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
@@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 				   (dp->dccps_gss -
 				    dp->dccps_options.dccpo_sequence_window + 1));
 }
+
+static inline int dccp_ack_pending(const struct sock *sk)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	return dp->dccps_timestamp_echo != 0 ||
+#ifdef CONFIG_IP_DCCP_ACKVEC
+	       (dp->dccps_options.dccpo_send_ack_vector &&
+		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
+#endif
+	       inet_csk_ack_scheduled(sk);
+}
 
 extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
 extern void dccp_insert_option_elapsed_time(struct sock *sk,
@@ -372,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
 
 extern struct socket *dccp_ctl_socket;
 
-#define DCCP_ACKPKTS_STATE_RECEIVED	0
-#define DCCP_ACKPKTS_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKPKTS_STATE_NOT_RECEIVED	(3 << 6)
-
-#define DCCP_ACKPKTS_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKPKTS_LEN_MASK		0x3F /* 00111111 */
-
-/** struct dccp_ackpkts - acknowledgeable packets
- *
- * This data structure is the one defined in the DCCP draft
- * Appendix A.
- *
- * @dccpap_buf_head - circular buffer head
- * @dccpap_buf_tail - circular buffer tail
- * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
- *		       buffer (i.e. %dccpap_buf_head)
- * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- *		       by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
- *
- * @dccpap_ack_seqno - the Sequence Number used for the packet
- *		       (HC-Receiver seqno)
- * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
- * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
- *		       (HC-Sender seqno)
- * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
- *
- * @dccpap_buf_len - circular buffer length
- * @dccpap_time - the time in usecs
- * @dccpap_buf - circular buffer of acknowledgeable packets
- */
-struct dccp_ackpkts {
-	unsigned int dccpap_buf_head;
-	unsigned int dccpap_buf_tail;
-	u64 dccpap_buf_ackno;
-	u64 dccpap_ack_seqno;
-	u64 dccpap_ack_ackno;
-	unsigned int dccpap_ack_ptr;
-	unsigned int dccpap_buf_vector_len;
-	unsigned int dccpap_ack_vector_len;
-	unsigned int dccpap_buf_len;
-	struct timeval dccpap_time;
-	u8 dccpap_buf_nonce;
-	u8 dccpap_ack_nonce;
-	u8 dccpap_buf[0];
-};
-
-extern struct dccp_ackpkts *
-		dccp_ackpkts_alloc(unsigned int len,
-				   const unsigned int __nocast priority);
-extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
-extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
-			    u64 ackno, u8 state);
-extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
-					 struct sock *sk, u64 ackno);
-
 extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
 
 static inline suseconds_t timeval_usecs(const struct timeval *tv)
@@ -471,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv,
 	}
 }
 
-#ifdef CONFIG_IP_DCCP_DEBUG
-extern void dccp_ackvector_print(const u64 ackno,
-				 const unsigned char *vector, int len);
-extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
-#else
-static inline void dccp_ackvector_print(const u64 ackno,
-					const unsigned char *vector,
-					int len) { }
-static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
-#endif
-
 #endif /* _DCCP_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index c74034cf7ede..1b6b2cb12376 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -16,6 +16,7 @@
 
 #include <net/sock.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	if (dp->dccps_options.dccpo_send_ack_vector)
-		dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
+		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
 					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
 
 static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
@@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 		dccp_event_ack_recv(sk, skb);
 
-	/*
-	 * FIXME: check ECN to see if we should use
-	 * DCCP_ACKPKTS_STATE_ECN_MARKED
-	 */
-	if (dp->dccps_options.dccpo_send_ack_vector) {
-		struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
-
-		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
-				     DCCP_SKB_CB(skb)->dccpd_seq,
-				     DCCP_ACKPKTS_STATE_RECEIVED)) {
-			LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
-				       "packets buffer full!\n");
-			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
-			inet_csk_schedule_ack(sk);
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  TCP_DELACK_MIN,
-						  DCCP_RTO_MAX);
-			goto discard;
-		}
-
-		/*
-		 * FIXME: this activation is probably wrong, have to study more
-		 * TCP delack machinery and how it fits into DCCP draft, but
-		 * for now it kinda "works" 8)
-		 */
-		if (!inet_csk_ack_scheduled(sk)) {
-			inet_csk_schedule_ack(sk);
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
-						  DCCP_RTO_MAX);
-		}
-	}
+	if (dp->dccps_options.dccpo_send_ack_vector &&
+	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+			    DCCP_SKB_CB(skb)->dccpd_seq,
+			    DCCP_ACKVEC_STATE_RECEIVED))
+		goto discard;
 
 	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
 	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
@@ -384,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 	}
 
 out_invalid_packet:
-	return 1; /* dccp_v4_do_rcv will send a reset, but...
-		     FIXME: the reset code should be
-			    DCCP_RESET_CODE_PACKET_ERROR */
+	/* dccp_v4_do_rcv will send a reset */
+	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
+	return 1;
 }
 
 static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -433,6 +408,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			   struct dccp_hdr *dh, unsigned len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	const int old_state = sk->sk_state;
 	int queued = 0;
 
@@ -473,7 +449,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dh->dccph_type == DCCP_PKT_RESET)
 			goto discard;
 
-		/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
+		/* Caller (dccp_v4_do_rcv) will send Reset */
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
 	}
 
@@ -487,36 +464,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dccp_parse_options(sk, skb))
 			goto discard;
 
-		if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
-		    DCCP_PKT_WITHOUT_ACK_SEQ)
+		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
 
 		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
 		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
 
-		/*
-		 * FIXME: check ECN to see if we should use
-		 * DCCP_ACKPKTS_STATE_ECN_MARKED
-		 */
-		if (dp->dccps_options.dccpo_send_ack_vector) {
-			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
-					     DCCP_SKB_CB(skb)->dccpd_seq,
-					     DCCP_ACKPKTS_STATE_RECEIVED))
-				goto discard;
-			/*
-			 * FIXME: this activation is probably wrong, have to
-			 * study more TCP delack machinery and how it fits into
-			 * DCCP draft, but for now it kinda "works" 8)
-			 */
-			if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
-			     DCCP_MAX_SEQNO + 1) &&
-			    !inet_csk_ack_scheduled(sk)) {
-				inet_csk_schedule_ack(sk);
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-							  TCP_DELACK_MIN,
-							  DCCP_RTO_MAX);
-			}
-		}
+		if (dp->dccps_options.dccpo_send_ack_vector &&
+		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+				    DCCP_SKB_CB(skb)->dccpd_seq,
+				    DCCP_ACKVEC_STATE_RECEIVED))
+			goto discard;
 	}
 
 	/*
@@ -551,8 +509,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	     dh->dccph_type == DCCP_PKT_REQUEST) ||
 	    (sk->sk_state == DCCP_RESPOND &&
 	     dh->dccph_type == DCCP_PKT_DATA)) {
-		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
-			       DCCP_PKT_SYNC);
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
 		goto discard;
 	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
 		dccp_rcv_closereq(sk, skb);
@@ -563,13 +520,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
-		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
-			       DCCP_PKT_SYNCACK);
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
 		goto discard;
 	}
 
 	switch (sk->sk_state) {
 	case DCCP_CLOSED:
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
 
 	case DCCP_REQUESTING:
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2afaa464e7f0..40fe6afacde6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -23,6 +23,7 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	dp->dccps_role = DCCP_ROLE_CLIENT;
 
+	if (dccp_service_not_initialized(sk))
+		return -EPROTO;
+
 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
 
@@ -661,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
 					   dccp_hdr(skb)->dccph_sport);
 }
 
+static inline int dccp_bad_service_code(const struct sock *sk,
+					const __u32 service)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+
+	if (dp->dccps_service == service)
+		return 0;
+	return !dccp_list_has_service(dp->dccps_service_list, service);
+}
+
 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -669,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct dccp_request_sock *dreq;
 	const __u32 saddr = skb->nh.iph->saddr;
 	const __u32 daddr = skb->nh.iph->daddr;
+	const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+	__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
 	struct dst_entry *dst = NULL;
 
 	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
 	if (((struct rtable *)skb->dst)->rt_flags &
-	    (RTCF_BROADCAST | RTCF_MULTICAST))
+	    (RTCF_BROADCAST | RTCF_MULTICAST)) {
+		reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		goto drop;
+	}
 
+	if (dccp_bad_service_code(sk, service)) {
+		reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+		goto drop;
+	}
 	/*
 	 * TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -718,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * dccp_create_openreq_child.
 	 */
 	dreq = dccp_rsk(req);
-	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+	dreq->dreq_isr = dcb->dccpd_seq;
 	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
-	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+	dreq->dreq_service = service;
 
 	if (dccp_v4_send_response(sk, req, dst))
 		goto drop_and_free;
@@ -735,6 +758,7 @@ drop_and_free:
 	__reqsk_free(req);
drop:
 	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+	dcb->dccpd_reset_code = reset_code;
 	return -1;
 }
 
@@ -1005,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 
reset:
-	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 	dccp_v4_ctl_send_reset(skb);
discard:
 	kfree_skb(skb);
@@ -1090,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb)
 		goto discard_it;
 
 	dh = dccp_hdr(skb);
-#if 0
-	/*
-	 * Use something like this to simulate some DATA/DATAACK loss to test
-	 * dccp_ackpkts_add, you'll get something like this on a session that
-	 * sends 10 DATA/DATAACK packets:
-	 *
-	 * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
-	 *
-	 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
-	 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
-	 *	       with the same state
-	 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
-	 *
-	 * So...
-	 *
-	 * 281473596467422 was received
-	 * 281473596467421 was not received
-	 * 281473596467420 was received
-	 * 281473596467419 was not received
-	 * 281473596467418 was received
-	 * 281473596467417 was not received
-	 * 281473596467416 was received
-	 * 281473596467415 was not received
-	 * 281473596467414 was received
-	 * 281473596467413 was received (this one was the 3way handshake
-	 *		   RESPONSE)
-	 *
-	 */
-	if (dh->dccph_type == DCCP_PKT_DATA ||
-	    dh->dccph_type == DCCP_PKT_DATAACK) {
-		static int discard = 0;
 
-		if (discard) {
-			discard = 0;
-			goto discard_it;
-		}
-		discard = 1;
-	}
-#endif
 	DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
 	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
 
@@ -1242,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk)
 	do_gettimeofday(&dp->dccps_epoch);
 
 	if (dp->dccps_options.dccpo_send_ack_vector) {
-		dp->dccps_hc_rx_ackpkts =
-			dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
-					   GFP_KERNEL);
-
-		if (dp->dccps_hc_rx_ackpkts == NULL)
+		dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
+							   GFP_KERNEL);
+		if (dp->dccps_hc_rx_ackvec == NULL)
 			return -ENOMEM;
 	}
 
@@ -1258,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk)
 	 * setsockopt(CCIDs-I-want/accept). -acme
 	 */
 	if (likely(!dccp_ctl_socket_init)) {
-		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid,
 						 sk);
-		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid,
 						 sk);
 		if (dp->dccps_hc_rx_ccid == NULL ||
 		    dp->dccps_hc_tx_ccid == NULL) {
 			ccid_exit(dp->dccps_hc_rx_ccid, sk);
 			ccid_exit(dp->dccps_hc_tx_ccid, sk);
-			dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
-			dp->dccps_hc_rx_ackpkts = NULL;
+			if (dp->dccps_options.dccpo_send_ack_vector) {
+				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+				dp->dccps_hc_rx_ackvec = NULL;
+			}
 			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 			return -ENOMEM;
 		}
@@ -1280,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk)
 	sk->sk_write_space = dccp_write_space;
 	dp->dccps_mss_cache = 536;
 	dp->dccps_role = DCCP_ROLE_UNDEFINED;
+	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
 
 	return 0;
 }
@@ -1301,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk)
 	if (inet_csk(sk)->icsk_bind_hash != NULL)
 		inet_put_port(&dccp_hashinfo, sk);
 
+	if (dp->dccps_service_list != NULL) {
+		kfree(dp->dccps_service_list);
+		dp->dccps_service_list = NULL;
+	}
+
 	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
 	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
-	dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
-	dp->dccps_hc_rx_ackpkts = NULL;
+	if (dp->dccps_options.dccpo_send_ack_vector) {
+		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+		dp->dccps_hc_rx_ackvec = NULL;
+	}
 	ccid_exit(dp->dccps_hc_rx_ccid, sk);
 	ccid_exit(dp->dccps_hc_tx_ccid, sk);
 	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 18461bc04cbe..1393461898bb 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -19,6 +19,7 @@
 #include <net/xfrm.h>
 #include <net/inet_timewait_sock.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -93,22 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		struct inet_connection_sock *newicsk = inet_csk(sk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
 
-		newdp->dccps_hc_rx_ackpkts = NULL;
-		newdp->dccps_role = DCCP_ROLE_SERVER;
-		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
+		newdp->dccps_role = DCCP_ROLE_SERVER;
+		newdp->dccps_hc_rx_ackvec = NULL;
+		newdp->dccps_service_list = NULL;
+		newdp->dccps_service = dreq->dreq_service;
+		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
 		do_gettimeofday(&newdp->dccps_epoch);
 
 		if (newdp->dccps_options.dccpo_send_ack_vector) {
-			newdp->dccps_hc_rx_ackpkts =
-				dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
+			newdp->dccps_hc_rx_ackvec =
+				dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
						   GFP_ATOMIC);
 			/*
 			 * XXX: We're using the same CCIDs set on the parent,
 			 * i.e. sk_clone copied the master sock and left the
 			 * CCID pointers for this child, that is why we do the
 			 * __ccid_get calls.
 			 */
-			if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
+			if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
 				goto out_free;
 		}
 
@@ -116,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 				     newsk) != 0 ||
 		     ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
 				     newsk) != 0)) {
-			dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
+			dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
 			ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
 			ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
out_free:
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d4c4242d8dd7..0a76426c9aea 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -18,19 +18,15 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
-static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
-					     struct sock *sk,
-					     const u64 ackno,
-					     const unsigned char len,
-					     const unsigned char *vector);
-
 /* stores the default values for new connection. may be changed with sysctl */
 static const struct dccp_options dccpo_default_values = {
 	.dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
-	.dccpo_ccid = DCCPF_INITIAL_CCID,
+	.dccpo_rx_ccid = DCCPF_INITIAL_CCID,
+	.dccpo_tx_ccid = DCCPF_INITIAL_CCID,
 	.dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
 	.dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
 };
@@ -113,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 					      opt_recv->dccpor_ndp);
 			break;
 		case DCCPO_ACK_VECTOR_0:
-			if (len > DCCP_MAX_ACK_VECTOR_LEN)
-				goto out_invalid_option;
-
+		case DCCPO_ACK_VECTOR_1:
 			if (pkt_type == DCCP_PKT_DATA)
 				continue;
 
-			opt_recv->dccpor_ack_vector_len = len;
-			opt_recv->dccpor_ack_vector_idx = value - options;
-
-			dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
-				      debug_prefix, len,
-				      (unsigned long long)
-				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
-			dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
-					     value, len);
-			dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
-							 sk,
-							 DCCP_SKB_CB(skb)->dccpd_ack_seq,
-							 len, value);
+			if (dp->dccps_options.dccpo_send_ack_vector &&
+			    dccp_ackvec_parse(sk, skb, opt, value, len))
+				goto out_invalid_option;
 			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
@@ -352,86 +336,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk,
 
 EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
 
-static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
-	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
-					"CLIENT TX opt: " : "server TX opt: ";
-#endif
-	struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
-	int len = ap->dccpap_buf_vector_len + 2;
-	struct timeval now;
-	u32 elapsed_time;
-	unsigned char *to, *from;
-
-	dccp_timestamp(sk, &now);
-	elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10;
-
-	if (elapsed_time != 0)
-		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
-
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
-		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
-			       "insert ACK Vector!\n");
-		return;
-	}
-
-	/*
-	 * XXX: now we have just one ack vector sent record, so
-	 * we have to wait for it to be cleared.
-	 *
-	 * Of course this is not acceptable, but this is just for
-	 * basic testing now.
-	 */
-	if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
-		return;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
-	to = skb_push(skb, len);
-	*to++ = DCCPO_ACK_VECTOR_0;
-	*to++ = len;
-
-	len = ap->dccpap_buf_vector_len;
-	from = ap->dccpap_buf + ap->dccpap_buf_head;
-
-	/* Check if buf_head wraps */
-	if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
-		const unsigned int tailsize = (ap->dccpap_buf_len -
-					       ap->dccpap_buf_head);
-
-		memcpy(to, from, tailsize);
-		to += tailsize;
-		len -= tailsize;
-		from = ap->dccpap_buf;
-	}
-
-	memcpy(to, from, len);
-	/*
-	 * From draft-ietf-dccp-spec-11.txt:
-	 *
-	 * For each acknowledgement it sends, the HC-Receiver will add an
-	 * acknowledgement record. ack_seqno will equal the HC-Receiver
-	 * sequence number it used for the ack packet; ack_ptr will equal
-	 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 * equal buf_nonce.
-	 *
-	 * This implemention uses just one ack record for now.
-	 */
-	ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	ap->dccpap_ack_ptr = ap->dccpap_buf_head;
-	ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
-	ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
-	ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
-
-	dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
-		      "ack_ackno=%llu\n",
-		      debug_prefix, ap->dccpap_ack_vector_len,
-		      (unsigned long long) ap->dccpap_ack_seqno,
-		      (unsigned long long) ap->dccpap_ack_ackno);
-}
-
 void dccp_timestamp(const struct sock *sk, struct timeval *tv)
 {
 	const struct dccp_sock *dp = dccp_sk(sk);
@@ -528,9 +432,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 
 	if (!dccp_packet_without_ack(skb)) {
 		if (dp->dccps_options.dccpo_send_ack_vector &&
-		    (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
-		     DCCP_MAX_SEQNO + 1))
-			dccp_insert_option_ack_vector(sk, skb);
+		    dccp_ackvec_pending(dp->dccps_hc_rx_ackvec))
+			dccp_insert_option_ackvec(sk, skb);
 		if (dp->dccps_timestamp_echo != 0)
 			dccp_insert_option_timestamp_echo(sk, skb);
 	}
@@ -557,331 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 }
-
-struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
-					const unsigned int __nocast priority)
-{
-	struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
-
-	if (ap != NULL) {
-#ifdef CONFIG_IP_DCCP_DEBUG
-		memset(ap->dccpap_buf, 0xFF, len);
-#endif
-		ap->dccpap_buf_len = len;
571 ap->dccpap_buf_head =
572 ap->dccpap_buf_tail =
573 ap->dccpap_buf_len - 1;
574 ap->dccpap_buf_ackno =
575 ap->dccpap_ack_ackno =
576 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
577 ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
578 ap->dccpap_ack_ptr = 0;
579 ap->dccpap_time.tv_sec = 0;
580 ap->dccpap_time.tv_usec = 0;
581 ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
582 }
583
584 return ap;
585}
586
587void dccp_ackpkts_free(struct dccp_ackpkts *ap)
588{
589 if (ap != NULL) {
590#ifdef CONFIG_IP_DCCP_DEBUG
591 memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
592#endif
593 kfree(ap);
594 }
595}
596
597static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
598 const unsigned int index)
599{
600 return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
601}
602
603static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
604 const unsigned int index)
605{
606 return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
607}
608
609/*
610 * If several packets are missing, the HC-Receiver may prefer to enter multiple
611 * bytes with run length 0, rather than a single byte with a larger run length;
612 * this simplifies table updates if one of the missing packets arrives.
613 */
614static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
615 const unsigned int packets,
616 const unsigned char state)
617{
618 unsigned int gap;
619 signed long new_head;
620
621 if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
622 return -ENOBUFS;
623
624 gap = packets - 1;
625 new_head = ap->dccpap_buf_head - packets;
626
627 if (new_head < 0) {
628 if (gap > 0) {
629 memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
630 gap + new_head + 1);
631 gap = -new_head;
632 }
633 new_head += ap->dccpap_buf_len;
634 }
635
636 ap->dccpap_buf_head = new_head;
637
638 if (gap > 0)
639 memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
640 DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
641
642 ap->dccpap_buf[ap->dccpap_buf_head] = state;
643 ap->dccpap_buf_vector_len += packets;
644 return 0;
645}
646
647/*
648 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
649 */
650int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
651 u64 ackno, u8 state)
652{
653 /*
654 * Check at the right places if the buffer is full, if it is, tell the
655 * caller to start dropping packets till the HC-Sender acks our ACK
656 * vectors, when we will free up space in dccpap_buf.
657 *
658 * We may well decide to do buffer compression, etc, but for now lets
659 * just drop.
660 *
661 * From Appendix A:
662 *
663 * Of course, the circular buffer may overflow, either when the
664 * HC-Sender is sending data at a very high rate, when the
665 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
666 * or when the HC-Sender is forgetting to acknowledge those acks
667 * (so the HC-Receiver is unable to clean up old state). In this
668 * case, the HC-Receiver should either compress the buffer (by
669 * increasing run lengths when possible), transfer its state to
670 * a larger buffer, or, as a last resort, drop all received
671 * packets, without processing them whatsoever, until its buffer
672 * shrinks again.
673 */
674
675 /* See if this is the first ackno being inserted */
676 if (ap->dccpap_buf_vector_len == 0) {
677 ap->dccpap_buf[ap->dccpap_buf_head] = state;
678 ap->dccpap_buf_vector_len = 1;
679 } else if (after48(ackno, ap->dccpap_buf_ackno)) {
680 const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
681 ackno);
682
683 /*
684 * Look if the state of this packet is the same as the
685 * previous ackno and if so if we can bump the head len.
686 */
687 if (delta == 1 &&
688 dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
689 (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
690 DCCP_ACKPKTS_LEN_MASK))
691 ap->dccpap_buf[ap->dccpap_buf_head]++;
692 else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
693 return -ENOBUFS;
694 } else {
695 /*
696 * A.1.2. Old Packets
697 *
698 * When a packet with Sequence Number S arrives, and
699 * S <= buf_ackno, the HC-Receiver will scan the table
700 * for the byte corresponding to S. (Indexing structures
701 * could reduce the complexity of this scan.)
702 */
703 u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
704 unsigned int index = ap->dccpap_buf_head;
705
706 while (1) {
707 const u8 len = dccp_ackpkts_len(ap, index);
708 const u8 state = dccp_ackpkts_state(ap, index);
709 /*
710 * valid packets not yet in dccpap_buf have a reserved
711 * entry, with a len equal to 0.
712 */
713 if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
714 len == 0 && delta == 0) { /* Found our
715 reserved seat! */
716 dccp_pr_debug("Found %llu reserved seat!\n",
717 (unsigned long long) ackno);
718 ap->dccpap_buf[index] = state;
719 goto out;
720 }
721 /* len == 0 means one packet */
722 if (delta < len + 1)
723 goto out_duplicate;
724
725 delta -= len + 1;
726 if (++index == ap->dccpap_buf_len)
727 index = 0;
728 }
729 }
730
731 ap->dccpap_buf_ackno = ackno;
732 dccp_timestamp(sk, &ap->dccpap_time);
733out:
734 dccp_pr_debug("");
735 dccp_ackpkts_print(ap);
736 return 0;
737
738out_duplicate:
739 /* Duplicate packet */
740 dccp_pr_debug("Received a dup or already considered lost "
741 "packet: %llu\n", (unsigned long long) ackno);
742 return -EILSEQ;
743}
744
745#ifdef CONFIG_IP_DCCP_DEBUG
746void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
747 int len)
748{
749 if (!dccp_debug)
750 return;
751
752 printk("ACK vector len=%d, ackno=%llu |", len,
753 (unsigned long long) ackno);
754
755 while (len--) {
756 const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
757 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
758
759 printk("%d,%d|", state, rl);
760 ++vector;
761 }
762
763 printk("\n");
764}
765
766void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
767{
768 dccp_ackvector_print(ap->dccpap_buf_ackno,
769 ap->dccpap_buf + ap->dccpap_buf_head,
770 ap->dccpap_buf_vector_len);
771}
772#endif
773
774static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
775{
776 /*
777 * As we're keeping track of the ack vector size
778 * (dccpap_buf_vector_len) and the sent ack vector size
779 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
780 * keep this code here as in the future we'll implement a vector of
781 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
782 * Appendix A. -acme
783 */
784#if 0
785 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
786 if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
787 ap->dccpap_buf_tail -= ap->dccpap_buf_len;
788#endif
789 ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
790}
791
792void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
793 u64 ackno)
794{
795 /* Check if we actually sent an ACK vector */
796 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
797 return;
798
799 if (ackno == ap->dccpap_ack_seqno) {
800#ifdef CONFIG_IP_DCCP_DEBUG
801 struct dccp_sock *dp = dccp_sk(sk);
802 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
803 "CLIENT rx ack: " : "server rx ack: ";
804#endif
805 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
806 "ack_ackno=%llu, ACKED!\n",
807 debug_prefix, 1,
808 (unsigned long long) ap->dccpap_ack_seqno,
809 (unsigned long long) ap->dccpap_ack_ackno);
810 dccp_ackpkts_trow_away_ack_record(ap);
811 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
812 }
813}
814
815static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
816 struct sock *sk, u64 ackno,
817 const unsigned char len,
818 const unsigned char *vector)
819{
820 unsigned char i;
821
822 /* Check if we actually sent an ACK vector */
823 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
824 return;
825 /*
826 * We're in the receiver half connection, so if the received an ACK
827 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
828 * not interested.
829 *
830 * Extra explanation with example:
831 *
832 * if we received an ACK vector with ackno 50, it can only be acking
833 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
834 */
835 /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
836 if (before48(ackno, ap->dccpap_ack_seqno)) {
837 /* dccp_pr_debug_cat("yes\n"); */
838 return;
839 }
840 /* dccp_pr_debug_cat("no\n"); */
841
842 i = len;
843 while (i--) {
844 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
845 u64 ackno_end_rl;
846
847 dccp_set_seqno(&ackno_end_rl, ackno - rl);
848
849 /*
850 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
851 * ap->dccpap_ack_seqno, ackno);
852 */
853 if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
854 const u8 state = (*vector &
855 DCCP_ACKPKTS_STATE_MASK) >> 6;
856 /* dccp_pr_debug_cat("yes\n"); */
857
858 if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
859#ifdef CONFIG_IP_DCCP_DEBUG
860 struct dccp_sock *dp = dccp_sk(sk);
861 const char *debug_prefix =
862 dp->dccps_role == DCCP_ROLE_CLIENT ?
863 "CLIENT rx ack: " : "server rx ack: ";
864#endif
865 dccp_pr_debug("%sACK vector 0, len=%d, "
866 "ack_seqno=%llu, ack_ackno=%llu, "
867 "ACKED!\n",
868 debug_prefix, len,
869 (unsigned long long)
870 ap->dccpap_ack_seqno,
871 (unsigned long long)
872 ap->dccpap_ack_ackno);
873 dccp_ackpkts_trow_away_ack_record(ap);
874 }
875 /*
876 * If dccpap_ack_seqno was not received, no problem
877 * we'll send another ACK vector.
878 */
879 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
880 break;
881 }
882 /* dccp_pr_debug_cat("no\n"); */
883
884 dccp_set_seqno(&ackno, ackno_end_rl - 1);
885 ++vector;
886 }
887}
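
The removed helpers above (and their replacements in ackvec.c) operate on the Ack Vector byte layout from draft-ietf-dccp-spec-11 Appendix A: the top two bits of each byte carry the receive state, the low six bits a run length, and a run length of 0 still covers one packet. A minimal standalone sketch of decoding one such byte, assuming only that layout; the names ACKVEC_STATE_MASK, ACKVEC_LEN_MASK and ackvec_decode_byte are illustrative stand-ins for the kernel's DCCP_ACKPKTS_* masks, not part of this patch.

#include <stdint.h>
#include <stdio.h>

#define ACKVEC_STATE_MASK 0xC0	/* top two bits: receive state */
#define ACKVEC_LEN_MASK   0x3F	/* low six bits: run length    */

/* Decode one Ack Vector byte; "len == 0 means one packet". */
static void ackvec_decode_byte(uint8_t b, uint64_t ackno)
{
	uint8_t state = (b & ACKVEC_STATE_MASK) >> 6;
	uint8_t rl    =  b & ACKVEC_LEN_MASK;

	printf("state=%u covers seqnos %llu down to %llu (%u packets)\n",
	       (unsigned)state,
	       (unsigned long long)ackno,
	       (unsigned long long)(ackno - rl),
	       (unsigned)(rl + 1));
}

int main(void)
{
	ackvec_decode_byte(0x02, 50);	/* state 0 (received), run length 2: covers 50, 49, 48 */
	return 0;
}
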
diff --git a/net/dccp/output.c b/net/dccp/output.c
index ea6d0e91e511..4786bdcddcc9 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -16,6 +16,7 @@
16 16
17#include <net/sock.h> 17#include <net/sock.h>
18 18
19#include "ackvec.h"
19#include "ccid.h" 20#include "ccid.h"
20#include "dccp.h" 21#include "dccp.h"
21 22
@@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
85 switch (dcb->dccpd_type) { 86 switch (dcb->dccpd_type) {
86 case DCCP_PKT_REQUEST: 87 case DCCP_PKT_REQUEST:
87 dccp_hdr_request(skb)->dccph_req_service = 88 dccp_hdr_request(skb)->dccph_req_service =
88 dcb->dccpd_service; 89 dp->dccps_service;
89 break; 90 break;
90 case DCCP_PKT_RESET: 91 case DCCP_PKT_RESET:
91 dccp_hdr_reset(skb)->dccph_reset_code = 92 dccp_hdr_reset(skb)->dccph_reset_code =
@@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
225 err = dccp_wait_for_ccid(sk, skb, timeo); 226 err = dccp_wait_for_ccid(sk, skb, timeo);
226 227
227 if (err == 0) { 228 if (err == 0) {
228 const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
229 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 229 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
230 const int len = skb->len; 230 const int len = skb->len;
231 231
@@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
236 inet_csk(sk)->icsk_rto, 236 inet_csk(sk)->icsk_rto,
237 DCCP_RTO_MAX); 237 DCCP_RTO_MAX);
238 dcb->dccpd_type = DCCP_PKT_DATAACK; 238 dcb->dccpd_type = DCCP_PKT_DATAACK;
239 /* 239 } else if (dccp_ack_pending(sk))
240 * FIXME: we really should have a
241 * dccps_ack_pending or use icsk.
242 */
243 } else if (inet_csk_ack_scheduled(sk) ||
244 dp->dccps_timestamp_echo != 0 ||
245 (dp->dccps_options.dccpo_send_ack_vector &&
246 ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
247 ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
248 dcb->dccpd_type = DCCP_PKT_DATAACK; 240 dcb->dccpd_type = DCCP_PKT_DATAACK;
249 else 241 else
250 dcb->dccpd_type = DCCP_PKT_DATA; 242 dcb->dccpd_type = DCCP_PKT_DATA;
@@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
270 struct request_sock *req) 262 struct request_sock *req)
271{ 263{
272 struct dccp_hdr *dh; 264 struct dccp_hdr *dh;
265 struct dccp_request_sock *dreq;
273 const int dccp_header_size = sizeof(struct dccp_hdr) + 266 const int dccp_header_size = sizeof(struct dccp_hdr) +
274 sizeof(struct dccp_hdr_ext) + 267 sizeof(struct dccp_hdr_ext) +
275 sizeof(struct dccp_hdr_response); 268 sizeof(struct dccp_hdr_response);
@@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
285 skb->dst = dst_clone(dst); 278 skb->dst = dst_clone(dst);
286 skb->csum = 0; 279 skb->csum = 0;
287 280
281 dreq = dccp_rsk(req);
288 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 282 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
289 DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; 283 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
290 dccp_insert_options(sk, skb); 284 dccp_insert_options(sk, skb);
291 285
292 skb->h.raw = skb_push(skb, dccp_header_size); 286 skb->h.raw = skb_push(skb, dccp_header_size);
@@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
300 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; 294 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
301 dh->dccph_type = DCCP_PKT_RESPONSE; 295 dh->dccph_type = DCCP_PKT_RESPONSE;
302 dh->dccph_x = 1; 296 dh->dccph_x = 1;
303 dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); 297 dccp_hdr_set_seq(dh, dreq->dreq_iss);
304 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); 298 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
299 dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
305 300
306 dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, 301 dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
307 inet_rsk(req)->rmt_addr); 302 inet_rsk(req)->rmt_addr);
@@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk)
397 skb_reserve(skb, MAX_DCCP_HEADER); 392 skb_reserve(skb, MAX_DCCP_HEADER);
398 393
399 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; 394 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
400 /* FIXME: set service to something meaningful, coming
401 * from userspace*/
402 DCCP_SKB_CB(skb)->dccpd_service = 0;
403 skb->csum = 0; 395 skb->csum = 0;
404 skb_set_owner_w(skb, sk); 396 skb_set_owner_w(skb, sk);
405 397
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 18a0e69c9dc7..a1cfd0e9e3bc 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
94 94
95static inline int dccp_listen_start(struct sock *sk) 95static inline int dccp_listen_start(struct sock *sk)
96{ 96{
97 dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; 97 struct dccp_sock *dp = dccp_sk(sk);
98
99 dp->dccps_role = DCCP_ROLE_LISTEN;
100 /*
101 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102 * before calling listen()
103 */
104 if (dccp_service_not_initialized(sk))
105 return -EPROTO;
98 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); 106 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
99} 107}
100 108
@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
202 return -ENOIOCTLCMD; 210 return -ENOIOCTLCMD;
203} 211}
204 212
213static int dccp_setsockopt_service(struct sock *sk, const u32 service,
214 char __user *optval, int optlen)
215{
216 struct dccp_sock *dp = dccp_sk(sk);
217 struct dccp_service_list *sl = NULL;
218
219 if (service == DCCP_SERVICE_INVALID_VALUE ||
220 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
221 return -EINVAL;
222
223 if (optlen > sizeof(service)) {
224 sl = kmalloc(optlen, GFP_KERNEL);
225 if (sl == NULL)
226 return -ENOMEM;
227
228 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
229 if (copy_from_user(sl->dccpsl_list,
230 optval + sizeof(service),
231 optlen - sizeof(service)) ||
232 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
233 kfree(sl);
234 return -EFAULT;
235 }
236 }
237
238 lock_sock(sk);
239 dp->dccps_service = service;
240
241 if (dp->dccps_service_list != NULL)
242 kfree(dp->dccps_service_list);
243
244 dp->dccps_service_list = sl;
245 release_sock(sk);
246 return 0;
247}
248
205int dccp_setsockopt(struct sock *sk, int level, int optname, 249int dccp_setsockopt(struct sock *sk, int level, int optname,
206 char __user *optval, int optlen) 250 char __user *optval, int optlen)
207{ 251{
@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
218 if (get_user(val, (int __user *)optval)) 262 if (get_user(val, (int __user *)optval))
219 return -EFAULT; 263 return -EFAULT;
220 264
221 lock_sock(sk); 265 if (optname == DCCP_SOCKOPT_SERVICE)
266 return dccp_setsockopt_service(sk, val, optval, optlen);
222 267
268 lock_sock(sk);
223 dp = dccp_sk(sk); 269 dp = dccp_sk(sk);
224 err = 0; 270 err = 0;
225 271
@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
236 return err; 282 return err;
237} 283}
238 284
285static int dccp_getsockopt_service(struct sock *sk, int len,
286 u32 __user *optval,
287 int __user *optlen)
288{
289 const struct dccp_sock *dp = dccp_sk(sk);
290 const struct dccp_service_list *sl;
291 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
292
293 lock_sock(sk);
294 if (dccp_service_not_initialized(sk))
295 goto out;
296
297 if ((sl = dp->dccps_service_list) != NULL) {
298 slen = sl->dccpsl_nr * sizeof(u32);
299 total_len += slen;
300 }
301
302 err = -EINVAL;
303 if (total_len > len)
304 goto out;
305
306 err = 0;
307 if (put_user(total_len, optlen) ||
308 put_user(dp->dccps_service, optval) ||
309 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
310 err = -EFAULT;
311out:
312 release_sock(sk);
313 return err;
314}
315
239int dccp_getsockopt(struct sock *sk, int level, int optname, 316int dccp_getsockopt(struct sock *sk, int level, int optname,
240 char __user *optval, int __user *optlen) 317 char __user *optval, int __user *optlen)
241{ 318{
@@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
248 if (get_user(len, optlen)) 325 if (get_user(len, optlen))
249 return -EFAULT; 326 return -EFAULT;
250 327
251 len = min_t(unsigned int, len, sizeof(int)); 328 if (len < sizeof(int))
252 if (len < 0)
253 return -EINVAL; 329 return -EINVAL;
254 330
255 dp = dccp_sk(sk); 331 dp = dccp_sk(sk);
@@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
257 switch (optname) { 333 switch (optname) {
258 case DCCP_SOCKOPT_PACKET_SIZE: 334 case DCCP_SOCKOPT_PACKET_SIZE:
259 val = dp->dccps_packet_size; 335 val = dp->dccps_packet_size;
336 len = sizeof(dp->dccps_packet_size);
260 break; 337 break;
338 case DCCP_SOCKOPT_SERVICE:
339 return dccp_getsockopt_service(sk, len,
340 (u32 __user *)optval, optlen);
341 case 128 ... 191:
342 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
343 len, (u32 __user *)optval, optlen);
344 case 192 ... 255:
345 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
346 len, (u32 __user *)optval, optlen);
261 default: 347 default:
262 return -ENOPROTOOPT; 348 return -ENOPROTOOPT;
263 } 349 }
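
For context on the proto.c hunks above: dccp_listen_start() now returns -EPROTO unless a service code was set, so an application is expected to call setsockopt(DCCP_SOCKOPT_SERVICE) before listen(). A hypothetical userspace sketch of that sequence follows; dccp_listen_with_service is an invented helper name, and the fallback values for SOL_DCCP and DCCP_SOCKOPT_SERVICE are assumptions for illustration (they would normally come from the kernel headers matching this tree).

#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269			/* assumption: socket level for DCCP options */
#endif
#ifndef DCCP_SOCKOPT_SERVICE
#define DCCP_SOCKOPT_SERVICE 2		/* assumption: value from <linux/dccp.h>     */
#endif

/* Set a single u32 service code, then listen(); a larger buffer of u32s
 * would carry an additional service list, as dccp_setsockopt_service() parses. */
static int dccp_listen_with_service(int fd, uint32_t service)
{
	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
		       &service, sizeof(service)) < 0) {
		perror("setsockopt(DCCP_SOCKOPT_SERVICE)");
		return -1;
	}
	/* Without the service code, listen() now fails with EPROTO. */
	if (listen(fd, 5) < 0) {
		perror("listen");
		return -1;
	}
	return 0;
}

int main(void)
{
	/* A real caller would first create the socket with socket(AF_INET,
	 * SOCK_DCCP, IPPROTO_DCCP) and bind() it; -1 just exercises the error path. */
	return dccp_listen_with_service(-1, 42) ? 1 : 0;
}
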
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1b63b4824164..50c0519cd70d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.403" 46#define VERSION "0.404"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
224 Consider a node 'n' and its parent 'tp'. 224 Consider a node 'n' and its parent 'tp'.
225 225
226 If n is a leaf, every bit in its key is significant. Its presence is 226 If n is a leaf, every bit in its key is significant. Its presence is
227 necessitaded by path compression, since during a tree traversal (when 227 necessitated by path compression, since during a tree traversal (when
228 searching for a leaf - unless we are doing an insertion) we will completely 228 searching for a leaf - unless we are doing an insertion) we will completely
229 ignore all skipped bits we encounter. Thus we need to verify, at the end of 229 ignore all skipped bits we encounter. Thus we need to verify, at the end of
230 a potentially successful search, that we have indeed been walking the 230 a potentially successful search, that we have indeed been walking the
@@ -836,11 +836,12 @@ static void trie_init(struct trie *t)
836#endif 836#endif
837} 837}
838 838
839/* readside most use rcu_read_lock currently dump routines 839/* readside must use rcu_read_lock currently dump routines
840 via get_fa_head and dump */ 840 via get_fa_head and dump */
841 841
842static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) 842static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
843{ 843{
844 struct hlist_head *head = &l->list;
844 struct hlist_node *node; 845 struct hlist_node *node;
845 struct leaf_info *li; 846 struct leaf_info *li;
846 847
@@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
853 854
854static inline struct list_head * get_fa_head(struct leaf *l, int plen) 855static inline struct list_head * get_fa_head(struct leaf *l, int plen)
855{ 856{
856 struct leaf_info *li = find_leaf_info(&l->list, plen); 857 struct leaf_info *li = find_leaf_info(l, plen);
857 858
858 if (!li) 859 if (!li)
859 return NULL; 860 return NULL;
@@ -1085,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1085 } 1086 }
1086 1087
1087 if (tp && tp->pos + tp->bits > 32) 1088 if (tp && tp->pos + tp->bits > 32)
1088 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1089 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1089 tp, tp->pos, tp->bits, key, plen); 1090 tp, tp->pos, tp->bits, key, plen);
1090 1091
1091 /* Rebalance the trie */ 1092 /* Rebalance the trie */
@@ -1248,7 +1249,7 @@ err:
1248} 1249}
1249 1250
1250 1251
1251/* should be clalled with rcu_read_lock */ 1252/* should be called with rcu_read_lock */
1252static inline int check_leaf(struct trie *t, struct leaf *l, 1253static inline int check_leaf(struct trie *t, struct leaf *l,
1253 t_key key, int *plen, const struct flowi *flp, 1254 t_key key, int *plen, const struct flowi *flp,
1254 struct fib_result *res) 1255 struct fib_result *res)
@@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1590 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); 1591 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1591 1592
1592 l = fib_find_node(t, key); 1593 l = fib_find_node(t, key);
1593 li = find_leaf_info(&l->list, plen); 1594 li = find_leaf_info(l, plen);
1594 1595
1595 list_del_rcu(&fa->fa_list); 1596 list_del_rcu(&fa->fa_list);
1596 1597
@@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb)
1714 1715
1715 t->revision++; 1716 t->revision++;
1716 1717
1717 rcu_read_lock();
1718 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1718 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1719 found += trie_flush_leaf(t, l); 1719 found += trie_flush_leaf(t, l);
1720 1720
@@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb)
1722 trie_leaf_remove(t, ll->key); 1722 trie_leaf_remove(t, ll->key);
1723 ll = l; 1723 ll = l;
1724 } 1724 }
1725 rcu_read_unlock();
1726 1725
1727 if (ll && hlist_empty(&ll->list)) 1726 if (ll && hlist_empty(&ll->list))
1728 trie_leaf_remove(t, ll->key); 1727 trie_leaf_remove(t, ll->key);
@@ -1833,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1833 i++; 1832 i++;
1834 continue; 1833 continue;
1835 } 1834 }
1836 if (fa->fa_info->fib_nh == NULL) { 1835 BUG_ON(!fa->fa_info);
1837 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1838 i++;
1839 continue;
1840 }
1841 if (fa->fa_info == NULL) {
1842 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1843 i++;
1844 continue;
1845 }
1846 1836
1847 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 1837 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1848 cb->nlh->nlmsg_seq, 1838 cb->nlh->nlmsg_seq,
@@ -1965,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id)
1965 trie_main = t; 1955 trie_main = t;
1966 1956
1967 if (id == RT_TABLE_LOCAL) 1957 if (id == RT_TABLE_LOCAL)
1968 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); 1958 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
1969 1959
1970 return tb; 1960 return tb;
1971} 1961}
@@ -2029,7 +2019,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2029 iter->tnode = (struct tnode *) n; 2019 iter->tnode = (struct tnode *) n;
2030 iter->trie = t; 2020 iter->trie = t;
2031 iter->index = 0; 2021 iter->index = 0;
2032 iter->depth = 0; 2022 iter->depth = 1;
2033 return n; 2023 return n;
2034 } 2024 }
2035 return NULL; 2025 return NULL;
@@ -2274,11 +2264,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2274 seq_puts(seq, "<local>:\n"); 2264 seq_puts(seq, "<local>:\n");
2275 else 2265 else
2276 seq_puts(seq, "<main>:\n"); 2266 seq_puts(seq, "<main>:\n");
2277 } else { 2267 }
2278 seq_indent(seq, iter->depth-1); 2268 seq_indent(seq, iter->depth-1);
2279 seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", 2269 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
2280 NIPQUAD(prf), tn->pos); 2270 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
2281 } 2271 tn->empty_children);
2272
2282 } else { 2273 } else {
2283 struct leaf *l = (struct leaf *) n; 2274 struct leaf *l = (struct leaf *) n;
2284 int i; 2275 int i;
@@ -2287,7 +2278,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2287 seq_indent(seq, iter->depth); 2278 seq_indent(seq, iter->depth);
2288 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2279 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2289 for (i = 32; i >= 0; i--) { 2280 for (i = 32; i >= 0; i--) {
2290 struct leaf_info *li = find_leaf_info(&l->list, i); 2281 struct leaf_info *li = find_leaf_info(l, i);
2291 if (li) { 2282 if (li) {
2292 struct fib_alias *fa; 2283 struct fib_alias *fa;
2293 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2284 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -2383,7 +2374,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2383 return 0; 2374 return 0;
2384 2375
2385 for (i=32; i>=0; i--) { 2376 for (i=32; i>=0; i--) {
2386 struct leaf_info *li = find_leaf_info(&l->list, i); 2377 struct leaf_info *li = find_leaf_info(l, i);
2387 struct fib_alias *fa; 2378 struct fib_alias *fa;
2388 u32 mask, prefix; 2379 u32 mask, prefix;
2389 2380
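
The comment block quoted in the fib_trie.c context above explains that, with path compression, skipped bits are ignored during descent, so a lookup must finish by verifying the full key stored in the leaf. A standalone toy illustration of that final check follows; toy_leaf and toy_leaf_matches are hypothetical names and this is not the kernel's trie structure.

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for a leaf reached after a path-compressed descent. */
struct toy_leaf {
	uint32_t key;	/* the full key remembered in the leaf */
};

/*
 * The descent (elided) only inspected the non-skipped bits, so the search
 * key may still differ from the stored key in the bits that were skipped;
 * hence the final full comparison.
 */
static int toy_leaf_matches(const struct toy_leaf *l, uint32_t search_key)
{
	return l->key == search_key;
}

int main(void)
{
	struct toy_leaf l = { .key = 0xC0A80001u };		/* 192.168.0.1 */

	printf("%d\n", toy_leaf_matches(&l, 0xC0A80101u));	/* 0: differs only in skipped bits */
	printf("%d\n", toy_leaf_matches(&l, 0xC0A80001u));	/* 1: exact match */
	return 0;
}
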
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44607f4767b8..70c44e4c3ceb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
1603 } 1603 }
1604 pmc->sources = NULL; 1604 pmc->sources = NULL;
1605 pmc->sfmode = MCAST_EXCLUDE; 1605 pmc->sfmode = MCAST_EXCLUDE;
1606 pmc->sfcount[MCAST_EXCLUDE] = 0; 1606 pmc->sfcount[MCAST_INCLUDE] = 0;
1607 pmc->sfcount[MCAST_EXCLUDE] = 1; 1607 pmc->sfcount[MCAST_EXCLUDE] = 1;
1608} 1608}
1609 1609
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index e11952ea17af..f828fa2eb7de 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
197 if (s_addr==cp->caddr && s_port==cp->cport && 197 if (s_addr==cp->caddr && s_port==cp->cport &&
198 d_port==cp->vport && d_addr==cp->vaddr && 198 d_port==cp->vport && d_addr==cp->vaddr &&
199 ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
199 protocol==cp->protocol) { 200 protocol==cp->protocol) {
200 /* HIT */ 201 /* HIT */
201 atomic_inc(&cp->refcnt); 202 atomic_inc(&cp->refcnt);
@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get
227 return cp; 228 return cp;
228} 229}
229 230
231/* Get reference to connection template */
232struct ip_vs_conn *ip_vs_ct_in_get
233(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
234{
235 unsigned hash;
236 struct ip_vs_conn *cp;
237
238 hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
239
240 ct_read_lock(hash);
241
242 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
243 if (s_addr==cp->caddr && s_port==cp->cport &&
244 d_port==cp->vport && d_addr==cp->vaddr &&
245 cp->flags & IP_VS_CONN_F_TEMPLATE &&
246 protocol==cp->protocol) {
247 /* HIT */
248 atomic_inc(&cp->refcnt);
249 goto out;
250 }
251 }
252 cp = NULL;
253
254 out:
255 ct_read_unlock(hash);
256
257 IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
258 ip_vs_proto_name(protocol),
259 NIPQUAD(s_addr), ntohs(s_port),
260 NIPQUAD(d_addr), ntohs(d_port),
261 cp?"hit":"not hit");
262
263 return cp;
264}
230 265
231/* 266/*
232 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 267 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
367 atomic_read(&dest->refcnt)); 402 atomic_read(&dest->refcnt));
368 403
369 /* Update the connection counters */ 404 /* Update the connection counters */
370 if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 405 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
371 /* It is a normal connection, so increase the inactive 406 /* It is a normal connection, so increase the inactive
372 connection counter because it is in TCP SYNRECV 407 connection counter because it is in TCP SYNRECV
 373 state (inactive) or other protocol inactive state */ 408 state (inactive) or other protocol inactive state */
@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
406 atomic_read(&dest->refcnt)); 441 atomic_read(&dest->refcnt));
407 442
408 /* Update the connection counters */ 443 /* Update the connection counters */
409 if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 444 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
410 /* It is a normal connection, so decrease the inactconns 445 /* It is a normal connection, so decrease the inactconns
411 or activeconns counter */ 446 or activeconns counter */
412 if (cp->flags & IP_VS_CONN_F_INACTIVE) { 447 if (cp->flags & IP_VS_CONN_F_INACTIVE) {
@@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
467 /* 502 /*
468 * Invalidate the connection template 503 * Invalidate the connection template
469 */ 504 */
470 if (ct->cport) { 505 if (ct->vport != 65535) {
471 if (ip_vs_conn_unhash(ct)) { 506 if (ip_vs_conn_unhash(ct)) {
472 ct->dport = 65535; 507 ct->dport = 65535;
473 ct->vport = 65535; 508 ct->vport = 65535;
@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void)
776 ct_write_lock_bh(hash); 811 ct_write_lock_bh(hash);
777 812
778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 813 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
779 if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) 814 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
780 /* connection template */ 815 /* connection template */
781 continue; 816 continue;
782 817
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 3ac7eeca04ac..981cc3244ef2 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
243 if (ports[1] == svc->port) { 243 if (ports[1] == svc->port) {
244 /* Check if a template already exists */ 244 /* Check if a template already exists */
245 if (svc->port != FTPPORT) 245 if (svc->port != FTPPORT)
246 ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 246 ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
247 iph->daddr, ports[1]); 247 iph->daddr, ports[1]);
248 else 248 else
249 ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 249 ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
250 iph->daddr, 0); 250 iph->daddr, 0);
251 251
252 if (!ct || !ip_vs_check_template(ct)) { 252 if (!ct || !ip_vs_check_template(ct)) {
@@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
272 iph->daddr, 272 iph->daddr,
273 ports[1], 273 ports[1],
274 dest->addr, dest->port, 274 dest->addr, dest->port,
275 0, 275 IP_VS_CONN_F_TEMPLATE,
276 dest); 276 dest);
277 else 277 else
278 ct = ip_vs_conn_new(iph->protocol, 278 ct = ip_vs_conn_new(iph->protocol,
279 snet, 0, 279 snet, 0,
280 iph->daddr, 0, 280 iph->daddr, 0,
281 dest->addr, 0, 281 dest->addr, 0,
282 0, 282 IP_VS_CONN_F_TEMPLATE,
283 dest); 283 dest);
284 if (ct == NULL) 284 if (ct == NULL)
285 return NULL; 285 return NULL;
@@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> 298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
299 */ 299 */
300 if (svc->fwmark) 300 if (svc->fwmark)
301 ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, 301 ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
302 htonl(svc->fwmark), 0); 302 htonl(svc->fwmark), 0);
303 else 303 else
304 ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 304 ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
305 iph->daddr, 0); 305 iph->daddr, 0);
306 306
307 if (!ct || !ip_vs_check_template(ct)) { 307 if (!ct || !ip_vs_check_template(ct)) {
@@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
326 snet, 0, 326 snet, 0,
327 htonl(svc->fwmark), 0, 327 htonl(svc->fwmark), 0,
328 dest->addr, 0, 328 dest->addr, 0,
329 0, 329 IP_VS_CONN_F_TEMPLATE,
330 dest); 330 dest);
331 else 331 else
332 ct = ip_vs_conn_new(iph->protocol, 332 ct = ip_vs_conn_new(iph->protocol,
333 snet, 0, 333 snet, 0,
334 iph->daddr, 0, 334 iph->daddr, 0,
335 dest->addr, 0, 335 dest->addr, 0,
336 0, 336 IP_VS_CONN_F_TEMPLATE,
337 dest); 337 dest);
338 if (ct == NULL) 338 if (ct == NULL)
339 return NULL; 339 return NULL;
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 574d1f509b46..2e5ced3d8062 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
297 297
298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
299 for (i=0; i<m->nr_conns; i++) { 299 for (i=0; i<m->nr_conns; i++) {
300 unsigned flags;
301
300 s = (struct ip_vs_sync_conn *)p; 302 s = (struct ip_vs_sync_conn *)p;
301 cp = ip_vs_conn_in_get(s->protocol, 303 flags = ntohs(s->flags);
302 s->caddr, s->cport, 304 if (!(flags & IP_VS_CONN_F_TEMPLATE))
303 s->vaddr, s->vport); 305 cp = ip_vs_conn_in_get(s->protocol,
306 s->caddr, s->cport,
307 s->vaddr, s->vport);
308 else
309 cp = ip_vs_ct_in_get(s->protocol,
310 s->caddr, s->cport,
311 s->vaddr, s->vport);
304 if (!cp) { 312 if (!cp) {
305 cp = ip_vs_conn_new(s->protocol, 313 cp = ip_vs_conn_new(s->protocol,
306 s->caddr, s->cport, 314 s->caddr, s->cport,
307 s->vaddr, s->vport, 315 s->vaddr, s->vport,
308 s->daddr, s->dport, 316 s->daddr, s->dport,
309 ntohs(s->flags), NULL); 317 flags, NULL);
310 if (!cp) { 318 if (!cp) {
311 IP_VS_ERR("ip_vs_conn_new failed\n"); 319 IP_VS_ERR("ip_vs_conn_new failed\n");
312 return; 320 return;
@@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
315 } else if (!cp->dest) { 323 } else if (!cp->dest) {
316 /* it is an entry created by the synchronization */ 324 /* it is an entry created by the synchronization */
317 cp->state = ntohs(s->state); 325 cp->state = ntohs(s->state);
318 cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; 326 cp->flags = flags | IP_VS_CONN_F_HASHED;
319 } /* Note that we don't touch its state and flags 327 } /* Note that we don't touch its state and flags
320 if it is a normal entry. */ 328 if it is a normal entry. */
321 329
322 if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { 330 if (flags & IP_VS_CONN_F_SEQ_MASK) {
323 opt = (struct ip_vs_sync_conn_options *)&s[1]; 331 opt = (struct ip_vs_sync_conn_options *)&s[1];
324 memcpy(&cp->in_seq, opt, sizeof(*opt)); 332 memcpy(&cp->in_seq, opt, sizeof(*opt));
325 p += FULL_CONN_SIZE; 333 p += FULL_CONN_SIZE;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 30aa8e2ee214..3cf9b451675c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS
51 51
52 IF unsure, say `N'. 52 IF unsure, say `N'.
53 53
54config IP_NF_CONNTRACK_NETLINK
55 tristate 'Connection tracking netlink interface'
56 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
57 depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
58 help
59 This option enables support for a netlink-based userspace interface
60
61
54config IP_NF_CT_PROTO_SCTP 62config IP_NF_CT_PROTO_SCTP
55 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' 63 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
56 depends on IP_NF_CONNTRACK && EXPERIMENTAL 64 depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -129,6 +137,22 @@ config IP_NF_AMANDA
129 137
130 To compile it as a module, choose M here. If unsure, say Y. 138 To compile it as a module, choose M here. If unsure, say Y.
131 139
140config IP_NF_PPTP
141 tristate 'PPTP protocol support'
142 help
143 This module adds support for PPTP (Point to Point Tunnelling
 144 Protocol, RFC2637) connection tracking and NAT.
145
146 If you are running PPTP sessions over a stateful firewall or NAT
147 box, you may want to enable this feature.
148
149 Please note that not all PPTP modes of operation are supported yet.
 150 For more info, read the top of the file
151 net/ipv4/netfilter/ip_conntrack_pptp.c
152
153 If you want to compile it as a module, say M here and read
154 Documentation/modules.txt. If unsure, say `N'.
155
132config IP_NF_QUEUE 156config IP_NF_QUEUE
133 tristate "IP Userspace queueing via NETLINK (OBSOLETE)" 157 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
134 help 158 help
@@ -613,6 +637,12 @@ config IP_NF_NAT_AMANDA
613 default IP_NF_NAT if IP_NF_AMANDA=y 637 default IP_NF_NAT if IP_NF_AMANDA=y
614 default m if IP_NF_AMANDA=m 638 default m if IP_NF_AMANDA=m
615 639
640config IP_NF_NAT_PPTP
641 tristate
642 depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
643 default IP_NF_NAT if IP_NF_PPTP=y
644 default m if IP_NF_PPTP=m
645
616# mangle + specific targets 646# mangle + specific targets
617config IP_NF_MANGLE 647config IP_NF_MANGLE
618 tristate "Packet mangling" 648 tristate "Packet mangling"
@@ -774,11 +804,5 @@ config IP_NF_ARP_MANGLE
774 Allows altering the ARP packet payload: source and destination 804 Allows altering the ARP packet payload: source and destination
775 hardware and network addresses. 805 hardware and network addresses.
776 806
777config IP_NF_CONNTRACK_NETLINK
778 tristate 'Connection tracking netlink interface'
779 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
780 help
781 This option enables support for a netlink-based userspace interface
782
783endmenu 807endmenu
784 808
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 1ba0db746817..3d45d3c0283c 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -6,6 +6,9 @@
6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o 6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
7iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o 7iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
8 8
9ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
10ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
11
9# connection tracking 12# connection tracking
10obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o 13obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
11 14
@@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
17obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o 20obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
18 21
19# connection tracking helpers 22# connection tracking helpers
23obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
20obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o 24obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
21obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o 25obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
22obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o 26obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
@@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
24obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o 28obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
25 29
26# NAT helpers 30# NAT helpers
31obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
27obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o 32obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
28obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o 33obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
29obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o 34obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 19cba16e6e1e..c1f82e0c81cf 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
233 233
234/* Just find a expectation corresponding to a tuple. */ 234/* Just find a expectation corresponding to a tuple. */
235struct ip_conntrack_expect * 235struct ip_conntrack_expect *
236ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) 236ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
237{ 237{
238 struct ip_conntrack_expect *i; 238 struct ip_conntrack_expect *i;
239 239
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
1143 if (del_timer(&ct->timeout)) { 1143 if (del_timer(&ct->timeout)) {
1144 ct->timeout.expires = jiffies + extra_jiffies; 1144 ct->timeout.expires = jiffies + extra_jiffies;
1145 add_timer(&ct->timeout); 1145 add_timer(&ct->timeout);
1146 ip_conntrack_event_cache(IPCT_REFRESH, skb); 1146 /* FIXME: We lose some REFRESH events if this function
1147 * is called without an skb. I'll fix this later -HW */
1148 if (skb)
1149 ip_conntrack_event_cache(IPCT_REFRESH, skb);
1147 } 1150 }
1148 ct_add_counters(ct, ctinfo, skb); 1151 ct_add_counters(ct, ctinfo, skb);
1149 write_unlock_bh(&ip_conntrack_lock); 1152 write_unlock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
new file mode 100644
index 000000000000..79db5b70d5f6
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -0,0 +1,805 @@
1/*
2 * ip_conntrack_pptp.c - Version 3.0
3 *
4 * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
 5 * PPTP is a protocol for creating virtual private networks.
6 * It is a specification defined by Microsoft and some vendors
7 * working with Microsoft. PPTP is built on top of a modified
8 * version of the Internet Generic Routing Encapsulation Protocol.
9 * GRE is defined in RFC 1701 and RFC 1702. Documentation of
10 * PPTP can be found in RFC 2637
11 *
12 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 *
16 * Limitations:
17 * - We blindly assume that control connections are always
18 * established in PNS->PAC direction. This is a violation
 19 * of RFC 2637
 20 * - We can only support a single call within each session
21 *
22 * TODO:
23 * - testing of incoming PPTP calls
24 *
25 * Changes:
26 * 2002-02-05 - Version 1.3
27 * - Call ip_conntrack_unexpect_related() from
28 * pptp_destroy_siblings() to destroy expectations in case
29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
30 * (Philip Craig <philipc@snapgear.com>)
31 * - Add Version information at module loadtime
32 * 2002-02-10 - Version 1.6
33 * - move to C99 style initializers
34 * - remove second expectation if first arrives
35 * 2004-10-22 - Version 2.0
36 * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
37 * - fix lots of linear skb assumptions from Mandrake's port
38 * 2005-06-10 - Version 2.1
39 * - use ip_conntrack_expect_free() instead of kfree() on the
40 * expect's (which are from the slab for quite some time)
41 * 2005-06-10 - Version 3.0
42 * - port helper to post-2.6.11 API changes,
43 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
44 * 2005-07-30 - Version 3.1
45 * - port helper to 2.6.13 API changes
46 *
47 */
48
49#include <linux/config.h>
50#include <linux/module.h>
51#include <linux/netfilter.h>
52#include <linux/ip.h>
53#include <net/checksum.h>
54#include <net/tcp.h>
55
56#include <linux/netfilter_ipv4/ip_conntrack.h>
57#include <linux/netfilter_ipv4/ip_conntrack_core.h>
58#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
59#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
60#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
61
62#define IP_CT_PPTP_VERSION "3.1"
63
64MODULE_LICENSE("GPL");
65MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
66MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
67
68static DEFINE_SPINLOCK(ip_pptp_lock);
69
70int
71(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
72 struct ip_conntrack *ct,
73 enum ip_conntrack_info ctinfo,
74 struct PptpControlHeader *ctlh,
75 union pptp_ctrl_union *pptpReq);
76
77int
78(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
79 struct ip_conntrack *ct,
80 enum ip_conntrack_info ctinfo,
81 struct PptpControlHeader *ctlh,
82 union pptp_ctrl_union *pptpReq);
83
84int
85(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
86 struct ip_conntrack_expect *expect_reply);
87
88void
89(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
90 struct ip_conntrack_expect *exp);
91
92#if 0
93/* PptpControlMessageType names */
94const char *pptp_msg_name[] = {
95 "UNKNOWN_MESSAGE",
96 "START_SESSION_REQUEST",
97 "START_SESSION_REPLY",
98 "STOP_SESSION_REQUEST",
99 "STOP_SESSION_REPLY",
100 "ECHO_REQUEST",
101 "ECHO_REPLY",
102 "OUT_CALL_REQUEST",
103 "OUT_CALL_REPLY",
104 "IN_CALL_REQUEST",
105 "IN_CALL_REPLY",
106 "IN_CALL_CONNECT",
107 "CALL_CLEAR_REQUEST",
108 "CALL_DISCONNECT_NOTIFY",
109 "WAN_ERROR_NOTIFY",
110 "SET_LINK_INFO"
111};
112EXPORT_SYMBOL(pptp_msg_name);
113#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
114#else
115#define DEBUGP(format, args...)
116#endif
117
118#define SECS *HZ
119#define MINS * 60 SECS
120#define HOURS * 60 MINS
121
122#define PPTP_GRE_TIMEOUT (10 MINS)
123#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
124
125static void pptp_expectfn(struct ip_conntrack *ct,
126 struct ip_conntrack_expect *exp)
127{
128 DEBUGP("increasing timeouts\n");
129
130 /* increase timeout of GRE data channel conntrack entry */
131 ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
132 ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
133
134 /* Can you see how rusty this code is, compared with the pre-2.6.11
135 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
136
137 if (!ip_nat_pptp_hook_expectfn) {
138 struct ip_conntrack_tuple inv_t;
139 struct ip_conntrack_expect *exp_other;
140
141 /* obviously this tuple inversion only works until you do NAT */
142 invert_tuplepr(&inv_t, &exp->tuple);
143 DEBUGP("trying to unexpect other dir: ");
144 DUMP_TUPLE(&inv_t);
145
146 exp_other = ip_conntrack_expect_find(&inv_t);
147 if (exp_other) {
148 /* delete other expectation. */
149 DEBUGP("found\n");
150 ip_conntrack_unexpect_related(exp_other);
151 ip_conntrack_expect_put(exp_other);
152 } else {
153 DEBUGP("not found\n");
154 }
155 } else {
156 /* we need more than simple inversion */
157 ip_nat_pptp_hook_expectfn(ct, exp);
158 }
159}
160
161static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
162{
163 struct ip_conntrack_tuple_hash *h;
164 struct ip_conntrack_expect *exp;
165
166 DEBUGP("trying to timeout ct or exp for tuple ");
167 DUMP_TUPLE(t);
168
169 h = ip_conntrack_find_get(t, NULL);
170 if (h) {
171 struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
172 DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
173 sibling->proto.gre.timeout = 0;
174 sibling->proto.gre.stream_timeout = 0;
175 /* refresh_acct will not modify counters if skb == NULL */
176 if (del_timer(&sibling->timeout))
177 sibling->timeout.function((unsigned long)sibling);
178 ip_conntrack_put(sibling);
179 return 1;
180 } else {
181 exp = ip_conntrack_expect_find(t);
182 if (exp) {
183 DEBUGP("unexpect_related of expect %p\n", exp);
184 ip_conntrack_unexpect_related(exp);
185 ip_conntrack_expect_put(exp);
186 return 1;
187 }
188 }
189
190 return 0;
191}
192
193
194/* timeout GRE data connections */
195static void pptp_destroy_siblings(struct ip_conntrack *ct)
196{
197 struct ip_conntrack_tuple t;
198
199 /* Since ct->sibling_list has literally rusted away in 2.6.11,
200 * we now need another way to find out about our sibling
 201 * conntrack and expects... -HW */
202
203 /* try original (pns->pac) tuple */
204 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
205 t.dst.protonum = IPPROTO_GRE;
206 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
207 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
208
209 if (!destroy_sibling_or_exp(&t))
210 DEBUGP("failed to timeout original pns->pac ct/exp\n");
211
212 /* try reply (pac->pns) tuple */
213 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
214 t.dst.protonum = IPPROTO_GRE;
215 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
216 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
217
218 if (!destroy_sibling_or_exp(&t))
219 DEBUGP("failed to timeout reply pac->pns ct/exp\n");
220}
221
222/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
223static inline int
224exp_gre(struct ip_conntrack *master,
225 u_int32_t seq,
226 u_int16_t callid,
227 u_int16_t peer_callid)
228{
229 struct ip_conntrack_tuple inv_tuple;
230 struct ip_conntrack_tuple exp_tuples[] = {
231 /* tuple in original direction, PNS->PAC */
232 { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
233 .u = { .gre = { .key = peer_callid } }
234 },
235 .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
236 .u = { .gre = { .key = callid } },
237 .protonum = IPPROTO_GRE
238 },
239 },
240 /* tuple in reply direction, PAC->PNS */
241 { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
242 .u = { .gre = { .key = callid } }
243 },
244 .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
245 .u = { .gre = { .key = peer_callid } },
246 .protonum = IPPROTO_GRE
247 },
248 }
249 };
250 struct ip_conntrack_expect *exp_orig, *exp_reply;
251 int ret = 1;
252
253 exp_orig = ip_conntrack_expect_alloc(master);
254 if (exp_orig == NULL)
255 goto out;
256
257 exp_reply = ip_conntrack_expect_alloc(master);
258 if (exp_reply == NULL)
259 goto out_put_orig;
260
261 memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
262
263 exp_orig->mask.src.ip = 0xffffffff;
264 exp_orig->mask.src.u.all = 0;
265 exp_orig->mask.dst.u.all = 0;
266 exp_orig->mask.dst.u.gre.key = 0xffff;
267 exp_orig->mask.dst.ip = 0xffffffff;
268 exp_orig->mask.dst.protonum = 0xff;
269
270 exp_orig->master = master;
271 exp_orig->expectfn = pptp_expectfn;
272 exp_orig->flags = 0;
273
274 exp_orig->dir = IP_CT_DIR_ORIGINAL;
275
276 /* both expectations are identical apart from tuple */
277 memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
278 memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
279
280 exp_reply->dir = !exp_orig->dir;
281
282 if (ip_nat_pptp_hook_exp_gre)
283 ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
284 else {
285
286 DEBUGP("calling expect_related PNS->PAC");
287 DUMP_TUPLE(&exp_orig->tuple);
288
289 if (ip_conntrack_expect_related(exp_orig) != 0) {
290 DEBUGP("cannot expect_related()\n");
291 goto out_put_both;
292 }
293
294 DEBUGP("calling expect_related PAC->PNS");
295 DUMP_TUPLE(&exp_reply->tuple);
296
297 if (ip_conntrack_expect_related(exp_reply) != 0) {
298 DEBUGP("cannot expect_related()\n");
299 goto out_unexpect_orig;
300 }
301
302 /* Add GRE keymap entries */
303 if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
304 DEBUGP("cannot keymap_add() exp\n");
305 goto out_unexpect_both;
306 }
307
308 invert_tuplepr(&inv_tuple, &exp_reply->tuple);
309 if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
310 ip_ct_gre_keymap_destroy(master);
311 DEBUGP("cannot keymap_add() exp_inv\n");
312 goto out_unexpect_both;
313 }
314 ret = 0;
315 }
316
317out_put_both:
318 ip_conntrack_expect_put(exp_reply);
319out_put_orig:
320 ip_conntrack_expect_put(exp_orig);
321out:
322 return ret;
323
324out_unexpect_both:
325 ip_conntrack_unexpect_related(exp_reply);
326out_unexpect_orig:
327 ip_conntrack_unexpect_related(exp_orig);
328 goto out_put_both;
329}
330
331static inline int
332pptp_inbound_pkt(struct sk_buff **pskb,
333 struct tcphdr *tcph,
334 unsigned int nexthdr_off,
335 unsigned int datalen,
336 struct ip_conntrack *ct,
337 enum ip_conntrack_info ctinfo)
338{
339 struct PptpControlHeader _ctlh, *ctlh;
340 unsigned int reqlen;
341 union pptp_ctrl_union _pptpReq, *pptpReq;
342 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
343 u_int16_t msg, *cid, *pcid;
344 u_int32_t seq;
345
346 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
347 if (!ctlh) {
348 DEBUGP("error during skb_header_pointer\n");
349 return NF_ACCEPT;
350 }
351 nexthdr_off += sizeof(_ctlh);
352 datalen -= sizeof(_ctlh);
353
354 reqlen = datalen;
355 if (reqlen > sizeof(*pptpReq))
356 reqlen = sizeof(*pptpReq);
357 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
358 if (!pptpReq) {
359 DEBUGP("error during skb_header_pointer\n");
360 return NF_ACCEPT;
361 }
362
363 msg = ntohs(ctlh->messageType);
364 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
365
366 switch (msg) {
367 case PPTP_START_SESSION_REPLY:
368 if (reqlen < sizeof(_pptpReq.srep)) {
369 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
370 break;
371 }
372
373 /* server confirms new control session */
374 if (info->sstate < PPTP_SESSION_REQUESTED) {
375 DEBUGP("%s without START_SESS_REQUEST\n",
376 pptp_msg_name[msg]);
377 break;
378 }
379 if (pptpReq->srep.resultCode == PPTP_START_OK)
380 info->sstate = PPTP_SESSION_CONFIRMED;
381 else
382 info->sstate = PPTP_SESSION_ERROR;
383 break;
384
385 case PPTP_STOP_SESSION_REPLY:
386 if (reqlen < sizeof(_pptpReq.strep)) {
387 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
388 break;
389 }
390
391 /* server confirms end of control session */
392 if (info->sstate > PPTP_SESSION_STOPREQ) {
393 DEBUGP("%s without STOP_SESS_REQUEST\n",
394 pptp_msg_name[msg]);
395 break;
396 }
397 if (pptpReq->strep.resultCode == PPTP_STOP_OK)
398 info->sstate = PPTP_SESSION_NONE;
399 else
400 info->sstate = PPTP_SESSION_ERROR;
401 break;
402
403 case PPTP_OUT_CALL_REPLY:
404 if (reqlen < sizeof(_pptpReq.ocack)) {
405 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
406 break;
407 }
408
409 /* server accepted call, we now expect GRE frames */
410 if (info->sstate != PPTP_SESSION_CONFIRMED) {
411 DEBUGP("%s but no session\n", pptp_msg_name[msg]);
412 break;
413 }
414 if (info->cstate != PPTP_CALL_OUT_REQ &&
415 info->cstate != PPTP_CALL_OUT_CONF) {
416 DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
417 break;
418 }
419 if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
420 info->cstate = PPTP_CALL_NONE;
421 break;
422 }
423
424 cid = &pptpReq->ocack.callID;
425 pcid = &pptpReq->ocack.peersCallID;
426
427 info->pac_call_id = ntohs(*cid);
428
429 if (htons(info->pns_call_id) != *pcid) {
430 DEBUGP("%s for unknown callid %u\n",
431 pptp_msg_name[msg], ntohs(*pcid));
432 break;
433 }
434
435 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
436 ntohs(*cid), ntohs(*pcid));
437
438 info->cstate = PPTP_CALL_OUT_CONF;
439
440 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
441 + sizeof(struct PptpControlHeader)
442 + ((void *)pcid - (void *)pptpReq);
443
444 if (exp_gre(ct, seq, *cid, *pcid) != 0)
445 printk("ip_conntrack_pptp: error during exp_gre\n");
446 break;
447
448 case PPTP_IN_CALL_REQUEST:
449		if (reqlen < sizeof(_pptpReq.icreq)) {
450 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
451 break;
452 }
453
454 /* server tells us about incoming call request */
455 if (info->sstate != PPTP_SESSION_CONFIRMED) {
456 DEBUGP("%s but no session\n", pptp_msg_name[msg]);
457 break;
458 }
459		cid = &pptpReq->icreq.callID;
460		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
461		info->cstate = PPTP_CALL_IN_REQ;
462		info->pac_call_id = ntohs(*cid);
463 break;
464
465 case PPTP_IN_CALL_CONNECT:
466 if (reqlen < sizeof(_pptpReq.iccon)) {
467 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
468 break;
469 }
470
471 /* server tells us about incoming call established */
472 if (info->sstate != PPTP_SESSION_CONFIRMED) {
473 DEBUGP("%s but no session\n", pptp_msg_name[msg]);
474 break;
475 }
476		if (info->cstate != PPTP_CALL_IN_REP
477		    && info->cstate != PPTP_CALL_IN_CONF) {
478 DEBUGP("%s but never sent IN_CALL_REPLY\n",
479 pptp_msg_name[msg]);
480 break;
481 }
482
483 pcid = &pptpReq->iccon.peersCallID;
484 cid = &info->pac_call_id;
485
486 if (info->pns_call_id != ntohs(*pcid)) {
487 DEBUGP("%s for unknown CallID %u\n",
488			       pptp_msg_name[msg], ntohs(*pcid));
489 break;
490 }
491
492 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
493 info->cstate = PPTP_CALL_IN_CONF;
494
495 /* we expect a GRE connection from PAC to PNS */
496 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
497 + sizeof(struct PptpControlHeader)
498 + ((void *)pcid - (void *)pptpReq);
499
500 if (exp_gre(ct, seq, *cid, *pcid) != 0)
501 printk("ip_conntrack_pptp: error during exp_gre\n");
502
503 break;
504
505 case PPTP_CALL_DISCONNECT_NOTIFY:
506 if (reqlen < sizeof(_pptpReq.disc)) {
507 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
508 break;
509 }
510
511 /* server confirms disconnect */
512 cid = &pptpReq->disc.callID;
513 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
514 info->cstate = PPTP_CALL_NONE;
515
516 /* untrack this call id, unexpect GRE packets */
517 pptp_destroy_siblings(ct);
518 break;
519
520 case PPTP_WAN_ERROR_NOTIFY:
521 break;
522
523 case PPTP_ECHO_REQUEST:
524 case PPTP_ECHO_REPLY:
525 /* I don't have to explain these ;) */
526 break;
527 default:
528 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
529 ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
530 break;
531 }
532
533
534 if (ip_nat_pptp_hook_inbound)
535 return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
536 pptpReq);
537
538 return NF_ACCEPT;
539
540}
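The parsing above leans on the skb_header_pointer() idiom: ask for len bytes at a given offset, and you either get a pointer straight into the linear skb data or a copy placed into the caller-supplied buffer (the on-stack _ctlh and _pptpReq), with NULL returned when the packet is too short. A minimal usage sketch:

	static struct PptpControlHeader *
	peek_ctl_header(struct sk_buff *skb, unsigned int offset,
			struct PptpControlHeader *buf)
	{
		/* NULL means the packet does not contain a full control header */
		return skb_header_pointer(skb, offset, sizeof(*buf), buf);
	}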
541
542static inline int
543pptp_outbound_pkt(struct sk_buff **pskb,
544 struct tcphdr *tcph,
545 unsigned int nexthdr_off,
546 unsigned int datalen,
547 struct ip_conntrack *ct,
548 enum ip_conntrack_info ctinfo)
549{
550 struct PptpControlHeader _ctlh, *ctlh;
551 unsigned int reqlen;
552 union pptp_ctrl_union _pptpReq, *pptpReq;
553 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
554 u_int16_t msg, *cid, *pcid;
555
556 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
557 if (!ctlh)
558 return NF_ACCEPT;
559 nexthdr_off += sizeof(_ctlh);
560 datalen -= sizeof(_ctlh);
561
562 reqlen = datalen;
563 if (reqlen > sizeof(*pptpReq))
564 reqlen = sizeof(*pptpReq);
565 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
566 if (!pptpReq)
567 return NF_ACCEPT;
568
569 msg = ntohs(ctlh->messageType);
570 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
571
572 switch (msg) {
573 case PPTP_START_SESSION_REQUEST:
574		/* client requests a new control session */
575 if (info->sstate != PPTP_SESSION_NONE) {
576 DEBUGP("%s but we already have one",
577 pptp_msg_name[msg]);
578 }
579 info->sstate = PPTP_SESSION_REQUESTED;
580 break;
581 case PPTP_STOP_SESSION_REQUEST:
582 /* client requests end of control session */
583 info->sstate = PPTP_SESSION_STOPREQ;
584 break;
585
586 case PPTP_OUT_CALL_REQUEST:
587 if (reqlen < sizeof(_pptpReq.ocreq)) {
588 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
589 /* FIXME: break; */
590 }
591
592 /* client initiating connection to server */
593 if (info->sstate != PPTP_SESSION_CONFIRMED) {
594 DEBUGP("%s but no session\n",
595 pptp_msg_name[msg]);
596 break;
597 }
598 info->cstate = PPTP_CALL_OUT_REQ;
599 /* track PNS call id */
600 cid = &pptpReq->ocreq.callID;
601 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
602 info->pns_call_id = ntohs(*cid);
603 break;
604 case PPTP_IN_CALL_REPLY:
605 if (reqlen < sizeof(_pptpReq.icack)) {
606 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
607 break;
608 }
609
610 /* client answers incoming call */
611 if (info->cstate != PPTP_CALL_IN_REQ
612 && info->cstate != PPTP_CALL_IN_REP) {
613 DEBUGP("%s without incall_req\n",
614 pptp_msg_name[msg]);
615 break;
616 }
617 if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
618 info->cstate = PPTP_CALL_NONE;
619 break;
620 }
621 pcid = &pptpReq->icack.peersCallID;
622 if (info->pac_call_id != ntohs(*pcid)) {
623 DEBUGP("%s for unknown call %u\n",
624 pptp_msg_name[msg], ntohs(*pcid));
625 break;
626 }
627 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
628 /* part two of the three-way handshake */
629 info->cstate = PPTP_CALL_IN_REP;
630 info->pns_call_id = ntohs(pptpReq->icack.callID);
631 break;
632
633 case PPTP_CALL_CLEAR_REQUEST:
634 /* client requests hangup of call */
635 if (info->sstate != PPTP_SESSION_CONFIRMED) {
636 DEBUGP("CLEAR_CALL but no session\n");
637 break;
638 }
639 /* FUTURE: iterate over all calls and check if
640 * call ID is valid. We don't do this without newnat,
641 * because we only know about last call */
642 info->cstate = PPTP_CALL_CLEAR_REQ;
643 break;
644 case PPTP_SET_LINK_INFO:
645 break;
646 case PPTP_ECHO_REQUEST:
647 case PPTP_ECHO_REPLY:
648 /* I don't have to explain these ;) */
649 break;
650 default:
651 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
652 pptp_msg_name[msg]:pptp_msg_name[0], msg);
653 /* unknown: no need to create GRE masq table entry */
654 break;
655 }
656
657 if (ip_nat_pptp_hook_outbound)
658 return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
659 pptpReq);
660
661 return NF_ACCEPT;
662}
663
664
665/* track caller id inside control connection, call expect_related */
666static int
667conntrack_pptp_help(struct sk_buff **pskb,
668 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
669
670{
671 struct pptp_pkt_hdr _pptph, *pptph;
672 struct tcphdr _tcph, *tcph;
673 u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
674 u_int32_t datalen;
675 int dir = CTINFO2DIR(ctinfo);
676 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
677 unsigned int nexthdr_off;
678
679 int oldsstate, oldcstate;
680 int ret;
681
682 /* don't do any tracking before tcp handshake complete */
683 if (ctinfo != IP_CT_ESTABLISHED
684 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
685 DEBUGP("ctinfo = %u, skipping\n", ctinfo);
686 return NF_ACCEPT;
687 }
688
689 nexthdr_off = (*pskb)->nh.iph->ihl*4;
690 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
691 BUG_ON(!tcph);
692 nexthdr_off += tcph->doff * 4;
693 datalen = tcplen - tcph->doff * 4;
694
695 if (tcph->fin || tcph->rst) {
696 DEBUGP("RST/FIN received, timeouting GRE\n");
697 /* can't do this after real newnat */
698 info->cstate = PPTP_CALL_NONE;
699
700 /* untrack this call id, unexpect GRE packets */
701 pptp_destroy_siblings(ct);
702 }
703
704 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
705 if (!pptph) {
706 DEBUGP("no full PPTP header, can't track\n");
707 return NF_ACCEPT;
708 }
709 nexthdr_off += sizeof(_pptph);
710 datalen -= sizeof(_pptph);
711
712 /* if it's not a control message we can't do anything with it */
713 if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
714 ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
715 DEBUGP("not a control packet\n");
716 return NF_ACCEPT;
717 }
718
719 oldsstate = info->sstate;
720 oldcstate = info->cstate;
721
722 spin_lock_bh(&ip_pptp_lock);
723
724 /* FIXME: We just blindly assume that the control connection is always
725	 * established from PNS->PAC. However, the RFC makes no such guarantee. */
726 if (dir == IP_CT_DIR_ORIGINAL)
727 /* client -> server (PNS -> PAC) */
728 ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
729 ctinfo);
730 else
731 /* server -> client (PAC -> PNS) */
732 ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
733 ctinfo);
734 DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
735 oldsstate, info->sstate, oldcstate, info->cstate);
736 spin_unlock_bh(&ip_pptp_lock);
737
738 return ret;
739}
740
741/* control protocol helper */
742static struct ip_conntrack_helper pptp = {
743 .list = { NULL, NULL },
744 .name = "pptp",
745 .me = THIS_MODULE,
746 .max_expected = 2,
747 .timeout = 5 * 60,
748 .tuple = { .src = { .ip = 0,
749 .u = { .tcp = { .port =
750 __constant_htons(PPTP_CONTROL_PORT) } }
751 },
752 .dst = { .ip = 0,
753 .u = { .all = 0 },
754 .protonum = IPPROTO_TCP
755 }
756 },
757 .mask = { .src = { .ip = 0,
758 .u = { .tcp = { .port = 0xffff } }
759 },
760 .dst = { .ip = 0,
761 .u = { .all = 0 },
762 .protonum = 0xff
763 }
764 },
765 .help = conntrack_pptp_help
766};
767
768extern void __exit ip_ct_proto_gre_fini(void);
769extern int __init ip_ct_proto_gre_init(void);
770
771/* ip_conntrack_pptp initialization */
772static int __init init(void)
773{
774 int retcode;
775
776 retcode = ip_ct_proto_gre_init();
777 if (retcode < 0)
778 return retcode;
779
780 DEBUGP(" registering helper\n");
781 if ((retcode = ip_conntrack_helper_register(&pptp))) {
782 printk(KERN_ERR "Unable to register conntrack application "
783 "helper for pptp: %d\n", retcode);
784 ip_ct_proto_gre_fini();
785 return retcode;
786 }
787
788 printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
789 return 0;
790}
791
792static void __exit fini(void)
793{
794 ip_conntrack_helper_unregister(&pptp);
795 ip_ct_proto_gre_fini();
796 printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
797}
798
799module_init(init);
800module_exit(fini);
801
802EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
803EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
804EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
805EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 15aef3564742..b08a432efcf8 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1270 if (err < 0) 1270 if (err < 0)
1271 return err; 1271 return err;
1272 1272
1273 exp = ip_conntrack_expect_find_get(&tuple); 1273 exp = ip_conntrack_expect_find(&tuple);
1274 if (!exp) 1274 if (!exp)
1275 return -ENOENT; 1275 return -ENOENT;
1276 1276
@@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1318 return err; 1318 return err;
1319 1319
1320 /* bump usage count to 2 */ 1320 /* bump usage count to 2 */
1321 exp = ip_conntrack_expect_find_get(&tuple); 1321 exp = ip_conntrack_expect_find(&tuple);
1322 if (!exp) 1322 if (!exp)
1323 return -ENOENT; 1323 return -ENOENT;
1324 1324
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
new file mode 100644
index 000000000000..de3cb9db6f85
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -0,0 +1,327 @@
1/*
2 * ip_conntrack_proto_gre.c - Version 3.0
3 *
4 * Connection tracking protocol helper module for GRE.
5 *
6 * GRE is a generic encapsulation protocol, which is generally not very
7 * suited for NAT, as it has no protocol-specific part such as port numbers.
8 *
9 * It has an optional key field, which may help us distinguish two
10 * connections between the same two hosts.
11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key
16 * field in plain GRE.
17 *
18 * Documentation about PPTP can be found in RFC 2637
19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 *
24 */
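For orientation: the PPTP flavour of GRE ("enhanced GRE", RFC 2637 section 4) reuses the 32-bit Key slot of RFC 1701 GRE as a 16-bit payload length plus a 16-bit Call ID, and the Call ID is what this tracker keys on. A rough layout sketch follows; the field names are illustrative, the authoritative gre_hdr/gre_hdr_pptp definitions live in ip_conntrack_proto_gre.h.

	struct pptp_gre_header_sketch {		/* illustrative only */
		u_int16_t flags_ver;	/* C,R,K,S,s,Recur,A,Flags, Ver == 1 */
		u_int16_t protocol;	/* 0x880B for PPP payloads */
		u_int16_t payload_len;	/* upper half of the old Key field */
		u_int16_t call_id;	/* lower half: what conntrack keys on */
		u_int32_t seq;		/* present only if the S bit is set */
		u_int32_t ack;		/* present only if the A bit is set */
	};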
25
26#include <linux/config.h>
27#include <linux/module.h>
28#include <linux/types.h>
29#include <linux/timer.h>
30#include <linux/netfilter.h>
31#include <linux/ip.h>
32#include <linux/in.h>
33#include <linux/list.h>
34
35static DEFINE_RWLOCK(ip_ct_gre_lock);
36#define ASSERT_READ_LOCK(x)
37#define ASSERT_WRITE_LOCK(x)
38
39#include <linux/netfilter_ipv4/listhelp.h>
40#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
41#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
42#include <linux/netfilter_ipv4/ip_conntrack_core.h>
43
44#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
45#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
46
47MODULE_LICENSE("GPL");
48MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
49MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
50
51/* shamelessly stolen from ip_conntrack_proto_udp.c */
52#define GRE_TIMEOUT (30*HZ)
53#define GRE_STREAM_TIMEOUT (180*HZ)
54
55#if 0
56#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
57#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
58 NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
59 NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
60#else
61#define DEBUGP(x, args...)
62#define DUMP_TUPLE_GRE(x)
63#endif
64
65/* GRE KEYMAP HANDLING FUNCTIONS */
66static LIST_HEAD(gre_keymap_list);
67
68static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
69 const struct ip_conntrack_tuple *t)
70{
71 return ((km->tuple.src.ip == t->src.ip) &&
72 (km->tuple.dst.ip == t->dst.ip) &&
73 (km->tuple.dst.protonum == t->dst.protonum) &&
74 (km->tuple.dst.u.all == t->dst.u.all));
75}
76
77/* look up the source key for a given tuple */
78static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
79{
80 struct ip_ct_gre_keymap *km;
81 u_int32_t key = 0;
82
83 read_lock_bh(&ip_ct_gre_lock);
84 km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
85 struct ip_ct_gre_keymap *, t);
86 if (km)
87 key = km->tuple.src.u.gre.key;
88 read_unlock_bh(&ip_ct_gre_lock);
89
90 DEBUGP("lookup src key 0x%x up key for ", key);
91 DUMP_TUPLE_GRE(t);
92
93 return key;
94}
95
96/* add a single keymap entry, associate with specified master ct */
97int
98ip_ct_gre_keymap_add(struct ip_conntrack *ct,
99 struct ip_conntrack_tuple *t, int reply)
100{
101 struct ip_ct_gre_keymap **exist_km, *km, *old;
102
103 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
104 DEBUGP("refusing to add GRE keymap to non-pptp session\n");
105 return -1;
106 }
107
108 if (!reply)
109 exist_km = &ct->help.ct_pptp_info.keymap_orig;
110 else
111 exist_km = &ct->help.ct_pptp_info.keymap_reply;
112
113 if (*exist_km) {
114 /* check whether it's a retransmission */
115 old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
116 struct ip_ct_gre_keymap *, t);
117 if (old == *exist_km) {
118 DEBUGP("retransmission\n");
119 return 0;
120 }
121
122 DEBUGP("trying to override keymap_%s for ct %p\n",
123 reply? "reply":"orig", ct);
124 return -EEXIST;
125 }
126
127 km = kmalloc(sizeof(*km), GFP_ATOMIC);
128 if (!km)
129 return -ENOMEM;
130
131 memcpy(&km->tuple, t, sizeof(*t));
132 *exist_km = km;
133
134 DEBUGP("adding new entry %p: ", km);
135 DUMP_TUPLE_GRE(&km->tuple);
136
137 write_lock_bh(&ip_ct_gre_lock);
138 list_append(&gre_keymap_list, km);
139 write_unlock_bh(&ip_ct_gre_lock);
140
141 return 0;
142}
143
144/* destroy the keymap entries associated with specified master ct */
145void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
146{
147 DEBUGP("entering for ct %p\n", ct);
148
149 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
150 DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
151 return;
152 }
153
154 write_lock_bh(&ip_ct_gre_lock);
155 if (ct->help.ct_pptp_info.keymap_orig) {
156 DEBUGP("removing %p from list\n",
157 ct->help.ct_pptp_info.keymap_orig);
158 list_del(&ct->help.ct_pptp_info.keymap_orig->list);
159 kfree(ct->help.ct_pptp_info.keymap_orig);
160 ct->help.ct_pptp_info.keymap_orig = NULL;
161 }
162 if (ct->help.ct_pptp_info.keymap_reply) {
163 DEBUGP("removing %p from list\n",
164 ct->help.ct_pptp_info.keymap_reply);
165 list_del(&ct->help.ct_pptp_info.keymap_reply->list);
166 kfree(ct->help.ct_pptp_info.keymap_reply);
167 ct->help.ct_pptp_info.keymap_reply = NULL;
168 }
169 write_unlock_bh(&ip_ct_gre_lock);
170}
171
172
173/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
174
175/* invert gre part of tuple */
176static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
177 const struct ip_conntrack_tuple *orig)
178{
179 tuple->dst.u.gre.key = orig->src.u.gre.key;
180 tuple->src.u.gre.key = orig->dst.u.gre.key;
181
182 return 1;
183}
184
185/* gre hdr info to tuple */
186static int gre_pkt_to_tuple(const struct sk_buff *skb,
187 unsigned int dataoff,
188 struct ip_conntrack_tuple *tuple)
189{
190 struct gre_hdr_pptp _pgrehdr, *pgrehdr;
191 u_int32_t srckey;
192 struct gre_hdr _grehdr, *grehdr;
193
194 /* first only delinearize old RFC1701 GRE header */
195 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
196 if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
197 /* try to behave like "ip_conntrack_proto_generic" */
198 tuple->src.u.all = 0;
199 tuple->dst.u.all = 0;
200 return 1;
201 }
202
203 /* PPTP header is variable length, only need up to the call_id field */
204 pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
205 if (!pgrehdr)
206 return 1;
207
208 if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
209 DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
210 return 0;
211 }
212
213 tuple->dst.u.gre.key = pgrehdr->call_id;
214 srckey = gre_keymap_lookup(tuple);
215 tuple->src.u.gre.key = srckey;
216
217 return 1;
218}
219
220/* print gre part of tuple */
221static int gre_print_tuple(struct seq_file *s,
222 const struct ip_conntrack_tuple *tuple)
223{
224 return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
225 ntohs(tuple->src.u.gre.key),
226 ntohs(tuple->dst.u.gre.key));
227}
228
229/* print private data for conntrack */
230static int gre_print_conntrack(struct seq_file *s,
231 const struct ip_conntrack *ct)
232{
233 return seq_printf(s, "timeout=%u, stream_timeout=%u ",
234 (ct->proto.gre.timeout / HZ),
235 (ct->proto.gre.stream_timeout / HZ));
236}
237
238/* Returns verdict for packet, and may modify conntrack */
239static int gre_packet(struct ip_conntrack *ct,
240 const struct sk_buff *skb,
241 enum ip_conntrack_info conntrackinfo)
242{
243 /* If we've seen traffic both ways, this is a GRE connection.
244 * Extend timeout. */
245 if (ct->status & IPS_SEEN_REPLY) {
246 ip_ct_refresh_acct(ct, conntrackinfo, skb,
247 ct->proto.gre.stream_timeout);
248 /* Also, more likely to be important, and not a probe. */
249 set_bit(IPS_ASSURED_BIT, &ct->status);
250 } else
251 ip_ct_refresh_acct(ct, conntrackinfo, skb,
252 ct->proto.gre.timeout);
253
254 return NF_ACCEPT;
255}
256
257/* Called when a new connection for this protocol found. */
258static int gre_new(struct ip_conntrack *ct,
259 const struct sk_buff *skb)
260{
261 DEBUGP(": ");
262 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
263
264	/* initialize to sane values. Ideally a conntrack helper
265	 * (e.g. for PPTP) will increase them */
266 ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
267 ct->proto.gre.timeout = GRE_TIMEOUT;
268
269 return 1;
270}
271
272/* Called when a conntrack entry has already been removed from the hashes
273 * and is about to be deleted from memory */
274static void gre_destroy(struct ip_conntrack *ct)
275{
276 struct ip_conntrack *master = ct->master;
277 DEBUGP(" entering\n");
278
279 if (!master)
280 DEBUGP("no master !?!\n");
281 else
282 ip_ct_gre_keymap_destroy(master);
283}
284
285/* protocol helper struct */
286static struct ip_conntrack_protocol gre = {
287 .proto = IPPROTO_GRE,
288 .name = "gre",
289 .pkt_to_tuple = gre_pkt_to_tuple,
290 .invert_tuple = gre_invert_tuple,
291 .print_tuple = gre_print_tuple,
292 .print_conntrack = gre_print_conntrack,
293 .packet = gre_packet,
294 .new = gre_new,
295 .destroy = gre_destroy,
296 .me = THIS_MODULE,
297#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
298 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
299 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
300 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
301#endif
302};
303
304/* ip_conntrack_proto_gre initialization */
305int __init ip_ct_proto_gre_init(void)
306{
307 return ip_conntrack_protocol_register(&gre);
308}
309
310void __exit ip_ct_proto_gre_fini(void)
311{
312 struct list_head *pos, *n;
313
314 /* delete all keymap entries */
315 write_lock_bh(&ip_ct_gre_lock);
316 list_for_each_safe(pos, n, &gre_keymap_list) {
317 DEBUGP("deleting keymap %p at module unload time\n", pos);
318 list_del(pos);
319 kfree(pos);
320 }
321 write_unlock_bh(&ip_ct_gre_lock);
322
323 ip_conntrack_protocol_unregister(&gre);
324}
325
326EXPORT_SYMBOL(ip_ct_gre_keymap_add);
327EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ae3e3e655db5..d3c7808010ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -993,11 +993,11 @@ EXPORT_SYMBOL(ip_ct_refresh_acct);
993 993
994EXPORT_SYMBOL(ip_conntrack_expect_alloc); 994EXPORT_SYMBOL(ip_conntrack_expect_alloc);
995EXPORT_SYMBOL(ip_conntrack_expect_put); 995EXPORT_SYMBOL(ip_conntrack_expect_put);
996EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); 996EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
997EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
997EXPORT_SYMBOL(ip_conntrack_expect_related); 998EXPORT_SYMBOL(ip_conntrack_expect_related);
998EXPORT_SYMBOL(ip_conntrack_unexpect_related); 999EXPORT_SYMBOL(ip_conntrack_unexpect_related);
999EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); 1000EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
1000EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
1001EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); 1001EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
1002 1002
1003EXPORT_SYMBOL(ip_conntrack_tuple_taken); 1003EXPORT_SYMBOL(ip_conntrack_tuple_taken);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1adedb743f60..c3ea891d38e7 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
578 578
579 return ret; 579 return ret;
580} 580}
581EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
582EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
581#endif 583#endif
582 584
583int __init ip_nat_init(void) 585int __init ip_nat_init(void)
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
new file mode 100644
index 000000000000..3cdd0684d30d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -0,0 +1,401 @@
1/*
2 * ip_nat_pptp.c - Version 3.0
3 *
4 * NAT support for PPTP (Point to Point Tunneling Protocol).
5 * PPTP is a protocol for creating virtual private networks.
6 * It is a specification defined by Microsoft and some vendors
7 * working with Microsoft. PPTP is built on top of a modified
8 * version of the Internet Generic Routing Encapsulation Protocol.
9 * GRE is defined in RFC 1701 and RFC 1702. Documentation of
10 * PPTP can be found in RFC 2637
11 *
12 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 *
16 * TODO: - NAT to a unique tuple, not to TCP source port
17 * (needs netfilter tuple reservation)
18 *
19 * Changes:
20 * 2002-02-10 - Version 1.3
21 * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
22 * in local connections (Philip Craig <philipc@snapgear.com>)
23 * - add checks for magicCookie and pptp version
24 * - make argument list of pptp_{out,in}bound_packet() shorter
25 * - move to C99 style initializers
26 * - print version number at module loadtime
27 * 2003-09-22 - Version 1.5
28 * - use SNATed tcp sourceport as callid, since we get called before
29 * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
30 * 2004-10-22 - Version 2.0
31 * - kernel 2.6.x version
32 * 2005-06-10 - Version 3.0
33 * - kernel >= 2.6.11 version,
34 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
35 *
36 */
37
38#include <linux/config.h>
39#include <linux/module.h>
40#include <linux/ip.h>
41#include <linux/tcp.h>
42#include <net/tcp.h>
43
44#include <linux/netfilter_ipv4/ip_nat.h>
45#include <linux/netfilter_ipv4/ip_nat_rule.h>
46#include <linux/netfilter_ipv4/ip_nat_helper.h>
47#include <linux/netfilter_ipv4/ip_nat_pptp.h>
48#include <linux/netfilter_ipv4/ip_conntrack_core.h>
49#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
50#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
51#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
52
53#define IP_NAT_PPTP_VERSION "3.0"
54
55MODULE_LICENSE("GPL");
56MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
57MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
58
59
60#if 0
61extern const char *pptp_msg_name[];
62#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
63 __FUNCTION__, ## args)
64#else
65#define DEBUGP(format, args...)
66#endif
67
68static void pptp_nat_expected(struct ip_conntrack *ct,
69 struct ip_conntrack_expect *exp)
70{
71 struct ip_conntrack *master = ct->master;
72 struct ip_conntrack_expect *other_exp;
73 struct ip_conntrack_tuple t;
74 struct ip_ct_pptp_master *ct_pptp_info;
75 struct ip_nat_pptp *nat_pptp_info;
76
77 ct_pptp_info = &master->help.ct_pptp_info;
78 nat_pptp_info = &master->nat.help.nat_pptp_info;
79
80 /* And here goes the grand finale of corrosion... */
81
82 if (exp->dir == IP_CT_DIR_ORIGINAL) {
83 DEBUGP("we are PNS->PAC\n");
84 /* therefore, build tuple for PAC->PNS */
85 t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
86 t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
87 t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
88 t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
89 t.dst.protonum = IPPROTO_GRE;
90 } else {
91 DEBUGP("we are PAC->PNS\n");
92 /* build tuple for PNS->PAC */
93 t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
94 t.src.u.gre.key =
95 htons(master->nat.help.nat_pptp_info.pns_call_id);
96 t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
97 t.dst.u.gre.key =
98 htons(master->nat.help.nat_pptp_info.pac_call_id);
99 t.dst.protonum = IPPROTO_GRE;
100 }
101
102 DEBUGP("trying to unexpect other dir: ");
103 DUMP_TUPLE(&t);
104 other_exp = ip_conntrack_expect_find(&t);
105 if (other_exp) {
106 ip_conntrack_unexpect_related(other_exp);
107 ip_conntrack_expect_put(other_exp);
108 DEBUGP("success\n");
109 } else {
110 DEBUGP("not found!\n");
111 }
112
113 ip_nat_follow_master(ct, exp);
114}
115
116/* outbound packets == from PNS to PAC */
117static int
118pptp_outbound_pkt(struct sk_buff **pskb,
119 struct ip_conntrack *ct,
120 enum ip_conntrack_info ctinfo,
121 struct PptpControlHeader *ctlh,
122 union pptp_ctrl_union *pptpReq)
123
124{
125 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
126 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
127
128 u_int16_t msg, *cid = NULL, new_callid;
129
130 new_callid = htons(ct_pptp_info->pns_call_id);
131
132 switch (msg = ntohs(ctlh->messageType)) {
133 case PPTP_OUT_CALL_REQUEST:
134 cid = &pptpReq->ocreq.callID;
135 /* FIXME: ideally we would want to reserve a call ID
136 * here. current netfilter NAT core is not able to do
137 * this :( For now we use TCP source port. This breaks
138 * multiple calls within one control session */
139
140 /* save original call ID in nat_info */
141 nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
142
143 /* don't use tcph->source since we are at a DSTmanip
144 * hook (e.g. PREROUTING) and pkt is not mangled yet */
145 new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
146
147 /* save new call ID in ct info */
148 ct_pptp_info->pns_call_id = ntohs(new_callid);
149 break;
150 case PPTP_IN_CALL_REPLY:
151 cid = &pptpReq->icreq.callID;
152 break;
153 case PPTP_CALL_CLEAR_REQUEST:
154 cid = &pptpReq->clrreq.callID;
155 break;
156 default:
157 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
158 (msg <= PPTP_MSG_MAX)?
159 pptp_msg_name[msg]:pptp_msg_name[0]);
160 /* fall through */
161
162 case PPTP_SET_LINK_INFO:
163 /* only need to NAT in case PAC is behind NAT box */
164 case PPTP_START_SESSION_REQUEST:
165 case PPTP_START_SESSION_REPLY:
166 case PPTP_STOP_SESSION_REQUEST:
167 case PPTP_STOP_SESSION_REPLY:
168 case PPTP_ECHO_REQUEST:
169 case PPTP_ECHO_REPLY:
170 /* no need to alter packet */
171 return NF_ACCEPT;
172 }
173
174 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
175 * down to here */
176
177 IP_NF_ASSERT(cid);
178
179 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
180 ntohs(*cid), ntohs(new_callid));
181
182 /* mangle packet */
183 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
184 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
185 sizeof(new_callid),
186 (char *)&new_callid,
187 sizeof(new_callid)) == 0)
188 return NF_DROP;
189
190 return NF_ACCEPT;
191}
192
193static int
194pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
195 struct ip_conntrack_expect *expect_reply)
196{
197 struct ip_ct_pptp_master *ct_pptp_info =
198 &expect_orig->master->help.ct_pptp_info;
199 struct ip_nat_pptp *nat_pptp_info =
200 &expect_orig->master->nat.help.nat_pptp_info;
201
202 struct ip_conntrack *ct = expect_orig->master;
203
204 struct ip_conntrack_tuple inv_t;
205 struct ip_conntrack_tuple *orig_t, *reply_t;
206
207 /* save original PAC call ID in nat_info */
208 nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
209
210 /* alter expectation */
211 orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
212 reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
213
214 /* alter expectation for PNS->PAC direction */
215 invert_tuplepr(&inv_t, &expect_orig->tuple);
216 expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
217 expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
218 expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
219 inv_t.src.ip = reply_t->src.ip;
220 inv_t.dst.ip = reply_t->dst.ip;
221 inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
222 inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
223
224 if (!ip_conntrack_expect_related(expect_orig)) {
225 DEBUGP("successfully registered expect\n");
226 } else {
227 DEBUGP("can't expect_related(expect_orig)\n");
228 return 1;
229 }
230
231 /* alter expectation for PAC->PNS direction */
232 invert_tuplepr(&inv_t, &expect_reply->tuple);
233 expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
234 expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
235 expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
236 inv_t.src.ip = orig_t->src.ip;
237 inv_t.dst.ip = orig_t->dst.ip;
238 inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
239 inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
240
241 if (!ip_conntrack_expect_related(expect_reply)) {
242 DEBUGP("successfully registered expect\n");
243 } else {
244 DEBUGP("can't expect_related(expect_reply)\n");
245 ip_conntrack_unexpect_related(expect_orig);
246 return 1;
247 }
248
249 if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
250 DEBUGP("can't register original keymap\n");
251 ip_conntrack_unexpect_related(expect_orig);
252 ip_conntrack_unexpect_related(expect_reply);
253 return 1;
254 }
255
256 if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
257 DEBUGP("can't register reply keymap\n");
258 ip_conntrack_unexpect_related(expect_orig);
259 ip_conntrack_unexpect_related(expect_reply);
260 ip_ct_gre_keymap_destroy(ct);
261 return 1;
262 }
263
264 return 0;
265}
266
267/* inbound packets == from PAC to PNS */
268static int
269pptp_inbound_pkt(struct sk_buff **pskb,
270 struct ip_conntrack *ct,
271 enum ip_conntrack_info ctinfo,
272 struct PptpControlHeader *ctlh,
273 union pptp_ctrl_union *pptpReq)
274{
275 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
276 u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
277
278 int ret = NF_ACCEPT, rv;
279
280 new_pcid = htons(nat_pptp_info->pns_call_id);
281
282 switch (msg = ntohs(ctlh->messageType)) {
283 case PPTP_OUT_CALL_REPLY:
284 pcid = &pptpReq->ocack.peersCallID;
285 cid = &pptpReq->ocack.callID;
286 break;
287 case PPTP_IN_CALL_CONNECT:
288 pcid = &pptpReq->iccon.peersCallID;
289 break;
290 case PPTP_IN_CALL_REQUEST:
291 /* only need to nat in case PAC is behind NAT box */
292 break;
293 case PPTP_WAN_ERROR_NOTIFY:
294 pcid = &pptpReq->wanerr.peersCallID;
295 break;
296 case PPTP_CALL_DISCONNECT_NOTIFY:
297 pcid = &pptpReq->disc.callID;
298 break;
299 case PPTP_SET_LINK_INFO:
300 pcid = &pptpReq->setlink.peersCallID;
301 break;
302
303 default:
304 DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
305 pptp_msg_name[msg]:pptp_msg_name[0]);
306 /* fall through */
307
308 case PPTP_START_SESSION_REQUEST:
309 case PPTP_START_SESSION_REPLY:
310 case PPTP_STOP_SESSION_REQUEST:
311 case PPTP_STOP_SESSION_REPLY:
312 case PPTP_ECHO_REQUEST:
313 case PPTP_ECHO_REPLY:
314 /* no need to alter packet */
315 return NF_ACCEPT;
316 }
317
318 /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
319 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
320
321 /* mangle packet */
322 IP_NF_ASSERT(pcid);
323 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
324 ntohs(*pcid), ntohs(new_pcid));
325
326 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
327 (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
328 sizeof(new_pcid), (char *)&new_pcid,
329 sizeof(new_pcid));
330 if (rv != NF_ACCEPT)
331 return rv;
332
333 if (new_cid) {
334 IP_NF_ASSERT(cid);
335 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
336 ntohs(*cid), ntohs(new_cid));
337 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
338 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
339 sizeof(new_cid),
340 (char *)&new_cid,
341 sizeof(new_cid));
342 if (rv != NF_ACCEPT)
343 return rv;
344 }
345
346 /* check for earlier return value of 'switch' above */
347 if (ret != NF_ACCEPT)
348 return ret;
349
350 /* great, at least we don't need to resize packets */
351 return NF_ACCEPT;
352}
353
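Taken together, the outbound and inbound paths above split the call-ID state in two: ct_pptp_info ends up holding the IDs as the PAC sees them (for the PNS that is the SNATed TCP source port), while nat_pptp_info keeps the PNS's original IDs so replies can be rewritten back. An illustrative summary, assuming the common case of the PNS sitting behind the NAT box (field and function names invented, not the kernel structs):

	#include <stdint.h>

	struct pptp_callid_view_sketch {
		uint16_t pns_call_id_orig;	/* chosen by the real PNS */
		uint16_t pns_call_id_nat;	/* == SNATed TCP source port, what the PAC sees */
		uint16_t pac_call_id;		/* chosen by the PAC, left untouched here */
	};

	/* outbound (PNS->PAC): the PNS's call ID goes out as the NATed value */
	static uint16_t outbound_callid(const struct pptp_callid_view_sketch *v)
	{
		return v->pns_call_id_nat;
	}

	/* inbound (PAC->PNS): peersCallID names the PNS, restore the original */
	static uint16_t inbound_peer_callid(const struct pptp_callid_view_sketch *v)
	{
		return v->pns_call_id_orig;
	}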
354
355extern int __init ip_nat_proto_gre_init(void);
356extern void __exit ip_nat_proto_gre_fini(void);
357
358static int __init init(void)
359{
360 int ret;
361
362 DEBUGP("%s: registering NAT helper\n", __FILE__);
363
364 ret = ip_nat_proto_gre_init();
365 if (ret < 0)
366 return ret;
367
368 BUG_ON(ip_nat_pptp_hook_outbound);
369 ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
370
371 BUG_ON(ip_nat_pptp_hook_inbound);
372 ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
373
374 BUG_ON(ip_nat_pptp_hook_exp_gre);
375 ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
376
377 BUG_ON(ip_nat_pptp_hook_expectfn);
378 ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
379
380 printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
381 return 0;
382}
383
384static void __exit fini(void)
385{
386 DEBUGP("cleanup_module\n" );
387
388 ip_nat_pptp_hook_expectfn = NULL;
389 ip_nat_pptp_hook_exp_gre = NULL;
390 ip_nat_pptp_hook_inbound = NULL;
391 ip_nat_pptp_hook_outbound = NULL;
392
393 ip_nat_proto_gre_fini();
394	/* Make sure no one calls it in the meantime */
395 synchronize_net();
396
397 printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
398}
399
400module_init(init);
401module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
new file mode 100644
index 000000000000..7c1285401672
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -0,0 +1,214 @@
1/*
2 * ip_nat_proto_gre.c - Version 2.0
3 *
4 * NAT protocol helper module for GRE.
5 *
6 * GRE is a generic encapsulation protocol, which is generally not very
7 * suited for NAT, as it has no protocol-specific part such as port numbers.
8 *
9 * It has an optional key field, which may help us distinguish two
10 * connections between the same two hosts.
11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key
16 * field in plain GRE.
17 *
18 * Documentation about PPTP can be found in RFC 2637
19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/module.h>
28#include <linux/ip.h>
29#include <linux/netfilter_ipv4/ip_nat.h>
30#include <linux/netfilter_ipv4/ip_nat_rule.h>
31#include <linux/netfilter_ipv4/ip_nat_protocol.h>
32#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
36MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
37
38#if 0
39#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
40 __FUNCTION__, ## args)
41#else
42#define DEBUGP(x, args...)
43#endif
44
45/* is key in given range between min and max */
46static int
47gre_in_range(const struct ip_conntrack_tuple *tuple,
48 enum ip_nat_manip_type maniptype,
49 const union ip_conntrack_manip_proto *min,
50 const union ip_conntrack_manip_proto *max)
51{
52 u_int32_t key;
53
54 if (maniptype == IP_NAT_MANIP_SRC)
55 key = tuple->src.u.gre.key;
56 else
57 key = tuple->dst.u.gre.key;
58
59 return ntohl(key) >= ntohl(min->gre.key)
60 && ntohl(key) <= ntohl(max->gre.key);
61}
62
63/* generate unique tuple ... */
64static int
65gre_unique_tuple(struct ip_conntrack_tuple *tuple,
66 const struct ip_nat_range *range,
67 enum ip_nat_manip_type maniptype,
68 const struct ip_conntrack *conntrack)
69{
70 static u_int16_t key;
71 u_int16_t *keyptr;
72 unsigned int min, i, range_size;
73
74 if (maniptype == IP_NAT_MANIP_SRC)
75 keyptr = &tuple->src.u.gre.key;
76 else
77 keyptr = &tuple->dst.u.gre.key;
78
79 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
80 DEBUGP("%p: NATing GRE PPTP\n", conntrack);
81 min = 1;
82 range_size = 0xffff;
83 } else {
84 min = ntohl(range->min.gre.key);
85 range_size = ntohl(range->max.gre.key) - min + 1;
86 }
87
88 DEBUGP("min = %u, range_size = %u\n", min, range_size);
89
90 for (i = 0; i < range_size; i++, key++) {
91 *keyptr = htonl(min + key % range_size);
92 if (!ip_nat_used_tuple(tuple, conntrack))
93 return 1;
94 }
95
96 DEBUGP("%p: no NAT mapping\n", conntrack);
97
98 return 0;
99}
100
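The allocation loop in gre_unique_tuple() is a plain rolling search over the key range; a userspace-style restatement of the strategy, with an in_use() predicate standing in for ip_nat_used_tuple() (sketch only):

	#include <stdint.h>
	#include <stdbool.h>

	static uint16_t pick_free_key(uint16_t min, uint32_t range_size,
				      bool (*in_use)(uint16_t key))
	{
		static uint16_t rover;		/* analog of the static 'key' above */
		uint32_t i;

		for (i = 0; i < range_size; i++, rover++) {
			uint16_t candidate = min + rover % range_size;
			if (!in_use(candidate))
				return candidate;	/* first unused key wins */
		}
		return 0;				/* range exhausted */
	}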
101/* manipulate a GRE packet according to maniptype */
102static int
103gre_manip_pkt(struct sk_buff **pskb,
104 unsigned int iphdroff,
105 const struct ip_conntrack_tuple *tuple,
106 enum ip_nat_manip_type maniptype)
107{
108 struct gre_hdr *greh;
109 struct gre_hdr_pptp *pgreh;
110 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
111 unsigned int hdroff = iphdroff + iph->ihl*4;
112
113 /* pgreh includes two optional 32bit fields which are not required
114 * to be there. That's where the magic '8' comes from */
115 if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
116 return 0;
117
118 greh = (void *)(*pskb)->data + hdroff;
119 pgreh = (struct gre_hdr_pptp *) greh;
120
121 /* we only have destination manip of a packet, since 'source key'
122 * is not present in the packet itself */
123 if (maniptype == IP_NAT_MANIP_DST) {
124 /* key manipulation is always dest */
125 switch (greh->version) {
126 case 0:
127 if (!greh->key) {
128 DEBUGP("can't nat GRE w/o key\n");
129 break;
130 }
131 if (greh->csum) {
132 /* FIXME: Never tested this code... */
133 *(gre_csum(greh)) =
134 ip_nat_cheat_check(~*(gre_key(greh)),
135 tuple->dst.u.gre.key,
136 *(gre_csum(greh)));
137 }
138 *(gre_key(greh)) = tuple->dst.u.gre.key;
139 break;
140 case GRE_VERSION_PPTP:
141 DEBUGP("call_id -> 0x%04x\n",
142 ntohl(tuple->dst.u.gre.key));
143 pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
144 break;
145 default:
146 DEBUGP("can't nat unknown GRE version\n");
147 return 0;
148 break;
149 }
150 }
151 return 1;
152}
153
154/* print out a nat tuple */
155static unsigned int
156gre_print(char *buffer,
157 const struct ip_conntrack_tuple *match,
158 const struct ip_conntrack_tuple *mask)
159{
160 unsigned int len = 0;
161
162 if (mask->src.u.gre.key)
163 len += sprintf(buffer + len, "srckey=0x%x ",
164 ntohl(match->src.u.gre.key));
165
166 if (mask->dst.u.gre.key)
167 len += sprintf(buffer + len, "dstkey=0x%x ",
168			       ntohl(match->dst.u.gre.key));
169
170 return len;
171}
172
173/* print a range of keys */
174static unsigned int
175gre_print_range(char *buffer, const struct ip_nat_range *range)
176{
177 if (range->min.gre.key != 0
178 || range->max.gre.key != 0xFFFF) {
179 if (range->min.gre.key == range->max.gre.key)
180 return sprintf(buffer, "key 0x%x ",
181 ntohl(range->min.gre.key));
182 else
183			return sprintf(buffer, "keys 0x%x-0x%x ",
184 ntohl(range->min.gre.key),
185 ntohl(range->max.gre.key));
186 } else
187 return 0;
188}
189
190/* nat helper struct */
191static struct ip_nat_protocol gre = {
192 .name = "GRE",
193 .protonum = IPPROTO_GRE,
194 .manip_pkt = gre_manip_pkt,
195 .in_range = gre_in_range,
196 .unique_tuple = gre_unique_tuple,
197 .print = gre_print,
198 .print_range = gre_print_range,
199#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
200 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
201 .range_to_nfattr = ip_nat_port_range_to_nfattr,
202 .nfattr_to_range = ip_nat_port_nfattr_to_range,
203#endif
204};
205
206int __init ip_nat_proto_gre_init(void)
207{
208 return ip_nat_protocol_register(&gre);
209}
210
211void __exit ip_nat_proto_gre_fini(void)
212{
213 ip_nat_protocol_unregister(&gre);
214}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d38913754b1..9bcb398fbc1f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/jhash.h> 15#include <linux/jhash.h>
16#include <linux/bitops.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/ip.h> 18#include <linux/ip.h>
18#include <linux/tcp.h> 19#include <linux/tcp.h>
@@ -30,7 +31,7 @@
30#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 31#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h> 32#include <linux/netfilter_ipv4/ip_conntrack.h>
32 33
33#define CLUSTERIP_VERSION "0.7" 34#define CLUSTERIP_VERSION "0.8"
34 35
35#define DEBUG_CLUSTERIP 36#define DEBUG_CLUSTERIP
36 37
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
49struct clusterip_config { 50struct clusterip_config {
50 struct list_head list; /* list of all configs */ 51 struct list_head list; /* list of all configs */
51 atomic_t refcount; /* reference count */ 52 atomic_t refcount; /* reference count */
53 atomic_t entries; /* number of entries/rules
54 * referencing us */
52 55
53 u_int32_t clusterip; /* the IP address */ 56 u_int32_t clusterip; /* the IP address */
54 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ 57 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
55 struct net_device *dev; /* device */ 58 struct net_device *dev; /* device */
56 u_int16_t num_total_nodes; /* total number of nodes */ 59 u_int16_t num_total_nodes; /* total number of nodes */
57 u_int16_t num_local_nodes; /* number of local nodes */ 60 unsigned long local_nodes; /* node number array */
58 u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
59 61
60#ifdef CONFIG_PROC_FS 62#ifdef CONFIG_PROC_FS
61 struct proc_dir_entry *pde; /* proc dir entry */ 63 struct proc_dir_entry *pde; /* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
66 68
67static LIST_HEAD(clusterip_configs); 69static LIST_HEAD(clusterip_configs);
68 70
69/* clusterip_lock protects the clusterip_configs list _AND_ the configurable 71/* clusterip_lock protects the clusterip_configs list */
70 * data within all structurses (num_local_nodes, local_nodes[]) */
71static DEFINE_RWLOCK(clusterip_lock); 72static DEFINE_RWLOCK(clusterip_lock);
72 73
73#ifdef CONFIG_PROC_FS 74#ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
76#endif 77#endif
77 78
78static inline void 79static inline void
79clusterip_config_get(struct clusterip_config *c) { 80clusterip_config_get(struct clusterip_config *c)
81{
80 atomic_inc(&c->refcount); 82 atomic_inc(&c->refcount);
81} 83}
82 84
83static inline void 85static inline void
84clusterip_config_put(struct clusterip_config *c) { 86clusterip_config_put(struct clusterip_config *c)
85 if (atomic_dec_and_test(&c->refcount)) { 87{
88 if (atomic_dec_and_test(&c->refcount))
89 kfree(c);
90}
91
92/* increase the count of entries(rules) using/referencing this config */
93static inline void
94clusterip_config_entry_get(struct clusterip_config *c)
95{
96 atomic_inc(&c->entries);
97}
98
99/* decrease the count of entries using/referencing this config. If last
100 * entry(rule) is removed, remove the config from lists, but don't free it
101 * yet, since proc-files could still be holding references */
102static inline void
103clusterip_config_entry_put(struct clusterip_config *c)
104{
105 if (atomic_dec_and_test(&c->entries)) {
86 write_lock_bh(&clusterip_lock); 106 write_lock_bh(&clusterip_lock);
87 list_del(&c->list); 107 list_del(&c->list);
88 write_unlock_bh(&clusterip_lock); 108 write_unlock_bh(&clusterip_lock);
109
89 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 110 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
90 dev_put(c->dev); 111 dev_put(c->dev);
91 kfree(c); 112
113 /* In case anyone still accesses the file, the open/close
114 * functions are also incrementing the refcount on their own,
115 * so it's safe to remove the entry even if it's in use. */
116#ifdef CONFIG_PROC_FS
117 remove_proc_entry(c->pde->name, c->pde->parent);
118#endif
92 } 119 }
93} 120}
94 121
95
96static struct clusterip_config * 122static struct clusterip_config *
97__clusterip_config_find(u_int32_t clusterip) 123__clusterip_config_find(u_int32_t clusterip)
98{ 124{
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
111} 137}
112 138
113static inline struct clusterip_config * 139static inline struct clusterip_config *
114clusterip_config_find_get(u_int32_t clusterip) 140clusterip_config_find_get(u_int32_t clusterip, int entry)
115{ 141{
116 struct clusterip_config *c; 142 struct clusterip_config *c;
117 143
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
122 return NULL; 148 return NULL;
123 } 149 }
124 atomic_inc(&c->refcount); 150 atomic_inc(&c->refcount);
151 if (entry)
152 atomic_inc(&c->entries);
125 read_unlock_bh(&clusterip_lock); 153 read_unlock_bh(&clusterip_lock);
126 154
127 return c; 155 return c;
128} 156}
129 157
158static void
159clusterip_config_init_nodelist(struct clusterip_config *c,
160 const struct ipt_clusterip_tgt_info *i)
161{
162 int n;
163
164 for (n = 0; n < i->num_local_nodes; n++) {
165 set_bit(i->local_nodes[n] - 1, &c->local_nodes);
166 }
167}
168
130static struct clusterip_config * 169static struct clusterip_config *
131clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, 170clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
132 struct net_device *dev) 171 struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
143 c->clusterip = ip; 182 c->clusterip = ip;
144 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 183 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
145 c->num_total_nodes = i->num_total_nodes; 184 c->num_total_nodes = i->num_total_nodes;
146 c->num_local_nodes = i->num_local_nodes; 185 clusterip_config_init_nodelist(c, i);
147 memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
148 c->hash_mode = i->hash_mode; 186 c->hash_mode = i->hash_mode;
149 c->hash_initval = i->hash_initval; 187 c->hash_initval = i->hash_initval;
150 atomic_set(&c->refcount, 1); 188 atomic_set(&c->refcount, 1);
189 atomic_set(&c->entries, 1);
151 190
152#ifdef CONFIG_PROC_FS 191#ifdef CONFIG_PROC_FS
153 /* create proc dir entry */ 192 /* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
171static int 210static int
172clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) 211clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
173{ 212{
174 int i;
175
176 write_lock_bh(&clusterip_lock);
177 213
178 if (c->num_local_nodes >= CLUSTERIP_MAX_NODES 214 if (nodenum == 0 ||
179 || nodenum > CLUSTERIP_MAX_NODES) { 215 nodenum > c->num_total_nodes)
180 write_unlock_bh(&clusterip_lock);
181 return 1; 216 return 1;
182 }
183
184 /* check if we alrady have this number in our array */
185 for (i = 0; i < c->num_local_nodes; i++) {
186 if (c->local_nodes[i] == nodenum) {
187 write_unlock_bh(&clusterip_lock);
188 return 1;
189 }
190 }
191 217
192 c->local_nodes[c->num_local_nodes++] = nodenum; 218 /* check if we already have this number in our bitfield */
219 if (test_and_set_bit(nodenum - 1, &c->local_nodes))
220 return 1;
193 221
194 write_unlock_bh(&clusterip_lock);
195 return 0; 222 return 0;
196} 223}
197 224
198static int 225static int
199clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) 226clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
200{ 227{
201 int i; 228 if (nodenum == 0 ||
202 229 nodenum > c->num_total_nodes)
203 write_lock_bh(&clusterip_lock);
204
205 if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
206 write_unlock_bh(&clusterip_lock);
207 return 1; 230 return 1;
208 }
209 231
210 for (i = 0; i < c->num_local_nodes; i++) { 232 if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
211 if (c->local_nodes[i] == nodenum) { 233 return 0;
212 int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
213 memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
214 c->num_local_nodes--;
215 write_unlock_bh(&clusterip_lock);
216 return 0;
217 }
218 }
219 234
220 write_unlock_bh(&clusterip_lock);
221 return 1; 235 return 1;
222} 236}
223 237
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
285static inline int 299static inline int
286clusterip_responsible(struct clusterip_config *config, u_int32_t hash) 300clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
287{ 301{
288 int i; 302 return test_bit(hash - 1, &config->local_nodes);
289
290 read_lock_bh(&clusterip_lock);
291
292 if (config->num_local_nodes == 0) {
293 read_unlock_bh(&clusterip_lock);
294 return 0;
295 }
296
297 for (i = 0; i < config->num_local_nodes; i++) {
298 if (config->local_nodes[i] == hash) {
299 read_unlock_bh(&clusterip_lock);
300 return 1;
301 }
302 }
303
304 read_unlock_bh(&clusterip_lock);
305
306 return 0;
307} 303}
308 304
309/*********************************************************************** 305/***********************************************************************
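The hunks above replace the per-config node array with a single unsigned long bitmap, so adding, removing and testing a node become bit operations and clusterip_responsible() no longer needs the lock. A userspace analog of the bookkeeping (the kernel uses the atomic set_bit/test_and_set_bit/test_and_clear_bit/test_bit helpers on c->local_nodes):

	#include <stdint.h>

	/* node n is "local" when bit n-1 is set; returns 0 on success, 1 on
	 * error, mirroring clusterip_add_node()/clusterip_del_node() above */
	static int node_add(unsigned long *nodes, unsigned int n, unsigned int total)
	{
		if (n == 0 || n > total || (*nodes & (1UL << (n - 1))))
			return 1;
		*nodes |= 1UL << (n - 1);
		return 0;
	}

	static int node_del(unsigned long *nodes, unsigned int n, unsigned int total)
	{
		if (n == 0 || n > total || !(*nodes & (1UL << (n - 1))))
			return 1;
		*nodes &= ~(1UL << (n - 1));
		return 0;
	}

	static int node_responsible(unsigned long nodes, uint32_t hash)
	{
		return !!(nodes & (1UL << (hash - 1)));	/* test_bit(hash - 1, ...) */
	}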
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
415 411
416 /* FIXME: further sanity checks */ 412 /* FIXME: further sanity checks */
417 413
418 config = clusterip_config_find_get(e->ip.dst.s_addr); 414 config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
419 if (!config) { 415 if (config) {
416 if (cipinfo->config != NULL) {
417 /* Case A: This is an entry that gets reloaded, since
418 * it still has a cipinfo->config pointer. Simply
419 * increase the entry refcount and return */
420 if (cipinfo->config != config) {
421 printk(KERN_ERR "CLUSTERIP: Reloaded entry "
422 "has invalid config pointer!\n");
423 return 0;
424 }
425 clusterip_config_entry_get(cipinfo->config);
426 } else {
427 /* Case B: This is a new rule referring to an existing
428 * clusterip config. */
429 cipinfo->config = config;
430 clusterip_config_entry_get(cipinfo->config);
431 }
432 } else {
433 /* Case C: This is a completely new clusterip config */
420 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { 434 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
421 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); 435 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
422 return 0; 436 return 0;
@@ -443,10 +457,9 @@ checkentry(const char *tablename,
443 } 457 }
444 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 458 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
445 } 459 }
460 cipinfo->config = config;
446 } 461 }
447 462
448 cipinfo->config = config;
449
450 return 1; 463 return 1;
451} 464}
452 465
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
455{ 468{
456 struct ipt_clusterip_tgt_info *cipinfo = matchinfo; 469 struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
457 470
458 /* we first remove the proc entry and then drop the reference 471 /* if no more entries are referencing the config, remove it
459 * count. In case anyone still accesses the file, the open/close 472 * from the list and destroy the proc entry */
460 * functions are also incrementing the refcount on their own */ 473 clusterip_config_entry_put(cipinfo->config);
461#ifdef CONFIG_PROC_FS 474
462 remove_proc_entry(cipinfo->config->pde->name,
463 cipinfo->config->pde->parent);
464#endif
465 clusterip_config_put(cipinfo->config); 475 clusterip_config_put(cipinfo->config);
466} 476}
467 477
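The checkentry()/destroy() hunks above split the config lifetime into two levels, as the new case A/B/C comments describe: every rule that uses a config takes an entry reference via clusterip_config_entry_get(), destroy() drops it with clusterip_config_entry_put(), and only once the last rule is gone does the /proc entry get torn down, after which the ordinary clusterip_config_put() reference is released. A toy standalone model of the entry-level counting (fields and names are invented for illustration, not the kernel's):

#include <stdio.h>

struct cfg_model {
        int entries;   /* rules currently using this config               */
        int has_proc;  /* stands in for the /proc/net/ipt_CLUSTERIP file  */
};

static void entry_get(struct cfg_model *c)
{
        c->entries++;
}

static void entry_put(struct cfg_model *c)
{
        if (--c->entries == 0)
                c->has_proc = 0;   /* last rule gone: remove the proc entry */
}

int main(void)
{
        struct cfg_model c = { .entries = 1, .has_proc = 1 }; /* case C: new config */

        entry_get(&c);   /* case B: a second rule reuses the config */
        entry_put(&c);   /* one rule deleted                        */
        entry_put(&c);   /* last rule deleted                       */
        printf("proc entry present: %d\n", c.has_proc);   /* prints 0 */
        return 0;
}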
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
533 543
534 /* if there is no clusterip configuration for the arp reply's 544 /* if there is no clusterip configuration for the arp reply's
535 * source ip, we don't want to mangle it */ 545 * source ip, we don't want to mangle it */
536 c = clusterip_config_find_get(payload->src_ip); 546 c = clusterip_config_find_get(payload->src_ip, 0);
537 if (!c) 547 if (!c)
538 return NF_ACCEPT; 548 return NF_ACCEPT;
539 549
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
574 584
575#ifdef CONFIG_PROC_FS 585#ifdef CONFIG_PROC_FS
576 586
587struct clusterip_seq_position {
588 unsigned int pos; /* position */
589 unsigned int weight; /* number of bits set == size */
590 unsigned int bit; /* current bit */
591 unsigned long val; /* current value */
592};
593
577static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 594static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
578{ 595{
579 struct proc_dir_entry *pde = s->private; 596 struct proc_dir_entry *pde = s->private;
580 struct clusterip_config *c = pde->data; 597 struct clusterip_config *c = pde->data;
581 unsigned int *nodeidx; 598 unsigned int weight;
582 599 u_int32_t local_nodes;
583 read_lock_bh(&clusterip_lock); 600 struct clusterip_seq_position *idx;
584 if (*pos >= c->num_local_nodes) 601
602 /* FIXME: possible race */
603 local_nodes = c->local_nodes;
604 weight = hweight32(local_nodes);
605 if (*pos >= weight)
585 return NULL; 606 return NULL;
586 607
587 nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); 608 idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
588 if (!nodeidx) 609 if (!idx)
589 return ERR_PTR(-ENOMEM); 610 return ERR_PTR(-ENOMEM);
590 611
591 *nodeidx = *pos; 612 idx->pos = *pos;
592 return nodeidx; 613 idx->weight = weight;
614 idx->bit = ffs(local_nodes);
615 idx->val = local_nodes;
616 clear_bit(idx->bit - 1, &idx->val);
617
618 return idx;
593} 619}
594 620
595static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) 621static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
596{ 622{
597 struct proc_dir_entry *pde = s->private; 623 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
598 struct clusterip_config *c = pde->data;
599 unsigned int *nodeidx = (unsigned int *)v;
600 624
601 *pos = ++(*nodeidx); 625 *pos = ++idx->pos;
602 if (*pos >= c->num_local_nodes) { 626 if (*pos >= idx->weight) {
603 kfree(v); 627 kfree(v);
604 return NULL; 628 return NULL;
605 } 629 }
606 return nodeidx; 630 idx->bit = ffs(idx->val);
631 clear_bit(idx->bit - 1, &idx->val);
632 return idx;
607} 633}
608 634
609static void clusterip_seq_stop(struct seq_file *s, void *v) 635static void clusterip_seq_stop(struct seq_file *s, void *v)
610{ 636{
611 kfree(v); 637 kfree(v);
612
613 read_unlock_bh(&clusterip_lock);
614} 638}
615 639
616static int clusterip_seq_show(struct seq_file *s, void *v) 640static int clusterip_seq_show(struct seq_file *s, void *v)
617{ 641{
618 struct proc_dir_entry *pde = s->private; 642 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
619 struct clusterip_config *c = pde->data;
620 unsigned int *nodeidx = (unsigned int *)v;
621 643
622 if (*nodeidx != 0) 644 if (idx->pos != 0)
623 seq_putc(s, ','); 645 seq_putc(s, ',');
624 seq_printf(s, "%u", c->local_nodes[*nodeidx]);
625 646
626 if (*nodeidx == c->num_local_nodes-1) 647 seq_printf(s, "%u", idx->bit);
648
649 if (idx->pos == idx->weight - 1)
627 seq_putc(s, '\n'); 650 seq_putc(s, '\n');
628 651
629 return 0; 652 return 0;
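The CLUSTERIP hunks above replace the u_int16_t local_nodes[] array, its num_local_nodes counter and the clusterip_lock taken around every lookup with a single bitmap: deleting a node becomes test_and_clear_bit(), the per-packet responsibility check becomes test_bit(), and the /proc seq_file walks the set bits with hweight32() and ffs(); node insertion (not shown in this excerpt) presumably just sets the corresponding bit. A minimal standalone sketch of that bookkeeping in plain C, with invented names and ordinary bit arithmetic standing in for the kernel helpers:

#include <stdio.h>

#define CLUSTERIP_MAX_NODES 16          /* hypothetical limit */

static unsigned long local_nodes;       /* bit n-1 set => node n is local */

/* returns 0 on success, 1 on error, mirroring the diff's convention */
static int node_add(unsigned int nodenum)
{
        if (nodenum == 0 || nodenum > CLUSTERIP_MAX_NODES)
                return 1;
        local_nodes |= 1UL << (nodenum - 1);
        return 0;
}

static int node_del(unsigned int nodenum)
{
        unsigned long mask = 1UL << (nodenum - 1);

        if (nodenum == 0 || nodenum > CLUSTERIP_MAX_NODES)
                return 1;
        if (local_nodes & mask) {       /* test_and_clear_bit() */
                local_nodes &= ~mask;
                return 0;
        }
        return 1;                       /* bit was not set */
}

static int node_responsible(unsigned int hash)
{
        return !!(local_nodes & (1UL << (hash - 1)));   /* test_bit() */
}

int main(void)
{
        node_add(1);
        node_add(3);
        printf("responsible for hash 3: %d\n", node_responsible(3)); /* 1 */
        node_del(3);
        printf("responsible for hash 3: %d\n", node_responsible(3)); /* 0 */
        return 0;
}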
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 304bb0a1d4f0..4b0d7e4d6269 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
361 361
362 if (type && code) { 362 if (type && code) {
363 get_user(fl->fl_icmp_type, type); 363 get_user(fl->fl_icmp_type, type);
364 __get_user(fl->fl_icmp_code, code); 364 get_user(fl->fl_icmp_code, code);
365 probed = 1; 365 probed = 1;
366 } 366 }
367 break; 367 break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 29222b964951..a7537c7bbd06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -979,14 +979,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
979 if (!before(TCP_SKB_CB(skb)->seq, end_seq)) 979 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
980 break; 980 break;
981 981
982 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
983 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
984
982 pcount = tcp_skb_pcount(skb); 985 pcount = tcp_skb_pcount(skb);
983 986
984 if (pcount > 1 && 987 if (pcount > 1 && !in_sack &&
985 (after(start_seq, TCP_SKB_CB(skb)->seq) || 988 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
986 before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
987 unsigned int pkt_len; 989 unsigned int pkt_len;
988 990
989 if (after(start_seq, TCP_SKB_CB(skb)->seq)) 991 in_sack = !after(start_seq,
992 TCP_SKB_CB(skb)->seq);
993
994 if (!in_sack)
990 pkt_len = (start_seq - 995 pkt_len = (start_seq -
991 TCP_SKB_CB(skb)->seq); 996 TCP_SKB_CB(skb)->seq);
992 else 997 else
@@ -999,9 +1004,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
999 1004
1000 fack_count += pcount; 1005 fack_count += pcount;
1001 1006
1002 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1003 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1004
1005 sacked = TCP_SKB_CB(skb)->sacked; 1007 sacked = TCP_SKB_CB(skb)->sacked;
1006 1008
1007 /* Account D-SACK for retransmitted packet. */ 1009 /* Account D-SACK for retransmitted packet. */
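In the tcp_input.c hunk the in_sack computation moves above the pcount > 1 test, so a multi-segment skb is only considered for splitting when it is not already entirely covered by the SACK block, and the partial-coverage case then recomputes in_sack from the left edge alone. A wrap-safe standalone model of the full-coverage predicate, assuming before()/after() are the usual signed 32-bit sequence comparisons (local names, not the kernel macros):

#include <stdint.h>
#include <stdio.h>

/* sequence-number comparisons that survive 32-bit wraparound */
static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

/* skb covers [seq, end_seq); SACK block covers [start_seq, end_seq_sack) */
static int in_sack(uint32_t seq, uint32_t end_seq,
                   uint32_t start_seq, uint32_t end_seq_sack)
{
        return !seq_after(start_seq, seq) && !seq_before(end_seq_sack, end_seq);
}

int main(void)
{
        /* skb 1000..2000 fully inside SACK block 500..3000 */
        printf("%d\n", in_sack(1000, 2000, 500, 3000));  /* 1 */
        /* skb straddles the left edge of the block */
        printf("%d\n", in_sack(1000, 2000, 1500, 3000)); /* 0 */
        return 0;
}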
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af7..b1a63b2c6b4a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
384 newtp->frto_counter = 0; 384 newtp->frto_counter = 0;
385 newtp->frto_highmark = 0; 385 newtp->frto_highmark = 0;
386 386
387 newicsk->icsk_ca_ops = &tcp_reno; 387 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
388 388
389 tcp_set_ca_state(newsk, TCP_CA_Open); 389 tcp_set_ca_state(newsk, TCP_CA_Open);
390 tcp_init_xmit_timers(newsk); 390 tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c10e4435e3b1..5dd6dd7d091e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -435,6 +435,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
435 int nsize, old_factor; 435 int nsize, old_factor;
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len >= skb->len);
439
438 nsize = skb_headlen(skb) - len; 440 nsize = skb_headlen(skb) - len;
439 if (nsize < 0) 441 if (nsize < 0)
440 nsize = 0; 442 nsize = 0;
@@ -459,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
459 flags = TCP_SKB_CB(skb)->flags; 461 flags = TCP_SKB_CB(skb)->flags;
460 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 462 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
461 TCP_SKB_CB(buff)->flags = flags; 463 TCP_SKB_CB(buff)->flags = flags;
462 TCP_SKB_CB(buff)->sacked = 464 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
463 (TCP_SKB_CB(skb)->sacked &
464 (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
465 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; 465 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
466 466
467 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { 467 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -499,6 +499,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
499 tcp_skb_pcount(buff); 499 tcp_skb_pcount(buff);
500 500
501 tp->packets_out -= diff; 501 tp->packets_out -= diff;
502
503 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
504 tp->sacked_out -= diff;
505 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
506 tp->retrans_out -= diff;
507
502 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { 508 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
503 tp->lost_out -= diff; 509 tp->lost_out -= diff;
504 tp->left_out -= diff; 510 tp->left_out -= diff;
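The second tcp_output.c hunk extends the accounting fixup in tcp_fragment(): when splitting an skb lowers the total segment count by diff, that diff must now also come off sacked_out and retrans_out whenever the skb carries the matching sacked flags, in addition to packets_out and the existing lost_out/left_out adjustment. A small standalone model of that bookkeeping; the flag values and the counter struct are illustrative, not the kernel's:

#include <stdio.h>

#define F_SACKED_ACKED   0x01   /* illustrative flag bits */
#define F_SACKED_RETRANS 0x02
#define F_LOST           0x04

struct counters {
        int packets_out, sacked_out, retrans_out, lost_out;
};

/* old_pcount segments became new_pcount segments after the split */
static void fragment_fixup(struct counters *tp, int old_pcount,
                           int new_pcount, unsigned int sacked)
{
        int diff = old_pcount - new_pcount;

        if (diff <= 0)
                return;
        tp->packets_out -= diff;
        if (sacked & F_SACKED_ACKED)
                tp->sacked_out -= diff;
        if (sacked & F_SACKED_RETRANS)
                tp->retrans_out -= diff;
        if (sacked & F_LOST)
                tp->lost_out -= diff;
}

int main(void)
{
        struct counters tp = { .packets_out = 10, .sacked_out = 4,
                               .retrans_out = 2, .lost_out = 1 };

        /* a 3-segment retransmitted skb re-fragments into 2 segments */
        fragment_fixup(&tp, 3, 2, F_SACKED_RETRANS);
        printf("packets_out=%d retrans_out=%d\n",
               tp.packets_out, tp.retrans_out);   /* 9 and 1 */
        return 0;
}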
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 29fed6e58d0a..519899fb11d5 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1968,7 +1968,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
1968 } 1968 }
1969 pmc->mca_sources = NULL; 1969 pmc->mca_sources = NULL;
1970 pmc->mca_sfmode = MCAST_EXCLUDE; 1970 pmc->mca_sfmode = MCAST_EXCLUDE;
1971 pmc->mca_sfcount[MCAST_EXCLUDE] = 0; 1971 pmc->mca_sfcount[MCAST_INCLUDE] = 0;
1972 pmc->mca_sfcount[MCAST_EXCLUDE] = 1; 1972 pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
1973} 1973}
1974 1974
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1cb8adb2787f..2da514b16d95 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1955,6 +1955,57 @@ static void __exit fini(void)
1955#endif 1955#endif
1956} 1956}
1957 1957
1958/*
1959 * Find the specified header, searching up to the transport protocol header.
1960 * If the target header is found, its offset is stored in *offset and 0 is
1961 * returned; otherwise -1 is returned.
1962 *
1963 * Notes: - a non-first Fragment Header isn't skipped.
1964 * - the ESP header isn't skipped.
1965 * - the target header may be truncated.
1966 */
1967int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
1968{
1969 unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
1970 u8 nexthdr = skb->nh.ipv6h->nexthdr;
1971 unsigned int len = skb->len - start;
1972
1973 while (nexthdr != target) {
1974 struct ipv6_opt_hdr _hdr, *hp;
1975 unsigned int hdrlen;
1976
1977 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
1978 return -1;
1979 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
1980 if (hp == NULL)
1981 return -1;
1982 if (nexthdr == NEXTHDR_FRAGMENT) {
1983 unsigned short _frag_off, *fp;
1984 fp = skb_header_pointer(skb,
1985 start+offsetof(struct frag_hdr,
1986 frag_off),
1987 sizeof(_frag_off),
1988 &_frag_off);
1989 if (fp == NULL)
1990 return -1;
1991
1992 if (ntohs(*fp) & ~0x7)
1993 return -1;
1994 hdrlen = 8;
1995 } else if (nexthdr == NEXTHDR_AUTH)
1996 hdrlen = (hp->hdrlen + 2) << 2;
1997 else
1998 hdrlen = ipv6_optlen(hp);
1999
2000 nexthdr = hp->nexthdr;
2001 len -= hdrlen;
2002 start += hdrlen;
2003 }
2004
2005 *offset = start;
2006 return 0;
2007}
2008
1958EXPORT_SYMBOL(ip6t_register_table); 2009EXPORT_SYMBOL(ip6t_register_table);
1959EXPORT_SYMBOL(ip6t_unregister_table); 2010EXPORT_SYMBOL(ip6t_unregister_table);
1960EXPORT_SYMBOL(ip6t_do_table); 2011EXPORT_SYMBOL(ip6t_do_table);
@@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match);
1963EXPORT_SYMBOL(ip6t_register_target); 2014EXPORT_SYMBOL(ip6t_register_target);
1964EXPORT_SYMBOL(ip6t_unregister_target); 2015EXPORT_SYMBOL(ip6t_unregister_target);
1965EXPORT_SYMBOL(ip6t_ext_hdr); 2016EXPORT_SYMBOL(ip6t_ext_hdr);
2017EXPORT_SYMBOL(ipv6_find_hdr);
1966 2018
1967module_init(init); 2019module_init(init);
1968module_exit(fini); 2020module_exit(fini);
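ipv6_find_hdr(), added above and exported at the end of the file, is what lets every ip6t_* match in the following hunks drop its private extension-header walk: the caller names one target next-header value and gets back the offset where that header starts, with AH sized as (hdrlen + 2) << 2, a fragment header fixed at 8 bytes (only the first fragment is accepted), and everything else sized by ipv6_optlen(). Below is a minimal user-space model of the same walk over a flat byte buffer, with invented names and with the fragment and NEXTHDR_NONE cases left out, just to make the length arithmetic easy to check:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NH_HOP   0      /* hop-by-hop options */
#define NH_ROUTE 43     /* routing header     */
#define NH_AUTH  51     /* AH                 */
#define NH_TCP   6

struct opt_hdr { uint8_t nexthdr; uint8_t hdrlen; };

static int is_ext_hdr(uint8_t nh)
{
        return nh == NH_HOP || nh == NH_ROUTE || nh == NH_AUTH;
}

/* returns the offset of 'target' inside buf, or -1 if it is not found */
static int find_hdr(const uint8_t *buf, unsigned int len,
                    uint8_t first_nexthdr, uint8_t target)
{
        unsigned int start = 0;
        uint8_t nexthdr = first_nexthdr;

        while (nexthdr != target) {
                struct opt_hdr hp;
                unsigned int hdrlen;

                if (!is_ext_hdr(nexthdr) || start + sizeof(hp) > len)
                        return -1;
                memcpy(&hp, buf + start, sizeof(hp));
                if (nexthdr == NH_AUTH)
                        hdrlen = (hp.hdrlen + 2) << 2;  /* AH: 4-byte units            */
                else
                        hdrlen = (hp.hdrlen + 1) << 3;  /* ipv6_optlen(): 8-byte units */
                nexthdr = hp.nexthdr;
                start += hdrlen;
        }
        return start < len ? (int)start : -1;
}

int main(void)
{
        /* hop-by-hop (8 bytes) followed by a routing header, then TCP */
        uint8_t pkt[24] = { NH_ROUTE, 0, 0,0,0,0,0,0,   /* hbh: nexthdr, len=0 */
                            NH_TCP,   1, 0,0,0,0,0,0,   /* rt: 16 bytes total  */
                            0,0,0,0,0,0,0,0 };

        printf("routing header at offset %d\n",
               find_hdr(pkt, sizeof(pkt), NH_HOP, NH_ROUTE)); /* 8 */
        return 0;
}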
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index d5b94f142bba..dde37793d20b 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -48,92 +48,21 @@ match(const struct sk_buff *skb,
48 unsigned int protoff, 48 unsigned int protoff,
49 int *hotdrop) 49 int *hotdrop)
50{ 50{
51 struct ip_auth_hdr *ah = NULL, _ah; 51 struct ip_auth_hdr *ah, _ah;
52 const struct ip6t_ah *ahinfo = matchinfo; 52 const struct ip6t_ah *ahinfo = matchinfo;
53 unsigned int temp;
54 int len;
55 u8 nexthdr;
56 unsigned int ptr; 53 unsigned int ptr;
57 unsigned int hdrlen = 0; 54 unsigned int hdrlen = 0;
58 55
59 /*DEBUGP("IPv6 AH entered\n");*/ 56 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
60 /* if (opt->auth == 0) return 0;
61 * It does not filled on output */
62
63 /* type of the 1st exthdr */
64 nexthdr = skb->nh.ipv6h->nexthdr;
65 /* pointer to the 1st exthdr */
66 ptr = sizeof(struct ipv6hdr);
67 /* available length */
68 len = skb->len - ptr;
69 temp = 0;
70
71 while (ip6t_ext_hdr(nexthdr)) {
72 struct ipv6_opt_hdr _hdr, *hp;
73
74 DEBUGP("ipv6_ah header iteration \n");
75
76 /* Is there enough space for the next ext header? */
77 if (len < sizeof(struct ipv6_opt_hdr))
78 return 0;
79 /* No more exthdr -> evaluate */
80 if (nexthdr == NEXTHDR_NONE)
81 break;
82 /* ESP -> evaluate */
83 if (nexthdr == NEXTHDR_ESP)
84 break;
85
86 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
87 BUG_ON(hp == NULL);
88
89 /* Calculate the header length */
90 if (nexthdr == NEXTHDR_FRAGMENT)
91 hdrlen = 8;
92 else if (nexthdr == NEXTHDR_AUTH)
93 hdrlen = (hp->hdrlen+2)<<2;
94 else
95 hdrlen = ipv6_optlen(hp);
96
97 /* AH -> evaluate */
98 if (nexthdr == NEXTHDR_AUTH) {
99 temp |= MASK_AH;
100 break;
101 }
102
103
104 /* set the flag */
105 switch (nexthdr) {
106 case NEXTHDR_HOP:
107 case NEXTHDR_ROUTING:
108 case NEXTHDR_FRAGMENT:
109 case NEXTHDR_AUTH:
110 case NEXTHDR_DEST:
111 break;
112 default:
113 DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr);
114 return 0;
115 }
116
117 nexthdr = hp->nexthdr;
118 len -= hdrlen;
119 ptr += hdrlen;
120 if (ptr > skb->len) {
121 DEBUGP("ipv6_ah: new pointer too large! \n");
122 break;
123 }
124 }
125
126 /* AH header not found */
127 if (temp != MASK_AH)
128 return 0; 57 return 0;
129 58
130 if (len < sizeof(struct ip_auth_hdr)){ 59 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
60 if (ah == NULL) {
131 *hotdrop = 1; 61 *hotdrop = 1;
132 return 0; 62 return 0;
133 } 63 }
134 64
135 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); 65 hdrlen = (ah->hdrlen + 2) << 2;
136 BUG_ON(ah == NULL);
137 66
138 DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); 67 DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
139 DEBUGP("RES %04X ", ah->reserved); 68 DEBUGP("RES %04X ", ah->reserved);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index 540925e4a7a8..c450a635e54b 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb,
63 struct ipv6_opt_hdr _optsh, *oh; 63 struct ipv6_opt_hdr _optsh, *oh;
64 const struct ip6t_opts *optinfo = matchinfo; 64 const struct ip6t_opts *optinfo = matchinfo;
65 unsigned int temp; 65 unsigned int temp;
66 unsigned int len;
67 u8 nexthdr;
68 unsigned int ptr; 66 unsigned int ptr;
69 unsigned int hdrlen = 0; 67 unsigned int hdrlen = 0;
70 unsigned int ret = 0; 68 unsigned int ret = 0;
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb,
72 u8 _optlen, *lp = NULL; 70 u8 _optlen, *lp = NULL;
73 unsigned int optlen; 71 unsigned int optlen;
74 72
75 /* type of the 1st exthdr */
76 nexthdr = skb->nh.ipv6h->nexthdr;
77 /* pointer to the 1st exthdr */
78 ptr = sizeof(struct ipv6hdr);
79 /* available length */
80 len = skb->len - ptr;
81 temp = 0;
82
83 while (ip6t_ext_hdr(nexthdr)) {
84 struct ipv6_opt_hdr _hdr, *hp;
85
86 DEBUGP("ipv6_opts header iteration \n");
87
88 /* Is there enough space for the next ext header? */
89 if (len < (int)sizeof(struct ipv6_opt_hdr))
90 return 0;
91 /* No more exthdr -> evaluate */
92 if (nexthdr == NEXTHDR_NONE) {
93 break;
94 }
95 /* ESP -> evaluate */
96 if (nexthdr == NEXTHDR_ESP) {
97 break;
98 }
99
100 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
101 BUG_ON(hp == NULL);
102
103 /* Calculate the header length */
104 if (nexthdr == NEXTHDR_FRAGMENT) {
105 hdrlen = 8;
106 } else if (nexthdr == NEXTHDR_AUTH)
107 hdrlen = (hp->hdrlen+2)<<2;
108 else
109 hdrlen = ipv6_optlen(hp);
110
111 /* OPTS -> evaluate */
112#if HOPBYHOP 73#if HOPBYHOP
113 if (nexthdr == NEXTHDR_HOP) { 74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
114 temp |= MASK_HOPOPTS;
115#else 75#else
116 if (nexthdr == NEXTHDR_DEST) { 76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
117 temp |= MASK_DSTOPTS;
118#endif 77#endif
119 break; 78 return 0;
120 }
121
122 79
123 /* set the flag */ 80 oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
124 switch (nexthdr){ 81 if (oh == NULL){
125 case NEXTHDR_HOP:
126 case NEXTHDR_ROUTING:
127 case NEXTHDR_FRAGMENT:
128 case NEXTHDR_AUTH:
129 case NEXTHDR_DEST:
130 break;
131 default:
132 DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
133 return 0;
134 break;
135 }
136
137 nexthdr = hp->nexthdr;
138 len -= hdrlen;
139 ptr += hdrlen;
140 if ( ptr > skb->len ) {
141 DEBUGP("ipv6_opts: new pointer is too large! \n");
142 break;
143 }
144 }
145
146 /* OPTIONS header not found */
147#if HOPBYHOP
148 if ( temp != MASK_HOPOPTS ) return 0;
149#else
150 if ( temp != MASK_DSTOPTS ) return 0;
151#endif
152
153 if (len < (int)sizeof(struct ipv6_opt_hdr)){
154 *hotdrop = 1; 82 *hotdrop = 1;
155 return 0; 83 return 0;
156 } 84 }
157 85
158 if (len < hdrlen){ 86 hdrlen = ipv6_optlen(oh);
87 if (skb->len - ptr < hdrlen){
159 /* Packet smaller than its length field */ 88 /* Packet smaller than its length field */
160 return 0; 89 return 0;
161 } 90 }
162 91
163 oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
164 BUG_ON(oh == NULL);
165
166 DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); 92 DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
167 93
168 DEBUGP("len %02X %04X %02X ", 94 DEBUGP("len %02X %04X %02X ",
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index e39dd236fd8e..24bc0cde43a1 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -48,87 +48,22 @@ match(const struct sk_buff *skb,
48 unsigned int protoff, 48 unsigned int protoff,
49 int *hotdrop) 49 int *hotdrop)
50{ 50{
51 struct ip_esp_hdr _esp, *eh = NULL; 51 struct ip_esp_hdr _esp, *eh;
52 const struct ip6t_esp *espinfo = matchinfo; 52 const struct ip6t_esp *espinfo = matchinfo;
53 unsigned int temp;
54 int len;
55 u8 nexthdr;
56 unsigned int ptr; 53 unsigned int ptr;
57 54
58 /* Make sure this isn't an evil packet */ 55 /* Make sure this isn't an evil packet */
59 /*DEBUGP("ipv6_esp entered \n");*/ 56 /*DEBUGP("ipv6_esp entered \n");*/
60 57
61 /* type of the 1st exthdr */ 58 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
62 nexthdr = skb->nh.ipv6h->nexthdr;
63 /* pointer to the 1st exthdr */
64 ptr = sizeof(struct ipv6hdr);
65 /* available length */
66 len = skb->len - ptr;
67 temp = 0;
68
69 while (ip6t_ext_hdr(nexthdr)) {
70 struct ipv6_opt_hdr _hdr, *hp;
71 int hdrlen;
72
73 DEBUGP("ipv6_esp header iteration \n");
74
75 /* Is there enough space for the next ext header? */
76 if (len < sizeof(struct ipv6_opt_hdr))
77 return 0;
78 /* No more exthdr -> evaluate */
79 if (nexthdr == NEXTHDR_NONE)
80 break;
81 /* ESP -> evaluate */
82 if (nexthdr == NEXTHDR_ESP) {
83 temp |= MASK_ESP;
84 break;
85 }
86
87 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
88 BUG_ON(hp == NULL);
89
90 /* Calculate the header length */
91 if (nexthdr == NEXTHDR_FRAGMENT)
92 hdrlen = 8;
93 else if (nexthdr == NEXTHDR_AUTH)
94 hdrlen = (hp->hdrlen+2)<<2;
95 else
96 hdrlen = ipv6_optlen(hp);
97
98 /* set the flag */
99 switch (nexthdr) {
100 case NEXTHDR_HOP:
101 case NEXTHDR_ROUTING:
102 case NEXTHDR_FRAGMENT:
103 case NEXTHDR_AUTH:
104 case NEXTHDR_DEST:
105 break;
106 default:
107 DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr);
108 return 0;
109 }
110
111 nexthdr = hp->nexthdr;
112 len -= hdrlen;
113 ptr += hdrlen;
114 if (ptr > skb->len) {
115 DEBUGP("ipv6_esp: new pointer too large! \n");
116 break;
117 }
118 }
119
120 /* ESP header not found */
121 if (temp != MASK_ESP)
122 return 0; 59 return 0;
123 60
124 if (len < sizeof(struct ip_esp_hdr)) { 61 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
62 if (eh == NULL) {
125 *hotdrop = 1; 63 *hotdrop = 1;
126 return 0; 64 return 0;
127 } 65 }
128 66
129 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
130 BUG_ON(eh == NULL);
131
132 DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); 67 DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi));
133 68
134 return (eh != NULL) 69 return (eh != NULL)
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 4bfa30a9bc80..085d5f8eea29 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -48,90 +48,18 @@ match(const struct sk_buff *skb,
48 unsigned int protoff, 48 unsigned int protoff,
49 int *hotdrop) 49 int *hotdrop)
50{ 50{
51 struct frag_hdr _frag, *fh = NULL; 51 struct frag_hdr _frag, *fh;
52 const struct ip6t_frag *fraginfo = matchinfo; 52 const struct ip6t_frag *fraginfo = matchinfo;
53 unsigned int temp;
54 int len;
55 u8 nexthdr;
56 unsigned int ptr; 53 unsigned int ptr;
57 unsigned int hdrlen = 0;
58
59 /* type of the 1st exthdr */
60 nexthdr = skb->nh.ipv6h->nexthdr;
61 /* pointer to the 1st exthdr */
62 ptr = sizeof(struct ipv6hdr);
63 /* available length */
64 len = skb->len - ptr;
65 temp = 0;
66
67 while (ip6t_ext_hdr(nexthdr)) {
68 struct ipv6_opt_hdr _hdr, *hp;
69
70 DEBUGP("ipv6_frag header iteration \n");
71
72 /* Is there enough space for the next ext header? */
73 if (len < (int)sizeof(struct ipv6_opt_hdr))
74 return 0;
75 /* No more exthdr -> evaluate */
76 if (nexthdr == NEXTHDR_NONE) {
77 break;
78 }
79 /* ESP -> evaluate */
80 if (nexthdr == NEXTHDR_ESP) {
81 break;
82 }
83
84 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
85 BUG_ON(hp == NULL);
86
87 /* Calculate the header length */
88 if (nexthdr == NEXTHDR_FRAGMENT) {
89 hdrlen = 8;
90 } else if (nexthdr == NEXTHDR_AUTH)
91 hdrlen = (hp->hdrlen+2)<<2;
92 else
93 hdrlen = ipv6_optlen(hp);
94
95 /* FRAG -> evaluate */
96 if (nexthdr == NEXTHDR_FRAGMENT) {
97 temp |= MASK_FRAGMENT;
98 break;
99 }
100
101
102 /* set the flag */
103 switch (nexthdr){
104 case NEXTHDR_HOP:
105 case NEXTHDR_ROUTING:
106 case NEXTHDR_FRAGMENT:
107 case NEXTHDR_AUTH:
108 case NEXTHDR_DEST:
109 break;
110 default:
111 DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr);
112 return 0;
113 break;
114 }
115
116 nexthdr = hp->nexthdr;
117 len -= hdrlen;
118 ptr += hdrlen;
119 if ( ptr > skb->len ) {
120 DEBUGP("ipv6_frag: new pointer too large! \n");
121 break;
122 }
123 }
124
125 /* FRAG header not found */
126 if ( temp != MASK_FRAGMENT ) return 0;
127
128 if (len < sizeof(struct frag_hdr)){
129 *hotdrop = 1;
130 return 0;
131 }
132 54
133 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); 55 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
134 BUG_ON(fh == NULL); 56 return 0;
57
58 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
59 if (fh == NULL){
60 *hotdrop = 1;
61 return 0;
62 }
135 63
136 DEBUGP("INFO %04X ", fh->frag_off); 64 DEBUGP("INFO %04X ", fh->frag_off);
137 DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); 65 DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 27f3650d127e..1d09485111d0 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb,
63 struct ipv6_opt_hdr _optsh, *oh; 63 struct ipv6_opt_hdr _optsh, *oh;
64 const struct ip6t_opts *optinfo = matchinfo; 64 const struct ip6t_opts *optinfo = matchinfo;
65 unsigned int temp; 65 unsigned int temp;
66 unsigned int len;
67 u8 nexthdr;
68 unsigned int ptr; 66 unsigned int ptr;
69 unsigned int hdrlen = 0; 67 unsigned int hdrlen = 0;
70 unsigned int ret = 0; 68 unsigned int ret = 0;
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb,
72 u8 _optlen, *lp = NULL; 70 u8 _optlen, *lp = NULL;
73 unsigned int optlen; 71 unsigned int optlen;
74 72
75 /* type of the 1st exthdr */
76 nexthdr = skb->nh.ipv6h->nexthdr;
77 /* pointer to the 1st exthdr */
78 ptr = sizeof(struct ipv6hdr);
79 /* available length */
80 len = skb->len - ptr;
81 temp = 0;
82
83 while (ip6t_ext_hdr(nexthdr)) {
84 struct ipv6_opt_hdr _hdr, *hp;
85
86 DEBUGP("ipv6_opts header iteration \n");
87
88 /* Is there enough space for the next ext header? */
89 if (len < (int)sizeof(struct ipv6_opt_hdr))
90 return 0;
91 /* No more exthdr -> evaluate */
92 if (nexthdr == NEXTHDR_NONE) {
93 break;
94 }
95 /* ESP -> evaluate */
96 if (nexthdr == NEXTHDR_ESP) {
97 break;
98 }
99
100 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
101 BUG_ON(hp == NULL);
102
103 /* Calculate the header length */
104 if (nexthdr == NEXTHDR_FRAGMENT) {
105 hdrlen = 8;
106 } else if (nexthdr == NEXTHDR_AUTH)
107 hdrlen = (hp->hdrlen+2)<<2;
108 else
109 hdrlen = ipv6_optlen(hp);
110
111 /* OPTS -> evaluate */
112#if HOPBYHOP 73#if HOPBYHOP
113 if (nexthdr == NEXTHDR_HOP) { 74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
114 temp |= MASK_HOPOPTS;
115#else 75#else
116 if (nexthdr == NEXTHDR_DEST) { 76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
117 temp |= MASK_DSTOPTS;
118#endif 77#endif
119 break; 78 return 0;
120 }
121
122 79
123 /* set the flag */ 80 oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
124 switch (nexthdr){ 81 if (oh == NULL){
125 case NEXTHDR_HOP:
126 case NEXTHDR_ROUTING:
127 case NEXTHDR_FRAGMENT:
128 case NEXTHDR_AUTH:
129 case NEXTHDR_DEST:
130 break;
131 default:
132 DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
133 return 0;
134 break;
135 }
136
137 nexthdr = hp->nexthdr;
138 len -= hdrlen;
139 ptr += hdrlen;
140 if ( ptr > skb->len ) {
141 DEBUGP("ipv6_opts: new pointer is too large! \n");
142 break;
143 }
144 }
145
146 /* OPTIONS header not found */
147#if HOPBYHOP
148 if ( temp != MASK_HOPOPTS ) return 0;
149#else
150 if ( temp != MASK_DSTOPTS ) return 0;
151#endif
152
153 if (len < (int)sizeof(struct ipv6_opt_hdr)){
154 *hotdrop = 1; 82 *hotdrop = 1;
155 return 0; 83 return 0;
156 } 84 }
157 85
158 if (len < hdrlen){ 86 hdrlen = ipv6_optlen(oh);
87 if (skb->len - ptr < hdrlen){
159 /* Packet smaller than its length field */ 88 /* Packet smaller than its length field */
160 return 0; 89 return 0;
161 } 90 }
162 91
163 oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
164 BUG_ON(oh == NULL);
165
166 DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); 92 DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
167 93
168 DEBUGP("len %02X %04X %02X ", 94 DEBUGP("len %02X %04X %02X ",
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2bb670037df3..beb2fd5cebbb 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -50,98 +50,29 @@ match(const struct sk_buff *skb,
50 unsigned int protoff, 50 unsigned int protoff,
51 int *hotdrop) 51 int *hotdrop)
52{ 52{
53 struct ipv6_rt_hdr _route, *rh = NULL; 53 struct ipv6_rt_hdr _route, *rh;
54 const struct ip6t_rt *rtinfo = matchinfo; 54 const struct ip6t_rt *rtinfo = matchinfo;
55 unsigned int temp; 55 unsigned int temp;
56 unsigned int len;
57 u8 nexthdr;
58 unsigned int ptr; 56 unsigned int ptr;
59 unsigned int hdrlen = 0; 57 unsigned int hdrlen = 0;
60 unsigned int ret = 0; 58 unsigned int ret = 0;
61 struct in6_addr *ap, _addr; 59 struct in6_addr *ap, _addr;
62 60
63 /* type of the 1st exthdr */ 61 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
64 nexthdr = skb->nh.ipv6h->nexthdr; 62 return 0;
65 /* pointer to the 1st exthdr */
66 ptr = sizeof(struct ipv6hdr);
67 /* available length */
68 len = skb->len - ptr;
69 temp = 0;
70 63
71 while (ip6t_ext_hdr(nexthdr)) { 64 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
72 struct ipv6_opt_hdr _hdr, *hp; 65 if (rh == NULL){
73
74 DEBUGP("ipv6_rt header iteration \n");
75
76 /* Is there enough space for the next ext header? */
77 if (len < (int)sizeof(struct ipv6_opt_hdr))
78 return 0;
79 /* No more exthdr -> evaluate */
80 if (nexthdr == NEXTHDR_NONE) {
81 break;
82 }
83 /* ESP -> evaluate */
84 if (nexthdr == NEXTHDR_ESP) {
85 break;
86 }
87
88 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
89 BUG_ON(hp == NULL);
90
91 /* Calculate the header length */
92 if (nexthdr == NEXTHDR_FRAGMENT) {
93 hdrlen = 8;
94 } else if (nexthdr == NEXTHDR_AUTH)
95 hdrlen = (hp->hdrlen+2)<<2;
96 else
97 hdrlen = ipv6_optlen(hp);
98
99 /* ROUTING -> evaluate */
100 if (nexthdr == NEXTHDR_ROUTING) {
101 temp |= MASK_ROUTING;
102 break;
103 }
104
105
106 /* set the flag */
107 switch (nexthdr){
108 case NEXTHDR_HOP:
109 case NEXTHDR_ROUTING:
110 case NEXTHDR_FRAGMENT:
111 case NEXTHDR_AUTH:
112 case NEXTHDR_DEST:
113 break;
114 default:
115 DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr);
116 return 0;
117 break;
118 }
119
120 nexthdr = hp->nexthdr;
121 len -= hdrlen;
122 ptr += hdrlen;
123 if ( ptr > skb->len ) {
124 DEBUGP("ipv6_rt: new pointer is too large! \n");
125 break;
126 }
127 }
128
129 /* ROUTING header not found */
130 if ( temp != MASK_ROUTING ) return 0;
131
132 if (len < (int)sizeof(struct ipv6_rt_hdr)){
133 *hotdrop = 1; 66 *hotdrop = 1;
134 return 0; 67 return 0;
135 } 68 }
136 69
137 if (len < hdrlen){ 70 hdrlen = ipv6_optlen(rh);
71 if (skb->len - ptr < hdrlen){
138 /* Packet smaller than its length field */ 72 /* Packet smaller than its length field */
139 return 0; 73 return 0;
140 } 74 }
141 75
142 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
143 BUG_ON(rh == NULL);
144
145 DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); 76 DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
146 DEBUGP("TYPE %04X ", rh->type); 77 DEBUGP("TYPE %04X ", rh->type);
147 DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); 78 DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5aa3691c578d..a1265a320b11 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
627 627
628 if (type && code) { 628 if (type && code) {
629 get_user(fl->fl_icmp_type, type); 629 get_user(fl->fl_icmp_type, type);
630 __get_user(fl->fl_icmp_code, code); 630 get_user(fl->fl_icmp_code, code);
631 probed = 1; 631 probed = 1;
632 } 632 }
633 break; 633 break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 69b146843a20..6001948600f3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -405,9 +405,8 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
405 continue; 405 continue;
406 406
407 if (!ipv6_addr_any(&np->rcv_saddr)) { 407 if (!ipv6_addr_any(&np->rcv_saddr)) {
408 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) 408 if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
409 return s; 409 continue;
410 continue;
411 } 410 }
412 if(!inet6_mc_check(s, loc_addr, rmt_addr)) 411 if(!inet6_mc_check(s, loc_addr, rmt_addr))
413 continue; 412 continue;
@@ -640,6 +639,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
640 int tclass = -1; 639 int tclass = -1;
641 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 640 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
642 int err; 641 int err;
642 int connected = 0;
643 643
644 /* destination address check */ 644 /* destination address check */
645 if (sin6) { 645 if (sin6) {
@@ -749,6 +749,7 @@ do_udp_sendmsg:
749 fl->fl_ip_dport = inet->dport; 749 fl->fl_ip_dport = inet->dport;
750 daddr = &np->daddr; 750 daddr = &np->daddr;
751 fl->fl6_flowlabel = np->flow_label; 751 fl->fl6_flowlabel = np->flow_label;
752 connected = 1;
752 } 753 }
753 754
754 if (!fl->oif) 755 if (!fl->oif)
@@ -771,6 +772,7 @@ do_udp_sendmsg:
771 } 772 }
772 if (!(opt->opt_nflen|opt->opt_flen)) 773 if (!(opt->opt_nflen|opt->opt_flen))
773 opt = NULL; 774 opt = NULL;
775 connected = 0;
774 } 776 }
775 if (opt == NULL) 777 if (opt == NULL)
776 opt = np->opt; 778 opt = np->opt;
@@ -788,10 +790,13 @@ do_udp_sendmsg:
788 ipv6_addr_copy(&final, &fl->fl6_dst); 790 ipv6_addr_copy(&final, &fl->fl6_dst);
789 ipv6_addr_copy(&fl->fl6_dst, rt0->addr); 791 ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
790 final_p = &final; 792 final_p = &final;
793 connected = 0;
791 } 794 }
792 795
793 if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) 796 if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
794 fl->oif = np->mcast_oif; 797 fl->oif = np->mcast_oif;
798 connected = 0;
799 }
795 800
796 err = ip6_dst_lookup(sk, &dst, fl); 801 err = ip6_dst_lookup(sk, &dst, fl);
797 if (err) 802 if (err)
@@ -847,7 +852,7 @@ do_append_data:
847 else if (!corkreq) 852 else if (!corkreq)
848 err = udp_v6_push_pending_frames(sk, up); 853 err = udp_v6_push_pending_frames(sk, up);
849 854
850 if (dst) 855 if (dst && connected)
851 ip6_dst_store(sk, dst, 856 ip6_dst_store(sk, dst,
852 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? 857 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
853 &np->daddr : NULL); 858 &np->daddr : NULL);
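The udpv6_sendmsg() hunks add a connected flag: it is set only when the datagram goes to the peer the socket is connected to, and it is cleared whenever per-call state may change the route (ancillary-data options, a routing header rewriting the final destination, or a multicast override of the outgoing interface); only if it survives is the looked-up dst cached on the socket with ip6_dst_store(). A compressed model of that decision, with invented field names standing in for the kernel state:

#include <stdbool.h>
#include <stdio.h>

struct send_ctx {
        bool to_connected_peer;   /* destination == the socket's connected peer */
        bool has_cmsg_opts;       /* per-call options supplied via cmsg         */
        bool routing_hdr_rewrite; /* srcrt option changed the final hop         */
        bool mcast_oif_override;  /* multicast dst forced np->mcast_oif         */
};

/* should the route looked up for this send be stored on the socket? */
static bool cache_dst(const struct send_ctx *c)
{
        bool connected = c->to_connected_peer;

        if (c->has_cmsg_opts)
                connected = false;
        if (c->routing_hdr_rewrite)
                connected = false;
        if (c->mcast_oif_override)
                connected = false;
        return connected;
}

int main(void)
{
        struct send_ctx plain = { .to_connected_peer = true };
        struct send_ctx mcast = { .to_connected_peer = true,
                                  .mcast_oif_override = true };

        printf("plain send caches dst: %d\n", cache_dst(&plain));        /* 1 */
        printf("mcast override caches dst: %d\n", cache_dst(&mcast));    /* 0 */
        return 0;
}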
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8690f171c1ef..ee865d88183b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -36,6 +36,11 @@
36 * Michal Ostrowski : Module initialization cleanup. 36 * Michal Ostrowski : Module initialization cleanup.
37 * Ulises Alonso : Frame number limit removal and 37 * Ulises Alonso : Frame number limit removal and
38 * packet_set_ring memory leak. 38 * packet_set_ring memory leak.
39 * Eric Biederman : Allow for > 8 byte hardware addresses.
40 * The convention is that longer addresses
41 * will simply extend the hardware address
42 * byte arrays at the end of sockaddr_ll
43 * and packet_mreq.
39 * 44 *
40 * This program is free software; you can redistribute it and/or 45 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License 46 * modify it under the terms of the GNU General Public License
@@ -161,7 +166,17 @@ struct packet_mclist
161 int count; 166 int count;
162 unsigned short type; 167 unsigned short type;
163 unsigned short alen; 168 unsigned short alen;
164 unsigned char addr[8]; 169 unsigned char addr[MAX_ADDR_LEN];
170};
171/* identical to struct packet_mreq except it has
172 * a longer address field.
173 */
174struct packet_mreq_max
175{
176 int mr_ifindex;
177 unsigned short mr_type;
178 unsigned short mr_alen;
179 unsigned char mr_address[MAX_ADDR_LEN];
165}; 180};
166#endif 181#endif
167#ifdef CONFIG_PACKET_MMAP 182#ifdef CONFIG_PACKET_MMAP
@@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
716 err = -EINVAL; 731 err = -EINVAL;
717 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 732 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
718 goto out; 733 goto out;
734 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
735 goto out;
719 ifindex = saddr->sll_ifindex; 736 ifindex = saddr->sll_ifindex;
720 proto = saddr->sll_protocol; 737 proto = saddr->sll_protocol;
721 addr = saddr->sll_addr; 738 addr = saddr->sll_addr;
@@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
744 if (dev->hard_header) { 761 if (dev->hard_header) {
745 int res; 762 int res;
746 err = -EINVAL; 763 err = -EINVAL;
764 if (saddr) {
765 if (saddr->sll_halen != dev->addr_len)
766 goto out_free;
767 if (saddr->sll_hatype != dev->type)
768 goto out_free;
769 }
747 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); 770 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
748 if (sock->type != SOCK_DGRAM) { 771 if (sock->type != SOCK_DGRAM) {
749 skb->tail = skb->data; 772 skb->tail = skb->data;
@@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1045 struct sock *sk = sock->sk; 1068 struct sock *sk = sock->sk;
1046 struct sk_buff *skb; 1069 struct sk_buff *skb;
1047 int copied, err; 1070 int copied, err;
1071 struct sockaddr_ll *sll;
1048 1072
1049 err = -EINVAL; 1073 err = -EINVAL;
1050 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) 1074 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1057,16 +1081,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1057#endif 1081#endif
1058 1082
1059 /* 1083 /*
1060 * If the address length field is there to be filled in, we fill
1061 * it in now.
1062 */
1063
1064 if (sock->type == SOCK_PACKET)
1065 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1066 else
1067 msg->msg_namelen = sizeof(struct sockaddr_ll);
1068
1069 /*
1070 * Call the generic datagram receiver. This handles all sorts 1084 * Call the generic datagram receiver. This handles all sorts
1071 * of horrible races and re-entrancy so we can forget about it 1085 * of horrible races and re-entrancy so we can forget about it
1072 * in the protocol layers. 1086 * in the protocol layers.
@@ -1087,6 +1101,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1087 goto out; 1101 goto out;
1088 1102
1089 /* 1103 /*
1104 * If the address length field is there to be filled in, we fill
1105 * it in now.
1106 */
1107
1108 sll = (struct sockaddr_ll*)skb->cb;
1109 if (sock->type == SOCK_PACKET)
1110 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1111 else
1112 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1113
1114 /*
1090 * You lose any data beyond the buffer you gave. If it worries a 1115 * You lose any data beyond the buffer you gave. If it worries a
1091 * user program they can ask the device for its MTU anyway. 1116 * user program they can ask the device for its MTU anyway.
1092 */ 1117 */
@@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1166 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ 1191 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1167 sll->sll_halen = 0; 1192 sll->sll_halen = 0;
1168 } 1193 }
1169 *uaddr_len = sizeof(*sll); 1194 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1170 1195
1171 return 0; 1196 return 0;
1172} 1197}
@@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i
1199 } 1224 }
1200} 1225}
1201 1226
1202static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) 1227static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1203{ 1228{
1204 struct packet_sock *po = pkt_sk(sk); 1229 struct packet_sock *po = pkt_sk(sk);
1205 struct packet_mclist *ml, *i; 1230 struct packet_mclist *ml, *i;
@@ -1249,7 +1274,7 @@ done:
1249 return err; 1274 return err;
1250} 1275}
1251 1276
1252static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) 1277static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1253{ 1278{
1254 struct packet_mclist *ml, **mlp; 1279 struct packet_mclist *ml, **mlp;
1255 1280
@@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1315 case PACKET_ADD_MEMBERSHIP: 1340 case PACKET_ADD_MEMBERSHIP:
1316 case PACKET_DROP_MEMBERSHIP: 1341 case PACKET_DROP_MEMBERSHIP:
1317 { 1342 {
1318 struct packet_mreq mreq; 1343 struct packet_mreq_max mreq;
1319 if (optlen<sizeof(mreq)) 1344 int len = optlen;
1345 memset(&mreq, 0, sizeof(mreq));
1346 if (len < sizeof(struct packet_mreq))
1320 return -EINVAL; 1347 return -EINVAL;
1321 if (copy_from_user(&mreq,optval,sizeof(mreq))) 1348 if (len > sizeof(mreq))
1349 len = sizeof(mreq);
1350 if (copy_from_user(&mreq,optval,len))
1322 return -EFAULT; 1351 return -EFAULT;
1352 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1353 return -EINVAL;
1323 if (optname == PACKET_ADD_MEMBERSHIP) 1354 if (optname == PACKET_ADD_MEMBERSHIP)
1324 ret = packet_mc_add(sk, &mreq); 1355 ret = packet_mc_add(sk, &mreq);
1325 else 1356 else
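The af_packet.c hunks allow hardware addresses longer than 8 bytes: the multicast list and the new packet_mreq_max use MAX_ADDR_LEN, sendmsg() checks that msg_namelen covers offsetof(struct sockaddr_ll, sll_addr) plus sll_halen, and recvmsg()/getname() report exactly that length instead of sizeof(struct sockaddr_ll). The sketch below only demonstrates the length arithmetic, using a local mirror of the structure (the real definition lives in the kernel headers; MAX_ADDR_LEN here is the usual kernel value):

#include <stddef.h>
#include <stdio.h>

#define MAX_ADDR_LEN 32                 /* kernel-wide maximum hw address length */

/* local stand-in for struct sockaddr_ll; layout mirrors the uapi header */
struct sockaddr_ll_model {
        unsigned short sll_family;
        unsigned short sll_protocol;
        int            sll_ifindex;
        unsigned short sll_hatype;
        unsigned char  sll_pkttype;
        unsigned char  sll_halen;
        unsigned char  sll_addr[8];     /* may be logically extended past 8 */
};

/* name length reported by recvmsg()/getname() for a given hw address */
static size_t ll_name_len(unsigned char halen)
{
        return offsetof(struct sockaddr_ll_model, sll_addr) + halen;
}

int main(void)
{
        printf("Ethernet (6-byte) name length: %zu\n", ll_name_len(6));
        printf("16-byte hw address name length: %zu\n", ll_name_len(16));
        /* sendmsg() applies the same formula as a lower bound on msg_namelen */
        return 0;
}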
diff --git a/net/socket.c b/net/socket.c
index c699e93c33d7..f9264472377f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1862 if (err < 0) 1862 if (err < 0)
1863 goto out_freeiov; 1863 goto out_freeiov;
1864 } 1864 }
1865 err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); 1865 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1866 COMPAT_FLAGS(msg));
1866 if (err) 1867 if (err)
1867 goto out_freeiov; 1868 goto out_freeiov;
1868 if (MSG_CMSG_COMPAT & flags) 1869 if (MSG_CMSG_COMPAT & flags)
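The final socket.c hunk keeps sys_recvmsg() from copying MSG_CMSG_COMPAT back to user space; the bit is an in-kernel marker for the 32-bit compat path and is masked out before the result flags are written back. A one-line model of the masking (the flag value below is a placeholder, not the kernel's definition):

#include <stdio.h>

#define MSG_CMSG_COMPAT 0x80000000U     /* placeholder for the internal marker */

static unsigned int flags_for_user(unsigned int msg_flags)
{
        /* never leak the kernel-internal compat marker to user space */
        return msg_flags & ~MSG_CMSG_COMPAT;
}

int main(void)
{
        printf("0x%x\n", flags_for_user(0x80000040U));  /* prints 0x40 */
        return 0;
}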