diff options
author | Jeff Garzik <jgarzik@pobox.com> | 2005-09-21 22:34:08 -0400 |
---|---|---|
committer | Jeff Garzik <jgarzik@pobox.com> | 2005-09-21 22:34:08 -0400 |
commit | a3536c839f04682ed06c84a7f75968c27c6108c8 (patch) | |
tree | 92c26ea74c0ffb9b83a2285ad2539cc271b09856 /net | |
parent | a33a1982012e9070736e3717231714dc9892303b (diff) | |
parent | efb0372bbaf5b829ff8c39db372779928af542a7 (diff) |
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'net')
48 files changed, 3037 insertions, 1335 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 145f5cde96cf..b74864889670 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c | |||
@@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, | |||
120 | unsigned short vid; | 120 | unsigned short vid; |
121 | struct net_device_stats *stats; | 121 | struct net_device_stats *stats; |
122 | unsigned short vlan_TCI; | 122 | unsigned short vlan_TCI; |
123 | unsigned short proto; | 123 | __be16 proto; |
124 | 124 | ||
125 | /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ | 125 | /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ |
126 | vlan_TCI = ntohs(vhdr->h_vlan_TCI); | 126 | vlan_TCI = ntohs(vhdr->h_vlan_TCI); |
diff --git a/net/Kconfig b/net/Kconfig index 2bdd5623fdd5..60f6f321bd76 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER | |||
140 | 140 | ||
141 | If unsure, say N. | 141 | If unsure, say N. |
142 | 142 | ||
143 | source "net/netfilter/Kconfig" | ||
143 | source "net/ipv4/netfilter/Kconfig" | 144 | source "net/ipv4/netfilter/Kconfig" |
144 | source "net/ipv6/netfilter/Kconfig" | 145 | source "net/ipv6/netfilter/Kconfig" |
145 | source "net/decnet/netfilter/Kconfig" | 146 | source "net/decnet/netfilter/Kconfig" |
@@ -206,8 +207,6 @@ config NET_PKTGEN | |||
206 | To compile this code as a module, choose M here: the | 207 | To compile this code as a module, choose M here: the |
207 | module will be called pktgen. | 208 | module will be called pktgen. |
208 | 209 | ||
209 | source "net/netfilter/Kconfig" | ||
210 | |||
211 | endmenu | 210 | endmenu |
212 | 211 | ||
213 | endmenu | 212 | endmenu |
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2d52fee63a8c..d8e36b775125 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c | |||
@@ -214,9 +214,11 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) | |||
214 | .tos = RT_TOS(iph->tos)} }, .proto = 0}; | 214 | .tos = RT_TOS(iph->tos)} }, .proto = 0}; |
215 | 215 | ||
216 | if (!ip_route_output_key(&rt, &fl)) { | 216 | if (!ip_route_output_key(&rt, &fl)) { |
217 | /* Bridged-and-DNAT'ed traffic doesn't | 217 | /* - Bridged-and-DNAT'ed traffic doesn't |
218 | * require ip_forwarding. */ | 218 | * require ip_forwarding. |
219 | if (((struct dst_entry *)rt)->dev == dev) { | 219 | * - Deal with redirected traffic. */ |
220 | if (((struct dst_entry *)rt)->dev == dev || | ||
221 | rt->rt_type == RTN_LOCAL) { | ||
220 | skb->dst = (struct dst_entry *)rt; | 222 | skb->dst = (struct dst_entry *)rt; |
221 | goto bridged_dnat; | 223 | goto bridged_dnat; |
222 | } | 224 | } |
diff --git a/net/dccp/Makefile b/net/dccp/Makefile index fb97bb042455..344a8da153fc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile | |||
@@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o | |||
3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ | 3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ |
4 | timer.o | 4 | timer.o |
5 | 5 | ||
6 | dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o | ||
7 | |||
6 | obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o | 8 | obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o |
7 | 9 | ||
8 | dccp_diag-y := diag.o | 10 | dccp_diag-y := diag.o |
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c new file mode 100644 index 000000000000..6530283eafca --- /dev/null +++ b/net/dccp/ackvec.c | |||
@@ -0,0 +1,419 @@ | |||
1 | /* | ||
2 | * net/dccp/ackvec.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License as published by the | ||
9 | * Free Software Foundation; version 2 of the License; | ||
10 | */ | ||
11 | |||
12 | #include "ackvec.h" | ||
13 | #include "dccp.h" | ||
14 | |||
15 | #include <linux/dccp.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | |||
18 | #include <net/sock.h> | ||
19 | |||
20 | int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | ||
21 | { | ||
22 | struct dccp_sock *dp = dccp_sk(sk); | ||
23 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; | ||
24 | int len = av->dccpav_vec_len + 2; | ||
25 | struct timeval now; | ||
26 | u32 elapsed_time; | ||
27 | unsigned char *to, *from; | ||
28 | |||
29 | dccp_timestamp(sk, &now); | ||
30 | elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; | ||
31 | |||
32 | if (elapsed_time != 0) | ||
33 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
34 | |||
35 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | ||
36 | return -1; | ||
37 | |||
38 | /* | ||
39 | * XXX: now we have just one ack vector sent record, so | ||
40 | * we have to wait for it to be cleared. | ||
41 | * | ||
42 | * Of course this is not acceptable, but this is just for | ||
43 | * basic testing now. | ||
44 | */ | ||
45 | if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
46 | return -1; | ||
47 | |||
48 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
49 | |||
50 | to = skb_push(skb, len); | ||
51 | *to++ = DCCPO_ACK_VECTOR_0; | ||
52 | *to++ = len; | ||
53 | |||
54 | len = av->dccpav_vec_len; | ||
55 | from = av->dccpav_buf + av->dccpav_buf_head; | ||
56 | |||
57 | /* Check if buf_head wraps */ | ||
58 | if (av->dccpav_buf_head + len > av->dccpav_vec_len) { | ||
59 | const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); | ||
60 | |||
61 | memcpy(to, from, tailsize); | ||
62 | to += tailsize; | ||
63 | len -= tailsize; | ||
64 | from = av->dccpav_buf; | ||
65 | } | ||
66 | |||
67 | memcpy(to, from, len); | ||
68 | /* | ||
69 | * From draft-ietf-dccp-spec-11.txt: | ||
70 | * | ||
71 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
72 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
73 | * sequence number it used for the ack packet; ack_ptr will equal | ||
74 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will | ||
75 | * equal buf_nonce. | ||
76 | * | ||
77 | * This implementation uses just one ack record for now. | ||
78 | */ | ||
79 | av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
80 | av->dccpav_ack_ptr = av->dccpav_buf_head; | ||
81 | av->dccpav_ack_ackno = av->dccpav_buf_ackno; | ||
82 | av->dccpav_ack_nonce = av->dccpav_buf_nonce; | ||
83 | av->dccpav_sent_len = av->dccpav_vec_len; | ||
84 | |||
85 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " | ||
86 | "ack_ackno=%llu\n", | ||
87 | debug_prefix, av->dccpav_sent_len, | ||
88 | (unsigned long long)av->dccpav_ack_seqno, | ||
89 | (unsigned long long)av->dccpav_ack_ackno); | ||
90 | return -1; | ||
91 | } | ||
92 | |||
93 | struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, | ||
94 | const unsigned int __nocast priority) | ||
95 | { | ||
96 | struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); | ||
97 | |||
98 | if (av != NULL) { | ||
99 | av->dccpav_buf_len = len; | ||
100 | av->dccpav_buf_head = | ||
101 | av->dccpav_buf_tail = av->dccpav_buf_len - 1; | ||
102 | av->dccpav_buf_ackno = | ||
103 | av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; | ||
104 | av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; | ||
105 | av->dccpav_ack_ptr = 0; | ||
106 | av->dccpav_time.tv_sec = 0; | ||
107 | av->dccpav_time.tv_usec = 0; | ||
108 | av->dccpav_sent_len = av->dccpav_vec_len = 0; | ||
109 | } | ||
110 | |||
111 | return av; | ||
112 | } | ||
113 | |||
114 | void dccp_ackvec_free(struct dccp_ackvec *av) | ||
115 | { | ||
116 | kfree(av); | ||
117 | } | ||
118 | |||
119 | static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, | ||
120 | const unsigned int index) | ||
121 | { | ||
122 | return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; | ||
123 | } | ||
124 | |||
125 | static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, | ||
126 | const unsigned int index) | ||
127 | { | ||
128 | return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
133 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
134 | * this simplifies table updates if one of the missing packets arrives. | ||
135 | */ | ||
136 | static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, | ||
137 | const unsigned int packets, | ||
138 | const unsigned char state) | ||
139 | { | ||
140 | unsigned int gap; | ||
141 | signed long new_head; | ||
142 | |||
143 | if (av->dccpav_vec_len + packets > av->dccpav_buf_len) | ||
144 | return -ENOBUFS; | ||
145 | |||
146 | gap = packets - 1; | ||
147 | new_head = av->dccpav_buf_head - packets; | ||
148 | |||
149 | if (new_head < 0) { | ||
150 | if (gap > 0) { | ||
151 | memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, | ||
152 | gap + new_head + 1); | ||
153 | gap = -new_head; | ||
154 | } | ||
155 | new_head += av->dccpav_buf_len; | ||
156 | } | ||
157 | |||
158 | av->dccpav_buf_head = new_head; | ||
159 | |||
160 | if (gap > 0) | ||
161 | memset(av->dccpav_buf + av->dccpav_buf_head + 1, | ||
162 | DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); | ||
163 | |||
164 | av->dccpav_buf[av->dccpav_buf_head] = state; | ||
165 | av->dccpav_vec_len += packets; | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
171 | */ | ||
172 | int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | ||
173 | const u64 ackno, const u8 state) | ||
174 | { | ||
175 | /* | ||
176 | * Check at the right places if the buffer is full, if it is, tell the | ||
177 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
178 | * vectors, when we will free up space in dccpav_buf. | ||
179 | * | ||
180 | * We may well decide to do buffer compression, etc, but for now let's | ||
181 | * just drop. | ||
182 | * | ||
183 | * From Appendix A: | ||
184 | * | ||
185 | * Of course, the circular buffer may overflow, either when the | ||
186 | * HC-Sender is sending data at a very high rate, when the | ||
187 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
188 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
189 | * (so the HC-Receiver is unable to clean up old state). In this | ||
190 | * case, the HC-Receiver should either compress the buffer (by | ||
191 | * increasing run lengths when possible), transfer its state to | ||
192 | * a larger buffer, or, as a last resort, drop all received | ||
193 | * packets, without processing them whatsoever, until its buffer | ||
194 | * shrinks again. | ||
195 | */ | ||
196 | |||
197 | /* See if this is the first ackno being inserted */ | ||
198 | if (av->dccpav_vec_len == 0) { | ||
199 | av->dccpav_buf[av->dccpav_buf_head] = state; | ||
200 | av->dccpav_vec_len = 1; | ||
201 | } else if (after48(ackno, av->dccpav_buf_ackno)) { | ||
202 | const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, | ||
203 | ackno); | ||
204 | |||
205 | /* | ||
206 | * Look if the state of this packet is the same as the | ||
207 | * previous ackno and if so if we can bump the head len. | ||
208 | */ | ||
209 | if (delta == 1 && | ||
210 | dccp_ackvec_state(av, av->dccpav_buf_head) == state && | ||
211 | (dccp_ackvec_len(av, av->dccpav_buf_head) < | ||
212 | DCCP_ACKVEC_LEN_MASK)) | ||
213 | av->dccpav_buf[av->dccpav_buf_head]++; | ||
214 | else if (dccp_ackvec_set_buf_head_state(av, delta, state)) | ||
215 | return -ENOBUFS; | ||
216 | } else { | ||
217 | /* | ||
218 | * A.1.2. Old Packets | ||
219 | * | ||
220 | * When a packet with Sequence Number S arrives, and | ||
221 | * S <= buf_ackno, the HC-Receiver will scan the table | ||
222 | * for the byte corresponding to S. (Indexing structures | ||
223 | * could reduce the complexity of this scan.) | ||
224 | */ | ||
225 | u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); | ||
226 | unsigned int index = av->dccpav_buf_head; | ||
227 | |||
228 | while (1) { | ||
229 | const u8 len = dccp_ackvec_len(av, index); | ||
230 | const u8 state = dccp_ackvec_state(av, index); | ||
231 | /* | ||
232 | * valid packets not yet in dccpav_buf have a reserved | ||
233 | * entry, with a len equal to 0. | ||
234 | */ | ||
235 | if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && | ||
236 | len == 0 && delta == 0) { /* Found our | ||
237 | reserved seat! */ | ||
238 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
239 | (unsigned long long)ackno); | ||
240 | av->dccpav_buf[index] = state; | ||
241 | goto out; | ||
242 | } | ||
243 | /* len == 0 means one packet */ | ||
244 | if (delta < len + 1) | ||
245 | goto out_duplicate; | ||
246 | |||
247 | delta -= len + 1; | ||
248 | if (++index == av->dccpav_buf_len) | ||
249 | index = 0; | ||
250 | } | ||
251 | } | ||
252 | |||
253 | av->dccpav_buf_ackno = ackno; | ||
254 | dccp_timestamp(sk, &av->dccpav_time); | ||
255 | out: | ||
256 | dccp_pr_debug(""); | ||
257 | return 0; | ||
258 | |||
259 | out_duplicate: | ||
260 | /* Duplicate packet */ | ||
261 | dccp_pr_debug("Received a dup or already considered lost " | ||
262 | "packet: %llu\n", (unsigned long long)ackno); | ||
263 | return -EILSEQ; | ||
264 | } | ||
265 | |||
266 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
267 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) | ||
268 | { | ||
269 | if (!dccp_debug) | ||
270 | return; | ||
271 | |||
272 | printk("ACK vector len=%d, ackno=%llu |", len, | ||
273 | (unsigned long long)ackno); | ||
274 | |||
275 | while (len--) { | ||
276 | const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; | ||
277 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | ||
278 | |||
279 | printk("%d,%d|", state, rl); | ||
280 | ++vector; | ||
281 | } | ||
282 | |||
283 | printk("\n"); | ||
284 | } | ||
285 | |||
286 | void dccp_ackvec_print(const struct dccp_ackvec *av) | ||
287 | { | ||
288 | dccp_ackvector_print(av->dccpav_buf_ackno, | ||
289 | av->dccpav_buf + av->dccpav_buf_head, | ||
290 | av->dccpav_vec_len); | ||
291 | } | ||
292 | #endif | ||
293 | |||
294 | static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) | ||
295 | { | ||
296 | /* | ||
297 | * As we're keeping track of the ack vector size (dccpav_vec_len) and | ||
298 | * the sent ack vector size (dccpav_sent_len) we don't need | ||
299 | * dccpav_buf_tail at all, but keep this code here as in the future | ||
300 | * we'll implement a vector of ack records, as suggested in | ||
301 | * draft-ietf-dccp-spec-11.txt Appendix A. -acme | ||
302 | */ | ||
303 | #if 0 | ||
304 | av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; | ||
305 | if (av->dccpav_buf_tail >= av->dccpav_vec_len) | ||
306 | av->dccpav_buf_tail -= av->dccpav_vec_len; | ||
307 | #endif | ||
308 | av->dccpav_vec_len -= av->dccpav_sent_len; | ||
309 | } | ||
310 | |||
311 | void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, | ||
312 | const u64 ackno) | ||
313 | { | ||
314 | /* Check if we actually sent an ACK vector */ | ||
315 | if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
316 | return; | ||
317 | |||
318 | if (ackno == av->dccpav_ack_seqno) { | ||
319 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
320 | struct dccp_sock *dp = dccp_sk(sk); | ||
321 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
322 | "CLIENT rx ack: " : "server rx ack: "; | ||
323 | #endif | ||
324 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " | ||
325 | "ack_ackno=%llu, ACKED!\n", | ||
326 | debug_prefix, 1, | ||
327 | (unsigned long long)av->dccpav_ack_seqno, | ||
328 | (unsigned long long)av->dccpav_ack_ackno); | ||
329 | dccp_ackvec_trow_away_ack_record(av); | ||
330 | av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
331 | } | ||
332 | } | ||
333 | |||
334 | static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | ||
335 | struct sock *sk, u64 ackno, | ||
336 | const unsigned char len, | ||
337 | const unsigned char *vector) | ||
338 | { | ||
339 | unsigned char i; | ||
340 | |||
341 | /* Check if we actually sent an ACK vector */ | ||
342 | if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
343 | return; | ||
344 | /* | ||
345 | * We're in the receiver half connection, so if the received ACK | ||
346 | * vector ackno (e.g. 50) is before dccpav_ack_seqno (e.g. 52), we're | ||
347 | * not interested. | ||
348 | * | ||
349 | * Extra explanation with example: | ||
350 | * | ||
351 | * if we received an ACK vector with ackno 50, it can only be acking | ||
352 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
353 | */ | ||
354 | /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ | ||
355 | if (before48(ackno, av->dccpav_ack_seqno)) { | ||
356 | /* dccp_pr_debug_cat("yes\n"); */ | ||
357 | return; | ||
358 | } | ||
359 | /* dccp_pr_debug_cat("no\n"); */ | ||
360 | |||
361 | i = len; | ||
362 | while (i--) { | ||
363 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | ||
364 | u64 ackno_end_rl; | ||
365 | |||
366 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
367 | |||
368 | /* | ||
369 | * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, | ||
370 | * av->dccpav_ack_seqno, ackno); | ||
371 | */ | ||
372 | if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { | ||
373 | const u8 state = (*vector & | ||
374 | DCCP_ACKVEC_STATE_MASK) >> 6; | ||
375 | /* dccp_pr_debug_cat("yes\n"); */ | ||
376 | |||
377 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { | ||
378 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
379 | struct dccp_sock *dp = dccp_sk(sk); | ||
380 | const char *debug_prefix = | ||
381 | dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
382 | "CLIENT rx ack: " : "server rx ack: "; | ||
383 | #endif | ||
384 | dccp_pr_debug("%sACK vector 0, len=%d, " | ||
385 | "ack_seqno=%llu, ack_ackno=%llu, " | ||
386 | "ACKED!\n", | ||
387 | debug_prefix, len, | ||
388 | (unsigned long long) | ||
389 | av->dccpav_ack_seqno, | ||
390 | (unsigned long long) | ||
391 | av->dccpav_ack_ackno); | ||
392 | dccp_ackvec_trow_away_ack_record(av); | ||
393 | } | ||
394 | /* | ||
395 | * If dccpav_ack_seqno was not received, no problem | ||
396 | * we'll send another ACK vector. | ||
397 | */ | ||
398 | av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
399 | break; | ||
400 | } | ||
401 | /* dccp_pr_debug_cat("no\n"); */ | ||
402 | |||
403 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
404 | ++vector; | ||
405 | } | ||
406 | } | ||
407 | |||
408 | int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | ||
409 | const u8 opt, const u8 *value, const u8 len) | ||
410 | { | ||
411 | if (len > DCCP_MAX_ACKVEC_LEN) | ||
412 | return -1; | ||
413 | |||
414 | /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ | ||
415 | dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, | ||
416 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
417 | len, value); | ||
418 | return 0; | ||
419 | } | ||
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h new file mode 100644 index 000000000000..8ca51c9191f7 --- /dev/null +++ b/net/dccp/ackvec.h | |||
@@ -0,0 +1,133 @@ | |||
1 | #ifndef _ACKVEC_H | ||
2 | #define _ACKVEC_H | ||
3 | /* | ||
4 | * net/dccp/ackvec.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/compiler.h> | ||
16 | #include <linux/time.h> | ||
17 | #include <linux/types.h> | ||
18 | |||
19 | /* Read about the ECN nonce to see why it is 253 */ | ||
20 | #define DCCP_MAX_ACKVEC_LEN 253 | ||
21 | |||
22 | #define DCCP_ACKVEC_STATE_RECEIVED 0 | ||
23 | #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) | ||
24 | #define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) | ||
25 | |||
26 | #define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ | ||
27 | #define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ | ||
28 | |||
29 | /** struct dccp_ackvec - ack vector | ||
30 | * | ||
31 | * This data structure is the one defined in the DCCP draft | ||
32 | * Appendix A. | ||
33 | * | ||
34 | * @dccpav_buf_head - circular buffer head | ||
35 | * @dccpav_buf_tail - circular buffer tail | ||
36 | * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the | ||
37 | * buffer (i.e. %dccpav_buf_head) | ||
38 | * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked | ||
39 | * by the buffer with State 0 | ||
40 | * | ||
41 | * Additionally, the HC-Receiver must keep some information about the | ||
42 | * Ack Vectors it has recently sent. For each packet sent carrying an | ||
43 | * Ack Vector, it remembers four variables: | ||
44 | * | ||
45 | * @dccpav_ack_seqno - the Sequence Number used for the packet | ||
46 | * (HC-Receiver seqno) | ||
47 | * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. | ||
48 | * @dccpav_ack_ackno - the Acknowledgement Number used for the packet | ||
49 | * (HC-Sender seqno) | ||
50 | * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. | ||
51 | * | ||
52 | * @dccpav_buf_len - circular buffer length | ||
53 | * @dccpav_time - the time in usecs | ||
54 | * @dccpav_buf - circular buffer of acknowledgeable packets | ||
55 | */ | ||
56 | struct dccp_ackvec { | ||
57 | unsigned int dccpav_buf_head; | ||
58 | unsigned int dccpav_buf_tail; | ||
59 | u64 dccpav_buf_ackno; | ||
60 | u64 dccpav_ack_seqno; | ||
61 | u64 dccpav_ack_ackno; | ||
62 | unsigned int dccpav_ack_ptr; | ||
63 | unsigned int dccpav_sent_len; | ||
64 | unsigned int dccpav_vec_len; | ||
65 | unsigned int dccpav_buf_len; | ||
66 | struct timeval dccpav_time; | ||
67 | u8 dccpav_buf_nonce; | ||
68 | u8 dccpav_ack_nonce; | ||
69 | u8 dccpav_buf[0]; | ||
70 | }; | ||
71 | |||
72 | struct sock; | ||
73 | struct sk_buff; | ||
74 | |||
75 | #ifdef CONFIG_IP_DCCP_ACKVEC | ||
76 | extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, | ||
77 | const unsigned int __nocast priority); | ||
78 | extern void dccp_ackvec_free(struct dccp_ackvec *av); | ||
79 | |||
80 | extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | ||
81 | const u64 ackno, const u8 state); | ||
82 | |||
83 | extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, | ||
84 | struct sock *sk, const u64 ackno); | ||
85 | extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | ||
86 | const u8 opt, const u8 *value, const u8 len); | ||
87 | |||
88 | extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); | ||
89 | |||
90 | static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) | ||
91 | { | ||
92 | return av->dccpav_sent_len != av->dccpav_vec_len; | ||
93 | } | ||
94 | #else /* CONFIG_IP_DCCP_ACKVEC */ | ||
95 | static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, | ||
96 | const unsigned int __nocast priority) | ||
97 | { | ||
98 | return NULL; | ||
99 | } | ||
100 | |||
101 | static inline void dccp_ackvec_free(struct dccp_ackvec *av) | ||
102 | { | ||
103 | } | ||
104 | |||
105 | static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | ||
106 | const u64 ackno, const u8 state) | ||
107 | { | ||
108 | return -1; | ||
109 | } | ||
110 | |||
111 | static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, | ||
112 | struct sock *sk, const u64 ackno) | ||
113 | { | ||
114 | } | ||
115 | |||
116 | static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | ||
117 | const u8 opt, const u8 *value, const u8 len) | ||
118 | { | ||
119 | return -1; | ||
120 | } | ||
121 | |||
122 | static inline int dccp_insert_option_ackvec(const struct sock *sk, | ||
123 | const struct sk_buff *skb) | ||
124 | { | ||
125 | return -1; | ||
126 | } | ||
127 | |||
128 | static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) | ||
129 | { | ||
130 | return 0; | ||
131 | } | ||
132 | #endif /* CONFIG_IP_DCCP_ACKVEC */ | ||
133 | #endif /* _ACKVEC_H */ | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 962f1e9e2f7e..21e55142dcd3 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h | |||
@@ -14,6 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <net/sock.h> | 16 | #include <net/sock.h> |
17 | #include <linux/compiler.h> | ||
17 | #include <linux/dccp.h> | 18 | #include <linux/dccp.h> |
18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
19 | #include <linux/module.h> | 20 | #include <linux/module.h> |
@@ -54,6 +55,14 @@ struct ccid { | |||
54 | struct tcp_info *info); | 55 | struct tcp_info *info); |
55 | void (*ccid_hc_tx_get_info)(struct sock *sk, | 56 | void (*ccid_hc_tx_get_info)(struct sock *sk, |
56 | struct tcp_info *info); | 57 | struct tcp_info *info); |
58 | int (*ccid_hc_rx_getsockopt)(struct sock *sk, | ||
59 | const int optname, int len, | ||
60 | u32 __user *optval, | ||
61 | int __user *optlen); | ||
62 | int (*ccid_hc_tx_getsockopt)(struct sock *sk, | ||
63 | const int optname, int len, | ||
64 | u32 __user *optval, | ||
65 | int __user *optlen); | ||
57 | }; | 66 | }; |
58 | 67 | ||
59 | extern int ccid_register(struct ccid *ccid); | 68 | extern int ccid_register(struct ccid *ccid); |
@@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, | |||
177 | if (ccid->ccid_hc_tx_get_info != NULL) | 186 | if (ccid->ccid_hc_tx_get_info != NULL) |
178 | ccid->ccid_hc_tx_get_info(sk, info); | 187 | ccid->ccid_hc_tx_get_info(sk, info); |
179 | } | 188 | } |
189 | |||
190 | static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, | ||
191 | const int optname, int len, | ||
192 | u32 __user *optval, int __user *optlen) | ||
193 | { | ||
194 | int rc = -ENOPROTOOPT; | ||
195 | if (ccid->ccid_hc_rx_getsockopt != NULL) | ||
196 | rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, | ||
197 | optval, optlen); | ||
198 | return rc; | ||
199 | } | ||
200 | |||
201 | static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, | ||
202 | const int optname, int len, | ||
203 | u32 __user *optval, int __user *optlen) | ||
204 | { | ||
205 | int rc = -ENOPROTOOPT; | ||
206 | if (ccid->ccid_hc_tx_getsockopt != NULL) | ||
207 | rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, | ||
208 | optval, optlen); | ||
209 | return rc; | ||
210 | } | ||
180 | #endif /* _CCID_H */ | 211 | #endif /* _CCID_H */ |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 38aa84986118..aa68e0ab274d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | |||
1120 | info->tcpi_rtt = hctx->ccid3hctx_rtt; | 1120 | info->tcpi_rtt = hctx->ccid3hctx_rtt; |
1121 | } | 1121 | } |
1122 | 1122 | ||
1123 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | ||
1124 | u32 __user *optval, int __user *optlen) | ||
1125 | { | ||
1126 | const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | ||
1127 | const void *val; | ||
1128 | |||
1129 | /* Listen socks don't have a private CCID block */ | ||
1130 | if (sk->sk_state == DCCP_LISTEN) | ||
1131 | return -EINVAL; | ||
1132 | |||
1133 | switch (optname) { | ||
1134 | case DCCP_SOCKOPT_CCID_RX_INFO: | ||
1135 | if (len < sizeof(hcrx->ccid3hcrx_tfrc)) | ||
1136 | return -EINVAL; | ||
1137 | len = sizeof(hcrx->ccid3hcrx_tfrc); | ||
1138 | val = &hcrx->ccid3hcrx_tfrc; | ||
1139 | break; | ||
1140 | default: | ||
1141 | return -ENOPROTOOPT; | ||
1142 | } | ||
1143 | |||
1144 | if (put_user(len, optlen) || copy_to_user(optval, val, len)) | ||
1145 | return -EFAULT; | ||
1146 | |||
1147 | return 0; | ||
1148 | } | ||
1149 | |||
1150 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | ||
1151 | u32 __user *optval, int __user *optlen) | ||
1152 | { | ||
1153 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | ||
1154 | const void *val; | ||
1155 | |||
1156 | /* Listen socks don't have a private CCID block */ | ||
1157 | if (sk->sk_state == DCCP_LISTEN) | ||
1158 | return -EINVAL; | ||
1159 | |||
1160 | switch (optname) { | ||
1161 | case DCCP_SOCKOPT_CCID_TX_INFO: | ||
1162 | if (len < sizeof(hctx->ccid3hctx_tfrc)) | ||
1163 | return -EINVAL; | ||
1164 | len = sizeof(hctx->ccid3hctx_tfrc); | ||
1165 | val = &hctx->ccid3hctx_tfrc; | ||
1166 | break; | ||
1167 | default: | ||
1168 | return -ENOPROTOOPT; | ||
1169 | } | ||
1170 | |||
1171 | if (put_user(len, optlen) || copy_to_user(optval, val, len)) | ||
1172 | return -EFAULT; | ||
1173 | |||
1174 | return 0; | ||
1175 | } | ||
1176 | |||
1123 | static struct ccid ccid3 = { | 1177 | static struct ccid ccid3 = { |
1124 | .ccid_id = 3, | 1178 | .ccid_id = 3, |
1125 | .ccid_name = "ccid3", | 1179 | .ccid_name = "ccid3", |
@@ -1139,6 +1193,8 @@ static struct ccid ccid3 = { | |||
1139 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, | 1193 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, |
1140 | .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, | 1194 | .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, |
1141 | .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, | 1195 | .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, |
1196 | .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, | ||
1197 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, | ||
1142 | }; | 1198 | }; |
1143 | 1199 | ||
1144 | module_param(ccid3_debug, int, 0444); | 1200 | module_param(ccid3_debug, int, 0444); |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index eb248778eea3..0bde4583d091 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/list.h> | 40 | #include <linux/list.h> |
41 | #include <linux/time.h> | 41 | #include <linux/time.h> |
42 | #include <linux/types.h> | 42 | #include <linux/types.h> |
43 | #include <linux/tfrc.h> | ||
43 | 44 | ||
44 | #define TFRC_MIN_PACKET_SIZE 16 | 45 | #define TFRC_MIN_PACKET_SIZE 16 |
45 | #define TFRC_STD_PACKET_SIZE 256 | 46 | #define TFRC_STD_PACKET_SIZE 256 |
@@ -93,12 +94,15 @@ struct ccid3_options_received { | |||
93 | * @ccid3hctx_hist - Packet history | 94 | * @ccid3hctx_hist - Packet history |
94 | */ | 95 | */ |
95 | struct ccid3_hc_tx_sock { | 96 | struct ccid3_hc_tx_sock { |
96 | u32 ccid3hctx_x; | 97 | struct tfrc_tx_info ccid3hctx_tfrc; |
97 | u32 ccid3hctx_x_recv; | 98 | #define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x |
98 | u32 ccid3hctx_x_calc; | 99 | #define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv |
100 | #define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc | ||
101 | #define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt | ||
102 | #define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p | ||
103 | #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto | ||
104 | #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi | ||
99 | u16 ccid3hctx_s; | 105 | u16 ccid3hctx_s; |
100 | u32 ccid3hctx_rtt; | ||
101 | u32 ccid3hctx_p; | ||
102 | u8 ccid3hctx_state; | 106 | u8 ccid3hctx_state; |
103 | u8 ccid3hctx_last_win_count; | 107 | u8 ccid3hctx_last_win_count; |
104 | u8 ccid3hctx_idle; | 108 | u8 ccid3hctx_idle; |
@@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock { | |||
106 | struct timer_list ccid3hctx_no_feedback_timer; | 110 | struct timer_list ccid3hctx_no_feedback_timer; |
107 | struct timeval ccid3hctx_t_ld; | 111 | struct timeval ccid3hctx_t_ld; |
108 | struct timeval ccid3hctx_t_nom; | 112 | struct timeval ccid3hctx_t_nom; |
109 | u32 ccid3hctx_t_rto; | ||
110 | u32 ccid3hctx_t_ipi; | ||
111 | u32 ccid3hctx_delta; | 113 | u32 ccid3hctx_delta; |
112 | struct list_head ccid3hctx_hist; | 114 | struct list_head ccid3hctx_hist; |
113 | struct ccid3_options_received ccid3hctx_options_received; | 115 | struct ccid3_options_received ccid3hctx_options_received; |
114 | }; | 116 | }; |
115 | 117 | ||
116 | struct ccid3_hc_rx_sock { | 118 | struct ccid3_hc_rx_sock { |
119 | struct tfrc_rx_info ccid3hcrx_tfrc; | ||
120 | #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv | ||
121 | #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt | ||
122 | #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p | ||
117 | u64 ccid3hcrx_seqno_last_counter:48, | 123 | u64 ccid3hcrx_seqno_last_counter:48, |
118 | ccid3hcrx_state:8, | 124 | ccid3hcrx_state:8, |
119 | ccid3hcrx_last_counter:4; | 125 | ccid3hcrx_last_counter:4; |
120 | u32 ccid3hcrx_rtt; | ||
121 | u32 ccid3hcrx_p; | ||
122 | u32 ccid3hcrx_bytes_recv; | 126 | u32 ccid3hcrx_bytes_recv; |
123 | struct timeval ccid3hcrx_tstamp_last_feedback; | 127 | struct timeval ccid3hcrx_tstamp_last_feedback; |
124 | struct timeval ccid3hcrx_tstamp_last_ack; | 128 | struct timeval ccid3hcrx_tstamp_last_ack; |
@@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock { | |||
127 | u16 ccid3hcrx_s; | 131 | u16 ccid3hcrx_s; |
128 | u32 ccid3hcrx_pinv; | 132 | u32 ccid3hcrx_pinv; |
129 | u32 ccid3hcrx_elapsed_time; | 133 | u32 ccid3hcrx_elapsed_time; |
130 | u32 ccid3hcrx_x_recv; | ||
131 | }; | 134 | }; |
132 | 135 | ||
133 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | 136 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 95c4630b3b18..5871c027f9dc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <net/snmp.h> | 17 | #include <net/snmp.h> |
18 | #include <net/sock.h> | 18 | #include <net/sock.h> |
19 | #include <net/tcp.h> | 19 | #include <net/tcp.h> |
20 | #include "ackvec.h" | ||
20 | 21 | ||
21 | #ifdef CONFIG_IP_DCCP_DEBUG | 22 | #ifdef CONFIG_IP_DCCP_DEBUG |
22 | extern int dccp_debug; | 23 | extern int dccp_debug; |
@@ -258,13 +259,12 @@ extern int dccp_v4_send_reset(struct sock *sk, | |||
258 | extern void dccp_send_close(struct sock *sk, const int active); | 259 | extern void dccp_send_close(struct sock *sk, const int active); |
259 | 260 | ||
260 | struct dccp_skb_cb { | 261 | struct dccp_skb_cb { |
261 | __u8 dccpd_type; | 262 | __u8 dccpd_type:4; |
262 | __u8 dccpd_reset_code; | 263 | __u8 dccpd_ccval:4; |
263 | __u8 dccpd_service; | 264 | __u8 dccpd_reset_code; |
264 | __u8 dccpd_ccval; | 265 | __u16 dccpd_opt_len; |
265 | __u64 dccpd_seq; | 266 | __u64 dccpd_seq; |
266 | __u64 dccpd_ack_seq; | 267 | __u64 dccpd_ack_seq; |
267 | int dccpd_opt_len; | ||
268 | }; | 268 | }; |
269 | 269 | ||
270 | #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) | 270 | #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) |
@@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) | |||
359 | (dp->dccps_gss - | 359 | (dp->dccps_gss - |
360 | dp->dccps_options.dccpo_sequence_window + 1)); | 360 | dp->dccps_options.dccpo_sequence_window + 1)); |
361 | } | 361 | } |
362 | |||
363 | static inline int dccp_ack_pending(const struct sock *sk) | ||
364 | { | ||
365 | const struct dccp_sock *dp = dccp_sk(sk); | ||
366 | return dp->dccps_timestamp_echo != 0 || | ||
367 | #ifdef CONFIG_IP_DCCP_ACKVEC | ||
368 | (dp->dccps_options.dccpo_send_ack_vector && | ||
369 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || | ||
370 | #endif | ||
371 | inet_csk_ack_scheduled(sk); | ||
372 | } | ||
362 | 373 | ||
363 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); | 374 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); |
364 | extern void dccp_insert_option_elapsed_time(struct sock *sk, | 375 | extern void dccp_insert_option_elapsed_time(struct sock *sk, |
@@ -372,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | |||
372 | 383 | ||
373 | extern struct socket *dccp_ctl_socket; | 384 | extern struct socket *dccp_ctl_socket; |
374 | 385 | ||
375 | #define DCCP_ACKPKTS_STATE_RECEIVED 0 | ||
376 | #define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) | ||
377 | #define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) | ||
378 | |||
379 | #define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ | ||
380 | #define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ | ||
381 | |||
382 | /** struct dccp_ackpkts - acknowledgeable packets | ||
383 | * | ||
384 | * This data structure is the one defined in the DCCP draft | ||
385 | * Appendix A. | ||
386 | * | ||
387 | * @dccpap_buf_head - circular buffer head | ||
388 | * @dccpap_buf_tail - circular buffer tail | ||
389 | * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the | ||
390 | * buffer (i.e. %dccpap_buf_head) | ||
391 | * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked | ||
392 | * by the buffer with State 0 | ||
393 | * | ||
394 | * Additionally, the HC-Receiver must keep some information about the | ||
395 | * Ack Vectors it has recently sent. For each packet sent carrying an | ||
396 | * Ack Vector, it remembers four variables: | ||
397 | * | ||
398 | * @dccpap_ack_seqno - the Sequence Number used for the packet | ||
399 | * (HC-Receiver seqno) | ||
400 | * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. | ||
401 | * @dccpap_ack_ackno - the Acknowledgement Number used for the packet | ||
402 | * (HC-Sender seqno) | ||
403 | * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. | ||
404 | * | ||
405 | * @dccpap_buf_len - circular buffer length | ||
406 | * @dccpap_time - the time in usecs | ||
407 | * @dccpap_buf - circular buffer of acknowledgeable packets | ||
408 | */ | ||
409 | struct dccp_ackpkts { | ||
410 | unsigned int dccpap_buf_head; | ||
411 | unsigned int dccpap_buf_tail; | ||
412 | u64 dccpap_buf_ackno; | ||
413 | u64 dccpap_ack_seqno; | ||
414 | u64 dccpap_ack_ackno; | ||
415 | unsigned int dccpap_ack_ptr; | ||
416 | unsigned int dccpap_buf_vector_len; | ||
417 | unsigned int dccpap_ack_vector_len; | ||
418 | unsigned int dccpap_buf_len; | ||
419 | struct timeval dccpap_time; | ||
420 | u8 dccpap_buf_nonce; | ||
421 | u8 dccpap_ack_nonce; | ||
422 | u8 dccpap_buf[0]; | ||
423 | }; | ||
424 | |||
425 | extern struct dccp_ackpkts * | ||
426 | dccp_ackpkts_alloc(unsigned int len, | ||
427 | const unsigned int __nocast priority); | ||
428 | extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); | ||
429 | extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, | ||
430 | u64 ackno, u8 state); | ||
431 | extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, | ||
432 | struct sock *sk, u64 ackno); | ||
433 | |||
434 | extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); | 386 | extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); |
435 | 387 | ||
436 | static inline suseconds_t timeval_usecs(const struct timeval *tv) | 388 | static inline suseconds_t timeval_usecs(const struct timeval *tv) |
@@ -471,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv, | |||
471 | } | 423 | } |
472 | } | 424 | } |
473 | 425 | ||
474 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
475 | extern void dccp_ackvector_print(const u64 ackno, | ||
476 | const unsigned char *vector, int len); | ||
477 | extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); | ||
478 | #else | ||
479 | static inline void dccp_ackvector_print(const u64 ackno, | ||
480 | const unsigned char *vector, | ||
481 | int len) { } | ||
482 | static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } | ||
483 | #endif | ||
484 | |||
485 | #endif /* _DCCP_H */ | 426 | #endif /* _DCCP_H */ |
diff --git a/net/dccp/input.c b/net/dccp/input.c index c74034cf7ede..1b6b2cb12376 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | #include <net/sock.h> | 17 | #include <net/sock.h> |
18 | 18 | ||
19 | #include "ackvec.h" | ||
19 | #include "ccid.h" | 20 | #include "ccid.h" |
20 | #include "dccp.h" | 21 | #include "dccp.h" |
21 | 22 | ||
@@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | |||
60 | struct dccp_sock *dp = dccp_sk(sk); | 61 | struct dccp_sock *dp = dccp_sk(sk); |
61 | 62 | ||
62 | if (dp->dccps_options.dccpo_send_ack_vector) | 63 | if (dp->dccps_options.dccpo_send_ack_vector) |
63 | dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, | 64 | dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, |
64 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 65 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
65 | } | 66 | } |
66 | 67 | ||
67 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | 68 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) |
@@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
164 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 165 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
165 | dccp_event_ack_recv(sk, skb); | 166 | dccp_event_ack_recv(sk, skb); |
166 | 167 | ||
167 | /* | 168 | if (dp->dccps_options.dccpo_send_ack_vector && |
168 | * FIXME: check ECN to see if we should use | 169 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, |
169 | * DCCP_ACKPKTS_STATE_ECN_MARKED | 170 | DCCP_SKB_CB(skb)->dccpd_seq, |
170 | */ | 171 | DCCP_ACKVEC_STATE_RECEIVED)) |
171 | if (dp->dccps_options.dccpo_send_ack_vector) { | 172 | goto discard; |
172 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
173 | |||
174 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, | ||
175 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
176 | DCCP_ACKPKTS_STATE_RECEIVED)) { | ||
177 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " | ||
178 | "packets buffer full!\n"); | ||
179 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
180 | inet_csk_schedule_ack(sk); | ||
181 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
182 | TCP_DELACK_MIN, | ||
183 | DCCP_RTO_MAX); | ||
184 | goto discard; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * FIXME: this activation is probably wrong, have to study more | ||
189 | * TCP delack machinery and how it fits into DCCP draft, but | ||
190 | * for now it kinda "works" 8) | ||
191 | */ | ||
192 | if (!inet_csk_ack_scheduled(sk)) { | ||
193 | inet_csk_schedule_ack(sk); | ||
194 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, | ||
195 | DCCP_RTO_MAX); | ||
196 | } | ||
197 | } | ||
198 | 173 | ||
199 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | 174 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); |
200 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | 175 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); |
@@ -384,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
384 | } | 359 | } |
385 | 360 | ||
386 | out_invalid_packet: | 361 | out_invalid_packet: |
387 | return 1; /* dccp_v4_do_rcv will send a reset, but... | 362 | /* dccp_v4_do_rcv will send a reset */ |
388 | FIXME: the reset code should be | 363 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; |
389 | DCCP_RESET_CODE_PACKET_ERROR */ | 364 | return 1; |
390 | } | 365 | } |
391 | 366 | ||
392 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, | 367 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, |
@@ -433,6 +408,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
433 | struct dccp_hdr *dh, unsigned len) | 408 | struct dccp_hdr *dh, unsigned len) |
434 | { | 409 | { |
435 | struct dccp_sock *dp = dccp_sk(sk); | 410 | struct dccp_sock *dp = dccp_sk(sk); |
411 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
436 | const int old_state = sk->sk_state; | 412 | const int old_state = sk->sk_state; |
437 | int queued = 0; | 413 | int queued = 0; |
438 | 414 | ||
@@ -473,7 +449,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
473 | if (dh->dccph_type == DCCP_PKT_RESET) | 449 | if (dh->dccph_type == DCCP_PKT_RESET) |
474 | goto discard; | 450 | goto discard; |
475 | 451 | ||
476 | /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ | 452 | /* Caller (dccp_v4_do_rcv) will send Reset */ |
453 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
477 | return 1; | 454 | return 1; |
478 | } | 455 | } |
479 | 456 | ||
@@ -487,36 +464,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
487 | if (dccp_parse_options(sk, skb)) | 464 | if (dccp_parse_options(sk, skb)) |
488 | goto discard; | 465 | goto discard; |
489 | 466 | ||
490 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != | 467 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
491 | DCCP_PKT_WITHOUT_ACK_SEQ) | ||
492 | dccp_event_ack_recv(sk, skb); | 468 | dccp_event_ack_recv(sk, skb); |
493 | 469 | ||
494 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | 470 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); |
495 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | 471 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); |
496 | 472 | ||
497 | /* | 473 | if (dp->dccps_options.dccpo_send_ack_vector && |
498 | * FIXME: check ECN to see if we should use | 474 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, |
499 | * DCCP_ACKPKTS_STATE_ECN_MARKED | 475 | DCCP_SKB_CB(skb)->dccpd_seq, |
500 | */ | 476 | DCCP_ACKVEC_STATE_RECEIVED)) |
501 | if (dp->dccps_options.dccpo_send_ack_vector) { | 477 | goto discard; |
502 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, | ||
503 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
504 | DCCP_ACKPKTS_STATE_RECEIVED)) | ||
505 | goto discard; | ||
506 | /* | ||
507 | * FIXME: this activation is probably wrong, have to | ||
508 | * study more TCP delack machinery and how it fits into | ||
509 | * DCCP draft, but for now it kinda "works" 8) | ||
510 | */ | ||
511 | if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == | ||
512 | DCCP_MAX_SEQNO + 1) && | ||
513 | !inet_csk_ack_scheduled(sk)) { | ||
514 | inet_csk_schedule_ack(sk); | ||
515 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
516 | TCP_DELACK_MIN, | ||
517 | DCCP_RTO_MAX); | ||
518 | } | ||
519 | } | ||
520 | } | 478 | } |
521 | 479 | ||
522 | /* | 480 | /* |
@@ -551,8 +509,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
551 | dh->dccph_type == DCCP_PKT_REQUEST) || | 509 | dh->dccph_type == DCCP_PKT_REQUEST) || |
552 | (sk->sk_state == DCCP_RESPOND && | 510 | (sk->sk_state == DCCP_RESPOND && |
553 | dh->dccph_type == DCCP_PKT_DATA)) { | 511 | dh->dccph_type == DCCP_PKT_DATA)) { |
554 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | 512 | dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); |
555 | DCCP_PKT_SYNC); | ||
556 | goto discard; | 513 | goto discard; |
557 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { | 514 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { |
558 | dccp_rcv_closereq(sk, skb); | 515 | dccp_rcv_closereq(sk, skb); |
@@ -563,13 +520,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
563 | } | 520 | } |
564 | 521 | ||
565 | if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { | 522 | if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { |
566 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | 523 | dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); |
567 | DCCP_PKT_SYNCACK); | ||
568 | goto discard; | 524 | goto discard; |
569 | } | 525 | } |
570 | 526 | ||
571 | switch (sk->sk_state) { | 527 | switch (sk->sk_state) { |
572 | case DCCP_CLOSED: | 528 | case DCCP_CLOSED: |
529 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
573 | return 1; | 530 | return 1; |
574 | 531 | ||
575 | case DCCP_REQUESTING: | 532 | case DCCP_REQUESTING: |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2afaa464e7f0..40fe6afacde6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <net/tcp_states.h> | 23 | #include <net/tcp_states.h> |
24 | #include <net/xfrm.h> | 24 | #include <net/xfrm.h> |
25 | 25 | ||
26 | #include "ackvec.h" | ||
26 | #include "ccid.h" | 27 | #include "ccid.h" |
27 | #include "dccp.h" | 28 | #include "dccp.h" |
28 | 29 | ||
@@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | |||
246 | 247 | ||
247 | dp->dccps_role = DCCP_ROLE_CLIENT; | 248 | dp->dccps_role = DCCP_ROLE_CLIENT; |
248 | 249 | ||
250 | if (dccp_service_not_initialized(sk)) | ||
251 | return -EPROTO; | ||
252 | |||
249 | if (addr_len < sizeof(struct sockaddr_in)) | 253 | if (addr_len < sizeof(struct sockaddr_in)) |
250 | return -EINVAL; | 254 | return -EINVAL; |
251 | 255 | ||
@@ -661,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, | |||
661 | dccp_hdr(skb)->dccph_sport); | 665 | dccp_hdr(skb)->dccph_sport); |
662 | } | 666 | } |
663 | 667 | ||
668 | static inline int dccp_bad_service_code(const struct sock *sk, | ||
669 | const __u32 service) | ||
670 | { | ||
671 | const struct dccp_sock *dp = dccp_sk(sk); | ||
672 | |||
673 | if (dp->dccps_service == service) | ||
674 | return 0; | ||
675 | return !dccp_list_has_service(dp->dccps_service_list, service); | ||
676 | } | ||
677 | |||
664 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 678 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
665 | { | 679 | { |
666 | struct inet_request_sock *ireq; | 680 | struct inet_request_sock *ireq; |
@@ -669,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
669 | struct dccp_request_sock *dreq; | 683 | struct dccp_request_sock *dreq; |
670 | const __u32 saddr = skb->nh.iph->saddr; | 684 | const __u32 saddr = skb->nh.iph->saddr; |
671 | const __u32 daddr = skb->nh.iph->daddr; | 685 | const __u32 daddr = skb->nh.iph->daddr; |
686 | const __u32 service = dccp_hdr_request(skb)->dccph_req_service; | ||
687 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
688 | __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
672 | struct dst_entry *dst = NULL; | 689 | struct dst_entry *dst = NULL; |
673 | 690 | ||
674 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ | 691 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ |
675 | if (((struct rtable *)skb->dst)->rt_flags & | 692 | if (((struct rtable *)skb->dst)->rt_flags & |
676 | (RTCF_BROADCAST | RTCF_MULTICAST)) | 693 | (RTCF_BROADCAST | RTCF_MULTICAST)) { |
694 | reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
677 | goto drop; | 695 | goto drop; |
696 | } | ||
678 | 697 | ||
698 | if (dccp_bad_service_code(sk, service)) { | ||
699 | reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; | ||
700 | goto drop; | ||
701 | } | ||
679 | /* | 702 | /* |
680 | * TW buckets are converted to open requests without | 703 | * TW buckets are converted to open requests without |
681 | * limitations, they conserve resources and peer is | 704 | * limitations, they conserve resources and peer is |
@@ -718,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
718 | * dccp_create_openreq_child. | 741 | * dccp_create_openreq_child. |
719 | */ | 742 | */ |
720 | dreq = dccp_rsk(req); | 743 | dreq = dccp_rsk(req); |
721 | dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; | 744 | dreq->dreq_isr = dcb->dccpd_seq; |
722 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); | 745 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); |
723 | dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; | 746 | dreq->dreq_service = service; |
724 | 747 | ||
725 | if (dccp_v4_send_response(sk, req, dst)) | 748 | if (dccp_v4_send_response(sk, req, dst)) |
726 | goto drop_and_free; | 749 | goto drop_and_free; |
@@ -735,6 +758,7 @@ drop_and_free: | |||
735 | __reqsk_free(req); | 758 | __reqsk_free(req); |
736 | drop: | 759 | drop: |
737 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | 760 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); |
761 | dcb->dccpd_reset_code = reset_code; | ||
738 | return -1; | 762 | return -1; |
739 | } | 763 | } |
740 | 764 | ||
@@ -1005,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1005 | return 0; | 1029 | return 0; |
1006 | 1030 | ||
1007 | reset: | 1031 | reset: |
1008 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
1009 | dccp_v4_ctl_send_reset(skb); | 1032 | dccp_v4_ctl_send_reset(skb); |
1010 | discard: | 1033 | discard: |
1011 | kfree_skb(skb); | 1034 | kfree_skb(skb); |
@@ -1090,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb) | |||
1090 | goto discard_it; | 1113 | goto discard_it; |
1091 | 1114 | ||
1092 | dh = dccp_hdr(skb); | 1115 | dh = dccp_hdr(skb); |
1093 | #if 0 | ||
1094 | /* | ||
1095 | * Use something like this to simulate some DATA/DATAACK loss to test | ||
1096 | * dccp_ackpkts_add, you'll get something like this on a session that | ||
1097 | * sends 10 DATA/DATAACK packets: | ||
1098 | * | ||
1099 | * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| | ||
1100 | * | ||
1101 | * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet | ||
1102 | * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets | ||
1103 | * with the same state | ||
1104 | * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet | ||
1105 | * | ||
1106 | * So... | ||
1107 | * | ||
1108 | * 281473596467422 was received | ||
1109 | * 281473596467421 was not received | ||
1110 | * 281473596467420 was received | ||
1111 | * 281473596467419 was not received | ||
1112 | * 281473596467418 was received | ||
1113 | * 281473596467417 was not received | ||
1114 | * 281473596467416 was received | ||
1115 | * 281473596467415 was not received | ||
1116 | * 281473596467414 was received | ||
1117 | * 281473596467413 was received (this one was the 3way handshake | ||
1118 | * RESPONSE) | ||
1119 | * | ||
1120 | */ | ||
1121 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
1122 | dh->dccph_type == DCCP_PKT_DATAACK) { | ||
1123 | static int discard = 0; | ||
1124 | 1116 | ||
1125 | if (discard) { | ||
1126 | discard = 0; | ||
1127 | goto discard_it; | ||
1128 | } | ||
1129 | discard = 1; | ||
1130 | } | ||
1131 | #endif | ||
1132 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | 1117 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); |
1133 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | 1118 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; |
1134 | 1119 | ||
@@ -1242,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
1242 | do_gettimeofday(&dp->dccps_epoch); | 1227 | do_gettimeofday(&dp->dccps_epoch); |
1243 | 1228 | ||
1244 | if (dp->dccps_options.dccpo_send_ack_vector) { | 1229 | if (dp->dccps_options.dccpo_send_ack_vector) { |
1245 | dp->dccps_hc_rx_ackpkts = | 1230 | dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, |
1246 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | 1231 | GFP_KERNEL); |
1247 | GFP_KERNEL); | 1232 | if (dp->dccps_hc_rx_ackvec == NULL) |
1248 | |||
1249 | if (dp->dccps_hc_rx_ackpkts == NULL) | ||
1250 | return -ENOMEM; | 1233 | return -ENOMEM; |
1251 | } | 1234 | } |
1252 | 1235 | ||
@@ -1258,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
1258 | * setsockopt(CCIDs-I-want/accept). -acme | 1241 | * setsockopt(CCIDs-I-want/accept). -acme |
1259 | */ | 1242 | */ |
1260 | if (likely(!dccp_ctl_socket_init)) { | 1243 | if (likely(!dccp_ctl_socket_init)) { |
1261 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, | 1244 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, |
1262 | sk); | 1245 | sk); |
1263 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, | 1246 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, |
1264 | sk); | 1247 | sk); |
1265 | if (dp->dccps_hc_rx_ccid == NULL || | 1248 | if (dp->dccps_hc_rx_ccid == NULL || |
1266 | dp->dccps_hc_tx_ccid == NULL) { | 1249 | dp->dccps_hc_tx_ccid == NULL) { |
1267 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | 1250 | ccid_exit(dp->dccps_hc_rx_ccid, sk); |
1268 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | 1251 | ccid_exit(dp->dccps_hc_tx_ccid, sk); |
1269 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | 1252 | if (dp->dccps_options.dccpo_send_ack_vector) { |
1270 | dp->dccps_hc_rx_ackpkts = NULL; | 1253 | dccp_ackvec_free(dp->dccps_hc_rx_ackvec); |
1254 | dp->dccps_hc_rx_ackvec = NULL; | ||
1255 | } | ||
1271 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | 1256 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; |
1272 | return -ENOMEM; | 1257 | return -ENOMEM; |
1273 | } | 1258 | } |
@@ -1280,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
1280 | sk->sk_write_space = dccp_write_space; | 1265 | sk->sk_write_space = dccp_write_space; |
1281 | dp->dccps_mss_cache = 536; | 1266 | dp->dccps_mss_cache = 536; |
1282 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 1267 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
1268 | dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; | ||
1283 | 1269 | ||
1284 | return 0; | 1270 | return 0; |
1285 | } | 1271 | } |
@@ -1301,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk) | |||
1301 | if (inet_csk(sk)->icsk_bind_hash != NULL) | 1287 | if (inet_csk(sk)->icsk_bind_hash != NULL) |
1302 | inet_put_port(&dccp_hashinfo, sk); | 1288 | inet_put_port(&dccp_hashinfo, sk); |
1303 | 1289 | ||
1290 | if (dp->dccps_service_list != NULL) { | ||
1291 | kfree(dp->dccps_service_list); | ||
1292 | dp->dccps_service_list = NULL; | ||
1293 | } | ||
1294 | |||
1304 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | 1295 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); |
1305 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | 1296 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); |
1306 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | 1297 | if (dp->dccps_options.dccpo_send_ack_vector) { |
1307 | dp->dccps_hc_rx_ackpkts = NULL; | 1298 | dccp_ackvec_free(dp->dccps_hc_rx_ackvec); |
1299 | dp->dccps_hc_rx_ackvec = NULL; | ||
1300 | } | ||
1308 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | 1301 | ccid_exit(dp->dccps_hc_rx_ccid, sk); |
1309 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | 1302 | ccid_exit(dp->dccps_hc_tx_ccid, sk); |
1310 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | 1303 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; |
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 18461bc04cbe..1393461898bb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <net/xfrm.h> | 19 | #include <net/xfrm.h> |
20 | #include <net/inet_timewait_sock.h> | 20 | #include <net/inet_timewait_sock.h> |
21 | 21 | ||
22 | #include "ackvec.h" | ||
22 | #include "ccid.h" | 23 | #include "ccid.h" |
23 | #include "dccp.h" | 24 | #include "dccp.h" |
24 | 25 | ||
@@ -93,22 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
93 | struct inet_connection_sock *newicsk = inet_csk(sk); | 94 | struct inet_connection_sock *newicsk = inet_csk(sk); |
94 | struct dccp_sock *newdp = dccp_sk(newsk); | 95 | struct dccp_sock *newdp = dccp_sk(newsk); |
95 | 96 | ||
96 | newdp->dccps_hc_rx_ackpkts = NULL; | 97 | newdp->dccps_role = DCCP_ROLE_SERVER; |
97 | newdp->dccps_role = DCCP_ROLE_SERVER; | 98 | newdp->dccps_hc_rx_ackvec = NULL; |
98 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | 99 | newdp->dccps_service_list = NULL; |
100 | newdp->dccps_service = dreq->dreq_service; | ||
101 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
99 | do_gettimeofday(&newdp->dccps_epoch); | 102 | do_gettimeofday(&newdp->dccps_epoch); |
100 | 103 | ||
101 | if (newdp->dccps_options.dccpo_send_ack_vector) { | 104 | if (newdp->dccps_options.dccpo_send_ack_vector) { |
102 | newdp->dccps_hc_rx_ackpkts = | 105 | newdp->dccps_hc_rx_ackvec = |
103 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | 106 | dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, |
104 | GFP_ATOMIC); | 107 | GFP_ATOMIC); |
105 | /* | 108 | /* |
106 | * XXX: We're using the same CCIDs set on the parent, | 109 | * XXX: We're using the same CCIDs set on the parent, |
107 | * i.e. sk_clone copied the master sock and left the | 110 | * i.e. sk_clone copied the master sock and left the |
108 | * CCID pointers for this child, that is why we do the | 111 | * CCID pointers for this child, that is why we do the |
109 | * __ccid_get calls. | 112 | * __ccid_get calls. |
110 | */ | 113 | */ |
111 | if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) | 114 | if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) |
112 | goto out_free; | 115 | goto out_free; |
113 | } | 116 | } |
114 | 117 | ||
@@ -116,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
116 | newsk) != 0 || | 119 | newsk) != 0 || |
117 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, | 120 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, |
118 | newsk) != 0)) { | 121 | newsk) != 0)) { |
119 | dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); | 122 | dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); |
120 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); | 123 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); |
121 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); | 124 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); |
122 | out_free: | 125 | out_free: |
diff --git a/net/dccp/options.c b/net/dccp/options.c index d4c4242d8dd7..0a76426c9aea 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
@@ -18,19 +18,15 @@ | |||
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
20 | 20 | ||
21 | #include "ackvec.h" | ||
21 | #include "ccid.h" | 22 | #include "ccid.h" |
22 | #include "dccp.h" | 23 | #include "dccp.h" |
23 | 24 | ||
24 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
25 | struct sock *sk, | ||
26 | const u64 ackno, | ||
27 | const unsigned char len, | ||
28 | const unsigned char *vector); | ||
29 | |||
30 | /* stores the default values for new connection. may be changed with sysctl */ | 25 | /* stores the default values for new connection. may be changed with sysctl */ |
31 | static const struct dccp_options dccpo_default_values = { | 26 | static const struct dccp_options dccpo_default_values = { |
32 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, | 27 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, |
33 | .dccpo_ccid = DCCPF_INITIAL_CCID, | 28 | .dccpo_rx_ccid = DCCPF_INITIAL_CCID, |
29 | .dccpo_tx_ccid = DCCPF_INITIAL_CCID, | ||
34 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, | 30 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, |
35 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, | 31 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, |
36 | }; | 32 | }; |
@@ -113,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
113 | opt_recv->dccpor_ndp); | 109 | opt_recv->dccpor_ndp); |
114 | break; | 110 | break; |
115 | case DCCPO_ACK_VECTOR_0: | 111 | case DCCPO_ACK_VECTOR_0: |
116 | if (len > DCCP_MAX_ACK_VECTOR_LEN) | 112 | case DCCPO_ACK_VECTOR_1: |
117 | goto out_invalid_option; | ||
118 | |||
119 | if (pkt_type == DCCP_PKT_DATA) | 113 | if (pkt_type == DCCP_PKT_DATA) |
120 | continue; | 114 | continue; |
121 | 115 | ||
122 | opt_recv->dccpor_ack_vector_len = len; | 116 | if (dp->dccps_options.dccpo_send_ack_vector && |
123 | opt_recv->dccpor_ack_vector_idx = value - options; | 117 | dccp_ackvec_parse(sk, skb, opt, value, len)) |
124 | 118 | goto out_invalid_option; | |
125 | dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", | ||
126 | debug_prefix, len, | ||
127 | (unsigned long long) | ||
128 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
129 | dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
130 | value, len); | ||
131 | dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, | ||
132 | sk, | ||
133 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
134 | len, value); | ||
135 | break; | 119 | break; |
136 | case DCCPO_TIMESTAMP: | 120 | case DCCPO_TIMESTAMP: |
137 | if (len != 4) | 121 | if (len != 4) |
@@ -352,86 +336,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk, | |||
352 | 336 | ||
353 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); | 337 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); |
354 | 338 | ||
355 | static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) | ||
356 | { | ||
357 | struct dccp_sock *dp = dccp_sk(sk); | ||
358 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
359 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
360 | "CLIENT TX opt: " : "server TX opt: "; | ||
361 | #endif | ||
362 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
363 | int len = ap->dccpap_buf_vector_len + 2; | ||
364 | struct timeval now; | ||
365 | u32 elapsed_time; | ||
366 | unsigned char *to, *from; | ||
367 | |||
368 | dccp_timestamp(sk, &now); | ||
369 | elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; | ||
370 | |||
371 | if (elapsed_time != 0) | ||
372 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
373 | |||
374 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
375 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " | ||
376 | "insert ACK Vector!\n"); | ||
377 | return; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * XXX: now we have just one ack vector sent record, so | ||
382 | * we have to wait for it to be cleared. | ||
383 | * | ||
384 | * Of course this is not acceptable, but this is just for | ||
385 | * basic testing now. | ||
386 | */ | ||
387 | if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
388 | return; | ||
389 | |||
390 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
391 | |||
392 | to = skb_push(skb, len); | ||
393 | *to++ = DCCPO_ACK_VECTOR_0; | ||
394 | *to++ = len; | ||
395 | |||
396 | len = ap->dccpap_buf_vector_len; | ||
397 | from = ap->dccpap_buf + ap->dccpap_buf_head; | ||
398 | |||
399 | /* Check if buf_head wraps */ | ||
400 | if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { | ||
401 | const unsigned int tailsize = (ap->dccpap_buf_len - | ||
402 | ap->dccpap_buf_head); | ||
403 | |||
404 | memcpy(to, from, tailsize); | ||
405 | to += tailsize; | ||
406 | len -= tailsize; | ||
407 | from = ap->dccpap_buf; | ||
408 | } | ||
409 | |||
410 | memcpy(to, from, len); | ||
411 | /* | ||
412 | * From draft-ietf-dccp-spec-11.txt: | ||
413 | * | ||
414 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
415 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
416 | * sequence number it used for the ack packet; ack_ptr will equal | ||
417 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will | ||
418 | * equal buf_nonce. | ||
419 | * | ||
420 | * This implemention uses just one ack record for now. | ||
421 | */ | ||
422 | ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
423 | ap->dccpap_ack_ptr = ap->dccpap_buf_head; | ||
424 | ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; | ||
425 | ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; | ||
426 | ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; | ||
427 | |||
428 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " | ||
429 | "ack_ackno=%llu\n", | ||
430 | debug_prefix, ap->dccpap_ack_vector_len, | ||
431 | (unsigned long long) ap->dccpap_ack_seqno, | ||
432 | (unsigned long long) ap->dccpap_ack_ackno); | ||
433 | } | ||
434 | |||
435 | void dccp_timestamp(const struct sock *sk, struct timeval *tv) | 339 | void dccp_timestamp(const struct sock *sk, struct timeval *tv) |
436 | { | 340 | { |
437 | const struct dccp_sock *dp = dccp_sk(sk); | 341 | const struct dccp_sock *dp = dccp_sk(sk); |
@@ -528,9 +432,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
528 | 432 | ||
529 | if (!dccp_packet_without_ack(skb)) { | 433 | if (!dccp_packet_without_ack(skb)) { |
530 | if (dp->dccps_options.dccpo_send_ack_vector && | 434 | if (dp->dccps_options.dccpo_send_ack_vector && |
531 | (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != | 435 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) |
532 | DCCP_MAX_SEQNO + 1)) | 436 | dccp_insert_option_ackvec(sk, skb); |
533 | dccp_insert_option_ack_vector(sk, skb); | ||
534 | if (dp->dccps_timestamp_echo != 0) | 437 | if (dp->dccps_timestamp_echo != 0) |
535 | dccp_insert_option_timestamp_echo(sk, skb); | 438 | dccp_insert_option_timestamp_echo(sk, skb); |
536 | } | 439 | } |
@@ -557,331 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
557 | } | 460 | } |
558 | } | 461 | } |
559 | } | 462 | } |
560 | |||
561 | struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, | ||
562 | const unsigned int __nocast priority) | ||
563 | { | ||
564 | struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); | ||
565 | |||
566 | if (ap != NULL) { | ||
567 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
568 | memset(ap->dccpap_buf, 0xFF, len); | ||
569 | #endif | ||
570 | ap->dccpap_buf_len = len; | ||
571 | ap->dccpap_buf_head = | ||
572 | ap->dccpap_buf_tail = | ||
573 | ap->dccpap_buf_len - 1; | ||
574 | ap->dccpap_buf_ackno = | ||
575 | ap->dccpap_ack_ackno = | ||
576 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
577 | ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; | ||
578 | ap->dccpap_ack_ptr = 0; | ||
579 | ap->dccpap_time.tv_sec = 0; | ||
580 | ap->dccpap_time.tv_usec = 0; | ||
581 | ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; | ||
582 | } | ||
583 | |||
584 | return ap; | ||
585 | } | ||
586 | |||
587 | void dccp_ackpkts_free(struct dccp_ackpkts *ap) | ||
588 | { | ||
589 | if (ap != NULL) { | ||
590 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
591 | memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); | ||
592 | #endif | ||
593 | kfree(ap); | ||
594 | } | ||
595 | } | ||
596 | |||
597 | static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, | ||
598 | const unsigned int index) | ||
599 | { | ||
600 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; | ||
601 | } | ||
602 | |||
603 | static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, | ||
604 | const unsigned int index) | ||
605 | { | ||
606 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
611 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
612 | * this simplifies table updates if one of the missing packets arrives. | ||
613 | */ | ||
614 | static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, | ||
615 | const unsigned int packets, | ||
616 | const unsigned char state) | ||
617 | { | ||
618 | unsigned int gap; | ||
619 | signed long new_head; | ||
620 | |||
621 | if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) | ||
622 | return -ENOBUFS; | ||
623 | |||
624 | gap = packets - 1; | ||
625 | new_head = ap->dccpap_buf_head - packets; | ||
626 | |||
627 | if (new_head < 0) { | ||
628 | if (gap > 0) { | ||
629 | memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, | ||
630 | gap + new_head + 1); | ||
631 | gap = -new_head; | ||
632 | } | ||
633 | new_head += ap->dccpap_buf_len; | ||
634 | } | ||
635 | |||
636 | ap->dccpap_buf_head = new_head; | ||
637 | |||
638 | if (gap > 0) | ||
639 | memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, | ||
640 | DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); | ||
641 | |||
642 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
643 | ap->dccpap_buf_vector_len += packets; | ||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
649 | */ | ||
650 | int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, | ||
651 | u64 ackno, u8 state) | ||
652 | { | ||
653 | /* | ||
654 | * Check at the right places if the buffer is full, if it is, tell the | ||
655 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
656 | * vectors, when we will free up space in dccpap_buf. | ||
657 | * | ||
658 | * We may well decide to do buffer compression, etc, but for now lets | ||
659 | * just drop. | ||
660 | * | ||
661 | * From Appendix A: | ||
662 | * | ||
663 | * Of course, the circular buffer may overflow, either when the | ||
664 | * HC-Sender is sending data at a very high rate, when the | ||
665 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
666 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
667 | * (so the HC-Receiver is unable to clean up old state). In this | ||
668 | * case, the HC-Receiver should either compress the buffer (by | ||
669 | * increasing run lengths when possible), transfer its state to | ||
670 | * a larger buffer, or, as a last resort, drop all received | ||
671 | * packets, without processing them whatsoever, until its buffer | ||
672 | * shrinks again. | ||
673 | */ | ||
674 | |||
675 | /* See if this is the first ackno being inserted */ | ||
676 | if (ap->dccpap_buf_vector_len == 0) { | ||
677 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
678 | ap->dccpap_buf_vector_len = 1; | ||
679 | } else if (after48(ackno, ap->dccpap_buf_ackno)) { | ||
680 | const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, | ||
681 | ackno); | ||
682 | |||
683 | /* | ||
684 | * Look if the state of this packet is the same as the | ||
685 | * previous ackno and if so if we can bump the head len. | ||
686 | */ | ||
687 | if (delta == 1 && | ||
688 | dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && | ||
689 | (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < | ||
690 | DCCP_ACKPKTS_LEN_MASK)) | ||
691 | ap->dccpap_buf[ap->dccpap_buf_head]++; | ||
692 | else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) | ||
693 | return -ENOBUFS; | ||
694 | } else { | ||
695 | /* | ||
696 | * A.1.2. Old Packets | ||
697 | * | ||
698 | * When a packet with Sequence Number S arrives, and | ||
699 | * S <= buf_ackno, the HC-Receiver will scan the table | ||
700 | * for the byte corresponding to S. (Indexing structures | ||
701 | * could reduce the complexity of this scan.) | ||
702 | */ | ||
703 | u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); | ||
704 | unsigned int index = ap->dccpap_buf_head; | ||
705 | |||
706 | while (1) { | ||
707 | const u8 len = dccp_ackpkts_len(ap, index); | ||
708 | const u8 state = dccp_ackpkts_state(ap, index); | ||
709 | /* | ||
710 | * valid packets not yet in dccpap_buf have a reserved | ||
711 | * entry, with a len equal to 0. | ||
712 | */ | ||
713 | if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && | ||
714 | len == 0 && delta == 0) { /* Found our | ||
715 | reserved seat! */ | ||
716 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
717 | (unsigned long long) ackno); | ||
718 | ap->dccpap_buf[index] = state; | ||
719 | goto out; | ||
720 | } | ||
721 | /* len == 0 means one packet */ | ||
722 | if (delta < len + 1) | ||
723 | goto out_duplicate; | ||
724 | |||
725 | delta -= len + 1; | ||
726 | if (++index == ap->dccpap_buf_len) | ||
727 | index = 0; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | ap->dccpap_buf_ackno = ackno; | ||
732 | dccp_timestamp(sk, &ap->dccpap_time); | ||
733 | out: | ||
734 | dccp_pr_debug(""); | ||
735 | dccp_ackpkts_print(ap); | ||
736 | return 0; | ||
737 | |||
738 | out_duplicate: | ||
739 | /* Duplicate packet */ | ||
740 | dccp_pr_debug("Received a dup or already considered lost " | ||
741 | "packet: %llu\n", (unsigned long long) ackno); | ||
742 | return -EILSEQ; | ||
743 | } | ||
744 | |||
745 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
746 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, | ||
747 | int len) | ||
748 | { | ||
749 | if (!dccp_debug) | ||
750 | return; | ||
751 | |||
752 | printk("ACK vector len=%d, ackno=%llu |", len, | ||
753 | (unsigned long long) ackno); | ||
754 | |||
755 | while (len--) { | ||
756 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
757 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
758 | |||
759 | printk("%d,%d|", state, rl); | ||
760 | ++vector; | ||
761 | } | ||
762 | |||
763 | printk("\n"); | ||
764 | } | ||
765 | |||
766 | void dccp_ackpkts_print(const struct dccp_ackpkts *ap) | ||
767 | { | ||
768 | dccp_ackvector_print(ap->dccpap_buf_ackno, | ||
769 | ap->dccpap_buf + ap->dccpap_buf_head, | ||
770 | ap->dccpap_buf_vector_len); | ||
771 | } | ||
772 | #endif | ||
773 | |||
774 | static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) | ||
775 | { | ||
776 | /* | ||
777 | * As we're keeping track of the ack vector size | ||
778 | * (dccpap_buf_vector_len) and the sent ack vector size | ||
779 | * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but | ||
780 | * keep this code here as in the future we'll implement a vector of | ||
781 | * ack records, as suggested in draft-ietf-dccp-spec-11.txt | ||
782 | * Appendix A. -acme | ||
783 | */ | ||
784 | #if 0 | ||
785 | ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; | ||
786 | if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) | ||
787 | ap->dccpap_buf_tail -= ap->dccpap_buf_len; | ||
788 | #endif | ||
789 | ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; | ||
790 | } | ||
791 | |||
792 | void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, | ||
793 | u64 ackno) | ||
794 | { | ||
795 | /* Check if we actually sent an ACK vector */ | ||
796 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
797 | return; | ||
798 | |||
799 | if (ackno == ap->dccpap_ack_seqno) { | ||
800 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
801 | struct dccp_sock *dp = dccp_sk(sk); | ||
802 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
803 | "CLIENT rx ack: " : "server rx ack: "; | ||
804 | #endif | ||
805 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " | ||
806 | "ack_ackno=%llu, ACKED!\n", | ||
807 | debug_prefix, 1, | ||
808 | (unsigned long long) ap->dccpap_ack_seqno, | ||
809 | (unsigned long long) ap->dccpap_ack_ackno); | ||
810 | dccp_ackpkts_trow_away_ack_record(ap); | ||
811 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
812 | } | ||
813 | } | ||
814 | |||
815 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
816 | struct sock *sk, u64 ackno, | ||
817 | const unsigned char len, | ||
818 | const unsigned char *vector) | ||
819 | { | ||
820 | unsigned char i; | ||
821 | |||
822 | /* Check if we actually sent an ACK vector */ | ||
823 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
824 | return; | ||
825 | /* | ||
826 | * We're in the receiver half connection, so if the received an ACK | ||
827 | * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're | ||
828 | * not interested. | ||
829 | * | ||
830 | * Extra explanation with example: | ||
831 | * | ||
832 | * if we received an ACK vector with ackno 50, it can only be acking | ||
833 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
834 | */ | ||
835 | /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ | ||
836 | if (before48(ackno, ap->dccpap_ack_seqno)) { | ||
837 | /* dccp_pr_debug_cat("yes\n"); */ | ||
838 | return; | ||
839 | } | ||
840 | /* dccp_pr_debug_cat("no\n"); */ | ||
841 | |||
842 | i = len; | ||
843 | while (i--) { | ||
844 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
845 | u64 ackno_end_rl; | ||
846 | |||
847 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
848 | |||
849 | /* | ||
850 | * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, | ||
851 | * ap->dccpap_ack_seqno, ackno); | ||
852 | */ | ||
853 | if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { | ||
854 | const u8 state = (*vector & | ||
855 | DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
856 | /* dccp_pr_debug_cat("yes\n"); */ | ||
857 | |||
858 | if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { | ||
859 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
860 | struct dccp_sock *dp = dccp_sk(sk); | ||
861 | const char *debug_prefix = | ||
862 | dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
863 | "CLIENT rx ack: " : "server rx ack: "; | ||
864 | #endif | ||
865 | dccp_pr_debug("%sACK vector 0, len=%d, " | ||
866 | "ack_seqno=%llu, ack_ackno=%llu, " | ||
867 | "ACKED!\n", | ||
868 | debug_prefix, len, | ||
869 | (unsigned long long) | ||
870 | ap->dccpap_ack_seqno, | ||
871 | (unsigned long long) | ||
872 | ap->dccpap_ack_ackno); | ||
873 | dccp_ackpkts_trow_away_ack_record(ap); | ||
874 | } | ||
875 | /* | ||
876 | * If dccpap_ack_seqno was not received, no problem | ||
877 | * we'll send another ACK vector. | ||
878 | */ | ||
879 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
880 | break; | ||
881 | } | ||
882 | /* dccp_pr_debug_cat("no\n"); */ | ||
883 | |||
884 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
885 | ++vector; | ||
886 | } | ||
887 | } | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c index ea6d0e91e511..4786bdcddcc9 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | #include <net/sock.h> | 17 | #include <net/sock.h> |
18 | 18 | ||
19 | #include "ackvec.h" | ||
19 | #include "ccid.h" | 20 | #include "ccid.h" |
20 | #include "dccp.h" | 21 | #include "dccp.h" |
21 | 22 | ||
@@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
85 | switch (dcb->dccpd_type) { | 86 | switch (dcb->dccpd_type) { |
86 | case DCCP_PKT_REQUEST: | 87 | case DCCP_PKT_REQUEST: |
87 | dccp_hdr_request(skb)->dccph_req_service = | 88 | dccp_hdr_request(skb)->dccph_req_service = |
88 | dcb->dccpd_service; | 89 | dp->dccps_service; |
89 | break; | 90 | break; |
90 | case DCCP_PKT_RESET: | 91 | case DCCP_PKT_RESET: |
91 | dccp_hdr_reset(skb)->dccph_reset_code = | 92 | dccp_hdr_reset(skb)->dccph_reset_code = |
@@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | |||
225 | err = dccp_wait_for_ccid(sk, skb, timeo); | 226 | err = dccp_wait_for_ccid(sk, skb, timeo); |
226 | 227 | ||
227 | if (err == 0) { | 228 | if (err == 0) { |
228 | const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
229 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 229 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
230 | const int len = skb->len; | 230 | const int len = skb->len; |
231 | 231 | ||
@@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | |||
236 | inet_csk(sk)->icsk_rto, | 236 | inet_csk(sk)->icsk_rto, |
237 | DCCP_RTO_MAX); | 237 | DCCP_RTO_MAX); |
238 | dcb->dccpd_type = DCCP_PKT_DATAACK; | 238 | dcb->dccpd_type = DCCP_PKT_DATAACK; |
239 | /* | 239 | } else if (dccp_ack_pending(sk)) |
240 | * FIXME: we really should have a | ||
241 | * dccps_ack_pending or use icsk. | ||
242 | */ | ||
243 | } else if (inet_csk_ack_scheduled(sk) || | ||
244 | dp->dccps_timestamp_echo != 0 || | ||
245 | (dp->dccps_options.dccpo_send_ack_vector && | ||
246 | ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && | ||
247 | ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) | ||
248 | dcb->dccpd_type = DCCP_PKT_DATAACK; | 240 | dcb->dccpd_type = DCCP_PKT_DATAACK; |
249 | else | 241 | else |
250 | dcb->dccpd_type = DCCP_PKT_DATA; | 242 | dcb->dccpd_type = DCCP_PKT_DATA; |
@@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
270 | struct request_sock *req) | 262 | struct request_sock *req) |
271 | { | 263 | { |
272 | struct dccp_hdr *dh; | 264 | struct dccp_hdr *dh; |
265 | struct dccp_request_sock *dreq; | ||
273 | const int dccp_header_size = sizeof(struct dccp_hdr) + | 266 | const int dccp_header_size = sizeof(struct dccp_hdr) + |
274 | sizeof(struct dccp_hdr_ext) + | 267 | sizeof(struct dccp_hdr_ext) + |
275 | sizeof(struct dccp_hdr_response); | 268 | sizeof(struct dccp_hdr_response); |
@@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
285 | skb->dst = dst_clone(dst); | 278 | skb->dst = dst_clone(dst); |
286 | skb->csum = 0; | 279 | skb->csum = 0; |
287 | 280 | ||
281 | dreq = dccp_rsk(req); | ||
288 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; | 282 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; |
289 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; | 283 | DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; |
290 | dccp_insert_options(sk, skb); | 284 | dccp_insert_options(sk, skb); |
291 | 285 | ||
292 | skb->h.raw = skb_push(skb, dccp_header_size); | 286 | skb->h.raw = skb_push(skb, dccp_header_size); |
@@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
300 | DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | 294 | DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; |
301 | dh->dccph_type = DCCP_PKT_RESPONSE; | 295 | dh->dccph_type = DCCP_PKT_RESPONSE; |
302 | dh->dccph_x = 1; | 296 | dh->dccph_x = 1; |
303 | dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); | 297 | dccp_hdr_set_seq(dh, dreq->dreq_iss); |
304 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); | 298 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); |
299 | dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; | ||
305 | 300 | ||
306 | dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, | 301 | dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, |
307 | inet_rsk(req)->rmt_addr); | 302 | inet_rsk(req)->rmt_addr); |
@@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk) | |||
397 | skb_reserve(skb, MAX_DCCP_HEADER); | 392 | skb_reserve(skb, MAX_DCCP_HEADER); |
398 | 393 | ||
399 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; | 394 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; |
400 | /* FIXME: set service to something meaningful, coming | ||
401 | * from userspace*/ | ||
402 | DCCP_SKB_CB(skb)->dccpd_service = 0; | ||
403 | skb->csum = 0; | 395 | skb->csum = 0; |
404 | skb_set_owner_w(skb, sk); | 396 | skb_set_owner_w(skb, sk); |
405 | 397 | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69c9dc7..a1cfd0e9e3bc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); | |||
94 | 94 | ||
95 | static inline int dccp_listen_start(struct sock *sk) | 95 | static inline int dccp_listen_start(struct sock *sk) |
96 | { | 96 | { |
97 | dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; | 97 | struct dccp_sock *dp = dccp_sk(sk); |
98 | |||
99 | dp->dccps_role = DCCP_ROLE_LISTEN; | ||
100 | /* | ||
101 | * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) | ||
102 | * before calling listen() | ||
103 | */ | ||
104 | if (dccp_service_not_initialized(sk)) | ||
105 | return -EPROTO; | ||
98 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); | 106 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); |
99 | } | 107 | } |
100 | 108 | ||
@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
202 | return -ENOIOCTLCMD; | 210 | return -ENOIOCTLCMD; |
203 | } | 211 | } |
204 | 212 | ||
213 | static int dccp_setsockopt_service(struct sock *sk, const u32 service, | ||
214 | char __user *optval, int optlen) | ||
215 | { | ||
216 | struct dccp_sock *dp = dccp_sk(sk); | ||
217 | struct dccp_service_list *sl = NULL; | ||
218 | |||
219 | if (service == DCCP_SERVICE_INVALID_VALUE || | ||
220 | optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) | ||
221 | return -EINVAL; | ||
222 | |||
223 | if (optlen > sizeof(service)) { | ||
224 | sl = kmalloc(optlen, GFP_KERNEL); | ||
225 | if (sl == NULL) | ||
226 | return -ENOMEM; | ||
227 | |||
228 | sl->dccpsl_nr = optlen / sizeof(u32) - 1; | ||
229 | if (copy_from_user(sl->dccpsl_list, | ||
230 | optval + sizeof(service), | ||
231 | optlen - sizeof(service)) || | ||
232 | dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { | ||
233 | kfree(sl); | ||
234 | return -EFAULT; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | lock_sock(sk); | ||
239 | dp->dccps_service = service; | ||
240 | |||
241 | if (dp->dccps_service_list != NULL) | ||
242 | kfree(dp->dccps_service_list); | ||
243 | |||
244 | dp->dccps_service_list = sl; | ||
245 | release_sock(sk); | ||
246 | return 0; | ||
247 | } | ||
248 | |||
205 | int dccp_setsockopt(struct sock *sk, int level, int optname, | 249 | int dccp_setsockopt(struct sock *sk, int level, int optname, |
206 | char __user *optval, int optlen) | 250 | char __user *optval, int optlen) |
207 | { | 251 | { |
@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, | |||
218 | if (get_user(val, (int __user *)optval)) | 262 | if (get_user(val, (int __user *)optval)) |
219 | return -EFAULT; | 263 | return -EFAULT; |
220 | 264 | ||
221 | lock_sock(sk); | 265 | if (optname == DCCP_SOCKOPT_SERVICE) |
266 | return dccp_setsockopt_service(sk, val, optval, optlen); | ||
222 | 267 | ||
268 | lock_sock(sk); | ||
223 | dp = dccp_sk(sk); | 269 | dp = dccp_sk(sk); |
224 | err = 0; | 270 | err = 0; |
225 | 271 | ||
@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, | |||
236 | return err; | 282 | return err; |
237 | } | 283 | } |
238 | 284 | ||
285 | static int dccp_getsockopt_service(struct sock *sk, int len, | ||
286 | u32 __user *optval, | ||
287 | int __user *optlen) | ||
288 | { | ||
289 | const struct dccp_sock *dp = dccp_sk(sk); | ||
290 | const struct dccp_service_list *sl; | ||
291 | int err = -ENOENT, slen = 0, total_len = sizeof(u32); | ||
292 | |||
293 | lock_sock(sk); | ||
294 | if (dccp_service_not_initialized(sk)) | ||
295 | goto out; | ||
296 | |||
297 | if ((sl = dp->dccps_service_list) != NULL) { | ||
298 | slen = sl->dccpsl_nr * sizeof(u32); | ||
299 | total_len += slen; | ||
300 | } | ||
301 | |||
302 | err = -EINVAL; | ||
303 | if (total_len > len) | ||
304 | goto out; | ||
305 | |||
306 | err = 0; | ||
307 | if (put_user(total_len, optlen) || | ||
308 | put_user(dp->dccps_service, optval) || | ||
309 | (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) | ||
310 | err = -EFAULT; | ||
311 | out: | ||
312 | release_sock(sk); | ||
313 | return err; | ||
314 | } | ||
315 | |||
239 | int dccp_getsockopt(struct sock *sk, int level, int optname, | 316 | int dccp_getsockopt(struct sock *sk, int level, int optname, |
240 | char __user *optval, int __user *optlen) | 317 | char __user *optval, int __user *optlen) |
241 | { | 318 | { |
@@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, | |||
248 | if (get_user(len, optlen)) | 325 | if (get_user(len, optlen)) |
249 | return -EFAULT; | 326 | return -EFAULT; |
250 | 327 | ||
251 | len = min_t(unsigned int, len, sizeof(int)); | 328 | if (len < sizeof(int)) |
252 | if (len < 0) | ||
253 | return -EINVAL; | 329 | return -EINVAL; |
254 | 330 | ||
255 | dp = dccp_sk(sk); | 331 | dp = dccp_sk(sk); |
@@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, | |||
257 | switch (optname) { | 333 | switch (optname) { |
258 | case DCCP_SOCKOPT_PACKET_SIZE: | 334 | case DCCP_SOCKOPT_PACKET_SIZE: |
259 | val = dp->dccps_packet_size; | 335 | val = dp->dccps_packet_size; |
336 | len = sizeof(dp->dccps_packet_size); | ||
260 | break; | 337 | break; |
338 | case DCCP_SOCKOPT_SERVICE: | ||
339 | return dccp_getsockopt_service(sk, len, | ||
340 | (u32 __user *)optval, optlen); | ||
341 | case 128 ... 191: | ||
342 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | ||
343 | len, (u32 __user *)optval, optlen); | ||
344 | case 192 ... 255: | ||
345 | return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, | ||
346 | len, (u32 __user *)optval, optlen); | ||
261 | default: | 347 | default: |
262 | return -ENOPROTOOPT; | 348 | return -ENOPROTOOPT; |
263 | } | 349 | } |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1b63b4824164..50c0519cd70d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -43,7 +43,7 @@ | |||
43 | * 2 of the License, or (at your option) any later version. | 43 | * 2 of the License, or (at your option) any later version. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | #define VERSION "0.403" | 46 | #define VERSION "0.404" |
47 | 47 | ||
48 | #include <linux/config.h> | 48 | #include <linux/config.h> |
49 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
@@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) | |||
224 | Consider a node 'n' and its parent 'tp'. | 224 | Consider a node 'n' and its parent 'tp'. |
225 | 225 | ||
226 | If n is a leaf, every bit in its key is significant. Its presence is | 226 | If n is a leaf, every bit in its key is significant. Its presence is |
227 | necessitaded by path compression, since during a tree traversal (when | 227 | necessitated by path compression, since during a tree traversal (when |
228 | searching for a leaf - unless we are doing an insertion) we will completely | 228 | searching for a leaf - unless we are doing an insertion) we will completely |
229 | ignore all skipped bits we encounter. Thus we need to verify, at the end of | 229 | ignore all skipped bits we encounter. Thus we need to verify, at the end of |
230 | a potentially successful search, that we have indeed been walking the | 230 | a potentially successful search, that we have indeed been walking the |
@@ -836,11 +836,12 @@ static void trie_init(struct trie *t) | |||
836 | #endif | 836 | #endif |
837 | } | 837 | } |
838 | 838 | ||
839 | /* readside most use rcu_read_lock currently dump routines | 839 | /* readside must use rcu_read_lock currently dump routines |
840 | via get_fa_head and dump */ | 840 | via get_fa_head and dump */ |
841 | 841 | ||
842 | static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) | 842 | static struct leaf_info *find_leaf_info(struct leaf *l, int plen) |
843 | { | 843 | { |
844 | struct hlist_head *head = &l->list; | ||
844 | struct hlist_node *node; | 845 | struct hlist_node *node; |
845 | struct leaf_info *li; | 846 | struct leaf_info *li; |
846 | 847 | ||
@@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) | |||
853 | 854 | ||
854 | static inline struct list_head * get_fa_head(struct leaf *l, int plen) | 855 | static inline struct list_head * get_fa_head(struct leaf *l, int plen) |
855 | { | 856 | { |
856 | struct leaf_info *li = find_leaf_info(&l->list, plen); | 857 | struct leaf_info *li = find_leaf_info(l, plen); |
857 | 858 | ||
858 | if (!li) | 859 | if (!li) |
859 | return NULL; | 860 | return NULL; |
@@ -1085,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1085 | } | 1086 | } |
1086 | 1087 | ||
1087 | if (tp && tp->pos + tp->bits > 32) | 1088 | if (tp && tp->pos + tp->bits > 32) |
1088 | printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", | 1089 | printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", |
1089 | tp, tp->pos, tp->bits, key, plen); | 1090 | tp, tp->pos, tp->bits, key, plen); |
1090 | 1091 | ||
1091 | /* Rebalance the trie */ | 1092 | /* Rebalance the trie */ |
@@ -1248,7 +1249,7 @@ err: | |||
1248 | } | 1249 | } |
1249 | 1250 | ||
1250 | 1251 | ||
1251 | /* should be clalled with rcu_read_lock */ | 1252 | /* should be called with rcu_read_lock */ |
1252 | static inline int check_leaf(struct trie *t, struct leaf *l, | 1253 | static inline int check_leaf(struct trie *t, struct leaf *l, |
1253 | t_key key, int *plen, const struct flowi *flp, | 1254 | t_key key, int *plen, const struct flowi *flp, |
1254 | struct fib_result *res) | 1255 | struct fib_result *res) |
@@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1590 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); | 1591 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); |
1591 | 1592 | ||
1592 | l = fib_find_node(t, key); | 1593 | l = fib_find_node(t, key); |
1593 | li = find_leaf_info(&l->list, plen); | 1594 | li = find_leaf_info(l, plen); |
1594 | 1595 | ||
1595 | list_del_rcu(&fa->fa_list); | 1596 | list_del_rcu(&fa->fa_list); |
1596 | 1597 | ||
@@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb) | |||
1714 | 1715 | ||
1715 | t->revision++; | 1716 | t->revision++; |
1716 | 1717 | ||
1717 | rcu_read_lock(); | ||
1718 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { | 1718 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { |
1719 | found += trie_flush_leaf(t, l); | 1719 | found += trie_flush_leaf(t, l); |
1720 | 1720 | ||
@@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb) | |||
1722 | trie_leaf_remove(t, ll->key); | 1722 | trie_leaf_remove(t, ll->key); |
1723 | ll = l; | 1723 | ll = l; |
1724 | } | 1724 | } |
1725 | rcu_read_unlock(); | ||
1726 | 1725 | ||
1727 | if (ll && hlist_empty(&ll->list)) | 1726 | if (ll && hlist_empty(&ll->list)) |
1728 | trie_leaf_remove(t, ll->key); | 1727 | trie_leaf_remove(t, ll->key); |
@@ -1833,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1833 | i++; | 1832 | i++; |
1834 | continue; | 1833 | continue; |
1835 | } | 1834 | } |
1836 | if (fa->fa_info->fib_nh == NULL) { | 1835 | BUG_ON(!fa->fa_info); |
1837 | printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); | ||
1838 | i++; | ||
1839 | continue; | ||
1840 | } | ||
1841 | if (fa->fa_info == NULL) { | ||
1842 | printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); | ||
1843 | i++; | ||
1844 | continue; | ||
1845 | } | ||
1846 | 1836 | ||
1847 | if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, | 1837 | if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, |
1848 | cb->nlh->nlmsg_seq, | 1838 | cb->nlh->nlmsg_seq, |
@@ -1965,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id) | |||
1965 | trie_main = t; | 1955 | trie_main = t; |
1966 | 1956 | ||
1967 | if (id == RT_TABLE_LOCAL) | 1957 | if (id == RT_TABLE_LOCAL) |
1968 | printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); | 1958 | printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); |
1969 | 1959 | ||
1970 | return tb; | 1960 | return tb; |
1971 | } | 1961 | } |
@@ -2029,7 +2019,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | |||
2029 | iter->tnode = (struct tnode *) n; | 2019 | iter->tnode = (struct tnode *) n; |
2030 | iter->trie = t; | 2020 | iter->trie = t; |
2031 | iter->index = 0; | 2021 | iter->index = 0; |
2032 | iter->depth = 0; | 2022 | iter->depth = 1; |
2033 | return n; | 2023 | return n; |
2034 | } | 2024 | } |
2035 | return NULL; | 2025 | return NULL; |
@@ -2274,11 +2264,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2274 | seq_puts(seq, "<local>:\n"); | 2264 | seq_puts(seq, "<local>:\n"); |
2275 | else | 2265 | else |
2276 | seq_puts(seq, "<main>:\n"); | 2266 | seq_puts(seq, "<main>:\n"); |
2277 | } else { | 2267 | } |
2278 | seq_indent(seq, iter->depth-1); | 2268 | seq_indent(seq, iter->depth-1); |
2279 | seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", | 2269 | seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", |
2280 | NIPQUAD(prf), tn->pos); | 2270 | NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, |
2281 | } | 2271 | tn->empty_children); |
2272 | |||
2282 | } else { | 2273 | } else { |
2283 | struct leaf *l = (struct leaf *) n; | 2274 | struct leaf *l = (struct leaf *) n; |
2284 | int i; | 2275 | int i; |
@@ -2287,7 +2278,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2287 | seq_indent(seq, iter->depth); | 2278 | seq_indent(seq, iter->depth); |
2288 | seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); | 2279 | seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); |
2289 | for (i = 32; i >= 0; i--) { | 2280 | for (i = 32; i >= 0; i--) { |
2290 | struct leaf_info *li = find_leaf_info(&l->list, i); | 2281 | struct leaf_info *li = find_leaf_info(l, i); |
2291 | if (li) { | 2282 | if (li) { |
2292 | struct fib_alias *fa; | 2283 | struct fib_alias *fa; |
2293 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { | 2284 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { |
@@ -2383,7 +2374,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) | |||
2383 | return 0; | 2374 | return 0; |
2384 | 2375 | ||
2385 | for (i=32; i>=0; i--) { | 2376 | for (i=32; i>=0; i--) { |
2386 | struct leaf_info *li = find_leaf_info(&l->list, i); | 2377 | struct leaf_info *li = find_leaf_info(l, i); |
2387 | struct fib_alias *fa; | 2378 | struct fib_alias *fa; |
2388 | u32 mask, prefix; | 2379 | u32 mask, prefix; |
2389 | 2380 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44607f4767b8..70c44e4c3ceb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) | |||
1603 | } | 1603 | } |
1604 | pmc->sources = NULL; | 1604 | pmc->sources = NULL; |
1605 | pmc->sfmode = MCAST_EXCLUDE; | 1605 | pmc->sfmode = MCAST_EXCLUDE; |
1606 | pmc->sfcount[MCAST_EXCLUDE] = 0; | 1606 | pmc->sfcount[MCAST_INCLUDE] = 0; |
1607 | pmc->sfcount[MCAST_EXCLUDE] = 1; | 1607 | pmc->sfcount[MCAST_EXCLUDE] = 1; |
1608 | } | 1608 | } |
1609 | 1609 | ||
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index e11952ea17af..f828fa2eb7de 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c | |||
@@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get | |||
196 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 196 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
197 | if (s_addr==cp->caddr && s_port==cp->cport && | 197 | if (s_addr==cp->caddr && s_port==cp->cport && |
198 | d_port==cp->vport && d_addr==cp->vaddr && | 198 | d_port==cp->vport && d_addr==cp->vaddr && |
199 | ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && | ||
199 | protocol==cp->protocol) { | 200 | protocol==cp->protocol) { |
200 | /* HIT */ | 201 | /* HIT */ |
201 | atomic_inc(&cp->refcnt); | 202 | atomic_inc(&cp->refcnt); |
@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get | |||
227 | return cp; | 228 | return cp; |
228 | } | 229 | } |
229 | 230 | ||
231 | /* Get reference to connection template */ | ||
232 | struct ip_vs_conn *ip_vs_ct_in_get | ||
233 | (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) | ||
234 | { | ||
235 | unsigned hash; | ||
236 | struct ip_vs_conn *cp; | ||
237 | |||
238 | hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); | ||
239 | |||
240 | ct_read_lock(hash); | ||
241 | |||
242 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | ||
243 | if (s_addr==cp->caddr && s_port==cp->cport && | ||
244 | d_port==cp->vport && d_addr==cp->vaddr && | ||
245 | cp->flags & IP_VS_CONN_F_TEMPLATE && | ||
246 | protocol==cp->protocol) { | ||
247 | /* HIT */ | ||
248 | atomic_inc(&cp->refcnt); | ||
249 | goto out; | ||
250 | } | ||
251 | } | ||
252 | cp = NULL; | ||
253 | |||
254 | out: | ||
255 | ct_read_unlock(hash); | ||
256 | |||
257 | IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | ||
258 | ip_vs_proto_name(protocol), | ||
259 | NIPQUAD(s_addr), ntohs(s_port), | ||
260 | NIPQUAD(d_addr), ntohs(d_port), | ||
261 | cp?"hit":"not hit"); | ||
262 | |||
263 | return cp; | ||
264 | } | ||
230 | 265 | ||
231 | /* | 266 | /* |
232 | * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. | 267 | * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. |
@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | |||
367 | atomic_read(&dest->refcnt)); | 402 | atomic_read(&dest->refcnt)); |
368 | 403 | ||
369 | /* Update the connection counters */ | 404 | /* Update the connection counters */ |
370 | if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { | 405 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { |
371 | /* It is a normal connection, so increase the inactive | 406 | /* It is a normal connection, so increase the inactive |
372 | connection counter because it is in TCP SYNRECV | 407 | connection counter because it is in TCP SYNRECV |
373 | state (inactive) or other protocol inacive state */ | 408 | state (inactive) or other protocol inacive state */ |
@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) | |||
406 | atomic_read(&dest->refcnt)); | 441 | atomic_read(&dest->refcnt)); |
407 | 442 | ||
408 | /* Update the connection counters */ | 443 | /* Update the connection counters */ |
409 | if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { | 444 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { |
410 | /* It is a normal connection, so decrease the inactconns | 445 | /* It is a normal connection, so decrease the inactconns |
411 | or activeconns counter */ | 446 | or activeconns counter */ |
412 | if (cp->flags & IP_VS_CONN_F_INACTIVE) { | 447 | if (cp->flags & IP_VS_CONN_F_INACTIVE) { |
@@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) | |||
467 | /* | 502 | /* |
468 | * Invalidate the connection template | 503 | * Invalidate the connection template |
469 | */ | 504 | */ |
470 | if (ct->cport) { | 505 | if (ct->vport != 65535) { |
471 | if (ip_vs_conn_unhash(ct)) { | 506 | if (ip_vs_conn_unhash(ct)) { |
472 | ct->dport = 65535; | 507 | ct->dport = 65535; |
473 | ct->vport = 65535; | 508 | ct->vport = 65535; |
@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void) | |||
776 | ct_write_lock_bh(hash); | 811 | ct_write_lock_bh(hash); |
777 | 812 | ||
778 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 813 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
779 | if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) | 814 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
780 | /* connection template */ | 815 | /* connection template */ |
781 | continue; | 816 | continue; |
782 | 817 | ||
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3ac7eeca04ac..981cc3244ef2 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c | |||
@@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
243 | if (ports[1] == svc->port) { | 243 | if (ports[1] == svc->port) { |
244 | /* Check if a template already exists */ | 244 | /* Check if a template already exists */ |
245 | if (svc->port != FTPPORT) | 245 | if (svc->port != FTPPORT) |
246 | ct = ip_vs_conn_in_get(iph->protocol, snet, 0, | 246 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, |
247 | iph->daddr, ports[1]); | 247 | iph->daddr, ports[1]); |
248 | else | 248 | else |
249 | ct = ip_vs_conn_in_get(iph->protocol, snet, 0, | 249 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, |
250 | iph->daddr, 0); | 250 | iph->daddr, 0); |
251 | 251 | ||
252 | if (!ct || !ip_vs_check_template(ct)) { | 252 | if (!ct || !ip_vs_check_template(ct)) { |
@@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
272 | iph->daddr, | 272 | iph->daddr, |
273 | ports[1], | 273 | ports[1], |
274 | dest->addr, dest->port, | 274 | dest->addr, dest->port, |
275 | 0, | 275 | IP_VS_CONN_F_TEMPLATE, |
276 | dest); | 276 | dest); |
277 | else | 277 | else |
278 | ct = ip_vs_conn_new(iph->protocol, | 278 | ct = ip_vs_conn_new(iph->protocol, |
279 | snet, 0, | 279 | snet, 0, |
280 | iph->daddr, 0, | 280 | iph->daddr, 0, |
281 | dest->addr, 0, | 281 | dest->addr, 0, |
282 | 0, | 282 | IP_VS_CONN_F_TEMPLATE, |
283 | dest); | 283 | dest); |
284 | if (ct == NULL) | 284 | if (ct == NULL) |
285 | return NULL; | 285 | return NULL; |
@@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
298 | * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> | 298 | * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> |
299 | */ | 299 | */ |
300 | if (svc->fwmark) | 300 | if (svc->fwmark) |
301 | ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, | 301 | ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, |
302 | htonl(svc->fwmark), 0); | 302 | htonl(svc->fwmark), 0); |
303 | else | 303 | else |
304 | ct = ip_vs_conn_in_get(iph->protocol, snet, 0, | 304 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, |
305 | iph->daddr, 0); | 305 | iph->daddr, 0); |
306 | 306 | ||
307 | if (!ct || !ip_vs_check_template(ct)) { | 307 | if (!ct || !ip_vs_check_template(ct)) { |
@@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
326 | snet, 0, | 326 | snet, 0, |
327 | htonl(svc->fwmark), 0, | 327 | htonl(svc->fwmark), 0, |
328 | dest->addr, 0, | 328 | dest->addr, 0, |
329 | 0, | 329 | IP_VS_CONN_F_TEMPLATE, |
330 | dest); | 330 | dest); |
331 | else | 331 | else |
332 | ct = ip_vs_conn_new(iph->protocol, | 332 | ct = ip_vs_conn_new(iph->protocol, |
333 | snet, 0, | 333 | snet, 0, |
334 | iph->daddr, 0, | 334 | iph->daddr, 0, |
335 | dest->addr, 0, | 335 | dest->addr, 0, |
336 | 0, | 336 | IP_VS_CONN_F_TEMPLATE, |
337 | dest); | 337 | dest); |
338 | if (ct == NULL) | 338 | if (ct == NULL) |
339 | return NULL; | 339 | return NULL; |
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 574d1f509b46..2e5ced3d8062 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c | |||
@@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) | |||
297 | 297 | ||
298 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | 298 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); |
299 | for (i=0; i<m->nr_conns; i++) { | 299 | for (i=0; i<m->nr_conns; i++) { |
300 | unsigned flags; | ||
301 | |||
300 | s = (struct ip_vs_sync_conn *)p; | 302 | s = (struct ip_vs_sync_conn *)p; |
301 | cp = ip_vs_conn_in_get(s->protocol, | 303 | flags = ntohs(s->flags); |
302 | s->caddr, s->cport, | 304 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) |
303 | s->vaddr, s->vport); | 305 | cp = ip_vs_conn_in_get(s->protocol, |
306 | s->caddr, s->cport, | ||
307 | s->vaddr, s->vport); | ||
308 | else | ||
309 | cp = ip_vs_ct_in_get(s->protocol, | ||
310 | s->caddr, s->cport, | ||
311 | s->vaddr, s->vport); | ||
304 | if (!cp) { | 312 | if (!cp) { |
305 | cp = ip_vs_conn_new(s->protocol, | 313 | cp = ip_vs_conn_new(s->protocol, |
306 | s->caddr, s->cport, | 314 | s->caddr, s->cport, |
307 | s->vaddr, s->vport, | 315 | s->vaddr, s->vport, |
308 | s->daddr, s->dport, | 316 | s->daddr, s->dport, |
309 | ntohs(s->flags), NULL); | 317 | flags, NULL); |
310 | if (!cp) { | 318 | if (!cp) { |
311 | IP_VS_ERR("ip_vs_conn_new failed\n"); | 319 | IP_VS_ERR("ip_vs_conn_new failed\n"); |
312 | return; | 320 | return; |
@@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) | |||
315 | } else if (!cp->dest) { | 323 | } else if (!cp->dest) { |
316 | /* it is an entry created by the synchronization */ | 324 | /* it is an entry created by the synchronization */ |
317 | cp->state = ntohs(s->state); | 325 | cp->state = ntohs(s->state); |
318 | cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; | 326 | cp->flags = flags | IP_VS_CONN_F_HASHED; |
319 | } /* Note that we don't touch its state and flags | 327 | } /* Note that we don't touch its state and flags |
320 | if it is a normal entry. */ | 328 | if it is a normal entry. */ |
321 | 329 | ||
322 | if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { | 330 | if (flags & IP_VS_CONN_F_SEQ_MASK) { |
323 | opt = (struct ip_vs_sync_conn_options *)&s[1]; | 331 | opt = (struct ip_vs_sync_conn_options *)&s[1]; |
324 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | 332 | memcpy(&cp->in_seq, opt, sizeof(*opt)); |
325 | p += FULL_CONN_SIZE; | 333 | p += FULL_CONN_SIZE; |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 30aa8e2ee214..3cf9b451675c 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS | |||
51 | 51 | ||
52 | IF unsure, say `N'. | 52 | IF unsure, say `N'. |
53 | 53 | ||
54 | config IP_NF_CONNTRACK_NETLINK | ||
55 | tristate 'Connection tracking netlink interface' | ||
56 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | ||
57 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m | ||
58 | help | ||
59 | This option enables support for a netlink-based userspace interface | ||
60 | |||
61 | |||
54 | config IP_NF_CT_PROTO_SCTP | 62 | config IP_NF_CT_PROTO_SCTP |
55 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' | 63 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' |
56 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | 64 | depends on IP_NF_CONNTRACK && EXPERIMENTAL |
@@ -129,6 +137,22 @@ config IP_NF_AMANDA | |||
129 | 137 | ||
130 | To compile it as a module, choose M here. If unsure, say Y. | 138 | To compile it as a module, choose M here. If unsure, say Y. |
131 | 139 | ||
140 | config IP_NF_PPTP | ||
141 | tristate 'PPTP protocol support' | ||
142 | help | ||
143 | This module adds support for PPTP (Point to Point Tunnelling | ||
144 | Protocol, RFC2637) connection tracking and NAT. | |
145 | |||
146 | If you are running PPTP sessions over a stateful firewall or NAT | ||
147 | box, you may want to enable this feature. | ||
148 | |||
149 | Please note that not all PPTP modes of operation are supported yet. | ||
150 | For more info, read top of the file | ||
151 | net/ipv4/netfilter/ip_conntrack_helper_pptp.c | |
152 | |||
153 | If you want to compile it as a module, say M here and read | ||
154 | Documentation/modules.txt. If unsure, say `N'. | ||
155 | |||
132 | config IP_NF_QUEUE | 156 | config IP_NF_QUEUE |
133 | tristate "IP Userspace queueing via NETLINK (OBSOLETE)" | 157 | tristate "IP Userspace queueing via NETLINK (OBSOLETE)" |
134 | help | 158 | help |
@@ -613,6 +637,12 @@ config IP_NF_NAT_AMANDA | |||
613 | default IP_NF_NAT if IP_NF_AMANDA=y | 637 | default IP_NF_NAT if IP_NF_AMANDA=y |
614 | default m if IP_NF_AMANDA=m | 638 | default m if IP_NF_AMANDA=m |
615 | 639 | ||
640 | config IP_NF_NAT_PPTP | ||
641 | tristate | ||
642 | depends on IP_NF_NAT!=n && IP_NF_PPTP!=n | ||
643 | default IP_NF_NAT if IP_NF_PPTP=y | ||
644 | default m if IP_NF_PPTP=m | ||
645 | |||
616 | # mangle + specific targets | 646 | # mangle + specific targets |
617 | config IP_NF_MANGLE | 647 | config IP_NF_MANGLE |
618 | tristate "Packet mangling" | 648 | tristate "Packet mangling" |
@@ -774,11 +804,5 @@ config IP_NF_ARP_MANGLE | |||
774 | Allows altering the ARP packet payload: source and destination | 804 | Allows altering the ARP packet payload: source and destination |
775 | hardware and network addresses. | 805 | hardware and network addresses. |
776 | 806 | ||
777 | config IP_NF_CONNTRACK_NETLINK | ||
778 | tristate 'Connection tracking netlink interface' | ||
779 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | ||
780 | help | ||
781 | This option enables support for a netlink-based userspace interface | ||
782 | |||
783 | endmenu | 807 | endmenu |
784 | 808 | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1ba0db746817..3d45d3c0283c 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -6,6 +6,9 @@ | |||
6 | ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o | 6 | ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o |
7 | iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o | 7 | iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o |
8 | 8 | ||
9 | ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o | ||
10 | ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o | ||
11 | |||
9 | # connection tracking | 12 | # connection tracking |
10 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o | 13 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o |
11 | 14 | ||
@@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o | |||
17 | obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o | 20 | obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o |
18 | 21 | ||
19 | # connection tracking helpers | 22 | # connection tracking helpers |
23 | obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o | ||
20 | obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o | 24 | obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o |
21 | obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o | 25 | obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o |
22 | obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o | 26 | obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o |
@@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o | |||
24 | obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o | 28 | obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o |
25 | 29 | ||
26 | # NAT helpers | 30 | # NAT helpers |
31 | obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o | ||
27 | obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o | 32 | obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o |
28 | obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o | 33 | obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o |
29 | obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o | 34 | obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 19cba16e6e1e..c1f82e0c81cf 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
@@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) | |||
233 | 233 | ||
234 | /* Just find a expectation corresponding to a tuple. */ | 234 | /* Just find a expectation corresponding to a tuple. */ |
235 | struct ip_conntrack_expect * | 235 | struct ip_conntrack_expect * |
236 | ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) | 236 | ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) |
237 | { | 237 | { |
238 | struct ip_conntrack_expect *i; | 238 | struct ip_conntrack_expect *i; |
239 | 239 | ||
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, | |||
1143 | if (del_timer(&ct->timeout)) { | 1143 | if (del_timer(&ct->timeout)) { |
1144 | ct->timeout.expires = jiffies + extra_jiffies; | 1144 | ct->timeout.expires = jiffies + extra_jiffies; |
1145 | add_timer(&ct->timeout); | 1145 | add_timer(&ct->timeout); |
1146 | ip_conntrack_event_cache(IPCT_REFRESH, skb); | 1146 | /* FIXME: We lose some REFRESH events if this function |
1147 | * is called without an skb. I'll fix this later -HW */ | ||
1148 | if (skb) | ||
1149 | ip_conntrack_event_cache(IPCT_REFRESH, skb); | ||
1147 | } | 1150 | } |
1148 | ct_add_counters(ct, ctinfo, skb); | 1151 | ct_add_counters(ct, ctinfo, skb); |
1149 | write_unlock_bh(&ip_conntrack_lock); | 1152 | write_unlock_bh(&ip_conntrack_lock); |
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c new file mode 100644 index 000000000000..79db5b70d5f6 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c | |||
@@ -0,0 +1,805 @@ | |||
1 | /* | ||
2 | * ip_conntrack_pptp.c - Version 3.1 | |
3 | * | ||
4 | * Connection tracking support for PPTP (Point to Point Tunneling Protocol). | ||
5 | * PPTP is a protocol for creating virtual private networks. | |
6 | * It is a specification defined by Microsoft and some vendors | ||
7 | * working with Microsoft. PPTP is built on top of a modified | ||
8 | * version of the Internet Generic Routing Encapsulation Protocol. | ||
9 | * GRE is defined in RFC 1701 and RFC 1702. Documentation of | ||
10 | * PPTP can be found in RFC 2637 | ||
11 | * | ||
12 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
13 | * | ||
14 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
15 | * | ||
16 | * Limitations: | ||
17 | * - We blindly assume that control connections are always | ||
18 | * established in PNS->PAC direction. This is a violation | ||
19 | * of RFC 2637 | |
20 | * - We can only support one single call within each session | ||
21 | * | ||
22 | * TODO: | ||
23 | * - testing of incoming PPTP calls | ||
24 | * | ||
25 | * Changes: | ||
26 | * 2002-02-05 - Version 1.3 | ||
27 | * - Call ip_conntrack_unexpect_related() from | ||
28 | * pptp_destroy_siblings() to destroy expectations in case | ||
29 | * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen | ||
30 | * (Philip Craig <philipc@snapgear.com>) | ||
31 | * - Add Version information at module loadtime | ||
32 | * 2002-02-10 - Version 1.6 | ||
33 | * - move to C99 style initializers | ||
34 | * - remove second expectation if first arrives | ||
35 | * 2004-10-22 - Version 2.0 | ||
36 | * - merge Mandrake's 2.6.x port with recent 2.6.x API changes | ||
37 | * - fix lots of linear skb assumptions from Mandrake's port | ||
38 | * 2005-06-10 - Version 2.1 | ||
39 | * - use ip_conntrack_expect_free() instead of kfree() on the | ||
40 | * expect's (which are from the slab for quite some time) | ||
41 | * 2005-06-10 - Version 3.0 | ||
42 | * - port helper to post-2.6.11 API changes, | ||
43 | * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) | ||
44 | * 2005-07-30 - Version 3.1 | ||
45 | * - port helper to 2.6.13 API changes | ||
46 | * | ||
47 | */ | ||
48 | |||
49 | #include <linux/config.h> | ||
50 | #include <linux/module.h> | ||
51 | #include <linux/netfilter.h> | ||
52 | #include <linux/ip.h> | ||
53 | #include <net/checksum.h> | ||
54 | #include <net/tcp.h> | ||
55 | |||
56 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
57 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
58 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
59 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
60 | #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> | ||
61 | |||
62 | #define IP_CT_PPTP_VERSION "3.1" | ||
63 | |||
64 | MODULE_LICENSE("GPL"); | ||
65 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
66 | MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); | ||
67 | |||
68 | static DEFINE_SPINLOCK(ip_pptp_lock); | ||
69 | |||
70 | int | ||
71 | (*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, | ||
72 | struct ip_conntrack *ct, | ||
73 | enum ip_conntrack_info ctinfo, | ||
74 | struct PptpControlHeader *ctlh, | ||
75 | union pptp_ctrl_union *pptpReq); | ||
76 | |||
77 | int | ||
78 | (*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, | ||
79 | struct ip_conntrack *ct, | ||
80 | enum ip_conntrack_info ctinfo, | ||
81 | struct PptpControlHeader *ctlh, | ||
82 | union pptp_ctrl_union *pptpReq); | ||
83 | |||
84 | int | ||
85 | (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, | ||
86 | struct ip_conntrack_expect *expect_reply); | ||
87 | |||
88 | void | ||
89 | (*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, | ||
90 | struct ip_conntrack_expect *exp); | ||
91 | |||
92 | #if 0 | ||
93 | /* PptpControlMessageType names */ | ||
94 | const char *pptp_msg_name[] = { | ||
95 | "UNKNOWN_MESSAGE", | ||
96 | "START_SESSION_REQUEST", | ||
97 | "START_SESSION_REPLY", | ||
98 | "STOP_SESSION_REQUEST", | ||
99 | "STOP_SESSION_REPLY", | ||
100 | "ECHO_REQUEST", | ||
101 | "ECHO_REPLY", | ||
102 | "OUT_CALL_REQUEST", | ||
103 | "OUT_CALL_REPLY", | ||
104 | "IN_CALL_REQUEST", | ||
105 | "IN_CALL_REPLY", | ||
106 | "IN_CALL_CONNECT", | ||
107 | "CALL_CLEAR_REQUEST", | ||
108 | "CALL_DISCONNECT_NOTIFY", | ||
109 | "WAN_ERROR_NOTIFY", | ||
110 | "SET_LINK_INFO" | ||
111 | }; | ||
112 | EXPORT_SYMBOL(pptp_msg_name); | ||
113 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) | ||
114 | #else | ||
115 | #define DEBUGP(format, args...) | ||
116 | #endif | ||
117 | |||
118 | #define SECS *HZ | ||
119 | #define MINS * 60 SECS | ||
120 | #define HOURS * 60 MINS | ||
121 | |||
122 | #define PPTP_GRE_TIMEOUT (10 MINS) | ||
123 | #define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) | ||
124 | |||
125 | static void pptp_expectfn(struct ip_conntrack *ct, | ||
126 | struct ip_conntrack_expect *exp) | ||
127 | { | ||
128 | DEBUGP("increasing timeouts\n"); | ||
129 | |||
130 | /* increase timeout of GRE data channel conntrack entry */ | ||
131 | ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; | ||
132 | ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; | ||
133 | |||
134 | /* Can you see how rusty this code is, compared with the pre-2.6.11 | ||
135 | * one? That's what happened to my shiny newnat of 2002 ;( -HW */ | ||
136 | |||
137 | if (!ip_nat_pptp_hook_expectfn) { | ||
138 | struct ip_conntrack_tuple inv_t; | ||
139 | struct ip_conntrack_expect *exp_other; | ||
140 | |||
141 | /* obviously this tuple inversion only works until you do NAT */ | ||
142 | invert_tuplepr(&inv_t, &exp->tuple); | ||
143 | DEBUGP("trying to unexpect other dir: "); | ||
144 | DUMP_TUPLE(&inv_t); | ||
145 | |||
146 | exp_other = ip_conntrack_expect_find(&inv_t); | ||
147 | if (exp_other) { | ||
148 | /* delete other expectation. */ | ||
149 | DEBUGP("found\n"); | ||
150 | ip_conntrack_unexpect_related(exp_other); | ||
151 | ip_conntrack_expect_put(exp_other); | ||
152 | } else { | ||
153 | DEBUGP("not found\n"); | ||
154 | } | ||
155 | } else { | ||
156 | /* we need more than simple inversion */ | ||
157 | ip_nat_pptp_hook_expectfn(ct, exp); | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) | ||
162 | { | ||
163 | struct ip_conntrack_tuple_hash *h; | ||
164 | struct ip_conntrack_expect *exp; | ||
165 | |||
166 | DEBUGP("trying to timeout ct or exp for tuple "); | ||
167 | DUMP_TUPLE(t); | ||
168 | |||
169 | h = ip_conntrack_find_get(t, NULL); | ||
170 | if (h) { | ||
171 | struct ip_conntrack *sibling = tuplehash_to_ctrack(h); | ||
172 | DEBUGP("setting timeout of conntrack %p to 0\n", sibling); | ||
173 | sibling->proto.gre.timeout = 0; | ||
174 | sibling->proto.gre.stream_timeout = 0; | ||
175 | /* expire the sibling now: stop its pending timer and run the timeout handler directly */ | |
176 | if (del_timer(&sibling->timeout)) | ||
177 | sibling->timeout.function((unsigned long)sibling); | ||
178 | ip_conntrack_put(sibling); | ||
179 | return 1; | ||
180 | } else { | ||
181 | exp = ip_conntrack_expect_find(t); | ||
182 | if (exp) { | ||
183 | DEBUGP("unexpect_related of expect %p\n", exp); | ||
184 | ip_conntrack_unexpect_related(exp); | ||
185 | ip_conntrack_expect_put(exp); | ||
186 | return 1; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | |||
194 | /* timeout GRE data connections */ | ||
195 | static void pptp_destroy_siblings(struct ip_conntrack *ct) | ||
196 | { | ||
197 | struct ip_conntrack_tuple t; | ||
198 | |||
199 | /* Since ct->sibling_list has literally rusted away in 2.6.11, | ||
200 | * we now need another way to find out about our sibling | ||
201 | * conntrack and expects... -HW */ | |
202 | |||
203 | /* try original (pns->pac) tuple */ | ||
204 | memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); | ||
205 | t.dst.protonum = IPPROTO_GRE; | ||
206 | t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); | ||
207 | t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); | ||
208 | |||
209 | if (!destroy_sibling_or_exp(&t)) | ||
210 | DEBUGP("failed to timeout original pns->pac ct/exp\n"); | ||
211 | |||
212 | /* try reply (pac->pns) tuple */ | ||
213 | memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); | ||
214 | t.dst.protonum = IPPROTO_GRE; | ||
215 | t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); | ||
216 | t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); | ||
217 | |||
218 | if (!destroy_sibling_or_exp(&t)) | ||
219 | DEBUGP("failed to timeout reply pac->pns ct/exp\n"); | ||
220 | } | ||
221 | |||
222 | /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ | ||
223 | static inline int | ||
224 | exp_gre(struct ip_conntrack *master, | ||
225 | u_int32_t seq, | ||
226 | u_int16_t callid, | ||
227 | u_int16_t peer_callid) | ||
228 | { | ||
229 | struct ip_conntrack_tuple inv_tuple; | ||
230 | struct ip_conntrack_tuple exp_tuples[] = { | ||
231 | /* tuple in original direction, PNS->PAC */ | ||
232 | { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, | ||
233 | .u = { .gre = { .key = peer_callid } } | ||
234 | }, | ||
235 | .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, | ||
236 | .u = { .gre = { .key = callid } }, | ||
237 | .protonum = IPPROTO_GRE | ||
238 | }, | ||
239 | }, | ||
240 | /* tuple in reply direction, PAC->PNS */ | ||
241 | { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, | ||
242 | .u = { .gre = { .key = callid } } | ||
243 | }, | ||
244 | .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, | ||
245 | .u = { .gre = { .key = peer_callid } }, | ||
246 | .protonum = IPPROTO_GRE | ||
247 | }, | ||
248 | } | ||
249 | }; | ||
250 | struct ip_conntrack_expect *exp_orig, *exp_reply; | ||
251 | int ret = 1; | ||
252 | |||
253 | exp_orig = ip_conntrack_expect_alloc(master); | ||
254 | if (exp_orig == NULL) | ||
255 | goto out; | ||
256 | |||
257 | exp_reply = ip_conntrack_expect_alloc(master); | ||
258 | if (exp_reply == NULL) | ||
259 | goto out_put_orig; | ||
260 | |||
261 | memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); | ||
262 | |||
263 | exp_orig->mask.src.ip = 0xffffffff; | ||
264 | exp_orig->mask.src.u.all = 0; | ||
265 | exp_orig->mask.dst.u.all = 0; | ||
266 | exp_orig->mask.dst.u.gre.key = 0xffff; | ||
267 | exp_orig->mask.dst.ip = 0xffffffff; | ||
268 | exp_orig->mask.dst.protonum = 0xff; | ||
269 | |||
270 | exp_orig->master = master; | ||
271 | exp_orig->expectfn = pptp_expectfn; | ||
272 | exp_orig->flags = 0; | ||
273 | |||
274 | exp_orig->dir = IP_CT_DIR_ORIGINAL; | ||
275 | |||
276 | /* both expectations are identical apart from tuple */ | ||
277 | memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); | ||
278 | memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); | ||
279 | |||
280 | exp_reply->dir = !exp_orig->dir; | ||
281 | |||
282 | if (ip_nat_pptp_hook_exp_gre) | ||
283 | ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); | ||
284 | else { | ||
285 | |||
286 | DEBUGP("calling expect_related PNS->PAC"); | ||
287 | DUMP_TUPLE(&exp_orig->tuple); | ||
288 | |||
289 | if (ip_conntrack_expect_related(exp_orig) != 0) { | ||
290 | DEBUGP("cannot expect_related()\n"); | ||
291 | goto out_put_both; | ||
292 | } | ||
293 | |||
294 | DEBUGP("calling expect_related PAC->PNS"); | ||
295 | DUMP_TUPLE(&exp_reply->tuple); | ||
296 | |||
297 | if (ip_conntrack_expect_related(exp_reply) != 0) { | ||
298 | DEBUGP("cannot expect_related()\n"); | ||
299 | goto out_unexpect_orig; | ||
300 | } | ||
301 | |||
302 | /* Add GRE keymap entries */ | ||
303 | if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { | ||
304 | DEBUGP("cannot keymap_add() exp\n"); | ||
305 | goto out_unexpect_both; | ||
306 | } | ||
307 | |||
308 | invert_tuplepr(&inv_tuple, &exp_reply->tuple); | ||
309 | if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { | ||
310 | ip_ct_gre_keymap_destroy(master); | ||
311 | DEBUGP("cannot keymap_add() exp_inv\n"); | ||
312 | goto out_unexpect_both; | ||
313 | } | ||
314 | ret = 0; | ||
315 | } | ||
316 | |||
317 | out_put_both: | ||
318 | ip_conntrack_expect_put(exp_reply); | ||
319 | out_put_orig: | ||
320 | ip_conntrack_expect_put(exp_orig); | ||
321 | out: | ||
322 | return ret; | ||
323 | |||
324 | out_unexpect_both: | ||
325 | ip_conntrack_unexpect_related(exp_reply); | ||
326 | out_unexpect_orig: | ||
327 | ip_conntrack_unexpect_related(exp_orig); | ||
328 | goto out_put_both; | ||
329 | } | ||
330 | |||
331 | static inline int | ||
332 | pptp_inbound_pkt(struct sk_buff **pskb, | ||
333 | struct tcphdr *tcph, | ||
334 | unsigned int nexthdr_off, | ||
335 | unsigned int datalen, | ||
336 | struct ip_conntrack *ct, | ||
337 | enum ip_conntrack_info ctinfo) | ||
338 | { | ||
339 | struct PptpControlHeader _ctlh, *ctlh; | ||
340 | unsigned int reqlen; | ||
341 | union pptp_ctrl_union _pptpReq, *pptpReq; | ||
342 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; | ||
343 | u_int16_t msg, *cid, *pcid; | ||
344 | u_int32_t seq; | ||
345 | |||
346 | ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); | ||
347 | if (!ctlh) { | ||
348 | DEBUGP("error during skb_header_pointer\n"); | ||
349 | return NF_ACCEPT; | ||
350 | } | ||
351 | nexthdr_off += sizeof(_ctlh); | ||
352 | datalen -= sizeof(_ctlh); | ||
353 | |||
354 | reqlen = datalen; | ||
355 | if (reqlen > sizeof(*pptpReq)) | ||
356 | reqlen = sizeof(*pptpReq); | ||
357 | pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); | ||
358 | if (!pptpReq) { | ||
359 | DEBUGP("error during skb_header_pointer\n"); | ||
360 | return NF_ACCEPT; | ||
361 | } | ||
362 | |||
363 | msg = ntohs(ctlh->messageType); | ||
364 | DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); | ||
365 | |||
366 | switch (msg) { | ||
367 | case PPTP_START_SESSION_REPLY: | ||
368 | if (reqlen < sizeof(_pptpReq.srep)) { | ||
369 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
370 | break; | ||
371 | } | ||
372 | |||
373 | /* server confirms new control session */ | ||
374 | if (info->sstate < PPTP_SESSION_REQUESTED) { | ||
375 | DEBUGP("%s without START_SESS_REQUEST\n", | ||
376 | pptp_msg_name[msg]); | ||
377 | break; | ||
378 | } | ||
379 | if (pptpReq->srep.resultCode == PPTP_START_OK) | ||
380 | info->sstate = PPTP_SESSION_CONFIRMED; | ||
381 | else | ||
382 | info->sstate = PPTP_SESSION_ERROR; | ||
383 | break; | ||
384 | |||
385 | case PPTP_STOP_SESSION_REPLY: | ||
386 | if (reqlen < sizeof(_pptpReq.strep)) { | ||
387 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
388 | break; | ||
389 | } | ||
390 | |||
391 | /* server confirms end of control session */ | ||
392 | if (info->sstate > PPTP_SESSION_STOPREQ) { | ||
393 | DEBUGP("%s without STOP_SESS_REQUEST\n", | ||
394 | pptp_msg_name[msg]); | ||
395 | break; | ||
396 | } | ||
397 | if (pptpReq->strep.resultCode == PPTP_STOP_OK) | ||
398 | info->sstate = PPTP_SESSION_NONE; | ||
399 | else | ||
400 | info->sstate = PPTP_SESSION_ERROR; | ||
401 | break; | ||
402 | |||
403 | case PPTP_OUT_CALL_REPLY: | ||
404 | if (reqlen < sizeof(_pptpReq.ocack)) { | ||
405 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
406 | break; | ||
407 | } | ||
408 | |||
409 | /* server accepted call, we now expect GRE frames */ | ||
410 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | ||
411 | DEBUGP("%s but no session\n", pptp_msg_name[msg]); | ||
412 | break; | ||
413 | } | ||
414 | if (info->cstate != PPTP_CALL_OUT_REQ && | ||
415 | info->cstate != PPTP_CALL_OUT_CONF) { | ||
416 | DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); | ||
417 | break; | ||
418 | } | ||
419 | if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { | ||
420 | info->cstate = PPTP_CALL_NONE; | ||
421 | break; | ||
422 | } | ||
423 | |||
424 | cid = &pptpReq->ocack.callID; | ||
425 | pcid = &pptpReq->ocack.peersCallID; | ||
426 | |||
427 | info->pac_call_id = ntohs(*cid); | ||
428 | |||
429 | if (htons(info->pns_call_id) != *pcid) { | ||
430 | DEBUGP("%s for unknown callid %u\n", | ||
431 | pptp_msg_name[msg], ntohs(*pcid)); | ||
432 | break; | ||
433 | } | ||
434 | |||
435 | DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], | ||
436 | ntohs(*cid), ntohs(*pcid)); | ||
437 | |||
438 | info->cstate = PPTP_CALL_OUT_CONF; | ||
439 | |||
440 | seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) | ||
441 | + sizeof(struct PptpControlHeader) | ||
442 | + ((void *)pcid - (void *)pptpReq); | ||
443 | |||
444 | if (exp_gre(ct, seq, *cid, *pcid) != 0) | ||
445 | printk("ip_conntrack_pptp: error during exp_gre\n"); | ||
446 | break; | ||
447 | |||
448 | case PPTP_IN_CALL_REQUEST: | ||
449 | if (reqlen < sizeof(_pptpReq.icack)) { | ||
450 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
451 | break; | ||
452 | } | ||
453 | |||
454 | /* server tells us about incoming call request */ | ||
455 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | ||
456 | DEBUGP("%s but no session\n", pptp_msg_name[msg]); | ||
457 | break; | ||
458 | } | ||
459 | pcid = &pptpReq->icack.peersCallID; | ||
460 | DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); | ||
461 | info->cstate = PPTP_CALL_IN_REQ; | ||
462 | info->pac_call_id = ntohs(*pcid); | ||
463 | break; | ||
464 | |||
465 | case PPTP_IN_CALL_CONNECT: | ||
466 | if (reqlen < sizeof(_pptpReq.iccon)) { | ||
467 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
468 | break; | ||
469 | } | ||
470 | |||
471 | /* server tells us about incoming call established */ | ||
472 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | ||
473 | DEBUGP("%s but no session\n", pptp_msg_name[msg]); | ||
474 | break; | ||
475 | } | ||
476 | if (info->sstate != PPTP_CALL_IN_REP | ||
477 | && info->sstate != PPTP_CALL_IN_CONF) { | ||
478 | DEBUGP("%s but never sent IN_CALL_REPLY\n", | ||
479 | pptp_msg_name[msg]); | ||
480 | break; | ||
481 | } | ||
482 | |||
483 | pcid = &pptpReq->iccon.peersCallID; | ||
484 | cid = &info->pac_call_id; | ||
485 | |||
486 | if (info->pns_call_id != ntohs(*pcid)) { | ||
487 | DEBUGP("%s for unknown CallID %u\n", | ||
488 | pptp_msg_name[msg], ntohs(*cid)); | ||
489 | break; | ||
490 | } | ||
491 | |||
492 | DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); | ||
493 | info->cstate = PPTP_CALL_IN_CONF; | ||
494 | |||
495 | /* we expect a GRE connection from PAC to PNS */ | ||
496 | seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) | ||
497 | + sizeof(struct PptpControlHeader) | ||
498 | + ((void *)pcid - (void *)pptpReq); | ||
499 | |||
500 | if (exp_gre(ct, seq, *cid, *pcid) != 0) | ||
501 | printk("ip_conntrack_pptp: error during exp_gre\n"); | ||
502 | |||
503 | break; | ||
504 | |||
505 | case PPTP_CALL_DISCONNECT_NOTIFY: | ||
506 | if (reqlen < sizeof(_pptpReq.disc)) { | ||
507 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
508 | break; | ||
509 | } | ||
510 | |||
511 | /* server confirms disconnect */ | ||
512 | cid = &pptpReq->disc.callID; | ||
513 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); | ||
514 | info->cstate = PPTP_CALL_NONE; | ||
515 | |||
516 | /* untrack this call id, unexpect GRE packets */ | ||
517 | pptp_destroy_siblings(ct); | ||
518 | break; | ||
519 | |||
520 | case PPTP_WAN_ERROR_NOTIFY: | ||
521 | break; | ||
522 | |||
523 | case PPTP_ECHO_REQUEST: | ||
524 | case PPTP_ECHO_REPLY: | ||
525 | /* I don't have to explain these ;) */ | ||
526 | break; | ||
527 | default: | ||
528 | DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) | ||
529 | ? pptp_msg_name[msg]:pptp_msg_name[0], msg); | ||
530 | break; | ||
531 | } | ||
532 | |||
533 | |||
534 | if (ip_nat_pptp_hook_inbound) | ||
535 | return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, | ||
536 | pptpReq); | ||
537 | |||
538 | return NF_ACCEPT; | ||
539 | |||
540 | } | ||
541 | |||
542 | static inline int | ||
543 | pptp_outbound_pkt(struct sk_buff **pskb, | ||
544 | struct tcphdr *tcph, | ||
545 | unsigned int nexthdr_off, | ||
546 | unsigned int datalen, | ||
547 | struct ip_conntrack *ct, | ||
548 | enum ip_conntrack_info ctinfo) | ||
549 | { | ||
550 | struct PptpControlHeader _ctlh, *ctlh; | ||
551 | unsigned int reqlen; | ||
552 | union pptp_ctrl_union _pptpReq, *pptpReq; | ||
553 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; | ||
554 | u_int16_t msg, *cid, *pcid; | ||
555 | |||
556 | ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); | ||
557 | if (!ctlh) | ||
558 | return NF_ACCEPT; | ||
559 | nexthdr_off += sizeof(_ctlh); | ||
560 | datalen -= sizeof(_ctlh); | ||
561 | |||
562 | reqlen = datalen; | ||
563 | if (reqlen > sizeof(*pptpReq)) | ||
564 | reqlen = sizeof(*pptpReq); | ||
565 | pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); | ||
566 | if (!pptpReq) | ||
567 | return NF_ACCEPT; | ||
568 | |||
569 | msg = ntohs(ctlh->messageType); | ||
570 | DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); | ||
571 | |||
572 | switch (msg) { | ||
573 | case PPTP_START_SESSION_REQUEST: | ||
574 | /* client requests for new control session */ | ||
575 | if (info->sstate != PPTP_SESSION_NONE) { | ||
576 | DEBUGP("%s but we already have one", | ||
577 | pptp_msg_name[msg]); | ||
578 | } | ||
579 | info->sstate = PPTP_SESSION_REQUESTED; | ||
580 | break; | ||
581 | case PPTP_STOP_SESSION_REQUEST: | ||
582 | /* client requests end of control session */ | ||
583 | info->sstate = PPTP_SESSION_STOPREQ; | ||
584 | break; | ||
585 | |||
586 | case PPTP_OUT_CALL_REQUEST: | ||
587 | if (reqlen < sizeof(_pptpReq.ocreq)) { | ||
588 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
589 | /* FIXME: break; */ | ||
590 | } | ||
591 | |||
592 | /* client initiating connection to server */ | ||
593 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | ||
594 | DEBUGP("%s but no session\n", | ||
595 | pptp_msg_name[msg]); | ||
596 | break; | ||
597 | } | ||
598 | info->cstate = PPTP_CALL_OUT_REQ; | ||
599 | /* track PNS call id */ | ||
600 | cid = &pptpReq->ocreq.callID; | ||
601 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); | ||
602 | info->pns_call_id = ntohs(*cid); | ||
603 | break; | ||
604 | case PPTP_IN_CALL_REPLY: | ||
605 | if (reqlen < sizeof(_pptpReq.icack)) { | ||
606 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
607 | break; | ||
608 | } | ||
609 | |||
610 | /* client answers incoming call */ | ||
611 | if (info->cstate != PPTP_CALL_IN_REQ | ||
612 | && info->cstate != PPTP_CALL_IN_REP) { | ||
613 | DEBUGP("%s without incall_req\n", | ||
614 | pptp_msg_name[msg]); | ||
615 | break; | ||
616 | } | ||
617 | if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { | ||
618 | info->cstate = PPTP_CALL_NONE; | ||
619 | break; | ||
620 | } | ||
621 | pcid = &pptpReq->icack.peersCallID; | ||
622 | if (info->pac_call_id != ntohs(*pcid)) { | ||
623 | DEBUGP("%s for unknown call %u\n", | ||
624 | pptp_msg_name[msg], ntohs(*pcid)); | ||
625 | break; | ||
626 | } | ||
627 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); | ||
628 | /* part two of the three-way handshake */ | ||
629 | info->cstate = PPTP_CALL_IN_REP; | ||
630 | info->pns_call_id = ntohs(pptpReq->icack.callID); | ||
631 | break; | ||
632 | |||
633 | case PPTP_CALL_CLEAR_REQUEST: | ||
634 | /* client requests hangup of call */ | ||
635 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | ||
636 | DEBUGP("CLEAR_CALL but no session\n"); | ||
637 | break; | ||
638 | } | ||
639 | /* FUTURE: iterate over all calls and check if | ||
640 | * call ID is valid. We don't do this without newnat, | ||
641 | * because we only know about last call */ | ||
642 | info->cstate = PPTP_CALL_CLEAR_REQ; | ||
643 | break; | ||
644 | case PPTP_SET_LINK_INFO: | ||
645 | break; | ||
646 | case PPTP_ECHO_REQUEST: | ||
647 | case PPTP_ECHO_REPLY: | ||
648 | /* I don't have to explain these ;) */ | ||
649 | break; | ||
650 | default: | ||
651 | DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? | ||
652 | pptp_msg_name[msg]:pptp_msg_name[0], msg); | ||
653 | /* unknown: no need to create GRE masq table entry */ | ||
654 | break; | ||
655 | } | ||
656 | |||
657 | if (ip_nat_pptp_hook_outbound) | ||
658 | return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, | ||
659 | pptpReq); | ||
660 | |||
661 | return NF_ACCEPT; | ||
662 | } | ||
663 | |||
664 | |||
665 | /* track caller id inside control connection, call expect_related */ | ||
666 | static int | ||
667 | conntrack_pptp_help(struct sk_buff **pskb, | ||
668 | struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) | ||
669 | |||
670 | { | ||
671 | struct pptp_pkt_hdr _pptph, *pptph; | ||
672 | struct tcphdr _tcph, *tcph; | ||
673 | u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; | ||
674 | u_int32_t datalen; | ||
675 | int dir = CTINFO2DIR(ctinfo); | ||
676 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; | ||
677 | unsigned int nexthdr_off; | ||
678 | |||
679 | int oldsstate, oldcstate; | ||
680 | int ret; | ||
681 | |||
682 | /* don't do any tracking before tcp handshake complete */ | ||
683 | if (ctinfo != IP_CT_ESTABLISHED | ||
684 | && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { | ||
685 | DEBUGP("ctinfo = %u, skipping\n", ctinfo); | ||
686 | return NF_ACCEPT; | ||
687 | } | ||
688 | |||
689 | nexthdr_off = (*pskb)->nh.iph->ihl*4; | ||
690 | tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); | ||
691 | BUG_ON(!tcph); | ||
692 | nexthdr_off += tcph->doff * 4; | ||
693 | datalen = tcplen - tcph->doff * 4; | ||
694 | |||
695 | if (tcph->fin || tcph->rst) { | ||
696 | DEBUGP("RST/FIN received, timeouting GRE\n"); | ||
697 | /* can't do this after real newnat */ | ||
698 | info->cstate = PPTP_CALL_NONE; | ||
699 | |||
700 | /* untrack this call id, unexpect GRE packets */ | ||
701 | pptp_destroy_siblings(ct); | ||
702 | } | ||
703 | |||
704 | pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); | ||
705 | if (!pptph) { | ||
706 | DEBUGP("no full PPTP header, can't track\n"); | ||
707 | return NF_ACCEPT; | ||
708 | } | ||
709 | nexthdr_off += sizeof(_pptph); | ||
710 | datalen -= sizeof(_pptph); | ||
711 | |||
712 | /* if it's not a control message we can't do anything with it */ | ||
713 | if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || | ||
714 | ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { | ||
715 | DEBUGP("not a control packet\n"); | ||
716 | return NF_ACCEPT; | ||
717 | } | ||
718 | |||
719 | oldsstate = info->sstate; | ||
720 | oldcstate = info->cstate; | ||
721 | |||
722 | spin_lock_bh(&ip_pptp_lock); | ||
723 | |||
724 | /* FIXME: We just blindly assume that the control connection is always | ||
725 | * established from PNS->PAC. However, RFC makes no guarantee */ | ||
726 | if (dir == IP_CT_DIR_ORIGINAL) | ||
727 | /* client -> server (PNS -> PAC) */ | ||
728 | ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, | ||
729 | ctinfo); | ||
730 | else | ||
731 | /* server -> client (PAC -> PNS) */ | ||
732 | ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, | ||
733 | ctinfo); | ||
734 | DEBUGP("sstate: %d->%d, cstate: %d->%d\n", | ||
735 | oldsstate, info->sstate, oldcstate, info->cstate); | ||
736 | spin_unlock_bh(&ip_pptp_lock); | ||
737 | |||
738 | return ret; | ||
739 | } | ||
740 | |||
741 | /* control protocol helper */ | ||
742 | static struct ip_conntrack_helper pptp = { | ||
743 | .list = { NULL, NULL }, | ||
744 | .name = "pptp", | ||
745 | .me = THIS_MODULE, | ||
746 | .max_expected = 2, | ||
747 | .timeout = 5 * 60, | ||
748 | .tuple = { .src = { .ip = 0, | ||
749 | .u = { .tcp = { .port = | ||
750 | __constant_htons(PPTP_CONTROL_PORT) } } | ||
751 | }, | ||
752 | .dst = { .ip = 0, | ||
753 | .u = { .all = 0 }, | ||
754 | .protonum = IPPROTO_TCP | ||
755 | } | ||
756 | }, | ||
757 | .mask = { .src = { .ip = 0, | ||
758 | .u = { .tcp = { .port = 0xffff } } | ||
759 | }, | ||
760 | .dst = { .ip = 0, | ||
761 | .u = { .all = 0 }, | ||
762 | .protonum = 0xff | ||
763 | } | ||
764 | }, | ||
765 | .help = conntrack_pptp_help | ||
766 | }; | ||
767 | |||
768 | extern void __exit ip_ct_proto_gre_fini(void); | ||
769 | extern int __init ip_ct_proto_gre_init(void); | ||
770 | |||
771 | /* ip_conntrack_pptp initialization */ | ||
772 | static int __init init(void) | ||
773 | { | ||
774 | int retcode; | ||
775 | |||
776 | retcode = ip_ct_proto_gre_init(); | ||
777 | if (retcode < 0) | ||
778 | return retcode; | ||
779 | |||
780 | DEBUGP(" registering helper\n"); | ||
781 | if ((retcode = ip_conntrack_helper_register(&pptp))) { | ||
782 | printk(KERN_ERR "Unable to register conntrack application " | ||
783 | "helper for pptp: %d\n", retcode); | ||
784 | ip_ct_proto_gre_fini(); | ||
785 | return retcode; | ||
786 | } | ||
787 | |||
788 | printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); | ||
789 | return 0; | ||
790 | } | ||
791 | |||
792 | static void __exit fini(void) | ||
793 | { | ||
794 | ip_conntrack_helper_unregister(&pptp); | ||
795 | ip_ct_proto_gre_fini(); | ||
796 | printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); | ||
797 | } | ||
798 | |||
799 | module_init(init); | ||
800 | module_exit(fini); | ||
801 | |||
802 | EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); | ||
803 | EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); | ||
804 | EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); | ||
805 | EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 15aef3564742..b08a432efcf8 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c | |||
@@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, | |||
1270 | if (err < 0) | 1270 | if (err < 0) |
1271 | return err; | 1271 | return err; |
1272 | 1272 | ||
1273 | exp = ip_conntrack_expect_find_get(&tuple); | 1273 | exp = ip_conntrack_expect_find(&tuple); |
1274 | if (!exp) | 1274 | if (!exp) |
1275 | return -ENOENT; | 1275 | return -ENOENT; |
1276 | 1276 | ||
@@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
1318 | return err; | 1318 | return err; |
1319 | 1319 | ||
1320 | /* bump usage count to 2 */ | 1320 | /* bump usage count to 2 */ |
1321 | exp = ip_conntrack_expect_find_get(&tuple); | 1321 | exp = ip_conntrack_expect_find(&tuple); |
1322 | if (!exp) | 1322 | if (!exp) |
1323 | return -ENOENT; | 1323 | return -ENOENT; |
1324 | 1324 | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c new file mode 100644 index 000000000000..de3cb9db6f85 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c | |||
@@ -0,0 +1,327 @@ | |||
1 | /* | ||
2 | * ip_conntrack_proto_gre.c - Version 3.0 | ||
3 | * | ||
4 | * Connection tracking protocol helper module for GRE. | ||
5 | * | ||
6 | * GRE is a generic encapsulation protocol, which is generally not very | ||
7 | * suited for NAT, as it has no protocol-specific part as port numbers. | ||
8 | * | ||
9 | * It has an optional key field, which may help us distinguishing two | ||
10 | * connections between the same two hosts. | ||
11 | * | ||
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | ||
13 | * | ||
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | ||
15 | * field called "CallID", which serves us for the same purpose as the key | ||
16 | * field in plain GRE. | ||
17 | * | ||
18 | * Documentation about PPTP can be found in RFC 2637 | ||
19 | * | ||
20 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
21 | * | ||
22 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/timer.h> | ||
30 | #include <linux/netfilter.h> | ||
31 | #include <linux/ip.h> | ||
32 | #include <linux/in.h> | ||
33 | #include <linux/list.h> | ||
34 | |||
35 | static DEFINE_RWLOCK(ip_ct_gre_lock); | ||
36 | #define ASSERT_READ_LOCK(x) | ||
37 | #define ASSERT_WRITE_LOCK(x) | ||
38 | |||
39 | #include <linux/netfilter_ipv4/listhelp.h> | ||
40 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
41 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
42 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
43 | |||
44 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
45 | #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> | ||
46 | |||
47 | MODULE_LICENSE("GPL"); | ||
48 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
49 | MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); | ||
50 | |||
51 | /* shamelessly stolen from ip_conntrack_proto_udp.c */ | ||
52 | #define GRE_TIMEOUT (30*HZ) | ||
53 | #define GRE_STREAM_TIMEOUT (180*HZ) | ||
54 | |||
55 | #if 0 | ||
56 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) | ||
57 | #define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ | ||
58 | NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ | ||
59 | NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) | ||
60 | #else | ||
61 | #define DEBUGP(x, args...) | ||
62 | #define DUMP_TUPLE_GRE(x) | ||
63 | #endif | ||
64 | |||
65 | /* GRE KEYMAP HANDLING FUNCTIONS */ | ||
66 | static LIST_HEAD(gre_keymap_list); | ||
67 | |||
68 | static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, | ||
69 | const struct ip_conntrack_tuple *t) | ||
70 | { | ||
71 | return ((km->tuple.src.ip == t->src.ip) && | ||
72 | (km->tuple.dst.ip == t->dst.ip) && | ||
73 | (km->tuple.dst.protonum == t->dst.protonum) && | ||
74 | (km->tuple.dst.u.all == t->dst.u.all)); | ||
75 | } | ||
76 | |||
77 | /* look up the source key for a given tuple */ | ||
78 | static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) | ||
79 | { | ||
80 | struct ip_ct_gre_keymap *km; | ||
81 | u_int32_t key = 0; | ||
82 | |||
83 | read_lock_bh(&ip_ct_gre_lock); | ||
84 | km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, | ||
85 | struct ip_ct_gre_keymap *, t); | ||
86 | if (km) | ||
87 | key = km->tuple.src.u.gre.key; | ||
88 | read_unlock_bh(&ip_ct_gre_lock); | ||
89 | |||
90 | DEBUGP("lookup src key 0x%x up key for ", key); | ||
91 | DUMP_TUPLE_GRE(t); | ||
92 | |||
93 | return key; | ||
94 | } | ||
95 | |||
96 | /* add a single keymap entry, associate with specified master ct */ | ||
97 | int | ||
98 | ip_ct_gre_keymap_add(struct ip_conntrack *ct, | ||
99 | struct ip_conntrack_tuple *t, int reply) | ||
100 | { | ||
101 | struct ip_ct_gre_keymap **exist_km, *km, *old; | ||
102 | |||
103 | if (!ct->helper || strcmp(ct->helper->name, "pptp")) { | ||
104 | DEBUGP("refusing to add GRE keymap to non-pptp session\n"); | ||
105 | return -1; | ||
106 | } | ||
107 | |||
108 | if (!reply) | ||
109 | exist_km = &ct->help.ct_pptp_info.keymap_orig; | ||
110 | else | ||
111 | exist_km = &ct->help.ct_pptp_info.keymap_reply; | ||
112 | |||
113 | if (*exist_km) { | ||
114 | /* check whether it's a retransmission */ | ||
115 | old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, | ||
116 | struct ip_ct_gre_keymap *, t); | ||
117 | if (old == *exist_km) { | ||
118 | DEBUGP("retransmission\n"); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | DEBUGP("trying to override keymap_%s for ct %p\n", | ||
123 | reply? "reply":"orig", ct); | ||
124 | return -EEXIST; | ||
125 | } | ||
126 | |||
127 | km = kmalloc(sizeof(*km), GFP_ATOMIC); | ||
128 | if (!km) | ||
129 | return -ENOMEM; | ||
130 | |||
131 | memcpy(&km->tuple, t, sizeof(*t)); | ||
132 | *exist_km = km; | ||
133 | |||
134 | DEBUGP("adding new entry %p: ", km); | ||
135 | DUMP_TUPLE_GRE(&km->tuple); | ||
136 | |||
137 | write_lock_bh(&ip_ct_gre_lock); | ||
138 | list_append(&gre_keymap_list, km); | ||
139 | write_unlock_bh(&ip_ct_gre_lock); | ||
140 | |||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | /* destroy the keymap entries associated with specified master ct */ | ||
145 | void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) | ||
146 | { | ||
147 | DEBUGP("entering for ct %p\n", ct); | ||
148 | |||
149 | if (!ct->helper || strcmp(ct->helper->name, "pptp")) { | ||
150 | DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); | ||
151 | return; | ||
152 | } | ||
153 | |||
154 | write_lock_bh(&ip_ct_gre_lock); | ||
155 | if (ct->help.ct_pptp_info.keymap_orig) { | ||
156 | DEBUGP("removing %p from list\n", | ||
157 | ct->help.ct_pptp_info.keymap_orig); | ||
158 | list_del(&ct->help.ct_pptp_info.keymap_orig->list); | ||
159 | kfree(ct->help.ct_pptp_info.keymap_orig); | ||
160 | ct->help.ct_pptp_info.keymap_orig = NULL; | ||
161 | } | ||
162 | if (ct->help.ct_pptp_info.keymap_reply) { | ||
163 | DEBUGP("removing %p from list\n", | ||
164 | ct->help.ct_pptp_info.keymap_reply); | ||
165 | list_del(&ct->help.ct_pptp_info.keymap_reply->list); | ||
166 | kfree(ct->help.ct_pptp_info.keymap_reply); | ||
167 | ct->help.ct_pptp_info.keymap_reply = NULL; | ||
168 | } | ||
169 | write_unlock_bh(&ip_ct_gre_lock); | ||
170 | } | ||
171 | |||
172 | |||
173 | /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ | ||
174 | |||
175 | /* invert gre part of tuple */ | ||
176 | static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, | ||
177 | const struct ip_conntrack_tuple *orig) | ||
178 | { | ||
179 | tuple->dst.u.gre.key = orig->src.u.gre.key; | ||
180 | tuple->src.u.gre.key = orig->dst.u.gre.key; | ||
181 | |||
182 | return 1; | ||
183 | } | ||
184 | |||
185 | /* gre hdr info to tuple */ | ||
186 | static int gre_pkt_to_tuple(const struct sk_buff *skb, | ||
187 | unsigned int dataoff, | ||
188 | struct ip_conntrack_tuple *tuple) | ||
189 | { | ||
190 | struct gre_hdr_pptp _pgrehdr, *pgrehdr; | ||
191 | u_int32_t srckey; | ||
192 | struct gre_hdr _grehdr, *grehdr; | ||
193 | |||
194 | /* first only delinearize old RFC1701 GRE header */ | ||
195 | grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); | ||
196 | if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { | ||
197 | /* try to behave like "ip_conntrack_proto_generic" */ | ||
198 | tuple->src.u.all = 0; | ||
199 | tuple->dst.u.all = 0; | ||
200 | return 1; | ||
201 | } | ||
202 | |||
203 | /* PPTP header is variable length, only need up to the call_id field */ | ||
204 | pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); | ||
205 | if (!pgrehdr) | ||
206 | return 1; | ||
207 | |||
208 | if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { | ||
209 | DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | tuple->dst.u.gre.key = pgrehdr->call_id; | ||
214 | srckey = gre_keymap_lookup(tuple); | ||
215 | tuple->src.u.gre.key = srckey; | ||
216 | |||
217 | return 1; | ||
218 | } | ||
219 | |||
220 | /* print gre part of tuple */ | ||
221 | static int gre_print_tuple(struct seq_file *s, | ||
222 | const struct ip_conntrack_tuple *tuple) | ||
223 | { | ||
224 | return seq_printf(s, "srckey=0x%x dstkey=0x%x ", | ||
225 | ntohs(tuple->src.u.gre.key), | ||
226 | ntohs(tuple->dst.u.gre.key)); | ||
227 | } | ||
228 | |||
229 | /* print private data for conntrack */ | ||
230 | static int gre_print_conntrack(struct seq_file *s, | ||
231 | const struct ip_conntrack *ct) | ||
232 | { | ||
233 | return seq_printf(s, "timeout=%u, stream_timeout=%u ", | ||
234 | (ct->proto.gre.timeout / HZ), | ||
235 | (ct->proto.gre.stream_timeout / HZ)); | ||
236 | } | ||
237 | |||
238 | /* Returns verdict for packet, and may modify conntrack */ | ||
239 | static int gre_packet(struct ip_conntrack *ct, | ||
240 | const struct sk_buff *skb, | ||
241 | enum ip_conntrack_info conntrackinfo) | ||
242 | { | ||
243 | /* If we've seen traffic both ways, this is a GRE connection. | ||
244 | * Extend timeout. */ | ||
245 | if (ct->status & IPS_SEEN_REPLY) { | ||
246 | ip_ct_refresh_acct(ct, conntrackinfo, skb, | ||
247 | ct->proto.gre.stream_timeout); | ||
248 | /* Also, more likely to be important, and not a probe. */ | ||
249 | set_bit(IPS_ASSURED_BIT, &ct->status); | ||
250 | } else | ||
251 | ip_ct_refresh_acct(ct, conntrackinfo, skb, | ||
252 | ct->proto.gre.timeout); | ||
253 | |||
254 | return NF_ACCEPT; | ||
255 | } | ||
256 | |||
257 | /* Called when a new connection for this protocol found. */ | ||
258 | static int gre_new(struct ip_conntrack *ct, | ||
259 | const struct sk_buff *skb) | ||
260 | { | ||
261 | DEBUGP(": "); | ||
262 | DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
263 | |||
264 | /* initialize to sane value. Ideally a conntrack helper | ||
265 | * (e.g. in case of pptp) is increasing them */ | ||
266 | ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; | ||
267 | ct->proto.gre.timeout = GRE_TIMEOUT; | ||
268 | |||
269 | return 1; | ||
270 | } | ||
271 | |||
272 | /* Called when a conntrack entry has already been removed from the hashes | ||
273 | * and is about to be deleted from memory */ | ||
274 | static void gre_destroy(struct ip_conntrack *ct) | ||
275 | { | ||
276 | struct ip_conntrack *master = ct->master; | ||
277 | DEBUGP(" entering\n"); | ||
278 | |||
279 | if (!master) | ||
280 | DEBUGP("no master !?!\n"); | ||
281 | else | ||
282 | ip_ct_gre_keymap_destroy(master); | ||
283 | } | ||
284 | |||
285 | /* protocol helper struct */ | ||
286 | static struct ip_conntrack_protocol gre = { | ||
287 | .proto = IPPROTO_GRE, | ||
288 | .name = "gre", | ||
289 | .pkt_to_tuple = gre_pkt_to_tuple, | ||
290 | .invert_tuple = gre_invert_tuple, | ||
291 | .print_tuple = gre_print_tuple, | ||
292 | .print_conntrack = gre_print_conntrack, | ||
293 | .packet = gre_packet, | ||
294 | .new = gre_new, | ||
295 | .destroy = gre_destroy, | ||
296 | .me = THIS_MODULE, | ||
297 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
298 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
299 | .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, | ||
300 | .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, | ||
301 | #endif | ||
302 | }; | ||
303 | |||
304 | /* ip_conntrack_proto_gre initialization */ | ||
305 | int __init ip_ct_proto_gre_init(void) | ||
306 | { | ||
307 | return ip_conntrack_protocol_register(&gre); | ||
308 | } | ||
309 | |||
310 | void __exit ip_ct_proto_gre_fini(void) | ||
311 | { | ||
312 | struct list_head *pos, *n; | ||
313 | |||
314 | /* delete all keymap entries */ | ||
315 | write_lock_bh(&ip_ct_gre_lock); | ||
316 | list_for_each_safe(pos, n, &gre_keymap_list) { | ||
317 | DEBUGP("deleting keymap %p at module unload time\n", pos); | ||
318 | list_del(pos); | ||
319 | kfree(pos); | ||
320 | } | ||
321 | write_unlock_bh(&ip_ct_gre_lock); | ||
322 | |||
323 | ip_conntrack_protocol_unregister(&gre); | ||
324 | } | ||
325 | |||
326 | EXPORT_SYMBOL(ip_ct_gre_keymap_add); | ||
327 | EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ae3e3e655db5..d3c7808010ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c | |||
@@ -993,11 +993,11 @@ EXPORT_SYMBOL(ip_ct_refresh_acct); | |||
993 | 993 | ||
994 | EXPORT_SYMBOL(ip_conntrack_expect_alloc); | 994 | EXPORT_SYMBOL(ip_conntrack_expect_alloc); |
995 | EXPORT_SYMBOL(ip_conntrack_expect_put); | 995 | EXPORT_SYMBOL(ip_conntrack_expect_put); |
996 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); | 996 | EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); |
997 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); | ||
997 | EXPORT_SYMBOL(ip_conntrack_expect_related); | 998 | EXPORT_SYMBOL(ip_conntrack_expect_related); |
998 | EXPORT_SYMBOL(ip_conntrack_unexpect_related); | 999 | EXPORT_SYMBOL(ip_conntrack_unexpect_related); |
999 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); | 1000 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); |
1000 | EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); | ||
1001 | EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); | 1001 | EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); |
1002 | 1002 | ||
1003 | EXPORT_SYMBOL(ip_conntrack_tuple_taken); | 1003 | EXPORT_SYMBOL(ip_conntrack_tuple_taken); |
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1adedb743f60..c3ea891d38e7 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c | |||
@@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) | |||
578 | 578 | ||
579 | return ret; | 579 | return ret; |
580 | } | 580 | } |
581 | EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); | ||
582 | EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); | ||
581 | #endif | 583 | #endif |
582 | 584 | ||
583 | int __init ip_nat_init(void) | 585 | int __init ip_nat_init(void) |
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c new file mode 100644 index 000000000000..3cdd0684d30d --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c | |||
@@ -0,0 +1,401 @@ | |||
1 | /* | ||
2 | * ip_nat_pptp.c - Version 3.0 | ||
3 | * | ||
4 | * NAT support for PPTP (Point to Point Tunneling Protocol). | ||
5 | * PPTP is a protocol for creating virtual private networks. | ||
6 | * It is a specification defined by Microsoft and some vendors | ||
7 | * working with Microsoft. PPTP is built on top of a modified | ||
8 | * version of the Internet Generic Routing Encapsulation Protocol. | ||
9 | * GRE is defined in RFC 1701 and RFC 1702. Documentation of | ||
10 | * PPTP can be found in RFC 2637 | ||
11 | * | ||
12 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
13 | * | ||
14 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
15 | * | ||
16 | * TODO: - NAT to a unique tuple, not to TCP source port | ||
17 | * (needs netfilter tuple reservation) | ||
18 | * | ||
19 | * Changes: | ||
20 | * 2002-02-10 - Version 1.3 | ||
21 | * - Use ip_nat_mangle_tcp_packet() because of cloned skb's | ||
22 | * in local connections (Philip Craig <philipc@snapgear.com>) | ||
23 | * - add checks for magicCookie and pptp version | ||
24 | * - make argument list of pptp_{out,in}bound_packet() shorter | ||
25 | * - move to C99 style initializers | ||
26 | * - print version number at module loadtime | ||
27 | * 2003-09-22 - Version 1.5 | ||
28 | * - use SNATed tcp sourceport as callid, since we get called before | ||
29 | * TCP header is mangled (Philip Craig <philipc@snapgear.com>) | ||
30 | * 2004-10-22 - Version 2.0 | ||
31 | * - kernel 2.6.x version | ||
32 | * 2005-06-10 - Version 3.0 | ||
33 | * - kernel >= 2.6.11 version, | ||
34 | * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) | ||
35 | * | ||
36 | */ | ||
37 | |||
38 | #include <linux/config.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/ip.h> | ||
41 | #include <linux/tcp.h> | ||
42 | #include <net/tcp.h> | ||
43 | |||
44 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
45 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
46 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | ||
47 | #include <linux/netfilter_ipv4/ip_nat_pptp.h> | ||
48 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
49 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
50 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
51 | #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> | ||
52 | |||
53 | #define IP_NAT_PPTP_VERSION "3.0" | ||
54 | |||
55 | MODULE_LICENSE("GPL"); | ||
56 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
57 | MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); | ||
58 | |||
59 | |||
60 | #if 0 | ||
61 | extern const char *pptp_msg_name[]; | ||
62 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ | ||
63 | __FUNCTION__, ## args) | ||
64 | #else | ||
65 | #define DEBUGP(format, args...) | ||
66 | #endif | ||
67 | |||
68 | static void pptp_nat_expected(struct ip_conntrack *ct, | ||
69 | struct ip_conntrack_expect *exp) | ||
70 | { | ||
71 | struct ip_conntrack *master = ct->master; | ||
72 | struct ip_conntrack_expect *other_exp; | ||
73 | struct ip_conntrack_tuple t; | ||
74 | struct ip_ct_pptp_master *ct_pptp_info; | ||
75 | struct ip_nat_pptp *nat_pptp_info; | ||
76 | |||
77 | ct_pptp_info = &master->help.ct_pptp_info; | ||
78 | nat_pptp_info = &master->nat.help.nat_pptp_info; | ||
79 | |||
80 | /* And here goes the grand finale of corrosion... */ | ||
81 | |||
82 | if (exp->dir == IP_CT_DIR_ORIGINAL) { | ||
83 | DEBUGP("we are PNS->PAC\n"); | ||
84 | /* therefore, build tuple for PAC->PNS */ | ||
85 | t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; | ||
86 | t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); | ||
87 | t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; | ||
88 | t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); | ||
89 | t.dst.protonum = IPPROTO_GRE; | ||
90 | } else { | ||
91 | DEBUGP("we are PAC->PNS\n"); | ||
92 | /* build tuple for PNS->PAC */ | ||
93 | t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; | ||
94 | t.src.u.gre.key = | ||
95 | htons(master->nat.help.nat_pptp_info.pns_call_id); | ||
96 | t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; | ||
97 | t.dst.u.gre.key = | ||
98 | htons(master->nat.help.nat_pptp_info.pac_call_id); | ||
99 | t.dst.protonum = IPPROTO_GRE; | ||
100 | } | ||
101 | |||
102 | DEBUGP("trying to unexpect other dir: "); | ||
103 | DUMP_TUPLE(&t); | ||
104 | other_exp = ip_conntrack_expect_find(&t); | ||
105 | if (other_exp) { | ||
106 | ip_conntrack_unexpect_related(other_exp); | ||
107 | ip_conntrack_expect_put(other_exp); | ||
108 | DEBUGP("success\n"); | ||
109 | } else { | ||
110 | DEBUGP("not found!\n"); | ||
111 | } | ||
112 | |||
113 | ip_nat_follow_master(ct, exp); | ||
114 | } | ||
115 | |||
116 | /* outbound packets == from PNS to PAC */ | ||
/*
 * Rewrites the call ID carried in a PNS->PAC control message.
 * Only OUT_CALL_REQUEST, IN_CALL_REPLY and CALL_CLEAR_REQUEST reach the
 * mangling code; every other message type returns NF_ACCEPT untouched.
 * Returns NF_DROP when ip_nat_mangle_tcp_packet() returns 0, NF_ACCEPT
 * otherwise.
 */
117 | static int | ||
118 | pptp_outbound_pkt(struct sk_buff **pskb, | ||
119 | struct ip_conntrack *ct, | ||
120 | enum ip_conntrack_info ctinfo, | ||
121 | struct PptpControlHeader *ctlh, | ||
122 | union pptp_ctrl_union *pptpReq) | ||
123 | |||
124 | { | ||
125 | struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; | ||
126 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | ||
127 | |||
128 | u_int16_t msg, *cid = NULL, new_callid; | ||
129 | |||
/* default replacement value: the PNS call ID currently stored in the
 * conntrack helper info; OUT_CALL_REQUEST overrides it below */
130 | new_callid = htons(ct_pptp_info->pns_call_id); | ||
131 | |||
132 | switch (msg = ntohs(ctlh->messageType)) { | ||
133 | case PPTP_OUT_CALL_REQUEST: | ||
134 | cid = &pptpReq->ocreq.callID; | ||
135 | /* FIXME: ideally we would want to reserve a call ID | ||
136 | * here. current netfilter NAT core is not able to do | ||
137 | * this :( For now we use TCP source port. This breaks | ||
138 | * multiple calls within one control session */ | ||
139 | |||
140 | /* save original call ID in nat_info */ | ||
141 | nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; | ||
142 | |||
143 | /* don't use tcph->source since we are at a DSTmanip | ||
144 | * hook (e.g. PREROUTING) and pkt is not mangled yet */ | ||
145 | new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; | ||
146 | |||
147 | /* save new call ID in ct info */ | ||
148 | ct_pptp_info->pns_call_id = ntohs(new_callid); | ||
149 | break; | ||
150 | case PPTP_IN_CALL_REPLY: | ||
/* NOTE(review): this takes the call ID through the icreq member although the
 * message is an IN_CALL_REPLY - presumably the reply member of the union was
 * meant; confirm against the pptp_ctrl_union definition. */
151 | cid = &pptpReq->icreq.callID; | ||
152 | break; | ||
153 | case PPTP_CALL_CLEAR_REQUEST: | ||
154 | cid = &pptpReq->clrreq.callID; | ||
155 | break; | ||
/* default sits in the middle of the list on purpose: after the debug message
 * it falls through into the "no need to alter packet" cases below */
156 | default: | ||
157 | DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, | ||
158 | (msg <= PPTP_MSG_MAX)? | ||
159 | pptp_msg_name[msg]:pptp_msg_name[0]); | ||
160 | /* fall through */ | ||
161 | |||
162 | case PPTP_SET_LINK_INFO: | ||
163 | /* only need to NAT in case PAC is behind NAT box */ | ||
164 | case PPTP_START_SESSION_REQUEST: | ||
165 | case PPTP_START_SESSION_REPLY: | ||
166 | case PPTP_STOP_SESSION_REQUEST: | ||
167 | case PPTP_STOP_SESSION_REPLY: | ||
168 | case PPTP_ECHO_REQUEST: | ||
169 | case PPTP_ECHO_REPLY: | ||
170 | /* no need to alter packet */ | ||
171 | return NF_ACCEPT; | ||
172 | } | ||
173 | |||
174 | /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass | ||
175 | * down to here */ | ||
176 | |||
177 | IP_NF_ASSERT(cid); | ||
178 | |||
179 | DEBUGP("altering call id from 0x%04x to 0x%04x\n", | ||
180 | ntohs(*cid), ntohs(new_callid)); | ||
181 | |||
/* the match offset is the distance of the call ID field from a point
 * sizeof(struct pptp_pkt_hdr) bytes before the control header; the field is
 * replaced by a value of the same size */
182 | /* mangle packet */ | ||
183 | if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, | ||
184 | (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), | ||
185 | sizeof(new_callid), | ||
186 | (char *)&new_callid, | ||
187 | sizeof(new_callid)) == 0) | ||
188 | return NF_DROP; | ||
189 | |||
190 | return NF_ACCEPT; | ||
191 | } | ||
192 | |||
/*
 * Adjusts and registers the two GRE expectations of a PPTP call (one per
 * direction) and adds the two GRE keymap entries for the master connection.
 * Returns 0 on success and 1 on any failure; on failure the expectations
 * and keymaps registered so far are removed again.
 */
193 | static int | ||
194 | pptp_exp_gre(struct ip_conntrack_expect *expect_orig, | ||
195 | struct ip_conntrack_expect *expect_reply) | ||
196 | { | ||
197 | struct ip_ct_pptp_master *ct_pptp_info = | ||
198 | &expect_orig->master->help.ct_pptp_info; | ||
199 | struct ip_nat_pptp *nat_pptp_info = | ||
200 | &expect_orig->master->nat.help.nat_pptp_info; | ||
201 | |||
202 | struct ip_conntrack *ct = expect_orig->master; | ||
203 | |||
204 | struct ip_conntrack_tuple inv_t; | ||
205 | struct ip_conntrack_tuple *orig_t, *reply_t; | ||
206 | |||
207 | /* save original PAC call ID in nat_info */ | ||
208 | nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; | ||
209 | |||
210 | /* alter expectation */ | ||
211 | orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
212 | reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
213 | |||
214 | /* alter expectation for PNS->PAC direction */ | ||
215 | invert_tuplepr(&inv_t, &expect_orig->tuple); | ||
216 | expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); | ||
217 | expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | ||
218 | expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); | ||
/* NOTE(review): the four inv_t stores below are not read before the second
 * invert_tuplepr() call writes inv_t again, so they appear to have no
 * effect - confirm whether a keymap was meant to be added from this value. */
219 | inv_t.src.ip = reply_t->src.ip; | ||
220 | inv_t.dst.ip = reply_t->dst.ip; | ||
221 | inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | ||
222 | inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); | ||
223 | |||
/* ip_conntrack_expect_related() is treated as returning 0 on success */
224 | if (!ip_conntrack_expect_related(expect_orig)) { | ||
225 | DEBUGP("successfully registered expect\n"); | ||
226 | } else { | ||
227 | DEBUGP("can't expect_related(expect_orig)\n"); | ||
228 | return 1; | ||
229 | } | ||
230 | |||
231 | /* alter expectation for PAC->PNS direction */ | ||
232 | invert_tuplepr(&inv_t, &expect_reply->tuple); | ||
233 | expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); | ||
234 | expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | ||
235 | expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); | ||
236 | inv_t.src.ip = orig_t->src.ip; | ||
237 | inv_t.dst.ip = orig_t->dst.ip; | ||
238 | inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | ||
239 | inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); | ||
240 | |||
241 | if (!ip_conntrack_expect_related(expect_reply)) { | ||
242 | DEBUGP("successfully registered expect\n"); | ||
243 | } else { | ||
244 | DEBUGP("can't expect_related(expect_reply)\n"); | ||
/* undo the first registration before bailing out */
245 | ip_conntrack_unexpect_related(expect_orig); | ||
246 | return 1; | ||
247 | } | ||
248 | |||
/* keymap for the altered reply expectation tuple (third argument 0) */
249 | if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { | ||
250 | DEBUGP("can't register original keymap\n"); | ||
251 | ip_conntrack_unexpect_related(expect_orig); | ||
252 | ip_conntrack_unexpect_related(expect_reply); | ||
253 | return 1; | ||
254 | } | ||
255 | |||
/* keymap for the hand-adjusted inverse tuple (third argument 1) */
256 | if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { | ||
257 | DEBUGP("can't register reply keymap\n"); | ||
258 | ip_conntrack_unexpect_related(expect_orig); | ||
259 | ip_conntrack_unexpect_related(expect_reply); | ||
260 | ip_ct_gre_keymap_destroy(ct); | ||
261 | return 1; | ||
262 | } | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | /* inbound packets == from PAC to PNS */ | ||
268 | static int | ||
269 | pptp_inbound_pkt(struct sk_buff **pskb, | ||
270 | struct ip_conntrack *ct, | ||
271 | enum ip_conntrack_info ctinfo, | ||
272 | struct PptpControlHeader *ctlh, | ||
273 | union pptp_ctrl_union *pptpReq) | ||
274 | { | ||
275 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | ||
276 | u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; | ||
277 | |||
278 | int ret = NF_ACCEPT, rv; | ||
279 | |||
280 | new_pcid = htons(nat_pptp_info->pns_call_id); | ||
281 | |||
282 | switch (msg = ntohs(ctlh->messageType)) { | ||
283 | case PPTP_OUT_CALL_REPLY: | ||
284 | pcid = &pptpReq->ocack.peersCallID; | ||
285 | cid = &pptpReq->ocack.callID; | ||
286 | break; | ||
287 | case PPTP_IN_CALL_CONNECT: | ||
288 | pcid = &pptpReq->iccon.peersCallID; | ||
289 | break; | ||
290 | case PPTP_IN_CALL_REQUEST: | ||
291 | /* only need to nat in case PAC is behind NAT box */ | ||
292 | break; | ||
293 | case PPTP_WAN_ERROR_NOTIFY: | ||
294 | pcid = &pptpReq->wanerr.peersCallID; | ||
295 | break; | ||
296 | case PPTP_CALL_DISCONNECT_NOTIFY: | ||
297 | pcid = &pptpReq->disc.callID; | ||
298 | break; | ||
299 | case PPTP_SET_LINK_INFO: | ||
300 | pcid = &pptpReq->setlink.peersCallID; | ||
301 | break; | ||
302 | |||
303 | default: | ||
304 | DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? | ||
305 | pptp_msg_name[msg]:pptp_msg_name[0]); | ||
306 | /* fall through */ | ||
307 | |||
308 | case PPTP_START_SESSION_REQUEST: | ||
309 | case PPTP_START_SESSION_REPLY: | ||
310 | case PPTP_STOP_SESSION_REQUEST: | ||
311 | case PPTP_STOP_SESSION_REPLY: | ||
312 | case PPTP_ECHO_REQUEST: | ||
313 | case PPTP_ECHO_REPLY: | ||
314 | /* no need to alter packet */ | ||
315 | return NF_ACCEPT; | ||
316 | } | ||
317 | |||
318 | /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, | ||
319 | * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ | ||
320 | |||
321 | /* mangle packet */ | ||
322 | IP_NF_ASSERT(pcid); | ||
323 | DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", | ||
324 | ntohs(*pcid), ntohs(new_pcid)); | ||
325 | |||
326 | rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, | ||
327 | (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), | ||
328 | sizeof(new_pcid), (char *)&new_pcid, | ||
329 | sizeof(new_pcid)); | ||
330 | if (rv != NF_ACCEPT) | ||
331 | return rv; | ||
332 | |||
333 | if (new_cid) { | ||
334 | IP_NF_ASSERT(cid); | ||
335 | DEBUGP("altering call id from 0x%04x to 0x%04x\n", | ||
336 | ntohs(*cid), ntohs(new_cid)); | ||
337 | rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, | ||
338 | (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), | ||
339 | sizeof(new_cid), | ||
340 | (char *)&new_cid, | ||
341 | sizeof(new_cid)); | ||
342 | if (rv != NF_ACCEPT) | ||
343 | return rv; | ||
344 | } | ||
345 | |||
346 | /* check for earlier return value of 'switch' above */ | ||
347 | if (ret != NF_ACCEPT) | ||
348 | return ret; | ||
349 | |||
350 | /* great, at least we don't need to resize packets */ | ||
351 | return NF_ACCEPT; | ||
352 | } | ||
353 | |||
354 | |||
355 | extern int __init ip_nat_proto_gre_init(void); | ||
356 | extern void __exit ip_nat_proto_gre_fini(void); | ||
357 | |||
/*
 * Module init: registers the GRE NAT protocol and then installs the four
 * PPTP NAT hook pointers. Returns the negative value from
 * ip_nat_proto_gre_init() on failure, 0 otherwise.
 */
358 | static int __init init(void) | ||
359 | { | ||
360 | int ret; | ||
361 | |||
362 | DEBUGP("%s: registering NAT helper\n", __FILE__); | ||
363 | |||
364 | ret = ip_nat_proto_gre_init(); | ||
365 | if (ret < 0) | ||
366 | return ret; | ||
367 | |||
/* each hook pointer must still be NULL here; BUG_ON() fires otherwise */
368 | BUG_ON(ip_nat_pptp_hook_outbound); | ||
369 | ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; | ||
370 | |||
371 | BUG_ON(ip_nat_pptp_hook_inbound); | ||
372 | ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; | ||
373 | |||
374 | BUG_ON(ip_nat_pptp_hook_exp_gre); | ||
375 | ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; | ||
376 | |||
377 | BUG_ON(ip_nat_pptp_hook_expectfn); | ||
378 | ip_nat_pptp_hook_expectfn = &pptp_nat_expected; | ||
379 | |||
380 | printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); | ||
381 | return 0; | ||
382 | } | ||
383 | |||
/*
 * Module exit: clears the four PPTP NAT hook pointers (in the reverse order
 * of init()), unregisters the GRE NAT protocol and calls synchronize_net()
 * before announcing the unload.
 */
384 | static void __exit fini(void) | ||
385 | { | ||
386 | DEBUGP("cleanup_module\n" ); | ||
387 | |||
388 | ip_nat_pptp_hook_expectfn = NULL; | ||
389 | ip_nat_pptp_hook_exp_gre = NULL; | ||
390 | ip_nat_pptp_hook_inbound = NULL; | ||
391 | ip_nat_pptp_hook_outbound = NULL; | ||
392 | |||
393 | ip_nat_proto_gre_fini(); | ||
394 | /* Make sure no one calls it, meanwhile */ | ||
395 | synchronize_net(); | ||
396 | |||
397 | printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); | ||
398 | } | ||
399 | |||
400 | module_init(init); | ||
401 | module_exit(fini); | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c new file mode 100644 index 000000000000..7c1285401672 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c | |||
@@ -0,0 +1,214 @@ | |||
1 | /* | ||
2 | * ip_nat_proto_gre.c - Version 2.0 | ||
3 | * | ||
4 | * NAT protocol helper module for GRE. | ||
5 | * | ||
6 | * GRE is a generic encapsulation protocol, which is generally not very | ||
7 | * suited for NAT, as it has no protocol-specific part such as port numbers. | ||
8 | * | ||
9 | * It has an optional key field, which may help us distinguish two | ||
10 | * connections between the same two hosts. | ||
11 | * | ||
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | ||
13 | * | ||
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | ||
15 | * field called "CallID", which serves us for the same purpose as the key | ||
16 | * field in plain GRE. | ||
17 | * | ||
18 | * Documentation about PPTP can be found in RFC 2637 | ||
19 | * | ||
20 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | ||
21 | * | ||
22 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/ip.h> | ||
29 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
30 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | ||
31 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
32 | #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> | ||
33 | |||
34 | MODULE_LICENSE("GPL"); | ||
35 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | ||
36 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | ||
37 | |||
38 | #if 0 | ||
39 | #define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ | ||
40 | __FUNCTION__, ## args) | ||
41 | #else | ||
42 | #define DEBUGP(x, args...) | ||
43 | #endif | ||
44 | |||
45 | /* is key in given range between min and max */ | ||
46 | static int | ||
47 | gre_in_range(const struct ip_conntrack_tuple *tuple, | ||
48 | enum ip_nat_manip_type maniptype, | ||
49 | const union ip_conntrack_manip_proto *min, | ||
50 | const union ip_conntrack_manip_proto *max) | ||
51 | { | ||
52 | u_int32_t key; | ||
53 | |||
54 | if (maniptype == IP_NAT_MANIP_SRC) | ||
55 | key = tuple->src.u.gre.key; | ||
56 | else | ||
57 | key = tuple->dst.u.gre.key; | ||
58 | |||
59 | return ntohl(key) >= ntohl(min->gre.key) | ||
60 | && ntohl(key) <= ntohl(max->gre.key); | ||
61 | } | ||
62 | |||
63 | /* generate unique tuple ... */ | ||
64 | static int | ||
65 | gre_unique_tuple(struct ip_conntrack_tuple *tuple, | ||
66 | const struct ip_nat_range *range, | ||
67 | enum ip_nat_manip_type maniptype, | ||
68 | const struct ip_conntrack *conntrack) | ||
69 | { | ||
70 | static u_int16_t key; | ||
71 | u_int16_t *keyptr; | ||
72 | unsigned int min, i, range_size; | ||
73 | |||
74 | if (maniptype == IP_NAT_MANIP_SRC) | ||
75 | keyptr = &tuple->src.u.gre.key; | ||
76 | else | ||
77 | keyptr = &tuple->dst.u.gre.key; | ||
78 | |||
79 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
80 | DEBUGP("%p: NATing GRE PPTP\n", conntrack); | ||
81 | min = 1; | ||
82 | range_size = 0xffff; | ||
83 | } else { | ||
84 | min = ntohl(range->min.gre.key); | ||
85 | range_size = ntohl(range->max.gre.key) - min + 1; | ||
86 | } | ||
87 | |||
88 | DEBUGP("min = %u, range_size = %u\n", min, range_size); | ||
89 | |||
90 | for (i = 0; i < range_size; i++, key++) { | ||
91 | *keyptr = htonl(min + key % range_size); | ||
92 | if (!ip_nat_used_tuple(tuple, conntrack)) | ||
93 | return 1; | ||
94 | } | ||
95 | |||
96 | DEBUGP("%p: no NAT mapping\n", conntrack); | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | /* manipulate a GRE packet according to maniptype */ | ||
102 | static int | ||
103 | gre_manip_pkt(struct sk_buff **pskb, | ||
104 | unsigned int iphdroff, | ||
105 | const struct ip_conntrack_tuple *tuple, | ||
106 | enum ip_nat_manip_type maniptype) | ||
107 | { | ||
108 | struct gre_hdr *greh; | ||
109 | struct gre_hdr_pptp *pgreh; | ||
110 | struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); | ||
111 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
112 | |||
113 | /* pgreh includes two optional 32bit fields which are not required | ||
114 | * to be there. That's where the magic '8' comes from */ | ||
115 | if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) | ||
116 | return 0; | ||
117 | |||
118 | greh = (void *)(*pskb)->data + hdroff; | ||
119 | pgreh = (struct gre_hdr_pptp *) greh; | ||
120 | |||
121 | /* we only have destination manip of a packet, since 'source key' | ||
122 | * is not present in the packet itself */ | ||
123 | if (maniptype == IP_NAT_MANIP_DST) { | ||
124 | /* key manipulation is always dest */ | ||
125 | switch (greh->version) { | ||
126 | case 0: | ||
127 | if (!greh->key) { | ||
128 | DEBUGP("can't nat GRE w/o key\n"); | ||
129 | break; | ||
130 | } | ||
131 | if (greh->csum) { | ||
132 | /* FIXME: Never tested this code... */ | ||
133 | *(gre_csum(greh)) = | ||
134 | ip_nat_cheat_check(~*(gre_key(greh)), | ||
135 | tuple->dst.u.gre.key, | ||
136 | *(gre_csum(greh))); | ||
137 | } | ||
138 | *(gre_key(greh)) = tuple->dst.u.gre.key; | ||
139 | break; | ||
140 | case GRE_VERSION_PPTP: | ||
141 | DEBUGP("call_id -> 0x%04x\n", | ||
142 | ntohl(tuple->dst.u.gre.key)); | ||
143 | pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); | ||
144 | break; | ||
145 | default: | ||
146 | DEBUGP("can't nat unknown GRE version\n"); | ||
147 | return 0; | ||
148 | break; | ||
149 | } | ||
150 | } | ||
151 | return 1; | ||
152 | } | ||
153 | |||
154 | /* print out a nat tuple */ | ||
155 | static unsigned int | ||
156 | gre_print(char *buffer, | ||
157 | const struct ip_conntrack_tuple *match, | ||
158 | const struct ip_conntrack_tuple *mask) | ||
159 | { | ||
160 | unsigned int len = 0; | ||
161 | |||
162 | if (mask->src.u.gre.key) | ||
163 | len += sprintf(buffer + len, "srckey=0x%x ", | ||
164 | ntohl(match->src.u.gre.key)); | ||
165 | |||
166 | if (mask->dst.u.gre.key) | ||
167 | len += sprintf(buffer + len, "dstkey=0x%x ", | ||
168 | ntohl(match->src.u.gre.key)); | ||
169 | |||
170 | return len; | ||
171 | } | ||
172 | |||
173 | /* print a range of keys */ | ||
174 | static unsigned int | ||
175 | gre_print_range(char *buffer, const struct ip_nat_range *range) | ||
176 | { | ||
177 | if (range->min.gre.key != 0 | ||
178 | || range->max.gre.key != 0xFFFF) { | ||
179 | if (range->min.gre.key == range->max.gre.key) | ||
180 | return sprintf(buffer, "key 0x%x ", | ||
181 | ntohl(range->min.gre.key)); | ||
182 | else | ||
183 | return sprintf(buffer, "keys 0x%u-0x%u ", | ||
184 | ntohl(range->min.gre.key), | ||
185 | ntohl(range->max.gre.key)); | ||
186 | } else | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | /* nat helper struct */ | ||
/* NAT protocol descriptor for IPPROTO_GRE; this is the object passed to
 * ip_nat_protocol_register()/_unregister() below. */
191 | static struct ip_nat_protocol gre = { | ||
192 | .name = "GRE", | ||
193 | .protonum = IPPROTO_GRE, | ||
194 | .manip_pkt = gre_manip_pkt, | ||
195 | .in_range = gre_in_range, | ||
196 | .unique_tuple = gre_unique_tuple, | ||
197 | .print = gre_print, | ||
198 | .print_range = gre_print_range, | ||
/* netlink range converters are only wired up when conntrack netlink is
 * configured (built in or as a module); the generic port helpers are reused */
199 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
200 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
201 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
202 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
203 | #endif | ||
204 | }; | ||
205 | |||
/* Registers the GRE NAT protocol descriptor and returns whatever
 * ip_nat_protocol_register() returns. */
206 | int __init ip_nat_proto_gre_init(void) | ||
207 | { | ||
208 | return ip_nat_protocol_register(&gre); | ||
209 | } | ||
210 | |||
/* Unregisters the GRE NAT protocol descriptor registered by
 * ip_nat_proto_gre_init(). */
211 | void __exit ip_nat_proto_gre_fini(void) | ||
212 | { | ||
213 | ip_nat_protocol_unregister(&gre); | ||
214 | } | ||
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d38913754b1..9bcb398fbc1f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |
15 | #include <linux/jhash.h> | 15 | #include <linux/jhash.h> |
16 | #include <linux/bitops.h> | ||
16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
17 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
18 | #include <linux/tcp.h> | 19 | #include <linux/tcp.h> |
@@ -30,7 +31,7 @@ | |||
30 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> |
31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
32 | 33 | ||
33 | #define CLUSTERIP_VERSION "0.7" | 34 | #define CLUSTERIP_VERSION "0.8" |
34 | 35 | ||
35 | #define DEBUG_CLUSTERIP | 36 | #define DEBUG_CLUSTERIP |
36 | 37 | ||
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); | |||
49 | struct clusterip_config { | 50 | struct clusterip_config { |
50 | struct list_head list; /* list of all configs */ | 51 | struct list_head list; /* list of all configs */ |
51 | atomic_t refcount; /* reference count */ | 52 | atomic_t refcount; /* reference count */ |
53 | atomic_t entries; /* number of entries/rules | ||
54 | * referencing us */ | ||
52 | 55 | ||
53 | u_int32_t clusterip; /* the IP address */ | 56 | u_int32_t clusterip; /* the IP address */ |
54 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ | 57 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ |
55 | struct net_device *dev; /* device */ | 58 | struct net_device *dev; /* device */ |
56 | u_int16_t num_total_nodes; /* total number of nodes */ | 59 | u_int16_t num_total_nodes; /* total number of nodes */ |
57 | u_int16_t num_local_nodes; /* number of local nodes */ | 60 | unsigned long local_nodes; /* node number array */ |
58 | u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ | ||
59 | 61 | ||
60 | #ifdef CONFIG_PROC_FS | 62 | #ifdef CONFIG_PROC_FS |
61 | struct proc_dir_entry *pde; /* proc dir entry */ | 63 | struct proc_dir_entry *pde; /* proc dir entry */ |
@@ -66,8 +68,7 @@ struct clusterip_config { | |||
66 | 68 | ||
67 | static LIST_HEAD(clusterip_configs); | 69 | static LIST_HEAD(clusterip_configs); |
68 | 70 | ||
69 | /* clusterip_lock protects the clusterip_configs list _AND_ the configurable | 71 | /* clusterip_lock protects the clusterip_configs list */ |
70 | * data within all structurses (num_local_nodes, local_nodes[]) */ | ||
71 | static DEFINE_RWLOCK(clusterip_lock); | 72 | static DEFINE_RWLOCK(clusterip_lock); |
72 | 73 | ||
73 | #ifdef CONFIG_PROC_FS | 74 | #ifdef CONFIG_PROC_FS |
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir; | |||
76 | #endif | 77 | #endif |
77 | 78 | ||
78 | static inline void | 79 | static inline void |
79 | clusterip_config_get(struct clusterip_config *c) { | 80 | clusterip_config_get(struct clusterip_config *c) |
81 | { | ||
80 | atomic_inc(&c->refcount); | 82 | atomic_inc(&c->refcount); |
81 | } | 83 | } |
82 | 84 | ||
83 | static inline void | 85 | static inline void |
84 | clusterip_config_put(struct clusterip_config *c) { | 86 | clusterip_config_put(struct clusterip_config *c) |
85 | if (atomic_dec_and_test(&c->refcount)) { | 87 | { |
88 | if (atomic_dec_and_test(&c->refcount)) | ||
89 | kfree(c); | ||
90 | } | ||
91 | |||
92 | /* increase the count of entries(rules) using/referencing this config */ | ||
93 | static inline void | ||
94 | clusterip_config_entry_get(struct clusterip_config *c) | ||
95 | { | ||
96 | atomic_inc(&c->entries); | ||
97 | } | ||
98 | |||
99 | /* decrease the count of entries using/referencing this config. If last | ||
100 | * entry(rule) is removed, remove the config from lists, but don't free it | ||
101 | * yet, since proc-files could still be holding references */ | ||
102 | static inline void | ||
103 | clusterip_config_entry_put(struct clusterip_config *c) | ||
104 | { | ||
105 | if (atomic_dec_and_test(&c->entries)) { | ||
86 | write_lock_bh(&clusterip_lock); | 106 | write_lock_bh(&clusterip_lock); |
87 | list_del(&c->list); | 107 | list_del(&c->list); |
88 | write_unlock_bh(&clusterip_lock); | 108 | write_unlock_bh(&clusterip_lock); |
109 | |||
89 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); | 110 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); |
90 | dev_put(c->dev); | 111 | dev_put(c->dev); |
91 | kfree(c); | 112 | |
113 | /* In case anyone still accesses the file, the open/close | ||
114 | * functions are also incrementing the refcount on their own, | ||
115 | * so it's safe to remove the entry even if it's in use. */ | ||
116 | #ifdef CONFIG_PROC_FS | ||
117 | remove_proc_entry(c->pde->name, c->pde->parent); | ||
118 | #endif | ||
92 | } | 119 | } |
93 | } | 120 | } |
94 | 121 | ||
95 | |||
96 | static struct clusterip_config * | 122 | static struct clusterip_config * |
97 | __clusterip_config_find(u_int32_t clusterip) | 123 | __clusterip_config_find(u_int32_t clusterip) |
98 | { | 124 | { |
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip) | |||
111 | } | 137 | } |
112 | 138 | ||
113 | static inline struct clusterip_config * | 139 | static inline struct clusterip_config * |
114 | clusterip_config_find_get(u_int32_t clusterip) | 140 | clusterip_config_find_get(u_int32_t clusterip, int entry) |
115 | { | 141 | { |
116 | struct clusterip_config *c; | 142 | struct clusterip_config *c; |
117 | 143 | ||
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip) | |||
122 | return NULL; | 148 | return NULL; |
123 | } | 149 | } |
124 | atomic_inc(&c->refcount); | 150 | atomic_inc(&c->refcount); |
151 | if (entry) | ||
152 | atomic_inc(&c->entries); | ||
125 | read_unlock_bh(&clusterip_lock); | 153 | read_unlock_bh(&clusterip_lock); |
126 | 154 | ||
127 | return c; | 155 | return c; |
128 | } | 156 | } |
129 | 157 | ||
158 | static void | ||
159 | clusterip_config_init_nodelist(struct clusterip_config *c, | ||
160 | const struct ipt_clusterip_tgt_info *i) | ||
161 | { | ||
162 | int n; | ||
163 | |||
164 | for (n = 0; n < i->num_local_nodes; n++) { | ||
165 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); | ||
166 | } | ||
167 | } | ||
168 | |||
130 | static struct clusterip_config * | 169 | static struct clusterip_config * |
131 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | 170 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, |
132 | struct net_device *dev) | 171 | struct net_device *dev) |
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |||
143 | c->clusterip = ip; | 182 | c->clusterip = ip; |
144 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | 183 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); |
145 | c->num_total_nodes = i->num_total_nodes; | 184 | c->num_total_nodes = i->num_total_nodes; |
146 | c->num_local_nodes = i->num_local_nodes; | 185 | clusterip_config_init_nodelist(c, i); |
147 | memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); | ||
148 | c->hash_mode = i->hash_mode; | 186 | c->hash_mode = i->hash_mode; |
149 | c->hash_initval = i->hash_initval; | 187 | c->hash_initval = i->hash_initval; |
150 | atomic_set(&c->refcount, 1); | 188 | atomic_set(&c->refcount, 1); |
189 | atomic_set(&c->entries, 1); | ||
151 | 190 | ||
152 | #ifdef CONFIG_PROC_FS | 191 | #ifdef CONFIG_PROC_FS |
153 | /* create proc dir entry */ | 192 | /* create proc dir entry */ |
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |||
171 | static int | 210 | static int |
172 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | 211 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) |
173 | { | 212 | { |
174 | int i; | ||
175 | |||
176 | write_lock_bh(&clusterip_lock); | ||
177 | 213 | ||
178 | if (c->num_local_nodes >= CLUSTERIP_MAX_NODES | 214 | if (nodenum == 0 || |
179 | || nodenum > CLUSTERIP_MAX_NODES) { | 215 | nodenum > c->num_total_nodes) |
180 | write_unlock_bh(&clusterip_lock); | ||
181 | return 1; | 216 | return 1; |
182 | } | ||
183 | |||
184 | /* check if we alrady have this number in our array */ | ||
185 | for (i = 0; i < c->num_local_nodes; i++) { | ||
186 | if (c->local_nodes[i] == nodenum) { | ||
187 | write_unlock_bh(&clusterip_lock); | ||
188 | return 1; | ||
189 | } | ||
190 | } | ||
191 | 217 | ||
192 | c->local_nodes[c->num_local_nodes++] = nodenum; | 218 | /* check if we already have this number in our bitfield */ |
219 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | ||
220 | return 1; | ||
193 | 221 | ||
194 | write_unlock_bh(&clusterip_lock); | ||
195 | return 0; | 222 | return 0; |
196 | } | 223 | } |
197 | 224 | ||
198 | static int | 225 | static int |
199 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) | 226 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
200 | { | 227 | { |
201 | int i; | 228 | if (nodenum == 0 || |
202 | 229 | nodenum > c->num_total_nodes) | |
203 | write_lock_bh(&clusterip_lock); | ||
204 | |||
205 | if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { | ||
206 | write_unlock_bh(&clusterip_lock); | ||
207 | return 1; | 230 | return 1; |
208 | } | ||
209 | 231 | ||
210 | for (i = 0; i < c->num_local_nodes; i++) { | 232 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
211 | if (c->local_nodes[i] == nodenum) { | 233 | return 0; |
212 | int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); | ||
213 | memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); | ||
214 | c->num_local_nodes--; | ||
215 | write_unlock_bh(&clusterip_lock); | ||
216 | return 0; | ||
217 | } | ||
218 | } | ||
219 | 234 | ||
220 | write_unlock_bh(&clusterip_lock); | ||
221 | return 1; | 235 | return 1; |
222 | } | 236 | } |
223 | 237 | ||
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) | |||
285 | static inline int | 299 | static inline int |
286 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) | 300 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) |
287 | { | 301 | { |
288 | int i; | 302 | return test_bit(hash - 1, &config->local_nodes); |
289 | |||
290 | read_lock_bh(&clusterip_lock); | ||
291 | |||
292 | if (config->num_local_nodes == 0) { | ||
293 | read_unlock_bh(&clusterip_lock); | ||
294 | return 0; | ||
295 | } | ||
296 | |||
297 | for (i = 0; i < config->num_local_nodes; i++) { | ||
298 | if (config->local_nodes[i] == hash) { | ||
299 | read_unlock_bh(&clusterip_lock); | ||
300 | return 1; | ||
301 | } | ||
302 | } | ||
303 | |||
304 | read_unlock_bh(&clusterip_lock); | ||
305 | |||
306 | return 0; | ||
307 | } | 303 | } |
308 | 304 | ||
309 | /*********************************************************************** | 305 | /*********************************************************************** |
@@ -415,8 +411,26 @@ checkentry(const char *tablename, | |||
415 | 411 | ||
416 | /* FIXME: further sanity checks */ | 412 | /* FIXME: further sanity checks */ |
417 | 413 | ||
418 | config = clusterip_config_find_get(e->ip.dst.s_addr); | 414 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
419 | if (!config) { | 415 | if (config) { |
416 | if (cipinfo->config != NULL) { | ||
417 | /* Case A: This is an entry that gets reloaded, since | ||
418 | * it still has a cipinfo->config pointer. Simply | ||
419 | * increase the entry refcount and return */ | ||
420 | if (cipinfo->config != config) { | ||
421 | printk(KERN_ERR "CLUSTERIP: Reloaded entry " | ||
422 | "has invalid config pointer!\n"); | ||
423 | return 0; | ||
424 | } | ||
425 | clusterip_config_entry_get(cipinfo->config); | ||
426 | } else { | ||
427 | /* Case B: This is a new rule referring to an existing | ||
428 | * clusterip config. */ | ||
429 | cipinfo->config = config; | ||
430 | clusterip_config_entry_get(cipinfo->config); | ||
431 | } | ||
432 | } else { | ||
433 | /* Case C: This is a completely new clusterip config */ | ||
420 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { | 434 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
421 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); | 435 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); |
422 | return 0; | 436 | return 0; |
@@ -443,10 +457,9 @@ checkentry(const char *tablename, | |||
443 | } | 457 | } |
444 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | 458 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); |
445 | } | 459 | } |
460 | cipinfo->config = config; | ||
446 | } | 461 | } |
447 | 462 | ||
448 | cipinfo->config = config; | ||
449 | |||
450 | return 1; | 463 | return 1; |
451 | } | 464 | } |
452 | 465 | ||
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) | |||
455 | { | 468 | { |
456 | struct ipt_clusterip_tgt_info *cipinfo = matchinfo; | 469 | struct ipt_clusterip_tgt_info *cipinfo = matchinfo; |
457 | 470 | ||
458 | /* we first remove the proc entry and then drop the reference | 471 | /* if no more entries are referencing the config, remove it |
459 | * count. In case anyone still accesses the file, the open/close | 472 | * from the list and destroy the proc entry */ |
460 | * functions are also incrementing the refcount on their own */ | 473 | clusterip_config_entry_put(cipinfo->config); |
461 | #ifdef CONFIG_PROC_FS | 474 | |
462 | remove_proc_entry(cipinfo->config->pde->name, | ||
463 | cipinfo->config->pde->parent); | ||
464 | #endif | ||
465 | clusterip_config_put(cipinfo->config); | 475 | clusterip_config_put(cipinfo->config); |
466 | } | 476 | } |
467 | 477 | ||
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook, | |||
533 | 543 | ||
534 | /* if there is no clusterip configuration for the arp reply's | 544 | /* if there is no clusterip configuration for the arp reply's |
535 | * source ip, we don't want to mangle it */ | 545 | * source ip, we don't want to mangle it */ |
536 | c = clusterip_config_find_get(payload->src_ip); | 546 | c = clusterip_config_find_get(payload->src_ip, 0); |
537 | if (!c) | 547 | if (!c) |
538 | return NF_ACCEPT; | 548 | return NF_ACCEPT; |
539 | 549 | ||
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { | |||
574 | 584 | ||
575 | #ifdef CONFIG_PROC_FS | 585 | #ifdef CONFIG_PROC_FS |
576 | 586 | ||
587 | struct clusterip_seq_position { | ||
588 | unsigned int pos; /* position */ | ||
589 | unsigned int weight; /* number of bits set == size */ | ||
590 | unsigned int bit; /* current bit */ | ||
591 | unsigned long val; /* current value */ | ||
592 | }; | ||
593 | |||
577 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | 594 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
578 | { | 595 | { |
579 | struct proc_dir_entry *pde = s->private; | 596 | struct proc_dir_entry *pde = s->private; |
580 | struct clusterip_config *c = pde->data; | 597 | struct clusterip_config *c = pde->data; |
581 | unsigned int *nodeidx; | 598 | unsigned int weight; |
582 | 599 | u_int32_t local_nodes; | |
583 | read_lock_bh(&clusterip_lock); | 600 | struct clusterip_seq_position *idx; |
584 | if (*pos >= c->num_local_nodes) | 601 | |
602 | /* FIXME: possible race */ | ||
603 | local_nodes = c->local_nodes; | ||
604 | weight = hweight32(local_nodes); | ||
605 | if (*pos >= weight) | ||
585 | return NULL; | 606 | return NULL; |
586 | 607 | ||
587 | nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); | 608 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
588 | if (!nodeidx) | 609 | if (!idx) |
589 | return ERR_PTR(-ENOMEM); | 610 | return ERR_PTR(-ENOMEM); |
590 | 611 | ||
591 | *nodeidx = *pos; | 612 | idx->pos = *pos; |
592 | return nodeidx; | 613 | idx->weight = weight; |
614 | idx->bit = ffs(local_nodes); | ||
615 | idx->val = local_nodes; | ||
616 | clear_bit(idx->bit - 1, &idx->val); | ||
617 | |||
618 | return idx; | ||
593 | } | 619 | } |
594 | 620 | ||
595 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | 621 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) |
596 | { | 622 | { |
597 | struct proc_dir_entry *pde = s->private; | 623 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
598 | struct clusterip_config *c = pde->data; | ||
599 | unsigned int *nodeidx = (unsigned int *)v; | ||
600 | 624 | ||
601 | *pos = ++(*nodeidx); | 625 | *pos = ++idx->pos; |
602 | if (*pos >= c->num_local_nodes) { | 626 | if (*pos >= idx->weight) { |
603 | kfree(v); | 627 | kfree(v); |
604 | return NULL; | 628 | return NULL; |
605 | } | 629 | } |
606 | return nodeidx; | 630 | idx->bit = ffs(idx->val); |
631 | clear_bit(idx->bit - 1, &idx->val); | ||
632 | return idx; | ||
607 | } | 633 | } |
608 | 634 | ||
609 | static void clusterip_seq_stop(struct seq_file *s, void *v) | 635 | static void clusterip_seq_stop(struct seq_file *s, void *v) |
610 | { | 636 | { |
611 | kfree(v); | 637 | kfree(v); |
612 | |||
613 | read_unlock_bh(&clusterip_lock); | ||
614 | } | 638 | } |
615 | 639 | ||
616 | static int clusterip_seq_show(struct seq_file *s, void *v) | 640 | static int clusterip_seq_show(struct seq_file *s, void *v) |
617 | { | 641 | { |
618 | struct proc_dir_entry *pde = s->private; | 642 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
619 | struct clusterip_config *c = pde->data; | ||
620 | unsigned int *nodeidx = (unsigned int *)v; | ||
621 | 643 | ||
622 | if (*nodeidx != 0) | 644 | if (idx->pos != 0) |
623 | seq_putc(s, ','); | 645 | seq_putc(s, ','); |
624 | seq_printf(s, "%u", c->local_nodes[*nodeidx]); | ||
625 | 646 | ||
626 | if (*nodeidx == c->num_local_nodes-1) | 647 | seq_printf(s, "%u", idx->bit); |
648 | |||
649 | if (idx->pos == idx->weight - 1) | ||
627 | seq_putc(s, '\n'); | 650 | seq_putc(s, '\n'); |
628 | 651 | ||
629 | return 0; | 652 | return 0; |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 304bb0a1d4f0..4b0d7e4d6269 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
361 | 361 | ||
362 | if (type && code) { | 362 | if (type && code) { |
363 | get_user(fl->fl_icmp_type, type); | 363 | get_user(fl->fl_icmp_type, type); |
364 | __get_user(fl->fl_icmp_code, code); | 364 | get_user(fl->fl_icmp_code, code); |
365 | probed = 1; | 365 | probed = 1; |
366 | } | 366 | } |
367 | break; | 367 | break; |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 29222b964951..a7537c7bbd06 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -979,14 +979,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
979 | if (!before(TCP_SKB_CB(skb)->seq, end_seq)) | 979 | if (!before(TCP_SKB_CB(skb)->seq, end_seq)) |
980 | break; | 980 | break; |
981 | 981 | ||
982 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && | ||
983 | !before(end_seq, TCP_SKB_CB(skb)->end_seq); | ||
984 | |||
982 | pcount = tcp_skb_pcount(skb); | 985 | pcount = tcp_skb_pcount(skb); |
983 | 986 | ||
984 | if (pcount > 1 && | 987 | if (pcount > 1 && !in_sack && |
985 | (after(start_seq, TCP_SKB_CB(skb)->seq) || | 988 | after(TCP_SKB_CB(skb)->end_seq, start_seq)) { |
986 | before(end_seq, TCP_SKB_CB(skb)->end_seq))) { | ||
987 | unsigned int pkt_len; | 989 | unsigned int pkt_len; |
988 | 990 | ||
989 | if (after(start_seq, TCP_SKB_CB(skb)->seq)) | 991 | in_sack = !after(start_seq, |
992 | TCP_SKB_CB(skb)->seq); | ||
993 | |||
994 | if (!in_sack) | ||
990 | pkt_len = (start_seq - | 995 | pkt_len = (start_seq - |
991 | TCP_SKB_CB(skb)->seq); | 996 | TCP_SKB_CB(skb)->seq); |
992 | else | 997 | else |
@@ -999,9 +1004,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
999 | 1004 | ||
1000 | fack_count += pcount; | 1005 | fack_count += pcount; |
1001 | 1006 | ||
1002 | in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && | ||
1003 | !before(end_seq, TCP_SKB_CB(skb)->end_seq); | ||
1004 | |||
1005 | sacked = TCP_SKB_CB(skb)->sacked; | 1007 | sacked = TCP_SKB_CB(skb)->sacked; |
1006 | 1008 | ||
1007 | /* Account D-SACK for retransmitted packet. */ | 1009 | /* Account D-SACK for retransmitted packet. */ |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a88db28b0af7..b1a63b2c6b4a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
384 | newtp->frto_counter = 0; | 384 | newtp->frto_counter = 0; |
385 | newtp->frto_highmark = 0; | 385 | newtp->frto_highmark = 0; |
386 | 386 | ||
387 | newicsk->icsk_ca_ops = &tcp_reno; | 387 | newicsk->icsk_ca_ops = &tcp_init_congestion_ops; |
388 | 388 | ||
389 | tcp_set_ca_state(newsk, TCP_CA_Open); | 389 | tcp_set_ca_state(newsk, TCP_CA_Open); |
390 | tcp_init_xmit_timers(newsk); | 390 | tcp_init_xmit_timers(newsk); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c10e4435e3b1..5dd6dd7d091e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -435,6 +435,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
435 | int nsize, old_factor; | 435 | int nsize, old_factor; |
436 | u16 flags; | 436 | u16 flags; |
437 | 437 | ||
438 | BUG_ON(len >= skb->len); | ||
439 | |||
438 | nsize = skb_headlen(skb) - len; | 440 | nsize = skb_headlen(skb) - len; |
439 | if (nsize < 0) | 441 | if (nsize < 0) |
440 | nsize = 0; | 442 | nsize = 0; |
@@ -459,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
459 | flags = TCP_SKB_CB(skb)->flags; | 461 | flags = TCP_SKB_CB(skb)->flags; |
460 | TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); | 462 | TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); |
461 | TCP_SKB_CB(buff)->flags = flags; | 463 | TCP_SKB_CB(buff)->flags = flags; |
462 | TCP_SKB_CB(buff)->sacked = | 464 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; |
463 | (TCP_SKB_CB(skb)->sacked & | ||
464 | (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); | ||
465 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; | 465 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; |
466 | 466 | ||
467 | if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { | 467 | if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { |
@@ -499,6 +499,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
499 | tcp_skb_pcount(buff); | 499 | tcp_skb_pcount(buff); |
500 | 500 | ||
501 | tp->packets_out -= diff; | 501 | tp->packets_out -= diff; |
502 | |||
503 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) | ||
504 | tp->sacked_out -= diff; | ||
505 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) | ||
506 | tp->retrans_out -= diff; | ||
507 | |||
502 | if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { | 508 | if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { |
503 | tp->lost_out -= diff; | 509 | tp->lost_out -= diff; |
504 | tp->left_out -= diff; | 510 | tp->left_out -= diff; |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 29fed6e58d0a..519899fb11d5 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -1968,7 +1968,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) | |||
1968 | } | 1968 | } |
1969 | pmc->mca_sources = NULL; | 1969 | pmc->mca_sources = NULL; |
1970 | pmc->mca_sfmode = MCAST_EXCLUDE; | 1970 | pmc->mca_sfmode = MCAST_EXCLUDE; |
1971 | pmc->mca_sfcount[MCAST_EXCLUDE] = 0; | 1971 | pmc->mca_sfcount[MCAST_INCLUDE] = 0; |
1972 | pmc->mca_sfcount[MCAST_EXCLUDE] = 1; | 1972 | pmc->mca_sfcount[MCAST_EXCLUDE] = 1; |
1973 | } | 1973 | } |
1974 | 1974 | ||
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb2787f..2da514b16d95 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -1955,6 +1955,57 @@ static void __exit fini(void) | |||
1955 | #endif | 1955 | #endif |
1956 | } | 1956 | } |
1957 | 1957 | ||
1958 | /* | ||
1959 | * find specified header up to transport protocol header. | ||
1960 | * If found target header, the offset to the header is set to *offset | ||
1961 | * and return 0. otherwise, return -1. | ||
1962 | * | ||
1963 | * Notes: - non-1st Fragment Header isn't skipped. | ||
1964 | * - ESP header isn't skipped. | ||
1965 | * - The target header may be trancated. | ||
1966 | */ | ||
1967 | int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) | ||
1968 | { | ||
1969 | unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; | ||
1970 | u8 nexthdr = skb->nh.ipv6h->nexthdr; | ||
1971 | unsigned int len = skb->len - start; | ||
1972 | |||
1973 | while (nexthdr != target) { | ||
1974 | struct ipv6_opt_hdr _hdr, *hp; | ||
1975 | unsigned int hdrlen; | ||
1976 | |||
1977 | if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) | ||
1978 | return -1; | ||
1979 | hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); | ||
1980 | if (hp == NULL) | ||
1981 | return -1; | ||
1982 | if (nexthdr == NEXTHDR_FRAGMENT) { | ||
1983 | unsigned short _frag_off, *fp; | ||
1984 | fp = skb_header_pointer(skb, | ||
1985 | start+offsetof(struct frag_hdr, | ||
1986 | frag_off), | ||
1987 | sizeof(_frag_off), | ||
1988 | &_frag_off); | ||
1989 | if (fp == NULL) | ||
1990 | return -1; | ||
1991 | |||
1992 | if (ntohs(*fp) & ~0x7) | ||
1993 | return -1; | ||
1994 | hdrlen = 8; | ||
1995 | } else if (nexthdr == NEXTHDR_AUTH) | ||
1996 | hdrlen = (hp->hdrlen + 2) << 2; | ||
1997 | else | ||
1998 | hdrlen = ipv6_optlen(hp); | ||
1999 | |||
2000 | nexthdr = hp->nexthdr; | ||
2001 | len -= hdrlen; | ||
2002 | start += hdrlen; | ||
2003 | } | ||
2004 | |||
2005 | *offset = start; | ||
2006 | return 0; | ||
2007 | } | ||
2008 | |||
1958 | EXPORT_SYMBOL(ip6t_register_table); | 2009 | EXPORT_SYMBOL(ip6t_register_table); |
1959 | EXPORT_SYMBOL(ip6t_unregister_table); | 2010 | EXPORT_SYMBOL(ip6t_unregister_table); |
1960 | EXPORT_SYMBOL(ip6t_do_table); | 2011 | EXPORT_SYMBOL(ip6t_do_table); |
@@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); | |||
1963 | EXPORT_SYMBOL(ip6t_register_target); | 2014 | EXPORT_SYMBOL(ip6t_register_target); |
1964 | EXPORT_SYMBOL(ip6t_unregister_target); | 2015 | EXPORT_SYMBOL(ip6t_unregister_target); |
1965 | EXPORT_SYMBOL(ip6t_ext_hdr); | 2016 | EXPORT_SYMBOL(ip6t_ext_hdr); |
2017 | EXPORT_SYMBOL(ipv6_find_hdr); | ||
1966 | 2018 | ||
1967 | module_init(init); | 2019 | module_init(init); |
1968 | module_exit(fini); | 2020 | module_exit(fini); |
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d5b94f142bba..dde37793d20b 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c | |||
@@ -48,92 +48,21 @@ match(const struct sk_buff *skb, | |||
48 | unsigned int protoff, | 48 | unsigned int protoff, |
49 | int *hotdrop) | 49 | int *hotdrop) |
50 | { | 50 | { |
51 | struct ip_auth_hdr *ah = NULL, _ah; | 51 | struct ip_auth_hdr *ah, _ah; |
52 | const struct ip6t_ah *ahinfo = matchinfo; | 52 | const struct ip6t_ah *ahinfo = matchinfo; |
53 | unsigned int temp; | ||
54 | int len; | ||
55 | u8 nexthdr; | ||
56 | unsigned int ptr; | 53 | unsigned int ptr; |
57 | unsigned int hdrlen = 0; | 54 | unsigned int hdrlen = 0; |
58 | 55 | ||
59 | /*DEBUGP("IPv6 AH entered\n");*/ | 56 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) |
60 | /* if (opt->auth == 0) return 0; | ||
61 | * It does not filled on output */ | ||
62 | |||
63 | /* type of the 1st exthdr */ | ||
64 | nexthdr = skb->nh.ipv6h->nexthdr; | ||
65 | /* pointer to the 1st exthdr */ | ||
66 | ptr = sizeof(struct ipv6hdr); | ||
67 | /* available length */ | ||
68 | len = skb->len - ptr; | ||
69 | temp = 0; | ||
70 | |||
71 | while (ip6t_ext_hdr(nexthdr)) { | ||
72 | struct ipv6_opt_hdr _hdr, *hp; | ||
73 | |||
74 | DEBUGP("ipv6_ah header iteration \n"); | ||
75 | |||
76 | /* Is there enough space for the next ext header? */ | ||
77 | if (len < sizeof(struct ipv6_opt_hdr)) | ||
78 | return 0; | ||
79 | /* No more exthdr -> evaluate */ | ||
80 | if (nexthdr == NEXTHDR_NONE) | ||
81 | break; | ||
82 | /* ESP -> evaluate */ | ||
83 | if (nexthdr == NEXTHDR_ESP) | ||
84 | break; | ||
85 | |||
86 | hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); | ||
87 | BUG_ON(hp == NULL); | ||
88 | |||
89 | /* Calculate the header length */ | ||
90 | if (nexthdr == NEXTHDR_FRAGMENT) | ||
91 | hdrlen = 8; | ||
92 | else if (nexthdr == NEXTHDR_AUTH) | ||
93 | hdrlen = (hp->hdrlen+2)<<2; | ||
94 | else | ||
95 | hdrlen = ipv6_optlen(hp); | ||
96 | |||
97 | /* AH -> evaluate */ | ||
98 | if (nexthdr == NEXTHDR_AUTH) { | ||
99 | temp |= MASK_AH; | ||
100 | break; | ||
101 | } | ||
102 | |||
103 | |||
104 | /* set the flag */ | ||
105 | switch (nexthdr) { | ||
106 | case NEXTHDR_HOP: | ||
107 | case NEXTHDR_ROUTING: | ||
108 | case NEXTHDR_FRAGMENT: | ||
109 | case NEXTHDR_AUTH: | ||
110 | case NEXTHDR_DEST: | ||
111 | break; | ||
112 | default: | ||
113 | DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr); | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | nexthdr = hp->nexthdr; | ||
118 | len -= hdrlen; | ||
119 | ptr += hdrlen; | ||
120 | if (ptr > skb->len) { | ||
121 | DEBUGP("ipv6_ah: new pointer too large! \n"); | ||
122 | break; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | /* AH header not found */ | ||
127 | if (temp != MASK_AH) | ||
128 | return 0; | 57 | return 0; |
129 | 58 | ||
130 | if (len < sizeof(struct ip_auth_hdr)){ | 59 | ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); |
60 | if (ah == NULL) { | ||
131 | *hotdrop = 1; | 61 | *hotdrop = 1; |
132 | return 0; | 62 | return 0; |
133 | } | 63 | } |
134 | 64 | ||
135 | ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); | 65 | hdrlen = (ah->hdrlen + 2) << 2; |
136 | BUG_ON(ah == NULL); | ||
137 | 66 | ||
138 | DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); | 67 | DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); |
139 | DEBUGP("RES %04X ", ah->reserved); | 68 | DEBUGP("RES %04X ", ah->reserved); |
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 540925e4a7a8..c450a635e54b 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c | |||
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb, | |||
63 | struct ipv6_opt_hdr _optsh, *oh; | 63 | struct ipv6_opt_hdr _optsh, *oh; |
64 | const struct ip6t_opts *optinfo = matchinfo; | 64 | const struct ip6t_opts *optinfo = matchinfo; |
65 | unsigned int temp; | 65 | unsigned int temp; |
66 | unsigned int len; | ||
67 | u8 nexthdr; | ||
68 | unsigned int ptr; | 66 | unsigned int ptr; |
69 | unsigned int hdrlen = 0; | 67 | unsigned int hdrlen = 0; |
70 | unsigned int ret = 0; | 68 | unsigned int ret = 0; |
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb, | |||
72 | u8 _optlen, *lp = NULL; | 70 | u8 _optlen, *lp = NULL; |
73 | unsigned int optlen; | 71 | unsigned int optlen; |
74 | 72 | ||
75 | /* type of the 1st exthdr */ | ||
76 | nexthdr = skb->nh.ipv6h->nexthdr; | ||
77 | /* pointer to the 1st exthdr */ | ||
78 | ptr = sizeof(struct ipv6hdr); | ||
79 | /* available length */ | ||
80 | len = skb->len - ptr; | ||
81 | temp = 0; | ||
82 | |||
83 | while (ip6t_ext_hdr(nexthdr)) { | ||
84 | struct ipv6_opt_hdr _hdr, *hp; | ||
85 | |||
86 | DEBUGP("ipv6_opts header iteration \n"); | ||
87 | |||
88 | /* Is there enough space for the next ext header? */ | ||
89 | if (len < (int)sizeof(struct ipv6_opt_hdr)) | ||
90 | return 0; | ||
91 | /* No more exthdr -> evaluate */ | ||
92 | if (nexthdr == NEXTHDR_NONE) { | ||
93 | break; | ||
94 | } | ||
95 | /* ESP -> evaluate */ | ||
96 | if (nexthdr == NEXTHDR_ESP) { | ||
97 | break; | ||
98 | } | ||
99 | |||
100 | hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); | ||
101 | BUG_ON(hp == NULL); | ||
102 | |||
103 | /* Calculate the header length */ | ||
104 | if (nexthdr == NEXTHDR_FRAGMENT) { | ||
105 | hdrlen = 8; | ||
106 | } else if (nexthdr == NEXTHDR_AUTH) | ||
107 | hdrlen = (hp->hdrlen+2)<<2; | ||
108 | else | ||
109 | hdrlen = ipv6_optlen(hp); | ||
110 | |||
111 | /* OPTS -> evaluate */ | ||
112 | #if HOPBYHOP | 73 | #if HOPBYHOP |
113 | if (nexthdr == NEXTHDR_HOP) { | 74 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) |
114 | temp |= MASK_HOPOPTS; | ||
115 | #else | 75 | #else |
116 | if (nexthdr == NEXTHDR_DEST) { | 76 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) |
117 | temp |= MASK_DSTOPTS; | ||
118 | #endif | 77 | #endif |
119 | break; | 78 | return 0; |
120 | } | ||
121 | |||
122 | 79 | ||
123 | /* set the flag */ | 80 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); |
124 | switch (nexthdr){ | 81 | if (oh == NULL){ |
125 | case NEXTHDR_HOP: | ||
126 | case NEXTHDR_ROUTING: | ||
127 | case NEXTHDR_FRAGMENT: | ||
128 | case NEXTHDR_AUTH: | ||
129 | case NEXTHDR_DEST: | ||
130 | break; | ||
131 | default: | ||
132 | DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); | ||
133 | return 0; | ||
134 | break; | ||
135 | } | ||
136 | |||
137 | nexthdr = hp->nexthdr; | ||
138 | len -= hdrlen; | ||
139 | ptr += hdrlen; | ||
140 | if ( ptr > skb->len ) { | ||
141 | DEBUGP("ipv6_opts: new pointer is too large! \n"); | ||
142 | break; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* OPTIONS header not found */ | ||
147 | #if HOPBYHOP | ||
148 | if ( temp != MASK_HOPOPTS ) return 0; | ||
149 | #else | ||
150 | if ( temp != MASK_DSTOPTS ) return 0; | ||
151 | #endif | ||
152 | |||
153 | if (len < (int)sizeof(struct ipv6_opt_hdr)){ | ||
154 | *hotdrop = 1; | 82 | *hotdrop = 1; |
155 | return 0; | 83 | return 0; |
156 | } | 84 | } |
157 | 85 | ||
158 | if (len < hdrlen){ | 86 | hdrlen = ipv6_optlen(oh); |
87 | if (skb->len - ptr < hdrlen){ | ||
159 | /* Packet smaller than its length field */ | 88 | /* Packet smaller than its length field */ |
160 | return 0; | 89 | return 0; |
161 | } | 90 | } |
162 | 91 | ||
163 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); | ||
164 | BUG_ON(oh == NULL); | ||
165 | |||
166 | DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); | 92 | DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); |
167 | 93 | ||
168 | DEBUGP("len %02X %04X %02X ", | 94 | DEBUGP("len %02X %04X %02X ", |
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index e39dd236fd8e..24bc0cde43a1 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c | |||
@@ -48,87 +48,22 @@ match(const struct sk_buff *skb, | |||
48 | unsigned int protoff, | 48 | unsigned int protoff, |
49 | int *hotdrop) | 49 | int *hotdrop) |
50 | { | 50 | { |
51 | struct ip_esp_hdr _esp, *eh = NULL; | 51 | struct ip_esp_hdr _esp, *eh; |
52 | const struct ip6t_esp *espinfo = matchinfo; | 52 | const struct ip6t_esp *espinfo = matchinfo; |
53 | unsigned int temp; | ||
54 | int len; | ||
55 | u8 nexthdr; | ||
56 | unsigned int ptr; | 53 | unsigned int ptr; |
57 | 54 | ||
58 | /* Make sure this isn't an evil packet */ | 55 | /* Make sure this isn't an evil packet */ |
59 | /*DEBUGP("ipv6_esp entered \n");*/ | 56 | /*DEBUGP("ipv6_esp entered \n");*/ |
60 | 57 | ||
61 | /* type of the 1st exthdr */ | 58 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) |
62 | nexthdr = skb->nh.ipv6h->nexthdr; | ||
63 | /* pointer to the 1st exthdr */ | ||
64 | ptr = sizeof(struct ipv6hdr); | ||
65 | /* available length */ | ||
66 | len = skb->len - ptr; | ||
67 | temp = 0; | ||
68 | |||
69 | while (ip6t_ext_hdr(nexthdr)) { | ||
70 | struct ipv6_opt_hdr _hdr, *hp; | ||
71 | int hdrlen; | ||
72 | |||
73 | DEBUGP("ipv6_esp header iteration \n"); | ||
74 | |||
75 | /* Is there enough space for the next ext header? */ | ||
76 | if (len < sizeof(struct ipv6_opt_hdr)) | ||
77 | return 0; | ||
78 | /* No more exthdr -> evaluate */ | ||
79 | if (nexthdr == NEXTHDR_NONE) | ||
80 | break; | ||
81 | /* ESP -> evaluate */ | ||
82 | if (nexthdr == NEXTHDR_ESP) { | ||
83 | temp |= MASK_ESP; | ||
84 | break; | ||
85 | } | ||
86 | |||
87 | hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); | ||
88 | BUG_ON(hp == NULL); | ||
89 | |||
90 | /* Calculate the header length */ | ||
91 | if (nexthdr == NEXTHDR_FRAGMENT) | ||
92 | hdrlen = 8; | ||
93 | else if (nexthdr == NEXTHDR_AUTH) | ||
94 | hdrlen = (hp->hdrlen+2)<<2; | ||
95 | else | ||
96 | hdrlen = ipv6_optlen(hp); | ||
97 | |||
98 | /* set the flag */ | ||
99 | switch (nexthdr) { | ||
100 | case NEXTHDR_HOP: | ||
101 | case NEXTHDR_ROUTING: | ||
102 | case NEXTHDR_FRAGMENT: | ||
103 | case NEXTHDR_AUTH: | ||
104 | case NEXTHDR_DEST: | ||
105 | break; | ||
106 | default: | ||
107 | DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr); | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | nexthdr = hp->nexthdr; | ||
112 | len -= hdrlen; | ||
113 | ptr += hdrlen; | ||
114 | if (ptr > skb->len) { | ||
115 | DEBUGP("ipv6_esp: new pointer too large! \n"); | ||
116 | break; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | /* ESP header not found */ | ||
121 | if (temp != MASK_ESP) | ||
122 | return 0; | 59 | return 0; |
123 | 60 | ||
124 | if (len < sizeof(struct ip_esp_hdr)) { | 61 | eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); |
62 | if (eh == NULL) { | ||
125 | *hotdrop = 1; | 63 | *hotdrop = 1; |
126 | return 0; | 64 | return 0; |
127 | } | 65 | } |
128 | 66 | ||
129 | eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); | ||
130 | BUG_ON(eh == NULL); | ||
131 | |||
132 | DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); | 67 | DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); |
133 | 68 | ||
134 | return (eh != NULL) | 69 | return (eh != NULL) |
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 4bfa30a9bc80..085d5f8eea29 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c | |||
@@ -48,90 +48,18 @@ match(const struct sk_buff *skb, | |||
48 | unsigned int protoff, | 48 | unsigned int protoff, |
49 | int *hotdrop) | 49 | int *hotdrop) |
50 | { | 50 | { |
51 | struct frag_hdr _frag, *fh = NULL; | 51 | struct frag_hdr _frag, *fh; |
52 | const struct ip6t_frag *fraginfo = matchinfo; | 52 | const struct ip6t_frag *fraginfo = matchinfo; |
53 | unsigned int temp; | ||
54 | int len; | ||
55 | u8 nexthdr; | ||
56 | unsigned int ptr; | 53 | unsigned int ptr; |
57 | unsigned int hdrlen = 0; | ||
58 | |||
59 | /* type of the 1st exthdr */ | ||
60 | nexthdr = skb->nh.ipv6h->nexthdr; | ||
61 | /* pointer to the 1st exthdr */ | ||
62 | ptr = sizeof(struct ipv6hdr); | ||
63 | /* available length */ | ||
64 | len = skb->len - ptr; | ||
65 | temp = 0; | ||
66 | |||
67 | while (ip6t_ext_hdr(nexthdr)) { | ||
68 | struct ipv6_opt_hdr _hdr, *hp; | ||
69 | |||
70 | DEBUGP("ipv6_frag header iteration \n"); | ||
71 | |||
72 | /* Is there enough space for the next ext header? */ | ||
73 | if (len < (int)sizeof(struct ipv6_opt_hdr)) | ||
74 | return 0; | ||
75 | /* No more exthdr -> evaluate */ | ||
76 | if (nexthdr == NEXTHDR_NONE) { | ||
77 | break; | ||
78 | } | ||
79 | /* ESP -> evaluate */ | ||
80 | if (nexthdr == NEXTHDR_ESP) { | ||
81 | break; | ||
82 | } | ||
83 | |||
84 | hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); | ||
85 | BUG_ON(hp == NULL); | ||
86 | |||
87 | /* Calculate the header length */ | ||
88 | if (nexthdr == NEXTHDR_FRAGMENT) { | ||
89 | hdrlen = 8; | ||
90 | } else if (nexthdr == NEXTHDR_AUTH) | ||
91 | hdrlen = (hp->hdrlen+2)<<2; | ||
92 | else | ||
93 | hdrlen = ipv6_optlen(hp); | ||
94 | |||
95 | /* FRAG -> evaluate */ | ||
96 | if (nexthdr == NEXTHDR_FRAGMENT) { | ||
97 | temp |= MASK_FRAGMENT; | ||
98 | break; | ||
99 | } | ||
100 | |||
101 | |||
102 | /* set the flag */ | ||
103 | switch (nexthdr){ | ||
104 | case NEXTHDR_HOP: | ||
105 | case NEXTHDR_ROUTING: | ||
106 | case NEXTHDR_FRAGMENT: | ||
107 | case NEXTHDR_AUTH: | ||
108 | case NEXTHDR_DEST: | ||
109 | break; | ||
110 | default: | ||
111 | DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr); | ||
112 | return 0; | ||
113 | break; | ||
114 | } | ||
115 | |||
116 | nexthdr = hp->nexthdr; | ||
117 | len -= hdrlen; | ||
118 | ptr += hdrlen; | ||
119 | if ( ptr > skb->len ) { | ||
120 | DEBUGP("ipv6_frag: new pointer too large! \n"); | ||
121 | break; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* FRAG header not found */ | ||
126 | if ( temp != MASK_FRAGMENT ) return 0; | ||
127 | |||
128 | if (len < sizeof(struct frag_hdr)){ | ||
129 | *hotdrop = 1; | ||
130 | return 0; | ||
131 | } | ||
132 | 54 | ||
133 | fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); | 55 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) |
134 | BUG_ON(fh == NULL); | 56 | return 0; |
57 | |||
58 | fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); | ||
59 | if (fh == NULL){ | ||
60 | *hotdrop = 1; | ||
61 | return 0; | ||
62 | } | ||
135 | 63 | ||
136 | DEBUGP("INFO %04X ", fh->frag_off); | 64 | DEBUGP("INFO %04X ", fh->frag_off); |
137 | DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); | 65 | DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); |
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 27f3650d127e..1d09485111d0 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c | |||
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb, | |||
63 | struct ipv6_opt_hdr _optsh, *oh; | 63 | struct ipv6_opt_hdr _optsh, *oh; |
64 | const struct ip6t_opts *optinfo = matchinfo; | 64 | const struct ip6t_opts *optinfo = matchinfo; |
65 | unsigned int temp; | 65 | unsigned int temp; |
66 | unsigned int len; | ||
67 | u8 nexthdr; | ||
68 | unsigned int ptr; | 66 | unsigned int ptr; |
69 | unsigned int hdrlen = 0; | 67 | unsigned int hdrlen = 0; |
70 | unsigned int ret = 0; | 68 | unsigned int ret = 0; |
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb, | |||
72 | u8 _optlen, *lp = NULL; | 70 | u8 _optlen, *lp = NULL; |
73 | unsigned int optlen; | 71 | unsigned int optlen; |
74 | 72 | ||
75 | /* type of the 1st exthdr */ | ||
76 | nexthdr = skb->nh.ipv6h->nexthdr; | ||
77 | /* pointer to the 1st exthdr */ | ||
78 | ptr = sizeof(struct ipv6hdr); | ||
79 | /* available length */ | ||
80 | len = skb->len - ptr; | ||
81 | temp = 0; | ||
82 | |||
83 | while (ip6t_ext_hdr(nexthdr)) { | ||
84 | struct ipv6_opt_hdr _hdr, *hp; | ||
85 | |||
86 | DEBUGP("ipv6_opts header iteration \n"); | ||
87 | |||
88 | /* Is there enough space for the next ext header? */ | ||
89 | if (len < (int)sizeof(struct ipv6_opt_hdr)) | ||
90 | return 0; | ||
91 | /* No more exthdr -> evaluate */ | ||
92 | if (nexthdr == NEXTHDR_NONE) { | ||
93 | break; | ||
94 | } | ||
95 | /* ESP -> evaluate */ | ||
96 | if (nexthdr == NEXTHDR_ESP) { | ||
97 | break; | ||
98 | } | ||
99 | |||
100 | hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); | ||
101 | BUG_ON(hp == NULL); | ||
102 | |||
103 | /* Calculate the header length */ | ||
104 | if (nexthdr == NEXTHDR_FRAGMENT) { | ||
105 | hdrlen = 8; | ||
106 | } else if (nexthdr == NEXTHDR_AUTH) | ||
107 | hdrlen = (hp->hdrlen+2)<<2; | ||
108 | else | ||
109 | hdrlen = ipv6_optlen(hp); | ||
110 | |||
111 | /* OPTS -> evaluate */ | ||
112 | #if HOPBYHOP | 73 | #if HOPBYHOP |
113 | if (nexthdr == NEXTHDR_HOP) { | 74 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) |
114 | temp |= MASK_HOPOPTS; | ||
115 | #else | 75 | #else |
116 | if (nexthdr == NEXTHDR_DEST) { | 76 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) |
117 | temp |= MASK_DSTOPTS; | ||
118 | #endif | 77 | #endif |
119 | break; | 78 | return 0; |
120 | } | ||
121 | |||
122 | 79 | ||
123 | /* set the flag */ | 80 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); |
124 | switch (nexthdr){ | 81 | if (oh == NULL){ |
125 | case NEXTHDR_HOP: | ||
126 | case NEXTHDR_ROUTING: | ||
127 | case NEXTHDR_FRAGMENT: | ||
128 | case NEXTHDR_AUTH: | ||
129 | case NEXTHDR_DEST: | ||
130 | break; | ||
131 | default: | ||
132 | DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); | ||
133 | return 0; | ||
134 | break; | ||
135 | } | ||
136 | |||
137 | nexthdr = hp->nexthdr; | ||
138 | len -= hdrlen; | ||
139 | ptr += hdrlen; | ||
140 | if ( ptr > skb->len ) { | ||
141 | DEBUGP("ipv6_opts: new pointer is too large! \n"); | ||
142 | break; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* OPTIONS header not found */ | ||
147 | #if HOPBYHOP | ||
148 | if ( temp != MASK_HOPOPTS ) return 0; | ||
149 | #else | ||
150 | if ( temp != MASK_DSTOPTS ) return 0; | ||
151 | #endif | ||
152 | |||
153 | if (len < (int)sizeof(struct ipv6_opt_hdr)){ | ||
154 | *hotdrop = 1; | 82 | *hotdrop = 1; |
155 | return 0; | 83 | return 0; |
156 | } | 84 | } |
157 | 85 | ||
158 | if (len < hdrlen){ | 86 | hdrlen = ipv6_optlen(oh); |
87 | if (skb->len - ptr < hdrlen){ | ||
159 | /* Packet smaller than it's length field */ | 88 | /* Packet smaller than it's length field */ |
160 | return 0; | 89 | return 0; |
161 | } | 90 | } |
162 | 91 | ||
163 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); | ||
164 | BUG_ON(oh == NULL); | ||
165 | |||
166 | DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); | 92 | DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); |
167 | 93 | ||
168 | DEBUGP("len %02X %04X %02X ", | 94 | DEBUGP("len %02X %04X %02X ", |
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2bb670037df3..beb2fd5cebbb 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c | |||
@@ -50,98 +50,29 @@ match(const struct sk_buff *skb, | |||
50 | unsigned int protoff, | 50 | unsigned int protoff, |
51 | int *hotdrop) | 51 | int *hotdrop) |
52 | { | 52 | { |
53 | struct ipv6_rt_hdr _route, *rh = NULL; | 53 | struct ipv6_rt_hdr _route, *rh; |
54 | const struct ip6t_rt *rtinfo = matchinfo; | 54 | const struct ip6t_rt *rtinfo = matchinfo; |
55 | unsigned int temp; | 55 | unsigned int temp; |
56 | unsigned int len; | ||
57 | u8 nexthdr; | ||
58 | unsigned int ptr; | 56 | unsigned int ptr; |
59 | unsigned int hdrlen = 0; | 57 | unsigned int hdrlen = 0; |
60 | unsigned int ret = 0; | 58 | unsigned int ret = 0; |
61 | struct in6_addr *ap, _addr; | 59 | struct in6_addr *ap, _addr; |
62 | 60 | ||
63 | /* type of the 1st exthdr */ | 61 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) |
64 | nexthdr = skb->nh.ipv6h->nexthdr; | 62 | return 0; |
65 | /* pointer to the 1st exthdr */ | ||
66 | ptr = sizeof(struct ipv6hdr); | ||
67 | /* available length */ | ||
68 | len = skb->len - ptr; | ||
69 | temp = 0; | ||
70 | 63 | ||
71 | while (ip6t_ext_hdr(nexthdr)) { | 64 | rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); |
72 | struct ipv6_opt_hdr _hdr, *hp; | 65 | if (rh == NULL){ |
73 | |||
74 | DEBUGP("ipv6_rt header iteration \n"); | ||
75 | |||
76 | /* Is there enough space for the next ext header? */ | ||
77 | if (len < (int)sizeof(struct ipv6_opt_hdr)) | ||
78 | return 0; | ||
79 | /* No more exthdr -> evaluate */ | ||
80 | if (nexthdr == NEXTHDR_NONE) { | ||
81 | break; | ||
82 | } | ||
83 | /* ESP -> evaluate */ | ||
84 | if (nexthdr == NEXTHDR_ESP) { | ||
85 | break; | ||
86 | } | ||
87 | |||
88 | hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); | ||
89 | BUG_ON(hp == NULL); | ||
90 | |||
91 | /* Calculate the header length */ | ||
92 | if (nexthdr == NEXTHDR_FRAGMENT) { | ||
93 | hdrlen = 8; | ||
94 | } else if (nexthdr == NEXTHDR_AUTH) | ||
95 | hdrlen = (hp->hdrlen+2)<<2; | ||
96 | else | ||
97 | hdrlen = ipv6_optlen(hp); | ||
98 | |||
99 | /* ROUTING -> evaluate */ | ||
100 | if (nexthdr == NEXTHDR_ROUTING) { | ||
101 | temp |= MASK_ROUTING; | ||
102 | break; | ||
103 | } | ||
104 | |||
105 | |||
106 | /* set the flag */ | ||
107 | switch (nexthdr){ | ||
108 | case NEXTHDR_HOP: | ||
109 | case NEXTHDR_ROUTING: | ||
110 | case NEXTHDR_FRAGMENT: | ||
111 | case NEXTHDR_AUTH: | ||
112 | case NEXTHDR_DEST: | ||
113 | break; | ||
114 | default: | ||
115 | DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr); | ||
116 | return 0; | ||
117 | break; | ||
118 | } | ||
119 | |||
120 | nexthdr = hp->nexthdr; | ||
121 | len -= hdrlen; | ||
122 | ptr += hdrlen; | ||
123 | if ( ptr > skb->len ) { | ||
124 | DEBUGP("ipv6_rt: new pointer is too large! \n"); | ||
125 | break; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /* ROUTING header not found */ | ||
130 | if ( temp != MASK_ROUTING ) return 0; | ||
131 | |||
132 | if (len < (int)sizeof(struct ipv6_rt_hdr)){ | ||
133 | *hotdrop = 1; | 66 | *hotdrop = 1; |
134 | return 0; | 67 | return 0; |
135 | } | 68 | } |
136 | 69 | ||
137 | if (len < hdrlen){ | 70 | hdrlen = ipv6_optlen(rh); |
71 | if (skb->len - ptr < hdrlen){ | ||
138 | /* Pcket smaller than its length field */ | 72 | /* Pcket smaller than its length field */ |
139 | return 0; | 73 | return 0; |
140 | } | 74 | } |
141 | 75 | ||
142 | rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); | ||
143 | BUG_ON(rh == NULL); | ||
144 | |||
145 | DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); | 76 | DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); |
146 | DEBUGP("TYPE %04X ", rh->type); | 77 | DEBUGP("TYPE %04X ", rh->type); |
147 | DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); | 78 | DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); |
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5aa3691c578d..a1265a320b11 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
627 | 627 | ||
628 | if (type && code) { | 628 | if (type && code) { |
629 | get_user(fl->fl_icmp_type, type); | 629 | get_user(fl->fl_icmp_type, type); |
630 | __get_user(fl->fl_icmp_code, code); | 630 | get_user(fl->fl_icmp_code, code); |
631 | probed = 1; | 631 | probed = 1; |
632 | } | 632 | } |
633 | break; | 633 | break; |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 69b146843a20..6001948600f3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -405,9 +405,8 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, | |||
405 | continue; | 405 | continue; |
406 | 406 | ||
407 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 407 | if (!ipv6_addr_any(&np->rcv_saddr)) { |
408 | if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) | 408 | if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) |
409 | return s; | 409 | continue; |
410 | continue; | ||
411 | } | 410 | } |
412 | if(!inet6_mc_check(s, loc_addr, rmt_addr)) | 411 | if(!inet6_mc_check(s, loc_addr, rmt_addr)) |
413 | continue; | 412 | continue; |
@@ -640,6 +639,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, | |||
640 | int tclass = -1; | 639 | int tclass = -1; |
641 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 640 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
642 | int err; | 641 | int err; |
642 | int connected = 0; | ||
643 | 643 | ||
644 | /* destination address check */ | 644 | /* destination address check */ |
645 | if (sin6) { | 645 | if (sin6) { |
@@ -749,6 +749,7 @@ do_udp_sendmsg: | |||
749 | fl->fl_ip_dport = inet->dport; | 749 | fl->fl_ip_dport = inet->dport; |
750 | daddr = &np->daddr; | 750 | daddr = &np->daddr; |
751 | fl->fl6_flowlabel = np->flow_label; | 751 | fl->fl6_flowlabel = np->flow_label; |
752 | connected = 1; | ||
752 | } | 753 | } |
753 | 754 | ||
754 | if (!fl->oif) | 755 | if (!fl->oif) |
@@ -771,6 +772,7 @@ do_udp_sendmsg: | |||
771 | } | 772 | } |
772 | if (!(opt->opt_nflen|opt->opt_flen)) | 773 | if (!(opt->opt_nflen|opt->opt_flen)) |
773 | opt = NULL; | 774 | opt = NULL; |
775 | connected = 0; | ||
774 | } | 776 | } |
775 | if (opt == NULL) | 777 | if (opt == NULL) |
776 | opt = np->opt; | 778 | opt = np->opt; |
@@ -788,10 +790,13 @@ do_udp_sendmsg: | |||
788 | ipv6_addr_copy(&final, &fl->fl6_dst); | 790 | ipv6_addr_copy(&final, &fl->fl6_dst); |
789 | ipv6_addr_copy(&fl->fl6_dst, rt0->addr); | 791 | ipv6_addr_copy(&fl->fl6_dst, rt0->addr); |
790 | final_p = &final; | 792 | final_p = &final; |
793 | connected = 0; | ||
791 | } | 794 | } |
792 | 795 | ||
793 | if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) | 796 | if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { |
794 | fl->oif = np->mcast_oif; | 797 | fl->oif = np->mcast_oif; |
798 | connected = 0; | ||
799 | } | ||
795 | 800 | ||
796 | err = ip6_dst_lookup(sk, &dst, fl); | 801 | err = ip6_dst_lookup(sk, &dst, fl); |
797 | if (err) | 802 | if (err) |
@@ -847,7 +852,7 @@ do_append_data: | |||
847 | else if (!corkreq) | 852 | else if (!corkreq) |
848 | err = udp_v6_push_pending_frames(sk, up); | 853 | err = udp_v6_push_pending_frames(sk, up); |
849 | 854 | ||
850 | if (dst) | 855 | if (dst && connected) |
851 | ip6_dst_store(sk, dst, | 856 | ip6_dst_store(sk, dst, |
852 | ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? | 857 | ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? |
853 | &np->daddr : NULL); | 858 | &np->daddr : NULL); |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8690f171c1ef..ee865d88183b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -36,6 +36,11 @@ | |||
36 | * Michal Ostrowski : Module initialization cleanup. | 36 | * Michal Ostrowski : Module initialization cleanup. |
37 | * Ulises Alonso : Frame number limit removal and | 37 | * Ulises Alonso : Frame number limit removal and |
38 | * packet_set_ring memory leak. | 38 | * packet_set_ring memory leak. |
39 | * Eric Biederman : Allow for > 8 byte hardware addresses. | ||
40 | * The convention is that longer addresses | ||
41 | * will simply extend the hardware address | ||
42 | * byte arrays at the end of sockaddr_ll | ||
43 | * and packet_mreq. | ||
39 | * | 44 | * |
40 | * This program is free software; you can redistribute it and/or | 45 | * This program is free software; you can redistribute it and/or |
41 | * modify it under the terms of the GNU General Public License | 46 | * modify it under the terms of the GNU General Public License |
@@ -161,7 +166,17 @@ struct packet_mclist | |||
161 | int count; | 166 | int count; |
162 | unsigned short type; | 167 | unsigned short type; |
163 | unsigned short alen; | 168 | unsigned short alen; |
164 | unsigned char addr[8]; | 169 | unsigned char addr[MAX_ADDR_LEN]; |
170 | }; | ||
171 | /* identical to struct packet_mreq except it has | ||
172 | * a longer address field. | ||
173 | */ | ||
174 | struct packet_mreq_max | ||
175 | { | ||
176 | int mr_ifindex; | ||
177 | unsigned short mr_type; | ||
178 | unsigned short mr_alen; | ||
179 | unsigned char mr_address[MAX_ADDR_LEN]; | ||
165 | }; | 180 | }; |
166 | #endif | 181 | #endif |
167 | #ifdef CONFIG_PACKET_MMAP | 182 | #ifdef CONFIG_PACKET_MMAP |
@@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
716 | err = -EINVAL; | 731 | err = -EINVAL; |
717 | if (msg->msg_namelen < sizeof(struct sockaddr_ll)) | 732 | if (msg->msg_namelen < sizeof(struct sockaddr_ll)) |
718 | goto out; | 733 | goto out; |
734 | if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) | ||
735 | goto out; | ||
719 | ifindex = saddr->sll_ifindex; | 736 | ifindex = saddr->sll_ifindex; |
720 | proto = saddr->sll_protocol; | 737 | proto = saddr->sll_protocol; |
721 | addr = saddr->sll_addr; | 738 | addr = saddr->sll_addr; |
@@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
744 | if (dev->hard_header) { | 761 | if (dev->hard_header) { |
745 | int res; | 762 | int res; |
746 | err = -EINVAL; | 763 | err = -EINVAL; |
764 | if (saddr) { | ||
765 | if (saddr->sll_halen != dev->addr_len) | ||
766 | goto out_free; | ||
767 | if (saddr->sll_hatype != dev->type) | ||
768 | goto out_free; | ||
769 | } | ||
747 | res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); | 770 | res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); |
748 | if (sock->type != SOCK_DGRAM) { | 771 | if (sock->type != SOCK_DGRAM) { |
749 | skb->tail = skb->data; | 772 | skb->tail = skb->data; |
@@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1045 | struct sock *sk = sock->sk; | 1068 | struct sock *sk = sock->sk; |
1046 | struct sk_buff *skb; | 1069 | struct sk_buff *skb; |
1047 | int copied, err; | 1070 | int copied, err; |
1071 | struct sockaddr_ll *sll; | ||
1048 | 1072 | ||
1049 | err = -EINVAL; | 1073 | err = -EINVAL; |
1050 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) | 1074 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) |
@@ -1057,16 +1081,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1057 | #endif | 1081 | #endif |
1058 | 1082 | ||
1059 | /* | 1083 | /* |
1060 | * If the address length field is there to be filled in, we fill | ||
1061 | * it in now. | ||
1062 | */ | ||
1063 | |||
1064 | if (sock->type == SOCK_PACKET) | ||
1065 | msg->msg_namelen = sizeof(struct sockaddr_pkt); | ||
1066 | else | ||
1067 | msg->msg_namelen = sizeof(struct sockaddr_ll); | ||
1068 | |||
1069 | /* | ||
1070 | * Call the generic datagram receiver. This handles all sorts | 1084 | * Call the generic datagram receiver. This handles all sorts |
1071 | * of horrible races and re-entrancy so we can forget about it | 1085 | * of horrible races and re-entrancy so we can forget about it |
1072 | * in the protocol layers. | 1086 | * in the protocol layers. |
@@ -1087,6 +1101,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1087 | goto out; | 1101 | goto out; |
1088 | 1102 | ||
1089 | /* | 1103 | /* |
1104 | * If the address length field is there to be filled in, we fill | ||
1105 | * it in now. | ||
1106 | */ | ||
1107 | |||
1108 | sll = (struct sockaddr_ll*)skb->cb; | ||
1109 | if (sock->type == SOCK_PACKET) | ||
1110 | msg->msg_namelen = sizeof(struct sockaddr_pkt); | ||
1111 | else | ||
1112 | msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); | ||
1113 | |||
1114 | /* | ||
1090 | * You lose any data beyond the buffer you gave. If it worries a | 1115 | * You lose any data beyond the buffer you gave. If it worries a |
1091 | * user program they can ask the device for its MTU anyway. | 1116 | * user program they can ask the device for its MTU anyway. |
1092 | */ | 1117 | */ |
@@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, | |||
1166 | sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ | 1191 | sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ |
1167 | sll->sll_halen = 0; | 1192 | sll->sll_halen = 0; |
1168 | } | 1193 | } |
1169 | *uaddr_len = sizeof(*sll); | 1194 | *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; |
1170 | 1195 | ||
1171 | return 0; | 1196 | return 0; |
1172 | } | 1197 | } |
@@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i | |||
1199 | } | 1224 | } |
1200 | } | 1225 | } |
1201 | 1226 | ||
1202 | static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) | 1227 | static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) |
1203 | { | 1228 | { |
1204 | struct packet_sock *po = pkt_sk(sk); | 1229 | struct packet_sock *po = pkt_sk(sk); |
1205 | struct packet_mclist *ml, *i; | 1230 | struct packet_mclist *ml, *i; |
@@ -1249,7 +1274,7 @@ done: | |||
1249 | return err; | 1274 | return err; |
1250 | } | 1275 | } |
1251 | 1276 | ||
1252 | static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) | 1277 | static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) |
1253 | { | 1278 | { |
1254 | struct packet_mclist *ml, **mlp; | 1279 | struct packet_mclist *ml, **mlp; |
1255 | 1280 | ||
@@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1315 | case PACKET_ADD_MEMBERSHIP: | 1340 | case PACKET_ADD_MEMBERSHIP: |
1316 | case PACKET_DROP_MEMBERSHIP: | 1341 | case PACKET_DROP_MEMBERSHIP: |
1317 | { | 1342 | { |
1318 | struct packet_mreq mreq; | 1343 | struct packet_mreq_max mreq; |
1319 | if (optlen<sizeof(mreq)) | 1344 | int len = optlen; |
1345 | memset(&mreq, 0, sizeof(mreq)); | ||
1346 | if (len < sizeof(struct packet_mreq)) | ||
1320 | return -EINVAL; | 1347 | return -EINVAL; |
1321 | if (copy_from_user(&mreq,optval,sizeof(mreq))) | 1348 | if (len > sizeof(mreq)) |
1349 | len = sizeof(mreq); | ||
1350 | if (copy_from_user(&mreq,optval,len)) | ||
1322 | return -EFAULT; | 1351 | return -EFAULT; |
1352 | if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) | ||
1353 | return -EINVAL; | ||
1323 | if (optname == PACKET_ADD_MEMBERSHIP) | 1354 | if (optname == PACKET_ADD_MEMBERSHIP) |
1324 | ret = packet_mc_add(sk, &mreq); | 1355 | ret = packet_mc_add(sk, &mreq); |
1325 | else | 1356 | else |
diff --git a/net/socket.c b/net/socket.c index c699e93c33d7..f9264472377f 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1862 | if (err < 0) | 1862 | if (err < 0) |
1863 | goto out_freeiov; | 1863 | goto out_freeiov; |
1864 | } | 1864 | } |
1865 | err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); | 1865 | err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), |
1866 | COMPAT_FLAGS(msg)); | ||
1866 | if (err) | 1867 | if (err) |
1867 | goto out_freeiov; | 1868 | goto out_freeiov; |
1868 | if (MSG_CMSG_COMPAT & flags) | 1869 | if (MSG_CMSG_COMPAT & flags) |