aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig3
-rw-r--r--net/dccp/Makefile2
-rw-r--r--net/dccp/ackvec.c419
-rw-r--r--net/dccp/ackvec.h133
-rw-r--r--net/dccp/ccid.h31
-rw-r--r--net/dccp/ccids/ccid3.c56
-rw-r--r--net/dccp/ccids/ccid3.h23
-rw-r--r--net/dccp/dccp.h91
-rw-r--r--net/dccp/input.c89
-rw-r--r--net/dccp/ipv4.c101
-rw-r--r--net/dccp/minisocks.c19
-rw-r--r--net/dccp/options.c443
-rw-r--r--net/dccp/output.c26
-rw-r--r--net/dccp/proto.c94
-rw-r--r--net/ipv4/netfilter/Kconfig14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c223
-rw-r--r--net/ipv6/udp.c10
-rw-r--r--net/socket.c3
19 files changed, 1005 insertions, 780 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 2bdd5623fdd5..60f6f321bd76 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER
140 140
141 If unsure, say N. 141 If unsure, say N.
142 142
143source "net/netfilter/Kconfig"
143source "net/ipv4/netfilter/Kconfig" 144source "net/ipv4/netfilter/Kconfig"
144source "net/ipv6/netfilter/Kconfig" 145source "net/ipv6/netfilter/Kconfig"
145source "net/decnet/netfilter/Kconfig" 146source "net/decnet/netfilter/Kconfig"
@@ -206,8 +207,6 @@ config NET_PKTGEN
206 To compile this code as a module, choose M here: the 207 To compile this code as a module, choose M here: the
207 module will be called pktgen. 208 module will be called pktgen.
208 209
209source "net/netfilter/Kconfig"
210
211endmenu 210endmenu
212 211
213endmenu 212endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index fb97bb042455..344a8da153fc 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o
3dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ 3dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
4 timer.o 4 timer.o
5 5
6dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
7
6obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o 8obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
7 9
8dccp_diag-y := diag.o 10dccp_diag-y := diag.o
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
new file mode 100644
index 000000000000..6530283eafca
--- /dev/null
+++ b/net/dccp/ackvec.c
@@ -0,0 +1,419 @@
1/*
2 * net/dccp/ackvec.c
3 *
4 * An implementation of the DCCP protocol
5 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; version 2 of the License;
10 */
11
12#include "ackvec.h"
13#include "dccp.h"
14
15#include <linux/dccp.h>
16#include <linux/skbuff.h>
17
18#include <net/sock.h>
19
20int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
21{
22 struct dccp_sock *dp = dccp_sk(sk);
23 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
24 int len = av->dccpav_vec_len + 2;
25 struct timeval now;
26 u32 elapsed_time;
27 unsigned char *to, *from;
28
29 dccp_timestamp(sk, &now);
30 elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
31
32 if (elapsed_time != 0)
33 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
34
35 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
36 return -1;
37
38 /*
39 * XXX: now we have just one ack vector sent record, so
40 * we have to wait for it to be cleared.
41 *
42 * Of course this is not acceptable, but this is just for
43 * basic testing now.
44 */
45 if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1)
46 return -1;
47
48 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
49
50 to = skb_push(skb, len);
51 *to++ = DCCPO_ACK_VECTOR_0;
52 *to++ = len;
53
54 len = av->dccpav_vec_len;
55 from = av->dccpav_buf + av->dccpav_buf_head;
56
57 /* Check if buf_head wraps */
58 if (av->dccpav_buf_head + len > av->dccpav_vec_len) {
59 const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head);
60
61 memcpy(to, from, tailsize);
62 to += tailsize;
63 len -= tailsize;
64 from = av->dccpav_buf;
65 }
66
67 memcpy(to, from, len);
68 /*
69 * From draft-ietf-dccp-spec-11.txt:
70 *
71 * For each acknowledgement it sends, the HC-Receiver will add an
72 * acknowledgement record. ack_seqno will equal the HC-Receiver
73 * sequence number it used for the ack packet; ack_ptr will equal
74 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
75 * equal buf_nonce.
76 *
77 * This implemention uses just one ack record for now.
78 */
79 av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
80 av->dccpav_ack_ptr = av->dccpav_buf_head;
81 av->dccpav_ack_ackno = av->dccpav_buf_ackno;
82 av->dccpav_ack_nonce = av->dccpav_buf_nonce;
83 av->dccpav_sent_len = av->dccpav_vec_len;
84
85 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
86 "ack_ackno=%llu\n",
87 debug_prefix, av->dccpav_sent_len,
88 (unsigned long long)av->dccpav_ack_seqno,
89 (unsigned long long)av->dccpav_ack_ackno);
90 return -1;
91}
92
93struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
94 const unsigned int __nocast priority)
95{
96 struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority);
97
98 if (av != NULL) {
99 av->dccpav_buf_len = len;
100 av->dccpav_buf_head =
101 av->dccpav_buf_tail = av->dccpav_buf_len - 1;
102 av->dccpav_buf_ackno =
103 av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU;
104 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
105 av->dccpav_ack_ptr = 0;
106 av->dccpav_time.tv_sec = 0;
107 av->dccpav_time.tv_usec = 0;
108 av->dccpav_sent_len = av->dccpav_vec_len = 0;
109 }
110
111 return av;
112}
113
114void dccp_ackvec_free(struct dccp_ackvec *av)
115{
116 kfree(av);
117}
118
119static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
120 const unsigned int index)
121{
122 return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
123}
124
125static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
126 const unsigned int index)
127{
128 return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
129}
130
131/*
132 * If several packets are missing, the HC-Receiver may prefer to enter multiple
133 * bytes with run length 0, rather than a single byte with a larger run length;
134 * this simplifies table updates if one of the missing packets arrives.
135 */
136static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
137 const unsigned int packets,
138 const unsigned char state)
139{
140 unsigned int gap;
141 signed long new_head;
142
143 if (av->dccpav_vec_len + packets > av->dccpav_buf_len)
144 return -ENOBUFS;
145
146 gap = packets - 1;
147 new_head = av->dccpav_buf_head - packets;
148
149 if (new_head < 0) {
150 if (gap > 0) {
151 memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
152 gap + new_head + 1);
153 gap = -new_head;
154 }
155 new_head += av->dccpav_buf_len;
156 }
157
158 av->dccpav_buf_head = new_head;
159
160 if (gap > 0)
161 memset(av->dccpav_buf + av->dccpav_buf_head + 1,
162 DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
163
164 av->dccpav_buf[av->dccpav_buf_head] = state;
165 av->dccpav_vec_len += packets;
166 return 0;
167}
168
169/*
170 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
171 */
172int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
173 const u64 ackno, const u8 state)
174{
175 /*
176 * Check at the right places if the buffer is full, if it is, tell the
177 * caller to start dropping packets till the HC-Sender acks our ACK
178 * vectors, when we will free up space in dccpav_buf.
179 *
180 * We may well decide to do buffer compression, etc, but for now lets
181 * just drop.
182 *
183 * From Appendix A:
184 *
185 * Of course, the circular buffer may overflow, either when the
186 * HC-Sender is sending data at a very high rate, when the
187 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
188 * or when the HC-Sender is forgetting to acknowledge those acks
189 * (so the HC-Receiver is unable to clean up old state). In this
190 * case, the HC-Receiver should either compress the buffer (by
191 * increasing run lengths when possible), transfer its state to
192 * a larger buffer, or, as a last resort, drop all received
193 * packets, without processing them whatsoever, until its buffer
194 * shrinks again.
195 */
196
197 /* See if this is the first ackno being inserted */
198 if (av->dccpav_vec_len == 0) {
199 av->dccpav_buf[av->dccpav_buf_head] = state;
200 av->dccpav_vec_len = 1;
201 } else if (after48(ackno, av->dccpav_buf_ackno)) {
202 const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno,
203 ackno);
204
205 /*
206 * Look if the state of this packet is the same as the
207 * previous ackno and if so if we can bump the head len.
208 */
209 if (delta == 1 &&
210 dccp_ackvec_state(av, av->dccpav_buf_head) == state &&
211 (dccp_ackvec_len(av, av->dccpav_buf_head) <
212 DCCP_ACKVEC_LEN_MASK))
213 av->dccpav_buf[av->dccpav_buf_head]++;
214 else if (dccp_ackvec_set_buf_head_state(av, delta, state))
215 return -ENOBUFS;
216 } else {
217 /*
218 * A.1.2. Old Packets
219 *
220 * When a packet with Sequence Number S arrives, and
221 * S <= buf_ackno, the HC-Receiver will scan the table
222 * for the byte corresponding to S. (Indexing structures
223 * could reduce the complexity of this scan.)
224 */
225 u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
226 unsigned int index = av->dccpav_buf_head;
227
228 while (1) {
229 const u8 len = dccp_ackvec_len(av, index);
230 const u8 state = dccp_ackvec_state(av, index);
231 /*
232 * valid packets not yet in dccpav_buf have a reserved
233 * entry, with a len equal to 0.
234 */
235 if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
236 len == 0 && delta == 0) { /* Found our
237 reserved seat! */
238 dccp_pr_debug("Found %llu reserved seat!\n",
239 (unsigned long long)ackno);
240 av->dccpav_buf[index] = state;
241 goto out;
242 }
243 /* len == 0 means one packet */
244 if (delta < len + 1)
245 goto out_duplicate;
246
247 delta -= len + 1;
248 if (++index == av->dccpav_buf_len)
249 index = 0;
250 }
251 }
252
253 av->dccpav_buf_ackno = ackno;
254 dccp_timestamp(sk, &av->dccpav_time);
255out:
256 dccp_pr_debug("");
257 return 0;
258
259out_duplicate:
260 /* Duplicate packet */
261 dccp_pr_debug("Received a dup or already considered lost "
262 "packet: %llu\n", (unsigned long long)ackno);
263 return -EILSEQ;
264}
265
266#ifdef CONFIG_IP_DCCP_DEBUG
267void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
268{
269 if (!dccp_debug)
270 return;
271
272 printk("ACK vector len=%d, ackno=%llu |", len,
273 (unsigned long long)ackno);
274
275 while (len--) {
276 const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
277 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
278
279 printk("%d,%d|", state, rl);
280 ++vector;
281 }
282
283 printk("\n");
284}
285
286void dccp_ackvec_print(const struct dccp_ackvec *av)
287{
288 dccp_ackvector_print(av->dccpav_buf_ackno,
289 av->dccpav_buf + av->dccpav_buf_head,
290 av->dccpav_vec_len);
291}
292#endif
293
294static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
295{
296 /*
297 * As we're keeping track of the ack vector size (dccpav_vec_len) and
298 * the sent ack vector size (dccpav_sent_len) we don't need
299 * dccpav_buf_tail at all, but keep this code here as in the future
300 * we'll implement a vector of ack records, as suggested in
301 * draft-ietf-dccp-spec-11.txt Appendix A. -acme
302 */
303#if 0
304 av->dccpav_buf_tail = av->dccpav_ack_ptr + 1;
305 if (av->dccpav_buf_tail >= av->dccpav_vec_len)
306 av->dccpav_buf_tail -= av->dccpav_vec_len;
307#endif
308 av->dccpav_vec_len -= av->dccpav_sent_len;
309}
310
311void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
312 const u64 ackno)
313{
314 /* Check if we actually sent an ACK vector */
315 if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
316 return;
317
318 if (ackno == av->dccpav_ack_seqno) {
319#ifdef CONFIG_IP_DCCP_DEBUG
320 struct dccp_sock *dp = dccp_sk(sk);
321 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
322 "CLIENT rx ack: " : "server rx ack: ";
323#endif
324 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
325 "ack_ackno=%llu, ACKED!\n",
326 debug_prefix, 1,
327 (unsigned long long)av->dccpav_ack_seqno,
328 (unsigned long long)av->dccpav_ack_ackno);
329 dccp_ackvec_trow_away_ack_record(av);
330 av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
331 }
332}
333
334static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
335 struct sock *sk, u64 ackno,
336 const unsigned char len,
337 const unsigned char *vector)
338{
339 unsigned char i;
340
341 /* Check if we actually sent an ACK vector */
342 if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
343 return;
344 /*
345 * We're in the receiver half connection, so if the received an ACK
346 * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're
347 * not interested.
348 *
349 * Extra explanation with example:
350 *
351 * if we received an ACK vector with ackno 50, it can only be acking
352 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
353 */
354 /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */
355 if (before48(ackno, av->dccpav_ack_seqno)) {
356 /* dccp_pr_debug_cat("yes\n"); */
357 return;
358 }
359 /* dccp_pr_debug_cat("no\n"); */
360
361 i = len;
362 while (i--) {
363 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
364 u64 ackno_end_rl;
365
366 dccp_set_seqno(&ackno_end_rl, ackno - rl);
367
368 /*
369 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
370 * av->dccpav_ack_seqno, ackno);
371 */
372 if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) {
373 const u8 state = (*vector &
374 DCCP_ACKVEC_STATE_MASK) >> 6;
375 /* dccp_pr_debug_cat("yes\n"); */
376
377 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
378#ifdef CONFIG_IP_DCCP_DEBUG
379 struct dccp_sock *dp = dccp_sk(sk);
380 const char *debug_prefix =
381 dp->dccps_role == DCCP_ROLE_CLIENT ?
382 "CLIENT rx ack: " : "server rx ack: ";
383#endif
384 dccp_pr_debug("%sACK vector 0, len=%d, "
385 "ack_seqno=%llu, ack_ackno=%llu, "
386 "ACKED!\n",
387 debug_prefix, len,
388 (unsigned long long)
389 av->dccpav_ack_seqno,
390 (unsigned long long)
391 av->dccpav_ack_ackno);
392 dccp_ackvec_trow_away_ack_record(av);
393 }
394 /*
395 * If dccpav_ack_seqno was not received, no problem
396 * we'll send another ACK vector.
397 */
398 av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
399 break;
400 }
401 /* dccp_pr_debug_cat("no\n"); */
402
403 dccp_set_seqno(&ackno, ackno_end_rl - 1);
404 ++vector;
405 }
406}
407
408int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
409 const u8 opt, const u8 *value, const u8 len)
410{
411 if (len > DCCP_MAX_ACKVEC_LEN)
412 return -1;
413
414 /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
415 dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
416 DCCP_SKB_CB(skb)->dccpd_ack_seq,
417 len, value);
418 return 0;
419}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
new file mode 100644
index 000000000000..8ca51c9191f7
--- /dev/null
+++ b/net/dccp/ackvec.h
@@ -0,0 +1,133 @@
1#ifndef _ACKVEC_H
2#define _ACKVEC_H
3/*
4 * net/dccp/ackvec.h
5 *
6 * An implementation of the DCCP protocol
7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/config.h>
15#include <linux/compiler.h>
16#include <linux/time.h>
17#include <linux/types.h>
18
19/* Read about the ECN nonce to see why it is 253 */
20#define DCCP_MAX_ACKVEC_LEN 253
21
22#define DCCP_ACKVEC_STATE_RECEIVED 0
23#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
24#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
25
26#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */
27#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */
28
29/** struct dccp_ackvec - ack vector
30 *
31 * This data structure is the one defined in the DCCP draft
32 * Appendix A.
33 *
34 * @dccpav_buf_head - circular buffer head
35 * @dccpav_buf_tail - circular buffer tail
36 * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
37 * buffer (i.e. %dccpav_buf_head)
38 * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
39 * by the buffer with State 0
40 *
41 * Additionally, the HC-Receiver must keep some information about the
42 * Ack Vectors it has recently sent. For each packet sent carrying an
43 * Ack Vector, it remembers four variables:
44 *
45 * @dccpav_ack_seqno - the Sequence Number used for the packet
46 * (HC-Receiver seqno)
47 * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
48 * @dccpav_ack_ackno - the Acknowledgement Number used for the packet
49 * (HC-Sender seqno)
50 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
51 *
52 * @dccpav_buf_len - circular buffer length
53 * @dccpav_time - the time in usecs
54 * @dccpav_buf - circular buffer of acknowledgeable packets
55 */
56struct dccp_ackvec {
57 unsigned int dccpav_buf_head;
58 unsigned int dccpav_buf_tail;
59 u64 dccpav_buf_ackno;
60 u64 dccpav_ack_seqno;
61 u64 dccpav_ack_ackno;
62 unsigned int dccpav_ack_ptr;
63 unsigned int dccpav_sent_len;
64 unsigned int dccpav_vec_len;
65 unsigned int dccpav_buf_len;
66 struct timeval dccpav_time;
67 u8 dccpav_buf_nonce;
68 u8 dccpav_ack_nonce;
69 u8 dccpav_buf[0];
70};
71
72struct sock;
73struct sk_buff;
74
75#ifdef CONFIG_IP_DCCP_ACKVEC
76extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
77 const unsigned int __nocast priority);
78extern void dccp_ackvec_free(struct dccp_ackvec *av);
79
80extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
81 const u64 ackno, const u8 state);
82
83extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
84 struct sock *sk, const u64 ackno);
85extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
86 const u8 opt, const u8 *value, const u8 len);
87
88extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
89
90static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
91{
92 return av->dccpav_sent_len != av->dccpav_vec_len;
93}
94#else /* CONFIG_IP_DCCP_ACKVEC */
95static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
96 const unsigned int __nocast priority)
97{
98 return NULL;
99}
100
101static inline void dccp_ackvec_free(struct dccp_ackvec *av)
102{
103}
104
105static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
106 const u64 ackno, const u8 state)
107{
108 return -1;
109}
110
111static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
112 struct sock *sk, const u64 ackno)
113{
114}
115
116static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
117 const u8 opt, const u8 *value, const u8 len)
118{
119 return -1;
120}
121
122static inline int dccp_insert_option_ackvec(const struct sock *sk,
123 const struct sk_buff *skb)
124{
125 return -1;
126}
127
128static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
129{
130 return 0;
131}
132#endif /* CONFIG_IP_DCCP_ACKVEC */
133#endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 962f1e9e2f7e..21e55142dcd3 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -14,6 +14,7 @@
14 */ 14 */
15 15
16#include <net/sock.h> 16#include <net/sock.h>
17#include <linux/compiler.h>
17#include <linux/dccp.h> 18#include <linux/dccp.h>
18#include <linux/list.h> 19#include <linux/list.h>
19#include <linux/module.h> 20#include <linux/module.h>
@@ -54,6 +55,14 @@ struct ccid {
54 struct tcp_info *info); 55 struct tcp_info *info);
55 void (*ccid_hc_tx_get_info)(struct sock *sk, 56 void (*ccid_hc_tx_get_info)(struct sock *sk,
56 struct tcp_info *info); 57 struct tcp_info *info);
58 int (*ccid_hc_rx_getsockopt)(struct sock *sk,
59 const int optname, int len,
60 u32 __user *optval,
61 int __user *optlen);
62 int (*ccid_hc_tx_getsockopt)(struct sock *sk,
63 const int optname, int len,
64 u32 __user *optval,
65 int __user *optlen);
57}; 66};
58 67
59extern int ccid_register(struct ccid *ccid); 68extern int ccid_register(struct ccid *ccid);
@@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
177 if (ccid->ccid_hc_tx_get_info != NULL) 186 if (ccid->ccid_hc_tx_get_info != NULL)
178 ccid->ccid_hc_tx_get_info(sk, info); 187 ccid->ccid_hc_tx_get_info(sk, info);
179} 188}
189
190static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
191 const int optname, int len,
192 u32 __user *optval, int __user *optlen)
193{
194 int rc = -ENOPROTOOPT;
195 if (ccid->ccid_hc_rx_getsockopt != NULL)
196 rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len,
197 optval, optlen);
198 return rc;
199}
200
201static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
202 const int optname, int len,
203 u32 __user *optval, int __user *optlen)
204{
205 int rc = -ENOPROTOOPT;
206 if (ccid->ccid_hc_tx_getsockopt != NULL)
207 rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len,
208 optval, optlen);
209 return rc;
210}
180#endif /* _CCID_H */ 211#endif /* _CCID_H */
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 38aa84986118..aa68e0ab274d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
1120 info->tcpi_rtt = hctx->ccid3hctx_rtt; 1120 info->tcpi_rtt = hctx->ccid3hctx_rtt;
1121} 1121}
1122 1122
1123static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1124 u32 __user *optval, int __user *optlen)
1125{
1126 const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1127 const void *val;
1128
1129 /* Listen socks doesn't have a private CCID block */
1130 if (sk->sk_state == DCCP_LISTEN)
1131 return -EINVAL;
1132
1133 switch (optname) {
1134 case DCCP_SOCKOPT_CCID_RX_INFO:
1135 if (len < sizeof(hcrx->ccid3hcrx_tfrc))
1136 return -EINVAL;
1137 len = sizeof(hcrx->ccid3hcrx_tfrc);
1138 val = &hcrx->ccid3hcrx_tfrc;
1139 break;
1140 default:
1141 return -ENOPROTOOPT;
1142 }
1143
1144 if (put_user(len, optlen) || copy_to_user(optval, val, len))
1145 return -EFAULT;
1146
1147 return 0;
1148}
1149
1150static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
1151 u32 __user *optval, int __user *optlen)
1152{
1153 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1154 const void *val;
1155
1156 /* Listen socks doesn't have a private CCID block */
1157 if (sk->sk_state == DCCP_LISTEN)
1158 return -EINVAL;
1159
1160 switch (optname) {
1161 case DCCP_SOCKOPT_CCID_TX_INFO:
1162 if (len < sizeof(hctx->ccid3hctx_tfrc))
1163 return -EINVAL;
1164 len = sizeof(hctx->ccid3hctx_tfrc);
1165 val = &hctx->ccid3hctx_tfrc;
1166 break;
1167 default:
1168 return -ENOPROTOOPT;
1169 }
1170
1171 if (put_user(len, optlen) || copy_to_user(optval, val, len))
1172 return -EFAULT;
1173
1174 return 0;
1175}
1176
1123static struct ccid ccid3 = { 1177static struct ccid ccid3 = {
1124 .ccid_id = 3, 1178 .ccid_id = 3,
1125 .ccid_name = "ccid3", 1179 .ccid_name = "ccid3",
@@ -1139,6 +1193,8 @@ static struct ccid ccid3 = {
1139 .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, 1193 .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
1140 .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, 1194 .ccid_hc_rx_get_info = ccid3_hc_rx_get_info,
1141 .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, 1195 .ccid_hc_tx_get_info = ccid3_hc_tx_get_info,
1196 .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt,
1197 .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt,
1142}; 1198};
1143 1199
1144module_param(ccid3_debug, int, 0444); 1200module_param(ccid3_debug, int, 0444);
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index eb248778eea3..0bde4583d091 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -40,6 +40,7 @@
40#include <linux/list.h> 40#include <linux/list.h>
41#include <linux/time.h> 41#include <linux/time.h>
42#include <linux/types.h> 42#include <linux/types.h>
43#include <linux/tfrc.h>
43 44
44#define TFRC_MIN_PACKET_SIZE 16 45#define TFRC_MIN_PACKET_SIZE 16
45#define TFRC_STD_PACKET_SIZE 256 46#define TFRC_STD_PACKET_SIZE 256
@@ -93,12 +94,15 @@ struct ccid3_options_received {
93 * @ccid3hctx_hist - Packet history 94 * @ccid3hctx_hist - Packet history
94 */ 95 */
95struct ccid3_hc_tx_sock { 96struct ccid3_hc_tx_sock {
96 u32 ccid3hctx_x; 97 struct tfrc_tx_info ccid3hctx_tfrc;
97 u32 ccid3hctx_x_recv; 98#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
98 u32 ccid3hctx_x_calc; 99#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv
100#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc
101#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt
102#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p
103#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
104#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
99 u16 ccid3hctx_s; 105 u16 ccid3hctx_s;
100 u32 ccid3hctx_rtt;
101 u32 ccid3hctx_p;
102 u8 ccid3hctx_state; 106 u8 ccid3hctx_state;
103 u8 ccid3hctx_last_win_count; 107 u8 ccid3hctx_last_win_count;
104 u8 ccid3hctx_idle; 108 u8 ccid3hctx_idle;
@@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock {
106 struct timer_list ccid3hctx_no_feedback_timer; 110 struct timer_list ccid3hctx_no_feedback_timer;
107 struct timeval ccid3hctx_t_ld; 111 struct timeval ccid3hctx_t_ld;
108 struct timeval ccid3hctx_t_nom; 112 struct timeval ccid3hctx_t_nom;
109 u32 ccid3hctx_t_rto;
110 u32 ccid3hctx_t_ipi;
111 u32 ccid3hctx_delta; 113 u32 ccid3hctx_delta;
112 struct list_head ccid3hctx_hist; 114 struct list_head ccid3hctx_hist;
113 struct ccid3_options_received ccid3hctx_options_received; 115 struct ccid3_options_received ccid3hctx_options_received;
114}; 116};
115 117
116struct ccid3_hc_rx_sock { 118struct ccid3_hc_rx_sock {
119 struct tfrc_rx_info ccid3hcrx_tfrc;
120#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
121#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
122#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
117 u64 ccid3hcrx_seqno_last_counter:48, 123 u64 ccid3hcrx_seqno_last_counter:48,
118 ccid3hcrx_state:8, 124 ccid3hcrx_state:8,
119 ccid3hcrx_last_counter:4; 125 ccid3hcrx_last_counter:4;
120 u32 ccid3hcrx_rtt;
121 u32 ccid3hcrx_p;
122 u32 ccid3hcrx_bytes_recv; 126 u32 ccid3hcrx_bytes_recv;
123 struct timeval ccid3hcrx_tstamp_last_feedback; 127 struct timeval ccid3hcrx_tstamp_last_feedback;
124 struct timeval ccid3hcrx_tstamp_last_ack; 128 struct timeval ccid3hcrx_tstamp_last_ack;
@@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock {
127 u16 ccid3hcrx_s; 131 u16 ccid3hcrx_s;
128 u32 ccid3hcrx_pinv; 132 u32 ccid3hcrx_pinv;
129 u32 ccid3hcrx_elapsed_time; 133 u32 ccid3hcrx_elapsed_time;
130 u32 ccid3hcrx_x_recv;
131}; 134};
132 135
133static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) 136static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 95c4630b3b18..5871c027f9dc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -17,6 +17,7 @@
17#include <net/snmp.h> 17#include <net/snmp.h>
18#include <net/sock.h> 18#include <net/sock.h>
19#include <net/tcp.h> 19#include <net/tcp.h>
20#include "ackvec.h"
20 21
21#ifdef CONFIG_IP_DCCP_DEBUG 22#ifdef CONFIG_IP_DCCP_DEBUG
22extern int dccp_debug; 23extern int dccp_debug;
@@ -258,13 +259,12 @@ extern int dccp_v4_send_reset(struct sock *sk,
258extern void dccp_send_close(struct sock *sk, const int active); 259extern void dccp_send_close(struct sock *sk, const int active);
259 260
260struct dccp_skb_cb { 261struct dccp_skb_cb {
261 __u8 dccpd_type; 262 __u8 dccpd_type:4;
262 __u8 dccpd_reset_code; 263 __u8 dccpd_ccval:4;
263 __u8 dccpd_service; 264 __u8 dccpd_reset_code;
264 __u8 dccpd_ccval; 265 __u16 dccpd_opt_len;
265 __u64 dccpd_seq; 266 __u64 dccpd_seq;
266 __u64 dccpd_ack_seq; 267 __u64 dccpd_ack_seq;
267 int dccpd_opt_len;
268}; 268};
269 269
270#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) 270#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
@@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
359 (dp->dccps_gss - 359 (dp->dccps_gss -
360 dp->dccps_options.dccpo_sequence_window + 1)); 360 dp->dccps_options.dccpo_sequence_window + 1));
361} 361}
362
363static inline int dccp_ack_pending(const struct sock *sk)
364{
365 const struct dccp_sock *dp = dccp_sk(sk);
366 return dp->dccps_timestamp_echo != 0 ||
367#ifdef CONFIG_IP_DCCP_ACKVEC
368 (dp->dccps_options.dccpo_send_ack_vector &&
369 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
370#endif
371 inet_csk_ack_scheduled(sk);
372}
362 373
363extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); 374extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
364extern void dccp_insert_option_elapsed_time(struct sock *sk, 375extern void dccp_insert_option_elapsed_time(struct sock *sk,
@@ -372,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
372 383
373extern struct socket *dccp_ctl_socket; 384extern struct socket *dccp_ctl_socket;
374 385
375#define DCCP_ACKPKTS_STATE_RECEIVED 0
376#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
377#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
378
379#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
380#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
381
382/** struct dccp_ackpkts - acknowledgeable packets
383 *
384 * This data structure is the one defined in the DCCP draft
385 * Appendix A.
386 *
387 * @dccpap_buf_head - circular buffer head
388 * @dccpap_buf_tail - circular buffer tail
389 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
390 * buffer (i.e. %dccpap_buf_head)
391 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
392 * by the buffer with State 0
393 *
394 * Additionally, the HC-Receiver must keep some information about the
395 * Ack Vectors it has recently sent. For each packet sent carrying an
396 * Ack Vector, it remembers four variables:
397 *
398 * @dccpap_ack_seqno - the Sequence Number used for the packet
399 * (HC-Receiver seqno)
400 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
401 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
402 * (HC-Sender seqno)
403 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
404 *
405 * @dccpap_buf_len - circular buffer length
406 * @dccpap_time - the time in usecs
407 * @dccpap_buf - circular buffer of acknowledgeable packets
408 */
409struct dccp_ackpkts {
410 unsigned int dccpap_buf_head;
411 unsigned int dccpap_buf_tail;
412 u64 dccpap_buf_ackno;
413 u64 dccpap_ack_seqno;
414 u64 dccpap_ack_ackno;
415 unsigned int dccpap_ack_ptr;
416 unsigned int dccpap_buf_vector_len;
417 unsigned int dccpap_ack_vector_len;
418 unsigned int dccpap_buf_len;
419 struct timeval dccpap_time;
420 u8 dccpap_buf_nonce;
421 u8 dccpap_ack_nonce;
422 u8 dccpap_buf[0];
423};
424
425extern struct dccp_ackpkts *
426 dccp_ackpkts_alloc(unsigned int len,
427 const unsigned int __nocast priority);
428extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
429extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
430 u64 ackno, u8 state);
431extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
432 struct sock *sk, u64 ackno);
433
434extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); 386extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
435 387
436static inline suseconds_t timeval_usecs(const struct timeval *tv) 388static inline suseconds_t timeval_usecs(const struct timeval *tv)
@@ -471,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv,
471 } 423 }
472} 424}
473 425
474#ifdef CONFIG_IP_DCCP_DEBUG
475extern void dccp_ackvector_print(const u64 ackno,
476 const unsigned char *vector, int len);
477extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
478#else
479static inline void dccp_ackvector_print(const u64 ackno,
480 const unsigned char *vector,
481 int len) { }
482static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
483#endif
484
485#endif /* _DCCP_H */ 426#endif /* _DCCP_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index c74034cf7ede..1b6b2cb12376 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -16,6 +16,7 @@
16 16
17#include <net/sock.h> 17#include <net/sock.h>
18 18
19#include "ackvec.h"
19#include "ccid.h" 20#include "ccid.h"
20#include "dccp.h" 21#include "dccp.h"
21 22
@@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
60 struct dccp_sock *dp = dccp_sk(sk); 61 struct dccp_sock *dp = dccp_sk(sk);
61 62
62 if (dp->dccps_options.dccpo_send_ack_vector) 63 if (dp->dccps_options.dccpo_send_ack_vector)
63 dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, 64 dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
64 DCCP_SKB_CB(skb)->dccpd_ack_seq); 65 DCCP_SKB_CB(skb)->dccpd_ack_seq);
65} 66}
66 67
67static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) 68static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
@@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
164 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 165 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
165 dccp_event_ack_recv(sk, skb); 166 dccp_event_ack_recv(sk, skb);
166 167
167 /* 168 if (dp->dccps_options.dccpo_send_ack_vector &&
168 * FIXME: check ECN to see if we should use 169 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
169 * DCCP_ACKPKTS_STATE_ECN_MARKED 170 DCCP_SKB_CB(skb)->dccpd_seq,
170 */ 171 DCCP_ACKVEC_STATE_RECEIVED))
171 if (dp->dccps_options.dccpo_send_ack_vector) { 172 goto discard;
172 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
173
174 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
175 DCCP_SKB_CB(skb)->dccpd_seq,
176 DCCP_ACKPKTS_STATE_RECEIVED)) {
177 LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
178 "packets buffer full!\n");
179 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
180 inet_csk_schedule_ack(sk);
181 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
182 TCP_DELACK_MIN,
183 DCCP_RTO_MAX);
184 goto discard;
185 }
186
187 /*
188 * FIXME: this activation is probably wrong, have to study more
189 * TCP delack machinery and how it fits into DCCP draft, but
190 * for now it kinda "works" 8)
191 */
192 if (!inet_csk_ack_scheduled(sk)) {
193 inet_csk_schedule_ack(sk);
194 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
195 DCCP_RTO_MAX);
196 }
197 }
198 173
199 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); 174 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
200 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); 175 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
@@ -384,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
384 } 359 }
385 360
386out_invalid_packet: 361out_invalid_packet:
387 return 1; /* dccp_v4_do_rcv will send a reset, but... 362 /* dccp_v4_do_rcv will send a reset */
388 FIXME: the reset code should be 363 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
389 DCCP_RESET_CODE_PACKET_ERROR */ 364 return 1;
390} 365}
391 366
392static int dccp_rcv_respond_partopen_state_process(struct sock *sk, 367static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -433,6 +408,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
433 struct dccp_hdr *dh, unsigned len) 408 struct dccp_hdr *dh, unsigned len)
434{ 409{
435 struct dccp_sock *dp = dccp_sk(sk); 410 struct dccp_sock *dp = dccp_sk(sk);
411 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
436 const int old_state = sk->sk_state; 412 const int old_state = sk->sk_state;
437 int queued = 0; 413 int queued = 0;
438 414
@@ -473,7 +449,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
473 if (dh->dccph_type == DCCP_PKT_RESET) 449 if (dh->dccph_type == DCCP_PKT_RESET)
474 goto discard; 450 goto discard;
475 451
476 /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ 452 /* Caller (dccp_v4_do_rcv) will send Reset */
453 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
477 return 1; 454 return 1;
478 } 455 }
479 456
@@ -487,36 +464,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
487 if (dccp_parse_options(sk, skb)) 464 if (dccp_parse_options(sk, skb))
488 goto discard; 465 goto discard;
489 466
490 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != 467 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
491 DCCP_PKT_WITHOUT_ACK_SEQ)
492 dccp_event_ack_recv(sk, skb); 468 dccp_event_ack_recv(sk, skb);
493 469
494 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); 470 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
495 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); 471 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
496 472
497 /* 473 if (dp->dccps_options.dccpo_send_ack_vector &&
498 * FIXME: check ECN to see if we should use 474 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
499 * DCCP_ACKPKTS_STATE_ECN_MARKED 475 DCCP_SKB_CB(skb)->dccpd_seq,
500 */ 476 DCCP_ACKVEC_STATE_RECEIVED))
501 if (dp->dccps_options.dccpo_send_ack_vector) { 477 goto discard;
502 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
503 DCCP_SKB_CB(skb)->dccpd_seq,
504 DCCP_ACKPKTS_STATE_RECEIVED))
505 goto discard;
506 /*
507 * FIXME: this activation is probably wrong, have to
508 * study more TCP delack machinery and how it fits into
509 * DCCP draft, but for now it kinda "works" 8)
510 */
511 if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
512 DCCP_MAX_SEQNO + 1) &&
513 !inet_csk_ack_scheduled(sk)) {
514 inet_csk_schedule_ack(sk);
515 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
516 TCP_DELACK_MIN,
517 DCCP_RTO_MAX);
518 }
519 }
520 } 478 }
521 479
522 /* 480 /*
@@ -551,8 +509,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
551 dh->dccph_type == DCCP_PKT_REQUEST) || 509 dh->dccph_type == DCCP_PKT_REQUEST) ||
552 (sk->sk_state == DCCP_RESPOND && 510 (sk->sk_state == DCCP_RESPOND &&
553 dh->dccph_type == DCCP_PKT_DATA)) { 511 dh->dccph_type == DCCP_PKT_DATA)) {
554 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, 512 dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
555 DCCP_PKT_SYNC);
556 goto discard; 513 goto discard;
557 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { 514 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
558 dccp_rcv_closereq(sk, skb); 515 dccp_rcv_closereq(sk, skb);
@@ -563,13 +520,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
563 } 520 }
564 521
565 if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { 522 if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
566 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, 523 dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
567 DCCP_PKT_SYNCACK);
568 goto discard; 524 goto discard;
569 } 525 }
570 526
571 switch (sk->sk_state) { 527 switch (sk->sk_state) {
572 case DCCP_CLOSED: 528 case DCCP_CLOSED:
529 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
573 return 1; 530 return 1;
574 531
575 case DCCP_REQUESTING: 532 case DCCP_REQUESTING:
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2afaa464e7f0..40fe6afacde6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -23,6 +23,7 @@
23#include <net/tcp_states.h> 23#include <net/tcp_states.h>
24#include <net/xfrm.h> 24#include <net/xfrm.h>
25 25
26#include "ackvec.h"
26#include "ccid.h" 27#include "ccid.h"
27#include "dccp.h" 28#include "dccp.h"
28 29
@@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
246 247
247 dp->dccps_role = DCCP_ROLE_CLIENT; 248 dp->dccps_role = DCCP_ROLE_CLIENT;
248 249
250 if (dccp_service_not_initialized(sk))
251 return -EPROTO;
252
249 if (addr_len < sizeof(struct sockaddr_in)) 253 if (addr_len < sizeof(struct sockaddr_in))
250 return -EINVAL; 254 return -EINVAL;
251 255
@@ -661,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
661 dccp_hdr(skb)->dccph_sport); 665 dccp_hdr(skb)->dccph_sport);
662} 666}
663 667
668static inline int dccp_bad_service_code(const struct sock *sk,
669 const __u32 service)
670{
671 const struct dccp_sock *dp = dccp_sk(sk);
672
673 if (dp->dccps_service == service)
674 return 0;
675 return !dccp_list_has_service(dp->dccps_service_list, service);
676}
677
664int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 678int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
665{ 679{
666 struct inet_request_sock *ireq; 680 struct inet_request_sock *ireq;
@@ -669,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
669 struct dccp_request_sock *dreq; 683 struct dccp_request_sock *dreq;
670 const __u32 saddr = skb->nh.iph->saddr; 684 const __u32 saddr = skb->nh.iph->saddr;
671 const __u32 daddr = skb->nh.iph->daddr; 685 const __u32 daddr = skb->nh.iph->daddr;
686 const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
687 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
688 __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
672 struct dst_entry *dst = NULL; 689 struct dst_entry *dst = NULL;
673 690
674 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ 691 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
675 if (((struct rtable *)skb->dst)->rt_flags & 692 if (((struct rtable *)skb->dst)->rt_flags &
676 (RTCF_BROADCAST | RTCF_MULTICAST)) 693 (RTCF_BROADCAST | RTCF_MULTICAST)) {
694 reset_code = DCCP_RESET_CODE_NO_CONNECTION;
677 goto drop; 695 goto drop;
696 }
678 697
698 if (dccp_bad_service_code(sk, service)) {
699 reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
700 goto drop;
701 }
679 /* 702 /*
680 * TW buckets are converted to open requests without 703 * TW buckets are converted to open requests without
681 * limitations, they conserve resources and peer is 704 * limitations, they conserve resources and peer is
@@ -718,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
718 * dccp_create_openreq_child. 741 * dccp_create_openreq_child.
719 */ 742 */
720 dreq = dccp_rsk(req); 743 dreq = dccp_rsk(req);
721 dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; 744 dreq->dreq_isr = dcb->dccpd_seq;
722 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); 745 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
723 dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; 746 dreq->dreq_service = service;
724 747
725 if (dccp_v4_send_response(sk, req, dst)) 748 if (dccp_v4_send_response(sk, req, dst))
726 goto drop_and_free; 749 goto drop_and_free;
@@ -735,6 +758,7 @@ drop_and_free:
735 __reqsk_free(req); 758 __reqsk_free(req);
736drop: 759drop:
737 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 760 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
761 dcb->dccpd_reset_code = reset_code;
738 return -1; 762 return -1;
739} 763}
740 764
@@ -1005,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1005 return 0; 1029 return 0;
1006 1030
1007reset: 1031reset:
1008 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
1009 dccp_v4_ctl_send_reset(skb); 1032 dccp_v4_ctl_send_reset(skb);
1010discard: 1033discard:
1011 kfree_skb(skb); 1034 kfree_skb(skb);
@@ -1090,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb)
1090 goto discard_it; 1113 goto discard_it;
1091 1114
1092 dh = dccp_hdr(skb); 1115 dh = dccp_hdr(skb);
1093#if 0
1094 /*
1095 * Use something like this to simulate some DATA/DATAACK loss to test
1096 * dccp_ackpkts_add, you'll get something like this on a session that
1097 * sends 10 DATA/DATAACK packets:
1098 *
1099 * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
1100 *
1101 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
1102 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
1103 * with the same state
1104 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
1105 *
1106 * So...
1107 *
1108 * 281473596467422 was received
1109 * 281473596467421 was not received
1110 * 281473596467420 was received
1111 * 281473596467419 was not received
1112 * 281473596467418 was received
1113 * 281473596467417 was not received
1114 * 281473596467416 was received
1115 * 281473596467415 was not received
1116 * 281473596467414 was received
1117 * 281473596467413 was received (this one was the 3way handshake
1118 * RESPONSE)
1119 *
1120 */
1121 if (dh->dccph_type == DCCP_PKT_DATA ||
1122 dh->dccph_type == DCCP_PKT_DATAACK) {
1123 static int discard = 0;
1124 1116
1125 if (discard) {
1126 discard = 0;
1127 goto discard_it;
1128 }
1129 discard = 1;
1130 }
1131#endif
1132 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); 1117 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
1133 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; 1118 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
1134 1119
@@ -1242,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk)
1242 do_gettimeofday(&dp->dccps_epoch); 1227 do_gettimeofday(&dp->dccps_epoch);
1243 1228
1244 if (dp->dccps_options.dccpo_send_ack_vector) { 1229 if (dp->dccps_options.dccpo_send_ack_vector) {
1245 dp->dccps_hc_rx_ackpkts = 1230 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
1246 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, 1231 GFP_KERNEL);
1247 GFP_KERNEL); 1232 if (dp->dccps_hc_rx_ackvec == NULL)
1248
1249 if (dp->dccps_hc_rx_ackpkts == NULL)
1250 return -ENOMEM; 1233 return -ENOMEM;
1251 } 1234 }
1252 1235
@@ -1258,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk)
1258 * setsockopt(CCIDs-I-want/accept). -acme 1241 * setsockopt(CCIDs-I-want/accept). -acme
1259 */ 1242 */
1260 if (likely(!dccp_ctl_socket_init)) { 1243 if (likely(!dccp_ctl_socket_init)) {
1261 dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, 1244 dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid,
1262 sk); 1245 sk);
1263 dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, 1246 dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid,
1264 sk); 1247 sk);
1265 if (dp->dccps_hc_rx_ccid == NULL || 1248 if (dp->dccps_hc_rx_ccid == NULL ||
1266 dp->dccps_hc_tx_ccid == NULL) { 1249 dp->dccps_hc_tx_ccid == NULL) {
1267 ccid_exit(dp->dccps_hc_rx_ccid, sk); 1250 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1268 ccid_exit(dp->dccps_hc_tx_ccid, sk); 1251 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1269 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); 1252 if (dp->dccps_options.dccpo_send_ack_vector) {
1270 dp->dccps_hc_rx_ackpkts = NULL; 1253 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
1254 dp->dccps_hc_rx_ackvec = NULL;
1255 }
1271 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; 1256 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1272 return -ENOMEM; 1257 return -ENOMEM;
1273 } 1258 }
@@ -1280,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk)
1280 sk->sk_write_space = dccp_write_space; 1265 sk->sk_write_space = dccp_write_space;
1281 dp->dccps_mss_cache = 536; 1266 dp->dccps_mss_cache = 536;
1282 dp->dccps_role = DCCP_ROLE_UNDEFINED; 1267 dp->dccps_role = DCCP_ROLE_UNDEFINED;
1268 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
1283 1269
1284 return 0; 1270 return 0;
1285} 1271}
@@ -1301,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk)
1301 if (inet_csk(sk)->icsk_bind_hash != NULL) 1287 if (inet_csk(sk)->icsk_bind_hash != NULL)
1302 inet_put_port(&dccp_hashinfo, sk); 1288 inet_put_port(&dccp_hashinfo, sk);
1303 1289
1290 if (dp->dccps_service_list != NULL) {
1291 kfree(dp->dccps_service_list);
1292 dp->dccps_service_list = NULL;
1293 }
1294
1304 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); 1295 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
1305 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); 1296 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
1306 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); 1297 if (dp->dccps_options.dccpo_send_ack_vector) {
1307 dp->dccps_hc_rx_ackpkts = NULL; 1298 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
1299 dp->dccps_hc_rx_ackvec = NULL;
1300 }
1308 ccid_exit(dp->dccps_hc_rx_ccid, sk); 1301 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1309 ccid_exit(dp->dccps_hc_tx_ccid, sk); 1302 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1310 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; 1303 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 18461bc04cbe..1393461898bb 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -19,6 +19,7 @@
19#include <net/xfrm.h> 19#include <net/xfrm.h>
20#include <net/inet_timewait_sock.h> 20#include <net/inet_timewait_sock.h>
21 21
22#include "ackvec.h"
22#include "ccid.h" 23#include "ccid.h"
23#include "dccp.h" 24#include "dccp.h"
24 25
@@ -93,22 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
93 struct inet_connection_sock *newicsk = inet_csk(sk); 94 struct inet_connection_sock *newicsk = inet_csk(sk);
94 struct dccp_sock *newdp = dccp_sk(newsk); 95 struct dccp_sock *newdp = dccp_sk(newsk);
95 96
96 newdp->dccps_hc_rx_ackpkts = NULL; 97 newdp->dccps_role = DCCP_ROLE_SERVER;
97 newdp->dccps_role = DCCP_ROLE_SERVER; 98 newdp->dccps_hc_rx_ackvec = NULL;
98 newicsk->icsk_rto = DCCP_TIMEOUT_INIT; 99 newdp->dccps_service_list = NULL;
100 newdp->dccps_service = dreq->dreq_service;
101 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
99 do_gettimeofday(&newdp->dccps_epoch); 102 do_gettimeofday(&newdp->dccps_epoch);
100 103
101 if (newdp->dccps_options.dccpo_send_ack_vector) { 104 if (newdp->dccps_options.dccpo_send_ack_vector) {
102 newdp->dccps_hc_rx_ackpkts = 105 newdp->dccps_hc_rx_ackvec =
103 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, 106 dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
104 GFP_ATOMIC); 107 GFP_ATOMIC);
105 /* 108 /*
106 * XXX: We're using the same CCIDs set on the parent, 109 * XXX: We're using the same CCIDs set on the parent,
107 * i.e. sk_clone copied the master sock and left the 110 * i.e. sk_clone copied the master sock and left the
108 * CCID pointers for this child, that is why we do the 111 * CCID pointers for this child, that is why we do the
109 * __ccid_get calls. 112 * __ccid_get calls.
110 */ 113 */
111 if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) 114 if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
112 goto out_free; 115 goto out_free;
113 } 116 }
114 117
@@ -116,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
116 newsk) != 0 || 119 newsk) != 0 ||
117 ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, 120 ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
118 newsk) != 0)) { 121 newsk) != 0)) {
119 dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); 122 dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
120 ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); 123 ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
121 ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); 124 ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
122out_free: 125out_free:
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d4c4242d8dd7..0a76426c9aea 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -18,19 +18,15 @@
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/skbuff.h> 19#include <linux/skbuff.h>
20 20
21#include "ackvec.h"
21#include "ccid.h" 22#include "ccid.h"
22#include "dccp.h" 23#include "dccp.h"
23 24
24static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
25 struct sock *sk,
26 const u64 ackno,
27 const unsigned char len,
28 const unsigned char *vector);
29
30/* stores the default values for new connection. may be changed with sysctl */ 25/* stores the default values for new connection. may be changed with sysctl */
31static const struct dccp_options dccpo_default_values = { 26static const struct dccp_options dccpo_default_values = {
32 .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, 27 .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
33 .dccpo_ccid = DCCPF_INITIAL_CCID, 28 .dccpo_rx_ccid = DCCPF_INITIAL_CCID,
29 .dccpo_tx_ccid = DCCPF_INITIAL_CCID,
34 .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, 30 .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
35 .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, 31 .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
36}; 32};
@@ -113,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
113 opt_recv->dccpor_ndp); 109 opt_recv->dccpor_ndp);
114 break; 110 break;
115 case DCCPO_ACK_VECTOR_0: 111 case DCCPO_ACK_VECTOR_0:
116 if (len > DCCP_MAX_ACK_VECTOR_LEN) 112 case DCCPO_ACK_VECTOR_1:
117 goto out_invalid_option;
118
119 if (pkt_type == DCCP_PKT_DATA) 113 if (pkt_type == DCCP_PKT_DATA)
120 continue; 114 continue;
121 115
122 opt_recv->dccpor_ack_vector_len = len; 116 if (dp->dccps_options.dccpo_send_ack_vector &&
123 opt_recv->dccpor_ack_vector_idx = value - options; 117 dccp_ackvec_parse(sk, skb, opt, value, len))
124 118 goto out_invalid_option;
125 dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
126 debug_prefix, len,
127 (unsigned long long)
128 DCCP_SKB_CB(skb)->dccpd_ack_seq);
129 dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
130 value, len);
131 dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
132 sk,
133 DCCP_SKB_CB(skb)->dccpd_ack_seq,
134 len, value);
135 break; 119 break;
136 case DCCPO_TIMESTAMP: 120 case DCCPO_TIMESTAMP:
137 if (len != 4) 121 if (len != 4)
@@ -352,86 +336,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk,
352 336
353EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); 337EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
354 338
355static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
356{
357 struct dccp_sock *dp = dccp_sk(sk);
358#ifdef CONFIG_IP_DCCP_DEBUG
359 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
360 "CLIENT TX opt: " : "server TX opt: ";
361#endif
362 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
363 int len = ap->dccpap_buf_vector_len + 2;
364 struct timeval now;
365 u32 elapsed_time;
366 unsigned char *to, *from;
367
368 dccp_timestamp(sk, &now);
369 elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10;
370
371 if (elapsed_time != 0)
372 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
373
374 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
375 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
376 "insert ACK Vector!\n");
377 return;
378 }
379
380 /*
381 * XXX: now we have just one ack vector sent record, so
382 * we have to wait for it to be cleared.
383 *
384 * Of course this is not acceptable, but this is just for
385 * basic testing now.
386 */
387 if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
388 return;
389
390 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
391
392 to = skb_push(skb, len);
393 *to++ = DCCPO_ACK_VECTOR_0;
394 *to++ = len;
395
396 len = ap->dccpap_buf_vector_len;
397 from = ap->dccpap_buf + ap->dccpap_buf_head;
398
399 /* Check if buf_head wraps */
400 if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
401 const unsigned int tailsize = (ap->dccpap_buf_len -
402 ap->dccpap_buf_head);
403
404 memcpy(to, from, tailsize);
405 to += tailsize;
406 len -= tailsize;
407 from = ap->dccpap_buf;
408 }
409
410 memcpy(to, from, len);
411 /*
412 * From draft-ietf-dccp-spec-11.txt:
413 *
414 * For each acknowledgement it sends, the HC-Receiver will add an
415 * acknowledgement record. ack_seqno will equal the HC-Receiver
416 * sequence number it used for the ack packet; ack_ptr will equal
417 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
418 * equal buf_nonce.
419 *
420 * This implemention uses just one ack record for now.
421 */
422 ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
423 ap->dccpap_ack_ptr = ap->dccpap_buf_head;
424 ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
425 ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
426 ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
427
428 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
429 "ack_ackno=%llu\n",
430 debug_prefix, ap->dccpap_ack_vector_len,
431 (unsigned long long) ap->dccpap_ack_seqno,
432 (unsigned long long) ap->dccpap_ack_ackno);
433}
434
435void dccp_timestamp(const struct sock *sk, struct timeval *tv) 339void dccp_timestamp(const struct sock *sk, struct timeval *tv)
436{ 340{
437 const struct dccp_sock *dp = dccp_sk(sk); 341 const struct dccp_sock *dp = dccp_sk(sk);
@@ -528,9 +432,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
528 432
529 if (!dccp_packet_without_ack(skb)) { 433 if (!dccp_packet_without_ack(skb)) {
530 if (dp->dccps_options.dccpo_send_ack_vector && 434 if (dp->dccps_options.dccpo_send_ack_vector &&
531 (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != 435 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec))
532 DCCP_MAX_SEQNO + 1)) 436 dccp_insert_option_ackvec(sk, skb);
533 dccp_insert_option_ack_vector(sk, skb);
534 if (dp->dccps_timestamp_echo != 0) 437 if (dp->dccps_timestamp_echo != 0)
535 dccp_insert_option_timestamp_echo(sk, skb); 438 dccp_insert_option_timestamp_echo(sk, skb);
536 } 439 }
@@ -557,331 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
557 } 460 }
558 } 461 }
559} 462}
560
561struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
562 const unsigned int __nocast priority)
563{
564 struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
565
566 if (ap != NULL) {
567#ifdef CONFIG_IP_DCCP_DEBUG
568 memset(ap->dccpap_buf, 0xFF, len);
569#endif
570 ap->dccpap_buf_len = len;
571 ap->dccpap_buf_head =
572 ap->dccpap_buf_tail =
573 ap->dccpap_buf_len - 1;
574 ap->dccpap_buf_ackno =
575 ap->dccpap_ack_ackno =
576 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
577 ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
578 ap->dccpap_ack_ptr = 0;
579 ap->dccpap_time.tv_sec = 0;
580 ap->dccpap_time.tv_usec = 0;
581 ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
582 }
583
584 return ap;
585}
586
587void dccp_ackpkts_free(struct dccp_ackpkts *ap)
588{
589 if (ap != NULL) {
590#ifdef CONFIG_IP_DCCP_DEBUG
591 memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
592#endif
593 kfree(ap);
594 }
595}
596
597static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
598 const unsigned int index)
599{
600 return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
601}
602
603static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
604 const unsigned int index)
605{
606 return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
607}
608
609/*
610 * If several packets are missing, the HC-Receiver may prefer to enter multiple
611 * bytes with run length 0, rather than a single byte with a larger run length;
612 * this simplifies table updates if one of the missing packets arrives.
613 */
614static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
615 const unsigned int packets,
616 const unsigned char state)
617{
618 unsigned int gap;
619 signed long new_head;
620
621 if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
622 return -ENOBUFS;
623
624 gap = packets - 1;
625 new_head = ap->dccpap_buf_head - packets;
626
627 if (new_head < 0) {
628 if (gap > 0) {
629 memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
630 gap + new_head + 1);
631 gap = -new_head;
632 }
633 new_head += ap->dccpap_buf_len;
634 }
635
636 ap->dccpap_buf_head = new_head;
637
638 if (gap > 0)
639 memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
640 DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
641
642 ap->dccpap_buf[ap->dccpap_buf_head] = state;
643 ap->dccpap_buf_vector_len += packets;
644 return 0;
645}
646
647/*
648 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
649 */
650int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
651 u64 ackno, u8 state)
652{
653 /*
654 * Check at the right places if the buffer is full, if it is, tell the
655 * caller to start dropping packets till the HC-Sender acks our ACK
656 * vectors, when we will free up space in dccpap_buf.
657 *
658 * We may well decide to do buffer compression, etc, but for now lets
659 * just drop.
660 *
661 * From Appendix A:
662 *
663 * Of course, the circular buffer may overflow, either when the
664 * HC-Sender is sending data at a very high rate, when the
665 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
666 * or when the HC-Sender is forgetting to acknowledge those acks
667 * (so the HC-Receiver is unable to clean up old state). In this
668 * case, the HC-Receiver should either compress the buffer (by
669 * increasing run lengths when possible), transfer its state to
670 * a larger buffer, or, as a last resort, drop all received
671 * packets, without processing them whatsoever, until its buffer
672 * shrinks again.
673 */
674
675 /* See if this is the first ackno being inserted */
676 if (ap->dccpap_buf_vector_len == 0) {
677 ap->dccpap_buf[ap->dccpap_buf_head] = state;
678 ap->dccpap_buf_vector_len = 1;
679 } else if (after48(ackno, ap->dccpap_buf_ackno)) {
680 const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
681 ackno);
682
683 /*
684 * Look if the state of this packet is the same as the
685 * previous ackno and if so if we can bump the head len.
686 */
687 if (delta == 1 &&
688 dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
689 (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
690 DCCP_ACKPKTS_LEN_MASK))
691 ap->dccpap_buf[ap->dccpap_buf_head]++;
692 else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
693 return -ENOBUFS;
694 } else {
695 /*
696 * A.1.2. Old Packets
697 *
698 * When a packet with Sequence Number S arrives, and
699 * S <= buf_ackno, the HC-Receiver will scan the table
700 * for the byte corresponding to S. (Indexing structures
701 * could reduce the complexity of this scan.)
702 */
703 u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
704 unsigned int index = ap->dccpap_buf_head;
705
706 while (1) {
707 const u8 len = dccp_ackpkts_len(ap, index);
708 const u8 state = dccp_ackpkts_state(ap, index);
709 /*
710 * valid packets not yet in dccpap_buf have a reserved
711 * entry, with a len equal to 0.
712 */
713 if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
714 len == 0 && delta == 0) { /* Found our
715 reserved seat! */
716 dccp_pr_debug("Found %llu reserved seat!\n",
717 (unsigned long long) ackno);
718 ap->dccpap_buf[index] = state;
719 goto out;
720 }
721 /* len == 0 means one packet */
722 if (delta < len + 1)
723 goto out_duplicate;
724
725 delta -= len + 1;
726 if (++index == ap->dccpap_buf_len)
727 index = 0;
728 }
729 }
730
731 ap->dccpap_buf_ackno = ackno;
732 dccp_timestamp(sk, &ap->dccpap_time);
733out:
734 dccp_pr_debug("");
735 dccp_ackpkts_print(ap);
736 return 0;
737
738out_duplicate:
739 /* Duplicate packet */
740 dccp_pr_debug("Received a dup or already considered lost "
741 "packet: %llu\n", (unsigned long long) ackno);
742 return -EILSEQ;
743}
744
745#ifdef CONFIG_IP_DCCP_DEBUG
746void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
747 int len)
748{
749 if (!dccp_debug)
750 return;
751
752 printk("ACK vector len=%d, ackno=%llu |", len,
753 (unsigned long long) ackno);
754
755 while (len--) {
756 const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
757 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
758
759 printk("%d,%d|", state, rl);
760 ++vector;
761 }
762
763 printk("\n");
764}
765
766void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
767{
768 dccp_ackvector_print(ap->dccpap_buf_ackno,
769 ap->dccpap_buf + ap->dccpap_buf_head,
770 ap->dccpap_buf_vector_len);
771}
772#endif
773
774static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
775{
776 /*
777 * As we're keeping track of the ack vector size
778 * (dccpap_buf_vector_len) and the sent ack vector size
779 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
780 * keep this code here as in the future we'll implement a vector of
781 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
782 * Appendix A. -acme
783 */
784#if 0
785 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
786 if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
787 ap->dccpap_buf_tail -= ap->dccpap_buf_len;
788#endif
789 ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
790}
791
792void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
793 u64 ackno)
794{
795 /* Check if we actually sent an ACK vector */
796 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
797 return;
798
799 if (ackno == ap->dccpap_ack_seqno) {
800#ifdef CONFIG_IP_DCCP_DEBUG
801 struct dccp_sock *dp = dccp_sk(sk);
802 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
803 "CLIENT rx ack: " : "server rx ack: ";
804#endif
805 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
806 "ack_ackno=%llu, ACKED!\n",
807 debug_prefix, 1,
808 (unsigned long long) ap->dccpap_ack_seqno,
809 (unsigned long long) ap->dccpap_ack_ackno);
810 dccp_ackpkts_trow_away_ack_record(ap);
811 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
812 }
813}
814
815static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
816 struct sock *sk, u64 ackno,
817 const unsigned char len,
818 const unsigned char *vector)
819{
820 unsigned char i;
821
822 /* Check if we actually sent an ACK vector */
823 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
824 return;
825 /*
826 * We're in the receiver half connection, so if the received an ACK
827 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
828 * not interested.
829 *
830 * Extra explanation with example:
831 *
832 * if we received an ACK vector with ackno 50, it can only be acking
833 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
834 */
835 /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
836 if (before48(ackno, ap->dccpap_ack_seqno)) {
837 /* dccp_pr_debug_cat("yes\n"); */
838 return;
839 }
840 /* dccp_pr_debug_cat("no\n"); */
841
842 i = len;
843 while (i--) {
844 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
845 u64 ackno_end_rl;
846
847 dccp_set_seqno(&ackno_end_rl, ackno - rl);
848
849 /*
850 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
851 * ap->dccpap_ack_seqno, ackno);
852 */
853 if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
854 const u8 state = (*vector &
855 DCCP_ACKPKTS_STATE_MASK) >> 6;
856 /* dccp_pr_debug_cat("yes\n"); */
857
858 if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
859#ifdef CONFIG_IP_DCCP_DEBUG
860 struct dccp_sock *dp = dccp_sk(sk);
861 const char *debug_prefix =
862 dp->dccps_role == DCCP_ROLE_CLIENT ?
863 "CLIENT rx ack: " : "server rx ack: ";
864#endif
865 dccp_pr_debug("%sACK vector 0, len=%d, "
866 "ack_seqno=%llu, ack_ackno=%llu, "
867 "ACKED!\n",
868 debug_prefix, len,
869 (unsigned long long)
870 ap->dccpap_ack_seqno,
871 (unsigned long long)
872 ap->dccpap_ack_ackno);
873 dccp_ackpkts_trow_away_ack_record(ap);
874 }
875 /*
876 * If dccpap_ack_seqno was not received, no problem
877 * we'll send another ACK vector.
878 */
879 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
880 break;
881 }
882 /* dccp_pr_debug_cat("no\n"); */
883
884 dccp_set_seqno(&ackno, ackno_end_rl - 1);
885 ++vector;
886 }
887}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index ea6d0e91e511..4786bdcddcc9 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -16,6 +16,7 @@
16 16
17#include <net/sock.h> 17#include <net/sock.h>
18 18
19#include "ackvec.h"
19#include "ccid.h" 20#include "ccid.h"
20#include "dccp.h" 21#include "dccp.h"
21 22
@@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
85 switch (dcb->dccpd_type) { 86 switch (dcb->dccpd_type) {
86 case DCCP_PKT_REQUEST: 87 case DCCP_PKT_REQUEST:
87 dccp_hdr_request(skb)->dccph_req_service = 88 dccp_hdr_request(skb)->dccph_req_service =
88 dcb->dccpd_service; 89 dp->dccps_service;
89 break; 90 break;
90 case DCCP_PKT_RESET: 91 case DCCP_PKT_RESET:
91 dccp_hdr_reset(skb)->dccph_reset_code = 92 dccp_hdr_reset(skb)->dccph_reset_code =
@@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
225 err = dccp_wait_for_ccid(sk, skb, timeo); 226 err = dccp_wait_for_ccid(sk, skb, timeo);
226 227
227 if (err == 0) { 228 if (err == 0) {
228 const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
229 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 229 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
230 const int len = skb->len; 230 const int len = skb->len;
231 231
@@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
236 inet_csk(sk)->icsk_rto, 236 inet_csk(sk)->icsk_rto,
237 DCCP_RTO_MAX); 237 DCCP_RTO_MAX);
238 dcb->dccpd_type = DCCP_PKT_DATAACK; 238 dcb->dccpd_type = DCCP_PKT_DATAACK;
239 /* 239 } else if (dccp_ack_pending(sk))
240 * FIXME: we really should have a
241 * dccps_ack_pending or use icsk.
242 */
243 } else if (inet_csk_ack_scheduled(sk) ||
244 dp->dccps_timestamp_echo != 0 ||
245 (dp->dccps_options.dccpo_send_ack_vector &&
246 ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
247 ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
248 dcb->dccpd_type = DCCP_PKT_DATAACK; 240 dcb->dccpd_type = DCCP_PKT_DATAACK;
249 else 241 else
250 dcb->dccpd_type = DCCP_PKT_DATA; 242 dcb->dccpd_type = DCCP_PKT_DATA;
@@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
270 struct request_sock *req) 262 struct request_sock *req)
271{ 263{
272 struct dccp_hdr *dh; 264 struct dccp_hdr *dh;
265 struct dccp_request_sock *dreq;
273 const int dccp_header_size = sizeof(struct dccp_hdr) + 266 const int dccp_header_size = sizeof(struct dccp_hdr) +
274 sizeof(struct dccp_hdr_ext) + 267 sizeof(struct dccp_hdr_ext) +
275 sizeof(struct dccp_hdr_response); 268 sizeof(struct dccp_hdr_response);
@@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
285 skb->dst = dst_clone(dst); 278 skb->dst = dst_clone(dst);
286 skb->csum = 0; 279 skb->csum = 0;
287 280
281 dreq = dccp_rsk(req);
288 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 282 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
289 DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; 283 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
290 dccp_insert_options(sk, skb); 284 dccp_insert_options(sk, skb);
291 285
292 skb->h.raw = skb_push(skb, dccp_header_size); 286 skb->h.raw = skb_push(skb, dccp_header_size);
@@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
300 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; 294 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
301 dh->dccph_type = DCCP_PKT_RESPONSE; 295 dh->dccph_type = DCCP_PKT_RESPONSE;
302 dh->dccph_x = 1; 296 dh->dccph_x = 1;
303 dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); 297 dccp_hdr_set_seq(dh, dreq->dreq_iss);
304 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); 298 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
299 dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
305 300
306 dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, 301 dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
307 inet_rsk(req)->rmt_addr); 302 inet_rsk(req)->rmt_addr);
@@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk)
397 skb_reserve(skb, MAX_DCCP_HEADER); 392 skb_reserve(skb, MAX_DCCP_HEADER);
398 393
399 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; 394 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
400 /* FIXME: set service to something meaningful, coming
401 * from userspace*/
402 DCCP_SKB_CB(skb)->dccpd_service = 0;
403 skb->csum = 0; 395 skb->csum = 0;
404 skb_set_owner_w(skb, sk); 396 skb_set_owner_w(skb, sk);
405 397
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 18a0e69c9dc7..a1cfd0e9e3bc 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
94 94
95static inline int dccp_listen_start(struct sock *sk) 95static inline int dccp_listen_start(struct sock *sk)
96{ 96{
97 dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; 97 struct dccp_sock *dp = dccp_sk(sk);
98
99 dp->dccps_role = DCCP_ROLE_LISTEN;
100 /*
101 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102 * before calling listen()
103 */
104 if (dccp_service_not_initialized(sk))
105 return -EPROTO;
98 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); 106 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
99} 107}
100 108
@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
202 return -ENOIOCTLCMD; 210 return -ENOIOCTLCMD;
203} 211}
204 212
213static int dccp_setsockopt_service(struct sock *sk, const u32 service,
214 char __user *optval, int optlen)
215{
216 struct dccp_sock *dp = dccp_sk(sk);
217 struct dccp_service_list *sl = NULL;
218
219 if (service == DCCP_SERVICE_INVALID_VALUE ||
220 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
221 return -EINVAL;
222
223 if (optlen > sizeof(service)) {
224 sl = kmalloc(optlen, GFP_KERNEL);
225 if (sl == NULL)
226 return -ENOMEM;
227
228 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
229 if (copy_from_user(sl->dccpsl_list,
230 optval + sizeof(service),
231 optlen - sizeof(service)) ||
232 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
233 kfree(sl);
234 return -EFAULT;
235 }
236 }
237
238 lock_sock(sk);
239 dp->dccps_service = service;
240
241 if (dp->dccps_service_list != NULL)
242 kfree(dp->dccps_service_list);
243
244 dp->dccps_service_list = sl;
245 release_sock(sk);
246 return 0;
247}
248
205int dccp_setsockopt(struct sock *sk, int level, int optname, 249int dccp_setsockopt(struct sock *sk, int level, int optname,
206 char __user *optval, int optlen) 250 char __user *optval, int optlen)
207{ 251{
@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
218 if (get_user(val, (int __user *)optval)) 262 if (get_user(val, (int __user *)optval))
219 return -EFAULT; 263 return -EFAULT;
220 264
221 lock_sock(sk); 265 if (optname == DCCP_SOCKOPT_SERVICE)
266 return dccp_setsockopt_service(sk, val, optval, optlen);
222 267
268 lock_sock(sk);
223 dp = dccp_sk(sk); 269 dp = dccp_sk(sk);
224 err = 0; 270 err = 0;
225 271
@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
236 return err; 282 return err;
237} 283}
238 284
285static int dccp_getsockopt_service(struct sock *sk, int len,
286 u32 __user *optval,
287 int __user *optlen)
288{
289 const struct dccp_sock *dp = dccp_sk(sk);
290 const struct dccp_service_list *sl;
291 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
292
293 lock_sock(sk);
294 if (dccp_service_not_initialized(sk))
295 goto out;
296
297 if ((sl = dp->dccps_service_list) != NULL) {
298 slen = sl->dccpsl_nr * sizeof(u32);
299 total_len += slen;
300 }
301
302 err = -EINVAL;
303 if (total_len > len)
304 goto out;
305
306 err = 0;
307 if (put_user(total_len, optlen) ||
308 put_user(dp->dccps_service, optval) ||
309 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
310 err = -EFAULT;
311out:
312 release_sock(sk);
313 return err;
314}
315
239int dccp_getsockopt(struct sock *sk, int level, int optname, 316int dccp_getsockopt(struct sock *sk, int level, int optname,
240 char __user *optval, int __user *optlen) 317 char __user *optval, int __user *optlen)
241{ 318{
@@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
248 if (get_user(len, optlen)) 325 if (get_user(len, optlen))
249 return -EFAULT; 326 return -EFAULT;
250 327
251 len = min_t(unsigned int, len, sizeof(int)); 328 if (len < sizeof(int))
252 if (len < 0)
253 return -EINVAL; 329 return -EINVAL;
254 330
255 dp = dccp_sk(sk); 331 dp = dccp_sk(sk);
@@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
257 switch (optname) { 333 switch (optname) {
258 case DCCP_SOCKOPT_PACKET_SIZE: 334 case DCCP_SOCKOPT_PACKET_SIZE:
259 val = dp->dccps_packet_size; 335 val = dp->dccps_packet_size;
336 len = sizeof(dp->dccps_packet_size);
260 break; 337 break;
338 case DCCP_SOCKOPT_SERVICE:
339 return dccp_getsockopt_service(sk, len,
340 (u32 __user *)optval, optlen);
341 case 128 ... 191:
342 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
343 len, (u32 __user *)optval, optlen);
344 case 192 ... 255:
345 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
346 len, (u32 __user *)optval, optlen);
261 default: 347 default:
262 return -ENOPROTOOPT; 348 return -ENOPROTOOPT;
263 } 349 }
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 30aa8e2ee214..e2162d270073 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS
51 51
52 IF unsure, say `N'. 52 IF unsure, say `N'.
53 53
54config IP_NF_CONNTRACK_NETLINK
55 tristate 'Connection tracking netlink interface'
56 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
57 depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
58 help
59 This option enables support for a netlink-based userspace interface
60
61
54config IP_NF_CT_PROTO_SCTP 62config IP_NF_CT_PROTO_SCTP
55 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' 63 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
56 depends on IP_NF_CONNTRACK && EXPERIMENTAL 64 depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -774,11 +782,5 @@ config IP_NF_ARP_MANGLE
774 Allows altering the ARP packet payload: source and destination 782 Allows altering the ARP packet payload: source and destination
775 hardware and network addresses. 783 hardware and network addresses.
776 784
777config IP_NF_CONNTRACK_NETLINK
778 tristate 'Connection tracking netlink interface'
779 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
780 help
781 This option enables support for a netlink-based userspace interface
782
783endmenu 785endmenu
784 786
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 19cba16e6e1e..f8cd8e42961e 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
1143 if (del_timer(&ct->timeout)) { 1143 if (del_timer(&ct->timeout)) {
1144 ct->timeout.expires = jiffies + extra_jiffies; 1144 ct->timeout.expires = jiffies + extra_jiffies;
1145 add_timer(&ct->timeout); 1145 add_timer(&ct->timeout);
1146 ip_conntrack_event_cache(IPCT_REFRESH, skb); 1146 /* FIXME: We loose some REFRESH events if this function
1147 * is called without an skb. I'll fix this later -HW */
1148 if (skb)
1149 ip_conntrack_event_cache(IPCT_REFRESH, skb);
1147 } 1150 }
1148 ct_add_counters(ct, ctinfo, skb); 1151 ct_add_counters(ct, ctinfo, skb);
1149 write_unlock_bh(&ip_conntrack_lock); 1152 write_unlock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d38913754b1..9bcb398fbc1f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/jhash.h> 15#include <linux/jhash.h>
16#include <linux/bitops.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/ip.h> 18#include <linux/ip.h>
18#include <linux/tcp.h> 19#include <linux/tcp.h>
@@ -30,7 +31,7 @@
30#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 31#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h> 32#include <linux/netfilter_ipv4/ip_conntrack.h>
32 33
33#define CLUSTERIP_VERSION "0.7" 34#define CLUSTERIP_VERSION "0.8"
34 35
35#define DEBUG_CLUSTERIP 36#define DEBUG_CLUSTERIP
36 37
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
49struct clusterip_config { 50struct clusterip_config {
50 struct list_head list; /* list of all configs */ 51 struct list_head list; /* list of all configs */
51 atomic_t refcount; /* reference count */ 52 atomic_t refcount; /* reference count */
53 atomic_t entries; /* number of entries/rules
54 * referencing us */
52 55
53 u_int32_t clusterip; /* the IP address */ 56 u_int32_t clusterip; /* the IP address */
54 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ 57 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
55 struct net_device *dev; /* device */ 58 struct net_device *dev; /* device */
56 u_int16_t num_total_nodes; /* total number of nodes */ 59 u_int16_t num_total_nodes; /* total number of nodes */
57 u_int16_t num_local_nodes; /* number of local nodes */ 60 unsigned long local_nodes; /* node number array */
58 u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
59 61
60#ifdef CONFIG_PROC_FS 62#ifdef CONFIG_PROC_FS
61 struct proc_dir_entry *pde; /* proc dir entry */ 63 struct proc_dir_entry *pde; /* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
66 68
67static LIST_HEAD(clusterip_configs); 69static LIST_HEAD(clusterip_configs);
68 70
69/* clusterip_lock protects the clusterip_configs list _AND_ the configurable 71/* clusterip_lock protects the clusterip_configs list */
70 * data within all structurses (num_local_nodes, local_nodes[]) */
71static DEFINE_RWLOCK(clusterip_lock); 72static DEFINE_RWLOCK(clusterip_lock);
72 73
73#ifdef CONFIG_PROC_FS 74#ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
76#endif 77#endif
77 78
78static inline void 79static inline void
79clusterip_config_get(struct clusterip_config *c) { 80clusterip_config_get(struct clusterip_config *c)
81{
80 atomic_inc(&c->refcount); 82 atomic_inc(&c->refcount);
81} 83}
82 84
83static inline void 85static inline void
84clusterip_config_put(struct clusterip_config *c) { 86clusterip_config_put(struct clusterip_config *c)
85 if (atomic_dec_and_test(&c->refcount)) { 87{
88 if (atomic_dec_and_test(&c->refcount))
89 kfree(c);
90}
91
92/* increase the count of entries(rules) using/referencing this config */
93static inline void
94clusterip_config_entry_get(struct clusterip_config *c)
95{
96 atomic_inc(&c->entries);
97}
98
99/* decrease the count of entries using/referencing this config. If last
100 * entry(rule) is removed, remove the config from lists, but don't free it
101 * yet, since proc-files could still be holding references */
102static inline void
103clusterip_config_entry_put(struct clusterip_config *c)
104{
105 if (atomic_dec_and_test(&c->entries)) {
86 write_lock_bh(&clusterip_lock); 106 write_lock_bh(&clusterip_lock);
87 list_del(&c->list); 107 list_del(&c->list);
88 write_unlock_bh(&clusterip_lock); 108 write_unlock_bh(&clusterip_lock);
109
89 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 110 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
90 dev_put(c->dev); 111 dev_put(c->dev);
91 kfree(c); 112
113 /* In case anyone still accesses the file, the open/close
114 * functions are also incrementing the refcount on their own,
115 * so it's safe to remove the entry even if it's in use. */
116#ifdef CONFIG_PROC_FS
117 remove_proc_entry(c->pde->name, c->pde->parent);
118#endif
92 } 119 }
93} 120}
94 121
95
96static struct clusterip_config * 122static struct clusterip_config *
97__clusterip_config_find(u_int32_t clusterip) 123__clusterip_config_find(u_int32_t clusterip)
98{ 124{
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
111} 137}
112 138
113static inline struct clusterip_config * 139static inline struct clusterip_config *
114clusterip_config_find_get(u_int32_t clusterip) 140clusterip_config_find_get(u_int32_t clusterip, int entry)
115{ 141{
116 struct clusterip_config *c; 142 struct clusterip_config *c;
117 143
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
122 return NULL; 148 return NULL;
123 } 149 }
124 atomic_inc(&c->refcount); 150 atomic_inc(&c->refcount);
151 if (entry)
152 atomic_inc(&c->entries);
125 read_unlock_bh(&clusterip_lock); 153 read_unlock_bh(&clusterip_lock);
126 154
127 return c; 155 return c;
128} 156}
129 157
158static void
159clusterip_config_init_nodelist(struct clusterip_config *c,
160 const struct ipt_clusterip_tgt_info *i)
161{
162 int n;
163
164 for (n = 0; n < i->num_local_nodes; n++) {
165 set_bit(i->local_nodes[n] - 1, &c->local_nodes);
166 }
167}
168
130static struct clusterip_config * 169static struct clusterip_config *
131clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, 170clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
132 struct net_device *dev) 171 struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
143 c->clusterip = ip; 182 c->clusterip = ip;
144 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 183 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
145 c->num_total_nodes = i->num_total_nodes; 184 c->num_total_nodes = i->num_total_nodes;
146 c->num_local_nodes = i->num_local_nodes; 185 clusterip_config_init_nodelist(c, i);
147 memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
148 c->hash_mode = i->hash_mode; 186 c->hash_mode = i->hash_mode;
149 c->hash_initval = i->hash_initval; 187 c->hash_initval = i->hash_initval;
150 atomic_set(&c->refcount, 1); 188 atomic_set(&c->refcount, 1);
189 atomic_set(&c->entries, 1);
151 190
152#ifdef CONFIG_PROC_FS 191#ifdef CONFIG_PROC_FS
153 /* create proc dir entry */ 192 /* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
171static int 210static int
172clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) 211clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
173{ 212{
174 int i;
175
176 write_lock_bh(&clusterip_lock);
177 213
178 if (c->num_local_nodes >= CLUSTERIP_MAX_NODES 214 if (nodenum == 0 ||
179 || nodenum > CLUSTERIP_MAX_NODES) { 215 nodenum > c->num_total_nodes)
180 write_unlock_bh(&clusterip_lock);
181 return 1; 216 return 1;
182 }
183
184 /* check if we alrady have this number in our array */
185 for (i = 0; i < c->num_local_nodes; i++) {
186 if (c->local_nodes[i] == nodenum) {
187 write_unlock_bh(&clusterip_lock);
188 return 1;
189 }
190 }
191 217
192 c->local_nodes[c->num_local_nodes++] = nodenum; 218 /* check if we already have this number in our bitfield */
219 if (test_and_set_bit(nodenum - 1, &c->local_nodes))
220 return 1;
193 221
194 write_unlock_bh(&clusterip_lock);
195 return 0; 222 return 0;
196} 223}
197 224
198static int 225static int
199clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) 226clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
200{ 227{
201 int i; 228 if (nodenum == 0 ||
202 229 nodenum > c->num_total_nodes)
203 write_lock_bh(&clusterip_lock);
204
205 if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
206 write_unlock_bh(&clusterip_lock);
207 return 1; 230 return 1;
208 }
209 231
210 for (i = 0; i < c->num_local_nodes; i++) { 232 if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
211 if (c->local_nodes[i] == nodenum) { 233 return 0;
212 int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
213 memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
214 c->num_local_nodes--;
215 write_unlock_bh(&clusterip_lock);
216 return 0;
217 }
218 }
219 234
220 write_unlock_bh(&clusterip_lock);
221 return 1; 235 return 1;
222} 236}
223 237
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
285static inline int 299static inline int
286clusterip_responsible(struct clusterip_config *config, u_int32_t hash) 300clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
287{ 301{
288 int i; 302 return test_bit(hash - 1, &config->local_nodes);
289
290 read_lock_bh(&clusterip_lock);
291
292 if (config->num_local_nodes == 0) {
293 read_unlock_bh(&clusterip_lock);
294 return 0;
295 }
296
297 for (i = 0; i < config->num_local_nodes; i++) {
298 if (config->local_nodes[i] == hash) {
299 read_unlock_bh(&clusterip_lock);
300 return 1;
301 }
302 }
303
304 read_unlock_bh(&clusterip_lock);
305
306 return 0;
307} 303}
308 304
309/*********************************************************************** 305/***********************************************************************
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
415 411
416 /* FIXME: further sanity checks */ 412 /* FIXME: further sanity checks */
417 413
418 config = clusterip_config_find_get(e->ip.dst.s_addr); 414 config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
419 if (!config) { 415 if (config) {
416 if (cipinfo->config != NULL) {
417 /* Case A: This is an entry that gets reloaded, since
418 * it still has a cipinfo->config pointer. Simply
419 * increase the entry refcount and return */
420 if (cipinfo->config != config) {
421 printk(KERN_ERR "CLUSTERIP: Reloaded entry "
422 "has invalid config pointer!\n");
423 return 0;
424 }
425 clusterip_config_entry_get(cipinfo->config);
426 } else {
427 /* Case B: This is a new rule referring to an existing
428 * clusterip config. */
429 cipinfo->config = config;
430 clusterip_config_entry_get(cipinfo->config);
431 }
432 } else {
433 /* Case C: This is a completely new clusterip config */
420 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { 434 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
421 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); 435 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
422 return 0; 436 return 0;
@@ -443,10 +457,9 @@ checkentry(const char *tablename,
443 } 457 }
444 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 458 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
445 } 459 }
460 cipinfo->config = config;
446 } 461 }
447 462
448 cipinfo->config = config;
449
450 return 1; 463 return 1;
451} 464}
452 465
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
455{ 468{
456 struct ipt_clusterip_tgt_info *cipinfo = matchinfo; 469 struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
457 470
458 /* we first remove the proc entry and then drop the reference 471 /* if no more entries are referencing the config, remove it
459 * count. In case anyone still accesses the file, the open/close 472 * from the list and destroy the proc entry */
460 * functions are also incrementing the refcount on their own */ 473 clusterip_config_entry_put(cipinfo->config);
461#ifdef CONFIG_PROC_FS 474
462 remove_proc_entry(cipinfo->config->pde->name,
463 cipinfo->config->pde->parent);
464#endif
465 clusterip_config_put(cipinfo->config); 475 clusterip_config_put(cipinfo->config);
466} 476}
467 477
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
533 543
534 /* if there is no clusterip configuration for the arp reply's 544 /* if there is no clusterip configuration for the arp reply's
535 * source ip, we don't want to mangle it */ 545 * source ip, we don't want to mangle it */
536 c = clusterip_config_find_get(payload->src_ip); 546 c = clusterip_config_find_get(payload->src_ip, 0);
537 if (!c) 547 if (!c)
538 return NF_ACCEPT; 548 return NF_ACCEPT;
539 549
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
574 584
575#ifdef CONFIG_PROC_FS 585#ifdef CONFIG_PROC_FS
576 586
587struct clusterip_seq_position {
588 unsigned int pos; /* position */
589 unsigned int weight; /* number of bits set == size */
590 unsigned int bit; /* current bit */
591 unsigned long val; /* current value */
592};
593
577static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 594static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
578{ 595{
579 struct proc_dir_entry *pde = s->private; 596 struct proc_dir_entry *pde = s->private;
580 struct clusterip_config *c = pde->data; 597 struct clusterip_config *c = pde->data;
581 unsigned int *nodeidx; 598 unsigned int weight;
582 599 u_int32_t local_nodes;
583 read_lock_bh(&clusterip_lock); 600 struct clusterip_seq_position *idx;
584 if (*pos >= c->num_local_nodes) 601
602 /* FIXME: possible race */
603 local_nodes = c->local_nodes;
604 weight = hweight32(local_nodes);
605 if (*pos >= weight)
585 return NULL; 606 return NULL;
586 607
587 nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); 608 idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
588 if (!nodeidx) 609 if (!idx)
589 return ERR_PTR(-ENOMEM); 610 return ERR_PTR(-ENOMEM);
590 611
591 *nodeidx = *pos; 612 idx->pos = *pos;
592 return nodeidx; 613 idx->weight = weight;
614 idx->bit = ffs(local_nodes);
615 idx->val = local_nodes;
616 clear_bit(idx->bit - 1, &idx->val);
617
618 return idx;
593} 619}
594 620
595static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) 621static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
596{ 622{
597 struct proc_dir_entry *pde = s->private; 623 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
598 struct clusterip_config *c = pde->data;
599 unsigned int *nodeidx = (unsigned int *)v;
600 624
601 *pos = ++(*nodeidx); 625 *pos = ++idx->pos;
602 if (*pos >= c->num_local_nodes) { 626 if (*pos >= idx->weight) {
603 kfree(v); 627 kfree(v);
604 return NULL; 628 return NULL;
605 } 629 }
606 return nodeidx; 630 idx->bit = ffs(idx->val);
631 clear_bit(idx->bit - 1, &idx->val);
632 return idx;
607} 633}
608 634
609static void clusterip_seq_stop(struct seq_file *s, void *v) 635static void clusterip_seq_stop(struct seq_file *s, void *v)
610{ 636{
611 kfree(v); 637 kfree(v);
612
613 read_unlock_bh(&clusterip_lock);
614} 638}
615 639
616static int clusterip_seq_show(struct seq_file *s, void *v) 640static int clusterip_seq_show(struct seq_file *s, void *v)
617{ 641{
618 struct proc_dir_entry *pde = s->private; 642 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
619 struct clusterip_config *c = pde->data;
620 unsigned int *nodeidx = (unsigned int *)v;
621 643
622 if (*nodeidx != 0) 644 if (idx->pos != 0)
623 seq_putc(s, ','); 645 seq_putc(s, ',');
624 seq_printf(s, "%u", c->local_nodes[*nodeidx]);
625 646
626 if (*nodeidx == c->num_local_nodes-1) 647 seq_printf(s, "%u", idx->bit);
648
649 if (idx->pos == idx->weight - 1)
627 seq_putc(s, '\n'); 650 seq_putc(s, '\n');
628 651
629 return 0; 652 return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2b9bf9bd177f..6001948600f3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -639,6 +639,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
639 int tclass = -1; 639 int tclass = -1;
640 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 640 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
641 int err; 641 int err;
642 int connected = 0;
642 643
643 /* destination address check */ 644 /* destination address check */
644 if (sin6) { 645 if (sin6) {
@@ -748,6 +749,7 @@ do_udp_sendmsg:
748 fl->fl_ip_dport = inet->dport; 749 fl->fl_ip_dport = inet->dport;
749 daddr = &np->daddr; 750 daddr = &np->daddr;
750 fl->fl6_flowlabel = np->flow_label; 751 fl->fl6_flowlabel = np->flow_label;
752 connected = 1;
751 } 753 }
752 754
753 if (!fl->oif) 755 if (!fl->oif)
@@ -770,6 +772,7 @@ do_udp_sendmsg:
770 } 772 }
771 if (!(opt->opt_nflen|opt->opt_flen)) 773 if (!(opt->opt_nflen|opt->opt_flen))
772 opt = NULL; 774 opt = NULL;
775 connected = 0;
773 } 776 }
774 if (opt == NULL) 777 if (opt == NULL)
775 opt = np->opt; 778 opt = np->opt;
@@ -787,10 +790,13 @@ do_udp_sendmsg:
787 ipv6_addr_copy(&final, &fl->fl6_dst); 790 ipv6_addr_copy(&final, &fl->fl6_dst);
788 ipv6_addr_copy(&fl->fl6_dst, rt0->addr); 791 ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
789 final_p = &final; 792 final_p = &final;
793 connected = 0;
790 } 794 }
791 795
792 if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) 796 if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
793 fl->oif = np->mcast_oif; 797 fl->oif = np->mcast_oif;
798 connected = 0;
799 }
794 800
795 err = ip6_dst_lookup(sk, &dst, fl); 801 err = ip6_dst_lookup(sk, &dst, fl);
796 if (err) 802 if (err)
@@ -846,7 +852,7 @@ do_append_data:
846 else if (!corkreq) 852 else if (!corkreq)
847 err = udp_v6_push_pending_frames(sk, up); 853 err = udp_v6_push_pending_frames(sk, up);
848 854
849 if (dst) 855 if (dst && connected)
850 ip6_dst_store(sk, dst, 856 ip6_dst_store(sk, dst,
851 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? 857 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
852 &np->daddr : NULL); 858 &np->daddr : NULL);
diff --git a/net/socket.c b/net/socket.c
index c699e93c33d7..f9264472377f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1862 if (err < 0) 1862 if (err < 0)
1863 goto out_freeiov; 1863 goto out_freeiov;
1864 } 1864 }
1865 err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); 1865 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1866 COMPAT_FLAGS(msg));
1866 if (err) 1867 if (err)
1867 goto out_freeiov; 1868 goto out_freeiov;
1868 if (MSG_CMSG_COMPAT & flags) 1869 if (MSG_CMSG_COMPAT & flags)