aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Bittau <a.bittau@cs.ucl.ac.uk>2006-03-20 20:41:47 -0500
committerDavid S. Miller <davem@davemloft.net>2006-03-20 20:41:47 -0500
commit2a91aa3967398fb94eccc8da67c82bce9f67afdf (patch)
tree62bf003487121bc629919c85810df11e52016b8f
parentaa5d7df3b20e0e493e90e1151510ab3ae8366bb5 (diff)
[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation
Original work by Andrea Bittau; Arnaldo Melo cleaned it up and fixed several issues during the merge process. For now CCID2 has been made the default for all SOCK_DCCP connections, but this will be remedied soon with the merge of the feature negotiation code. Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk> Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/dccp.h8
-rw-r--r--net/dccp/Kconfig4
-rw-r--r--net/dccp/ccids/Kconfig39
-rw-r--r--net/dccp/ccids/Makefile4
-rw-r--r--net/dccp/ccids/ccid2.c838
-rw-r--r--net/dccp/ccids/ccid2.h69
-rw-r--r--net/dccp/ipv4.c1
7 files changed, 957 insertions, 6 deletions
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 088529f54965..268b4579d7e5 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -314,9 +314,9 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
314 314
315/* initial values for each feature */ 315/* initial values for each feature */
316#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 316#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
317/* FIXME: for now we're using CCID 3 (TFRC) */ 317/* FIXME: for now we're using CCID 2 (TCP-Like) */
318#define DCCPF_INITIAL_CCID 3 318#define DCCPF_INITIAL_CCID 2
319#define DCCPF_INITIAL_SEND_ACK_VECTOR 0 319#define DCCPF_INITIAL_SEND_ACK_VECTOR 1
320/* FIXME: for now we're default to 1 but it should really be 0 */ 320/* FIXME: for now we're default to 1 but it should really be 0 */
321#define DCCPF_INITIAL_SEND_NDP_COUNT 1 321#define DCCPF_INITIAL_SEND_NDP_COUNT 1
322 322
@@ -430,6 +430,8 @@ struct dccp_sock {
430 struct timeval dccps_timestamp_time; 430 struct timeval dccps_timestamp_time;
431 __u32 dccps_timestamp_echo; 431 __u32 dccps_timestamp_echo;
432 __u32 dccps_packet_size; 432 __u32 dccps_packet_size;
433 __u16 dccps_l_ack_ratio;
434 __u16 dccps_r_ack_ratio;
433 unsigned long dccps_ndp_count; 435 unsigned long dccps_ndp_count;
434 __u32 dccps_mss_cache; 436 __u32 dccps_mss_cache;
435 struct dccp_options dccps_options; 437 struct dccp_options dccps_options;
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 187ac182e24b..24a6981e209a 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -24,6 +24,10 @@ config INET_DCCP_DIAG
24 def_tristate y if (IP_DCCP = y && INET_DIAG = y) 24 def_tristate y if (IP_DCCP = y && INET_DIAG = y)
25 def_tristate m 25 def_tristate m
26 26
27config IP_DCCP_ACKVEC
28 depends on IP_DCCP
29 def_bool N
30
27source "net/dccp/ccids/Kconfig" 31source "net/dccp/ccids/Kconfig"
28 32
29menu "DCCP Kernel Hacking" 33menu "DCCP Kernel Hacking"
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 7684d83946a4..422af197171d 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,6 +1,34 @@
1menu "DCCP CCIDs Configuration (EXPERIMENTAL)" 1menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
2 depends on IP_DCCP && EXPERIMENTAL 2 depends on IP_DCCP && EXPERIMENTAL
3 3
4config IP_DCCP_CCID2
5 tristate "CCID2 (TCP) (EXPERIMENTAL)"
6 depends on IP_DCCP
7 select IP_DCCP_ACKVEC
8 ---help---
9 CCID 2, TCP-like Congestion Control, denotes Additive Increase,
10 Multiplicative Decrease (AIMD) congestion control with behavior
11 modelled directly on TCP, including congestion window, slow start,
12 timeouts, and so forth [RFC 2581]. CCID 2 achieves maximum
13 bandwidth over the long term, consistent with the use of end-to-end
14 congestion control, but halves its congestion window in response to
15 each congestion event. This leads to the abrupt rate changes
16 typical of TCP. Applications should use CCID 2 if they prefer
17 maximum bandwidth utilization to steadiness of rate. This is often
18 the case for applications that are not playing their data directly
19 to the user. For example, a hypothetical application that
20 transferred files over DCCP, using application-level retransmissions
21 for lost packets, would prefer CCID 2 to CCID 3. On-line games may
22 also prefer CCID 2.
23
24 CCID 2 is further described in:
25 http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid2-10.txt
26
27 This text was extracted from:
28 http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
29
30 If in doubt, say M.
31
4config IP_DCCP_CCID3 32config IP_DCCP_CCID3
5 tristate "CCID3 (TFRC) (EXPERIMENTAL)" 33 tristate "CCID3 (TFRC) (EXPERIMENTAL)"
6 depends on IP_DCCP 34 depends on IP_DCCP
@@ -15,10 +43,15 @@ config IP_DCCP_CCID3
15 suitable than CCID 2 for applications such streaming media where a 43 suitable than CCID 2 for applications such streaming media where a
16 relatively smooth sending rate is of importance. 44 relatively smooth sending rate is of importance.
17 45
18 CCID 3 is further described in [CCID 3 PROFILE]. The TFRC 46 CCID 3 is further described in:
19 congestion control algorithms were initially described in RFC 3448. 47
48 http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid3-11.txt.
49
50 The TFRC congestion control algorithms were initially described in
51 RFC 3448.
20 52
21 This text was extracted from draft-ietf-dccp-spec-11.txt. 53 This text was extracted from:
54 http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
22 55
23 If in doubt, say M. 56 If in doubt, say M.
24 57
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
index 956f79f50743..438f20bccff7 100644
--- a/net/dccp/ccids/Makefile
+++ b/net/dccp/ccids/Makefile
@@ -2,4 +2,8 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
2 2
3dccp_ccid3-y := ccid3.o 3dccp_ccid3-y := ccid3.o
4 4
5obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o
6
7dccp_ccid2-y := ccid2.o
8
5obj-y += lib/ 9obj-y += lib/
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
new file mode 100644
index 000000000000..4a7b87512560
--- /dev/null
+++ b/net/dccp/ccids/ccid2.c
@@ -0,0 +1,838 @@
1/*
2 * net/dccp/ccids/ccid2.c
3 *
4 * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
5 *
6 * Changes to meet Linux coding standards, and DCCP infrastructure fixes.
7 *
8 * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25/*
26 * This implementation should follow: draft-ietf-dccp-ccid2-10.txt
27 *
28 * BUGS:
29 * - sequence number wrapping
30 * - jiffies wrapping
31 */
32
33#include <linux/config.h>
34#include "../ccid.h"
35#include "../dccp.h"
36#include "ccid2.h"
37
/* Run-time switch for debug output; exposed as a read-only module parameter. */
38static int ccid2_debug;
39
/* Change "#if 0" to "#if 1" to compile in the debug printing and sanity checks. */
40#if 0
41#define CCID2_DEBUG
42#endif
43
/*
 * ccid2_pr_debug() - printk at KERN_DEBUG level, prefixed with the calling
 * function's name.  Only prints when CCID2_DEBUG is compiled in AND the
 * ccid2_debug flag is non-zero; otherwise it expands to nothing.
 */
44#ifdef CCID2_DEBUG
45#define ccid2_pr_debug(format, a...) \
46 do { if (ccid2_debug) \
47 printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
48 } while (0)
49#else
50#define ccid2_pr_debug(format, a...)
51#endif
52
/* Number of slots in the sender's circular list of sent-packet records. */
53static const int ccid2_seq_len = 128;
54
55static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
56{
57 return dccp_sk(sk)->dccps_hc_tx_ccid_private;
58}
59
60static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk)
61{
62 return dccp_sk(sk)->dccps_hc_rx_ccid_private;
63}
64
65#ifdef CCID2_DEBUG
66static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
67{
68 int len = 0;
69 struct ccid2_seq *seqp;
70 int pipe = 0;
71
72 seqp = hctx->ccid2hctx_seqh;
73
74 /* there is data in the chain */
75 if (seqp != hctx->ccid2hctx_seqt) {
76 seqp = seqp->ccid2s_prev;
77 len++;
78 if (!seqp->ccid2s_acked)
79 pipe++;
80
81 while (seqp != hctx->ccid2hctx_seqt) {
82 struct ccid2_seq *prev;
83
84 prev = seqp->ccid2s_prev;
85 len++;
86 if (!prev->ccid2s_acked)
87 pipe++;
88
89 /* packets are sent sequentially */
90 BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
91 BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent);
92 BUG_ON(len > ccid2_seq_len);
93
94 seqp = prev;
95 }
96 }
97
98 BUG_ON(pipe != hctx->ccid2hctx_pipe);
99 ccid2_pr_debug("len of chain=%d\n", len);
100
101 do {
102 seqp = seqp->ccid2s_prev;
103 len++;
104 BUG_ON(len > ccid2_seq_len);
105 } while(seqp != hctx->ccid2hctx_seqh);
106
107 BUG_ON(len != ccid2_seq_len);
108 ccid2_pr_debug("total len=%d\n", len);
109}
110#else
111#define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
112#endif
113
114static int ccid2_hc_tx_send_packet(struct sock *sk,
115 struct sk_buff *skb, int len)
116{
117 struct ccid2_hc_tx_sock *hctx;
118
119 switch (DCCP_SKB_CB(skb)->dccpd_type) {
120 case 0: /* XXX data packets from userland come through like this */
121 case DCCP_PKT_DATA:
122 case DCCP_PKT_DATAACK:
123 break;
124 /* No congestion control on other packets */
125 default:
126 return 0;
127 }
128
129 hctx = ccid2_hc_tx_sk(sk);
130
131 ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe,
132 hctx->ccid2hctx_cwnd);
133
134 if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
135 /* OK we can send... make sure previous packet was sent off */
136 if (!hctx->ccid2hctx_sendwait) {
137 hctx->ccid2hctx_sendwait = 1;
138 return 0;
139 }
140 }
141
142 return 100; /* XXX */
143}
144
145static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
146{
147 struct dccp_sock *dp = dccp_sk(sk);
148 /*
149 * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio
150 * should be 1... it shouldn't be allowed to become 2.
151 * -sorbo.
152 */
153 if (val != 2) {
154 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
155 int max = hctx->ccid2hctx_cwnd / 2;
156
157 /* round up */
158 if (hctx->ccid2hctx_cwnd & 1)
159 max++;
160
161 if (val > max)
162 val = max;
163 }
164
165 ccid2_pr_debug("changing local ack ratio to %d\n", val);
166 WARN_ON(val <= 0);
167 dp->dccps_l_ack_ratio = val;
168}
169
170static void ccid2_change_cwnd(struct sock *sk, int val)
171{
172 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
173
174 if (val == 0)
175 val = 1;
176
177 /* XXX do we need to change ack ratio? */
178 ccid2_pr_debug("change cwnd to %d\n", val);
179
180 BUG_ON(val < 1);
181 hctx->ccid2hctx_cwnd = val;
182}
183
184static void ccid2_start_rto_timer(struct sock *sk);
185
186static void ccid2_hc_tx_rto_expire(unsigned long data)
187{
188 struct sock *sk = (struct sock *)data;
189 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
190 long s;
191
192 /* XXX I don't think i'm locking correctly
193 * -sorbo.
194 */
195 bh_lock_sock(sk);
196 if (sock_owned_by_user(sk)) {
197 sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
198 jiffies + HZ / 5);
199 goto out;
200 }
201
202 ccid2_pr_debug("RTO_EXPIRE\n");
203
204 ccid2_hc_tx_check_sanity(hctx);
205
206 /* back-off timer */
207 hctx->ccid2hctx_rto <<= 1;
208
209 s = hctx->ccid2hctx_rto / HZ;
210 if (s > 60)
211 hctx->ccid2hctx_rto = 60 * HZ;
212
213 ccid2_start_rto_timer(sk);
214
215 /* adjust pipe, cwnd etc */
216 hctx->ccid2hctx_pipe = 0;
217 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
218 if (hctx->ccid2hctx_ssthresh < 2)
219 hctx->ccid2hctx_ssthresh = 2;
220 ccid2_change_cwnd(sk, 1);
221
222 /* clear state about stuff we sent */
223 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
224 hctx->ccid2hctx_ssacks = 0;
225 hctx->ccid2hctx_acks = 0;
226 hctx->ccid2hctx_sent = 0;
227
228 /* clear ack ratio state. */
229 hctx->ccid2hctx_arsent = 0;
230 hctx->ccid2hctx_ackloss = 0;
231 hctx->ccid2hctx_rpseq = 0;
232 hctx->ccid2hctx_rpdupack = -1;
233 ccid2_change_l_ack_ratio(sk, 1);
234 ccid2_hc_tx_check_sanity(hctx);
235out:
236 bh_unlock_sock(sk);
237/* sock_put(sk); */
238}
239
240static void ccid2_start_rto_timer(struct sock *sk)
241{
242 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
243
244 ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
245
246 BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
247 sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
248 jiffies + hctx->ccid2hctx_rto);
249}
250
251static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
252{
253 struct dccp_sock *dp = dccp_sk(sk);
254 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
255 u64 seq;
256
257 ccid2_hc_tx_check_sanity(hctx);
258
259 BUG_ON(!hctx->ccid2hctx_sendwait);
260 hctx->ccid2hctx_sendwait = 0;
261 hctx->ccid2hctx_pipe++;
262 BUG_ON(hctx->ccid2hctx_pipe < 0);
263
264 /* There is an issue. What if another packet is sent between
265 * packet_send() and packet_sent(). Then the sequence number would be
266 * wrong.
267 * -sorbo.
268 */
269 seq = dp->dccps_gss;
270
271 hctx->ccid2hctx_seqh->ccid2s_seq = seq;
272 hctx->ccid2hctx_seqh->ccid2s_acked = 0;
273 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
274 hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next;
275
276 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
277 hctx->ccid2hctx_pipe);
278
279 if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) {
280 /* XXX allocate more space */
281 WARN_ON(1);
282 }
283
284 hctx->ccid2hctx_sent++;
285
286 /* Ack Ratio. Need to maintain a concept of how many windows we sent */
287 hctx->ccid2hctx_arsent++;
288 /* We had an ack loss in this window... */
289 if (hctx->ccid2hctx_ackloss) {
290 if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
291 hctx->ccid2hctx_arsent = 0;
292 hctx->ccid2hctx_ackloss = 0;
293 }
294 }
295 /* No acks lost up to now... */
296 else {
297 /* decrease ack ratio if enough packets were sent */
298 if (dp->dccps_l_ack_ratio > 1) {
299 /* XXX don't calculate denominator each time */
300 int denom;
301
302 denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
303 dp->dccps_l_ack_ratio;
304 denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
305
306 if (hctx->ccid2hctx_arsent >= denom) {
307 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
308 hctx->ccid2hctx_arsent = 0;
309 }
310 }
311 /* we can't increase ack ratio further [1] */
312 else {
313 hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
314 }
315 }
316
317 /* setup RTO timer */
318 if (!timer_pending(&hctx->ccid2hctx_rtotimer)) {
319 ccid2_start_rto_timer(sk);
320 }
321#ifdef CCID2_DEBUG
322 ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
323 ccid2_pr_debug("Sent: seq=%llu\n", seq);
324 do {
325 struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
326
327 while (seqp != hctx->ccid2hctx_seqh) {
328 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
329 seqp->ccid2s_seq, seqp->ccid2s_acked,
330 seqp->ccid2s_sent);
331 seqp = seqp->ccid2s_next;
332 }
333 } while(0);
334 ccid2_pr_debug("=========\n");
335 ccid2_hc_tx_check_sanity(hctx);
336#endif
337}
338
339/* XXX Lame code duplication!
340 * returns -1 if none was found.
341 * else returns the next offset to use in the function call.
342 */
343static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
344 unsigned char **vec, unsigned char *veclen)
345{
346 const struct dccp_hdr *dh = dccp_hdr(skb);
347 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
348 unsigned char *opt_ptr;
349 const unsigned char *opt_end = (unsigned char *)dh +
350 (dh->dccph_doff * 4);
351 unsigned char opt, len;
352 unsigned char *value;
353
354 BUG_ON(offset < 0);
355 options += offset;
356 opt_ptr = options;
357 if (opt_ptr >= opt_end)
358 return -1;
359
360 while (opt_ptr != opt_end) {
361 opt = *opt_ptr++;
362 len = 0;
363 value = NULL;
364
365 /* Check if this isn't a single byte option */
366 if (opt > DCCPO_MAX_RESERVED) {
367 if (opt_ptr == opt_end)
368 goto out_invalid_option;
369
370 len = *opt_ptr++;
371 if (len < 3)
372 goto out_invalid_option;
373 /*
374 * Remove the type and len fields, leaving
375 * just the value size
376 */
377 len -= 2;
378 value = opt_ptr;
379 opt_ptr += len;
380
381 if (opt_ptr > opt_end)
382 goto out_invalid_option;
383 }
384
385 switch (opt) {
386 case DCCPO_ACK_VECTOR_0:
387 case DCCPO_ACK_VECTOR_1:
388 *vec = value;
389 *veclen = len;
390 return offset + (opt_ptr - options);
391 break;
392 }
393 }
394
395 return -1;
396
397out_invalid_option:
398 BUG_ON(1); /* should never happen... options were previously parsed ! */
399 return -1;
400}
401
402static void ccid2_hc_tx_kill_rto_timer(struct ccid2_hc_tx_sock *hctx)
403{
404 if (del_timer(&hctx->ccid2hctx_rtotimer))
405 ccid2_pr_debug("deleted RTO timer\n");
406}
407
408static inline void ccid2_new_ack(struct sock *sk,
409 struct ccid2_seq *seqp,
410 unsigned int *maxincr)
411{
412 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
413
414 /* slow start */
415 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
416 hctx->ccid2hctx_acks = 0;
417
418 /* We can increase cwnd at most maxincr [ack_ratio/2] */
419 if (*maxincr) {
420 /* increase every 2 acks */
421 hctx->ccid2hctx_ssacks++;
422 if (hctx->ccid2hctx_ssacks == 2) {
423 ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
424 hctx->ccid2hctx_ssacks = 0;
425 *maxincr = *maxincr - 1;
426 }
427 }
428 /* increased cwnd enough for this single ack */
429 else {
430 hctx->ccid2hctx_ssacks = 0;
431 }
432 }
433 else {
434 hctx->ccid2hctx_ssacks = 0;
435 hctx->ccid2hctx_acks++;
436
437 if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
438 ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
439 hctx->ccid2hctx_acks = 0;
440 }
441 }
442
443 /* update RTO */
444 if (hctx->ccid2hctx_srtt == -1 ||
445 (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) {
446 unsigned long r = jiffies - seqp->ccid2s_sent;
447 int s;
448
449 /* first measurement */
450 if (hctx->ccid2hctx_srtt == -1) {
451 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
452 r, jiffies, seqp->ccid2s_seq);
453 hctx->ccid2hctx_srtt = r;
454 hctx->ccid2hctx_rttvar = r >> 1;
455 }
456 else {
457 /* RTTVAR */
458 long tmp = hctx->ccid2hctx_srtt - r;
459 if (tmp < 0)
460 tmp *= -1;
461
462 tmp >>= 2;
463 hctx->ccid2hctx_rttvar *= 3;
464 hctx->ccid2hctx_rttvar >>= 2;
465 hctx->ccid2hctx_rttvar += tmp;
466
467 /* SRTT */
468 hctx->ccid2hctx_srtt *= 7;
469 hctx->ccid2hctx_srtt >>= 3;
470 tmp = r >> 3;
471 hctx->ccid2hctx_srtt += tmp;
472 }
473 s = hctx->ccid2hctx_rttvar << 2;
474 /* clock granularity is 1 when based on jiffies */
475 if (!s)
476 s = 1;
477 hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
478
479 /* must be at least a second */
480 s = hctx->ccid2hctx_rto / HZ;
481 /* DCCP doesn't require this [but I like it cuz my code sux] */
482#if 1
483 if (s < 1)
484 hctx->ccid2hctx_rto = HZ;
485#endif
486 /* max 60 seconds */
487 if (s > 60)
488 hctx->ccid2hctx_rto = HZ * 60;
489
490 hctx->ccid2hctx_lastrtt = jiffies;
491
492 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
493 hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
494 hctx->ccid2hctx_rto, HZ, r);
495 hctx->ccid2hctx_sent = 0;
496 }
497
498 /* we got a new ack, so re-start RTO timer */
499 ccid2_hc_tx_kill_rto_timer(hctx);
500 ccid2_start_rto_timer(sk);
501}
502
503static void ccid2_hc_tx_dec_pipe(struct ccid2_hc_tx_sock *hctx)
504{
505 hctx->ccid2hctx_pipe--;
506 BUG_ON(hctx->ccid2hctx_pipe < 0);
507
508 if (hctx->ccid2hctx_pipe == 0)
509 ccid2_hc_tx_kill_rto_timer(hctx);
510}
511
512static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
513{
514 struct dccp_sock *dp = dccp_sk(sk);
515 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
516 u64 ackno, seqno;
517 struct ccid2_seq *seqp;
518 unsigned char *vector;
519 unsigned char veclen;
520 int offset = 0;
521 int done = 0;
522 int loss = 0;
523 unsigned int maxincr = 0;
524
525 ccid2_hc_tx_check_sanity(hctx);
526 /* check reverse path congestion */
527 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
528
529 /* XXX this whole "algorithm" is broken. Need to fix it to keep track
530 * of the seqnos of the dupacks so that rpseq and rpdupack are correct
531 * -sorbo.
532 */
533 /* need to bootstrap */
534 if (hctx->ccid2hctx_rpdupack == -1) {
535 hctx->ccid2hctx_rpdupack = 0;
536 hctx->ccid2hctx_rpseq = seqno;
537 }
538 else {
539 /* check if packet is consecutive */
540 if ((hctx->ccid2hctx_rpseq + 1) == seqno) {
541 hctx->ccid2hctx_rpseq++;
542 }
543 /* it's a later packet */
544 else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
545 hctx->ccid2hctx_rpdupack++;
546
547 /* check if we got enough dupacks */
548 if (hctx->ccid2hctx_rpdupack >=
549 hctx->ccid2hctx_numdupack) {
550
551 hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
552 hctx->ccid2hctx_rpseq = 0;
553
554 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1);
555 }
556 }
557 }
558
559 /* check forward path congestion */
560 /* still didn't send out new data packets */
561 if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
562 return;
563
564 switch (DCCP_SKB_CB(skb)->dccpd_type) {
565 case DCCP_PKT_ACK:
566 case DCCP_PKT_DATAACK:
567 break;
568
569 default:
570 return;
571 }
572
573 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
574 seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
575
576 /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
577 * this single ack. I round up.
578 * -sorbo.
579 */
580 maxincr = dp->dccps_l_ack_ratio >> 1;
581 maxincr++;
582
583 /* go through all ack vectors */
584 while ((offset = ccid2_ackvector(sk, skb, offset,
585 &vector, &veclen)) != -1) {
586 /* go through this ack vector */
587 while (veclen--) {
588 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
589 u64 ackno_end_rl;
590
591 dccp_set_seqno(&ackno_end_rl, ackno - rl);
592 ccid2_pr_debug("ackvec start:%llu end:%llu\n", ackno,
593 ackno_end_rl);
594 /* if the seqno we are analyzing is larger than the
595 * current ackno, then move towards the tail of our
596 * seqnos.
597 */
598 while (after48(seqp->ccid2s_seq, ackno)) {
599 if (seqp == hctx->ccid2hctx_seqt) {
600 done = 1;
601 break;
602 }
603 seqp = seqp->ccid2s_prev;
604 }
605 if (done)
606 break;
607
608 /* check all seqnos in the range of the vector
609 * run length
610 */
611 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
612 const u8 state = (*vector &
613 DCCP_ACKVEC_STATE_MASK) >> 6;
614
615 /* new packet received or marked */
616 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
617 !seqp->ccid2s_acked) {
618 if (state ==
619 DCCP_ACKVEC_STATE_ECN_MARKED) {
620 loss = 1;
621 }
622 else {
623 ccid2_new_ack(sk, seqp,
624 &maxincr);
625 }
626
627 seqp->ccid2s_acked = 1;
628 ccid2_pr_debug("Got ack for %llu\n",
629 seqp->ccid2s_seq);
630 ccid2_hc_tx_dec_pipe(hctx);
631 }
632 if (seqp == hctx->ccid2hctx_seqt) {
633 done = 1;
634 break;
635 }
636 seqp = seqp->ccid2s_next;
637 }
638 if (done)
639 break;
640
641
642 dccp_set_seqno(&ackno, ackno_end_rl - 1);
643 vector++;
644 }
645 if (done)
646 break;
647 }
648
649 /* The state about what is acked should be correct now
650 * Check for NUMDUPACK
651 */
652 seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
653 done = 0;
654 while (1) {
655 if (seqp->ccid2s_acked) {
656 done++;
657 if (done == hctx->ccid2hctx_numdupack) {
658 break;
659 }
660 }
661 if (seqp == hctx->ccid2hctx_seqt) {
662 break;
663 }
664 seqp = seqp->ccid2s_prev;
665 }
666
667 /* If there are at least 3 acknowledgements, anything unacknowledged
668 * below the last sequence number is considered lost
669 */
670 if (done == hctx->ccid2hctx_numdupack) {
671 struct ccid2_seq *last_acked = seqp;
672
673 /* check for lost packets */
674 while (1) {
675 if (!seqp->ccid2s_acked) {
676 loss = 1;
677 ccid2_hc_tx_dec_pipe(hctx);
678 }
679 if (seqp == hctx->ccid2hctx_seqt)
680 break;
681 seqp = seqp->ccid2s_prev;
682 }
683
684 hctx->ccid2hctx_seqt = last_acked;
685 }
686
687 /* trim acked packets in tail */
688 while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
689 if (!hctx->ccid2hctx_seqt->ccid2s_acked)
690 break;
691
692 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
693 }
694
695 if (loss) {
696 /* XXX do bit shifts guarantee a 0 as the new bit? */
697 ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1);
698 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
699 if (hctx->ccid2hctx_ssthresh < 2)
700 hctx->ccid2hctx_ssthresh = 2;
701 }
702
703 ccid2_hc_tx_check_sanity(hctx);
704}
705
706static int ccid2_hc_tx_init(struct sock *sk)
707{
708 struct dccp_sock *dp = dccp_sk(sk);
709 struct ccid2_hc_tx_sock *hctx;
710 int seqcount = ccid2_seq_len;
711 int i;
712
713 dp->dccps_hc_tx_ccid_private = kzalloc(sizeof(*hctx), gfp_any());
714 if (dp->dccps_hc_tx_ccid_private == NULL)
715 return -ENOMEM;
716
717 hctx = ccid2_hc_tx_sk(sk);
718
719 /* XXX init variables with proper values */
720 hctx->ccid2hctx_cwnd = 1;
721 hctx->ccid2hctx_ssthresh = 10;
722 hctx->ccid2hctx_numdupack = 3;
723
724 /* XXX init ~ to window size... */
725 hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) *
726 seqcount, gfp_any());
727 if (hctx->ccid2hctx_seqbuf == NULL) {
728 kfree(dp->dccps_hc_tx_ccid_private);
729 dp->dccps_hc_tx_ccid_private = NULL;
730 return -ENOMEM;
731 }
732 for (i = 0; i < (seqcount - 1); i++) {
733 hctx->ccid2hctx_seqbuf[i].ccid2s_next =
734 &hctx->ccid2hctx_seqbuf[i + 1];
735 hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev =
736 &hctx->ccid2hctx_seqbuf[i];
737 }
738 hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next =
739 hctx->ccid2hctx_seqbuf;
740 hctx->ccid2hctx_seqbuf->ccid2s_prev =
741 &hctx->ccid2hctx_seqbuf[seqcount - 1];
742
743 hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf;
744 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
745 hctx->ccid2hctx_sent = 0;
746 hctx->ccid2hctx_rto = 3 * HZ;
747 hctx->ccid2hctx_srtt = -1;
748 hctx->ccid2hctx_rttvar = -1;
749 hctx->ccid2hctx_lastrtt = 0;
750 hctx->ccid2hctx_rpdupack = -1;
751
752 hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
753 hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
754 init_timer(&hctx->ccid2hctx_rtotimer);
755
756 ccid2_hc_tx_check_sanity(hctx);
757 return 0;
758}
759
760static void ccid2_hc_tx_exit(struct sock *sk)
761{
762 struct dccp_sock *dp = dccp_sk(sk);
763 struct ccid2_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
764
765 ccid2_hc_tx_kill_rto_timer(hctx);
766
767 kfree(hctx->ccid2hctx_seqbuf);
768
769 kfree(dp->dccps_hc_tx_ccid_private);
770 dp->dccps_hc_tx_ccid_private = NULL;
771}
772
773static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
774{
775 const struct dccp_sock *dp = dccp_sk(sk);
776 struct ccid2_hc_rx_sock *hcrx = ccid2_hc_rx_sk(sk);
777
778 switch (DCCP_SKB_CB(skb)->dccpd_type) {
779 case DCCP_PKT_DATA:
780 case DCCP_PKT_DATAACK:
781 hcrx->ccid2hcrx_data++;
782 if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
783 dccp_send_ack(sk);
784 hcrx->ccid2hcrx_data = 0;
785 }
786 break;
787 }
788}
789
790static int ccid2_hc_rx_init(struct sock *sk)
791{
792 struct dccp_sock *dp = dccp_sk(sk);
793 dp->dccps_hc_rx_ccid_private = kzalloc(sizeof(struct ccid2_hc_rx_sock),
794 gfp_any());
795 return dp->dccps_hc_rx_ccid_private == NULL ? -ENOMEM : 0;
796}
797
798static void ccid2_hc_rx_exit(struct sock *sk)
799{
800 struct dccp_sock *dp = dccp_sk(sk);
801
802 kfree(dp->dccps_hc_rx_ccid_private);
803 dp->dccps_hc_rx_ccid_private = NULL;
804}
805
/*
 * CCID2 operations table, registered with the DCCP core by
 * ccid2_module_init() and unregistered by ccid2_module_exit().
 * It wires the CCID id 2 to the sender (hc_tx) and receiver (hc_rx)
 * handlers defined in this file.
 */
806static struct ccid ccid2 = {
807 .ccid_id = 2,
808 .ccid_name = "ccid2",
809 .ccid_owner = THIS_MODULE,
810 .ccid_hc_tx_init = ccid2_hc_tx_init,
811 .ccid_hc_tx_exit = ccid2_hc_tx_exit,
812 .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
813 .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
814 .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
815 .ccid_hc_rx_init = ccid2_hc_rx_init,
816 .ccid_hc_rx_exit = ccid2_hc_rx_exit,
817 .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
818};
819
820module_param(ccid2_debug, int, 0444);
821MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
822
823static __init int ccid2_module_init(void)
824{
825 return ccid_register(&ccid2);
826}
827module_init(ccid2_module_init);
828
829static __exit void ccid2_module_exit(void)
830{
831 ccid_unregister(&ccid2);
832}
833module_exit(ccid2_module_exit);
834
835MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>");
836MODULE_DESCRIPTION("DCCP TCP CCID2 CCID");
837MODULE_LICENSE("GPL");
838MODULE_ALIAS("net-dccp-ccid-2");
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
new file mode 100644
index 000000000000..0b08c90955a9
--- /dev/null
+++ b/net/dccp/ccids/ccid2.h
@@ -0,0 +1,69 @@
1/*
2 * net/dccp/ccids/ccid2.h
3 *
4 * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#ifndef _DCCP_CCID2_H_
21#define _DCCP_CCID2_H_
22
/*
 * struct ccid2_seq - record of one sent packet.
 * The records form a circular doubly-linked list owned by the sender.
 */
23struct ccid2_seq {
24 u64 ccid2s_seq; /* sequence number the packet was sent with */
25 unsigned long ccid2s_sent; /* jiffies value at the time of sending */
26 int ccid2s_acked; /* non-zero once the packet has been acknowledged */
27 struct ccid2_seq *ccid2s_prev; /* neighbouring record, towards older packets */
28 struct ccid2_seq *ccid2s_next; /* neighbouring record, towards newer packets */
29};
30
31/** struct ccid2_hc_tx_sock - CCID2 TX half connection
32 *
 * @ccid2hctx_cwnd - congestion window, in packets
33 * @ccid2hctx_ssacks - ACKs recv in slow start
34 * @ccid2hctx_acks - ACKS recv in AI phase
 * @ccid2hctx_ssthresh - slow start threshold; slow start while cwnd is below it
 * @ccid2hctx_pipe - packets sent and neither acknowledged nor declared lost
 * @ccid2hctx_numdupack - acknowledgements needed before a loss is declared
 * @ccid2hctx_seqbuf - allocation backing the circular list of sent packets
 * @ccid2hctx_seqh - head of the list: slot the next sent packet is recorded in
 * @ccid2hctx_seqt - tail of the list: oldest packet still being tracked
 * @ccid2hctx_rto - retransmission timeout, in jiffies
 * @ccid2hctx_srtt - smoothed RTT, in jiffies; -1 until first measured
 * @ccid2hctx_rttvar - RTT variance, in jiffies
35 * @ccid2hctx_sent - packets sent in this window
36 * @ccid2hctx_lastrtt -time RTT was last measured
 * @ccid2hctx_rtotimer - the retransmission (RTO) timer
37 * @ccid2hctx_arsent - packets sent [ack ratio]
38 * @ccid2hctx_ackloss - ack was lost in this win
39 * @ccid2hctx_rpseq - last consecutive seqno
40 * @ccid2hctx_rpdupack - dupacks since rpseq
 * @ccid2hctx_sendwait - set when a packet was approved for sending, cleared
 *                       once it has been reported as sent
41*/
42struct ccid2_hc_tx_sock {
43 int ccid2hctx_cwnd;
44 int ccid2hctx_ssacks;
45 int ccid2hctx_acks;
46 int ccid2hctx_ssthresh;
47 int ccid2hctx_pipe;
48 int ccid2hctx_numdupack;
49 struct ccid2_seq *ccid2hctx_seqbuf;
50 struct ccid2_seq *ccid2hctx_seqh;
51 struct ccid2_seq *ccid2hctx_seqt;
52 long ccid2hctx_rto;
53 long ccid2hctx_srtt;
54 long ccid2hctx_rttvar;
55 int ccid2hctx_sent;
56 unsigned long ccid2hctx_lastrtt;
57 struct timer_list ccid2hctx_rtotimer;
58 unsigned long ccid2hctx_arsent;
59 int ccid2hctx_ackloss;
60 u64 ccid2hctx_rpseq;
61 int ccid2hctx_rpdupack;
62 int ccid2hctx_sendwait;
63};
64
/* struct ccid2_hc_rx_sock - CCID2 RX half connection */
65struct ccid2_hc_rx_sock {
66 int ccid2hcrx_data; /* data packets received since the last Ack was sent */
67};
68
69#endif /* _DCCP_CCID2_H_ */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2ab6f0e6cd62..38321ad81875 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1081,6 +1081,7 @@ int dccp_v4_init_sock(struct sock *sk)
1081 dp->dccps_mss_cache = 536; 1081 dp->dccps_mss_cache = 536;
1082 dp->dccps_role = DCCP_ROLE_UNDEFINED; 1082 dp->dccps_role = DCCP_ROLE_UNDEFINED;
1083 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; 1083 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
1084 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
1084 1085
1085 return 0; 1086 return 0;
1086} 1087}