diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-12-28 15:49:40 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-12-28 15:49:40 -0500 |
commit | 0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch) | |
tree | 454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/ipv4/tcp_cubic.c | |
parent | 54a696bd07c14d3b1192d03ce7269bc59b45209a (diff) | |
parent | eb56092fc168bf5af199d47af50c0d84a96db898 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits)
net: Allow dependancies of FDDI & Tokenring to be modular.
igb: Fix build warning when DCA is disabled.
net: Fix warning fallout from recent NAPI interface changes.
gro: Fix potential use after free
sfc: If AN is enabled, always read speed/duplex from the AN advertising bits
sfc: When disabling the NIC, close the device rather than unregistering it
sfc: SFT9001: Add cable diagnostics
sfc: Add support for multiple PHY self-tests
sfc: Merge top-level functions for self-tests
sfc: Clean up PHY mode management in loopback self-test
sfc: Fix unreliable link detection in some loopback modes
sfc: Generate unique names for per-NIC workqueues
802.3ad: use standard ethhdr instead of ad_header
802.3ad: generalize out mac address initializer
802.3ad: initialize ports LACPDU from const initializer
802.3ad: remove typedef around ad_system
802.3ad: turn ports is_individual into a bool
802.3ad: turn ports is_enabled into a bool
802.3ad: make ntt bool
ixgbe: Fix set_ringparam in ixgbe to use the same memory pools.
...
Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due
to the conversion to %pI (in this networking merge) and the addition of
doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/ipv4/tcp_cubic.c')
-rw-r--r-- | net/ipv4/tcp_cubic.c | 120 |
1 files changed, 109 insertions, 11 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4a1221e5e8ee..ee467ec40c4f 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -1,13 +1,23 @@ | |||
1 | /* | 1 | /* |
2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 | 2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 |
3 | * Home page: | 3 | * Home page: |
4 | * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC | 4 | * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC |
5 | * This is from the implementation of CUBIC TCP in | 5 | * This is from the implementation of CUBIC TCP in |
6 | * Injong Rhee, Lisong Xu. | 6 | * Sangtae Ha, Injong Rhee and Lisong Xu, |
7 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant | 7 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" |
8 | * in PFLDnet 2005 | 8 | * in ACM SIGOPS Operating System Review, July 2008. |
9 | * Available from: | 9 | * Available from: |
10 | * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf | 10 | * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf |
11 | * | ||
12 | * CUBIC integrates a new slow start algorithm, called HyStart. | ||
13 | * The details of HyStart are presented in | ||
14 | * Sangtae Ha and Injong Rhee, | ||
15 | * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. | ||
16 | * Available from: | ||
17 | * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf | ||
18 | * | ||
19 | * All testing results are available from: | ||
20 | * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing | ||
11 | * | 21 | * |
12 | * Unless CUBIC is enabled and congestion window is large | 22 | * Unless CUBIC is enabled and congestion window is large |
13 | * this behaves the same as the original Reno. | 23 | * this behaves the same as the original Reno. |
@@ -23,12 +33,26 @@ | |||
23 | */ | 33 | */ |
24 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ | 34 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ |
25 | 35 | ||
36 | /* Two methods of hybrid slow start */ | ||
37 | #define HYSTART_ACK_TRAIN 0x1 | ||
38 | #define HYSTART_DELAY 0x2 | ||
39 | |||
40 | /* Number of delay samples for detecting the increase of delay */ | ||
41 | #define HYSTART_MIN_SAMPLES 8 | ||
42 | #define HYSTART_DELAY_MIN (2U<<3) | ||
43 | #define HYSTART_DELAY_MAX (16U<<3) | ||
44 | #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) | ||
45 | |||
26 | static int fast_convergence __read_mostly = 1; | 46 | static int fast_convergence __read_mostly = 1; |
27 | static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ | 47 | static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ |
28 | static int initial_ssthresh __read_mostly; | 48 | static int initial_ssthresh __read_mostly; |
29 | static int bic_scale __read_mostly = 41; | 49 | static int bic_scale __read_mostly = 41; |
30 | static int tcp_friendliness __read_mostly = 1; | 50 | static int tcp_friendliness __read_mostly = 1; |
31 | 51 | ||
52 | static int hystart __read_mostly = 1; | ||
53 | static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; | ||
54 | static int hystart_low_window __read_mostly = 16; | ||
55 | |||
32 | static u32 cube_rtt_scale __read_mostly; | 56 | static u32 cube_rtt_scale __read_mostly; |
33 | static u32 beta_scale __read_mostly; | 57 | static u32 beta_scale __read_mostly; |
34 | static u64 cube_factor __read_mostly; | 58 | static u64 cube_factor __read_mostly; |
@@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444); | |||
44 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); | 68 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); |
45 | module_param(tcp_friendliness, int, 0644); | 69 | module_param(tcp_friendliness, int, 0644); |
46 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); | 70 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
71 | module_param(hystart, int, 0644); | ||
72 | MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); | ||
73 | module_param(hystart_detect, int, 0644); | ||
74 | MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" | ||
75 | " 1: packet-train 2: delay 3: both packet-train and delay"); | ||
76 | module_param(hystart_low_window, int, 0644); | ||
77 | MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); | ||
47 | 78 | ||
48 | /* BIC TCP Parameters */ | 79 | /* BIC TCP Parameters */ |
49 | struct bictcp { | 80 | struct bictcp { |
@@ -59,7 +90,13 @@ struct bictcp { | |||
59 | u32 ack_cnt; /* number of acks */ | 90 | u32 ack_cnt; /* number of acks */ |
60 | u32 tcp_cwnd; /* estimated tcp cwnd */ | 91 | u32 tcp_cwnd; /* estimated tcp cwnd */ |
61 | #define ACK_RATIO_SHIFT 4 | 92 | #define ACK_RATIO_SHIFT 4 |
62 | u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ | 93 | u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ |
94 | u8 sample_cnt; /* number of samples to decide curr_rtt */ | ||
95 | u8 found; /* the exit point is found? */ | ||
96 | u32 round_start; /* beginning of each round */ | ||
97 | u32 end_seq; /* end_seq of the round */ | ||
98 | u32 last_jiffies; /* last time when the ACK spacing is close */ | ||
99 | u32 curr_rtt; /* the minimum rtt of current round */ | ||
63 | }; | 100 | }; |
64 | 101 | ||
65 | static inline void bictcp_reset(struct bictcp *ca) | 102 | static inline void bictcp_reset(struct bictcp *ca) |
@@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
76 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; | 113 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; |
77 | ca->ack_cnt = 0; | 114 | ca->ack_cnt = 0; |
78 | ca->tcp_cwnd = 0; | 115 | ca->tcp_cwnd = 0; |
116 | ca->found = 0; | ||
117 | } | ||
118 | |||
119 | static inline void bictcp_hystart_reset(struct sock *sk) | ||
120 | { | ||
121 | struct tcp_sock *tp = tcp_sk(sk); | ||
122 | struct bictcp *ca = inet_csk_ca(sk); | ||
123 | |||
124 | ca->round_start = ca->last_jiffies = jiffies; | ||
125 | ca->end_seq = tp->snd_nxt; | ||
126 | ca->curr_rtt = 0; | ||
127 | ca->sample_cnt = 0; | ||
79 | } | 128 | } |
80 | 129 | ||
81 | static void bictcp_init(struct sock *sk) | 130 | static void bictcp_init(struct sock *sk) |
82 | { | 131 | { |
83 | bictcp_reset(inet_csk_ca(sk)); | 132 | bictcp_reset(inet_csk_ca(sk)); |
84 | if (initial_ssthresh) | 133 | |
134 | if (hystart) | ||
135 | bictcp_hystart_reset(sk); | ||
136 | |||
137 | if (!hystart && initial_ssthresh) | ||
85 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 138 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
86 | } | 139 | } |
87 | 140 | ||
@@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
235 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 288 | if (!tcp_is_cwnd_limited(sk, in_flight)) |
236 | return; | 289 | return; |
237 | 290 | ||
238 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 291 | if (tp->snd_cwnd <= tp->snd_ssthresh) { |
292 | if (hystart && after(ack, ca->end_seq)) | ||
293 | bictcp_hystart_reset(sk); | ||
239 | tcp_slow_start(tp); | 294 | tcp_slow_start(tp); |
240 | else { | 295 | } else { |
241 | bictcp_update(ca, tp->snd_cwnd); | 296 | bictcp_update(ca, tp->snd_cwnd); |
242 | 297 | ||
243 | /* In dangerous area, increase slowly. | 298 | /* In dangerous area, increase slowly. |
@@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk) | |||
281 | 336 | ||
282 | static void bictcp_state(struct sock *sk, u8 new_state) | 337 | static void bictcp_state(struct sock *sk, u8 new_state) |
283 | { | 338 | { |
284 | if (new_state == TCP_CA_Loss) | 339 | if (new_state == TCP_CA_Loss) { |
285 | bictcp_reset(inet_csk_ca(sk)); | 340 | bictcp_reset(inet_csk_ca(sk)); |
341 | bictcp_hystart_reset(sk); | ||
342 | } | ||
343 | } | ||
344 | |||
345 | static void hystart_update(struct sock *sk, u32 delay) | ||
346 | { | ||
347 | struct tcp_sock *tp = tcp_sk(sk); | ||
348 | struct bictcp *ca = inet_csk_ca(sk); | ||
349 | |||
350 | if (!(ca->found & hystart_detect)) { | ||
351 | u32 curr_jiffies = jiffies; | ||
352 | |||
353 | /* first detection parameter - ack-train detection */ | ||
354 | if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { | ||
355 | ca->last_jiffies = curr_jiffies; | ||
356 | if (curr_jiffies - ca->round_start >= ca->delay_min>>4) | ||
357 | ca->found |= HYSTART_ACK_TRAIN; | ||
358 | } | ||
359 | |||
360 | /* obtain the minimum delay of more than sampling packets */ | ||
361 | if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { | ||
362 | if (ca->curr_rtt == 0 || ca->curr_rtt > delay) | ||
363 | ca->curr_rtt = delay; | ||
364 | |||
365 | ca->sample_cnt++; | ||
366 | } else { | ||
367 | if (ca->curr_rtt > ca->delay_min + | ||
368 | HYSTART_DELAY_THRESH(ca->delay_min>>4)) | ||
369 | ca->found |= HYSTART_DELAY; | ||
370 | } | ||
371 | /* | ||
372 | * Either one of two conditions are met, | ||
373 | * we exit from slow start immediately. | ||
374 | */ | ||
375 | if (ca->found & hystart_detect) | ||
376 | tp->snd_ssthresh = tp->snd_cwnd; | ||
377 | } | ||
286 | } | 378 | } |
287 | 379 | ||
288 | /* Track delayed acknowledgment ratio using sliding window | 380 | /* Track delayed acknowledgment ratio using sliding window |
@@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) | |||
291 | static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | 383 | static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) |
292 | { | 384 | { |
293 | const struct inet_connection_sock *icsk = inet_csk(sk); | 385 | const struct inet_connection_sock *icsk = inet_csk(sk); |
386 | const struct tcp_sock *tp = tcp_sk(sk); | ||
294 | struct bictcp *ca = inet_csk_ca(sk); | 387 | struct bictcp *ca = inet_csk_ca(sk); |
295 | u32 delay; | 388 | u32 delay; |
296 | 389 | ||
@@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
314 | /* first time call or link delay decreases */ | 407 | /* first time call or link delay decreases */ |
315 | if (ca->delay_min == 0 || ca->delay_min > delay) | 408 | if (ca->delay_min == 0 || ca->delay_min > delay) |
316 | ca->delay_min = delay; | 409 | ca->delay_min = delay; |
410 | |||
411 | /* hystart triggers when cwnd is larger than some threshold */ | ||
412 | if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && | ||
413 | tp->snd_cwnd >= hystart_low_window) | ||
414 | hystart_update(sk, delay); | ||
317 | } | 415 | } |
318 | 416 | ||
319 | static struct tcp_congestion_ops cubictcp = { | 417 | static struct tcp_congestion_ops cubictcp = { |
@@ -372,4 +470,4 @@ module_exit(cubictcp_unregister); | |||
372 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); | 470 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
373 | MODULE_LICENSE("GPL"); | 471 | MODULE_LICENSE("GPL"); |
374 | MODULE_DESCRIPTION("CUBIC TCP"); | 472 | MODULE_DESCRIPTION("CUBIC TCP"); |
375 | MODULE_VERSION("2.2"); | 473 | MODULE_VERSION("2.3"); |