about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/tcp_cubic.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-12-28 15:49:40 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-12-28 15:49:40 -0500
commit: 0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree: 454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/ipv4/tcp_cubic.c
parent: 54a696bd07c14d3b1192d03ce7269bc59b45209a (diff)
parent: eb56092fc168bf5af199d47af50c0d84a96db898 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits)
  net: Allow dependancies of FDDI & Tokenring to be modular.
  igb: Fix build warning when DCA is disabled.
  net: Fix warning fallout from recent NAPI interface changes.
  gro: Fix potential use after free
  sfc: If AN is enabled, always read speed/duplex from the AN advertising bits
  sfc: When disabling the NIC, close the device rather than unregistering it
  sfc: SFT9001: Add cable diagnostics
  sfc: Add support for multiple PHY self-tests
  sfc: Merge top-level functions for self-tests
  sfc: Clean up PHY mode management in loopback self-test
  sfc: Fix unreliable link detection in some loopback modes
  sfc: Generate unique names for per-NIC workqueues
  802.3ad: use standard ethhdr instead of ad_header
  802.3ad: generalize out mac address initializer
  802.3ad: initialize ports LACPDU from const initializer
  802.3ad: remove typedef around ad_system
  802.3ad: turn ports is_individual into a bool
  802.3ad: turn ports is_enabled into a bool
  802.3ad: make ntt bool
  ixgbe: Fix set_ringparam in ixgbe to use the same memory pools.
  ...

Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due
to the conversion to %pI (in this networking merge) and the addition of
doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/ipv4/tcp_cubic.c')
-rw-r--r-- net/ipv4/tcp_cubic.c 120
1 files changed, 109 insertions, 11 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4a1221e5e8ee..ee467ec40c4f 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,13 +1,23 @@
1/* 1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
3 * Home page: 3 * Home page:
4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC 4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
5 * This is from the implementation of CUBIC TCP in 5 * This is from the implementation of CUBIC TCP in
6 * Injong Rhee, Lisong Xu. 6 * Sangtae Ha, Injong Rhee and Lisong Xu,
7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant 7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
8 * in PFLDnet 2005 8 * in ACM SIGOPS Operating System Review, July 2008.
9 * Available from: 9 * Available from:
10 * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf 10 * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf
11 *
12 * CUBIC integrates a new slow start algorithm, called HyStart.
13 * The details of HyStart are presented in
14 * Sangtae Ha and Injong Rhee,
15 * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008.
16 * Available from:
17 * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf
18 *
19 * All testing results are available from:
20 * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing
11 * 21 *
12 * Unless CUBIC is enabled and congestion window is large 22 * Unless CUBIC is enabled and congestion window is large
13 * this behaves the same as the original Reno. 23 * this behaves the same as the original Reno.
@@ -23,12 +33,26 @@
23 */ 33 */
24#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ 34#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
25 35
36/* Two methods of hybrid slow start */
37#define HYSTART_ACK_TRAIN 0x1
38#define HYSTART_DELAY 0x2
39
40/* Number of delay samples for detecting the increase of delay */
41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (2U<<3)
43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45
26static int fast_convergence __read_mostly = 1; 46static int fast_convergence __read_mostly = 1;
27static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ 47static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */
28static int initial_ssthresh __read_mostly; 48static int initial_ssthresh __read_mostly;
29static int bic_scale __read_mostly = 41; 49static int bic_scale __read_mostly = 41;
30static int tcp_friendliness __read_mostly = 1; 50static int tcp_friendliness __read_mostly = 1;
31 51
52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16;
55
32static u32 cube_rtt_scale __read_mostly; 56static u32 cube_rtt_scale __read_mostly;
33static u32 beta_scale __read_mostly; 57static u32 beta_scale __read_mostly;
34static u64 cube_factor __read_mostly; 58static u64 cube_factor __read_mostly;
@@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444);
44MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); 68MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
45module_param(tcp_friendliness, int, 0644); 69module_param(tcp_friendliness, int, 0644);
46MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); 70MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
71module_param(hystart, int, 0644);
72MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
73module_param(hystart_detect, int, 0644);
74MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
75 " 1: packet-train 2: delay 3: both packet-train and delay");
76module_param(hystart_low_window, int, 0644);
77MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
47 78
48/* BIC TCP Parameters */ 79/* BIC TCP Parameters */
49struct bictcp { 80struct bictcp {
@@ -59,7 +90,13 @@ struct bictcp {
59 u32 ack_cnt; /* number of acks */ 90 u32 ack_cnt; /* number of acks */
60 u32 tcp_cwnd; /* estimated tcp cwnd */ 91 u32 tcp_cwnd; /* estimated tcp cwnd */
61#define ACK_RATIO_SHIFT 4 92#define ACK_RATIO_SHIFT 4
62 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ 93 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
94 u8 sample_cnt; /* number of samples to decide curr_rtt */
95 u8 found; /* the exit point is found? */
96 u32 round_start; /* beginning of each round */
97 u32 end_seq; /* end_seq of the round */
98 u32 last_jiffies; /* last time when the ACK spacing is close */
99 u32 curr_rtt; /* the minimum rtt of current round */
63}; 100};
64 101
65static inline void bictcp_reset(struct bictcp *ca) 102static inline void bictcp_reset(struct bictcp *ca)
@@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca)
76 ca->delayed_ack = 2 << ACK_RATIO_SHIFT; 113 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
77 ca->ack_cnt = 0; 114 ca->ack_cnt = 0;
78 ca->tcp_cwnd = 0; 115 ca->tcp_cwnd = 0;
116 ca->found = 0;
117}
118
119static inline void bictcp_hystart_reset(struct sock *sk)
120{
121 struct tcp_sock *tp = tcp_sk(sk);
122 struct bictcp *ca = inet_csk_ca(sk);
123
124 ca->round_start = ca->last_jiffies = jiffies;
125 ca->end_seq = tp->snd_nxt;
126 ca->curr_rtt = 0;
127 ca->sample_cnt = 0;
79} 128}
80 129
81static void bictcp_init(struct sock *sk) 130static void bictcp_init(struct sock *sk)
82{ 131{
83 bictcp_reset(inet_csk_ca(sk)); 132 bictcp_reset(inet_csk_ca(sk));
84 if (initial_ssthresh) 133
134 if (hystart)
135 bictcp_hystart_reset(sk);
136
137 if (!hystart && initial_ssthresh)
85 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 138 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
86} 139}
87 140
@@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
235 if (!tcp_is_cwnd_limited(sk, in_flight)) 288 if (!tcp_is_cwnd_limited(sk, in_flight))
236 return; 289 return;
237 290
238 if (tp->snd_cwnd <= tp->snd_ssthresh) 291 if (tp->snd_cwnd <= tp->snd_ssthresh) {
292 if (hystart && after(ack, ca->end_seq))
293 bictcp_hystart_reset(sk);
239 tcp_slow_start(tp); 294 tcp_slow_start(tp);
240 else { 295 } else {
241 bictcp_update(ca, tp->snd_cwnd); 296 bictcp_update(ca, tp->snd_cwnd);
242 297
243 /* In dangerous area, increase slowly. 298 /* In dangerous area, increase slowly.
@@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
281 336
282static void bictcp_state(struct sock *sk, u8 new_state) 337static void bictcp_state(struct sock *sk, u8 new_state)
283{ 338{
284 if (new_state == TCP_CA_Loss) 339 if (new_state == TCP_CA_Loss) {
285 bictcp_reset(inet_csk_ca(sk)); 340 bictcp_reset(inet_csk_ca(sk));
341 bictcp_hystart_reset(sk);
342 }
343}
344
345static void hystart_update(struct sock *sk, u32 delay)
346{
347 struct tcp_sock *tp = tcp_sk(sk);
348 struct bictcp *ca = inet_csk_ca(sk);
349
350 if (!(ca->found & hystart_detect)) {
351 u32 curr_jiffies = jiffies;
352
353 /* first detection parameter - ack-train detection */
354 if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) {
355 ca->last_jiffies = curr_jiffies;
356 if (curr_jiffies - ca->round_start >= ca->delay_min>>4)
357 ca->found |= HYSTART_ACK_TRAIN;
358 }
359
360 /* obtain the minimum delay of more than sampling packets */
361 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
362 if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
363 ca->curr_rtt = delay;
364
365 ca->sample_cnt++;
366 } else {
367 if (ca->curr_rtt > ca->delay_min +
368 HYSTART_DELAY_THRESH(ca->delay_min>>4))
369 ca->found |= HYSTART_DELAY;
370 }
371 /*
372 * Either one of two conditions are met,
373 * we exit from slow start immediately.
374 */
375 if (ca->found & hystart_detect)
376 tp->snd_ssthresh = tp->snd_cwnd;
377 }
286} 378}
287 379
288/* Track delayed acknowledgment ratio using sliding window 380/* Track delayed acknowledgment ratio using sliding window
@@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
291static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) 383static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
292{ 384{
293 const struct inet_connection_sock *icsk = inet_csk(sk); 385 const struct inet_connection_sock *icsk = inet_csk(sk);
386 const struct tcp_sock *tp = tcp_sk(sk);
294 struct bictcp *ca = inet_csk_ca(sk); 387 struct bictcp *ca = inet_csk_ca(sk);
295 u32 delay; 388 u32 delay;
296 389
@@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
314 /* first time call or link delay decreases */ 407 /* first time call or link delay decreases */
315 if (ca->delay_min == 0 || ca->delay_min > delay) 408 if (ca->delay_min == 0 || ca->delay_min > delay)
316 ca->delay_min = delay; 409 ca->delay_min = delay;
410
411 /* hystart triggers when cwnd is larger than some threshold */
412 if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
413 tp->snd_cwnd >= hystart_low_window)
414 hystart_update(sk, delay);
317} 415}
318 416
319static struct tcp_congestion_ops cubictcp = { 417static struct tcp_congestion_ops cubictcp = {
@@ -372,4 +470,4 @@ module_exit(cubictcp_unregister);
372MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); 470MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
373MODULE_LICENSE("GPL"); 471MODULE_LICENSE("GPL");
374MODULE_DESCRIPTION("CUBIC TCP"); 472MODULE_DESCRIPTION("CUBIC TCP");
375MODULE_VERSION("2.2"); 473MODULE_VERSION("2.3");