 net/ipv4/tcp_illinois.c | 298 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 186 insertions(+), 112 deletions(-)
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 91a2a34604cd..ae6298600886 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -23,74 +23,106 @@
 #define ALPHA_MIN       ((3*ALPHA_SCALE)/10)    /* ~0.3 */
 #define ALPHA_MAX       (10*ALPHA_SCALE)        /* 10.0 */
 #define ALPHA_BASE      ALPHA_SCALE             /* 1.0 */
+#define U32_MAX         ((u32)~0U)
+#define RTT_MAX         (U32_MAX / ALPHA_MAX)   /* 3.3 secs */
 
 #define BETA_SHIFT      6
 #define BETA_SCALE      (1u<<BETA_SHIFT)
-#define BETA_MIN        (BETA_SCALE/8)          /* 0.8 */
-#define BETA_MAX        (BETA_SCALE/2)
-#define BETA_BASE       BETA_MAX                /* 0.5 */
-
-#define THETA 5
+#define BETA_MIN        (BETA_SCALE/8)          /* 0.125 */
+#define BETA_MAX        (BETA_SCALE/2)          /* 0.5 */
+#define BETA_BASE       BETA_MAX
 
 static int win_thresh __read_mostly = 15;
-module_param(win_thresh, int, 0644);
+module_param(win_thresh, int, 0);
 MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
 
-#define MAX_RTT 0x7fffffff
+static int theta __read_mostly = 5;
+module_param(theta, int, 0);
+MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
 
 /* TCP Illinois Parameters */
-struct tcp_illinois {
-        u32 last_alpha;
-        u32 min_rtt;
-        u32 max_rtt;
-        u32 rtt_low;
-        u32 rtt_cnt;
-        u64 sum_rtt;
+struct illinois {
+        u64     sum_rtt;        /* sum of rtt's measured within last rtt */
+        u16     cnt_rtt;        /* # of rtts measured within last rtt */
+        u32     base_rtt;       /* min of all rtt in usec */
+        u32     max_rtt;        /* max of all rtt in usec */
+        u32     end_seq;        /* right edge of current RTT */
+        u32     alpha;          /* Additive increase */
+        u32     beta;           /* Muliplicative decrease */
+        u16     acked;          /* # packets acked by current ACK */
+        u8      rtt_above;      /* average rtt has gone above threshold */
+        u8      rtt_low;        /* # of rtts measurements below threshold */
 };
 
+static void rtt_reset(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        struct illinois *ca = inet_csk_ca(sk);
+
+        ca->end_seq = tp->snd_nxt;
+        ca->cnt_rtt = 0;
+        ca->sum_rtt = 0;
+
+        /* TODO: age max_rtt? */
+}
+
 static void tcp_illinois_init(struct sock *sk)
 {
-        struct tcp_illinois *ca = inet_csk_ca(sk);
+        struct illinois *ca = inet_csk_ca(sk);
+
+        ca->alpha = ALPHA_MAX;
+        ca->beta = BETA_BASE;
+        ca->base_rtt = 0x7fffffff;
+        ca->max_rtt = 0;
+
+        ca->acked = 0;
+        ca->rtt_low = 0;
+        ca->rtt_above = 0;
 
-        ca->last_alpha = ALPHA_BASE;
-        ca->min_rtt = 0x7fffffff;
+        rtt_reset(sk);
 }
 
-/*
- * Keep track of min, max and average RTT
- */
-static void tcp_illinois_rtt_calc(struct sock *sk, u32 rtt)
+/* Measure RTT for each ack. */
+static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
 {
-        struct tcp_illinois *ca = inet_csk_ca(sk);
+        struct illinois *ca = inet_csk_ca(sk);
+
+        /* ignore bogus values, this prevents wraparound in alpha math */
+        if (rtt > RTT_MAX)
+                rtt = RTT_MAX;
 
-        if (rtt < ca->min_rtt)
-                ca->min_rtt = rtt;
-        if (rtt > ca->max_rtt)
+        /* keep track of minimum RTT seen so far */
+        if (ca->base_rtt > rtt)
+                ca->base_rtt = rtt;
+
+        /* and max */
+        if (ca->max_rtt < rtt)
                 ca->max_rtt = rtt;
 
-        if (++ca->rtt_cnt == 1)
-                ca->sum_rtt = rtt;
-        else
-                ca->sum_rtt += rtt;
+        ++ca->cnt_rtt;
+        ca->sum_rtt += rtt;
 }
 
-/* max queuing delay */
-static inline u32 max_delay(const struct tcp_illinois *ca)
+/* Capture count of packets covered by ack, to adjust for delayed acks */
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked)
 {
-        return ca->max_rtt - ca->min_rtt;
+        struct illinois *ca = inet_csk_ca(sk);
+        ca->acked = pkts_acked;
 }
 
-/* average queueing delay */
-static u32 avg_delay(struct tcp_illinois *ca)
+/* Maximum queuing delay */
+static inline u32 max_delay(const struct illinois *ca)
 {
-        u64 avg_rtt = ca->sum_rtt;
-
-        do_div(avg_rtt, ca->rtt_cnt);
+        return ca->max_rtt - ca->base_rtt;
+}
 
-        ca->sum_rtt = 0;
-        ca->rtt_cnt = 0;
+/* Average queuing delay */
+static inline u32 avg_delay(const struct illinois *ca)
+{
+        u64 t = ca->sum_rtt;
 
-        return avg_rtt - ca->min_rtt;
+        do_div(t, ca->cnt_rtt);
+        return t - ca->base_rtt;
 }
 
 /*
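
Aside (not part of the patch): the fields added above feed max_delay() and avg_delay(), which turn the per-ACK RTT samples collected during one RTT into queueing-delay estimates relative to base_rtt. A minimal userspace sketch of that bookkeeping, using plain C types and invented sample values rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

/* stand-in for the relevant fields of struct illinois */
struct delay_est {
        uint64_t sum_rtt;   /* sum of samples seen in the current RTT */
        uint16_t cnt_rtt;   /* number of samples in the current RTT */
        uint32_t base_rtt;  /* smallest RTT ever seen (~propagation delay) */
        uint32_t max_rtt;   /* largest RTT ever seen */
};

static void sample(struct delay_est *d, uint32_t rtt)
{
        if (rtt < d->base_rtt)
                d->base_rtt = rtt;
        if (rtt > d->max_rtt)
                d->max_rtt = rtt;
        d->sum_rtt += rtt;
        d->cnt_rtt++;
}

int main(void)
{
        struct delay_est d = { 0, 0, UINT32_MAX, 0 };
        uint32_t rtts[] = { 100, 120, 150, 110 };   /* usec, invented */

        for (int i = 0; i < 4; i++)
                sample(&d, rtts[i]);

        /* avg_delay = mean(rtt) - base_rtt = 20; max_delay = 50 */
        printf("avg delay %llu usec, max delay %u usec\n",
               (unsigned long long)(d.sum_rtt / d.cnt_rtt - d.base_rtt),
               d.max_rtt - d.base_rtt);
        return 0;
}
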
@@ -101,32 +133,31 @@ static u32 avg_delay(struct tcp_illinois *ca)
  * A. If average delay is at minimum (we are uncongested),
  * then use large alpha (10.0) to increase faster.
  * B. If average delay is at maximum (getting congested)
- * then use small alpha (1.0)
+ * then use small alpha (0.3)
  *
  * The result is a convex window growth curve.
  */
-static u32 alpha(const struct sock *sk)
+static u32 alpha(struct illinois *ca, u32 da, u32 dm)
 {
-        struct tcp_sock *tp = tcp_sk(sk);
-        struct tcp_illinois *ca = inet_csk_ca(sk);
-        u32 dm = max_delay(ca);
-        u32 da = avg_delay(ca);
-        u32 d1, a;
+        u32 d1 = dm / 100;      /* Low threshold */
 
-        if (tp->snd_cwnd < win_thresh)
-                return ALPHA_BASE;      /* same as Reno (1.0) */
-
-        d1 = dm / 100;
         if (da <= d1) {
-                /* Don't let noise force agressive response */
-                if (ca->rtt_low < THETA) {
-                        ++ca->rtt_low;
-                        return ca->last_alpha;
-                } else
+                /* If never got out of low delay zone, then use max */
+                if (!ca->rtt_above)
                         return ALPHA_MAX;
+
+                /* Wait for 5 good RTT's before allowing alpha to go alpha max.
+                 * This prevents one good RTT from causing sudden window increase.
+                 */
+                if (++ca->rtt_low < theta)
+                        return ca->alpha;
+
+                ca->rtt_low = 0;
+                ca->rtt_above = 0;
+                return ALPHA_MAX;
         }
 
-        ca->rtt_low = 0;
+        ca->rtt_above = 1;
 
         /*
          * Based on:
@@ -146,37 +177,8 @@ static u32 alpha(const struct sock *sk)
 
         dm -= d1;
         da -= d1;
-
-        a = (dm * ALPHA_MAX) / (dm - (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
-        ca->last_alpha = a;
-        return a;
-}
-
-/*
- * Increase window in response to successful acknowledgment.
- */
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-                                    u32 in_flight, int flag)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-
-        /* RFC2861 only increase cwnd if fully utilized */
-        if (!tcp_is_cwnd_limited(sk, in_flight))
-                return;
-
-        /* In slow start */
-        if (tp->snd_cwnd <= tp->snd_ssthresh)
-                tcp_slow_start(tp);
-
-        else {
-                /* additive increase cwnd += alpha / cwnd */
-                if ((tp->snd_cwnd_cnt * alpha(sk)) >> ALPHA_SHIFT >= tp->snd_cwnd) {
-                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-                                tp->snd_cwnd++;
-                        tp->snd_cwnd_cnt = 0;
-                } else
-                        tp->snd_cwnd_cnt++;
-        }
+        return (dm * ALPHA_MAX) /
+                (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
 }
 
 /*
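
Aside (not part of the patch): with d1 already subtracted from both da and dm, the new return statement computes alpha = dm * ALPHA_MAX / (dm + da * (ALPHA_MAX - ALPHA_MIN) / ALPHA_MIN), which gives ALPHA_MAX when the average queueing delay is zero and falls to ALPHA_MIN when it reaches the maximum delay. A standalone sketch checking those endpoints; ALPHA_SHIFT = 7 is copied from the unchanged top of the file, and the 50 ms delay is invented:

#include <stdint.h>
#include <stdio.h>

#define ALPHA_SHIFT     7
#define ALPHA_SCALE     (1u<<ALPHA_SHIFT)
#define ALPHA_MIN       ((3*ALPHA_SCALE)/10)    /* ~0.3 */
#define ALPHA_MAX       (10*ALPHA_SCALE)        /* 10.0 */

/* da, dm are queueing delays with the low threshold d1 already removed */
static uint32_t alpha_curve(uint32_t da, uint32_t dm)
{
        return (dm * ALPHA_MAX) /
                (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
}

int main(void)
{
        uint32_t dm = 50000;    /* 50 ms of max queueing delay, in usec */

        /* prints 1280 (10.0), 73 (in between), 38 (~0.3) */
        printf("%u %u %u\n", alpha_curve(0, dm),
               alpha_curve(dm / 2, dm), alpha_curve(dm, dm));
        return 0;
}
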
@@ -187,20 +189,14 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * If delay is up to 80% of max then beta = 1/2
  * In between is a linear function
  */
-static inline u32 beta(struct sock *sk)
+static u32 beta(u32 da, u32 dm)
 {
-        struct tcp_sock *tp = tcp_sk(sk);
-        struct tcp_illinois *ca = inet_csk_ca(sk);
-        u32 dm = max_delay(ca);
-        u32 da = avg_delay(ca);
         u32 d2, d3;
 
-        if (tp->snd_cwnd < win_thresh)
-                return BETA_BASE;
-
         d2 = dm / 10;
         if (da <= d2)
                 return BETA_MIN;
+
         d3 = (8 * dm) / 10;
         if (da >= d3 || d3 <= d2)
                 return BETA_MAX;
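
Aside (not part of the patch): beta() keeps its original shape, only losing the win_thresh check that now lives in update_params(): BETA_MIN below 10% of the maximum delay, BETA_MAX above 80%, and a straight line in between. The sketch below re-derives that curve from the comment; the linear middle segment is written fresh here because the patch leaves those lines untouched and the diff elides them:

#include <stdint.h>
#include <stdio.h>

#define BETA_SHIFT      6
#define BETA_SCALE      (1u<<BETA_SHIFT)
#define BETA_MIN        (BETA_SCALE/8)  /* 0.125 */
#define BETA_MAX        (BETA_SCALE/2)  /* 0.5 */

static uint32_t beta_sketch(uint32_t da, uint32_t dm)
{
        uint32_t d2 = dm / 10;          /* 10% of max delay */
        uint32_t d3 = (8 * dm) / 10;    /* 80% of max delay */

        if (da <= d2)
                return BETA_MIN;
        if (da >= d3 || d3 <= d2)
                return BETA_MAX;

        /* linear from (d2, BETA_MIN) to (d3, BETA_MAX) */
        return BETA_MIN + ((BETA_MAX - BETA_MIN) * (da - d2)) / (d3 - d2);
}

int main(void)
{
        uint32_t dm = 1000;     /* invented max queueing delay */

        /* prints 8 (1/8), 8, 20 (~5/16), 32 (1/2), 32 */
        printf("%u %u %u %u %u\n",
               beta_sketch(0, dm), beta_sketch(100, dm),
               beta_sketch(450, dm), beta_sketch(800, dm),
               beta_sketch(1000, dm));
        return 0;
}
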
@@ -222,31 +218,107 @@ static inline u32 beta(struct sock *sk)
                 / (d3 - d2);
 }
 
+/* Update alpha and beta values once per RTT */
+static void update_params(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        struct illinois *ca = inet_csk_ca(sk);
+
+        if (tp->snd_cwnd < win_thresh) {
+                ca->alpha = ALPHA_BASE;
+                ca->beta = BETA_BASE;
+        } else if (ca->cnt_rtt > 0) {
+                u32 dm = max_delay(ca);
+                u32 da = avg_delay(ca);
+
+                ca->alpha = alpha(ca, da, dm);
+                ca->beta = beta(da, dm);
+        }
+
+        rtt_reset(sk);
+}
+
+/*
+ * In case of loss, reset to default values
+ */
+static void tcp_illinois_state(struct sock *sk, u8 new_state)
+{
+        struct illinois *ca = inet_csk_ca(sk);
+
+        if (new_state == TCP_CA_Loss) {
+                ca->alpha = ALPHA_BASE;
+                ca->beta = BETA_BASE;
+                ca->rtt_low = 0;
+                ca->rtt_above = 0;
+                rtt_reset(sk);
+        }
+}
+
+/*
+ * Increase window in response to successful acknowledgment.
+ */
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+                                    u32 in_flight, int flag)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        struct illinois *ca = inet_csk_ca(sk);
+
+        if (after(ack, ca->end_seq))
+                update_params(sk);
+
+        /* RFC2861 only increase cwnd if fully utilized */
+        if (!tcp_is_cwnd_limited(sk, in_flight))
+                return;
+
+        /* In slow start */
+        if (tp->snd_cwnd <= tp->snd_ssthresh)
+                tcp_slow_start(tp);
+
+        else {
+                u32 delta;
+
+                /* snd_cwnd_cnt is # of packets since last cwnd increment */
+                tp->snd_cwnd_cnt += ca->acked;
+                ca->acked = 1;
+
+                /* This is close approximation of:
+                 * tp->snd_cwnd += alpha/tp->snd_cwnd
+                 */
+                delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
+                if (delta >= tp->snd_cwnd) {
+                        tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
+                                           (u32) tp->snd_cwnd_clamp);
+                        tp->snd_cwnd_cnt = 0;
+                }
+        }
+}
+
 static u32 tcp_illinois_ssthresh(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
+        struct illinois *ca = inet_csk_ca(sk);
 
         /* Multiplicative decrease */
-        return max((tp->snd_cwnd * beta(sk)) >> BETA_SHIFT, 2U);
+        return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U);
 }
 
-/* Extract info for TCP socket info provided via netlink.
- * We aren't really doing Vegas, but we can provide RTT info
- */
-static void tcp_illinois_get_info(struct sock *sk, u32 ext,
-                                  struct sk_buff *skb)
+
+/* Extract info for Tcp socket info provided via netlink. */
+static void tcp_illinois_info(struct sock *sk, u32 ext,
+                              struct sk_buff *skb)
 {
-        const struct tcp_illinois *ca = inet_csk_ca(sk);
+        const struct illinois *ca = inet_csk_ca(sk);
 
         if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
                 struct tcpvegas_info info = {
                         .tcpv_enabled = 1,
-                        .tcpv_rttcnt = ca->rtt_cnt,
-                        .tcpv_minrtt = ca->min_rtt,
+                        .tcpv_rttcnt = ca->cnt_rtt,
+                        .tcpv_minrtt = ca->base_rtt,
                 };
-                u64 avg_rtt = ca->sum_rtt;
-                do_div(avg_rtt, ca->rtt_cnt);
-                info.tcpv_rtt = avg_rtt;
+                u64 t = ca->sum_rtt;
+
+                do_div(t, ca->cnt_rtt);
+                info.tcpv_rtt = t;
 
                 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
         }
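
Aside (not part of the patch): the rewritten congestion-avoidance arithmetic accumulates acked packets in snd_cwnd_cnt (ca->acked compensates for delayed ACKs, via the new pkts_acked hook) and bumps the window once snd_cwnd_cnt * alpha >> ALPHA_SHIFT reaches cwnd, which amounts to roughly alpha/ALPHA_SCALE extra segments per RTT. A userspace sketch assuming one packet per ACK and invented values; ALPHA_SHIFT = 7 is copied from the unchanged top of the file:

#include <stdint.h>
#include <stdio.h>

#define ALPHA_SHIFT     7
#define ALPHA_SCALE     (1u<<ALPHA_SHIFT)

int main(void)
{
        uint32_t cwnd = 100, cwnd_cnt = 0;
        uint32_t alpha = 10 * ALPHA_SCALE;      /* 10.0, i.e. ALPHA_MAX */

        /* simulate roughly one RTT worth of ACKs, one packet per ACK */
        for (int acked = 0; acked < 100; acked++) {
                uint32_t delta;

                cwnd_cnt += 1;                  /* ca->acked for a plain ACK */
                delta = (cwnd_cnt * alpha) >> ALPHA_SHIFT;
                if (delta >= cwnd) {
                        cwnd += delta / cwnd;   /* usually +1 */
                        cwnd_cnt = 0;
                }
        }
        /* prints 109: close to the ideal +10 segments for alpha = 10.0 */
        printf("cwnd after one RTT: %u\n", cwnd);
        return 0;
}
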
@@ -257,8 +329,10 @@ static struct tcp_congestion_ops tcp_illinois = {
         .ssthresh       = tcp_illinois_ssthresh,
         .min_cwnd       = tcp_reno_min_cwnd,
         .cong_avoid     = tcp_illinois_cong_avoid,
-        .rtt_sample     = tcp_illinois_rtt_calc,
-        .get_info       = tcp_illinois_get_info,
+        .set_state      = tcp_illinois_state,
+        .rtt_sample     = tcp_illinois_rtt_sample,
+        .get_info       = tcp_illinois_info,
+        .pkts_acked     = tcp_illinois_acked,
 
         .owner          = THIS_MODULE,
         .name           = "illinois",
@@ -266,7 +340,7 @@ static struct tcp_congestion_ops tcp_illinois = {
 
 static int __init tcp_illinois_register(void)
 {
-        BUILD_BUG_ON(sizeof(struct tcp_illinois) > ICSK_CA_PRIV_SIZE);
+        BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
         return tcp_register_congestion_control(&tcp_illinois);
 }
 
@@ -281,4 +355,4 @@ module_exit(tcp_illinois_unregister);
 MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("TCP Illinois");
-MODULE_VERSION("0.3");
+MODULE_VERSION("1.0");