diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/tcp_illinois.c | 298 |
1 files changed, 186 insertions, 112 deletions
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 91a2a34604cd..ae6298600886 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c | |||
@@ -23,74 +23,106 @@ | |||
23 | #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ | 23 | #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ |
24 | #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ | 24 | #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ |
25 | #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ | 25 | #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ |
26 | #define U32_MAX ((u32)~0U) | ||
27 | #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ | ||
26 | 28 | ||
27 | #define BETA_SHIFT 6 | 29 | #define BETA_SHIFT 6 |
28 | #define BETA_SCALE (1u<<BETA_SHIFT) | 30 | #define BETA_SCALE (1u<<BETA_SHIFT) |
29 | #define BETA_MIN (BETA_SCALE/8) /* 0.8 */ | 31 | #define BETA_MIN (BETA_SCALE/8) /* 0.125 */ |
30 | #define BETA_MAX (BETA_SCALE/2) | 32 | #define BETA_MAX (BETA_SCALE/2) /* 0.5 */ |
31 | #define BETA_BASE BETA_MAX /* 0.5 */ | 33 | #define BETA_BASE BETA_MAX |
32 | |||
33 | #define THETA 5 | ||
34 | 34 | ||
35 | static int win_thresh __read_mostly = 15; | 35 | static int win_thresh __read_mostly = 15; |
36 | module_param(win_thresh, int, 0644); | 36 | module_param(win_thresh, int, 0); |
37 | MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); | 37 | MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); |
38 | 38 | ||
39 | #define MAX_RTT 0x7fffffff | 39 | static int theta __read_mostly = 5; |
40 | module_param(theta, int, 0); | ||
41 | MODULE_PARM_DESC(theta, "# of fast RTT's before full growth"); | ||
40 | 42 | ||
41 | /* TCP Illinois Parameters */ | 43 | /* TCP Illinois Parameters */ |
42 | struct tcp_illinois { | 44 | struct illinois { |
43 | u32 last_alpha; | 45 | u64 sum_rtt; /* sum of rtt's measured within last rtt */ |
44 | u32 min_rtt; | 46 | u16 cnt_rtt; /* # of rtts measured within last rtt */ |
45 | u32 max_rtt; | 47 | u32 base_rtt; /* min of all rtt in usec */ |
46 | u32 rtt_low; | 48 | u32 max_rtt; /* max of all rtt in usec */ |
47 | u32 rtt_cnt; | 49 | u32 end_seq; /* right edge of current RTT */ |
48 | u64 sum_rtt; | 50 | u32 alpha; /* Additive increase */ |
51 | u32 beta; /* Muliplicative decrease */ | ||
52 | u16 acked; /* # packets acked by current ACK */ | ||
53 | u8 rtt_above; /* average rtt has gone above threshold */ | ||
54 | u8 rtt_low; /* # of rtts measurements below threshold */ | ||
49 | }; | 55 | }; |
50 | 56 | ||
57 | static void rtt_reset(struct sock *sk) | ||
58 | { | ||
59 | struct tcp_sock *tp = tcp_sk(sk); | ||
60 | struct illinois *ca = inet_csk_ca(sk); | ||
61 | |||
62 | ca->end_seq = tp->snd_nxt; | ||
63 | ca->cnt_rtt = 0; | ||
64 | ca->sum_rtt = 0; | ||
65 | |||
66 | /* TODO: age max_rtt? */ | ||
67 | } | ||
68 | |||
51 | static void tcp_illinois_init(struct sock *sk) | 69 | static void tcp_illinois_init(struct sock *sk) |
52 | { | 70 | { |
53 | struct tcp_illinois *ca = inet_csk_ca(sk); | 71 | struct illinois *ca = inet_csk_ca(sk); |
72 | |||
73 | ca->alpha = ALPHA_MAX; | ||
74 | ca->beta = BETA_BASE; | ||
75 | ca->base_rtt = 0x7fffffff; | ||
76 | ca->max_rtt = 0; | ||
77 | |||
78 | ca->acked = 0; | ||
79 | ca->rtt_low = 0; | ||
80 | ca->rtt_above = 0; | ||
54 | 81 | ||
55 | ca->last_alpha = ALPHA_BASE; | 82 | rtt_reset(sk); |
56 | ca->min_rtt = 0x7fffffff; | ||
57 | } | 83 | } |
58 | 84 | ||
59 | /* | 85 | /* Measure RTT for each ack. */ |
60 | * Keep track of min, max and average RTT | 86 | static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt) |
61 | */ | ||
62 | static void tcp_illinois_rtt_calc(struct sock *sk, u32 rtt) | ||
63 | { | 87 | { |
64 | struct tcp_illinois *ca = inet_csk_ca(sk); | 88 | struct illinois *ca = inet_csk_ca(sk); |
89 | |||
90 | /* ignore bogus values, this prevents wraparound in alpha math */ | ||
91 | if (rtt > RTT_MAX) | ||
92 | rtt = RTT_MAX; | ||
65 | 93 | ||
66 | if (rtt < ca->min_rtt) | 94 | /* keep track of minimum RTT seen so far */ |
67 | ca->min_rtt = rtt; | 95 | if (ca->base_rtt > rtt) |
68 | if (rtt > ca->max_rtt) | 96 | ca->base_rtt = rtt; |
97 | |||
98 | /* and max */ | ||
99 | if (ca->max_rtt < rtt) | ||
69 | ca->max_rtt = rtt; | 100 | ca->max_rtt = rtt; |
70 | 101 | ||
71 | if (++ca->rtt_cnt == 1) | 102 | ++ca->cnt_rtt; |
72 | ca->sum_rtt = rtt; | 103 | ca->sum_rtt += rtt; |
73 | else | ||
74 | ca->sum_rtt += rtt; | ||
75 | } | 104 | } |
76 | 105 | ||
77 | /* max queuing delay */ | 106 | /* Capture count of packets covered by ack, to adjust for delayed acks */ |
78 | static inline u32 max_delay(const struct tcp_illinois *ca) | 107 | static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked) |
79 | { | 108 | { |
80 | return ca->max_rtt - ca->min_rtt; | 109 | struct illinois *ca = inet_csk_ca(sk); |
110 | ca->acked = pkts_acked; | ||
81 | } | 111 | } |
82 | 112 | ||
83 | /* average queueing delay */ | 113 | /* Maximum queuing delay */ |
84 | static u32 avg_delay(struct tcp_illinois *ca) | 114 | static inline u32 max_delay(const struct illinois *ca) |
85 | { | 115 | { |
86 | u64 avg_rtt = ca->sum_rtt; | 116 | return ca->max_rtt - ca->base_rtt; |
87 | 117 | } | |
88 | do_div(avg_rtt, ca->rtt_cnt); | ||
89 | 118 | ||
90 | ca->sum_rtt = 0; | 119 | /* Average queuing delay */ |
91 | ca->rtt_cnt = 0; | 120 | static inline u32 avg_delay(const struct illinois *ca) |
121 | { | ||
122 | u64 t = ca->sum_rtt; | ||
92 | 123 | ||
93 | return avg_rtt - ca->min_rtt; | 124 | do_div(t, ca->cnt_rtt); |
125 | return t - ca->base_rtt; | ||
94 | } | 126 | } |
95 | 127 | ||
96 | /* | 128 | /* |
@@ -101,32 +133,31 @@ static u32 avg_delay(struct tcp_illinois *ca) | |||
101 | * A. If average delay is at minimum (we are uncongested), | 133 | * A. If average delay is at minimum (we are uncongested), |
102 | * then use large alpha (10.0) to increase faster. | 134 | * then use large alpha (10.0) to increase faster. |
103 | * B. If average delay is at maximum (getting congested) | 135 | * B. If average delay is at maximum (getting congested) |
104 | * then use small alpha (1.0) | 136 | * then use small alpha (0.3) |
105 | * | 137 | * |
106 | * The result is a convex window growth curve. | 138 | * The result is a convex window growth curve. |
107 | */ | 139 | */ |
108 | static u32 alpha(const struct sock *sk) | 140 | static u32 alpha(struct illinois *ca, u32 da, u32 dm) |
109 | { | 141 | { |
110 | struct tcp_sock *tp = tcp_sk(sk); | 142 | u32 d1 = dm / 100; /* Low threshold */ |
111 | struct tcp_illinois *ca = inet_csk_ca(sk); | ||
112 | u32 dm = max_delay(ca); | ||
113 | u32 da = avg_delay(ca); | ||
114 | u32 d1, a; | ||
115 | 143 | ||
116 | if (tp->snd_cwnd < win_thresh) | ||
117 | return ALPHA_BASE; /* same as Reno (1.0) */ | ||
118 | |||
119 | d1 = dm / 100; | ||
120 | if (da <= d1) { | 144 | if (da <= d1) { |
121 | /* Don't let noise force agressive response */ | 145 | /* If never got out of low delay zone, then use max */ |
122 | if (ca->rtt_low < THETA) { | 146 | if (!ca->rtt_above) |
123 | ++ca->rtt_low; | ||
124 | return ca->last_alpha; | ||
125 | } else | ||
126 | return ALPHA_MAX; | 147 | return ALPHA_MAX; |
148 | |||
149 | /* Wait for 5 good RTT's before allowing alpha to go alpha max. | ||
150 | * This prevents one good RTT from causing sudden window increase. | ||
151 | */ | ||
152 | if (++ca->rtt_low < theta) | ||
153 | return ca->alpha; | ||
154 | |||
155 | ca->rtt_low = 0; | ||
156 | ca->rtt_above = 0; | ||
157 | return ALPHA_MAX; | ||
127 | } | 158 | } |
128 | 159 | ||
129 | ca->rtt_low = 0; | 160 | ca->rtt_above = 1; |
130 | 161 | ||
131 | /* | 162 | /* |
132 | * Based on: | 163 | * Based on: |
@@ -146,37 +177,8 @@ static u32 alpha(const struct sock *sk) | |||
146 | 177 | ||
147 | dm -= d1; | 178 | dm -= d1; |
148 | da -= d1; | 179 | da -= d1; |
149 | 180 | return (dm * ALPHA_MAX) / | |
150 | a = (dm * ALPHA_MAX) / (dm - (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); | 181 | (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); |
151 | ca->last_alpha = a; | ||
152 | return a; | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * Increase window in response to successful acknowledgment. | ||
157 | */ | ||
158 | static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | ||
159 | u32 in_flight, int flag) | ||
160 | { | ||
161 | struct tcp_sock *tp = tcp_sk(sk); | ||
162 | |||
163 | /* RFC2861 only increase cwnd if fully utilized */ | ||
164 | if (!tcp_is_cwnd_limited(sk, in_flight)) | ||
165 | return; | ||
166 | |||
167 | /* In slow start */ | ||
168 | if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
169 | tcp_slow_start(tp); | ||
170 | |||
171 | else { | ||
172 | /* additive increase cwnd += alpha / cwnd */ | ||
173 | if ((tp->snd_cwnd_cnt * alpha(sk)) >> ALPHA_SHIFT >= tp->snd_cwnd) { | ||
174 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
175 | tp->snd_cwnd++; | ||
176 | tp->snd_cwnd_cnt = 0; | ||
177 | } else | ||
178 | tp->snd_cwnd_cnt++; | ||
179 | } | ||
180 | } | 182 | } |
181 | 183 | ||
182 | /* | 184 | /* |
@@ -187,20 +189,14 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | |||
187 | * If delay is up to 80% of max then beta = 1/2 | 189 | * If delay is up to 80% of max then beta = 1/2 |
188 | * In between is a linear function | 190 | * In between is a linear function |
189 | */ | 191 | */ |
190 | static inline u32 beta(struct sock *sk) | 192 | static u32 beta(u32 da, u32 dm) |
191 | { | 193 | { |
192 | struct tcp_sock *tp = tcp_sk(sk); | ||
193 | struct tcp_illinois *ca = inet_csk_ca(sk); | ||
194 | u32 dm = max_delay(ca); | ||
195 | u32 da = avg_delay(ca); | ||
196 | u32 d2, d3; | 194 | u32 d2, d3; |
197 | 195 | ||
198 | if (tp->snd_cwnd < win_thresh) | ||
199 | return BETA_BASE; | ||
200 | |||
201 | d2 = dm / 10; | 196 | d2 = dm / 10; |
202 | if (da <= d2) | 197 | if (da <= d2) |
203 | return BETA_MIN; | 198 | return BETA_MIN; |
199 | |||
204 | d3 = (8 * dm) / 10; | 200 | d3 = (8 * dm) / 10; |
205 | if (da >= d3 || d3 <= d2) | 201 | if (da >= d3 || d3 <= d2) |
206 | return BETA_MAX; | 202 | return BETA_MAX; |
@@ -222,31 +218,107 @@ static inline u32 beta(struct sock *sk) | |||
222 | / (d3 - d2); | 218 | / (d3 - d2); |
223 | } | 219 | } |
224 | 220 | ||
221 | /* Update alpha and beta values once per RTT */ | ||
222 | static void update_params(struct sock *sk) | ||
223 | { | ||
224 | struct tcp_sock *tp = tcp_sk(sk); | ||
225 | struct illinois *ca = inet_csk_ca(sk); | ||
226 | |||
227 | if (tp->snd_cwnd < win_thresh) { | ||
228 | ca->alpha = ALPHA_BASE; | ||
229 | ca->beta = BETA_BASE; | ||
230 | } else if (ca->cnt_rtt > 0) { | ||
231 | u32 dm = max_delay(ca); | ||
232 | u32 da = avg_delay(ca); | ||
233 | |||
234 | ca->alpha = alpha(ca, da, dm); | ||
235 | ca->beta = beta(da, dm); | ||
236 | } | ||
237 | |||
238 | rtt_reset(sk); | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * In case of loss, reset to default values | ||
243 | */ | ||
244 | static void tcp_illinois_state(struct sock *sk, u8 new_state) | ||
245 | { | ||
246 | struct illinois *ca = inet_csk_ca(sk); | ||
247 | |||
248 | if (new_state == TCP_CA_Loss) { | ||
249 | ca->alpha = ALPHA_BASE; | ||
250 | ca->beta = BETA_BASE; | ||
251 | ca->rtt_low = 0; | ||
252 | ca->rtt_above = 0; | ||
253 | rtt_reset(sk); | ||
254 | } | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * Increase window in response to successful acknowledgment. | ||
259 | */ | ||
260 | static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | ||
261 | u32 in_flight, int flag) | ||
262 | { | ||
263 | struct tcp_sock *tp = tcp_sk(sk); | ||
264 | struct illinois *ca = inet_csk_ca(sk); | ||
265 | |||
266 | if (after(ack, ca->end_seq)) | ||
267 | update_params(sk); | ||
268 | |||
269 | /* RFC2861 only increase cwnd if fully utilized */ | ||
270 | if (!tcp_is_cwnd_limited(sk, in_flight)) | ||
271 | return; | ||
272 | |||
273 | /* In slow start */ | ||
274 | if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
275 | tcp_slow_start(tp); | ||
276 | |||
277 | else { | ||
278 | u32 delta; | ||
279 | |||
280 | /* snd_cwnd_cnt is # of packets since last cwnd increment */ | ||
281 | tp->snd_cwnd_cnt += ca->acked; | ||
282 | ca->acked = 1; | ||
283 | |||
284 | /* This is close approximation of: | ||
285 | * tp->snd_cwnd += alpha/tp->snd_cwnd | ||
286 | */ | ||
287 | delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; | ||
288 | if (delta >= tp->snd_cwnd) { | ||
289 | tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, | ||
290 | (u32) tp->snd_cwnd_clamp); | ||
291 | tp->snd_cwnd_cnt = 0; | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | |||
225 | static u32 tcp_illinois_ssthresh(struct sock *sk) | 296 | static u32 tcp_illinois_ssthresh(struct sock *sk) |
226 | { | 297 | { |
227 | struct tcp_sock *tp = tcp_sk(sk); | 298 | struct tcp_sock *tp = tcp_sk(sk); |
299 | struct illinois *ca = inet_csk_ca(sk); | ||
228 | 300 | ||
229 | /* Multiplicative decrease */ | 301 | /* Multiplicative decrease */ |
230 | return max((tp->snd_cwnd * beta(sk)) >> BETA_SHIFT, 2U); | 302 | return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U); |
231 | } | 303 | } |
232 | 304 | ||
233 | /* Extract info for TCP socket info provided via netlink. | 305 | |
234 | * We aren't really doing Vegas, but we can provide RTT info | 306 | /* Extract info for Tcp socket info provided via netlink. */ |
235 | */ | 307 | static void tcp_illinois_info(struct sock *sk, u32 ext, |
236 | static void tcp_illinois_get_info(struct sock *sk, u32 ext, | 308 | struct sk_buff *skb) |
237 | struct sk_buff *skb) | ||
238 | { | 309 | { |
239 | const struct tcp_illinois *ca = inet_csk_ca(sk); | 310 | const struct illinois *ca = inet_csk_ca(sk); |
240 | 311 | ||
241 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | 312 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
242 | struct tcpvegas_info info = { | 313 | struct tcpvegas_info info = { |
243 | .tcpv_enabled = 1, | 314 | .tcpv_enabled = 1, |
244 | .tcpv_rttcnt = ca->rtt_cnt, | 315 | .tcpv_rttcnt = ca->cnt_rtt, |
245 | .tcpv_minrtt = ca->min_rtt, | 316 | .tcpv_minrtt = ca->base_rtt, |
246 | }; | 317 | }; |
247 | u64 avg_rtt = ca->sum_rtt; | 318 | u64 t = ca->sum_rtt; |
248 | do_div(avg_rtt, ca->rtt_cnt); | 319 | |
249 | info.tcpv_rtt = avg_rtt; | 320 | do_div(t, ca->cnt_rtt); |
321 | info.tcpv_rtt = t; | ||
250 | 322 | ||
251 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); | 323 | nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); |
252 | } | 324 | } |
@@ -257,8 +329,10 @@ static struct tcp_congestion_ops tcp_illinois = { | |||
257 | .ssthresh = tcp_illinois_ssthresh, | 329 | .ssthresh = tcp_illinois_ssthresh, |
258 | .min_cwnd = tcp_reno_min_cwnd, | 330 | .min_cwnd = tcp_reno_min_cwnd, |
259 | .cong_avoid = tcp_illinois_cong_avoid, | 331 | .cong_avoid = tcp_illinois_cong_avoid, |
260 | .rtt_sample = tcp_illinois_rtt_calc, | 332 | .set_state = tcp_illinois_state, |
261 | .get_info = tcp_illinois_get_info, | 333 | .rtt_sample = tcp_illinois_rtt_sample, |
334 | .get_info = tcp_illinois_info, | ||
335 | .pkts_acked = tcp_illinois_acked, | ||
262 | 336 | ||
263 | .owner = THIS_MODULE, | 337 | .owner = THIS_MODULE, |
264 | .name = "illinois", | 338 | .name = "illinois", |
@@ -266,7 +340,7 @@ static struct tcp_congestion_ops tcp_illinois = { | |||
266 | 340 | ||
267 | static int __init tcp_illinois_register(void) | 341 | static int __init tcp_illinois_register(void) |
268 | { | 342 | { |
269 | BUILD_BUG_ON(sizeof(struct tcp_illinois) > ICSK_CA_PRIV_SIZE); | 343 | BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE); |
270 | return tcp_register_congestion_control(&tcp_illinois); | 344 | return tcp_register_congestion_control(&tcp_illinois); |
271 | } | 345 | } |
272 | 346 | ||
@@ -281,4 +355,4 @@ module_exit(tcp_illinois_unregister); | |||
281 | MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); | 355 | MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); |
282 | MODULE_LICENSE("GPL"); | 356 | MODULE_LICENSE("GPL"); |
283 | MODULE_DESCRIPTION("TCP Illinois"); | 357 | MODULE_DESCRIPTION("TCP Illinois"); |
284 | MODULE_VERSION("0.3"); | 358 | MODULE_VERSION("1.0"); |