diff options
author | Lawrence Brakmo <brakmo@fb.com> | 2016-06-09 00:16:45 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-11 02:07:49 -0400 |
commit | 699fafafab6d765f12367b3ce0816e64ae19d1e8 (patch) | |
tree | f43744c64fd040388ab6c55c3edaa186654ad21b /net/ipv4/tcp_nv.c | |
parent | 6f094b9ec680209c5b7314feee983b2f4c910b1b (diff) |
tcp: add NV congestion control
TCP-NV (New Vegas) is a major update to TCP-Vegas.
An earlier version of NV was presented at 2010's LPC.
It is a delay-based congestion avoidance algorithm for the
data center. This version has been tested within a
10G rack where the HW RTTs are 20-50us and with
1 to 400 flows.
A description of TCP-NV, including implementation
details as well as experimental results, can be found at:
http://www.brakmo.org/networking/tcp-nv/TCPNV.html
Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_nv.c')
-rw-r--r-- | net/ipv4/tcp_nv.c | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c new file mode 100644 index 000000000000..5de82a8d4d87 --- /dev/null +++ b/net/ipv4/tcp_nv.c | |||
@@ -0,0 +1,476 @@ | |||
1 | /* | ||
2 | * TCP NV: TCP with Congestion Avoidance | ||
3 | * | ||
4 | * TCP-NV is a successor of TCP-Vegas that has been developed to | ||
5 | * deal with the issues that occur in modern networks. | ||
6 | * Like TCP-Vegas, TCP-NV supports true congestion avoidance, | ||
7 | * the ability to detect congestion before packet losses occur. | ||
8 | * When congestion (queue buildup) starts to occur, TCP-NV | ||
9 | * predicts what the cwnd size should be for the current | ||
10 | * throughput and it reduces the cwnd proportionally to | ||
11 | * the difference between the current cwnd and the predicted cwnd. | ||
12 | * | ||
13 | * NV is only recommended for traffic within a data center, and when | |
14 | * all the flows are NV (at least those within the data center). This | ||
15 | * is due to the inherent unfairness between flows using losses to | ||
16 | * detect congestion (congestion control) and those that use queue | ||
17 | * buildup to detect congestion (congestion avoidance). | ||
18 | * | ||
19 | * Note: High NIC coalescence values may lower the performance of NV | ||
20 | * due to the increased noise in RTT values. In particular, we have | ||
21 | * seen issues with rx-frames values greater than 8. | ||
22 | * | ||
23 | * TODO: | ||
24 | * 1) Add mechanism to deal with reverse congestion. | ||
25 | */ | ||
26 | |||
27 | #include <linux/mm.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/math64.h> | ||
30 | #include <net/tcp.h> | ||
31 | #include <linux/inet_diag.h> | ||
32 | |||
33 | /* TCP NV parameters | ||
34 | * | ||
35 | * nv_pad Max number of queued packets allowed in network | ||
36 | * nv_pad_buffer Do not grow cwnd if this closed to nv_pad | ||
37 | * nv_reset_period How often (in) seconds)to reset min_rtt | ||
38 | * nv_min_cwnd Don't decrease cwnd below this if there are no losses | ||
39 | * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected | ||
40 | * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 | ||
41 | * nv_rtt_factor RTT averaging factor | ||
42 | * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur | ||
43 | * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd | ||
44 | * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd | ||
45 | * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping | ||
46 | * slow-start due to congestion | ||
47 | * nv_stop_rtt_cnt Only grow cwnd for this many RTTs after non-congestion | ||
48 | * nv_rtt_min_cnt Wait these many RTTs before making congesion decision | ||
49 | * nv_cwnd_growth_rate_neg | ||
50 | * nv_cwnd_growth_rate_pos | ||
51 | * How quickly to double growth rate (not rate) of cwnd when not | ||
52 | * congested. One value (nv_cwnd_growth_rate_neg) for when | ||
53 | * rate < 1 pkt/RTT (after losses). The other (nv_cwnd_growth_rate_pos) | ||
54 | * otherwise. | ||
55 | */ | ||
56 | |||
57 | static int nv_pad __read_mostly = 10; | ||
58 | static int nv_pad_buffer __read_mostly = 2; | ||
59 | static int nv_reset_period __read_mostly = 5; /* in seconds */ | ||
60 | static int nv_min_cwnd __read_mostly = 2; | ||
61 | static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ | ||
62 | static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ | ||
63 | static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ | ||
64 | static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */ | ||
65 | static int nv_cwnd_growth_rate_neg __read_mostly = 8; | ||
66 | static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ | ||
67 | static int nv_dec_eval_min_calls __read_mostly = 60; | ||
68 | static int nv_inc_eval_min_calls __read_mostly = 20; | ||
69 | static int nv_ssthresh_eval_min_calls __read_mostly = 30; | ||
70 | static int nv_stop_rtt_cnt __read_mostly = 10; | ||
71 | static int nv_rtt_min_cnt __read_mostly = 2; | ||
72 | |||
73 | module_param(nv_pad, int, 0644); | ||
74 | MODULE_PARM_DESC(nv_pad, "max queued packets allowed in network"); | ||
75 | module_param(nv_reset_period, int, 0644); | ||
76 | MODULE_PARM_DESC(nv_reset_period, "nv_min_rtt reset period (secs)"); | ||
77 | module_param(nv_min_cwnd, int, 0644); | ||
78 | MODULE_PARM_DESC(nv_min_cwnd, "NV will not decrease cwnd below this value" | ||
79 | " without losses"); | ||
80 | |||
81 | /* TCP NV Parameters */ | ||
82 | struct tcpnv { | ||
83 | unsigned long nv_min_rtt_reset_jiffies; /* when to switch to | ||
84 | * nv_min_rtt_new */ | ||
85 | s8 cwnd_growth_factor; /* Current cwnd growth factor, | ||
86 | * < 0 => less than 1 packet/RTT */ | ||
87 | u8 available8; | ||
88 | u16 available16; | ||
89 | u32 loss_cwnd; /* cwnd at last loss */ | ||
90 | u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */ | ||
91 | nv_reset:1, /* whether to reset values */ | ||
92 | nv_catchup:1; /* whether we are growing because | ||
93 | * of temporary cwnd decrease */ | ||
94 | u8 nv_eval_call_cnt; /* call count since last eval */ | ||
95 | u8 nv_min_cwnd; /* nv won't make a ca decision if cwnd is | ||
96 | * smaller than this. It may grow to handle | ||
97 | * TSO, LRO and interrupt coalescence because | ||
98 | * with these a small cwnd cannot saturate | ||
99 | * the link. Note that this is different from | ||
100 | * the file local nv_min_cwnd */ | ||
101 | u8 nv_rtt_cnt; /* RTTs without making ca decision */; | ||
102 | u32 nv_last_rtt; /* last rtt */ | ||
103 | u32 nv_min_rtt; /* active min rtt. Used to determine slope */ | ||
104 | u32 nv_min_rtt_new; /* min rtt for future use */ | ||
105 | u32 nv_rtt_max_rate; /* max rate seen during current RTT */ | ||
106 | u32 nv_rtt_start_seq; /* current RTT ends when packet arrives | ||
107 | * acking beyond nv_rtt_start_seq */ | ||
108 | u32 nv_last_snd_una; /* Previous value of tp->snd_una. It is | ||
109 | * used to determine bytes acked since last | ||
110 | * call to bictcp_acked */ | ||
111 | u32 nv_no_cong_cnt; /* Consecutive no congestion decisions */ | ||
112 | }; | ||
113 | |||
/* Initial value of nv_min_rtt and nv_min_rtt_new, before any RTT sample */
#define NV_INIT_RTT	  U32_MAX
/* Starting value and lower bound of the per-connection ca->nv_min_cwnd */
#define NV_MIN_CWND	  4
/* Step by which tcpnv_acked() raises ca->nv_min_cwnd */
#define NV_MIN_CWND_GROW  2
/* ca->nv_min_cwnd is never raised above NV_TSO_CWND_BOUND + 1 */
#define NV_TSO_CWND_BOUND 80
118 | |||
119 | static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) | ||
120 | { | ||
121 | struct tcp_sock *tp = tcp_sk(sk); | ||
122 | |||
123 | ca->nv_reset = 0; | ||
124 | ca->loss_cwnd = 0; | ||
125 | ca->nv_no_cong_cnt = 0; | ||
126 | ca->nv_rtt_cnt = 0; | ||
127 | ca->nv_last_rtt = 0; | ||
128 | ca->nv_rtt_max_rate = 0; | ||
129 | ca->nv_rtt_start_seq = tp->snd_una; | ||
130 | ca->nv_eval_call_cnt = 0; | ||
131 | ca->nv_last_snd_una = tp->snd_una; | ||
132 | } | ||
133 | |||
134 | static void tcpnv_init(struct sock *sk) | ||
135 | { | ||
136 | struct tcpnv *ca = inet_csk_ca(sk); | ||
137 | |||
138 | tcpnv_reset(ca, sk); | ||
139 | |||
140 | ca->nv_allow_cwnd_growth = 1; | ||
141 | ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; | ||
142 | ca->nv_min_rtt = NV_INIT_RTT; | ||
143 | ca->nv_min_rtt_new = NV_INIT_RTT; | ||
144 | ca->nv_min_cwnd = NV_MIN_CWND; | ||
145 | ca->nv_catchup = 0; | ||
146 | ca->cwnd_growth_factor = 0; | ||
147 | } | ||
148 | |||
149 | static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) | ||
150 | { | ||
151 | struct tcp_sock *tp = tcp_sk(sk); | ||
152 | struct tcpnv *ca = inet_csk_ca(sk); | ||
153 | u32 cnt; | ||
154 | |||
155 | if (!tcp_is_cwnd_limited(sk)) | ||
156 | return; | ||
157 | |||
158 | /* Only grow cwnd if NV has not detected congestion */ | ||
159 | if (!ca->nv_allow_cwnd_growth) | ||
160 | return; | ||
161 | |||
162 | if (tcp_in_slow_start(tp)) { | ||
163 | acked = tcp_slow_start(tp, acked); | ||
164 | if (!acked) | ||
165 | return; | ||
166 | } | ||
167 | |||
168 | if (ca->cwnd_growth_factor < 0) { | ||
169 | cnt = tp->snd_cwnd << -ca->cwnd_growth_factor; | ||
170 | tcp_cong_avoid_ai(tp, cnt, acked); | ||
171 | } else { | ||
172 | cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor); | ||
173 | tcp_cong_avoid_ai(tp, cnt, acked); | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static u32 tcpnv_recalc_ssthresh(struct sock *sk) | ||
178 | { | ||
179 | const struct tcp_sock *tp = tcp_sk(sk); | ||
180 | struct tcpnv *ca = inet_csk_ca(sk); | ||
181 | |||
182 | ca->loss_cwnd = tp->snd_cwnd; | ||
183 | return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U); | ||
184 | } | ||
185 | |||
186 | static u32 tcpnv_undo_cwnd(struct sock *sk) | ||
187 | { | ||
188 | struct tcpnv *ca = inet_csk_ca(sk); | ||
189 | |||
190 | return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); | ||
191 | } | ||
192 | |||
193 | static void tcpnv_state(struct sock *sk, u8 new_state) | ||
194 | { | ||
195 | struct tcpnv *ca = inet_csk_ca(sk); | ||
196 | |||
197 | if (new_state == TCP_CA_Open && ca->nv_reset) { | ||
198 | tcpnv_reset(ca, sk); | ||
199 | } else if (new_state == TCP_CA_Loss || new_state == TCP_CA_CWR || | ||
200 | new_state == TCP_CA_Recovery) { | ||
201 | ca->nv_reset = 1; | ||
202 | ca->nv_allow_cwnd_growth = 0; | ||
203 | if (new_state == TCP_CA_Loss) { | ||
204 | /* Reset cwnd growth factor to Reno value */ | ||
205 | if (ca->cwnd_growth_factor > 0) | ||
206 | ca->cwnd_growth_factor = 0; | ||
207 | /* Decrease growth rate if allowed */ | ||
208 | if (nv_cwnd_growth_rate_neg > 0 && | ||
209 | ca->cwnd_growth_factor > -8) | ||
210 | ca->cwnd_growth_factor--; | ||
211 | } | ||
212 | } | ||
213 | } | ||
214 | |||
215 | /* Do congestion avoidance calculations for TCP-NV | ||
216 | */ | ||
217 | static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) | ||
218 | { | ||
219 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
220 | struct tcp_sock *tp = tcp_sk(sk); | ||
221 | struct tcpnv *ca = inet_csk_ca(sk); | ||
222 | unsigned long now = jiffies; | ||
223 | s64 rate64 = 0; | ||
224 | u32 rate, max_win, cwnd_by_slope; | ||
225 | u32 avg_rtt; | ||
226 | u32 bytes_acked = 0; | ||
227 | |||
228 | /* Some calls are for duplicates without timetamps */ | ||
229 | if (sample->rtt_us < 0) | ||
230 | return; | ||
231 | |||
232 | /* If not in TCP_CA_Open or TCP_CA_Disorder states, skip. */ | ||
233 | if (icsk->icsk_ca_state != TCP_CA_Open && | ||
234 | icsk->icsk_ca_state != TCP_CA_Disorder) | ||
235 | return; | ||
236 | |||
237 | /* Stop cwnd growth if we were in catch up mode */ | ||
238 | if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) { | ||
239 | ca->nv_catchup = 0; | ||
240 | ca->nv_allow_cwnd_growth = 0; | ||
241 | } | ||
242 | |||
243 | bytes_acked = tp->snd_una - ca->nv_last_snd_una; | ||
244 | ca->nv_last_snd_una = tp->snd_una; | ||
245 | |||
246 | if (sample->in_flight == 0) | ||
247 | return; | ||
248 | |||
249 | /* Calculate moving average of RTT */ | ||
250 | if (nv_rtt_factor > 0) { | ||
251 | if (ca->nv_last_rtt > 0) { | ||
252 | avg_rtt = (((u64)sample->rtt_us) * nv_rtt_factor + | ||
253 | ((u64)ca->nv_last_rtt) | ||
254 | * (256 - nv_rtt_factor)) >> 8; | ||
255 | } else { | ||
256 | avg_rtt = sample->rtt_us; | ||
257 | ca->nv_min_rtt = avg_rtt << 1; | ||
258 | } | ||
259 | ca->nv_last_rtt = avg_rtt; | ||
260 | } else { | ||
261 | avg_rtt = sample->rtt_us; | ||
262 | } | ||
263 | |||
264 | /* rate in 100's bits per second */ | ||
265 | rate64 = ((u64)sample->in_flight) * 8000000; | ||
266 | rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100)); | ||
267 | |||
268 | /* Remember the maximum rate seen during this RTT | ||
269 | * Note: It may be more than one RTT. This function should be | ||
270 | * called at least nv_dec_eval_min_calls times. | ||
271 | */ | ||
272 | if (ca->nv_rtt_max_rate < rate) | ||
273 | ca->nv_rtt_max_rate = rate; | ||
274 | |||
275 | /* We have valid information, increment counter */ | ||
276 | if (ca->nv_eval_call_cnt < 255) | ||
277 | ca->nv_eval_call_cnt++; | ||
278 | |||
279 | /* update min rtt if necessary */ | ||
280 | if (avg_rtt < ca->nv_min_rtt) | ||
281 | ca->nv_min_rtt = avg_rtt; | ||
282 | |||
283 | /* update future min_rtt if necessary */ | ||
284 | if (avg_rtt < ca->nv_min_rtt_new) | ||
285 | ca->nv_min_rtt_new = avg_rtt; | ||
286 | |||
287 | /* nv_min_rtt is updated with the minimum (possibley averaged) rtt | ||
288 | * seen in the last sysctl_tcp_nv_reset_period seconds (i.e. a | ||
289 | * warm reset). This new nv_min_rtt will be continued to be updated | ||
290 | * and be used for another sysctl_tcp_nv_reset_period seconds, | ||
291 | * when it will be updated again. | ||
292 | * In practice we introduce some randomness, so the actual period used | ||
293 | * is chosen randomly from the range: | ||
294 | * [sysctl_tcp_nv_reset_period*3/4, sysctl_tcp_nv_reset_period*5/4) | ||
295 | */ | ||
296 | if (time_after_eq(now, ca->nv_min_rtt_reset_jiffies)) { | ||
297 | unsigned char rand; | ||
298 | |||
299 | ca->nv_min_rtt = ca->nv_min_rtt_new; | ||
300 | ca->nv_min_rtt_new = NV_INIT_RTT; | ||
301 | get_random_bytes(&rand, 1); | ||
302 | ca->nv_min_rtt_reset_jiffies = | ||
303 | now + ((nv_reset_period * (384 + rand) * HZ) >> 9); | ||
304 | /* Every so often we decrease ca->nv_min_cwnd in case previous | ||
305 | * value is no longer accurate. | ||
306 | */ | ||
307 | ca->nv_min_cwnd = max(ca->nv_min_cwnd / 2, NV_MIN_CWND); | ||
308 | } | ||
309 | |||
310 | /* Once per RTT check if we need to do congestion avoidance */ | ||
311 | if (before(ca->nv_rtt_start_seq, tp->snd_una)) { | ||
312 | ca->nv_rtt_start_seq = tp->snd_nxt; | ||
313 | if (ca->nv_rtt_cnt < 0xff) | ||
314 | /* Increase counter for RTTs without CA decision */ | ||
315 | ca->nv_rtt_cnt++; | ||
316 | |||
317 | /* If this function is only called once within an RTT | ||
318 | * the cwnd is probably too small (in some cases due to | ||
319 | * tso, lro or interrupt coalescence), so we increase | ||
320 | * ca->nv_min_cwnd. | ||
321 | */ | ||
322 | if (ca->nv_eval_call_cnt == 1 && | ||
323 | bytes_acked >= (ca->nv_min_cwnd - 1) * tp->mss_cache && | ||
324 | ca->nv_min_cwnd < (NV_TSO_CWND_BOUND + 1)) { | ||
325 | ca->nv_min_cwnd = min(ca->nv_min_cwnd | ||
326 | + NV_MIN_CWND_GROW, | ||
327 | NV_TSO_CWND_BOUND + 1); | ||
328 | ca->nv_rtt_start_seq = tp->snd_nxt + | ||
329 | ca->nv_min_cwnd * tp->mss_cache; | ||
330 | ca->nv_eval_call_cnt = 0; | ||
331 | ca->nv_allow_cwnd_growth = 1; | ||
332 | return; | ||
333 | } | ||
334 | |||
335 | /* Find the ideal cwnd for current rate from slope | ||
336 | * slope = 80000.0 * mss / nv_min_rtt | ||
337 | * cwnd_by_slope = nv_rtt_max_rate / slope | ||
338 | */ | ||
339 | cwnd_by_slope = (u32) | ||
340 | div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt, | ||
341 | (u64)(80000 * tp->mss_cache)); | ||
342 | max_win = cwnd_by_slope + nv_pad; | ||
343 | |||
344 | /* If cwnd > max_win, decrease cwnd | ||
345 | * if cwnd < max_win, grow cwnd | ||
346 | * else leave the same | ||
347 | */ | ||
348 | if (tp->snd_cwnd > max_win) { | ||
349 | /* there is congestion, check that it is ok | ||
350 | * to make a CA decision | ||
351 | * 1. We should have at least nv_dec_eval_min_calls | ||
352 | * data points before making a CA decision | ||
353 | * 2. We only make a congesion decision after | ||
354 | * nv_rtt_min_cnt RTTs | ||
355 | */ | ||
356 | if (ca->nv_rtt_cnt < nv_rtt_min_cnt) { | ||
357 | return; | ||
358 | } else if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) { | ||
359 | if (ca->nv_eval_call_cnt < | ||
360 | nv_ssthresh_eval_min_calls) | ||
361 | return; | ||
362 | /* otherwise we will decrease cwnd */ | ||
363 | } else if (ca->nv_eval_call_cnt < | ||
364 | nv_dec_eval_min_calls) { | ||
365 | if (ca->nv_allow_cwnd_growth && | ||
366 | ca->nv_rtt_cnt > nv_stop_rtt_cnt) | ||
367 | ca->nv_allow_cwnd_growth = 0; | ||
368 | return; | ||
369 | } | ||
370 | |||
371 | /* We have enough data to determine we are congested */ | ||
372 | ca->nv_allow_cwnd_growth = 0; | ||
373 | tp->snd_ssthresh = | ||
374 | (nv_ssthresh_factor * max_win) >> 3; | ||
375 | if (tp->snd_cwnd - max_win > 2) { | ||
376 | /* gap > 2, we do exponential cwnd decrease */ | ||
377 | int dec; | ||
378 | |||
379 | dec = max(2U, ((tp->snd_cwnd - max_win) * | ||
380 | nv_cong_dec_mult) >> 7); | ||
381 | tp->snd_cwnd -= dec; | ||
382 | } else if (nv_cong_dec_mult > 0) { | ||
383 | tp->snd_cwnd = max_win; | ||
384 | } | ||
385 | if (ca->cwnd_growth_factor > 0) | ||
386 | ca->cwnd_growth_factor = 0; | ||
387 | ca->nv_no_cong_cnt = 0; | ||
388 | } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) { | ||
389 | /* There is no congestion, grow cwnd if allowed*/ | ||
390 | if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls) | ||
391 | return; | ||
392 | |||
393 | ca->nv_allow_cwnd_growth = 1; | ||
394 | ca->nv_no_cong_cnt++; | ||
395 | if (ca->cwnd_growth_factor < 0 && | ||
396 | nv_cwnd_growth_rate_neg > 0 && | ||
397 | ca->nv_no_cong_cnt > nv_cwnd_growth_rate_neg) { | ||
398 | ca->cwnd_growth_factor++; | ||
399 | ca->nv_no_cong_cnt = 0; | ||
400 | } else if (ca->cwnd_growth_factor >= 0 && | ||
401 | nv_cwnd_growth_rate_pos > 0 && | ||
402 | ca->nv_no_cong_cnt > | ||
403 | nv_cwnd_growth_rate_pos) { | ||
404 | ca->cwnd_growth_factor++; | ||
405 | ca->nv_no_cong_cnt = 0; | ||
406 | } | ||
407 | } else { | ||
408 | /* cwnd is in-between, so do nothing */ | ||
409 | return; | ||
410 | } | ||
411 | |||
412 | /* update state */ | ||
413 | ca->nv_eval_call_cnt = 0; | ||
414 | ca->nv_rtt_cnt = 0; | ||
415 | ca->nv_rtt_max_rate = 0; | ||
416 | |||
417 | /* Don't want to make cwnd < nv_min_cwnd | ||
418 | * (it wasn't before, if it is now is because nv | ||
419 | * decreased it). | ||
420 | */ | ||
421 | if (tp->snd_cwnd < nv_min_cwnd) | ||
422 | tp->snd_cwnd = nv_min_cwnd; | ||
423 | } | ||
424 | } | ||
425 | |||
426 | /* Extract info for Tcp socket info provided via netlink */ | ||
427 | size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr, | ||
428 | union tcp_cc_info *info) | ||
429 | { | ||
430 | const struct tcpnv *ca = inet_csk_ca(sk); | ||
431 | |||
432 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { | ||
433 | info->vegas.tcpv_enabled = 1; | ||
434 | info->vegas.tcpv_rttcnt = ca->nv_rtt_cnt; | ||
435 | info->vegas.tcpv_rtt = ca->nv_last_rtt; | ||
436 | info->vegas.tcpv_minrtt = ca->nv_min_rtt; | ||
437 | |||
438 | *attr = INET_DIAG_VEGASINFO; | ||
439 | return sizeof(struct tcpvegas_info); | ||
440 | } | ||
441 | return 0; | ||
442 | } | ||
443 | EXPORT_SYMBOL_GPL(tcpnv_get_info); | ||
444 | |||
445 | static struct tcp_congestion_ops tcpnv __read_mostly = { | ||
446 | .init = tcpnv_init, | ||
447 | .ssthresh = tcpnv_recalc_ssthresh, | ||
448 | .cong_avoid = tcpnv_cong_avoid, | ||
449 | .set_state = tcpnv_state, | ||
450 | .undo_cwnd = tcpnv_undo_cwnd, | ||
451 | .pkts_acked = tcpnv_acked, | ||
452 | .get_info = tcpnv_get_info, | ||
453 | |||
454 | .owner = THIS_MODULE, | ||
455 | .name = "nv", | ||
456 | }; | ||
457 | |||
458 | static int __init tcpnv_register(void) | ||
459 | { | ||
460 | BUILD_BUG_ON(sizeof(struct tcpnv) > ICSK_CA_PRIV_SIZE); | ||
461 | |||
462 | return tcp_register_congestion_control(&tcpnv); | ||
463 | } | ||
464 | |||
465 | static void __exit tcpnv_unregister(void) | ||
466 | { | ||
467 | tcp_unregister_congestion_control(&tcpnv); | ||
468 | } | ||
469 | |||
470 | module_init(tcpnv_register); | ||
471 | module_exit(tcpnv_unregister); | ||
472 | |||
473 | MODULE_AUTHOR("Lawrence Brakmo"); | ||
474 | MODULE_LICENSE("GPL"); | ||
475 | MODULE_DESCRIPTION("TCP NV"); | ||
476 | MODULE_VERSION("1.0"); | ||