diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2011-07-03 11:55:03 -0400 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2011-07-04 14:37:49 -0400 |
commit | 113ced1f52e5ed2dfedc0771a1b11b536cde8168 (patch) | |
tree | 513df4ef1ad8b8530e12329f8f02c89140860b36 /net/dccp/ccids/ccid2.c | |
parent | 58fdea0f3170c13a3b875ef904d5b67cf73814be (diff) |
dccp ccid-2: Perform congestion-window validation
CCID-2's cwnd increases like TCP during slow-start, which has implications for
* the local Sequence Window value (should be > cwnd),
* the Ack Ratio value.
Hence an exponential growth, if it does not reflect the actual network
conditions, can quickly lead to instability.
This patch adds congestion-window validation (RFC2861) to CCID-2:
* cwnd is constrained if the sender is application limited;
* cwnd is reduced after a long idle period, as suggested in the '90 paper
by Van Jacobson, in RFC 2581 (sec. 4.1);
* cwnd is never reduced below the RFC 3390 initial window.
As marked in the comments, the code is actually almost a direct copy of the
TCP congestion-window-validation algorithms. By continuing this work, it may
in future be possible to use the TCP code (not possible at the moment).
The mechanism can be turned off using a module parameter. Sampling of the
currently-used window (moving-maximum) is however done constantly; this is
used to determine the expected window, which can be exploited to regulate
DCCP's Sequence Window value.
This patch also sets slow-start-after-idle (RFC 4341, 5.1), i.e. it behaves like
TCP when net.ipv4.tcp_slow_start_after_idle = 1.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp/ccids/ccid2.c')
-rw-r--r-- | net/dccp/ccids/ccid2.c | 84 |
1 files changed, 81 insertions, 3 deletions
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 7d917981a4d1..0462040fc818 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -153,17 +153,93 @@ out: | |||
153 | sock_put(sk); | 153 | sock_put(sk); |
154 | } | 154 | } |
155 | 155 | ||
156 | /* | ||
157 | * Congestion window validation (RFC 2861). | ||
158 | */ | ||
159 | static int ccid2_do_cwv = 1; | ||
160 | module_param(ccid2_do_cwv, bool, 0644); | ||
161 | MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); | ||
162 | |||
163 | /** | ||
164 | * ccid2_update_used_window - Track how much of cwnd is actually used | ||
165 | * This is done in addition to CWV. The sender needs to have an idea of how many | ||
166 | * packets may be in flight, to set the local Sequence Window value accordingly | ||
167 | * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the | ||
168 | * maximum-used window. We use an EWMA low-pass filter to filter out noise. | ||
169 | */ | ||
170 | static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd) | ||
171 | { | ||
172 | hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4; | ||
173 | } | ||
174 | |||
175 | /* This borrows the code of tcp_cwnd_application_limited() */ | ||
176 | static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) | ||
177 | { | ||
178 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
179 | /* don't reduce cwnd below the initial window (IW) */ | ||
180 | u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache), | ||
181 | win_used = max(hc->tx_cwnd_used, init_win); | ||
182 | |||
183 | if (win_used < hc->tx_cwnd) { | ||
184 | hc->tx_ssthresh = max(hc->tx_ssthresh, | ||
185 | (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2)); | ||
186 | hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1; | ||
187 | } | ||
188 | hc->tx_cwnd_used = 0; | ||
189 | hc->tx_cwnd_stamp = now; | ||
190 | } | ||
191 | |||
192 | /* This borrows the code of tcp_cwnd_restart() */ | ||
193 | static void ccid2_cwnd_restart(struct sock *sk, const u32 now) | ||
194 | { | ||
195 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
196 | u32 cwnd = hc->tx_cwnd, restart_cwnd, | ||
197 | iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); | ||
198 | |||
199 | hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); | ||
200 | |||
201 | /* don't reduce cwnd below the initial window (IW) */ | ||
202 | restart_cwnd = min(cwnd, iwnd); | ||
203 | cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; | ||
204 | hc->tx_cwnd = max(cwnd, restart_cwnd); | ||
205 | |||
206 | hc->tx_cwnd_stamp = now; | ||
207 | hc->tx_cwnd_used = 0; | ||
208 | } | ||
209 | |||
156 | static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) | 210 | static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
157 | { | 211 | { |
158 | struct dccp_sock *dp = dccp_sk(sk); | 212 | struct dccp_sock *dp = dccp_sk(sk); |
159 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 213 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
214 | const u32 now = ccid2_time_stamp; | ||
160 | struct ccid2_seq *next; | 215 | struct ccid2_seq *next; |
161 | 216 | ||
162 | hc->tx_pipe++; | 217 | /* slow-start after idle periods (RFC 2581, RFC 2861) */ |
218 | if (ccid2_do_cwv && !hc->tx_pipe && | ||
219 | (s32)(now - hc->tx_lsndtime) >= hc->tx_rto) | ||
220 | ccid2_cwnd_restart(sk, now); | ||
221 | |||
222 | hc->tx_lsndtime = now; | ||
223 | hc->tx_pipe += 1; | ||
224 | |||
225 | /* see whether cwnd was fully used (RFC 2861), update expected window */ | ||
226 | if (ccid2_cwnd_network_limited(hc)) { | ||
227 | ccid2_update_used_window(hc, hc->tx_cwnd); | ||
228 | hc->tx_cwnd_used = 0; | ||
229 | hc->tx_cwnd_stamp = now; | ||
230 | } else { | ||
231 | if (hc->tx_pipe > hc->tx_cwnd_used) | ||
232 | hc->tx_cwnd_used = hc->tx_pipe; | ||
233 | |||
234 | ccid2_update_used_window(hc, hc->tx_cwnd_used); | ||
235 | |||
236 | if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto) | ||
237 | ccid2_cwnd_application_limited(sk, now); | ||
238 | } | ||
163 | 239 | ||
164 | hc->tx_seqh->ccid2s_seq = dp->dccps_gss; | 240 | hc->tx_seqh->ccid2s_seq = dp->dccps_gss; |
165 | hc->tx_seqh->ccid2s_acked = 0; | 241 | hc->tx_seqh->ccid2s_acked = 0; |
166 | hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; | 242 | hc->tx_seqh->ccid2s_sent = now; |
167 | 243 | ||
168 | next = hc->tx_seqh->ccid2s_next; | 244 | next = hc->tx_seqh->ccid2s_next; |
169 | /* check if we need to alloc more space */ | 245 | /* check if we need to alloc more space */ |
@@ -594,6 +670,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
594 | 670 | ||
595 | /* Use larger initial windows (RFC 4341, section 5). */ | 671 | /* Use larger initial windows (RFC 4341, section 5). */ |
596 | hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); | 672 | hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); |
673 | hc->tx_expected_wnd = hc->tx_cwnd; | ||
597 | 674 | ||
598 | /* Make sure that Ack Ratio is enabled and within bounds. */ | 675 | /* Make sure that Ack Ratio is enabled and within bounds. */ |
599 | max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); | 676 | max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); |
@@ -606,7 +683,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
606 | 683 | ||
607 | hc->tx_rto = DCCP_TIMEOUT_INIT; | 684 | hc->tx_rto = DCCP_TIMEOUT_INIT; |
608 | hc->tx_rpdupack = -1; | 685 | hc->tx_rpdupack = -1; |
609 | hc->tx_last_cong = ccid2_time_stamp; | 686 | hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; |
687 | hc->tx_cwnd_used = 0; | ||
610 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, | 688 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, |
611 | (unsigned long)sk); | 689 | (unsigned long)sk); |
612 | INIT_LIST_HEAD(&hc->tx_av_chunks); | 690 | INIT_LIST_HEAD(&hc->tx_av_chunks); |