diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2006-12-09 21:02:12 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-12-11 17:34:42 -0500 |
commit | 1a21e49a8d60f588c1276f765198b14d5688a778 (patch) | |
tree | 4b8770be71047437f30da694649719948977b189 | |
parent | 179ebc9f92da88e15ea86d7d27308c92712d8ee9 (diff) |
[DCCP] ccid3: Finer-grained resolution of sending rates
This patch:
* resolves a bug where packets smaller than 32/64 bytes resulted in sending rates of 0
* supports all sending rates from 1/64 bytes/second up to 4 Gbyte/second
* simplifies the handling of the overflow problems currently present in the calculations
The current sending rate X and the cached value X_recv of the receiver-estimated
sending rate are both scaled by 64 (2^6) in order to:
* cope with low sending rates (minimally 1 byte/second)
* allow upgrading to use a packets-per-second implementation of CCID 3
* avoid calculation errors due to integer arithmetic cut-off
The patch implements a revised strategy from
http://www.mail-archive.com/dccp@vger.kernel.org/msg01040.html
The only difference with regard to that strategy is that t_ipi is already
used in the calculation of the nofeedback timeout, which saves one division.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
-rw-r--r-- | include/linux/tfrc.h | 8 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 76 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 33 |
3 files changed, 78 insertions, 39 deletions
diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h index 31a9b25276fe..8a8462b4a4dd 100644 --- a/include/linux/tfrc.h +++ b/include/linux/tfrc.h | |||
@@ -37,10 +37,14 @@ struct tfrc_rx_info { | |||
37 | * @tfrctx_p: current loss event rate (5.4) | 37 | * @tfrctx_p: current loss event rate (5.4) |
38 | * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3) | 38 | * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3) |
39 | * @tfrctx_ipi: inter-packet interval (4.6) | 39 | * @tfrctx_ipi: inter-packet interval (4.6) |
40 | * | ||
41 | * Note: X and X_recv are both maintained in units of 64 * bytes/second. This | ||
42 | * enables a finer resolution of sending rates and avoids problems with | ||
43 | * integer arithmetic; u32 is not sufficient as scaling consumes 6 bits. | ||
40 | */ | 44 | */ |
41 | struct tfrc_tx_info { | 45 | struct tfrc_tx_info { |
42 | __u32 tfrctx_x; | 46 | __u64 tfrctx_x; |
43 | __u32 tfrctx_x_recv; | 47 | __u64 tfrctx_x_recv; |
44 | __u32 tfrctx_x_calc; | 48 | __u32 tfrctx_x_calc; |
45 | __u32 tfrctx_rtt; | 49 | __u32 tfrctx_rtt; |
46 | __u32 tfrctx_p; | 50 | __u32 tfrctx_p; |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index c54663f21fdd..aa355d4cfc8a 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -108,8 +108,9 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) | |||
108 | { | 108 | { |
109 | timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); | 109 | timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); |
110 | 110 | ||
111 | /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ | 111 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ |
112 | hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x); | 112 | hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s, |
113 | hctx->ccid3hctx_x >> 6); | ||
113 | 114 | ||
114 | /* Update nominal send time with regard to the new t_ipi */ | 115 | /* Update nominal send time with regard to the new t_ipi */ |
115 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); | 116 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); |
@@ -128,26 +129,33 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) | |||
128 | * X = max(min(2 * X, 2 * X_recv), s / R); | 129 | * X = max(min(2 * X, 2 * X_recv), s / R); |
129 | * tld = now; | 130 | * tld = now; |
130 | * | 131 | * |
132 | * Note: X and X_recv are both stored in units of 64 * bytes/second, to support | ||
133 | * fine-grained resolution of sending rates. This requires scaling by 2^6 | ||
134 | * throughout the code. Only X_calc is unscaled (in bytes/second). | ||
135 | * | ||
131 | * If X has changed, we also update the scheduled send time t_now, | 136 | * If X has changed, we also update the scheduled send time t_now, |
132 | * the inter-packet interval t_ipi, and the delta value. | 137 | * the inter-packet interval t_ipi, and the delta value. |
133 | */ | 138 | */ |
134 | static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) | 139 | static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) |
135 | 140 | ||
136 | { | 141 | { |
137 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 142 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
138 | const __u32 old_x = hctx->ccid3hctx_x; | 143 | const __u64 old_x = hctx->ccid3hctx_x; |
139 | 144 | ||
140 | if (hctx->ccid3hctx_p > 0) { | 145 | if (hctx->ccid3hctx_p > 0) { |
141 | hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc, | 146 | |
142 | hctx->ccid3hctx_x_recv * 2), | 147 | hctx->ccid3hctx_x = min_t(u64, hctx->ccid3hctx_x_calc << 6, |
143 | hctx->ccid3hctx_s / TFRC_T_MBI); | 148 | hctx->ccid3hctx_x_recv * 2 ); |
149 | hctx->ccid3hctx_x = max_t(u64, hctx->ccid3hctx_x, | ||
150 | (hctx->ccid3hctx_s << 6)/TFRC_T_MBI); | ||
144 | 151 | ||
145 | } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >= | 152 | } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >= |
146 | hctx->ccid3hctx_rtt) { | 153 | hctx->ccid3hctx_rtt) { |
147 | hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv, | 154 | |
148 | hctx->ccid3hctx_x ) * 2, | 155 | hctx->ccid3hctx_x = max(2 * min(hctx->ccid3hctx_x, |
149 | usecs_div(hctx->ccid3hctx_s, | 156 | hctx->ccid3hctx_x_recv), |
150 | hctx->ccid3hctx_rtt) ); | 157 | scaled_div(hctx->ccid3hctx_s << 6, |
158 | hctx->ccid3hctx_rtt )); | ||
151 | hctx->ccid3hctx_t_ld = *now; | 159 | hctx->ccid3hctx_t_ld = *now; |
152 | } | 160 | } |
153 | 161 | ||
@@ -194,13 +202,13 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
194 | case TFRC_SSTATE_NO_FBACK: | 202 | case TFRC_SSTATE_NO_FBACK: |
195 | /* RFC 3448, 4.4: Halve send rate directly */ | 203 | /* RFC 3448, 4.4: Halve send rate directly */ |
196 | hctx->ccid3hctx_x = max_t(u32, hctx->ccid3hctx_x / 2, | 204 | hctx->ccid3hctx_x = max_t(u32, hctx->ccid3hctx_x / 2, |
197 | hctx->ccid3hctx_s / TFRC_T_MBI); | 205 | (hctx->ccid3hctx_s << 6)/TFRC_T_MBI); |
198 | 206 | ||
199 | ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " | 207 | ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %u " |
200 | "bytes/s\n", | 208 | "bytes/s\n", |
201 | dccp_role(sk), sk, | 209 | dccp_role(sk), sk, |
202 | ccid3_tx_state_name(hctx->ccid3hctx_state), | 210 | ccid3_tx_state_name(hctx->ccid3hctx_state), |
203 | hctx->ccid3hctx_x); | 211 | (unsigned)(hctx->ccid3hctx_x >> 6)); |
204 | /* The value of R is still undefined and so we can not recompute | 212 | /* The value of R is still undefined and so we can not recompute |
205 | * the timout value. Keep initial value as per [RFC 4342, 5]. */ | 213 | * the timout value. Keep initial value as per [RFC 4342, 5]. */ |
206 | t_nfb = TFRC_INITIAL_TIMEOUT; | 214 | t_nfb = TFRC_INITIAL_TIMEOUT; |
@@ -209,11 +217,11 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
209 | case TFRC_SSTATE_FBACK: | 217 | case TFRC_SSTATE_FBACK: |
210 | /* | 218 | /* |
211 | * Check if IDLE since last timeout and recv rate is less than | 219 | * Check if IDLE since last timeout and recv rate is less than |
212 | * 4 packets per RTT | 220 | * 4 packets (in units of 64*bytes/sec) per RTT |
213 | */ | 221 | */ |
214 | if (!hctx->ccid3hctx_idle || | 222 | if (!hctx->ccid3hctx_idle || |
215 | (hctx->ccid3hctx_x_recv >= | 223 | (hctx->ccid3hctx_x_recv >= 4 * |
216 | 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { | 224 | scaled_div(hctx->ccid3hctx_s << 6, hctx->ccid3hctx_rtt))) { |
217 | struct timeval now; | 225 | struct timeval now; |
218 | 226 | ||
219 | ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", | 227 | ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", |
@@ -227,17 +235,23 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
227 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); | 235 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); |
228 | * Else | 236 | * Else |
229 | * X_recv = X_calc / 4; | 237 | * X_recv = X_calc / 4; |
238 | * | ||
239 | * Note that X_recv is scaled by 2^6 while X_calc is not | ||
230 | */ | 240 | */ |
231 | BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); | 241 | BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); |
232 | 242 | ||
233 | if (hctx->ccid3hctx_p == 0 || | 243 | if (hctx->ccid3hctx_p == 0 || |
234 | hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) { | 244 | hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5)) { |
235 | hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, | 245 | |
236 | hctx->ccid3hctx_s / (2 * TFRC_T_MBI)); | 246 | hctx->ccid3hctx_x_recv = |
247 | max_t(u64, hctx->ccid3hctx_x_recv / 2, | ||
248 | (hctx->ccid3hctx_s << 6) / | ||
249 | (2*TFRC_T_MBI)); | ||
250 | |||
237 | if (hctx->ccid3hctx_p == 0) | 251 | if (hctx->ccid3hctx_p == 0) |
238 | dccp_timestamp(sk, &now); | 252 | dccp_timestamp(sk, &now); |
239 | } else | 253 | } else |
240 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; | 254 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc << 4; |
241 | 255 | ||
242 | /* Now recalculate X [RFC 3448, 4.3, step (4)] */ | 256 | /* Now recalculate X [RFC 3448, 4.3, step (4)] */ |
243 | ccid3_hc_tx_update_x(sk, &now); | 257 | ccid3_hc_tx_update_x(sk, &now); |
@@ -315,9 +329,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
315 | hctx->ccid3hctx_t_last_win_count = now; | 329 | hctx->ccid3hctx_t_last_win_count = now; |
316 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | 330 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
317 | 331 | ||
318 | /* Set initial sending rate to 1 packet per second */ | 332 | /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */ |
319 | ccid3_hc_tx_update_s(hctx, skb->len); | 333 | ccid3_hc_tx_update_s(hctx, skb->len); |
320 | hctx->ccid3hctx_x = hctx->ccid3hctx_s; | 334 | hctx->ccid3hctx_x = hctx->ccid3hctx_s << 6; |
321 | 335 | ||
322 | /* First timeout, according to [RFC 3448, 4.2], is 1 second */ | 336 | /* First timeout, according to [RFC 3448, 4.2], is 1 second */ |
323 | hctx->ccid3hctx_t_ipi = USEC_PER_SEC; | 337 | hctx->ccid3hctx_t_ipi = USEC_PER_SEC; |
@@ -438,8 +452,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
438 | return; | 452 | return; |
439 | } | 453 | } |
440 | 454 | ||
441 | /* Update receive rate */ | 455 | /* Update receive rate in units of 64 * bytes/second */ |
442 | hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; | 456 | hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate << 6; |
443 | 457 | ||
444 | /* Update loss event rate */ | 458 | /* Update loss event rate */ |
445 | pinv = opt_recv->ccid3or_loss_event_rate; | 459 | pinv = opt_recv->ccid3or_loss_event_rate; |
@@ -475,12 +489,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
475 | * q is a constant, RFC 3448 recomments 0.9 | 489 | * q is a constant, RFC 3448 recomments 0.9 |
476 | */ | 490 | */ |
477 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | 491 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { |
478 | /* Use Larger Initial Windows [RFC 4342, sec. 5] | 492 | /* |
479 | * We deviate in that we use `s' instead of `MSS'. */ | 493 | * Larger Initial Windows [RFC 4342, sec. 5] |
494 | * We deviate in that we use `s' instead of `MSS'. | ||
495 | */ | ||
480 | u16 w_init = min( 4 * hctx->ccid3hctx_s, | 496 | u16 w_init = min( 4 * hctx->ccid3hctx_s, |
481 | max(2 * hctx->ccid3hctx_s, 4380)); | 497 | max(2 * hctx->ccid3hctx_s, 4380)); |
482 | hctx->ccid3hctx_rtt = r_sample; | 498 | hctx->ccid3hctx_rtt = r_sample; |
483 | hctx->ccid3hctx_x = usecs_div(w_init, r_sample); | 499 | hctx->ccid3hctx_x = scaled_div(w_init<< 6, r_sample); |
484 | hctx->ccid3hctx_t_ld = now; | 500 | hctx->ccid3hctx_t_ld = now; |
485 | 501 | ||
486 | ccid3_update_send_time(hctx); | 502 | ccid3_update_send_time(hctx); |
@@ -488,7 +504,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
488 | ccid3_pr_debug("%s(%p), s=%u, w_init=%u, " | 504 | ccid3_pr_debug("%s(%p), s=%u, w_init=%u, " |
489 | "R_sample=%ldus, X=%u\n", dccp_role(sk), | 505 | "R_sample=%ldus, X=%u\n", dccp_role(sk), |
490 | sk, hctx->ccid3hctx_s, w_init, r_sample, | 506 | sk, hctx->ccid3hctx_s, w_init, r_sample, |
491 | hctx->ccid3hctx_x); | 507 | (unsigned)(hctx->ccid3hctx_x >> 6)); |
492 | 508 | ||
493 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | 509 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); |
494 | } else { | 510 | } else { |
@@ -508,7 +524,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
508 | sk, hctx->ccid3hctx_rtt, r_sample, | 524 | sk, hctx->ccid3hctx_rtt, r_sample, |
509 | hctx->ccid3hctx_s, hctx->ccid3hctx_p, | 525 | hctx->ccid3hctx_s, hctx->ccid3hctx_p, |
510 | hctx->ccid3hctx_x_calc, | 526 | hctx->ccid3hctx_x_calc, |
511 | hctx->ccid3hctx_x); | 527 | (unsigned)(hctx->ccid3hctx_x >> 6)); |
512 | } | 528 | } |
513 | 529 | ||
514 | /* unschedule no feedback timer */ | 530 | /* unschedule no feedback timer */ |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 07596d704ef9..cd4fc542f73e 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -75,14 +75,14 @@ enum ccid3_hc_tx_states { | |||
75 | 75 | ||
76 | /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket | 76 | /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket |
77 | * | 77 | * |
78 | * @ccid3hctx_x - Current sending rate | 78 | * @ccid3hctx_x - Current sending rate in 64 * bytes per second |
79 | * @ccid3hctx_x_recv - Receive rate | 79 | * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second |
80 | * @ccid3hctx_x_calc - Calculated send rate (RFC 3448, 3.1) | 80 | * @ccid3hctx_x_calc - Calculated rate in bytes per second |
81 | * @ccid3hctx_rtt - Estimate of current round trip time in usecs | 81 | * @ccid3hctx_rtt - Estimate of current round trip time in usecs |
82 | * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 | 82 | * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 |
83 | * @ccid3hctx_s - Packet size | 83 | * @ccid3hctx_s - Packet size in bytes |
84 | * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1) | 84 | * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs |
85 | * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) | 85 | * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs |
86 | * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states | 86 | * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states |
87 | * @ccid3hctx_last_win_count - Last window counter sent | 87 | * @ccid3hctx_last_win_count - Last window counter sent |
88 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet | 88 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet |
@@ -91,7 +91,7 @@ enum ccid3_hc_tx_states { | |||
91 | * @ccid3hctx_idle - Flag indicating that sender is idling | 91 | * @ccid3hctx_idle - Flag indicating that sender is idling |
92 | * @ccid3hctx_t_ld - Time last doubled during slow start | 92 | * @ccid3hctx_t_ld - Time last doubled during slow start |
93 | * @ccid3hctx_t_nom - Nominal send time of next packet | 93 | * @ccid3hctx_t_nom - Nominal send time of next packet |
94 | * @ccid3hctx_delta - Send timer delta | 94 | * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs |
95 | * @ccid3hctx_hist - Packet history | 95 | * @ccid3hctx_hist - Packet history |
96 | * @ccid3hctx_options_received - Parsed set of retrieved options | 96 | * @ccid3hctx_options_received - Parsed set of retrieved options |
97 | */ | 97 | */ |
@@ -171,4 +171,23 @@ static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) | |||
171 | return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); | 171 | return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); |
172 | } | 172 | } |
173 | 173 | ||
174 | static inline u64 scaled_div(u64 a, u32 b) | ||
175 | { | ||
176 | BUG_ON(b==0); | ||
177 | a *= 1000000; | ||
178 | do_div(a, b); | ||
179 | return a; | ||
180 | } | ||
181 | |||
182 | static inline u32 scaled_div32(u64 a, u32 b) | ||
183 | { | ||
184 | u64 result = scaled_div(a, b); | ||
185 | |||
186 | if (result > UINT_MAX) { | ||
187 | DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", | ||
188 | (unsigned long long)a, b); | ||
189 | return UINT_MAX; | ||
190 | } | ||
191 | return result; | ||
192 | } | ||
174 | #endif /* _DCCP_CCID3_H_ */ | 193 | #endif /* _DCCP_CCID3_H_ */ |