aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Gerrit Renker <gerrit@erg.abdn.ac.uk> 2006-12-09 21:02:12 -0500
committer: David S. Miller <davem@sunset.davemloft.net> 2006-12-11 17:34:42 -0500
commit: 1a21e49a8d60f588c1276f765198b14d5688a778 (patch)
tree: 4b8770be71047437f30da694649719948977b189
parent: 179ebc9f92da88e15ea86d7d27308c92712d8ee9 (diff)
[DCCP] ccid3: Finer-grained resolution of sending rates
This patch
 * resolves a bug where packets smaller than 32/64 bytes resulted in sending rates of 0
 * supports all sending rates from 1/64 bytes/second up to 4Gbyte/second
 * simplifies the present overflow problems in calculations

Current sending rate X and the cached value X_recv of the receiver-estimated sending rate are both scaled by 64 (2^6) in order to
 * cope with low sending rates (minimally 1 byte/second)
 * allow upgrading to use a packets-per-second implementation of CCID 3
 * avoid calculation errors due to integer arithmetic cut-off

The patch implements a revised strategy from
 http://www.mail-archive.com/dccp@vger.kernel.org/msg01040.html

The only difference with regard to that strategy is that t_ipi is already used in the calculation of the nofeedback timeout, which saves one division.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
-rw-r--r-- include/linux/tfrc.h 8
-rw-r--r-- net/dccp/ccids/ccid3.c 76
-rw-r--r-- net/dccp/ccids/ccid3.h 33
3 files changed, 78 insertions, 39 deletions
diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h
index 31a9b25276fe..8a8462b4a4dd 100644
--- a/include/linux/tfrc.h
+++ b/include/linux/tfrc.h
@@ -37,10 +37,14 @@ struct tfrc_rx_info {
37 * @tfrctx_p: current loss event rate (5.4) 37 * @tfrctx_p: current loss event rate (5.4)
38 * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3) 38 * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3)
39 * @tfrctx_ipi: inter-packet interval (4.6) 39 * @tfrctx_ipi: inter-packet interval (4.6)
40 *
41 * Note: X and X_recv are both maintained in units of 64 * bytes/second. This
42 * enables a finer resolution of sending rates and avoids problems with
43 * integer arithmetic; u32 is not sufficient as scaling consumes 6 bits.
40 */ 44 */
41struct tfrc_tx_info { 45struct tfrc_tx_info {
42 __u32 tfrctx_x; 46 __u64 tfrctx_x;
43 __u32 tfrctx_x_recv; 47 __u64 tfrctx_x_recv;
44 __u32 tfrctx_x_calc; 48 __u32 tfrctx_x_calc;
45 __u32 tfrctx_rtt; 49 __u32 tfrctx_rtt;
46 __u32 tfrctx_p; 50 __u32 tfrctx_p;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c54663f21fdd..aa355d4cfc8a 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -108,8 +108,9 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
108{ 108{
109 timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); 109 timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
110 110
111 /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ 111 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
112 hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x); 112 hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
113 hctx->ccid3hctx_x >> 6);
113 114
114 /* Update nominal send time with regard to the new t_ipi */ 115 /* Update nominal send time with regard to the new t_ipi */
115 timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); 116 timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -128,26 +129,33 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
128 * X = max(min(2 * X, 2 * X_recv), s / R); 129 * X = max(min(2 * X, 2 * X_recv), s / R);
129 * tld = now; 130 * tld = now;
130 * 131 *
132 * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
133 * fine-grained resolution of sending rates. This requires scaling by 2^6
134 * throughout the code. Only X_calc is unscaled (in bytes/second).
135 *
131 * If X has changed, we also update the scheduled send time t_now, 136 * If X has changed, we also update the scheduled send time t_now,
132 * the inter-packet interval t_ipi, and the delta value. 137 * the inter-packet interval t_ipi, and the delta value.
133 */ 138 */
134static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) 139static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
135 140
136{ 141{
137 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 142 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
138 const __u32 old_x = hctx->ccid3hctx_x; 143 const __u64 old_x = hctx->ccid3hctx_x;
139 144
140 if (hctx->ccid3hctx_p > 0) { 145 if (hctx->ccid3hctx_p > 0) {
141 hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc, 146
142 hctx->ccid3hctx_x_recv * 2), 147 hctx->ccid3hctx_x = min_t(u64, hctx->ccid3hctx_x_calc << 6,
143 hctx->ccid3hctx_s / TFRC_T_MBI); 148 hctx->ccid3hctx_x_recv * 2 );
149 hctx->ccid3hctx_x = max_t(u64, hctx->ccid3hctx_x,
150 (hctx->ccid3hctx_s << 6)/TFRC_T_MBI);
144 151
145 } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >= 152 } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >=
146 hctx->ccid3hctx_rtt) { 153 hctx->ccid3hctx_rtt) {
147 hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv, 154
148 hctx->ccid3hctx_x ) * 2, 155 hctx->ccid3hctx_x = max(2 * min(hctx->ccid3hctx_x,
149 usecs_div(hctx->ccid3hctx_s, 156 hctx->ccid3hctx_x_recv),
150 hctx->ccid3hctx_rtt) ); 157 scaled_div(hctx->ccid3hctx_s << 6,
158 hctx->ccid3hctx_rtt ));
151 hctx->ccid3hctx_t_ld = *now; 159 hctx->ccid3hctx_t_ld = *now;
152 } 160 }
153 161
@@ -194,13 +202,13 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
194 case TFRC_SSTATE_NO_FBACK: 202 case TFRC_SSTATE_NO_FBACK:
195 /* RFC 3448, 4.4: Halve send rate directly */ 203 /* RFC 3448, 4.4: Halve send rate directly */
196 hctx->ccid3hctx_x = max_t(u32, hctx->ccid3hctx_x / 2, 204 hctx->ccid3hctx_x = max_t(u32, hctx->ccid3hctx_x / 2,
197 hctx->ccid3hctx_s / TFRC_T_MBI); 205 (hctx->ccid3hctx_s << 6)/TFRC_T_MBI);
198 206
199 ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " 207 ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %u "
200 "bytes/s\n", 208 "bytes/s\n",
201 dccp_role(sk), sk, 209 dccp_role(sk), sk,
202 ccid3_tx_state_name(hctx->ccid3hctx_state), 210 ccid3_tx_state_name(hctx->ccid3hctx_state),
203 hctx->ccid3hctx_x); 211 (unsigned)(hctx->ccid3hctx_x >> 6));
204 /* The value of R is still undefined and so we can not recompute 212 /* The value of R is still undefined and so we can not recompute
205 * the timout value. Keep initial value as per [RFC 4342, 5]. */ 213 * the timout value. Keep initial value as per [RFC 4342, 5]. */
206 t_nfb = TFRC_INITIAL_TIMEOUT; 214 t_nfb = TFRC_INITIAL_TIMEOUT;
@@ -209,11 +217,11 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
209 case TFRC_SSTATE_FBACK: 217 case TFRC_SSTATE_FBACK:
210 /* 218 /*
211 * Check if IDLE since last timeout and recv rate is less than 219 * Check if IDLE since last timeout and recv rate is less than
212 * 4 packets per RTT 220 * 4 packets (in units of 64*bytes/sec) per RTT
213 */ 221 */
214 if (!hctx->ccid3hctx_idle || 222 if (!hctx->ccid3hctx_idle ||
215 (hctx->ccid3hctx_x_recv >= 223 (hctx->ccid3hctx_x_recv >= 4 *
216 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { 224 scaled_div(hctx->ccid3hctx_s << 6, hctx->ccid3hctx_rtt))) {
217 struct timeval now; 225 struct timeval now;
218 226
219 ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", 227 ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
@@ -227,17 +235,23 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
227 * X_recv = max(X_recv / 2, s / (2 * t_mbi)); 235 * X_recv = max(X_recv / 2, s / (2 * t_mbi));
228 * Else 236 * Else
229 * X_recv = X_calc / 4; 237 * X_recv = X_calc / 4;
238 *
239 * Note that X_recv is scaled by 2^6 while X_calc is not
230 */ 240 */
231 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); 241 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
232 242
233 if (hctx->ccid3hctx_p == 0 || 243 if (hctx->ccid3hctx_p == 0 ||
234 hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) { 244 hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5)) {
235 hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, 245
236 hctx->ccid3hctx_s / (2 * TFRC_T_MBI)); 246 hctx->ccid3hctx_x_recv =
247 max_t(u64, hctx->ccid3hctx_x_recv / 2,
248 (hctx->ccid3hctx_s << 6) /
249 (2*TFRC_T_MBI));
250
237 if (hctx->ccid3hctx_p == 0) 251 if (hctx->ccid3hctx_p == 0)
238 dccp_timestamp(sk, &now); 252 dccp_timestamp(sk, &now);
239 } else 253 } else
240 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; 254 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc << 4;
241 255
242 /* Now recalculate X [RFC 3448, 4.3, step (4)] */ 256 /* Now recalculate X [RFC 3448, 4.3, step (4)] */
243 ccid3_hc_tx_update_x(sk, &now); 257 ccid3_hc_tx_update_x(sk, &now);
@@ -315,9 +329,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
315 hctx->ccid3hctx_t_last_win_count = now; 329 hctx->ccid3hctx_t_last_win_count = now;
316 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 330 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
317 331
318 /* Set initial sending rate to 1 packet per second */ 332 /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
319 ccid3_hc_tx_update_s(hctx, skb->len); 333 ccid3_hc_tx_update_s(hctx, skb->len);
320 hctx->ccid3hctx_x = hctx->ccid3hctx_s; 334 hctx->ccid3hctx_x = hctx->ccid3hctx_s << 6;
321 335
322 /* First timeout, according to [RFC 3448, 4.2], is 1 second */ 336 /* First timeout, according to [RFC 3448, 4.2], is 1 second */
323 hctx->ccid3hctx_t_ipi = USEC_PER_SEC; 337 hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
@@ -438,8 +452,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
438 return; 452 return;
439 } 453 }
440 454
441 /* Update receive rate */ 455 /* Update receive rate in units of 64 * bytes/second */
442 hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; 456 hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate << 6;
443 457
444 /* Update loss event rate */ 458 /* Update loss event rate */
445 pinv = opt_recv->ccid3or_loss_event_rate; 459 pinv = opt_recv->ccid3or_loss_event_rate;
@@ -475,12 +489,14 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
475 * q is a constant, RFC 3448 recomments 0.9 489 * q is a constant, RFC 3448 recomments 0.9
476 */ 490 */
477 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { 491 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
478 /* Use Larger Initial Windows [RFC 4342, sec. 5] 492 /*
479 * We deviate in that we use `s' instead of `MSS'. */ 493 * Larger Initial Windows [RFC 4342, sec. 5]
494 * We deviate in that we use `s' instead of `MSS'.
495 */
480 u16 w_init = min( 4 * hctx->ccid3hctx_s, 496 u16 w_init = min( 4 * hctx->ccid3hctx_s,
481 max(2 * hctx->ccid3hctx_s, 4380)); 497 max(2 * hctx->ccid3hctx_s, 4380));
482 hctx->ccid3hctx_rtt = r_sample; 498 hctx->ccid3hctx_rtt = r_sample;
483 hctx->ccid3hctx_x = usecs_div(w_init, r_sample); 499 hctx->ccid3hctx_x = scaled_div(w_init<< 6, r_sample);
484 hctx->ccid3hctx_t_ld = now; 500 hctx->ccid3hctx_t_ld = now;
485 501
486 ccid3_update_send_time(hctx); 502 ccid3_update_send_time(hctx);
@@ -488,7 +504,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
488 ccid3_pr_debug("%s(%p), s=%u, w_init=%u, " 504 ccid3_pr_debug("%s(%p), s=%u, w_init=%u, "
489 "R_sample=%ldus, X=%u\n", dccp_role(sk), 505 "R_sample=%ldus, X=%u\n", dccp_role(sk),
490 sk, hctx->ccid3hctx_s, w_init, r_sample, 506 sk, hctx->ccid3hctx_s, w_init, r_sample,
491 hctx->ccid3hctx_x); 507 (unsigned)(hctx->ccid3hctx_x >> 6));
492 508
493 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); 509 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
494 } else { 510 } else {
@@ -508,7 +524,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
508 sk, hctx->ccid3hctx_rtt, r_sample, 524 sk, hctx->ccid3hctx_rtt, r_sample,
509 hctx->ccid3hctx_s, hctx->ccid3hctx_p, 525 hctx->ccid3hctx_s, hctx->ccid3hctx_p,
510 hctx->ccid3hctx_x_calc, 526 hctx->ccid3hctx_x_calc,
511 hctx->ccid3hctx_x); 527 (unsigned)(hctx->ccid3hctx_x >> 6));
512 } 528 }
513 529
514 /* unschedule no feedback timer */ 530 /* unschedule no feedback timer */
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 07596d704ef9..cd4fc542f73e 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -75,14 +75,14 @@ enum ccid3_hc_tx_states {
75 75
76/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket 76/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
77 * 77 *
78 * @ccid3hctx_x - Current sending rate 78 * @ccid3hctx_x - Current sending rate in 64 * bytes per second
79 * @ccid3hctx_x_recv - Receive rate 79 * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
80 * @ccid3hctx_x_calc - Calculated send rate (RFC 3448, 3.1) 80 * @ccid3hctx_x_calc - Calculated rate in bytes per second
81 * @ccid3hctx_rtt - Estimate of current round trip time in usecs 81 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
82 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 82 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
83 * @ccid3hctx_s - Packet size 83 * @ccid3hctx_s - Packet size in bytes
84 * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1) 84 * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
85 * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) 85 * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
86 * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states 86 * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
87 * @ccid3hctx_last_win_count - Last window counter sent 87 * @ccid3hctx_last_win_count - Last window counter sent
88 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet 88 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
@@ -91,7 +91,7 @@ enum ccid3_hc_tx_states {
91 * @ccid3hctx_idle - Flag indicating that sender is idling 91 * @ccid3hctx_idle - Flag indicating that sender is idling
92 * @ccid3hctx_t_ld - Time last doubled during slow start 92 * @ccid3hctx_t_ld - Time last doubled during slow start
93 * @ccid3hctx_t_nom - Nominal send time of next packet 93 * @ccid3hctx_t_nom - Nominal send time of next packet
94 * @ccid3hctx_delta - Send timer delta 94 * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
95 * @ccid3hctx_hist - Packet history 95 * @ccid3hctx_hist - Packet history
96 * @ccid3hctx_options_received - Parsed set of retrieved options 96 * @ccid3hctx_options_received - Parsed set of retrieved options
97 */ 97 */
@@ -171,4 +171,23 @@ static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
171 return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid); 171 return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
172} 172}
173 173
/*
 * scaled_div - compute (a * 1000000) / b with a 64-bit dividend
 *
 * a is multiplied by 1000000 and then divided in place by b through the
 * kernel's do_div(); the resulting quotient is returned. The callers in
 * ccid3.c pair a byte count with a value in microseconds (or a rate in
 * bytes/second), so the factor converts between seconds and microseconds.
 * b must be non-zero; this is enforced by BUG_ON.
 * NOTE(review): the multiplication is done in u64 without an overflow
 * check, so it would wrap for a > 2^64 / 1000000 - confirm that all
 * callers stay below that bound.
 */
174static inline u64 scaled_div(u64 a, u32 b)
175{
176 BUG_ON(b==0);
177 a *= 1000000;
178 do_div(a, b);
179 return a;
180}
181
/*
 * scaled_div32 - scaled_div() with the result limited to 32 bits
 *
 * Returns (a * 1000000) / b as computed by scaled_div(). If that quotient
 * exceeds UINT_MAX, the overflow is reported through DCCP_CRIT (printing
 * the original a and b) and UINT_MAX is returned instead.
 */
182static inline u32 scaled_div32(u64 a, u32 b)
183{
184 u64 result = scaled_div(a, b);
185
186 if (result > UINT_MAX) {
187 DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U",
188 (unsigned long long)a, b);
189 return UINT_MAX;
190 }
191 return result;
192}
174#endif /* _DCCP_CCID3_H_ */ 193#endif /* _DCCP_CCID3_H_ */