aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:30:19 -0400
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:45:33 -0400
commitde6f2b59e5cd15a8772adb732a1d80e141a77115 (patch)
tree3ed9cb3498826675d4a56896fde7d2520f9aa122 /net/dccp
parentb2e317f4b5ae73733963c702fae0f246d234100b (diff)
dccp ccid-3: Bug fix for the inter-packet scheduling algorithm
This fixes a subtle bug in the calculation of the inter-packet gap and shows that t_delta, as it is currently used, is not needed. And hence replaced. The algorithm from RFC 3448, 4.6 below continually computes a send time t_nom, which is initialised with the current time t_now; t_gran = 1E6 / HZ specifies the scheduling granularity, s the packet size, and X the sending rate: t_distance = t_nom - t_now; // in microseconds t_delta = min(t_ipi, t_gran) / 2; // `delta' parameter in microseconds if (t_distance >= t_delta) { reschedule after (t_distance / 1000) milliseconds; } else { t_ipi = s / X; // inter-packet interval in usec t_nom += t_ipi; // compute the next send time send packet now; } 1) Description of the bug ------------------------- Rescheduling requires a conversion into milliseconds, due to this call chain: * ccid3_hc_tx_send_packet() returns a timeout in milliseconds, * this value is converted by msecs_to_jiffies() in dccp_write_xmit(), * and finally used as jiffy-expires-value for sk_reset_timer(). The highest jiffy resolution with HZ=1000 is 1 millisecond, so using a higher granularity does not make much sense here. As a consequence, values of t_distance < 1000 are truncated to 0. This issue has so far been resolved by using instead if (t_distance >= t_delta + 1000) reschedule after (t_distance / 1000) milliseconds; The bug is in artificially inflating t_delta to t_delta' = t_delta + 1000. This is unnecessarily large, a more adequate value is t_delta' = max(t_delta, 1000). 2) Consequences of using the corrected t_delta' ----------------------------------------------- Since t_delta <= t_gran/2 = 10^6/(2*HZ), we have t_delta <= 1000 as long as HZ >= 500. This means that t_delta' = max(1000, t_delta) is constant at 1000. On the other hand, when using a coarse HZ value of HZ < 500, we have three sub-cases that can all be reduced to using another constant of t_gran/2. (a) The first case arises when t_ipi > t_gran. Here t_delta' is the constant t_delta' = max(1000, t_gran/2) = t_gran/2. (b) If t_ipi <= 2000 < t_gran = 10^6/HZ usec, then t_delta = t_ipi/2 <= 1000, so that t_delta' = max(1000, t_delta) = 1000 < t_gran/2. (c) If 2000 < t_ipi <= t_gran, we have t_delta' = max(t_delta, 1000) = t_ipi/2. In the second and third cases we have delay values less than t_gran/2, which is in the order of less than or equal to half a jiffy. How these are treated depends on how fractions of a jiffy are handled: they are either always rounded down to 0, or always rounded up to 1 jiffy (assuming non-zero values). In both cases the error is on average in the order of 50%. Thus we are not increasing the error when in the second/third case we replace a value less than t_gran/2 with 0, by setting t_delta' to the constant t_gran/2. 3) Summary ---------- Fixing (1) and considering (2), the patch replaces t_delta with a constant, whose value depends on CONFIG_HZ, changing the above algorithm to: if (t_distance >= t_delta') reschedule after (t_distance / 1000) milliseconds; where t_delta' = 10^6/(2*HZ) if HZ < 500, and t_delta' = 1000 otherwise. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/ccids/ccid3.c17
-rw-r--r--net/dccp/ccids/ccid3.h19
2 files changed, 21 insertions, 15 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index b4cc62e2e2bb..eb1bda08eeb2 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -93,19 +93,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
93 return scaled_div(w_init << 6, hctx->rtt); 93 return scaled_div(w_init << 6, hctx->rtt);
94} 94}
95 95
96/* 96/**
97 * Recalculate t_ipi and delta (should be called whenever X changes) 97 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
98 * This respects the granularity of X_inst (64 * bytes/second).
98 */ 99 */
99static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) 100static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
100{ 101{
101 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
102 hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x); 102 hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x);
103 103
104 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 104 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hctx->t_ipi,
105 hctx->delta = min_t(u32, hctx->t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 105 hctx->s, (unsigned)(hctx->x >> 6));
106
107 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hctx->t_ipi,
108 hctx->delta, hctx->s, (unsigned)(hctx->x >> 6));
109} 106}
110 107
111static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) 108static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
@@ -340,8 +337,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
340 * else 337 * else
341 * // send the packet in (t_nom - t_now) milliseconds. 338 * // send the packet in (t_nom - t_now) milliseconds.
342 */ 339 */
343 if (delay - (s64)hctx->delta >= 1000) 340 if (delay >= TFRC_T_DELTA)
344 return (u32)delay / 1000L; 341 return (u32)delay / USEC_PER_MSEC;
345 342
346 ccid3_hc_tx_update_win_count(hctx, now); 343 ccid3_hc_tx_update_win_count(hctx, now);
347 break; 344 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0cfcfff8f5fb..92a5e1688a07 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -47,12 +47,23 @@
47/* Two seconds as per RFC 3448 4.2 */ 47/* Two seconds as per RFC 3448 4.2 */
48#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 48#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
49 49
50/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
51#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
52
53/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 50/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
54#define TFRC_T_MBI 64 51#define TFRC_T_MBI 64
55 52
53/*
54 * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
55 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
56 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
57 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
58 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
59 */
60#if (HZ >= 500)
61# define TFRC_T_DELTA USEC_PER_MSEC
62#else
63# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
64#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
65#endif
66
56enum ccid3_options { 67enum ccid3_options {
57 TFRC_OPT_LOSS_EVENT_RATE = 192, 68 TFRC_OPT_LOSS_EVENT_RATE = 192,
58 TFRC_OPT_LOSS_INTERVALS = 193, 69 TFRC_OPT_LOSS_INTERVALS = 193,
@@ -92,7 +103,6 @@ enum ccid3_hc_tx_states {
92 * @no_feedback_timer - Handle to no feedback timer 103 * @no_feedback_timer - Handle to no feedback timer
93 * @t_ld - Time last doubled during slow start 104 * @t_ld - Time last doubled during slow start
94 * @t_nom - Nominal send time of next packet 105 * @t_nom - Nominal send time of next packet
95 * @delta - Send timer delta (RFC 3448, 4.6) in usecs
96 * @hist - Packet history 106 * @hist - Packet history
97 * @options_received - Parsed set of retrieved options 107 * @options_received - Parsed set of retrieved options
98 */ 108 */
@@ -111,7 +121,6 @@ struct ccid3_hc_tx_sock {
111 struct timer_list no_feedback_timer; 121 struct timer_list no_feedback_timer;
112 ktime_t t_ld; 122 ktime_t t_ld;
113 ktime_t t_nom; 123 ktime_t t_nom;
114 u32 delta;
115 struct tfrc_tx_hist_entry *hist; 124 struct tfrc_tx_hist_entry *hist;
116 struct ccid3_options_received options_received; 125 struct ccid3_options_received options_received;
117}; 126};