diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2010-09-14 14:16:59 -0400 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2010-09-15 06:36:01 -0400 |
commit | 20cbd3e120a0c20bebe420e1fed0e816730bb988 (patch) | |
tree | 1c58cf0b974fd0bb4b5a17846e4711e109901096 | |
parent | 068e8a30320e33b1f8d15df9eaef84f04258f96d (diff) |
dccp ccid-3: A lower bound for the inter-packet scheduling algorithm
This fixes a subtle bug in the calculation of the inter-packet gap and shows
that t_delta, as it is currently used, is not needed.
The algorithm from RFC 5348, 8.3 below continually computes a send time t_nom,
which is initialised with the current time t_now; t_gran = 1E6 / HZ specifies
the scheduling granularity, s the packet size, and X the sending rate:
t_distance = t_nom - t_now; // in microseconds
t_delta = min(t_ipi, t_gran) / 2; // `delta' parameter in microseconds
if (t_distance >= t_delta) {
reschedule after (t_distance / 1000) milliseconds;
} else {
t_ipi = s / X; // inter-packet interval in usec
t_nom += t_ipi; // compute the next send time
send packet now;
}
Problem:
--------
Rescheduling requires a conversion into milliseconds (sk_reset_timer()). The
highest jiffy resolution with HZ=1000 is 1 millisecond, so using a higher
granularity does not make much sense here.
As a consequence, values of t_distance < 1000 are truncated to 0. This issue
has so far been resolved by using instead
if (t_distance >= t_delta + 1000)
reschedule after (t_distance / 1000) milliseconds;
This is unnecessarily large, a lower bound is t_delta' = max(t_delta, 1000).
And it implies a further simplification:
a) when HZ >= 500, then t_delta <= t_gran/2 = 10^6/(2*HZ) <= 1000, so that
t_delta' = MAX(1000, t_delta) = 1000 (constant value);
b) when HZ < 500, then t_delta = 1/2*MIN(rtt, t_ipi, t_gran) <= t_gran/2,
so that 1000 <= t_delta' <= t_gran/2.
The maximum error of using a constant t_delta in (b) is less than half a jiffy.
Fix:
----
The patch replaces t_delta with a constant, whose value depends on CONFIG_HZ,
changing the above algorithm to:
if (t_distance >= t_delta')
reschedule after (t_distance / 1000) milliseconds;
where t_delta' = 10^6/(2*HZ) if HZ < 500, and t_delta' = 1000 otherwise.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
-rw-r--r-- | net/dccp/ccids/ccid3.c | 19 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 18 |
2 files changed, 21 insertions, 16 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 278e17069322..e9ca0983ac58 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -91,19 +91,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk) | |||
91 | return scaled_div(w_init << 6, hc->tx_rtt); | 91 | return scaled_div(w_init << 6, hc->tx_rtt); |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /** |
95 | * Recalculate t_ipi and delta (should be called whenever X changes) | 95 | * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst |
96 | * This respects the granularity of X_inst (64 * bytes/second). | ||
96 | */ | 97 | */ |
97 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) | 98 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) |
98 | { | 99 | { |
99 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ | ||
100 | hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); | 100 | hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); |
101 | 101 | ||
102 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | 102 | ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi, |
103 | hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); | 103 | hc->tx_s, (unsigned)(hc->tx_x >> 6)); |
104 | |||
105 | ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi, | ||
106 | hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6)); | ||
107 | } | 104 | } |
108 | 105 | ||
109 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) | 106 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) |
@@ -332,15 +329,15 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
332 | delay = ktime_us_delta(hc->tx_t_nom, now); | 329 | delay = ktime_us_delta(hc->tx_t_nom, now); |
333 | ccid3_pr_debug("delay=%ld\n", (long)delay); | 330 | ccid3_pr_debug("delay=%ld\n", (long)delay); |
334 | /* | 331 | /* |
335 | * Scheduling of packet transmissions [RFC 3448, 4.6] | 332 | * Scheduling of packet transmissions (RFC 5348, 8.3) |
336 | * | 333 | * |
337 | * if (t_now > t_nom - delta) | 334 | * if (t_now > t_nom - delta) |
338 | * // send the packet now | 335 | * // send the packet now |
339 | * else | 336 | * else |
340 | * // send the packet in (t_nom - t_now) milliseconds. | 337 | * // send the packet in (t_nom - t_now) milliseconds. |
341 | */ | 338 | */ |
342 | if (delay - (s64)hc->tx_delta >= 1000) | 339 | if (delay >= TFRC_T_DELTA) |
343 | return (u32)delay / 1000L; | 340 | return (u32)delay / USEC_PER_MSEC; |
344 | 341 | ||
345 | ccid3_hc_tx_update_win_count(hc, now); | 342 | ccid3_hc_tx_update_win_count(hc, now); |
346 | break; | 343 | break; |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index b7e569c22f36..4a00174a97dd 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -45,12 +45,22 @@ | |||
45 | /* Two seconds as per RFC 5348, 4.2 */ | 45 | /* Two seconds as per RFC 5348, 4.2 */ |
46 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) | 46 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) |
47 | 47 | ||
48 | /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ | ||
49 | #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) | ||
50 | |||
51 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ | 48 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ |
52 | #define TFRC_T_MBI 64 | 49 | #define TFRC_T_MBI 64 |
53 | 50 | ||
51 | /* | ||
52 | * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are | ||
53 | * rounded down to 0, since sk_reset_timer() here uses millisecond granularity. | ||
54 | * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse | ||
55 | * resolution of HZ < 500 means that the error is below one timer tick (t_gran) | ||
56 | * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ). | ||
57 | */ | ||
58 | #if (HZ >= 500) | ||
59 | # define TFRC_T_DELTA USEC_PER_MSEC | ||
60 | #else | ||
61 | # define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ)) | ||
62 | #endif | ||
63 | |||
54 | enum ccid3_options { | 64 | enum ccid3_options { |
55 | TFRC_OPT_LOSS_EVENT_RATE = 192, | 65 | TFRC_OPT_LOSS_EVENT_RATE = 192, |
56 | TFRC_OPT_LOSS_INTERVALS = 193, | 66 | TFRC_OPT_LOSS_INTERVALS = 193, |
@@ -90,7 +100,6 @@ enum ccid3_hc_tx_states { | |||
90 | * @tx_no_feedback_timer: Handle to no feedback timer | 100 | * @tx_no_feedback_timer: Handle to no feedback timer |
91 | * @tx_t_ld: Time last doubled during slow start | 101 | * @tx_t_ld: Time last doubled during slow start |
92 | * @tx_t_nom: Nominal send time of next packet | 102 | * @tx_t_nom: Nominal send time of next packet |
93 | * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs | ||
94 | * @tx_hist: Packet history | 103 | * @tx_hist: Packet history |
95 | * @tx_options_received: Parsed set of retrieved options | 104 | * @tx_options_received: Parsed set of retrieved options |
96 | */ | 105 | */ |
@@ -109,7 +118,6 @@ struct ccid3_hc_tx_sock { | |||
109 | struct timer_list tx_no_feedback_timer; | 118 | struct timer_list tx_no_feedback_timer; |
110 | ktime_t tx_t_ld; | 119 | ktime_t tx_t_ld; |
111 | ktime_t tx_t_nom; | 120 | ktime_t tx_t_nom; |
112 | u32 tx_delta; | ||
113 | struct tfrc_tx_hist_entry *tx_hist; | 121 | struct tfrc_tx_hist_entry *tx_hist; |
114 | struct ccid3_options_received tx_options_received; | 122 | struct ccid3_options_received tx_options_received; |
115 | }; | 123 | }; |