aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorGerrit Renker <gerrit@erg.abdn.ac.uk>2010-09-14 14:16:59 -0400
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2010-09-15 06:36:01 -0400
commit20cbd3e120a0c20bebe420e1fed0e816730bb988 (patch)
tree1c58cf0b974fd0bb4b5a17846e4711e109901096 /net/dccp
parent068e8a30320e33b1f8d15df9eaef84f04258f96d (diff)
dccp ccid-3: A lower bound for the inter-packet scheduling algorithm
This fixes a subtle bug in the calculation of the inter-packet gap and shows that t_delta, as it is currently used, is not needed. The algorithm from RFC 5348, 8.3 below continually computes a send time t_nom, which is initialised with the current time t_now; t_gran = 1E6 / HZ specifies the scheduling granularity, s the packet size, and X the sending rate: t_distance = t_nom - t_now; // in microseconds t_delta = min(t_ipi, t_gran) / 2; // `delta' parameter in microseconds if (t_distance >= t_delta) { reschedule after (t_distance / 1000) milliseconds; } else { t_ipi = s / X; // inter-packet interval in usec t_nom += t_ipi; // compute the next send time send packet now; } Problem: -------- Rescheduling requires a conversion into milliseconds (sk_reset_timer()). The highest jiffy resolution with HZ=1000 is 1 millisecond, so using a higher granularity does not make much sense here. As a consequence, values of t_distance < 1000 are truncated to 0. This issue has so far been resolved by using instead if (t_distance >= t_delta + 1000) reschedule after (t_distance / 1000) milliseconds; This is unnecessarily large, a lower bound is t_delta' = max(t_delta, 1000). And it implies a further simplification: a) when HZ >= 500, then t_delta <= t_gran/2 = 10^6/(2*HZ) <= 1000, so that t_delta' = MAX(1000, t_delta) = 1000 (constant value); b) when HZ < 500, then t_delta = 1/2*MIN(rtt, t_ipi, t_gran) <= t_gran/2, so that 1000 <= t_delta' <= t_gran/2. The maximum error of using a constant t_delta in (b) is less than half a jiffy. Fix: ---- The patch replaces t_delta with a constant, whose value depends on CONFIG_HZ, changing the above algorithm to: if (t_distance >= t_delta') reschedule after (t_distance / 1000) milliseconds; where t_delta' = 10^6/(2*HZ) if HZ < 500, and t_delta' = 1000 otherwise. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/ccids/ccid3.c19
-rw-r--r--net/dccp/ccids/ccid3.h18
2 files changed, 21 insertions, 16 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 278e17069322..e9ca0983ac58 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -91,19 +91,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
91 return scaled_div(w_init << 6, hc->tx_rtt); 91 return scaled_div(w_init << 6, hc->tx_rtt);
92} 92}
93 93
94/* 94/**
95 * Recalculate t_ipi and delta (should be called whenever X changes) 95 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
96 * This respects the granularity of X_inst (64 * bytes/second).
96 */ 97 */
97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) 98static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
98{ 99{
99 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); 100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
101 101
102 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 102 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
103 hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 103 hc->tx_s, (unsigned)(hc->tx_x >> 6));
104
105 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
106 hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
107} 104}
108 105
109static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) 106static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
@@ -332,15 +329,15 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
332 delay = ktime_us_delta(hc->tx_t_nom, now); 329 delay = ktime_us_delta(hc->tx_t_nom, now);
333 ccid3_pr_debug("delay=%ld\n", (long)delay); 330 ccid3_pr_debug("delay=%ld\n", (long)delay);
334 /* 331 /*
335 * Scheduling of packet transmissions [RFC 3448, 4.6] 332 * Scheduling of packet transmissions (RFC 5348, 8.3)
336 * 333 *
337 * if (t_now > t_nom - delta) 334 * if (t_now > t_nom - delta)
338 * // send the packet now 335 * // send the packet now
339 * else 336 * else
340 * // send the packet in (t_nom - t_now) milliseconds. 337 * // send the packet in (t_nom - t_now) milliseconds.
341 */ 338 */
342 if (delay - (s64)hc->tx_delta >= 1000) 339 if (delay >= TFRC_T_DELTA)
343 return (u32)delay / 1000L; 340 return (u32)delay / USEC_PER_MSEC;
344 341
345 ccid3_hc_tx_update_win_count(hc, now); 342 ccid3_hc_tx_update_win_count(hc, now);
346 break; 343 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index b7e569c22f36..4a00174a97dd 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -45,12 +45,22 @@
45/* Two seconds as per RFC 5348, 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
49#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
50
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 48/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 49#define TFRC_T_MBI 64
53 50
51/*
52 * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
53 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
54 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
55 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
56 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
57 */
58#if (HZ >= 500)
59# define TFRC_T_DELTA USEC_PER_MSEC
60#else
61# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
62#endif
63
54enum ccid3_options { 64enum ccid3_options {
55 TFRC_OPT_LOSS_EVENT_RATE = 192, 65 TFRC_OPT_LOSS_EVENT_RATE = 192,
56 TFRC_OPT_LOSS_INTERVALS = 193, 66 TFRC_OPT_LOSS_INTERVALS = 193,
@@ -90,7 +100,6 @@ enum ccid3_hc_tx_states {
90 * @tx_no_feedback_timer: Handle to no feedback timer 100 * @tx_no_feedback_timer: Handle to no feedback timer
91 * @tx_t_ld: Time last doubled during slow start 101 * @tx_t_ld: Time last doubled during slow start
92 * @tx_t_nom: Nominal send time of next packet 102 * @tx_t_nom: Nominal send time of next packet
93 * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
94 * @tx_hist: Packet history 103 * @tx_hist: Packet history
95 * @tx_options_received: Parsed set of retrieved options 104 * @tx_options_received: Parsed set of retrieved options
96 */ 105 */
@@ -109,7 +118,6 @@ struct ccid3_hc_tx_sock {
109 struct timer_list tx_no_feedback_timer; 118 struct timer_list tx_no_feedback_timer;
110 ktime_t tx_t_ld; 119 ktime_t tx_t_ld;
111 ktime_t tx_t_nom; 120 ktime_t tx_t_nom;
112 u32 tx_delta;
113 struct tfrc_tx_hist_entry *tx_hist; 121 struct tfrc_tx_hist_entry *tx_hist;
114 struct ccid3_options_received tx_options_received; 122 struct ccid3_options_received tx_options_received;
115}; 123};