diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-04 01:30:19 -0400 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-04 01:45:43 -0400 |
commit | a3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 (patch) | |
tree | 8e66f40579776dbc07fdacb206c4d56e1b351e86 /net/dccp/ccids | |
parent | 53ac9570c8145710aaed9e1eb850c2e991a4ebc1 (diff) |
dccp ccid-3: Preventing Oscillations
This implements [RFC 3448, 4.5], which performs congestion avoidance behaviour
by reducing the transmit rate as the queueing delay (measured in terms of
long-term RTT) increases.
Oscillation can be turned on/off via a module option (do_osc_prev) and via sysfs
(using mode 0644), the default is off.
Overflow analysis:
------------------
* oscillation prevention is done after update_x(), so that t_ipi <= 64000;
* hence the multiplication "t_ipi * sqrt(R_sample)" needs 64 bits;
* done using u64 for sqrt_sample and explicit typecast of t_ipi;
* the divisor, R_sqmean, is non-zero because oscillation prevention is first
called when receiving the second feedback packet, and tfrc_scaled_rtt() > 0.
A detailed discussion of the algorithm (with plots) is on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid3/sender_notes/oscillation_prevention/
The algorithm has negative side effects:
* when allowing to decrease t_ipi (leads to a large RTT) and
* when using it during slow-start;
both uses are therefore disabled.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp/ccids')
-rw-r--r-- | net/dccp/ccids/ccid3.c | 40 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 6 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 15 |
3 files changed, 59 insertions, 2 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 7cd76c6c790c..06cfdad84a6a 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -49,6 +49,8 @@ static int ccid3_debug; | |||
49 | /* | 49 | /* |
50 | * Transmitter Half-Connection Routines | 50 | * Transmitter Half-Connection Routines |
51 | */ | 51 | */ |
52 | /* Oscillation Prevention/Reduction: recommended by rfc3448bis, on by default */ | ||
53 | static int do_osc_prev = true; | ||
52 | 54 | ||
53 | /* | 55 | /* |
54 | * Compute the initial sending rate X_init in the manner of RFC 3390: | 56 | * Compute the initial sending rate X_init in the manner of RFC 3390: |
@@ -296,6 +298,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
296 | hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len); | 298 | hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len); |
297 | ccid3_update_send_interval(hctx); | 299 | ccid3_update_send_interval(hctx); |
298 | 300 | ||
301 | /* Seed value for Oscillation Prevention (sec. 4.5) */ | ||
302 | hctx->r_sqmean = tfrc_scaled_sqrt(hctx->rtt); | ||
303 | |||
299 | } else { | 304 | } else { |
300 | delay = ktime_us_delta(hctx->t_nom, now); | 305 | delay = ktime_us_delta(hctx->t_nom, now); |
301 | ccid3_pr_debug("delay=%ld\n", (long)delay); | 306 | ccid3_pr_debug("delay=%ld\n", (long)delay); |
@@ -400,6 +405,38 @@ done_computing_x: | |||
400 | hctx->s, hctx->p, hctx->x_calc, | 405 | hctx->s, hctx->p, hctx->x_calc, |
401 | (unsigned)(hctx->x_recv >> 6), | 406 | (unsigned)(hctx->x_recv >> 6), |
402 | (unsigned)(hctx->x >> 6)); | 407 | (unsigned)(hctx->x >> 6)); |
408 | /* | ||
409 | * Oscillation Reduction (RFC 3448, 4.5) - modifying t_ipi according to | ||
410 | * RTT changes, multiplying by X/X_inst = sqrt(R_sample)/R_sqmean. This | ||
411 | * can be useful if few connections share a link, avoiding that buffer | ||
412 | * fill levels (RTT) oscillate as a result of frequent adjustments to X. | ||
413 | * A useful presentation with background information is in | ||
414 | * Joerg Widmer, "Equation-Based Congestion Control", | ||
415 | * MSc Thesis, University of Mannheim, Germany, 2000 | ||
416 | * (sec. 3.6.4), who calls this ISM ("Inter-packet Space Modulation"). | ||
417 | */ | ||
418 | if (do_osc_prev) { | ||
419 | r_sample = tfrc_scaled_sqrt(r_sample); | ||
420 | /* | ||
421 | * The modulation can work in both ways: increase/decrease t_ipi | ||
422 | * according to long-term increases/decreases of the RTT. The | ||
423 | * former is a useful measure, since it works against queue | ||
424 | * build-up. The latter temporarily increases the sending rate, | ||
425 | * so that buffers fill up more quickly. This in turn causes | ||
426 | * the RTT to increase, so that either later reduction becomes | ||
427 | * necessary or the RTT stays at a very high level. Decreasing | ||
428 | * t_ipi is therefore not supported. | ||
429 | * Furthermore, during the initial slow-start phase the RTT | ||
430 | * naturally increases, where using the algorithm would cause | ||
431 | * delays. Hence it is disabled during the initial slow-start. | ||
432 | */ | ||
433 | if (r_sample > hctx->r_sqmean && hctx->p > 0) | ||
434 | hctx->t_ipi = div_u64((u64)hctx->t_ipi * (u64)r_sample, | ||
435 | hctx->r_sqmean); | ||
436 | hctx->t_ipi = min_t(u32, hctx->t_ipi, TFRC_T_MBI); | ||
437 | /* update R_sqmean _after_ computing the modulation factor */ | ||
438 | hctx->r_sqmean = tfrc_ewma(hctx->r_sqmean, r_sample, 9); | ||
439 | } | ||
403 | 440 | ||
404 | /* unschedule no feedback timer */ | 441 | /* unschedule no feedback timer */ |
405 | sk_stop_timer(sk, &hctx->no_feedback_timer); | 442 | sk_stop_timer(sk, &hctx->no_feedback_timer); |
@@ -749,6 +786,9 @@ static struct ccid_operations ccid3 = { | |||
749 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, | 786 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, |
750 | }; | 787 | }; |
751 | 788 | ||
789 | module_param(do_osc_prev, bool, 0644); | ||
790 | MODULE_PARM_DESC(do_osc_prev, "Use Oscillation Prevention (RFC 3448, 4.5)"); | ||
791 | |||
752 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG | 792 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG |
753 | module_param(ccid3_debug, bool, 0644); | 793 | module_param(ccid3_debug, bool, 0644); |
754 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | 794 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 342235c57bf3..af6e1bf937d9 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -47,8 +47,8 @@ | |||
47 | /* Two seconds as per RFC 3448 4.2 */ | 47 | /* Two seconds as per RFC 3448 4.2 */ |
48 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) | 48 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) |
49 | 49 | ||
50 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ | 50 | /* Maximum backoff interval t_mbi (RFC 3448, 4.3) */ |
51 | #define TFRC_T_MBI 64 | 51 | #define TFRC_T_MBI (64 * USEC_PER_SEC) |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are | 54 | * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are |
@@ -76,6 +76,7 @@ enum ccid3_options { | |||
76 | * @x_recv - Receive rate in 64 * bytes per second | 76 | * @x_recv - Receive rate in 64 * bytes per second |
77 | * @x_calc - Calculated rate in bytes per second | 77 | * @x_calc - Calculated rate in bytes per second |
78 | * @rtt - Estimate of current round trip time in usecs | 78 | * @rtt - Estimate of current round trip time in usecs |
79 | * @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5) | ||
79 | * @p - Current loss event rate (0-1) scaled by 1000000 | 80 | * @p - Current loss event rate (0-1) scaled by 1000000 |
80 | * @s - Packet size in bytes | 81 | * @s - Packet size in bytes |
81 | * @t_rto - Nofeedback Timer setting in usecs | 82 | * @t_rto - Nofeedback Timer setting in usecs |
@@ -94,6 +95,7 @@ struct ccid3_hc_tx_sock { | |||
94 | u64 x_recv; | 95 | u64 x_recv; |
95 | u32 x_calc; | 96 | u32 x_calc; |
96 | u32 rtt; | 97 | u32 rtt; |
98 | u16 r_sqmean; | ||
97 | u32 p; | 99 | u32 p; |
98 | u32 t_rto; | 100 | u32 t_rto; |
99 | u32 t_ipi; | 101 | u32 t_ipi; |
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index bb47146ac7d1..ede12f53de5a 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -48,6 +48,21 @@ static inline u32 scaled_div32(u64 a, u64 b) | |||
48 | } | 48 | } |
49 | 49 | ||
50 | /** | 50 | /** |
51 | * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1 | ||
52 | * Uses scaling to improve accuracy of the integer approximation of sqrt(). The | ||
53 | * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for | ||
54 | * clamped RTT samples (dccp_sample_rtt). | ||
55 | * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the | ||
56 | * scaling factor is neutralised. For this purpose, it avoids returning zero. | ||
57 | */ | ||
58 | static inline u16 tfrc_scaled_sqrt(const u32 sample) | ||
59 | { | ||
60 | const unsigned long non_zero_sample = sample ? : 1; | ||
61 | |||
62 | return int_sqrt(non_zero_sample << 10); | ||
63 | } | ||
64 | |||
65 | /** | ||
51 | * tfrc_ewma - Exponentially weighted moving average | 66 | * tfrc_ewma - Exponentially weighted moving average |
52 | * @weight: Weight to be used as damping factor, in units of 1/10 | 67 | * @weight: Weight to be used as damping factor, in units of 1/10 |
53 | */ | 68 | */ |