aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:30:19 -0400
committerGerrit Renker <gerrit@erg.abdn.ac.uk>2008-09-04 01:45:43 -0400
commita3cbdde8e9c38b66b4f13ac5d6ff1939ded0ff20 (patch)
tree8e66f40579776dbc07fdacb206c4d56e1b351e86 /net/dccp
parent53ac9570c8145710aaed9e1eb850c2e991a4ebc1 (diff)
dccp ccid-3: Preventing Oscillations
This implements [RFC 3448, 4.5], which performs congestion avoidance behaviour by reducing the transmit rate as the queueing delay (measured in terms of long-term RTT) increases. Oscillation can be turned on/off via a module option (do_osc_prev) and via sysfs (using mode 0644), the default is off. Overflow analysis: ------------------ * oscillation prevention is done after update_x(), so that t_ipi <= 64000; * hence the multiplication "t_ipi * sqrt(R_sample)" needs 64 bits; * done using u64 for sqrt_sample and explicit typecast of t_ipi; * the divisor, R_sqmean, is non-zero because oscillation prevention is first called when receiving the second feedback packet, and tfrc_scaled_rtt() > 0. A detailed discussion of the algorithm (with plots) is on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid3/sender_notes/oscillation_prevention/ The algorithm has negative side effects: * when allowing to decrease t_ipi (leads to a large RTT) and * when using it during slow-start; both uses are therefore disabled. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/ccids/ccid3.c40
-rw-r--r--net/dccp/ccids/ccid3.h6
-rw-r--r--net/dccp/ccids/lib/tfrc.h15
3 files changed, 59 insertions, 2 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 7cd76c6c790c..06cfdad84a6a 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,6 +49,8 @@ static int ccid3_debug;
49/* 49/*
50 * Transmitter Half-Connection Routines 50 * Transmitter Half-Connection Routines
51 */ 51 */
52/* Oscillation Prevention/Reduction: recommended by rfc3448bis, on by default */
53static int do_osc_prev = true;
52 54
53/* 55/*
54 * Compute the initial sending rate X_init in the manner of RFC 3390: 56 * Compute the initial sending rate X_init in the manner of RFC 3390:
@@ -296,6 +298,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
296 hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len); 298 hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len);
297 ccid3_update_send_interval(hctx); 299 ccid3_update_send_interval(hctx);
298 300
301 /* Seed value for Oscillation Prevention (sec. 4.5) */
302 hctx->r_sqmean = tfrc_scaled_sqrt(hctx->rtt);
303
299 } else { 304 } else {
300 delay = ktime_us_delta(hctx->t_nom, now); 305 delay = ktime_us_delta(hctx->t_nom, now);
301 ccid3_pr_debug("delay=%ld\n", (long)delay); 306 ccid3_pr_debug("delay=%ld\n", (long)delay);
@@ -400,6 +405,38 @@ done_computing_x:
400 hctx->s, hctx->p, hctx->x_calc, 405 hctx->s, hctx->p, hctx->x_calc,
401 (unsigned)(hctx->x_recv >> 6), 406 (unsigned)(hctx->x_recv >> 6),
402 (unsigned)(hctx->x >> 6)); 407 (unsigned)(hctx->x >> 6));
408 /*
409 * Oscillation Reduction (RFC 3448, 4.5) - modifying t_ipi according to
410 * RTT changes, multiplying by X/X_inst = sqrt(R_sample)/R_sqmean. This
411 * can be useful if few connections share a link, avoiding that buffer
412 * fill levels (RTT) oscillate as a result of frequent adjustments to X.
413 * A useful presentation with background information is in
414 * Joerg Widmer, "Equation-Based Congestion Control",
415 * MSc Thesis, University of Mannheim, Germany, 2000
416 * (sec. 3.6.4), who calls this ISM ("Inter-packet Space Modulation").
417 */
418 if (do_osc_prev) {
419 r_sample = tfrc_scaled_sqrt(r_sample);
420 /*
421 * The modulation can work in both ways: increase/decrease t_ipi
422 * according to long-term increases/decreases of the RTT. The
423 * former is a useful measure, since it works against queue
424 * build-up. The latter temporarily increases the sending rate,
425 * so that buffers fill up more quickly. This in turn causes
426 * the RTT to increase, so that either later reduction becomes
427 * necessary or the RTT stays at a very high level. Decreasing
428 * t_ipi is therefore not supported.
429 * Furthermore, during the initial slow-start phase the RTT
430 * naturally increases, where using the algorithm would cause
431 * delays. Hence it is disabled during the initial slow-start.
432 */
433 if (r_sample > hctx->r_sqmean && hctx->p > 0)
434 hctx->t_ipi = div_u64((u64)hctx->t_ipi * (u64)r_sample,
435 hctx->r_sqmean);
436 hctx->t_ipi = min_t(u32, hctx->t_ipi, TFRC_T_MBI);
437 /* update R_sqmean _after_ computing the modulation factor */
438 hctx->r_sqmean = tfrc_ewma(hctx->r_sqmean, r_sample, 9);
439 }
403 440
404 /* unschedule no feedback timer */ 441 /* unschedule no feedback timer */
405 sk_stop_timer(sk, &hctx->no_feedback_timer); 442 sk_stop_timer(sk, &hctx->no_feedback_timer);
@@ -749,6 +786,9 @@ static struct ccid_operations ccid3 = {
749 .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, 786 .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt,
750}; 787};
751 788
789module_param(do_osc_prev, bool, 0644);
790MODULE_PARM_DESC(do_osc_prev, "Use Oscillation Prevention (RFC 3448, 4.5)");
791
752#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 792#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
753module_param(ccid3_debug, bool, 0644); 793module_param(ccid3_debug, bool, 0644);
754MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); 794MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 342235c57bf3..af6e1bf937d9 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -47,8 +47,8 @@
47/* Two seconds as per RFC 3448 4.2 */ 47/* Two seconds as per RFC 3448 4.2 */
48#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 48#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
49 49
50/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 50/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */
51#define TFRC_T_MBI 64 51#define TFRC_T_MBI (64 * USEC_PER_SEC)
52 52
53/* 53/*
54 * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are 54 * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
@@ -76,6 +76,7 @@ enum ccid3_options {
76 * @x_recv - Receive rate in 64 * bytes per second 76 * @x_recv - Receive rate in 64 * bytes per second
77 * @x_calc - Calculated rate in bytes per second 77 * @x_calc - Calculated rate in bytes per second
78 * @rtt - Estimate of current round trip time in usecs 78 * @rtt - Estimate of current round trip time in usecs
79 * @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5)
79 * @p - Current loss event rate (0-1) scaled by 1000000 80 * @p - Current loss event rate (0-1) scaled by 1000000
80 * @s - Packet size in bytes 81 * @s - Packet size in bytes
81 * @t_rto - Nofeedback Timer setting in usecs 82 * @t_rto - Nofeedback Timer setting in usecs
@@ -94,6 +95,7 @@ struct ccid3_hc_tx_sock {
94 u64 x_recv; 95 u64 x_recv;
95 u32 x_calc; 96 u32 x_calc;
96 u32 rtt; 97 u32 rtt;
98 u16 r_sqmean;
97 u32 p; 99 u32 p;
98 u32 t_rto; 100 u32 t_rto;
99 u32 t_ipi; 101 u32 t_ipi;
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index bb47146ac7d1..ede12f53de5a 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -48,6 +48,21 @@ static inline u32 scaled_div32(u64 a, u64 b)
48} 48}
49 49
50/** 50/**
51 * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1
52 * Uses scaling to improve accuracy of the integer approximation of sqrt(). The
53 * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
54 * clamped RTT samples (dccp_sample_rtt).
55 * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
56 * scaling factor is neutralised. For this purpose, it avoids returning zero.
57 */
58static inline u16 tfrc_scaled_sqrt(const u32 sample)
59{
60 const unsigned long non_zero_sample = sample ? : 1;
61
62 return int_sqrt(non_zero_sample << 10);
63}
64
65/**
51 * tfrc_ewma - Exponentially weighted moving average 66 * tfrc_ewma - Exponentially weighted moving average
52 * @weight: Weight to be used as damping factor, in units of 1/10 67 * @weight: Weight to be used as damping factor, in units of 1/10
53 */ 68 */