author    Gerrit Renker <gerrit@erg.abdn.ac.uk>    2008-09-09 07:27:22 -0400
committer Gerrit Renker <gerrit@erg.abdn.ac.uk>    2008-09-09 07:27:22 -0400
commit    410e27a49bb98bc7fa3ff5fc05cc313817b9f253 (patch)
tree      88bb1fcf84f9ebfa4299c9a8dcd9e6330b358446 /net/dccp/ccids/lib
parent    0a68a20cc3eafa73bb54097c28b921147d7d3685 (diff)
This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp",
as it accidentally contained the wrong set of patches. These will be
submitted separately.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp/ccids/lib')
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c    30
-rw-r--r--  net/dccp/ccids/lib/loss_interval.h     4
-rw-r--r--  net/dccp/ccids/lib/packet_history.c  282
-rw-r--r--  net/dccp/ccids/lib/packet_history.h   78
-rw-r--r--  net/dccp/ccids/lib/tfrc.h             16
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c    29
6 files changed, 168 insertions(+), 271 deletions(-)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index b1ae8f8259e5..5b3ce0688c5c 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
 
 /**
  * tfrc_lh_update_i_mean  -  Update the `open' loss interval I_0
- * This updates I_mean as the sequence numbers increase. As a consequence, the
- * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
- * decreases, and thus there is no need to send renewed feedback.
+ * For recomputing p: returns `true' if p > p_prev  <=>  1/p < 1/p_prev
  */
-void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
+	u32 old_i_mean = lh->i_mean;
 	s64 len;
 
 	if (cur == NULL)			/* not initialised */
-		return;
-
-	/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
-	if (!dccp_data_packet(skb))
-		return;
+		return 0;
 
 	len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
 
 	if (len - (s64)cur->li_length <= 0)	/* duplicate or reordered */
-		return;
+		return 0;
 
 	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
 		/*
@@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
 		cur->li_is_closed = 1;
 
 	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
-		return;
+		return 0;
 
 	cur->li_length = len;
 	tfrc_lh_calc_i_mean(lh);
+
+	return (lh->i_mean < old_i_mean);
 }
+EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
 
 /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
 static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
  * @sk:	Used by @calc_first_li in caller-specific way (subtyping)
  * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
  */
-bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 			 u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
 	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
 
 	if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
-		return false;
+		return 0;
 
 	new = tfrc_lh_demand_next(lh);
 	if (unlikely(new == NULL)) {
 		DCCP_CRIT("Cannot allocate/add loss record.");
-		return false;
+		return 0;
 	}
 
 	new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
@@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
 
 		tfrc_lh_calc_i_mean(lh);
 	}
-	return true;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
 
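Context for the hunks above: the pre-revert `void' variant of tfrc_lh_update_i_mean() relied on growth of the open interval I_0 only ever lowering p = W_tot/max(I_tot0, I_tot1), so no renewed feedback was needed; the restored `u8' variant instead tells its caller whether I_mean shrank, i.e. whether p = 1/I_mean rose above p_prev. A minimal sketch of a receiver-side caller using that flag; the function and field names here are hypothetical illustrations, only tfrc_lh_update_i_mean() and dccp_send_ack() are taken from the tree:

	/* Hypothetical caller -- not part of this commit. */
	static void rx_update_loss_stats(struct sock *sk,
					 struct tfrc_loss_hist *lh,
					 struct sk_buff *skb)
	{
		/* Non-zero return: i_mean shrank, so p (= 1/i_mean) exceeded
		 * p_prev and renewed feedback is worth scheduling. */
		if (tfrc_lh_update_i_mean(lh, skb))
			dccp_send_ack(sk);	/* assumed feedback path */
	}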
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index d08a226db43e..246018a3b269 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
 
 struct tfrc_rx_hist;
 
-extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
 				u32 (*first_li)(struct sock *), struct sock *);
-extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
 extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
 #endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index cce9f03bda3e..6cc108afdc3b 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -40,6 +40,18 @@
 #include "packet_history.h"
 #include "../../dccp.h"
 
+/**
+ * tfrc_tx_hist_entry - Simple singly-linked TX history list
+ * @next:  next oldest entry (LIFO order)
+ * @seqno: sequence number of this entry
+ * @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
+};
+
 /*
  * Transmitter History Routines
  */
@@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void)
 	}
 }
 
+static struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+	while (head != NULL && head->seqno != seqno)
+		head = head->next;
+
+	return head;
+}
+
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
 	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
+u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
+		     const ktime_t now)
+{
+	u32 rtt = 0;
+	struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
+
+	if (packet != NULL) {
+		rtt = ktime_us_delta(now, packet->stamp);
+		/*
+		 * Garbage-collect older (irrelevant) entries:
+		 */
+		tfrc_tx_hist_purge(&packet->next);
+	}
+
+	return rtt;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
+
 /*
  * Receiver History Routines
  */
@@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
-
-static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
-{
-	struct tfrc_rx_hist_entry *tmp = h->ring[a];
-
-	h->ring[a] = h->ring[b];
-	h->ring[b] = tmp;
-}
-
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
-	__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
-			       tfrc_rx_hist_index(h, b));
-}
-
-/**
- * tfrc_rx_hist_resume_rtt_sampling  -  Prepare RX history for RTT sampling
- * This is called after loss detection has finished, when the history entry
- * with the index of `loss_count' holds the highest-received sequence number.
- * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
- */
-static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
-{
-	__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
-	h->loss_count = h->loss_start = 0;
+	const u8 idx_a = tfrc_rx_hist_index(h, a),
+		 idx_b = tfrc_rx_hist_index(h, b);
+	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+
+	h->ring[idx_a] = h->ring[idx_b];
+	h->ring[idx_b] = tmp;
 }
 
 /*
@@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
 	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 	    s1 = DCCP_SKB_CB(skb)->dccpd_seq;
 
-	if (!dccp_loss_free(s0, s1, n1))	/* gap between S0 and S1 */
+	if (!dccp_loss_free(s0, s1, n1)) {	/* gap between S0 and S1 */
 		h->loss_count = 1;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
+	}
 }
 
 static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
@@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
 
 	if (dccp_loss_free(s2, s1, n1)) {
 		/* hole is filled: S0, S2, and S1 are consecutive */
-		tfrc_rx_hist_resume_rtt_sampling(h);
+		h->loss_count = 0;
+		h->loss_start = tfrc_rx_hist_index(h, 1);
 	} else
 		/* gap between S2 and S1: just update loss_prev */
 		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 
 	if (dccp_loss_free(s1, s2, n2)) {
 		/* entire hole filled by S0, S3, S1, S2 */
-		tfrc_rx_hist_resume_rtt_sampling(h);
+		h->loss_start = tfrc_rx_hist_index(h, 2);
+		h->loss_count = 0;
 	} else {
 		/* gap remains between S1 and S2 */
 		h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 
 	if (dccp_loss_free(s2, s3, n3)) {
 		/* no gap between S2 and S3: entire hole is filled */
-		tfrc_rx_hist_resume_rtt_sampling(h);
+		h->loss_start = tfrc_rx_hist_index(h, 3);
+		h->loss_count = 0;
 	} else {
 		/* gap between S2 and S3 */
 		h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
 }
 
 /**
- * tfrc_rx_congestion_event  -  Loss detection and further processing
+ * tfrc_rx_handle_loss  -  Loss detection and further processing
  * @h:		    The non-empty RX history object
  * @lh:		    Loss Intervals database to update
  * @skb:	    Currently received packet
  * @ndp:	    The NDP count belonging to @skb
- * @first_li:	    Caller-dependent computation of first loss interval in @lh
+ * @calc_first_li:  Caller-dependent computation of first loss interval in @lh
  * @sk:		    Used by @calc_first_li (see tfrc_lh_interval_add)
  * Chooses action according to pending loss, updates LI database when a new
  * loss was detected, and does required post-processing. Returns 1 when caller
 * should send feedback, 0 otherwise.
@@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
  * records accordingly, the caller should not perform any more RX history
  * operations when loss_count is greater than 0 after calling this function.
  */
-bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
-			      struct tfrc_loss_hist *lh,
-			      struct sk_buff *skb, const u64 ndp,
-			      u32 (*first_li)(struct sock *), struct sock *sk)
+int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+			struct tfrc_loss_hist *lh,
+			struct sk_buff *skb, const u64 ndp,
+			u32 (*calc_first_li)(struct sock *), struct sock *sk)
 {
-	bool new_event = false;
-
-	if (tfrc_rx_hist_duplicate(h, skb))
-		return 0;
+	int is_new_loss = 0;
 
 	if (h->loss_count == 0) {
 		__do_track_loss(h, skb, ndp);
-		tfrc_rx_hist_sample_rtt(h, skb);
-		tfrc_rx_hist_add_packet(h, skb, ndp);
 	} else if (h->loss_count == 1) {
 		__one_after_loss(h, skb, ndp);
 	} else if (h->loss_count != 2) {
@@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
 		/*
 		 * Update Loss Interval database and recycle RX records
 		 */
-		new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
+		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
 		__three_after_loss(h);
 	}
-
-	/*
-	 * Update moving-average of `s' and the sum of received payload bytes.
-	 */
-	if (dccp_data_packet(skb)) {
-		const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
-
-		h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
-		h->bytes_recvd += payload;
-	}
-
-	/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
-	if (!new_event)
-		tfrc_lh_update_i_mean(lh, skb);
-
-	return new_event;
+	return is_new_loss;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
+EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
 
-/* Compute the sending rate X_recv measured between feedback intervals */
-u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
 {
-	u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
-	s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
+	int i;
 
-	WARN_ON(delta <= 0);
-	/*
-	 * Ensure that the sampling interval for X_recv is at least one RTT,
-	 * by extending the sampling interval backwards in time, over the last
-	 * R_(m-1) seconds, as per rfc3448bis-06, 6.2.
-	 * To reduce noise (e.g. when the RTT changes often), this is only
-	 * done when delta is smaller than RTT/2.
-	 */
-	if (last_x_recv > 0 && delta < last_rtt/2) {
-		tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
-			      (long)delta, (unsigned)last_rtt);
-
-		delta = (bytes ? delta : 0) + last_rtt;
-		bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL)
+			goto out_free;
 	}
 
-	if (unlikely(bytes == 0)) {
-		DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
-		return last_x_recv;
+	h->loss_count = h->loss_start = 0;
+	return 0;
+
+out_free:
+	while (i-- != 0) {
+		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+		h->ring[i] = NULL;
 	}
-	return scaled_div32(bytes, delta);
+	return -ENOBUFS;
 }
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
 
 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 {
@@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
 
-static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
-{
-	int i;
-
-	memset(h, 0, sizeof(*h));
-
-	for (i = 0; i <= TFRC_NDUPACK; i++) {
-		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
-		if (h->ring[i] == NULL) {
-			tfrc_rx_hist_purge(h);
-			return -ENOBUFS;
-		}
-	}
-	return 0;
-}
-
-int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
-{
-	if (tfrc_rx_hist_alloc(h))
-		return -ENOBUFS;
-	/*
-	 * Initialise first entry with GSR to start loss detection as early as
-	 * possible. Code using this must not use any other fields. The entry
-	 * will be overwritten once the CCID updates its received packets.
-	 */
-	tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
+/**
+ * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+{
+	return h->ring[0];
+}
+
+/**
+ * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
+{
+	return h->ring[h->rtt_sample_prev];
+}
 
 /**
  * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
- * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
- * is pending and uses the following history entries (via rtt_sample_prev):
- *  - h->ring[0]  contains the most recent history entry prior to @skb;
- *  - h->ring[1]  is an unused `dummy' entry when the current difference is 0;
+ * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
+ * to compute a sample with given data - calling function should check this.
  */
-void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 {
-	struct tfrc_rx_hist_entry *last = h->ring[0];
-	u32 sample, delta_v;
-
-	/*
-	 * When not to sample:
-	 * - on non-data packets
-	 *   (RFC 4342, 8.1: CCVal only fully defined for data packets);
-	 * - when no data packets have been received yet
-	 *   (FIXME: using sampled packet size as indicator here);
-	 * - as long as there are gaps in the sequence space (pending loss).
-	 */
-	if (!dccp_data_packet(skb) || h->packet_size == 0 ||
-	    tfrc_rx_hist_loss_pending(h))
-		return;
+	u32 sample = 0,
+	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+			    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+
+	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
+		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
+			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+			if (sample)
+				sample = 4 / sample *
+					 ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
+							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
+			else	/*
+				 * FIXME: This condition is in principle not
+				 * possible but occurs when CCID is used for
+				 * two-way data traffic. I have tried to trace
+				 * it, but the cause does not seem to be here.
+				 */
+				DCCP_BUG("please report to dccp@vger.kernel.org"
+					 " => prev = %u, last = %u",
+					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+		} else if (delta_v < 1) {
+			h->rtt_sample_prev = 1;
+			goto keep_ref_for_next_time;
+		}
 
-	h->rtt_sample_prev = 0;			/* reset previous candidate */
+	} else if (delta_v == 4)		/* optimal match */
+		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
+	else {					/* suboptimal match */
+		h->rtt_sample_prev = 2;
+		goto keep_ref_for_next_time;
+	}
 
-	delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
-	if (delta_v == 0) {	/* less than RTT/4 difference */
-		h->rtt_sample_prev = 1;
-		return;
+	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
+		DCCP_WARN("RTT sample %u too large, using max\n", sample);
+		sample = DCCP_SANE_RTT_MAX;
 	}
-	sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
 
-	if (delta_v <= 4)	/* between RTT/4 and RTT */
-		sample *= 4 / delta_v;
-	else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
-		/*
-		 * Optimisation: CCVal difference is greater than 1 RTT, yet the
-		 * sample is less than the local RTT estimate; which means that
-		 * the RTT estimate is too high.
-		 * To avoid noise, it is not done if the sample is below RTT/2.
-		 */
-		return;
+	h->rtt_sample_prev = 0;	/* use current entry as next reference */
+keep_ref_for_next_time:
 
-	/* Use a lower weight than usual to increase responsiveness */
-	h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
+	return sample;
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
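Both versions of tfrc_rx_hist_sample_rtt() above derive the RTT from the CCVal window counter, which advances once per quarter-RTT (RFC 4342, 8.1): a counter delta of delta_v in 1..4 over a measured interval t suggests RTT = 4t/delta_v. A standalone illustration of that arithmetic (not code from this diff; note the in-kernel variants use the coarser integer form t * (4 / delta_v)):

	#include <stdint.h>

	/* RTT estimate from a CCVal quarter-RTT delta, illustration only. */
	static uint32_t rtt_from_ccval_delta(uint32_t t_us, uint8_t delta_v)
	{
		/* e.g. t_us = 30000 over delta_v = 3 windows => 40000 us */
		return 4 * t_us / delta_v;
	}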
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 555e65cd73a0..461cc91cce88 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,28 +40,12 @@
 #include <linux/slab.h>
 #include "tfrc.h"
 
-/**
- * tfrc_tx_hist_entry - Simple singly-linked TX history list
- * @next:  next oldest entry (LIFO order)
- * @seqno: sequence number of this entry
- * @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
-	struct tfrc_tx_hist_entry *next;
-	u64			  seqno;
-	ktime_t			  stamp;
-};
-
-static inline struct tfrc_tx_hist_entry *
-	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
-	while (head != NULL && head->seqno != seqno)
-		head = head->next;
-	return head;
-}
+struct tfrc_tx_hist_entry;
 
 extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
 extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
+extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
+			    const u64 seqno, const ktime_t now);
 
 /* Subtraction a-b modulo-16, respects circular wrap-around */
 #define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry {
  * @loss_count:	Number of entries in circular history
  * @loss_start:	Movable index (for loss detection)
  * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
- * @rtt_estimate:	Receiver RTT estimate
- * @packet_size:	Packet size in bytes (as per RFC 3448, 3.1)
- * @bytes_recvd:	Number of bytes received since @bytes_start
- * @bytes_start:	Start time for counting @bytes_recvd
  */
 struct tfrc_rx_hist {
 	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
 	u8			  loss_count:2,
 				  loss_start:2;
-	/* Receiver RTT sampling */
 #define rtt_sample_prev		  loss_start
-	u32			  rtt_estimate;
-	/* Receiver sampling of application payload lengths */
-	u32			  packet_size,
-				  bytes_recvd;
-	ktime_t			  bytes_start;
 };
 
 /**
@@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
 	return h->loss_count > 0;
 }
 
-/*
- * Accessor functions to retrieve parameters sampled by the RX history
- */
-static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
-{
-	if (h->packet_size == 0) {
-		DCCP_WARN("No sample for s, using fallback\n");
-		return TCP_MIN_RCVMSS;
-	}
-	return h->packet_size;
-
-}
-static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
-{
-	if (h->rtt_estimate == 0) {
-		DCCP_WARN("No RTT estimate available, using fallback RTT\n");
-		return DCCP_FALLBACK_RTT;
-	}
-	return h->rtt_estimate;
-}
-
-static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
-{
-	h->bytes_recvd = 0;
-	h->bytes_start = ktime_get_real();
-}
-
-extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
-
-
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 				    const struct sk_buff *skb, const u64 ndp);
 
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
 
 struct tfrc_loss_hist;
-extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
-				     struct tfrc_loss_hist *lh,
-				     struct sk_buff *skb, const u64 ndp,
-				     u32 (*first_li)(struct sock *sk),
-				     struct sock *sk);
-extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+			       struct tfrc_loss_hist *lh,
+			       struct sk_buff *skb, const u64 ndp,
+			       u32 (*first_li)(struct sock *sk),
+			       struct sock *sk);
+extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
 				   const struct sk_buff *skb);
-extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
+extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
 extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
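The SUB16() macro kept as context above underpins all of the CCVal arithmetic in this file: CCVal is a 4-bit counter, so plain subtraction would go wrong at wrap-around. A two-line illustration (the macro body is copied from the header; the example values are ours):

	#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)

	/* SUB16(3, 15) == (3 + 16 - 15) & 0xF == 4: the counter wrapped
	 * from 15 to 3, yet the circular distance is reported correctly. */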
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ede12f53de5a..ed9857527acf 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -48,21 +48,6 @@ static inline u32 scaled_div32(u64 a, u64 b)
 }
 
 /**
- * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1
- * Uses scaling to improve accuracy of the integer approximation of sqrt(). The
- * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
- * clamped RTT samples (dccp_sample_rtt).
- * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
- * scaling factor is neutralised. For this purpose, it avoids returning zero.
- */
-static inline u16 tfrc_scaled_sqrt(const u32 sample)
-{
-	const unsigned long non_zero_sample = sample ? : 1;
-
-	return int_sqrt(non_zero_sample << 10);
-}
-
-/**
  * tfrc_ewma  -  Exponentially weighted moving average
  * @weight: Weight to be used as damping factor, in units of 1/10
  */
@@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
 
 extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
-extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
 
 extern int tfrc_tx_packet_history_init(void);
 extern void tfrc_tx_packet_history_exit(void);
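tfrc_ewma(), whose kernel-doc survives in the context above, damps with a weight given in units of 1/10. A worked example assuming the usual TFRC form avg' = (weight * avg + (10 - weight) * newval) / 10 (the helper body itself lies outside this hunk, so that formula is stated here as an assumption):

	#include <stdint.h>

	/* EWMA with weight in units of 1/10 (assumed TFRC form). */
	static uint32_t ewma10(uint32_t avg, uint32_t newval, uint8_t weight)
	{
		/* avg=1000, newval=1500, weight=9 => (9000 + 1500)/10 = 1050 */
		return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
	}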
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 38239c4d5e14..2f20a29cffe4 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 
 	if (p <= TFRC_CALC_X_SPLIT)		{     /* 0.0000 < p <= 0.05   */
 		if (p < TFRC_SMALLEST_P) {	      /* 0.0000 < p <  0.0001 */
-			/*
-			 * In the congestion-avoidance phase p decays towards 0
-			 * when there are no further losses, so this case is
-			 * natural. Truncating to p_min = 0.01% means that the
-			 * maximum achievable throughput is limited to about
-			 * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
-			 * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
-			 */
-			tfrc_pr_debug("Value of p (%d) below resolution. "
-				      "Substituting %d\n", p, TFRC_SMALLEST_P);
+			DCCP_WARN("Value of p (%d) below resolution. "
+				  "Substituting %d\n", p, TFRC_SMALLEST_P);
 			index = 0;
 		} else				      /* 0.0001 <= p <= 0.05  */
 			index =  p/TFRC_SMALLEST_P - 1;
@@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 	result = scaled_div(s, R);
 	return scaled_div32(result, f);
 }
+
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
 /**
@@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 	index = tfrc_binsearch(fvalue, 0);
 	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
 }
-EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
 
-/**
- * tfrc_invert_loss_event_rate  -  Compute p so that 10^6 corresponds to 100%
- * When @loss_event_rate is large, there is a chance that p is truncated to 0.
- * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
- */
-u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
-{
-	if (loss_event_rate == UINT_MAX)	/* see RFC 4342, 8.5 */
-		return 0;
-	if (unlikely(loss_event_rate == 0))	/* map 1/0 into 100% */
-		return 1000000;
-	return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
-}
-EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
+EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
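The comment deleted in the first tfrc_equation.c hunk quantifies the cost of truncating p at TFRC_SMALLEST_P: with p_min = 0.01%, RFC 3448, 3.1 caps the computed rate at about X_calc_max = 122.4 * s/RTT. Reproducing that arithmetic as a standalone check (values taken from the deleted comment):

	#include <stdio.h>

	int main(void)
	{
		double s    = 1500.0;		/* segment size, bytes      */
		double rtt  = 0.01;		/* round-trip time, seconds */
		double xmax = 122.4 * s / rtt;	/* bytes per second         */

		/* prints ~146.9, matching the "147 Mbps" in the old comment */
		printf("X_calc_max = %.1f Mbit/s\n", xmax * 8.0 / 1e6);
		return 0;
	}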