diff options
author | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-09 07:27:22 -0400 |
---|---|---|
committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2008-09-09 07:27:22 -0400 |
commit | 410e27a49bb98bc7fa3ff5fc05cc313817b9f253 (patch) | |
tree | 88bb1fcf84f9ebfa4299c9a8dcd9e6330b358446 /net/dccp/ccids | |
parent | 0a68a20cc3eafa73bb54097c28b921147d7d3685 (diff) |
This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp"
as it accidentally contained the wrong set of patches. These will be
submitted separately.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Diffstat (limited to 'net/dccp/ccids')
-rw-r--r-- | net/dccp/ccids/Kconfig | 30 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.c | 622 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.h | 63 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 762 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 153 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 30 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 4 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.c | 282 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 78 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 16 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc_equation.c | 29 |
11 files changed, 1116 insertions, 953 deletions
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index fb168be2cb43..12275943eab8 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig | |||
@@ -1,8 +1,10 @@ | |||
1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" | 1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" |
2 | depends on EXPERIMENTAL | ||
2 | 3 | ||
3 | config IP_DCCP_CCID2 | 4 | config IP_DCCP_CCID2 |
4 | tristate "CCID2 (TCP-Like)" | 5 | tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" |
5 | def_tristate IP_DCCP | 6 | def_tristate IP_DCCP |
7 | select IP_DCCP_ACKVEC | ||
6 | ---help--- | 8 | ---help--- |
7 | CCID 2, TCP-like Congestion Control, denotes Additive Increase, | 9 | CCID 2, TCP-like Congestion Control, denotes Additive Increase, |
8 | Multiplicative Decrease (AIMD) congestion control with behavior | 10 | Multiplicative Decrease (AIMD) congestion control with behavior |
@@ -34,7 +36,7 @@ config IP_DCCP_CCID2_DEBUG | |||
34 | If in doubt, say N. | 36 | If in doubt, say N. |
35 | 37 | ||
36 | config IP_DCCP_CCID3 | 38 | config IP_DCCP_CCID3 |
37 | tristate "CCID3 (TCP-Friendly)" | 39 | tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" |
38 | def_tristate IP_DCCP | 40 | def_tristate IP_DCCP |
39 | select IP_DCCP_TFRC_LIB | 41 | select IP_DCCP_TFRC_LIB |
40 | ---help--- | 42 | ---help--- |
@@ -62,9 +64,9 @@ config IP_DCCP_CCID3 | |||
62 | 64 | ||
63 | If in doubt, say M. | 65 | If in doubt, say M. |
64 | 66 | ||
65 | if IP_DCCP_CCID3 | ||
66 | config IP_DCCP_CCID3_DEBUG | 67 | config IP_DCCP_CCID3_DEBUG |
67 | bool "CCID3 debugging messages" | 68 | bool "CCID3 debugging messages" |
69 | depends on IP_DCCP_CCID3 | ||
68 | ---help--- | 70 | ---help--- |
69 | Enable CCID3-specific debugging messages. | 71 | Enable CCID3-specific debugging messages. |
70 | 72 | ||
@@ -74,29 +76,10 @@ config IP_DCCP_CCID3_DEBUG | |||
74 | 76 | ||
75 | If in doubt, say N. | 77 | If in doubt, say N. |
76 | 78 | ||
77 | choice | ||
78 | prompt "Select method for measuring the packet size s" | ||
79 | default IP_DCCP_CCID3_MEASURE_S_AS_MPS | ||
80 | |||
81 | config IP_DCCP_CCID3_MEASURE_S_AS_MPS | ||
82 | bool "Always use MPS in place of s" | ||
83 | ---help--- | ||
84 | This use is recommended as it is consistent with the initialisation | ||
85 | of X and suggested when s varies (rfc3448bis, (1) in section 4.1). | ||
86 | config IP_DCCP_CCID3_MEASURE_S_AS_AVG | ||
87 | bool "Use moving average" | ||
88 | ---help--- | ||
89 | An alternative way of tracking s, also supported by rfc3448bis. | ||
90 | This used to be the default for CCID-3 in previous kernels. | ||
91 | config IP_DCCP_CCID3_MEASURE_S_AS_MAX | ||
92 | bool "Track the maximum payload length" | ||
93 | ---help--- | ||
94 | An experimental method based on tracking the maximum packet size. | ||
95 | endchoice | ||
96 | |||
97 | config IP_DCCP_CCID3_RTO | 79 | config IP_DCCP_CCID3_RTO |
98 | int "Use higher bound for nofeedback timer" | 80 | int "Use higher bound for nofeedback timer" |
99 | default 100 | 81 | default 100 |
82 | depends on IP_DCCP_CCID3 && EXPERIMENTAL | ||
100 | ---help--- | 83 | ---help--- |
101 | Use higher lower bound for nofeedback timer expiration. | 84 | Use higher lower bound for nofeedback timer expiration. |
102 | 85 | ||
@@ -123,7 +106,6 @@ config IP_DCCP_CCID3_RTO | |||
123 | The purpose of the nofeedback timer is to slow DCCP down when there | 106 | The purpose of the nofeedback timer is to slow DCCP down when there |
124 | is serious network congestion: experimenting with larger values should | 107 | is serious network congestion: experimenting with larger values should |
125 | therefore not be performed on WANs. | 108 | therefore not be performed on WANs. |
126 | endif # IP_DCCP_CCID3 | ||
127 | 109 | ||
128 | config IP_DCCP_TFRC_LIB | 110 | config IP_DCCP_TFRC_LIB |
129 | tristate | 111 | tristate |
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index fa713227c66f..9a430734530c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -25,7 +25,7 @@ | |||
25 | /* | 25 | /* |
26 | * This implementation should follow RFC 4341 | 26 | * This implementation should follow RFC 4341 |
27 | */ | 27 | */ |
28 | #include "../feat.h" | 28 | |
29 | #include "../ccid.h" | 29 | #include "../ccid.h" |
30 | #include "../dccp.h" | 30 | #include "../dccp.h" |
31 | #include "ccid2.h" | 31 | #include "ccid2.h" |
@@ -34,8 +34,51 @@ | |||
34 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 34 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
35 | static int ccid2_debug; | 35 | static int ccid2_debug; |
36 | #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) | 36 | #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) |
37 | |||
38 | static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) | ||
39 | { | ||
40 | int len = 0; | ||
41 | int pipe = 0; | ||
42 | struct ccid2_seq *seqp = hctx->ccid2hctx_seqh; | ||
43 | |||
44 | /* there is data in the chain */ | ||
45 | if (seqp != hctx->ccid2hctx_seqt) { | ||
46 | seqp = seqp->ccid2s_prev; | ||
47 | len++; | ||
48 | if (!seqp->ccid2s_acked) | ||
49 | pipe++; | ||
50 | |||
51 | while (seqp != hctx->ccid2hctx_seqt) { | ||
52 | struct ccid2_seq *prev = seqp->ccid2s_prev; | ||
53 | |||
54 | len++; | ||
55 | if (!prev->ccid2s_acked) | ||
56 | pipe++; | ||
57 | |||
58 | /* packets are sent sequentially */ | ||
59 | BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, | ||
60 | prev->ccid2s_seq ) >= 0); | ||
61 | BUG_ON(time_before(seqp->ccid2s_sent, | ||
62 | prev->ccid2s_sent)); | ||
63 | |||
64 | seqp = prev; | ||
65 | } | ||
66 | } | ||
67 | |||
68 | BUG_ON(pipe != hctx->ccid2hctx_pipe); | ||
69 | ccid2_pr_debug("len of chain=%d\n", len); | ||
70 | |||
71 | do { | ||
72 | seqp = seqp->ccid2s_prev; | ||
73 | len++; | ||
74 | } while (seqp != hctx->ccid2hctx_seqh); | ||
75 | |||
76 | ccid2_pr_debug("total len=%d\n", len); | ||
77 | BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); | ||
78 | } | ||
37 | #else | 79 | #else |
38 | #define ccid2_pr_debug(format, a...) | 80 | #define ccid2_pr_debug(format, a...) |
81 | #define ccid2_hc_tx_check_sanity(hctx) | ||
39 | #endif | 82 | #endif |
40 | 83 | ||
41 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) | 84 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) |
@@ -44,7 +87,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) | |||
44 | int i; | 87 | int i; |
45 | 88 | ||
46 | /* check if we have space to preserve the pointer to the buffer */ | 89 | /* check if we have space to preserve the pointer to the buffer */ |
47 | if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *)) | 90 | if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / |
91 | sizeof(struct ccid2_seq*))) | ||
48 | return -ENOMEM; | 92 | return -ENOMEM; |
49 | 93 | ||
50 | /* allocate buffer and initialize linked list */ | 94 | /* allocate buffer and initialize linked list */ |
@@ -60,35 +104,38 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) | |||
60 | seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; | 104 | seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; |
61 | 105 | ||
62 | /* This is the first allocation. Initiate the head and tail. */ | 106 | /* This is the first allocation. Initiate the head and tail. */ |
63 | if (hctx->seqbufc == 0) | 107 | if (hctx->ccid2hctx_seqbufc == 0) |
64 | hctx->seqh = hctx->seqt = seqp; | 108 | hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; |
65 | else { | 109 | else { |
66 | /* link the existing list with the one we just created */ | 110 | /* link the existing list with the one we just created */ |
67 | hctx->seqh->ccid2s_next = seqp; | 111 | hctx->ccid2hctx_seqh->ccid2s_next = seqp; |
68 | seqp->ccid2s_prev = hctx->seqh; | 112 | seqp->ccid2s_prev = hctx->ccid2hctx_seqh; |
69 | 113 | ||
70 | hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; | 114 | hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; |
71 | seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt; | 115 | seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; |
72 | } | 116 | } |
73 | 117 | ||
74 | /* store the original pointer to the buffer so we can free it */ | 118 | /* store the original pointer to the buffer so we can free it */ |
75 | hctx->seqbuf[hctx->seqbufc] = seqp; | 119 | hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; |
76 | hctx->seqbufc++; | 120 | hctx->ccid2hctx_seqbufc++; |
77 | 121 | ||
78 | return 0; | 122 | return 0; |
79 | } | 123 | } |
80 | 124 | ||
81 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | 125 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
82 | { | 126 | { |
83 | if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) | 127 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
84 | return CCID_PACKET_WILL_DEQUEUE_LATER; | 128 | |
85 | return CCID_PACKET_SEND_AT_ONCE; | 129 | if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) |
130 | return 0; | ||
131 | |||
132 | return 1; /* XXX CCID should dequeue when ready instead of polling */ | ||
86 | } | 133 | } |
87 | 134 | ||
88 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | 135 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) |
89 | { | 136 | { |
90 | struct dccp_sock *dp = dccp_sk(sk); | 137 | struct dccp_sock *dp = dccp_sk(sk); |
91 | u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2); | 138 | u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2); |
92 | 139 | ||
93 | /* | 140 | /* |
94 | * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from | 141 | * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from |
@@ -100,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | |||
100 | DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); | 147 | DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); |
101 | val = max_ratio; | 148 | val = max_ratio; |
102 | } | 149 | } |
103 | if (val > DCCPF_ACK_RATIO_MAX) | 150 | if (val > 0xFFFF) /* RFC 4340, 11.3 */ |
104 | val = DCCPF_ACK_RATIO_MAX; | 151 | val = 0xFFFF; |
105 | 152 | ||
106 | if (val == dp->dccps_l_ack_ratio) | 153 | if (val == dp->dccps_l_ack_ratio) |
107 | return; | 154 | return; |
@@ -110,77 +157,99 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | |||
110 | dp->dccps_l_ack_ratio = val; | 157 | dp->dccps_l_ack_ratio = val; |
111 | } | 158 | } |
112 | 159 | ||
160 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) | ||
161 | { | ||
162 | ccid2_pr_debug("change SRTT to %ld\n", val); | ||
163 | hctx->ccid2hctx_srtt = val; | ||
164 | } | ||
165 | |||
166 | static void ccid2_start_rto_timer(struct sock *sk); | ||
167 | |||
113 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 168 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
114 | { | 169 | { |
115 | struct sock *sk = (struct sock *)data; | 170 | struct sock *sk = (struct sock *)data; |
116 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 171 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
117 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); | 172 | long s; |
118 | 173 | ||
119 | bh_lock_sock(sk); | 174 | bh_lock_sock(sk); |
120 | if (sock_owned_by_user(sk)) { | 175 | if (sock_owned_by_user(sk)) { |
121 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5); | 176 | sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, |
177 | jiffies + HZ / 5); | ||
122 | goto out; | 178 | goto out; |
123 | } | 179 | } |
124 | 180 | ||
125 | ccid2_pr_debug("RTO_EXPIRE\n"); | 181 | ccid2_pr_debug("RTO_EXPIRE\n"); |
126 | 182 | ||
183 | ccid2_hc_tx_check_sanity(hctx); | ||
184 | |||
127 | /* back-off timer */ | 185 | /* back-off timer */ |
128 | hctx->rto <<= 1; | 186 | hctx->ccid2hctx_rto <<= 1; |
129 | if (hctx->rto > DCCP_RTO_MAX) | 187 | |
130 | hctx->rto = DCCP_RTO_MAX; | 188 | s = hctx->ccid2hctx_rto / HZ; |
189 | if (s > 60) | ||
190 | hctx->ccid2hctx_rto = 60 * HZ; | ||
191 | |||
192 | ccid2_start_rto_timer(sk); | ||
131 | 193 | ||
132 | /* adjust pipe, cwnd etc */ | 194 | /* adjust pipe, cwnd etc */ |
133 | hctx->ssthresh = hctx->cwnd / 2; | 195 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2; |
134 | if (hctx->ssthresh < 2) | 196 | if (hctx->ccid2hctx_ssthresh < 2) |
135 | hctx->ssthresh = 2; | 197 | hctx->ccid2hctx_ssthresh = 2; |
136 | hctx->cwnd = 1; | 198 | hctx->ccid2hctx_cwnd = 1; |
137 | hctx->pipe = 0; | 199 | hctx->ccid2hctx_pipe = 0; |
138 | 200 | ||
139 | /* clear state about stuff we sent */ | 201 | /* clear state about stuff we sent */ |
140 | hctx->seqt = hctx->seqh; | 202 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; |
141 | hctx->packets_acked = 0; | 203 | hctx->ccid2hctx_packets_acked = 0; |
142 | 204 | ||
143 | /* clear ack ratio state. */ | 205 | /* clear ack ratio state. */ |
144 | hctx->rpseq = 0; | 206 | hctx->ccid2hctx_rpseq = 0; |
145 | hctx->rpdupack = -1; | 207 | hctx->ccid2hctx_rpdupack = -1; |
146 | ccid2_change_l_ack_ratio(sk, 1); | 208 | ccid2_change_l_ack_ratio(sk, 1); |
147 | 209 | ccid2_hc_tx_check_sanity(hctx); | |
148 | /* if we were blocked before, we may now send cwnd=1 packet */ | ||
149 | if (sender_was_blocked) | ||
150 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | ||
151 | /* restart backed-off timer */ | ||
152 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); | ||
153 | out: | 210 | out: |
154 | bh_unlock_sock(sk); | 211 | bh_unlock_sock(sk); |
155 | sock_put(sk); | 212 | sock_put(sk); |
156 | } | 213 | } |
157 | 214 | ||
158 | static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) | 215 | static void ccid2_start_rto_timer(struct sock *sk) |
216 | { | ||
217 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | ||
218 | |||
219 | ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto); | ||
220 | |||
221 | BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer)); | ||
222 | sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, | ||
223 | jiffies + hctx->ccid2hctx_rto); | ||
224 | } | ||
225 | |||
226 | static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | ||
159 | { | 227 | { |
160 | struct dccp_sock *dp = dccp_sk(sk); | 228 | struct dccp_sock *dp = dccp_sk(sk); |
161 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 229 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
162 | struct ccid2_seq *next; | 230 | struct ccid2_seq *next; |
163 | 231 | ||
164 | hctx->pipe++; | 232 | hctx->ccid2hctx_pipe++; |
165 | 233 | ||
166 | hctx->seqh->ccid2s_seq = dp->dccps_gss; | 234 | hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss; |
167 | hctx->seqh->ccid2s_acked = 0; | 235 | hctx->ccid2hctx_seqh->ccid2s_acked = 0; |
168 | hctx->seqh->ccid2s_sent = jiffies; | 236 | hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; |
169 | 237 | ||
170 | next = hctx->seqh->ccid2s_next; | 238 | next = hctx->ccid2hctx_seqh->ccid2s_next; |
171 | /* check if we need to alloc more space */ | 239 | /* check if we need to alloc more space */ |
172 | if (next == hctx->seqt) { | 240 | if (next == hctx->ccid2hctx_seqt) { |
173 | if (ccid2_hc_tx_alloc_seq(hctx)) { | 241 | if (ccid2_hc_tx_alloc_seq(hctx)) { |
174 | DCCP_CRIT("packet history - out of memory!"); | 242 | DCCP_CRIT("packet history - out of memory!"); |
175 | /* FIXME: find a more graceful way to bail out */ | 243 | /* FIXME: find a more graceful way to bail out */ |
176 | return; | 244 | return; |
177 | } | 245 | } |
178 | next = hctx->seqh->ccid2s_next; | 246 | next = hctx->ccid2hctx_seqh->ccid2s_next; |
179 | BUG_ON(next == hctx->seqt); | 247 | BUG_ON(next == hctx->ccid2hctx_seqt); |
180 | } | 248 | } |
181 | hctx->seqh = next; | 249 | hctx->ccid2hctx_seqh = next; |
182 | 250 | ||
183 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe); | 251 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, |
252 | hctx->ccid2hctx_pipe); | ||
184 | 253 | ||
185 | /* | 254 | /* |
186 | * FIXME: The code below is broken and the variables have been removed | 255 | * FIXME: The code below is broken and the variables have been removed |
@@ -203,12 +272,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) | |||
203 | */ | 272 | */ |
204 | #if 0 | 273 | #if 0 |
205 | /* Ack Ratio. Need to maintain a concept of how many windows we sent */ | 274 | /* Ack Ratio. Need to maintain a concept of how many windows we sent */ |
206 | hctx->arsent++; | 275 | hctx->ccid2hctx_arsent++; |
207 | /* We had an ack loss in this window... */ | 276 | /* We had an ack loss in this window... */ |
208 | if (hctx->ackloss) { | 277 | if (hctx->ccid2hctx_ackloss) { |
209 | if (hctx->arsent >= hctx->cwnd) { | 278 | if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) { |
210 | hctx->arsent = 0; | 279 | hctx->ccid2hctx_arsent = 0; |
211 | hctx->ackloss = 0; | 280 | hctx->ccid2hctx_ackloss = 0; |
212 | } | 281 | } |
213 | } else { | 282 | } else { |
214 | /* No acks lost up to now... */ | 283 | /* No acks lost up to now... */ |
@@ -218,28 +287,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) | |||
218 | int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - | 287 | int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - |
219 | dp->dccps_l_ack_ratio; | 288 | dp->dccps_l_ack_ratio; |
220 | 289 | ||
221 | denom = hctx->cwnd * hctx->cwnd / denom; | 290 | denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom; |
222 | 291 | ||
223 | if (hctx->arsent >= denom) { | 292 | if (hctx->ccid2hctx_arsent >= denom) { |
224 | ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); | 293 | ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); |
225 | hctx->arsent = 0; | 294 | hctx->ccid2hctx_arsent = 0; |
226 | } | 295 | } |
227 | } else { | 296 | } else { |
228 | /* we can't increase ack ratio further [1] */ | 297 | /* we can't increase ack ratio further [1] */ |
229 | hctx->arsent = 0; /* or maybe set it to cwnd*/ | 298 | hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ |
230 | } | 299 | } |
231 | } | 300 | } |
232 | #endif | 301 | #endif |
233 | 302 | ||
234 | /* setup RTO timer */ | 303 | /* setup RTO timer */ |
235 | if (!timer_pending(&hctx->rtotimer)) | 304 | if (!timer_pending(&hctx->ccid2hctx_rtotimer)) |
236 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); | 305 | ccid2_start_rto_timer(sk); |
237 | 306 | ||
238 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 307 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
239 | do { | 308 | do { |
240 | struct ccid2_seq *seqp = hctx->seqt; | 309 | struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; |
241 | 310 | ||
242 | while (seqp != hctx->seqh) { | 311 | while (seqp != hctx->ccid2hctx_seqh) { |
243 | ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", | 312 | ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", |
244 | (unsigned long long)seqp->ccid2s_seq, | 313 | (unsigned long long)seqp->ccid2s_seq, |
245 | seqp->ccid2s_acked, seqp->ccid2s_sent); | 314 | seqp->ccid2s_acked, seqp->ccid2s_sent); |
@@ -247,158 +316,205 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) | |||
247 | } | 316 | } |
248 | } while (0); | 317 | } while (0); |
249 | ccid2_pr_debug("=========\n"); | 318 | ccid2_pr_debug("=========\n"); |
319 | ccid2_hc_tx_check_sanity(hctx); | ||
250 | #endif | 320 | #endif |
251 | } | 321 | } |
252 | 322 | ||
253 | /** | 323 | /* XXX Lame code duplication! |
254 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm | 324 | * returns -1 if none was found. |
255 | * This code is almost identical with TCP's tcp_rtt_estimator(), since | 325 | * else returns the next offset to use in the function call. |
256 | * - it has a higher sampling frequency (recommended by RFC 1323), | ||
257 | * - the RTO does not collapse into RTT due to RTTVAR going towards zero, | ||
258 | * - it is simple (cf. more complex proposals such as Eifel timer or research | ||
259 | * which suggests that the gain should be set according to window size), | ||
260 | * - in tests it was found to work well with CCID2 [gerrit]. | ||
261 | */ | 326 | */ |
262 | static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) | 327 | static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, |
328 | unsigned char **vec, unsigned char *veclen) | ||
263 | { | 329 | { |
264 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 330 | const struct dccp_hdr *dh = dccp_hdr(skb); |
265 | long m = mrtt ? : 1; | 331 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); |
266 | 332 | unsigned char *opt_ptr; | |
267 | if (hctx->srtt == 0) { | 333 | const unsigned char *opt_end = (unsigned char *)dh + |
268 | /* First measurement m */ | 334 | (dh->dccph_doff * 4); |
269 | hctx->srtt = m << 3; | 335 | unsigned char opt, len; |
270 | hctx->mdev = m << 1; | 336 | unsigned char *value; |
271 | 337 | ||
272 | hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev); | 338 | BUG_ON(offset < 0); |
273 | hctx->rttvar = hctx->mdev_max; | 339 | options += offset; |
274 | hctx->rtt_seq = dccp_sk(sk)->dccps_gss; | 340 | opt_ptr = options; |
275 | } else { | 341 | if (opt_ptr >= opt_end) |
276 | /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ | 342 | return -1; |
277 | m -= (hctx->srtt >> 3); | 343 | |
278 | hctx->srtt += m; | 344 | while (opt_ptr != opt_end) { |
279 | 345 | opt = *opt_ptr++; | |
280 | /* Similarly, update scaled mdev with regard to |m| */ | 346 | len = 0; |
281 | if (m < 0) { | 347 | value = NULL; |
282 | m = -m; | 348 | |
283 | m -= (hctx->mdev >> 2); | 349 | /* Check if this isn't a single byte option */ |
350 | if (opt > DCCPO_MAX_RESERVED) { | ||
351 | if (opt_ptr == opt_end) | ||
352 | goto out_invalid_option; | ||
353 | |||
354 | len = *opt_ptr++; | ||
355 | if (len < 3) | ||
356 | goto out_invalid_option; | ||
284 | /* | 357 | /* |
285 | * This neutralises RTO increase when RTT < SRTT - mdev | 358 | * Remove the type and len fields, leaving |
286 | * (see P. Sarolahti, A. Kuznetsov,"Congestion Control | 359 | * just the value size |
287 | * in Linux TCP", USENIX 2002, pp. 49-62). | ||
288 | */ | 360 | */ |
289 | if (m > 0) | 361 | len -= 2; |
290 | m >>= 3; | 362 | value = opt_ptr; |
291 | } else { | 363 | opt_ptr += len; |
292 | m -= (hctx->mdev >> 2); | ||
293 | } | ||
294 | hctx->mdev += m; | ||
295 | 364 | ||
296 | if (hctx->mdev > hctx->mdev_max) { | 365 | if (opt_ptr > opt_end) |
297 | hctx->mdev_max = hctx->mdev; | 366 | goto out_invalid_option; |
298 | if (hctx->mdev_max > hctx->rttvar) | ||
299 | hctx->rttvar = hctx->mdev_max; | ||
300 | } | 367 | } |
301 | 368 | ||
302 | /* | 369 | switch (opt) { |
303 | * Decay RTTVAR at most once per flight, exploiting that | 370 | case DCCPO_ACK_VECTOR_0: |
304 | * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) | 371 | case DCCPO_ACK_VECTOR_1: |
305 | * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) | 372 | *vec = value; |
306 | * GAR is a useful bound for FlightSize = pipe, AWL is probably | 373 | *veclen = len; |
307 | * too low as it over-estimates pipe. | 374 | return offset + (opt_ptr - options); |
308 | */ | ||
309 | if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) { | ||
310 | if (hctx->mdev_max < hctx->rttvar) | ||
311 | hctx->rttvar -= (hctx->rttvar - | ||
312 | hctx->mdev_max) >> 2; | ||
313 | hctx->rtt_seq = dccp_sk(sk)->dccps_gss; | ||
314 | hctx->mdev_max = TCP_RTO_MIN; | ||
315 | } | 375 | } |
316 | } | 376 | } |
317 | 377 | ||
318 | /* | 378 | return -1; |
319 | * Set RTO from SRTT and RTTVAR | ||
320 | * Clock granularity is ignored since the minimum error for RTTVAR is | ||
321 | * clamped to 50msec (corresponding to HZ=20). This leads to a minimum | ||
322 | * RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP | ||
323 | * does not retransmit data, DCCP does not require TCP's recommended | ||
324 | * minimum timeout of one second". | ||
325 | */ | ||
326 | hctx->rto = (hctx->srtt >> 3) + hctx->rttvar; | ||
327 | 379 | ||
328 | if (hctx->rto > DCCP_RTO_MAX) | 380 | out_invalid_option: |
329 | hctx->rto = DCCP_RTO_MAX; | 381 | DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); |
382 | return -1; | ||
330 | } | 383 | } |
331 | 384 | ||
332 | static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, | 385 | static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) |
333 | unsigned int *maxincr) | ||
334 | { | 386 | { |
335 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 387 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
336 | 388 | ||
337 | if (hctx->cwnd < hctx->ssthresh) { | 389 | sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer); |
338 | if (*maxincr > 0 && ++hctx->packets_acked == 2) { | 390 | ccid2_pr_debug("deleted RTO timer\n"); |
339 | hctx->cwnd += 1; | ||
340 | *maxincr -= 1; | ||
341 | hctx->packets_acked = 0; | ||
342 | } | ||
343 | } else if (++hctx->packets_acked >= hctx->cwnd) { | ||
344 | hctx->cwnd += 1; | ||
345 | hctx->packets_acked = 0; | ||
346 | } | ||
347 | /* | ||
348 | * FIXME: RTT is sampled several times per acknowledgment (for each | ||
349 | * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). | ||
350 | * This causes the RTT to be over-estimated, since the older entries | ||
351 | * in the Ack Vector have earlier sending times. | ||
352 | * The cleanest solution is to not use the ccid2s_sent field at all | ||
353 | * and instead use DCCP timestamps - need to be resolved at some time. | ||
354 | */ | ||
355 | ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent); | ||
356 | } | 391 | } |
357 | 392 | ||
358 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | 393 | static inline void ccid2_new_ack(struct sock *sk, |
394 | struct ccid2_seq *seqp, | ||
395 | unsigned int *maxincr) | ||
359 | { | 396 | { |
360 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 397 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
361 | 398 | ||
362 | if (time_before(seqp->ccid2s_sent, hctx->last_cong)) { | 399 | if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { |
363 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); | 400 | if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) { |
364 | return; | 401 | hctx->ccid2hctx_cwnd += 1; |
402 | *maxincr -= 1; | ||
403 | hctx->ccid2hctx_packets_acked = 0; | ||
404 | } | ||
405 | } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) { | ||
406 | hctx->ccid2hctx_cwnd += 1; | ||
407 | hctx->ccid2hctx_packets_acked = 0; | ||
365 | } | 408 | } |
366 | 409 | ||
367 | hctx->last_cong = jiffies; | 410 | /* update RTO */ |
411 | if (hctx->ccid2hctx_srtt == -1 || | ||
412 | time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { | ||
413 | unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; | ||
414 | int s; | ||
415 | |||
416 | /* first measurement */ | ||
417 | if (hctx->ccid2hctx_srtt == -1) { | ||
418 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", | ||
419 | r, jiffies, | ||
420 | (unsigned long long)seqp->ccid2s_seq); | ||
421 | ccid2_change_srtt(hctx, r); | ||
422 | hctx->ccid2hctx_rttvar = r >> 1; | ||
423 | } else { | ||
424 | /* RTTVAR */ | ||
425 | long tmp = hctx->ccid2hctx_srtt - r; | ||
426 | long srtt; | ||
427 | |||
428 | if (tmp < 0) | ||
429 | tmp *= -1; | ||
430 | |||
431 | tmp >>= 2; | ||
432 | hctx->ccid2hctx_rttvar *= 3; | ||
433 | hctx->ccid2hctx_rttvar >>= 2; | ||
434 | hctx->ccid2hctx_rttvar += tmp; | ||
435 | |||
436 | /* SRTT */ | ||
437 | srtt = hctx->ccid2hctx_srtt; | ||
438 | srtt *= 7; | ||
439 | srtt >>= 3; | ||
440 | tmp = r >> 3; | ||
441 | srtt += tmp; | ||
442 | ccid2_change_srtt(hctx, srtt); | ||
443 | } | ||
444 | s = hctx->ccid2hctx_rttvar << 2; | ||
445 | /* clock granularity is 1 when based on jiffies */ | ||
446 | if (!s) | ||
447 | s = 1; | ||
448 | hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s; | ||
449 | |||
450 | /* must be at least a second */ | ||
451 | s = hctx->ccid2hctx_rto / HZ; | ||
452 | /* DCCP doesn't require this [but I like it cuz my code sux] */ | ||
453 | #if 1 | ||
454 | if (s < 1) | ||
455 | hctx->ccid2hctx_rto = HZ; | ||
456 | #endif | ||
457 | /* max 60 seconds */ | ||
458 | if (s > 60) | ||
459 | hctx->ccid2hctx_rto = HZ * 60; | ||
368 | 460 | ||
369 | hctx->cwnd = hctx->cwnd / 2 ? : 1U; | 461 | hctx->ccid2hctx_lastrtt = jiffies; |
370 | hctx->ssthresh = max(hctx->cwnd, 2U); | ||
371 | 462 | ||
372 | /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ | 463 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", |
373 | if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd) | 464 | hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, |
374 | ccid2_change_l_ack_ratio(sk, hctx->cwnd); | 465 | hctx->ccid2hctx_rto, HZ, r); |
466 | } | ||
467 | |||
468 | /* we got a new ack, so re-start RTO timer */ | ||
469 | ccid2_hc_tx_kill_rto_timer(sk); | ||
470 | ccid2_start_rto_timer(sk); | ||
375 | } | 471 | } |
376 | 472 | ||
377 | static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, | 473 | static void ccid2_hc_tx_dec_pipe(struct sock *sk) |
378 | u8 option, u8 *optval, u8 optlen) | ||
379 | { | 474 | { |
380 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 475 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
381 | 476 | ||
382 | switch (option) { | 477 | if (hctx->ccid2hctx_pipe == 0) |
383 | case DCCPO_ACK_VECTOR_0: | 478 | DCCP_BUG("pipe == 0"); |
384 | case DCCPO_ACK_VECTOR_1: | 479 | else |
385 | return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen, | 480 | hctx->ccid2hctx_pipe--; |
386 | option - DCCPO_ACK_VECTOR_0); | 481 | |
482 | if (hctx->ccid2hctx_pipe == 0) | ||
483 | ccid2_hc_tx_kill_rto_timer(sk); | ||
484 | } | ||
485 | |||
486 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | ||
487 | { | ||
488 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | ||
489 | |||
490 | if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { | ||
491 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); | ||
492 | return; | ||
387 | } | 493 | } |
388 | return 0; | 494 | |
495 | hctx->ccid2hctx_last_cong = jiffies; | ||
496 | |||
497 | hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U; | ||
498 | hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U); | ||
499 | |||
500 | /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ | ||
501 | if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd) | ||
502 | ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd); | ||
389 | } | 503 | } |
390 | 504 | ||
391 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 505 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
392 | { | 506 | { |
393 | struct dccp_sock *dp = dccp_sk(sk); | 507 | struct dccp_sock *dp = dccp_sk(sk); |
394 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 508 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
395 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); | ||
396 | struct dccp_ackvec_parsed *avp; | ||
397 | u64 ackno, seqno; | 509 | u64 ackno, seqno; |
398 | struct ccid2_seq *seqp; | 510 | struct ccid2_seq *seqp; |
511 | unsigned char *vector; | ||
512 | unsigned char veclen; | ||
513 | int offset = 0; | ||
399 | int done = 0; | 514 | int done = 0; |
400 | unsigned int maxincr = 0; | 515 | unsigned int maxincr = 0; |
401 | 516 | ||
517 | ccid2_hc_tx_check_sanity(hctx); | ||
402 | /* check reverse path congestion */ | 518 | /* check reverse path congestion */ |
403 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; | 519 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; |
404 | 520 | ||
@@ -407,21 +523,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
407 | * -sorbo. | 523 | * -sorbo. |
408 | */ | 524 | */ |
409 | /* need to bootstrap */ | 525 | /* need to bootstrap */ |
410 | if (hctx->rpdupack == -1) { | 526 | if (hctx->ccid2hctx_rpdupack == -1) { |
411 | hctx->rpdupack = 0; | 527 | hctx->ccid2hctx_rpdupack = 0; |
412 | hctx->rpseq = seqno; | 528 | hctx->ccid2hctx_rpseq = seqno; |
413 | } else { | 529 | } else { |
414 | /* check if packet is consecutive */ | 530 | /* check if packet is consecutive */ |
415 | if (dccp_delta_seqno(hctx->rpseq, seqno) == 1) | 531 | if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) |
416 | hctx->rpseq = seqno; | 532 | hctx->ccid2hctx_rpseq = seqno; |
417 | /* it's a later packet */ | 533 | /* it's a later packet */ |
418 | else if (after48(seqno, hctx->rpseq)) { | 534 | else if (after48(seqno, hctx->ccid2hctx_rpseq)) { |
419 | hctx->rpdupack++; | 535 | hctx->ccid2hctx_rpdupack++; |
420 | 536 | ||
421 | /* check if we got enough dupacks */ | 537 | /* check if we got enough dupacks */ |
422 | if (hctx->rpdupack >= NUMDUPACK) { | 538 | if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) { |
423 | hctx->rpdupack = -1; /* XXX lame */ | 539 | hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ |
424 | hctx->rpseq = 0; | 540 | hctx->ccid2hctx_rpseq = 0; |
425 | 541 | ||
426 | ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); | 542 | ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); |
427 | } | 543 | } |
@@ -429,22 +545,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
429 | } | 545 | } |
430 | 546 | ||
431 | /* check forward path congestion */ | 547 | /* check forward path congestion */ |
432 | if (dccp_packet_without_ack(skb)) | 548 | /* still didn't send out new data packets */ |
549 | if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) | ||
433 | return; | 550 | return; |
434 | 551 | ||
435 | /* still didn't send out new data packets */ | 552 | switch (DCCP_SKB_CB(skb)->dccpd_type) { |
436 | if (hctx->seqh == hctx->seqt) | 553 | case DCCP_PKT_ACK: |
437 | goto done; | 554 | case DCCP_PKT_DATAACK: |
555 | break; | ||
556 | default: | ||
557 | return; | ||
558 | } | ||
438 | 559 | ||
439 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | 560 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; |
440 | if (after48(ackno, hctx->high_ack)) | 561 | if (after48(ackno, hctx->ccid2hctx_high_ack)) |
441 | hctx->high_ack = ackno; | 562 | hctx->ccid2hctx_high_ack = ackno; |
442 | 563 | ||
443 | seqp = hctx->seqt; | 564 | seqp = hctx->ccid2hctx_seqt; |
444 | while (before48(seqp->ccid2s_seq, ackno)) { | 565 | while (before48(seqp->ccid2s_seq, ackno)) { |
445 | seqp = seqp->ccid2s_next; | 566 | seqp = seqp->ccid2s_next; |
446 | if (seqp == hctx->seqh) { | 567 | if (seqp == hctx->ccid2hctx_seqh) { |
447 | seqp = hctx->seqh->ccid2s_prev; | 568 | seqp = hctx->ccid2hctx_seqh->ccid2s_prev; |
448 | break; | 569 | break; |
449 | } | 570 | } |
450 | } | 571 | } |
@@ -454,26 +575,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
454 | * packets per acknowledgement. Rounding up avoids that cwnd is not | 575 | * packets per acknowledgement. Rounding up avoids that cwnd is not |
455 | * advanced when Ack Ratio is 1 and gives a slight edge otherwise. | 576 | * advanced when Ack Ratio is 1 and gives a slight edge otherwise. |
456 | */ | 577 | */ |
457 | if (hctx->cwnd < hctx->ssthresh) | 578 | if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) |
458 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); | 579 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); |
459 | 580 | ||
460 | /* go through all ack vectors */ | 581 | /* go through all ack vectors */ |
461 | list_for_each_entry(avp, &hctx->av_chunks, node) { | 582 | while ((offset = ccid2_ackvector(sk, skb, offset, |
583 | &vector, &veclen)) != -1) { | ||
462 | /* go through this ack vector */ | 584 | /* go through this ack vector */ |
463 | for (; avp->len--; avp->vec++) { | 585 | while (veclen--) { |
464 | u64 ackno_end_rl = SUB48(ackno, | 586 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; |
465 | dccp_ackvec_runlen(avp->vec)); | 587 | u64 ackno_end_rl = SUB48(ackno, rl); |
466 | 588 | ||
467 | ccid2_pr_debug("ackvec %llu |%u,%u|\n", | 589 | ccid2_pr_debug("ackvec start:%llu end:%llu\n", |
468 | (unsigned long long)ackno, | 590 | (unsigned long long)ackno, |
469 | dccp_ackvec_state(avp->vec) >> 6, | 591 | (unsigned long long)ackno_end_rl); |
470 | dccp_ackvec_runlen(avp->vec)); | ||
471 | /* if the seqno we are analyzing is larger than the | 592 | /* if the seqno we are analyzing is larger than the |
472 | * current ackno, then move towards the tail of our | 593 | * current ackno, then move towards the tail of our |
473 | * seqnos. | 594 | * seqnos. |
474 | */ | 595 | */ |
475 | while (after48(seqp->ccid2s_seq, ackno)) { | 596 | while (after48(seqp->ccid2s_seq, ackno)) { |
476 | if (seqp == hctx->seqt) { | 597 | if (seqp == hctx->ccid2hctx_seqt) { |
477 | done = 1; | 598 | done = 1; |
478 | break; | 599 | break; |
479 | } | 600 | } |
@@ -486,24 +607,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
486 | * run length | 607 | * run length |
487 | */ | 608 | */ |
488 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { | 609 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { |
489 | const u8 state = dccp_ackvec_state(avp->vec); | 610 | const u8 state = *vector & |
611 | DCCP_ACKVEC_STATE_MASK; | ||
490 | 612 | ||
491 | /* new packet received or marked */ | 613 | /* new packet received or marked */ |
492 | if (state != DCCPAV_NOT_RECEIVED && | 614 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && |
493 | !seqp->ccid2s_acked) { | 615 | !seqp->ccid2s_acked) { |
494 | if (state == DCCPAV_ECN_MARKED) | 616 | if (state == |
617 | DCCP_ACKVEC_STATE_ECN_MARKED) { | ||
495 | ccid2_congestion_event(sk, | 618 | ccid2_congestion_event(sk, |
496 | seqp); | 619 | seqp); |
497 | else | 620 | } else |
498 | ccid2_new_ack(sk, seqp, | 621 | ccid2_new_ack(sk, seqp, |
499 | &maxincr); | 622 | &maxincr); |
500 | 623 | ||
501 | seqp->ccid2s_acked = 1; | 624 | seqp->ccid2s_acked = 1; |
502 | ccid2_pr_debug("Got ack for %llu\n", | 625 | ccid2_pr_debug("Got ack for %llu\n", |
503 | (unsigned long long)seqp->ccid2s_seq); | 626 | (unsigned long long)seqp->ccid2s_seq); |
504 | hctx->pipe--; | 627 | ccid2_hc_tx_dec_pipe(sk); |
505 | } | 628 | } |
506 | if (seqp == hctx->seqt) { | 629 | if (seqp == hctx->ccid2hctx_seqt) { |
507 | done = 1; | 630 | done = 1; |
508 | break; | 631 | break; |
509 | } | 632 | } |
@@ -513,6 +636,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
513 | break; | 636 | break; |
514 | 637 | ||
515 | ackno = SUB48(ackno_end_rl, 1); | 638 | ackno = SUB48(ackno_end_rl, 1); |
639 | vector++; | ||
516 | } | 640 | } |
517 | if (done) | 641 | if (done) |
518 | break; | 642 | break; |
@@ -521,11 +645,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
521 | /* The state about what is acked should be correct now | 645 | /* The state about what is acked should be correct now |
522 | * Check for NUMDUPACK | 646 | * Check for NUMDUPACK |
523 | */ | 647 | */ |
524 | seqp = hctx->seqt; | 648 | seqp = hctx->ccid2hctx_seqt; |
525 | while (before48(seqp->ccid2s_seq, hctx->high_ack)) { | 649 | while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) { |
526 | seqp = seqp->ccid2s_next; | 650 | seqp = seqp->ccid2s_next; |
527 | if (seqp == hctx->seqh) { | 651 | if (seqp == hctx->ccid2hctx_seqh) { |
528 | seqp = hctx->seqh->ccid2s_prev; | 652 | seqp = hctx->ccid2hctx_seqh->ccid2s_prev; |
529 | break; | 653 | break; |
530 | } | 654 | } |
531 | } | 655 | } |
@@ -536,7 +660,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
536 | if (done == NUMDUPACK) | 660 | if (done == NUMDUPACK) |
537 | break; | 661 | break; |
538 | } | 662 | } |
539 | if (seqp == hctx->seqt) | 663 | if (seqp == hctx->ccid2hctx_seqt) |
540 | break; | 664 | break; |
541 | seqp = seqp->ccid2s_prev; | 665 | seqp = seqp->ccid2s_prev; |
542 | } | 666 | } |
@@ -557,34 +681,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
557 | * one ack vector. | 681 | * one ack vector. |
558 | */ | 682 | */ |
559 | ccid2_congestion_event(sk, seqp); | 683 | ccid2_congestion_event(sk, seqp); |
560 | hctx->pipe--; | 684 | ccid2_hc_tx_dec_pipe(sk); |
561 | } | 685 | } |
562 | if (seqp == hctx->seqt) | 686 | if (seqp == hctx->ccid2hctx_seqt) |
563 | break; | 687 | break; |
564 | seqp = seqp->ccid2s_prev; | 688 | seqp = seqp->ccid2s_prev; |
565 | } | 689 | } |
566 | 690 | ||
567 | hctx->seqt = last_acked; | 691 | hctx->ccid2hctx_seqt = last_acked; |
568 | } | 692 | } |
569 | 693 | ||
570 | /* trim acked packets in tail */ | 694 | /* trim acked packets in tail */ |
571 | while (hctx->seqt != hctx->seqh) { | 695 | while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) { |
572 | if (!hctx->seqt->ccid2s_acked) | 696 | if (!hctx->ccid2hctx_seqt->ccid2s_acked) |
573 | break; | 697 | break; |
574 | 698 | ||
575 | hctx->seqt = hctx->seqt->ccid2s_next; | 699 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; |
576 | } | 700 | } |
577 | 701 | ||
578 | /* restart RTO timer if not all outstanding data has been acked */ | 702 | ccid2_hc_tx_check_sanity(hctx); |
579 | if (hctx->pipe == 0) | ||
580 | sk_stop_timer(sk, &hctx->rtotimer); | ||
581 | else | ||
582 | sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); | ||
583 | done: | ||
584 | /* check if incoming Acks allow pending packets to be sent */ | ||
585 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) | ||
586 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | ||
587 | dccp_ackvec_parsed_cleanup(&hctx->av_chunks); | ||
588 | } | 703 | } |
589 | 704 | ||
590 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | 705 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
@@ -594,13 +709,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
594 | u32 max_ratio; | 709 | u32 max_ratio; |
595 | 710 | ||
596 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ | 711 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ |
597 | hctx->ssthresh = ~0U; | 712 | hctx->ccid2hctx_ssthresh = ~0U; |
598 | 713 | ||
599 | /* Use larger initial windows (RFC 3390, rfc2581bis) */ | 714 | /* |
600 | hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); | 715 | * RFC 4341, 5: "The cwnd parameter is initialized to at most four |
716 | * packets for new connections, following the rules from [RFC3390]". | ||
717 | * We need to convert the bytes of RFC3390 into the packets of RFC 4341. | ||
718 | */ | ||
719 | hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); | ||
601 | 720 | ||
602 | /* Make sure that Ack Ratio is enabled and within bounds. */ | 721 | /* Make sure that Ack Ratio is enabled and within bounds. */ |
603 | max_ratio = DIV_ROUND_UP(hctx->cwnd, 2); | 722 | max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2); |
604 | if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) | 723 | if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) |
605 | dp->dccps_l_ack_ratio = max_ratio; | 724 | dp->dccps_l_ack_ratio = max_ratio; |
606 | 725 | ||
@@ -608,11 +727,15 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
608 | if (ccid2_hc_tx_alloc_seq(hctx)) | 727 | if (ccid2_hc_tx_alloc_seq(hctx)) |
609 | return -ENOMEM; | 728 | return -ENOMEM; |
610 | 729 | ||
611 | hctx->rto = DCCP_TIMEOUT_INIT; | 730 | hctx->ccid2hctx_rto = 3 * HZ; |
612 | hctx->rpdupack = -1; | 731 | ccid2_change_srtt(hctx, -1); |
613 | hctx->last_cong = jiffies; | 732 | hctx->ccid2hctx_rttvar = -1; |
614 | setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); | 733 | hctx->ccid2hctx_rpdupack = -1; |
615 | INIT_LIST_HEAD(&hctx->av_chunks); | 734 | hctx->ccid2hctx_last_cong = jiffies; |
735 | setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire, | ||
736 | (unsigned long)sk); | ||
737 | |||
738 | ccid2_hc_tx_check_sanity(hctx); | ||
616 | return 0; | 739 | return 0; |
617 | } | 740 | } |
618 | 741 | ||
@@ -621,11 +744,11 @@ static void ccid2_hc_tx_exit(struct sock *sk) | |||
621 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 744 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
622 | int i; | 745 | int i; |
623 | 746 | ||
624 | sk_stop_timer(sk, &hctx->rtotimer); | 747 | ccid2_hc_tx_kill_rto_timer(sk); |
625 | 748 | ||
626 | for (i = 0; i < hctx->seqbufc; i++) | 749 | for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) |
627 | kfree(hctx->seqbuf[i]); | 750 | kfree(hctx->ccid2hctx_seqbuf[i]); |
628 | hctx->seqbufc = 0; | 751 | hctx->ccid2hctx_seqbufc = 0; |
629 | } | 752 | } |
630 | 753 | ||
631 | static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | 754 | static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
@@ -636,28 +759,27 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
636 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | 759 | switch (DCCP_SKB_CB(skb)->dccpd_type) { |
637 | case DCCP_PKT_DATA: | 760 | case DCCP_PKT_DATA: |
638 | case DCCP_PKT_DATAACK: | 761 | case DCCP_PKT_DATAACK: |
639 | hcrx->data++; | 762 | hcrx->ccid2hcrx_data++; |
640 | if (hcrx->data >= dp->dccps_r_ack_ratio) { | 763 | if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) { |
641 | dccp_send_ack(sk); | 764 | dccp_send_ack(sk); |
642 | hcrx->data = 0; | 765 | hcrx->ccid2hcrx_data = 0; |
643 | } | 766 | } |
644 | break; | 767 | break; |
645 | } | 768 | } |
646 | } | 769 | } |
647 | 770 | ||
648 | static struct ccid_operations ccid2 = { | 771 | static struct ccid_operations ccid2 = { |
649 | .ccid_id = DCCPC_CCID2, | 772 | .ccid_id = DCCPC_CCID2, |
650 | .ccid_name = "TCP-like", | 773 | .ccid_name = "TCP-like", |
651 | .ccid_owner = THIS_MODULE, | 774 | .ccid_owner = THIS_MODULE, |
652 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), | 775 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), |
653 | .ccid_hc_tx_init = ccid2_hc_tx_init, | 776 | .ccid_hc_tx_init = ccid2_hc_tx_init, |
654 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, | 777 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, |
655 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, | 778 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, |
656 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, | 779 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, |
657 | .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, | 780 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, |
658 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, | 781 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), |
659 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), | 782 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, |
660 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | ||
661 | }; | 783 | }; |
662 | 784 | ||
663 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 785 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 8b7a2dee2f6d..2c94ca029010 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h | |||
@@ -42,49 +42,34 @@ struct ccid2_seq { | |||
42 | 42 | ||
43 | /** struct ccid2_hc_tx_sock - CCID2 TX half connection | 43 | /** struct ccid2_hc_tx_sock - CCID2 TX half connection |
44 | * | 44 | * |
45 | * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 | 45 | * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 |
46 | * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465) | 46 | * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465) |
47 | * @srtt: smoothed RTT estimate, scaled by 2^3 | 47 | * @ccid2hctx_lastrtt -time RTT was last measured |
48 | * @mdev: smoothed RTT variation, scaled by 2^2 | 48 | * @ccid2hctx_rpseq - last consecutive seqno |
49 | * @mdev_max: maximum of @mdev during one flight | 49 | * @ccid2hctx_rpdupack - dupacks since rpseq |
50 | * @rttvar: moving average/maximum of @mdev_max | 50 | */ |
51 | * @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) | ||
52 | * @rtt_seq: to decay RTTVAR at most once per flight | ||
53 | * @rpseq: last consecutive seqno | ||
54 | * @rpdupack: dupacks since rpseq | ||
55 | * @av_chunks: list of Ack Vectors received on current skb | ||
56 | */ | ||
57 | struct ccid2_hc_tx_sock { | 51 | struct ccid2_hc_tx_sock { |
58 | u32 cwnd; | 52 | u32 ccid2hctx_cwnd; |
59 | u32 ssthresh; | 53 | u32 ccid2hctx_ssthresh; |
60 | u32 pipe; | 54 | u32 ccid2hctx_pipe; |
61 | u32 packets_acked; | 55 | u32 ccid2hctx_packets_acked; |
62 | struct ccid2_seq *seqbuf[CCID2_SEQBUF_MAX]; | 56 | struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; |
63 | int seqbufc; | 57 | int ccid2hctx_seqbufc; |
64 | struct ccid2_seq *seqh; | 58 | struct ccid2_seq *ccid2hctx_seqh; |
65 | struct ccid2_seq *seqt; | 59 | struct ccid2_seq *ccid2hctx_seqt; |
66 | /* RTT measurement: variables/principles are the same as in TCP */ | 60 | long ccid2hctx_rto; |
67 | u32 srtt, | 61 | long ccid2hctx_srtt; |
68 | mdev, | 62 | long ccid2hctx_rttvar; |
69 | mdev_max, | 63 | unsigned long ccid2hctx_lastrtt; |
70 | rttvar, | 64 | struct timer_list ccid2hctx_rtotimer; |
71 | rto; | 65 | u64 ccid2hctx_rpseq; |
72 | u64 rtt_seq:48; | 66 | int ccid2hctx_rpdupack; |
73 | struct timer_list rtotimer; | 67 | unsigned long ccid2hctx_last_cong; |
74 | u64 rpseq; | 68 | u64 ccid2hctx_high_ack; |
75 | int rpdupack; | ||
76 | unsigned long last_cong; | ||
77 | u64 high_ack; | ||
78 | struct list_head av_chunks; | ||
79 | }; | 69 | }; |
80 | 70 | ||
81 | static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx) | ||
82 | { | ||
83 | return (hctx->pipe >= hctx->cwnd); | ||
84 | } | ||
85 | |||
86 | struct ccid2_hc_rx_sock { | 71 | struct ccid2_hc_rx_sock { |
87 | int data; | 72 | int ccid2hcrx_data; |
88 | }; | 73 | }; |
89 | 74 | ||
90 | static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) | 75 | static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 06cfdad84a6a..3b8bd7ca6761 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -49,41 +49,75 @@ static int ccid3_debug; | |||
49 | /* | 49 | /* |
50 | * Transmitter Half-Connection Routines | 50 | * Transmitter Half-Connection Routines |
51 | */ | 51 | */ |
52 | /* Oscillation Prevention/Reduction: recommended by rfc3448bis, on by default */ | 52 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG |
53 | static int do_osc_prev = true; | 53 | static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) |
54 | { | ||
55 | static char *ccid3_state_names[] = { | ||
56 | [TFRC_SSTATE_NO_SENT] = "NO_SENT", | ||
57 | [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", | ||
58 | [TFRC_SSTATE_FBACK] = "FBACK", | ||
59 | [TFRC_SSTATE_TERM] = "TERM", | ||
60 | }; | ||
61 | |||
62 | return ccid3_state_names[state]; | ||
63 | } | ||
64 | #endif | ||
65 | |||
66 | static void ccid3_hc_tx_set_state(struct sock *sk, | ||
67 | enum ccid3_hc_tx_states state) | ||
68 | { | ||
69 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | ||
70 | enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; | ||
71 | |||
72 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
73 | dccp_role(sk), sk, ccid3_tx_state_name(oldstate), | ||
74 | ccid3_tx_state_name(state)); | ||
75 | WARN_ON(state == oldstate); | ||
76 | hctx->ccid3hctx_state = state; | ||
77 | } | ||
54 | 78 | ||
55 | /* | 79 | /* |
56 | * Compute the initial sending rate X_init in the manner of RFC 3390: | 80 | * Compute the initial sending rate X_init in the manner of RFC 3390: |
57 | * | 81 | * |
58 | * X_init = min(4 * MPS, max(2 * MPS, 4380 bytes)) / RTT | 82 | * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT |
59 | * | 83 | * |
84 | * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis | ||
85 | * (rev-02) clarifies the use of RFC 3390 with regard to the above formula. | ||
60 | * For consistency with other parts of the code, X_init is scaled by 2^6. | 86 | * For consistency with other parts of the code, X_init is scaled by 2^6. |
61 | */ | 87 | */ |
62 | static inline u64 rfc3390_initial_rate(struct sock *sk) | 88 | static inline u64 rfc3390_initial_rate(struct sock *sk) |
63 | { | 89 | { |
64 | const u32 mps = dccp_sk(sk)->dccps_mss_cache, | 90 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
65 | w_init = clamp(4380U, 2 * mps, 4 * mps); | 91 | const __u32 w_init = clamp_t(__u32, 4380U, |
92 | 2 * hctx->ccid3hctx_s, 4 * hctx->ccid3hctx_s); | ||
66 | 93 | ||
67 | return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->rtt); | 94 | return scaled_div(w_init << 6, hctx->ccid3hctx_rtt); |
68 | } | 95 | } |
69 | 96 | ||
70 | /** | 97 | /* |
71 | * ccid3_update_send_interval - Calculate new t_ipi = s / X | 98 | * Recalculate t_ipi and delta (should be called whenever X changes) |
72 | * This respects the granularity of X (64 * bytes/second) and enforces the | ||
73 | * scaled minimum of s * 64 / t_mbi = `s' bytes/second as per RFC 3448/4342. | ||
74 | */ | 99 | */ |
75 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) | 100 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) |
76 | { | 101 | { |
77 | if (unlikely(hctx->x <= hctx->s)) | 102 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ |
78 | hctx->x = hctx->s; | 103 | hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, |
79 | hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x); | 104 | hctx->ccid3hctx_x); |
105 | |||
106 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | ||
107 | hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, | ||
108 | TFRC_OPSYS_HALF_TIME_GRAN); | ||
109 | |||
110 | ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", | ||
111 | hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta, | ||
112 | hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); | ||
113 | |||
80 | } | 114 | } |
81 | 115 | ||
82 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) | 116 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) |
83 | { | 117 | { |
84 | u32 delta = ktime_us_delta(now, hctx->t_last_win_count); | 118 | u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); |
85 | 119 | ||
86 | return delta / hctx->rtt; | 120 | return delta / hctx->ccid3hctx_rtt; |
87 | } | 121 | } |
88 | 122 | ||
89 | /** | 123 | /** |
@@ -99,8 +133,8 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) | |||
99 | static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) | 133 | static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) |
100 | { | 134 | { |
101 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 135 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
102 | u64 min_rate = 2 * hctx->x_recv; | 136 | __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; |
103 | const u64 old_x = hctx->x; | 137 | const __u64 old_x = hctx->ccid3hctx_x; |
104 | ktime_t now = stamp ? *stamp : ktime_get_real(); | 138 | ktime_t now = stamp ? *stamp : ktime_get_real(); |
105 | 139 | ||
106 | /* | 140 | /* |
@@ -111,44 +145,50 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) | |||
111 | */ | 145 | */ |
112 | if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) { | 146 | if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) { |
113 | min_rate = rfc3390_initial_rate(sk); | 147 | min_rate = rfc3390_initial_rate(sk); |
114 | min_rate = max(min_rate, 2 * hctx->x_recv); | 148 | min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); |
115 | } | 149 | } |
116 | 150 | ||
117 | if (hctx->p > 0) { | 151 | if (hctx->ccid3hctx_p > 0) { |
118 | 152 | ||
119 | hctx->x = min(((u64)hctx->x_calc) << 6, min_rate); | 153 | hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, |
154 | min_rate); | ||
155 | hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, | ||
156 | (((__u64)hctx->ccid3hctx_s) << 6) / | ||
157 | TFRC_T_MBI); | ||
120 | 158 | ||
121 | } else if (ktime_us_delta(now, hctx->t_ld) - (s64)hctx->rtt >= 0) { | 159 | } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) |
160 | - (s64)hctx->ccid3hctx_rtt >= 0) { | ||
122 | 161 | ||
123 | hctx->x = min(2 * hctx->x, min_rate); | 162 | hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate); |
124 | hctx->x = max(hctx->x, | 163 | hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, |
125 | scaled_div(((u64)hctx->s) << 6, hctx->rtt)); | 164 | scaled_div(((__u64)hctx->ccid3hctx_s) << 6, |
126 | hctx->t_ld = now; | 165 | hctx->ccid3hctx_rtt)); |
166 | hctx->ccid3hctx_t_ld = now; | ||
127 | } | 167 | } |
128 | 168 | ||
129 | if (hctx->x != old_x) { | 169 | if (hctx->ccid3hctx_x != old_x) { |
130 | ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " | 170 | ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " |
131 | "X_recv=%u\n", (unsigned)(old_x >> 6), | 171 | "X_recv=%u\n", (unsigned)(old_x >> 6), |
132 | (unsigned)(hctx->x >> 6), hctx->x_calc, | 172 | (unsigned)(hctx->ccid3hctx_x >> 6), |
133 | (unsigned)(hctx->x_recv >> 6)); | 173 | hctx->ccid3hctx_x_calc, |
174 | (unsigned)(hctx->ccid3hctx_x_recv >> 6)); | ||
134 | 175 | ||
135 | ccid3_update_send_interval(hctx); | 176 | ccid3_update_send_interval(hctx); |
136 | } | 177 | } |
137 | } | 178 | } |
138 | 179 | ||
139 | /* | 180 | /* |
140 | * ccid3_hc_tx_measure_packet_size - Measuring the packet size `s' (sec 4.1) | 181 | * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) |
141 | * @new_len: DCCP payload size in bytes (not used by all methods) | 182 | * @len: DCCP packet payload size in bytes |
142 | */ | 183 | */ |
143 | static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len) | 184 | static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) |
144 | { | 185 | { |
145 | #if defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_AVG) | 186 | const u16 old_s = hctx->ccid3hctx_s; |
146 | return tfrc_ewma(ccid3_hc_tx_sk(sk)->s, new_len, 9); | 187 | |
147 | #elif defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MAX) | 188 | hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9); |
148 | return max(ccid3_hc_tx_sk(sk)->s, new_len); | 189 | |
149 | #else /* CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MPS */ | 190 | if (hctx->ccid3hctx_s != old_s) |
150 | return dccp_sk(sk)->dccps_mss_cache; | 191 | ccid3_update_send_interval(hctx); |
151 | #endif | ||
152 | } | 192 | } |
153 | 193 | ||
154 | /* | 194 | /* |
@@ -158,13 +198,13 @@ static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len) | |||
158 | static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, | 198 | static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, |
159 | ktime_t now) | 199 | ktime_t now) |
160 | { | 200 | { |
161 | u32 delta = ktime_us_delta(now, hctx->t_last_win_count), | 201 | u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count), |
162 | quarter_rtts = (4 * delta) / hctx->rtt; | 202 | quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt; |
163 | 203 | ||
164 | if (quarter_rtts > 0) { | 204 | if (quarter_rtts > 0) { |
165 | hctx->t_last_win_count = now; | 205 | hctx->ccid3hctx_t_last_win_count = now; |
166 | hctx->last_win_count += min(quarter_rtts, 5U); | 206 | hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U); |
167 | hctx->last_win_count &= 0xF; /* mod 16 */ | 207 | hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ |
168 | } | 208 | } |
169 | } | 209 | } |
170 | 210 | ||
@@ -181,26 +221,25 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
181 | goto restart_timer; | 221 | goto restart_timer; |
182 | } | 222 | } |
183 | 223 | ||
184 | ccid3_pr_debug("%s(%p) entry with%s feedback\n", dccp_role(sk), sk, | 224 | ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, |
185 | hctx->feedback ? "" : "out"); | 225 | ccid3_tx_state_name(hctx->ccid3hctx_state)); |
186 | 226 | ||
187 | /* Ignore and do not restart after leaving the established state */ | 227 | if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK) |
188 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) | 228 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
229 | else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) | ||
189 | goto out; | 230 | goto out; |
190 | 231 | ||
191 | /* Reset feedback state to "no feedback received" */ | ||
192 | hctx->feedback = false; | ||
193 | |||
194 | /* | 232 | /* |
195 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 | 233 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 |
196 | * RTO is 0 if and only if no feedback has been received yet. | ||
197 | */ | 234 | */ |
198 | if (hctx->t_rto == 0 || hctx->p == 0) { | 235 | if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */ |
236 | hctx->ccid3hctx_p == 0) { | ||
199 | 237 | ||
200 | /* halve send rate directly */ | 238 | /* halve send rate directly */ |
201 | hctx->x /= 2; | 239 | hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, |
240 | (((__u64)hctx->ccid3hctx_s) << 6) / | ||
241 | TFRC_T_MBI); | ||
202 | ccid3_update_send_interval(hctx); | 242 | ccid3_update_send_interval(hctx); |
203 | |||
204 | } else { | 243 | } else { |
205 | /* | 244 | /* |
206 | * Modify the cached value of X_recv | 245 | * Modify the cached value of X_recv |
@@ -212,41 +251,44 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
212 | * | 251 | * |
213 | * Note that X_recv is scaled by 2^6 while X_calc is not | 252 | * Note that X_recv is scaled by 2^6 while X_calc is not |
214 | */ | 253 | */ |
215 | BUG_ON(hctx->p && !hctx->x_calc); | 254 | BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); |
216 | 255 | ||
217 | if (hctx->x_calc > (hctx->x_recv >> 5)) | 256 | if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5)) |
218 | hctx->x_recv /= 2; | 257 | hctx->ccid3hctx_x_recv = |
258 | max(hctx->ccid3hctx_x_recv / 2, | ||
259 | (((__u64)hctx->ccid3hctx_s) << 6) / | ||
260 | (2 * TFRC_T_MBI)); | ||
219 | else { | 261 | else { |
220 | hctx->x_recv = hctx->x_calc; | 262 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; |
221 | hctx->x_recv <<= 4; | 263 | hctx->ccid3hctx_x_recv <<= 4; |
222 | } | 264 | } |
223 | ccid3_hc_tx_update_x(sk, NULL); | 265 | ccid3_hc_tx_update_x(sk, NULL); |
224 | } | 266 | } |
225 | ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", | 267 | ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", |
226 | (unsigned long long)hctx->x); | 268 | (unsigned long long)hctx->ccid3hctx_x); |
227 | 269 | ||
228 | /* | 270 | /* |
229 | * Set new timeout for the nofeedback timer. | 271 | * Set new timeout for the nofeedback timer. |
230 | * See comments in packet_recv() regarding the value of t_RTO. | 272 | * See comments in packet_recv() regarding the value of t_RTO. |
231 | */ | 273 | */ |
232 | if (unlikely(hctx->t_rto == 0)) /* no feedback received yet */ | 274 | if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */ |
233 | t_nfb = TFRC_INITIAL_TIMEOUT; | 275 | t_nfb = TFRC_INITIAL_TIMEOUT; |
234 | else | 276 | else |
235 | t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi); | 277 | t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); |
236 | 278 | ||
237 | restart_timer: | 279 | restart_timer: |
238 | sk_reset_timer(sk, &hctx->no_feedback_timer, | 280 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, |
239 | jiffies + usecs_to_jiffies(t_nfb)); | 281 | jiffies + usecs_to_jiffies(t_nfb)); |
240 | out: | 282 | out: |
241 | bh_unlock_sock(sk); | 283 | bh_unlock_sock(sk); |
242 | sock_put(sk); | 284 | sock_put(sk); |
243 | } | 285 | } |
244 | 286 | ||
245 | /** | 287 | /* |
246 | * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets | 288 | * returns |
247 | * @skb: next packet candidate to send on @sk | 289 | * > 0: delay (in msecs) that should pass before actually sending |
248 | * This function uses the convention of ccid_packet_dequeue_eval() and | 290 | * = 0: can send immediately |
249 | * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. | 291 | * < 0: error condition; do not send packet |
250 | */ | 292 | */ |
251 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | 293 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
252 | { | 294 | { |
@@ -263,14 +305,18 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
263 | if (unlikely(skb->len == 0)) | 305 | if (unlikely(skb->len == 0)) |
264 | return -EBADMSG; | 306 | return -EBADMSG; |
265 | 307 | ||
266 | if (hctx->s == 0) { | 308 | switch (hctx->ccid3hctx_state) { |
267 | sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies + | 309 | case TFRC_SSTATE_NO_SENT: |
310 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
311 | (jiffies + | ||
268 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); | 312 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); |
269 | hctx->last_win_count = 0; | 313 | hctx->ccid3hctx_last_win_count = 0; |
270 | hctx->t_last_win_count = now; | 314 | hctx->ccid3hctx_t_last_win_count = now; |
271 | 315 | ||
272 | /* Set t_0 for initial packet */ | 316 | /* Set t_0 for initial packet */ |
273 | hctx->t_nom = now; | 317 | hctx->ccid3hctx_t_nom = now; |
318 | |||
319 | hctx->ccid3hctx_s = skb->len; | ||
274 | 320 | ||
275 | /* | 321 | /* |
276 | * Use initial RTT sample when available: recommended by erratum | 322 | * Use initial RTT sample when available: recommended by erratum |
@@ -279,9 +325,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
279 | */ | 325 | */ |
280 | if (dp->dccps_syn_rtt) { | 326 | if (dp->dccps_syn_rtt) { |
281 | ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); | 327 | ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); |
282 | hctx->rtt = dp->dccps_syn_rtt; | 328 | hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; |
283 | hctx->x = rfc3390_initial_rate(sk); | 329 | hctx->ccid3hctx_x = rfc3390_initial_rate(sk); |
284 | hctx->t_ld = now; | 330 | hctx->ccid3hctx_t_ld = now; |
285 | } else { | 331 | } else { |
286 | /* | 332 | /* |
287 | * Sender does not have RTT sample: | 333 | * Sender does not have RTT sample: |
@@ -289,20 +335,17 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
289 | * is needed in several parts (e.g. window counter); | 335 | * is needed in several parts (e.g. window counter); |
290 | * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. | 336 | * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. |
291 | */ | 337 | */ |
292 | hctx->rtt = DCCP_FALLBACK_RTT; | 338 | hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT; |
293 | hctx->x = dp->dccps_mss_cache; | 339 | hctx->ccid3hctx_x = hctx->ccid3hctx_s; |
294 | hctx->x <<= 6; | 340 | hctx->ccid3hctx_x <<= 6; |
295 | } | 341 | } |
296 | |||
297 | /* Compute t_ipi = s / X */ | ||
298 | hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len); | ||
299 | ccid3_update_send_interval(hctx); | 342 | ccid3_update_send_interval(hctx); |
300 | 343 | ||
301 | /* Seed value for Oscillation Prevention (sec. 4.5) */ | 344 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
302 | hctx->r_sqmean = tfrc_scaled_sqrt(hctx->rtt); | 345 | break; |
303 | 346 | case TFRC_SSTATE_NO_FBACK: | |
304 | } else { | 347 | case TFRC_SSTATE_FBACK: |
305 | delay = ktime_us_delta(hctx->t_nom, now); | 348 | delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); |
306 | ccid3_pr_debug("delay=%ld\n", (long)delay); | 349 | ccid3_pr_debug("delay=%ld\n", (long)delay); |
307 | /* | 350 | /* |
308 | * Scheduling of packet transmissions [RFC 3448, 4.6] | 351 | * Scheduling of packet transmissions [RFC 3448, 4.6] |
@@ -312,80 +355,99 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
312 | * else | 355 | * else |
313 | * // send the packet in (t_nom - t_now) milliseconds. | 356 | * // send the packet in (t_nom - t_now) milliseconds. |
314 | */ | 357 | */ |
315 | if (delay >= TFRC_T_DELTA) | 358 | if (delay - (s64)hctx->ccid3hctx_delta >= 1000) |
316 | return (u32)delay / USEC_PER_MSEC; | 359 | return (u32)delay / 1000L; |
317 | 360 | ||
318 | ccid3_hc_tx_update_win_count(hctx, now); | 361 | ccid3_hc_tx_update_win_count(hctx, now); |
362 | break; | ||
363 | case TFRC_SSTATE_TERM: | ||
364 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | ||
365 | return -EINVAL; | ||
319 | } | 366 | } |
320 | 367 | ||
321 | /* prepare to send now (add options etc.) */ | 368 | /* prepare to send now (add options etc.) */ |
322 | dp->dccps_hc_tx_insert_options = 1; | 369 | dp->dccps_hc_tx_insert_options = 1; |
323 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->last_win_count; | 370 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; |
324 | 371 | ||
325 | /* set the nominal send time for the next following packet */ | 372 | /* set the nominal send time for the next following packet */ |
326 | hctx->t_nom = ktime_add_us(hctx->t_nom, hctx->t_ipi); | 373 | hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, |
327 | return CCID_PACKET_SEND_AT_ONCE; | 374 | hctx->ccid3hctx_t_ipi); |
375 | return 0; | ||
328 | } | 376 | } |
329 | 377 | ||
330 | static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) | 378 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, |
379 | unsigned int len) | ||
331 | { | 380 | { |
332 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 381 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
333 | 382 | ||
334 | /* Changes to s will become effective the next time X is computed */ | 383 | ccid3_hc_tx_update_s(hctx, len); |
335 | hctx->s = ccid3_hc_tx_measure_packet_size(sk, len); | ||
336 | 384 | ||
337 | if (tfrc_tx_hist_add(&hctx->hist, dccp_sk(sk)->dccps_gss)) | 385 | if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss)) |
338 | DCCP_CRIT("packet history - out of memory!"); | 386 | DCCP_CRIT("packet history - out of memory!"); |
339 | } | 387 | } |
340 | 388 | ||
341 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 389 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
342 | { | 390 | { |
343 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 391 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
344 | struct tfrc_tx_hist_entry *acked; | 392 | struct ccid3_options_received *opt_recv; |
345 | ktime_t now; | 393 | ktime_t now; |
346 | unsigned long t_nfb; | 394 | unsigned long t_nfb; |
347 | u32 r_sample; | 395 | u32 pinv, r_sample; |
348 | 396 | ||
349 | /* we are only interested in ACKs */ | 397 | /* we are only interested in ACKs */ |
350 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | 398 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || |
351 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | 399 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) |
352 | return; | 400 | return; |
353 | /* | 401 | /* ... and only in the established state */ |
354 | * Locate the acknowledged packet in the TX history. | 402 | if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK && |
355 | * | 403 | hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) |
356 | * Returning "entry not found" here can for instance happen when | 404 | return; |
357 | * - the host has not sent out anything (e.g. a passive server), | 405 | |
358 | * - the Ack is outdated (packet with higher Ack number was received), | 406 | opt_recv = &hctx->ccid3hctx_options_received; |
359 | * - it is a bogus Ack (for a packet not sent on this connection). | 407 | now = ktime_get_real(); |
360 | */ | 408 | |
361 | acked = tfrc_tx_hist_find_entry(hctx->hist, dccp_hdr_ack_seq(skb)); | 409 | /* Estimate RTT from history if ACK number is valid */ |
362 | if (acked == NULL) | 410 | r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist, |
411 | DCCP_SKB_CB(skb)->dccpd_ack_seq, now); | ||
412 | if (r_sample == 0) { | ||
413 | DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, | ||
414 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), | ||
415 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
363 | return; | 416 | return; |
364 | /* For the sake of RTT sampling, ignore/remove all older entries */ | 417 | } |
365 | tfrc_tx_hist_purge(&acked->next); | ||
366 | 418 | ||
367 | /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */ | 419 | /* Update receive rate in units of 64 * bytes/second */ |
368 | now = ktime_get_real(); | 420 | hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; |
369 | r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp)); | 421 | hctx->ccid3hctx_x_recv <<= 6; |
370 | hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9); | ||
371 | 422 | ||
423 | /* Update loss event rate (which is scaled by 1e6) */ | ||
424 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
425 | if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ | ||
426 | hctx->ccid3hctx_p = 0; | ||
427 | else /* can not exceed 100% */ | ||
428 | hctx->ccid3hctx_p = scaled_div(1, pinv); | ||
429 | /* | ||
430 | * Validate new RTT sample and update moving average | ||
431 | */ | ||
432 | r_sample = dccp_sample_rtt(sk, r_sample); | ||
433 | hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9); | ||
372 | /* | 434 | /* |
373 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 | 435 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 |
374 | */ | 436 | */ |
375 | if (!hctx->feedback) { | 437 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { |
376 | hctx->feedback = true; | 438 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); |
377 | 439 | ||
378 | if (hctx->t_rto == 0) { | 440 | if (hctx->ccid3hctx_t_rto == 0) { |
379 | /* | 441 | /* |
380 | * Initial feedback packet: Larger Initial Windows (4.2) | 442 | * Initial feedback packet: Larger Initial Windows (4.2) |
381 | */ | 443 | */ |
382 | hctx->x = rfc3390_initial_rate(sk); | 444 | hctx->ccid3hctx_x = rfc3390_initial_rate(sk); |
383 | hctx->t_ld = now; | 445 | hctx->ccid3hctx_t_ld = now; |
384 | 446 | ||
385 | ccid3_update_send_interval(hctx); | 447 | ccid3_update_send_interval(hctx); |
386 | 448 | ||
387 | goto done_computing_x; | 449 | goto done_computing_x; |
388 | } else if (hctx->p == 0) { | 450 | } else if (hctx->ccid3hctx_p == 0) { |
389 | /* | 451 | /* |
390 | * First feedback after nofeedback timer expiry (4.3) | 452 | * First feedback after nofeedback timer expiry (4.3) |
391 | */ | 453 | */ |
@@ -394,52 +456,25 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
394 | } | 456 | } |
395 | 457 | ||
396 | /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ | 458 | /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ |
397 | if (hctx->p > 0) | 459 | if (hctx->ccid3hctx_p > 0) |
398 | hctx->x_calc = tfrc_calc_x(hctx->s, hctx->rtt, hctx->p); | 460 | hctx->ccid3hctx_x_calc = |
461 | tfrc_calc_x(hctx->ccid3hctx_s, | ||
462 | hctx->ccid3hctx_rtt, | ||
463 | hctx->ccid3hctx_p); | ||
399 | ccid3_hc_tx_update_x(sk, &now); | 464 | ccid3_hc_tx_update_x(sk, &now); |
400 | 465 | ||
401 | done_computing_x: | 466 | done_computing_x: |
402 | ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " | 467 | ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " |
403 | "p=%u, X_calc=%u, X_recv=%u, X=%u\n", | 468 | "p=%u, X_calc=%u, X_recv=%u, X=%u\n", |
404 | dccp_role(sk), sk, hctx->rtt, r_sample, | 469 | dccp_role(sk), |
405 | hctx->s, hctx->p, hctx->x_calc, | 470 | sk, hctx->ccid3hctx_rtt, r_sample, |
406 | (unsigned)(hctx->x_recv >> 6), | 471 | hctx->ccid3hctx_s, hctx->ccid3hctx_p, |
407 | (unsigned)(hctx->x >> 6)); | 472 | hctx->ccid3hctx_x_calc, |
408 | /* | 473 | (unsigned)(hctx->ccid3hctx_x_recv >> 6), |
409 | * Oscillation Reduction (RFC 3448, 4.5) - modifying t_ipi according to | 474 | (unsigned)(hctx->ccid3hctx_x >> 6)); |
410 | * RTT changes, multiplying by X/X_inst = sqrt(R_sample)/R_sqmean. This | ||
411 | * can be useful if few connections share a link, avoiding that buffer | ||
412 | * fill levels (RTT) oscillate as a result of frequent adjustments to X. | ||
413 | * A useful presentation with background information is in | ||
414 | * Joerg Widmer, "Equation-Based Congestion Control", | ||
415 | * MSc Thesis, University of Mannheim, Germany, 2000 | ||
416 | * (sec. 3.6.4), who calls this ISM ("Inter-packet Space Modulation"). | ||
417 | */ | ||
418 | if (do_osc_prev) { | ||
419 | r_sample = tfrc_scaled_sqrt(r_sample); | ||
420 | /* | ||
421 | * The modulation can work in both ways: increase/decrease t_ipi | ||
422 | * according to long-term increases/decreases of the RTT. The | ||
423 | * former is a useful measure, since it works against queue | ||
424 | * build-up. The latter temporarily increases the sending rate, | ||
425 | * so that buffers fill up more quickly. This in turn causes | ||
426 | * the RTT to increase, so that either later reduction becomes | ||
427 | * necessary or the RTT stays at a very high level. Decreasing | ||
428 | * t_ipi is therefore not supported. | ||
429 | * Furthermore, during the initial slow-start phase the RTT | ||
430 | * naturally increases, where using the algorithm would cause | ||
431 | * delays. Hence it is disabled during the initial slow-start. | ||
432 | */ | ||
433 | if (r_sample > hctx->r_sqmean && hctx->p > 0) | ||
434 | hctx->t_ipi = div_u64((u64)hctx->t_ipi * (u64)r_sample, | ||
435 | hctx->r_sqmean); | ||
436 | hctx->t_ipi = min_t(u32, hctx->t_ipi, TFRC_T_MBI); | ||
437 | /* update R_sqmean _after_ computing the modulation factor */ | ||
438 | hctx->r_sqmean = tfrc_ewma(hctx->r_sqmean, r_sample, 9); | ||
439 | } | ||
440 | 475 | ||
441 | /* unschedule no feedback timer */ | 476 | /* unschedule no feedback timer */ |
442 | sk_stop_timer(sk, &hctx->no_feedback_timer); | 477 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); |
443 | 478 | ||
444 | /* | 479 | /* |
445 | * As we have calculated new ipi, delta, t_nom it is possible | 480 | * As we have calculated new ipi, delta, t_nom it is possible |
@@ -453,66 +488,95 @@ done_computing_x: | |||
453 | * This can help avoid triggering the nofeedback timer too | 488 | * This can help avoid triggering the nofeedback timer too |
454 | * often ('spinning') on LANs with small RTTs. | 489 | * often ('spinning') on LANs with small RTTs. |
455 | */ | 490 | */ |
456 | hctx->t_rto = max_t(u32, 4 * hctx->rtt, (CONFIG_IP_DCCP_CCID3_RTO * | 491 | hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, |
457 | (USEC_PER_SEC / 1000))); | 492 | (CONFIG_IP_DCCP_CCID3_RTO * |
493 | (USEC_PER_SEC / 1000))); | ||
458 | /* | 494 | /* |
459 | * Schedule no feedback timer to expire in | 495 | * Schedule no feedback timer to expire in |
460 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | 496 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) |
461 | */ | 497 | */ |
462 | t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi); | 498 | t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); |
463 | 499 | ||
464 | ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " | 500 | ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " |
465 | "expire in %lu jiffies (%luus)\n", | 501 | "expire in %lu jiffies (%luus)\n", |
466 | dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb); | 502 | dccp_role(sk), |
503 | sk, usecs_to_jiffies(t_nfb), t_nfb); | ||
467 | 504 | ||
468 | sk_reset_timer(sk, &hctx->no_feedback_timer, | 505 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, |
469 | jiffies + usecs_to_jiffies(t_nfb)); | 506 | jiffies + usecs_to_jiffies(t_nfb)); |
470 | } | 507 | } |
471 | 508 | ||
472 | static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type, | 509 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, |
473 | u8 option, u8 *optval, u8 optlen) | 510 | unsigned char len, u16 idx, |
511 | unsigned char *value) | ||
474 | { | 512 | { |
513 | int rc = 0; | ||
514 | const struct dccp_sock *dp = dccp_sk(sk); | ||
475 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 515 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
516 | struct ccid3_options_received *opt_recv; | ||
476 | __be32 opt_val; | 517 | __be32 opt_val; |
477 | 518 | ||
478 | switch (option) { | 519 | opt_recv = &hctx->ccid3hctx_options_received; |
479 | case TFRC_OPT_RECEIVE_RATE: | ||
480 | case TFRC_OPT_LOSS_EVENT_RATE: | ||
481 | /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */ | ||
482 | if (packet_type == DCCP_PKT_DATA) | ||
483 | break; | ||
484 | if (unlikely(optlen != 4)) { | ||
485 | DCCP_WARN("%s(%p), invalid len %d for %u\n", | ||
486 | dccp_role(sk), sk, optlen, option); | ||
487 | return -EINVAL; | ||
488 | } | ||
489 | opt_val = ntohl(get_unaligned((__be32 *)optval)); | ||
490 | 520 | ||
491 | if (option == TFRC_OPT_RECEIVE_RATE) { | 521 | if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { |
492 | /* Receive Rate is kept in units of 64 bytes/second */ | 522 | opt_recv->ccid3or_seqno = dp->dccps_gsr; |
493 | hctx->x_recv = opt_val; | 523 | opt_recv->ccid3or_loss_event_rate = ~0; |
494 | hctx->x_recv <<= 6; | 524 | opt_recv->ccid3or_loss_intervals_idx = 0; |
525 | opt_recv->ccid3or_loss_intervals_len = 0; | ||
526 | opt_recv->ccid3or_receive_rate = 0; | ||
527 | } | ||
495 | 528 | ||
496 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", | 529 | switch (option) { |
497 | dccp_role(sk), sk, opt_val); | 530 | case TFRC_OPT_LOSS_EVENT_RATE: |
531 | if (unlikely(len != 4)) { | ||
532 | DCCP_WARN("%s(%p), invalid len %d " | ||
533 | "for TFRC_OPT_LOSS_EVENT_RATE\n", | ||
534 | dccp_role(sk), sk, len); | ||
535 | rc = -EINVAL; | ||
498 | } else { | 536 | } else { |
499 | /* Update the fixpoint Loss Event Rate fraction */ | 537 | opt_val = get_unaligned((__be32 *)value); |
500 | hctx->p = tfrc_invert_loss_event_rate(opt_val); | 538 | opt_recv->ccid3or_loss_event_rate = ntohl(opt_val); |
501 | |||
502 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", | 539 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", |
503 | dccp_role(sk), sk, opt_val); | 540 | dccp_role(sk), sk, |
541 | opt_recv->ccid3or_loss_event_rate); | ||
504 | } | 542 | } |
543 | break; | ||
544 | case TFRC_OPT_LOSS_INTERVALS: | ||
545 | opt_recv->ccid3or_loss_intervals_idx = idx; | ||
546 | opt_recv->ccid3or_loss_intervals_len = len; | ||
547 | ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", | ||
548 | dccp_role(sk), sk, | ||
549 | opt_recv->ccid3or_loss_intervals_idx, | ||
550 | opt_recv->ccid3or_loss_intervals_len); | ||
551 | break; | ||
552 | case TFRC_OPT_RECEIVE_RATE: | ||
553 | if (unlikely(len != 4)) { | ||
554 | DCCP_WARN("%s(%p), invalid len %d " | ||
555 | "for TFRC_OPT_RECEIVE_RATE\n", | ||
556 | dccp_role(sk), sk, len); | ||
557 | rc = -EINVAL; | ||
558 | } else { | ||
559 | opt_val = get_unaligned((__be32 *)value); | ||
560 | opt_recv->ccid3or_receive_rate = ntohl(opt_val); | ||
561 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", | ||
562 | dccp_role(sk), sk, | ||
563 | opt_recv->ccid3or_receive_rate); | ||
564 | } | ||
565 | break; | ||
505 | } | 566 | } |
506 | return 0; | 567 | |
568 | return rc; | ||
507 | } | 569 | } |
508 | 570 | ||
509 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) | 571 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) |
510 | { | 572 | { |
511 | struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); | 573 | struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); |
512 | 574 | ||
513 | hctx->hist = NULL; | 575 | hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; |
514 | setup_timer(&hctx->no_feedback_timer, | 576 | hctx->ccid3hctx_hist = NULL; |
515 | ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); | 577 | setup_timer(&hctx->ccid3hctx_no_feedback_timer, |
578 | ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); | ||
579 | |||
516 | return 0; | 580 | return 0; |
517 | } | 581 | } |
518 | 582 | ||
@@ -520,36 +584,42 @@ static void ccid3_hc_tx_exit(struct sock *sk) | |||
520 | { | 584 | { |
521 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 585 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
522 | 586 | ||
523 | sk_stop_timer(sk, &hctx->no_feedback_timer); | 587 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); |
524 | tfrc_tx_hist_purge(&hctx->hist); | 588 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); |
589 | |||
590 | tfrc_tx_hist_purge(&hctx->ccid3hctx_hist); | ||
525 | } | 591 | } |
526 | 592 | ||
527 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | 593 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) |
528 | { | 594 | { |
529 | info->tcpi_rto = ccid3_hc_tx_sk(sk)->t_rto; | 595 | struct ccid3_hc_tx_sock *hctx; |
530 | info->tcpi_rtt = ccid3_hc_tx_sk(sk)->rtt; | 596 | |
597 | /* Listen socks doesn't have a private CCID block */ | ||
598 | if (sk->sk_state == DCCP_LISTEN) | ||
599 | return; | ||
600 | |||
601 | hctx = ccid3_hc_tx_sk(sk); | ||
602 | info->tcpi_rto = hctx->ccid3hctx_t_rto; | ||
603 | info->tcpi_rtt = hctx->ccid3hctx_rtt; | ||
531 | } | 604 | } |
532 | 605 | ||
533 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | 606 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, |
534 | u32 __user *optval, int __user *optlen) | 607 | u32 __user *optval, int __user *optlen) |
535 | { | 608 | { |
536 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 609 | const struct ccid3_hc_tx_sock *hctx; |
537 | struct tfrc_tx_info tfrc; | ||
538 | const void *val; | 610 | const void *val; |
539 | 611 | ||
612 | /* Listen socks doesn't have a private CCID block */ | ||
613 | if (sk->sk_state == DCCP_LISTEN) | ||
614 | return -EINVAL; | ||
615 | |||
616 | hctx = ccid3_hc_tx_sk(sk); | ||
540 | switch (optname) { | 617 | switch (optname) { |
541 | case DCCP_SOCKOPT_CCID_TX_INFO: | 618 | case DCCP_SOCKOPT_CCID_TX_INFO: |
542 | if (len < sizeof(tfrc)) | 619 | if (len < sizeof(hctx->ccid3hctx_tfrc)) |
543 | return -EINVAL; | 620 | return -EINVAL; |
544 | tfrc.tfrctx_x = hctx->x; | 621 | len = sizeof(hctx->ccid3hctx_tfrc); |
545 | tfrc.tfrctx_x_recv = hctx->x_recv; | 622 | val = &hctx->ccid3hctx_tfrc; |
546 | tfrc.tfrctx_x_calc = hctx->x_calc; | ||
547 | tfrc.tfrctx_rtt = hctx->rtt; | ||
548 | tfrc.tfrctx_p = hctx->p; | ||
549 | tfrc.tfrctx_rto = hctx->t_rto; | ||
550 | tfrc.tfrctx_ipi = hctx->t_ipi; | ||
551 | len = sizeof(tfrc); | ||
552 | val = &tfrc; | ||
553 | break; | 623 | break; |
554 | default: | 624 | default: |
555 | return -ENOPROTOOPT; | 625 | return -ENOPROTOOPT; |
@@ -564,82 +634,112 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | |||
564 | /* | 634 | /* |
565 | * Receiver Half-Connection Routines | 635 | * Receiver Half-Connection Routines |
566 | */ | 636 | */ |
637 | |||
638 | /* CCID3 feedback types */ | ||
639 | enum ccid3_fback_type { | ||
640 | CCID3_FBACK_NONE = 0, | ||
641 | CCID3_FBACK_INITIAL, | ||
642 | CCID3_FBACK_PERIODIC, | ||
643 | CCID3_FBACK_PARAM_CHANGE | ||
644 | }; | ||
645 | |||
646 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG | ||
647 | static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) | ||
648 | { | ||
649 | static char *ccid3_rx_state_names[] = { | ||
650 | [TFRC_RSTATE_NO_DATA] = "NO_DATA", | ||
651 | [TFRC_RSTATE_DATA] = "DATA", | ||
652 | [TFRC_RSTATE_TERM] = "TERM", | ||
653 | }; | ||
654 | |||
655 | return ccid3_rx_state_names[state]; | ||
656 | } | ||
657 | #endif | ||
658 | |||
659 | static void ccid3_hc_rx_set_state(struct sock *sk, | ||
660 | enum ccid3_hc_rx_states state) | ||
661 | { | ||
662 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | ||
663 | enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; | ||
664 | |||
665 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
666 | dccp_role(sk), sk, ccid3_rx_state_name(oldstate), | ||
667 | ccid3_rx_state_name(state)); | ||
668 | WARN_ON(state == oldstate); | ||
669 | hcrx->ccid3hcrx_state = state; | ||
670 | } | ||
671 | |||
567 | static void ccid3_hc_rx_send_feedback(struct sock *sk, | 672 | static void ccid3_hc_rx_send_feedback(struct sock *sk, |
568 | const struct sk_buff *skb, | 673 | const struct sk_buff *skb, |
569 | enum ccid3_fback_type fbtype) | 674 | enum ccid3_fback_type fbtype) |
570 | { | 675 | { |
571 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 676 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
677 | struct dccp_sock *dp = dccp_sk(sk); | ||
678 | ktime_t now; | ||
679 | s64 delta = 0; | ||
680 | |||
681 | if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM)) | ||
682 | return; | ||
683 | |||
684 | now = ktime_get_real(); | ||
572 | 685 | ||
573 | switch (fbtype) { | 686 | switch (fbtype) { |
574 | case CCID3_FBACK_INITIAL: | 687 | case CCID3_FBACK_INITIAL: |
575 | hcrx->x_recv = 0; | 688 | hcrx->ccid3hcrx_x_recv = 0; |
576 | hcrx->p_inverse = ~0U; /* see RFC 4342, 8.5 */ | 689 | hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ |
577 | break; | 690 | break; |
578 | case CCID3_FBACK_PARAM_CHANGE: | 691 | case CCID3_FBACK_PARAM_CHANGE: |
579 | if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) { | ||
580 | /* | ||
581 | * rfc3448bis-06, 6.3.1: First packet(s) lost or marked | ||
582 | * FIXME: in rfc3448bis the receiver returns X_recv=0 | ||
583 | * here as it normally would in the first feedback packet. | ||
584 | * However this is not possible yet, since the code still | ||
585 | * uses RFC 3448, i.e. | ||
586 | * If (p > 0) | ||
587 | * Calculate X_calc using the TCP throughput equation. | ||
588 | * X = max(min(X_calc, 2*X_recv), s/t_mbi); | ||
589 | * would bring X down to s/t_mbi. That is why we return | ||
590 | * X_recv according to rfc3448bis-06 for the moment. | ||
591 | */ | ||
592 | u32 s = tfrc_rx_hist_packet_size(&hcrx->hist), | ||
593 | rtt = tfrc_rx_hist_rtt(&hcrx->hist); | ||
594 | |||
595 | hcrx->x_recv = scaled_div32(s, 2 * rtt); | ||
596 | break; | ||
597 | } | ||
598 | /* | 692 | /* |
599 | * When parameters change (new loss or p > p_prev), we do not | 693 | * When parameters change (new loss or p > p_prev), we do not |
600 | * have a reliable estimate for R_m of [RFC 3448, 6.2] and so | 694 | * have a reliable estimate for R_m of [RFC 3448, 6.2] and so |
601 | * always check whether at least RTT time units were covered. | 695 | * need to reuse the previous value of X_recv. However, when |
696 | * X_recv was 0 (due to early loss), this would kill X down to | ||
697 | * s/t_mbi (i.e. one packet in 64 seconds). | ||
698 | * To avoid such drastic reduction, we approximate X_recv as | ||
699 | * the number of bytes since last feedback. | ||
700 | * This is a safe fallback, since X is bounded above by X_calc. | ||
602 | */ | 701 | */ |
603 | hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); | 702 | if (hcrx->ccid3hcrx_x_recv > 0) |
604 | break; | 703 | break; |
704 | /* fall through */ | ||
605 | case CCID3_FBACK_PERIODIC: | 705 | case CCID3_FBACK_PERIODIC: |
606 | /* | 706 | delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback); |
607 | * Step (2) of rfc3448bis-06, 6.2: | 707 | if (delta <= 0) |
608 | * - if no data packets have been received, just restart timer | 708 | DCCP_BUG("delta (%ld) <= 0", (long)delta); |
609 | * - if data packets have been received, re-compute X_recv | 709 | else |
610 | */ | 710 | hcrx->ccid3hcrx_x_recv = |
611 | if (hcrx->hist.bytes_recvd == 0) | 711 | scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); |
612 | goto prepare_for_next_time; | ||
613 | hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); | ||
614 | break; | 712 | break; |
615 | default: | 713 | default: |
616 | return; | 714 | return; |
617 | } | 715 | } |
618 | 716 | ||
619 | ccid3_pr_debug("X_recv=%u, 1/p=%u\n", hcrx->x_recv, hcrx->p_inverse); | 717 | ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, |
718 | hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv); | ||
620 | 719 | ||
621 | dccp_sk(sk)->dccps_hc_rx_insert_options = 1; | 720 | hcrx->ccid3hcrx_tstamp_last_feedback = now; |
622 | dccp_send_ack(sk); | 721 | hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval; |
722 | hcrx->ccid3hcrx_bytes_recv = 0; | ||
623 | 723 | ||
624 | prepare_for_next_time: | 724 | dp->dccps_hc_rx_insert_options = 1; |
625 | tfrc_rx_hist_restart_byte_counter(&hcrx->hist); | 725 | dccp_send_ack(sk); |
626 | hcrx->last_counter = dccp_hdr(skb)->dccph_ccval; | ||
627 | hcrx->feedback = fbtype; | ||
628 | } | 726 | } |
629 | 727 | ||
630 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | 728 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) |
631 | { | 729 | { |
632 | const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 730 | const struct ccid3_hc_rx_sock *hcrx; |
633 | __be32 x_recv, pinv; | 731 | __be32 x_recv, pinv; |
634 | 732 | ||
635 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | 733 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) |
636 | return 0; | 734 | return 0; |
637 | 735 | ||
736 | hcrx = ccid3_hc_rx_sk(sk); | ||
737 | |||
638 | if (dccp_packet_without_ack(skb)) | 738 | if (dccp_packet_without_ack(skb)) |
639 | return 0; | 739 | return 0; |
640 | 740 | ||
641 | x_recv = htonl(hcrx->x_recv); | 741 | x_recv = htonl(hcrx->ccid3hcrx_x_recv); |
642 | pinv = htonl(hcrx->p_inverse); | 742 | pinv = htonl(hcrx->ccid3hcrx_pinv); |
643 | 743 | ||
644 | if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, | 744 | if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, |
645 | &pinv, sizeof(pinv)) || | 745 | &pinv, sizeof(pinv)) || |
@@ -662,95 +762,171 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | |||
662 | static u32 ccid3_first_li(struct sock *sk) | 762 | static u32 ccid3_first_li(struct sock *sk) |
663 | { | 763 | { |
664 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 764 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
665 | u32 s = tfrc_rx_hist_packet_size(&hcrx->hist), | 765 | u32 x_recv, p, delta; |
666 | rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p; | ||
667 | u64 fval; | 766 | u64 fval; |
668 | 767 | ||
669 | /* | 768 | if (hcrx->ccid3hcrx_rtt == 0) { |
670 | * rfc3448bis-06, 6.3.1: First data packet(s) are marked or lost. Set p | 769 | DCCP_WARN("No RTT estimate available, using fallback RTT\n"); |
671 | * to give the equivalent of X_target = s/(2*R). Thus fval = 2 and so p | 770 | hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT; |
672 | * is about 20.64%. This yields an interval length of 4.84 (rounded up). | 771 | } |
673 | */ | ||
674 | if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) | ||
675 | return 5; | ||
676 | 772 | ||
677 | x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); | 773 | delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback)); |
678 | if (x_recv == 0) | 774 | x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); |
679 | goto failed; | 775 | if (x_recv == 0) { /* would also trigger divide-by-zero */ |
776 | DCCP_WARN("X_recv==0\n"); | ||
777 | if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { | ||
778 | DCCP_BUG("stored value of X_recv is zero"); | ||
779 | return ~0U; | ||
780 | } | ||
781 | } | ||
680 | 782 | ||
681 | fval = scaled_div32(scaled_div(s, rtt), x_recv); | 783 | fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt); |
784 | fval = scaled_div32(fval, x_recv); | ||
682 | p = tfrc_calc_x_reverse_lookup(fval); | 785 | p = tfrc_calc_x_reverse_lookup(fval); |
683 | 786 | ||
684 | ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " | 787 | ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " |
685 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); | 788 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); |
686 | 789 | ||
687 | if (p > 0) | 790 | return p == 0 ? ~0U : scaled_div(1, p); |
688 | return scaled_div(1, p); | ||
689 | failed: | ||
690 | return UINT_MAX; | ||
691 | } | 791 | } |
692 | 792 | ||
693 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | 793 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
694 | { | 794 | { |
695 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 795 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
796 | enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; | ||
696 | const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; | 797 | const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; |
697 | const bool is_data_packet = dccp_data_packet(skb); | 798 | const bool is_data_packet = dccp_data_packet(skb); |
698 | 799 | ||
800 | if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { | ||
801 | if (is_data_packet) { | ||
802 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; | ||
803 | do_feedback = CCID3_FBACK_INITIAL; | ||
804 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); | ||
805 | hcrx->ccid3hcrx_s = payload; | ||
806 | /* | ||
807 | * Not necessary to update ccid3hcrx_bytes_recv here, | ||
808 | * since X_recv = 0 for the first feedback packet (cf. | ||
809 | * RFC 3448, 6.3) -- gerrit | ||
810 | */ | ||
811 | } | ||
812 | goto update_records; | ||
813 | } | ||
814 | |||
815 | if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb)) | ||
816 | return; /* done receiving */ | ||
817 | |||
818 | if (is_data_packet) { | ||
819 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; | ||
820 | /* | ||
821 | * Update moving-average of s and the sum of received payload bytes | ||
822 | */ | ||
823 | hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9); | ||
824 | hcrx->ccid3hcrx_bytes_recv += payload; | ||
825 | } | ||
826 | |||
699 | /* | 827 | /* |
700 | * Perform loss detection and handle pending losses | 828 | * Perform loss detection and handle pending losses |
701 | */ | 829 | */ |
702 | if (tfrc_rx_congestion_event(&hcrx->hist, &hcrx->li_hist, | 830 | if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, |
703 | skb, ndp, ccid3_first_li, sk)) | 831 | skb, ndp, ccid3_first_li, sk)) { |
704 | ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PARAM_CHANGE); | 832 | do_feedback = CCID3_FBACK_PARAM_CHANGE; |
833 | goto done_receiving; | ||
834 | } | ||
835 | |||
836 | if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist)) | ||
837 | return; /* done receiving */ | ||
838 | |||
705 | /* | 839 | /* |
706 | * Feedback for first non-empty data packet (RFC 3448, 6.3) | 840 | * Handle data packets: RTT sampling and monitoring p |
707 | */ | 841 | */ |
708 | else if (unlikely(hcrx->feedback == CCID3_FBACK_NONE && is_data_packet)) | 842 | if (unlikely(!is_data_packet)) |
709 | ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_INITIAL); | 843 | goto update_records; |
844 | |||
845 | if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) { | ||
846 | const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb); | ||
847 | /* | ||
848 | * Empty loss history: no loss so far, hence p stays 0. | ||
849 | * Sample RTT values, since an RTT estimate is required for the | ||
850 | * computation of p when the first loss occurs; RFC 3448, 6.3.1. | ||
851 | */ | ||
852 | if (sample != 0) | ||
853 | hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9); | ||
854 | |||
855 | } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) { | ||
856 | /* | ||
857 | * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean | ||
858 | * has decreased (resp. p has increased), send feedback now. | ||
859 | */ | ||
860 | do_feedback = CCID3_FBACK_PARAM_CHANGE; | ||
861 | } | ||
862 | |||
710 | /* | 863 | /* |
711 | * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 | 864 | * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 |
712 | */ | 865 | */ |
713 | else if (!tfrc_rx_hist_loss_pending(&hcrx->hist) && is_data_packet && | 866 | if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3) |
714 | SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3) | 867 | do_feedback = CCID3_FBACK_PERIODIC; |
715 | ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PERIODIC); | 868 | |
869 | update_records: | ||
870 | tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp); | ||
871 | |||
872 | done_receiving: | ||
873 | if (do_feedback) | ||
874 | ccid3_hc_rx_send_feedback(sk, skb, do_feedback); | ||
716 | } | 875 | } |
717 | 876 | ||
718 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) | 877 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) |
719 | { | 878 | { |
720 | struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); | 879 | struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); |
721 | 880 | ||
722 | tfrc_lh_init(&hcrx->li_hist); | 881 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; |
723 | return tfrc_rx_hist_init(&hcrx->hist, sk); | 882 | tfrc_lh_init(&hcrx->ccid3hcrx_li_hist); |
883 | return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist); | ||
724 | } | 884 | } |
725 | 885 | ||
726 | static void ccid3_hc_rx_exit(struct sock *sk) | 886 | static void ccid3_hc_rx_exit(struct sock *sk) |
727 | { | 887 | { |
728 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 888 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
729 | 889 | ||
730 | tfrc_rx_hist_purge(&hcrx->hist); | 890 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); |
731 | tfrc_lh_cleanup(&hcrx->li_hist); | 891 | |
892 | tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist); | ||
893 | tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist); | ||
732 | } | 894 | } |
733 | 895 | ||
734 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) | 896 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) |
735 | { | 897 | { |
898 | const struct ccid3_hc_rx_sock *hcrx; | ||
899 | |||
900 | /* Listen socks doesn't have a private CCID block */ | ||
901 | if (sk->sk_state == DCCP_LISTEN) | ||
902 | return; | ||
903 | |||
904 | hcrx = ccid3_hc_rx_sk(sk); | ||
905 | info->tcpi_ca_state = hcrx->ccid3hcrx_state; | ||
736 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; | 906 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; |
737 | info->tcpi_rcv_rtt = tfrc_rx_hist_rtt(&ccid3_hc_rx_sk(sk)->hist); | 907 | info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; |
738 | } | 908 | } |
739 | 909 | ||
740 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | 910 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, |
741 | u32 __user *optval, int __user *optlen) | 911 | u32 __user *optval, int __user *optlen) |
742 | { | 912 | { |
743 | const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 913 | const struct ccid3_hc_rx_sock *hcrx; |
744 | struct tfrc_rx_info rx_info; | 914 | struct tfrc_rx_info rx_info; |
745 | const void *val; | 915 | const void *val; |
746 | 916 | ||
917 | /* Listen socks doesn't have a private CCID block */ | ||
918 | if (sk->sk_state == DCCP_LISTEN) | ||
919 | return -EINVAL; | ||
920 | |||
921 | hcrx = ccid3_hc_rx_sk(sk); | ||
747 | switch (optname) { | 922 | switch (optname) { |
748 | case DCCP_SOCKOPT_CCID_RX_INFO: | 923 | case DCCP_SOCKOPT_CCID_RX_INFO: |
749 | if (len < sizeof(rx_info)) | 924 | if (len < sizeof(rx_info)) |
750 | return -EINVAL; | 925 | return -EINVAL; |
751 | rx_info.tfrcrx_x_recv = hcrx->x_recv; | 926 | rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv; |
752 | rx_info.tfrcrx_rtt = tfrc_rx_hist_rtt(&hcrx->hist); | 927 | rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt; |
753 | rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hcrx->p_inverse); | 928 | rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U : |
929 | scaled_div(1, hcrx->ccid3hcrx_pinv); | ||
754 | len = sizeof(rx_info); | 930 | len = sizeof(rx_info); |
755 | val = &rx_info; | 931 | val = &rx_info; |
756 | break; | 932 | break; |
@@ -786,9 +962,6 @@ static struct ccid_operations ccid3 = { | |||
786 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, | 962 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, |
787 | }; | 963 | }; |
788 | 964 | ||
789 | module_param(do_osc_prev, bool, 0644); | ||
790 | MODULE_PARM_DESC(do_osc_prev, "Use Oscillation Prevention (RFC 3448, 4.5)"); | ||
791 | |||
792 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG | 965 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG |
793 | module_param(ccid3_debug, bool, 0644); | 966 | module_param(ccid3_debug, bool, 0644); |
794 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | 967 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); |
@@ -796,19 +969,6 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | |||
796 | 969 | ||
797 | static __init int ccid3_module_init(void) | 970 | static __init int ccid3_module_init(void) |
798 | { | 971 | { |
799 | struct timespec tp; | ||
800 | |||
801 | /* | ||
802 | * Without a fine-grained clock resolution, RTTs/X_recv are not sampled | ||
803 | * correctly and feedback is sent either too early or too late. | ||
804 | */ | ||
805 | hrtimer_get_res(CLOCK_MONOTONIC, &tp); | ||
806 | if (tp.tv_sec || tp.tv_nsec > DCCP_TIME_RESOLUTION * NSEC_PER_USEC) { | ||
807 | printk(KERN_ERR "%s: Timer too coarse (%ld usec), need %u-usec" | ||
808 | " resolution - check your clocksource.\n", __func__, | ||
809 | tp.tv_nsec/NSEC_PER_USEC, DCCP_TIME_RESOLUTION); | ||
810 | return -ESOCKTNOSUPPORT; | ||
811 | } | ||
812 | return ccid_register(&ccid3); | 972 | return ccid_register(&ccid3); |
813 | } | 973 | } |
814 | module_init(ccid3_module_init); | 974 | module_init(ccid3_module_init); |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index af6e1bf937d9..49ca32bd7e79 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -47,22 +47,11 @@ | |||
47 | /* Two seconds as per RFC 3448 4.2 */ | 47 | /* Two seconds as per RFC 3448 4.2 */ |
48 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) | 48 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) |
49 | 49 | ||
50 | /* Maximum backoff interval t_mbi (RFC 3448, 4.3) */ | 50 | /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ |
51 | #define TFRC_T_MBI (64 * USEC_PER_SEC) | 51 | #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) |
52 | 52 | ||
53 | /* | 53 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ |
54 | * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are | 54 | #define TFRC_T_MBI 64 |
55 | * rounded down to 0, since sk_reset_timer() here uses millisecond granularity. | ||
56 | * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse | ||
57 | * resolution of HZ < 500 means that the error is below one timer tick (t_gran) | ||
58 | * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ). | ||
59 | */ | ||
60 | #if (HZ >= 500) | ||
61 | # define TFRC_T_DELTA USEC_PER_MSEC | ||
62 | #else | ||
63 | # define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ)) | ||
64 | #warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC. | ||
65 | #endif | ||
66 | 55 | ||
67 | enum ccid3_options { | 56 | enum ccid3_options { |
68 | TFRC_OPT_LOSS_EVENT_RATE = 192, | 57 | TFRC_OPT_LOSS_EVENT_RATE = 192, |
@@ -70,43 +59,62 @@ enum ccid3_options { | |||
70 | TFRC_OPT_RECEIVE_RATE = 194, | 59 | TFRC_OPT_RECEIVE_RATE = 194, |
71 | }; | 60 | }; |
72 | 61 | ||
62 | struct ccid3_options_received { | ||
63 | u64 ccid3or_seqno:48, | ||
64 | ccid3or_loss_intervals_idx:16; | ||
65 | u16 ccid3or_loss_intervals_len; | ||
66 | u32 ccid3or_loss_event_rate; | ||
67 | u32 ccid3or_receive_rate; | ||
68 | }; | ||
69 | |||
70 | /* TFRC sender states */ | ||
71 | enum ccid3_hc_tx_states { | ||
72 | TFRC_SSTATE_NO_SENT = 1, | ||
73 | TFRC_SSTATE_NO_FBACK, | ||
74 | TFRC_SSTATE_FBACK, | ||
75 | TFRC_SSTATE_TERM, | ||
76 | }; | ||
77 | |||
73 | /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket | 78 | /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket |
74 | * | 79 | * |
75 | * @x - Current sending rate in 64 * bytes per second | 80 | * @ccid3hctx_x - Current sending rate in 64 * bytes per second |
76 | * @x_recv - Receive rate in 64 * bytes per second | 81 | * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second |
77 | * @x_calc - Calculated rate in bytes per second | 82 | * @ccid3hctx_x_calc - Calculated rate in bytes per second |
78 | * @rtt - Estimate of current round trip time in usecs | 83 | * @ccid3hctx_rtt - Estimate of current round trip time in usecs |
79 | * @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5) | 84 | * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 |
80 | * @p - Current loss event rate (0-1) scaled by 1000000 | 85 | * @ccid3hctx_s - Packet size in bytes |
81 | * @s - Packet size in bytes | 86 | * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs |
82 | * @t_rto - Nofeedback Timer setting in usecs | 87 | * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs |
83 | * @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs | 88 | * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states |
84 | * @feedback - Whether feedback has been received or not | 89 | * @ccid3hctx_last_win_count - Last window counter sent |
85 | * @last_win_count - Last window counter sent | 90 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet |
86 | * @t_last_win_count - Timestamp of earliest packet with | 91 | * with last_win_count value sent |
87 | * last_win_count value sent | 92 | * @ccid3hctx_no_feedback_timer - Handle to no feedback timer |
88 | * @no_feedback_timer - Handle to no feedback timer | 93 | * @ccid3hctx_t_ld - Time last doubled during slow start |
89 | * @t_ld - Time last doubled during slow start | 94 | * @ccid3hctx_t_nom - Nominal send time of next packet |
90 | * @t_nom - Nominal send time of next packet | 95 | * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs |
91 | * @hist - Packet history | 96 | * @ccid3hctx_hist - Packet history |
97 | * @ccid3hctx_options_received - Parsed set of retrieved options | ||
92 | */ | 98 | */ |
93 | struct ccid3_hc_tx_sock { | 99 | struct ccid3_hc_tx_sock { |
94 | u64 x; | 100 | struct tfrc_tx_info ccid3hctx_tfrc; |
95 | u64 x_recv; | 101 | #define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x |
96 | u32 x_calc; | 102 | #define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv |
97 | u32 rtt; | 103 | #define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc |
98 | u16 r_sqmean; | 104 | #define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt |
99 | u32 p; | 105 | #define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p |
100 | u32 t_rto; | 106 | #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto |
101 | u32 t_ipi; | 107 | #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi |
102 | u16 s; | 108 | u16 ccid3hctx_s; |
103 | bool feedback:1; | 109 | enum ccid3_hc_tx_states ccid3hctx_state:8; |
104 | u8 last_win_count; | 110 | u8 ccid3hctx_last_win_count; |
105 | ktime_t t_last_win_count; | 111 | ktime_t ccid3hctx_t_last_win_count; |
106 | struct timer_list no_feedback_timer; | 112 | struct timer_list ccid3hctx_no_feedback_timer; |
107 | ktime_t t_ld; | 113 | ktime_t ccid3hctx_t_ld; |
108 | ktime_t t_nom; | 114 | ktime_t ccid3hctx_t_nom; |
109 | struct tfrc_tx_hist_entry *hist; | 115 | u32 ccid3hctx_delta; |
116 | struct tfrc_tx_hist_entry *ccid3hctx_hist; | ||
117 | struct ccid3_options_received ccid3hctx_options_received; | ||
110 | }; | 118 | }; |
111 | 119 | ||
112 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | 120 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) |
@@ -116,32 +124,41 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | |||
116 | return hctx; | 124 | return hctx; |
117 | } | 125 | } |
118 | 126 | ||
119 | 127 | /* TFRC receiver states */ | |
120 | enum ccid3_fback_type { | 128 | enum ccid3_hc_rx_states { |
121 | CCID3_FBACK_NONE = 0, | 129 | TFRC_RSTATE_NO_DATA = 1, |
122 | CCID3_FBACK_INITIAL, | 130 | TFRC_RSTATE_DATA, |
123 | CCID3_FBACK_PERIODIC, | 131 | TFRC_RSTATE_TERM = 127, |
124 | CCID3_FBACK_PARAM_CHANGE | ||
125 | }; | 132 | }; |
126 | 133 | ||
127 | /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket | 134 | /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket |
128 | * | 135 | * |
129 | * @last_counter - Tracks window counter (RFC 4342, 8.1) | 136 | * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) |
130 | * @feedback - The type of the feedback last sent | 137 | * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) |
131 | * @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3) | 138 | * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4) |
132 | * @tstamp_last_feedback - Time at which last feedback was sent | 139 | * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1) |
133 | * @hist - Packet history (loss detection + RTT sampling) | 140 | * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states |
134 | * @li_hist - Loss Interval database | 141 | * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes |
135 | * @p_inverse - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) | 142 | * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3) |
143 | * @ccid3hcrx_rtt - Receiver estimate of RTT | ||
144 | * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent | ||
145 | * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent | ||
146 | * @ccid3hcrx_hist - Packet history (loss detection + RTT sampling) | ||
147 | * @ccid3hcrx_li_hist - Loss Interval database | ||
148 | * @ccid3hcrx_s - Received packet size in bytes | ||
149 | * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) | ||
136 | */ | 150 | */ |
137 | struct ccid3_hc_rx_sock { | 151 | struct ccid3_hc_rx_sock { |
138 | u8 last_counter:4; | 152 | u8 ccid3hcrx_last_counter:4; |
139 | enum ccid3_fback_type feedback:4; | 153 | enum ccid3_hc_rx_states ccid3hcrx_state:8; |
140 | u32 x_recv; | 154 | u32 ccid3hcrx_bytes_recv; |
141 | ktime_t tstamp_last_feedback; | 155 | u32 ccid3hcrx_x_recv; |
142 | struct tfrc_rx_hist hist; | 156 | u32 ccid3hcrx_rtt; |
143 | struct tfrc_loss_hist li_hist; | 157 | ktime_t ccid3hcrx_tstamp_last_feedback; |
144 | #define p_inverse li_hist.i_mean | 158 | struct tfrc_rx_hist ccid3hcrx_hist; |
159 | struct tfrc_loss_hist ccid3hcrx_li_hist; | ||
160 | u16 ccid3hcrx_s; | ||
161 | #define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean | ||
145 | }; | 162 | }; |
146 | 163 | ||
147 | static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) | 164 | static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) |
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index b1ae8f8259e5..5b3ce0688c5c 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c | |||
@@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) | |||
86 | 86 | ||
87 | /** | 87 | /** |
88 | * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 | 88 | * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 |
89 | * This updates I_mean as the sequence numbers increase. As a consequence, the | 89 | * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev |
90 | * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1) | ||
91 | * decreases, and thus there is no need to send renewed feedback. | ||
92 | */ | 90 | */ |
93 | void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) | 91 | u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) |
94 | { | 92 | { |
95 | struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); | 93 | struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); |
94 | u32 old_i_mean = lh->i_mean; | ||
96 | s64 len; | 95 | s64 len; |
97 | 96 | ||
98 | if (cur == NULL) /* not initialised */ | 97 | if (cur == NULL) /* not initialised */ |
99 | return; | 98 | return 0; |
100 | |||
101 | /* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */ | ||
102 | if (!dccp_data_packet(skb)) | ||
103 | return; | ||
104 | 99 | ||
105 | len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; | 100 | len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; |
106 | 101 | ||
107 | if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ | 102 | if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ |
108 | return; | 103 | return 0; |
109 | 104 | ||
110 | if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) | 105 | if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) |
111 | /* | 106 | /* |
@@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) | |||
119 | cur->li_is_closed = 1; | 114 | cur->li_is_closed = 1; |
120 | 115 | ||
121 | if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ | 116 | if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ |
122 | return; | 117 | return 0; |
123 | 118 | ||
124 | cur->li_length = len; | 119 | cur->li_length = len; |
125 | tfrc_lh_calc_i_mean(lh); | 120 | tfrc_lh_calc_i_mean(lh); |
121 | |||
122 | return (lh->i_mean < old_i_mean); | ||
126 | } | 123 | } |
124 | EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); | ||
127 | 125 | ||
128 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ | 126 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ |
129 | static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, | 127 | static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, |
@@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, | |||
140 | * @sk: Used by @calc_first_li in caller-specific way (subtyping) | 138 | * @sk: Used by @calc_first_li in caller-specific way (subtyping) |
141 | * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. | 139 | * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. |
142 | */ | 140 | */ |
143 | bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, | 141 | int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, |
144 | u32 (*calc_first_li)(struct sock *), struct sock *sk) | 142 | u32 (*calc_first_li)(struct sock *), struct sock *sk) |
145 | { | 143 | { |
146 | struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; | 144 | struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; |
147 | 145 | ||
148 | if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) | 146 | if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) |
149 | return false; | 147 | return 0; |
150 | 148 | ||
151 | new = tfrc_lh_demand_next(lh); | 149 | new = tfrc_lh_demand_next(lh); |
152 | if (unlikely(new == NULL)) { | 150 | if (unlikely(new == NULL)) { |
153 | DCCP_CRIT("Cannot allocate/add loss record."); | 151 | DCCP_CRIT("Cannot allocate/add loss record."); |
154 | return false; | 152 | return 0; |
155 | } | 153 | } |
156 | 154 | ||
157 | new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; | 155 | new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; |
@@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, | |||
169 | 167 | ||
170 | tfrc_lh_calc_i_mean(lh); | 168 | tfrc_lh_calc_i_mean(lh); |
171 | } | 169 | } |
172 | return true; | 170 | return 1; |
173 | } | 171 | } |
174 | EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); | 172 | EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); |
175 | 173 | ||
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index d08a226db43e..246018a3b269 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h | |||
@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) | |||
67 | 67 | ||
68 | struct tfrc_rx_hist; | 68 | struct tfrc_rx_hist; |
69 | 69 | ||
70 | extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, | 70 | extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, |
71 | u32 (*first_li)(struct sock *), struct sock *); | 71 | u32 (*first_li)(struct sock *), struct sock *); |
72 | extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); | 72 | extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); |
73 | extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); | 73 | extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); |
74 | 74 | ||
75 | #endif /* _DCCP_LI_HIST_ */ | 75 | #endif /* _DCCP_LI_HIST_ */ |
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index cce9f03bda3e..6cc108afdc3b 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c | |||
@@ -40,6 +40,18 @@ | |||
40 | #include "packet_history.h" | 40 | #include "packet_history.h" |
41 | #include "../../dccp.h" | 41 | #include "../../dccp.h" |
42 | 42 | ||
43 | /** | ||
44 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
45 | * @next: next oldest entry (LIFO order) | ||
46 | * @seqno: sequence number of this entry | ||
47 | * @stamp: send time of packet with sequence number @seqno | ||
48 | */ | ||
49 | struct tfrc_tx_hist_entry { | ||
50 | struct tfrc_tx_hist_entry *next; | ||
51 | u64 seqno; | ||
52 | ktime_t stamp; | ||
53 | }; | ||
54 | |||
43 | /* | 55 | /* |
44 | * Transmitter History Routines | 56 | * Transmitter History Routines |
45 | */ | 57 | */ |
@@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void) | |||
61 | } | 73 | } |
62 | } | 74 | } |
63 | 75 | ||
76 | static struct tfrc_tx_hist_entry * | ||
77 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | ||
78 | { | ||
79 | while (head != NULL && head->seqno != seqno) | ||
80 | head = head->next; | ||
81 | |||
82 | return head; | ||
83 | } | ||
84 | |||
64 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) | 85 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) |
65 | { | 86 | { |
66 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); | 87 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); |
@@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) | |||
90 | } | 111 | } |
91 | EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); | 112 | EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); |
92 | 113 | ||
114 | u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, | ||
115 | const ktime_t now) | ||
116 | { | ||
117 | u32 rtt = 0; | ||
118 | struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); | ||
119 | |||
120 | if (packet != NULL) { | ||
121 | rtt = ktime_us_delta(now, packet->stamp); | ||
122 | /* | ||
123 | * Garbage-collect older (irrelevant) entries: | ||
124 | */ | ||
125 | tfrc_tx_hist_purge(&packet->next); | ||
126 | } | ||
127 | |||
128 | return rtt; | ||
129 | } | ||
130 | EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); | ||
131 | |||
132 | |||
93 | /* | 133 | /* |
94 | * Receiver History Routines | 134 | * Receiver History Routines |
95 | */ | 135 | */ |
@@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) | |||
151 | } | 191 | } |
152 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); | 192 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); |
153 | 193 | ||
154 | |||
155 | static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) | ||
156 | { | ||
157 | struct tfrc_rx_hist_entry *tmp = h->ring[a]; | ||
158 | |||
159 | h->ring[a] = h->ring[b]; | ||
160 | h->ring[b] = tmp; | ||
161 | } | ||
162 | |||
163 | static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) | 194 | static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) |
164 | { | 195 | { |
165 | __tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a), | 196 | const u8 idx_a = tfrc_rx_hist_index(h, a), |
166 | tfrc_rx_hist_index(h, b)); | 197 | idx_b = tfrc_rx_hist_index(h, b); |
167 | } | 198 | struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; |
168 | 199 | ||
169 | /** | 200 | h->ring[idx_a] = h->ring[idx_b]; |
170 | * tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling | 201 | h->ring[idx_b] = tmp; |
171 | * This is called after loss detection has finished, when the history entry | ||
172 | * with the index of `loss_count' holds the highest-received sequence number. | ||
173 | * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt). | ||
174 | */ | ||
175 | static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h) | ||
176 | { | ||
177 | __tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count)); | ||
178 | h->loss_count = h->loss_start = 0; | ||
179 | } | 202 | } |
180 | 203 | ||
181 | /* | 204 | /* |
@@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) | |||
192 | u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, | 215 | u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, |
193 | s1 = DCCP_SKB_CB(skb)->dccpd_seq; | 216 | s1 = DCCP_SKB_CB(skb)->dccpd_seq; |
194 | 217 | ||
195 | if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */ | 218 | if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ |
196 | h->loss_count = 1; | 219 | h->loss_count = 1; |
220 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); | ||
221 | } | ||
197 | } | 222 | } |
198 | 223 | ||
199 | static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) | 224 | static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) |
@@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2 | |||
215 | 240 | ||
216 | if (dccp_loss_free(s2, s1, n1)) { | 241 | if (dccp_loss_free(s2, s1, n1)) { |
217 | /* hole is filled: S0, S2, and S1 are consecutive */ | 242 | /* hole is filled: S0, S2, and S1 are consecutive */ |
218 | tfrc_rx_hist_resume_rtt_sampling(h); | 243 | h->loss_count = 0; |
244 | h->loss_start = tfrc_rx_hist_index(h, 1); | ||
219 | } else | 245 | } else |
220 | /* gap between S2 and S1: just update loss_prev */ | 246 | /* gap between S2 and S1: just update loss_prev */ |
221 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); | 247 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); |
@@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) | |||
268 | 294 | ||
269 | if (dccp_loss_free(s1, s2, n2)) { | 295 | if (dccp_loss_free(s1, s2, n2)) { |
270 | /* entire hole filled by S0, S3, S1, S2 */ | 296 | /* entire hole filled by S0, S3, S1, S2 */ |
271 | tfrc_rx_hist_resume_rtt_sampling(h); | 297 | h->loss_start = tfrc_rx_hist_index(h, 2); |
298 | h->loss_count = 0; | ||
272 | } else { | 299 | } else { |
273 | /* gap remains between S1 and S2 */ | 300 | /* gap remains between S1 and S2 */ |
274 | h->loss_start = tfrc_rx_hist_index(h, 1); | 301 | h->loss_start = tfrc_rx_hist_index(h, 1); |
@@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h) | |||
312 | 339 | ||
313 | if (dccp_loss_free(s2, s3, n3)) { | 340 | if (dccp_loss_free(s2, s3, n3)) { |
314 | /* no gap between S2 and S3: entire hole is filled */ | 341 | /* no gap between S2 and S3: entire hole is filled */ |
315 | tfrc_rx_hist_resume_rtt_sampling(h); | 342 | h->loss_start = tfrc_rx_hist_index(h, 3); |
343 | h->loss_count = 0; | ||
316 | } else { | 344 | } else { |
317 | /* gap between S2 and S3 */ | 345 | /* gap between S2 and S3 */ |
318 | h->loss_start = tfrc_rx_hist_index(h, 2); | 346 | h->loss_start = tfrc_rx_hist_index(h, 2); |
@@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h) | |||
326 | } | 354 | } |
327 | 355 | ||
328 | /** | 356 | /** |
329 | * tfrc_rx_congestion_event - Loss detection and further processing | 357 | * tfrc_rx_handle_loss - Loss detection and further processing |
330 | * @h: The non-empty RX history object | 358 | * @h: The non-empty RX history object |
331 | * @lh: Loss Intervals database to update | 359 | * @lh: Loss Intervals database to update |
332 | * @skb: Currently received packet | 360 | * @skb: Currently received packet |
333 | * @ndp: The NDP count belonging to @skb | 361 | * @ndp: The NDP count belonging to @skb |
334 | * @first_li: Caller-dependent computation of first loss interval in @lh | 362 | * @calc_first_li: Caller-dependent computation of first loss interval in @lh |
335 | * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) | 363 | * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) |
336 | * Chooses action according to pending loss, updates LI database when a new | 364 | * Chooses action according to pending loss, updates LI database when a new |
337 | * loss was detected, and does required post-processing. Returns 1 when caller | 365 | * loss was detected, and does required post-processing. Returns 1 when caller |
338 | * should send feedback, 0 otherwise. | 366 | * should send feedback, 0 otherwise. |
@@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h) | |||
340 | * records accordingly, the caller should not perform any more RX history | 368 | * records accordingly, the caller should not perform any more RX history |
341 | * operations when loss_count is greater than 0 after calling this function. | 369 | * operations when loss_count is greater than 0 after calling this function. |
342 | */ | 370 | */ |
343 | bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, | 371 | int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, |
344 | struct tfrc_loss_hist *lh, | 372 | struct tfrc_loss_hist *lh, |
345 | struct sk_buff *skb, const u64 ndp, | 373 | struct sk_buff *skb, const u64 ndp, |
346 | u32 (*first_li)(struct sock *), struct sock *sk) | 374 | u32 (*calc_first_li)(struct sock *), struct sock *sk) |
347 | { | 375 | { |
348 | bool new_event = false; | 376 | int is_new_loss = 0; |
349 | |||
350 | if (tfrc_rx_hist_duplicate(h, skb)) | ||
351 | return 0; | ||
352 | 377 | ||
353 | if (h->loss_count == 0) { | 378 | if (h->loss_count == 0) { |
354 | __do_track_loss(h, skb, ndp); | 379 | __do_track_loss(h, skb, ndp); |
355 | tfrc_rx_hist_sample_rtt(h, skb); | ||
356 | tfrc_rx_hist_add_packet(h, skb, ndp); | ||
357 | } else if (h->loss_count == 1) { | 380 | } else if (h->loss_count == 1) { |
358 | __one_after_loss(h, skb, ndp); | 381 | __one_after_loss(h, skb, ndp); |
359 | } else if (h->loss_count != 2) { | 382 | } else if (h->loss_count != 2) { |
@@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, | |||
362 | /* | 385 | /* |
363 | * Update Loss Interval database and recycle RX records | 386 | * Update Loss Interval database and recycle RX records |
364 | */ | 387 | */ |
365 | new_event = tfrc_lh_interval_add(lh, h, first_li, sk); | 388 | is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); |
366 | __three_after_loss(h); | 389 | __three_after_loss(h); |
367 | } | 390 | } |
368 | 391 | return is_new_loss; | |
369 | /* | ||
370 | * Update moving-average of `s' and the sum of received payload bytes. | ||
371 | */ | ||
372 | if (dccp_data_packet(skb)) { | ||
373 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; | ||
374 | |||
375 | h->packet_size = tfrc_ewma(h->packet_size, payload, 9); | ||
376 | h->bytes_recvd += payload; | ||
377 | } | ||
378 | |||
379 | /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */ | ||
380 | if (!new_event) | ||
381 | tfrc_lh_update_i_mean(lh, skb); | ||
382 | |||
383 | return new_event; | ||
384 | } | 392 | } |
385 | EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event); | 393 | EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); |
386 | 394 | ||
387 | /* Compute the sending rate X_recv measured between feedback intervals */ | 395 | int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) |
388 | u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv) | ||
389 | { | 396 | { |
390 | u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate; | 397 | int i; |
391 | s64 delta = ktime_to_us(net_timedelta(h->bytes_start)); | ||
392 | |||
393 | WARN_ON(delta <= 0); | ||
394 | /* | ||
395 | * Ensure that the sampling interval for X_recv is at least one RTT, | ||
396 | * by extending the sampling interval backwards in time, over the last | ||
397 | * R_(m-1) seconds, as per rfc3448bis-06, 6.2. | ||
398 | * To reduce noise (e.g. when the RTT changes often), this is only | ||
399 | * done when delta is smaller than RTT/2. | ||
400 | */ | ||
401 | if (last_x_recv > 0 && delta < last_rtt/2) { | ||
402 | tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n", | ||
403 | (long)delta, (unsigned)last_rtt); | ||
404 | 398 | ||
405 | delta = (bytes ? delta : 0) + last_rtt; | 399 | for (i = 0; i <= TFRC_NDUPACK; i++) { |
406 | bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC); | 400 | h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); |
401 | if (h->ring[i] == NULL) | ||
402 | goto out_free; | ||
407 | } | 403 | } |
408 | 404 | ||
409 | if (unlikely(bytes == 0)) { | 405 | h->loss_count = h->loss_start = 0; |
410 | DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv); | 406 | return 0; |
411 | return last_x_recv; | 407 | |
408 | out_free: | ||
409 | while (i-- != 0) { | ||
410 | kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); | ||
411 | h->ring[i] = NULL; | ||
412 | } | 412 | } |
413 | return scaled_div32(bytes, delta); | 413 | return -ENOBUFS; |
414 | } | 414 | } |
415 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv); | 415 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); |
416 | 416 | ||
417 | void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) | 417 | void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) |
418 | { | 418 | { |
@@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) | |||
426 | } | 426 | } |
427 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); | 427 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); |
428 | 428 | ||
429 | static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) | 429 | /** |
430 | * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against | ||
431 | */ | ||
432 | static inline struct tfrc_rx_hist_entry * | ||
433 | tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) | ||
430 | { | 434 | { |
431 | int i; | 435 | return h->ring[0]; |
432 | |||
433 | memset(h, 0, sizeof(*h)); | ||
434 | |||
435 | for (i = 0; i <= TFRC_NDUPACK; i++) { | ||
436 | h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); | ||
437 | if (h->ring[i] == NULL) { | ||
438 | tfrc_rx_hist_purge(h); | ||
439 | return -ENOBUFS; | ||
440 | } | ||
441 | } | ||
442 | return 0; | ||
443 | } | 436 | } |
444 | 437 | ||
445 | int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk) | 438 | /** |
439 | * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry | ||
440 | */ | ||
441 | static inline struct tfrc_rx_hist_entry * | ||
442 | tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) | ||
446 | { | 443 | { |
447 | if (tfrc_rx_hist_alloc(h)) | 444 | return h->ring[h->rtt_sample_prev]; |
448 | return -ENOBUFS; | ||
449 | /* | ||
450 | * Initialise first entry with GSR to start loss detection as early as | ||
451 | * possible. Code using this must not use any other fields. The entry | ||
452 | * will be overwritten once the CCID updates its received packets. | ||
453 | */ | ||
454 | tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr; | ||
455 | return 0; | ||
456 | } | 445 | } |
457 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_init); | ||
458 | 446 | ||
459 | /** | 447 | /** |
460 | * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal | 448 | * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal |
461 | * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss | 449 | * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able |
462 | * is pending and uses the following history entries (via rtt_sample_prev): | 450 | * to compute a sample with given data - calling function should check this. |
463 | * - h->ring[0] contains the most recent history entry prior to @skb; | ||
464 | * - h->ring[1] is an unused `dummy' entry when the current difference is 0; | ||
465 | */ | 451 | */ |
466 | void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) | 452 | u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) |
467 | { | 453 | { |
468 | struct tfrc_rx_hist_entry *last = h->ring[0]; | 454 | u32 sample = 0, |
469 | u32 sample, delta_v; | 455 | delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, |
470 | 456 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); | |
471 | /* | 457 | |
472 | * When not to sample: | 458 | if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ |
473 | * - on non-data packets | 459 | if (h->rtt_sample_prev == 2) { /* previous candidate stored */ |
474 | * (RFC 4342, 8.1: CCVal only fully defined for data packets); | 460 | sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, |
475 | * - when no data packets have been received yet | 461 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); |
476 | * (FIXME: using sampled packet size as indicator here); | 462 | if (sample) |
477 | * - as long as there are gaps in the sequence space (pending loss). | 463 | sample = 4 / sample * |
478 | */ | 464 | ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, |
479 | if (!dccp_data_packet(skb) || h->packet_size == 0 || | 465 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); |
480 | tfrc_rx_hist_loss_pending(h)) | 466 | else /* |
481 | return; | 467 | * FIXME: This condition is in principle not |
468 | * possible but occurs when CCID is used for | ||
469 | * two-way data traffic. I have tried to trace | ||
470 | * it, but the cause does not seem to be here. | ||
471 | */ | ||
472 | DCCP_BUG("please report to dccp@vger.kernel.org" | ||
473 | " => prev = %u, last = %u", | ||
474 | tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, | ||
475 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); | ||
476 | } else if (delta_v < 1) { | ||
477 | h->rtt_sample_prev = 1; | ||
478 | goto keep_ref_for_next_time; | ||
479 | } | ||
482 | 480 | ||
483 | h->rtt_sample_prev = 0; /* reset previous candidate */ | 481 | } else if (delta_v == 4) /* optimal match */ |
482 | sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); | ||
483 | else { /* suboptimal match */ | ||
484 | h->rtt_sample_prev = 2; | ||
485 | goto keep_ref_for_next_time; | ||
486 | } | ||
484 | 487 | ||
485 | delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval); | 488 | if (unlikely(sample > DCCP_SANE_RTT_MAX)) { |
486 | if (delta_v == 0) { /* less than RTT/4 difference */ | 489 | DCCP_WARN("RTT sample %u too large, using max\n", sample); |
487 | h->rtt_sample_prev = 1; | 490 | sample = DCCP_SANE_RTT_MAX; |
488 | return; | ||
489 | } | 491 | } |
490 | sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp))); | ||
491 | 492 | ||
492 | if (delta_v <= 4) /* between RTT/4 and RTT */ | 493 | h->rtt_sample_prev = 0; /* use current entry as next reference */ |
493 | sample *= 4 / delta_v; | 494 | keep_ref_for_next_time: |
494 | else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2)) | ||
495 | /* | ||
496 | * Optimisation: CCVal difference is greater than 1 RTT, yet the | ||
497 | * sample is less than the local RTT estimate; which means that | ||
498 | * the RTT estimate is too high. | ||
499 | * To avoid noise, it is not done if the sample is below RTT/2. | ||
500 | */ | ||
501 | return; | ||
502 | 495 | ||
503 | /* Use a lower weight than usual to increase responsiveness */ | 496 | return sample; |
504 | h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5); | ||
505 | } | 497 | } |
506 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); | 498 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); |
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 555e65cd73a0..461cc91cce88 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h | |||
@@ -40,28 +40,12 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include "tfrc.h" | 41 | #include "tfrc.h" |
42 | 42 | ||
43 | /** | 43 | struct tfrc_tx_hist_entry; |
44 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
45 | * @next: next oldest entry (LIFO order) | ||
46 | * @seqno: sequence number of this entry | ||
47 | * @stamp: send time of packet with sequence number @seqno | ||
48 | */ | ||
49 | struct tfrc_tx_hist_entry { | ||
50 | struct tfrc_tx_hist_entry *next; | ||
51 | u64 seqno; | ||
52 | ktime_t stamp; | ||
53 | }; | ||
54 | |||
55 | static inline struct tfrc_tx_hist_entry * | ||
56 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | ||
57 | { | ||
58 | while (head != NULL && head->seqno != seqno) | ||
59 | head = head->next; | ||
60 | return head; | ||
61 | } | ||
62 | 44 | ||
63 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); | 45 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); |
64 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); | 46 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); |
47 | extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, | ||
48 | const u64 seqno, const ktime_t now); | ||
65 | 49 | ||
66 | /* Subtraction a-b modulo-16, respects circular wrap-around */ | 50 | /* Subtraction a-b modulo-16, respects circular wrap-around */ |
67 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) | 51 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) |
@@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry { | |||
91 | * @loss_count: Number of entries in circular history | 75 | * @loss_count: Number of entries in circular history |
92 | * @loss_start: Movable index (for loss detection) | 76 | * @loss_start: Movable index (for loss detection) |
93 | * @rtt_sample_prev: Used during RTT sampling, points to candidate entry | 77 | * @rtt_sample_prev: Used during RTT sampling, points to candidate entry |
94 | * @rtt_estimate: Receiver RTT estimate | ||
95 | * @packet_size: Packet size in bytes (as per RFC 3448, 3.1) | ||
96 | * @bytes_recvd: Number of bytes received since @bytes_start | ||
97 | * @bytes_start: Start time for counting @bytes_recvd | ||
98 | */ | 78 | */ |
99 | struct tfrc_rx_hist { | 79 | struct tfrc_rx_hist { |
100 | struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; | 80 | struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; |
101 | u8 loss_count:2, | 81 | u8 loss_count:2, |
102 | loss_start:2; | 82 | loss_start:2; |
103 | /* Receiver RTT sampling */ | ||
104 | #define rtt_sample_prev loss_start | 83 | #define rtt_sample_prev loss_start |
105 | u32 rtt_estimate; | ||
106 | /* Receiver sampling of application payload lengths */ | ||
107 | u32 packet_size, | ||
108 | bytes_recvd; | ||
109 | ktime_t bytes_start; | ||
110 | }; | 84 | }; |
111 | 85 | ||
112 | /** | 86 | /** |
@@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) | |||
150 | return h->loss_count > 0; | 124 | return h->loss_count > 0; |
151 | } | 125 | } |
152 | 126 | ||
153 | /* | ||
154 | * Accessor functions to retrieve parameters sampled by the RX history | ||
155 | */ | ||
156 | static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h) | ||
157 | { | ||
158 | if (h->packet_size == 0) { | ||
159 | DCCP_WARN("No sample for s, using fallback\n"); | ||
160 | return TCP_MIN_RCVMSS; | ||
161 | } | ||
162 | return h->packet_size; | ||
163 | |||
164 | } | ||
165 | static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h) | ||
166 | { | ||
167 | if (h->rtt_estimate == 0) { | ||
168 | DCCP_WARN("No RTT estimate available, using fallback RTT\n"); | ||
169 | return DCCP_FALLBACK_RTT; | ||
170 | } | ||
171 | return h->rtt_estimate; | ||
172 | } | ||
173 | |||
174 | static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h) | ||
175 | { | ||
176 | h->bytes_recvd = 0; | ||
177 | h->bytes_start = ktime_get_real(); | ||
178 | } | ||
179 | |||
180 | extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv); | ||
181 | |||
182 | |||
183 | extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, | 127 | extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, |
184 | const struct sk_buff *skb, const u64 ndp); | 128 | const struct sk_buff *skb, const u64 ndp); |
185 | 129 | ||
186 | extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); | 130 | extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); |
187 | 131 | ||
188 | struct tfrc_loss_hist; | 132 | struct tfrc_loss_hist; |
189 | extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, | 133 | extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, |
190 | struct tfrc_loss_hist *lh, | 134 | struct tfrc_loss_hist *lh, |
191 | struct sk_buff *skb, const u64 ndp, | 135 | struct sk_buff *skb, const u64 ndp, |
192 | u32 (*first_li)(struct sock *sk), | 136 | u32 (*first_li)(struct sock *sk), |
193 | struct sock *sk); | 137 | struct sock *sk); |
194 | extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, | 138 | extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, |
195 | const struct sk_buff *skb); | 139 | const struct sk_buff *skb); |
196 | extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk); | 140 | extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); |
197 | extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); | 141 | extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); |
198 | 142 | ||
199 | #endif /* _DCCP_PKT_HIST_ */ | 143 | #endif /* _DCCP_PKT_HIST_ */ |
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ede12f53de5a..ed9857527acf 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -48,21 +48,6 @@ static inline u32 scaled_div32(u64 a, u64 b) | |||
48 | } | 48 | } |
49 | 49 | ||
50 | /** | 50 | /** |
51 | * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1 | ||
52 | * Uses scaling to improve accuracy of the integer approximation of sqrt(). The | ||
53 | * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for | ||
54 | * clamped RTT samples (dccp_sample_rtt). | ||
55 | * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the | ||
56 | * scaling factor is neutralised. For this purpose, it avoids returning zero. | ||
57 | */ | ||
58 | static inline u16 tfrc_scaled_sqrt(const u32 sample) | ||
59 | { | ||
60 | const unsigned long non_zero_sample = sample ? : 1; | ||
61 | |||
62 | return int_sqrt(non_zero_sample << 10); | ||
63 | } | ||
64 | |||
65 | /** | ||
66 | * tfrc_ewma - Exponentially weighted moving average | 51 | * tfrc_ewma - Exponentially weighted moving average |
67 | * @weight: Weight to be used as damping factor, in units of 1/10 | 52 | * @weight: Weight to be used as damping factor, in units of 1/10 |
68 | */ | 53 | */ |
@@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) | |||
73 | 58 | ||
74 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); | 59 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); |
75 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); | 60 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); |
76 | extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); | ||
77 | 61 | ||
78 | extern int tfrc_tx_packet_history_init(void); | 62 | extern int tfrc_tx_packet_history_init(void); |
79 | extern void tfrc_tx_packet_history_exit(void); | 63 | extern void tfrc_tx_packet_history_exit(void); |
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 38239c4d5e14..2f20a29cffe4 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c | |||
@@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) | |||
632 | 632 | ||
633 | if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ | 633 | if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ |
634 | if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ | 634 | if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ |
635 | /* | 635 | DCCP_WARN("Value of p (%d) below resolution. " |
636 | * In the congestion-avoidance phase p decays towards 0 | 636 | "Substituting %d\n", p, TFRC_SMALLEST_P); |
637 | * when there are no further losses, so this case is | ||
638 | * natural. Truncating to p_min = 0.01% means that the | ||
639 | * maximum achievable throughput is limited to about | ||
640 | * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g. | ||
641 | * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps. | ||
642 | */ | ||
643 | tfrc_pr_debug("Value of p (%d) below resolution. " | ||
644 | "Substituting %d\n", p, TFRC_SMALLEST_P); | ||
645 | index = 0; | 637 | index = 0; |
646 | } else /* 0.0001 <= p <= 0.05 */ | 638 | } else /* 0.0001 <= p <= 0.05 */ |
647 | index = p/TFRC_SMALLEST_P - 1; | 639 | index = p/TFRC_SMALLEST_P - 1; |
@@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) | |||
666 | result = scaled_div(s, R); | 658 | result = scaled_div(s, R); |
667 | return scaled_div32(result, f); | 659 | return scaled_div32(result, f); |
668 | } | 660 | } |
661 | |||
669 | EXPORT_SYMBOL_GPL(tfrc_calc_x); | 662 | EXPORT_SYMBOL_GPL(tfrc_calc_x); |
670 | 663 | ||
671 | /** | 664 | /** |
@@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) | |||
700 | index = tfrc_binsearch(fvalue, 0); | 693 | index = tfrc_binsearch(fvalue, 0); |
701 | return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; | 694 | return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; |
702 | } | 695 | } |
703 | EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); | ||
704 | 696 | ||
705 | /** | 697 | EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); |
706 | * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% | ||
707 | * When @loss_event_rate is large, there is a chance that p is truncated to 0. | ||
708 | * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. | ||
709 | */ | ||
710 | u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) | ||
711 | { | ||
712 | if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ | ||
713 | return 0; | ||
714 | if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ | ||
715 | return 1000000; | ||
716 | return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); | ||
717 | } | ||
718 | EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate); | ||