diff options
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/Kconfig | 1 | ||||
-rw-r--r-- | net/dccp/ackvec.c | 163 | ||||
-rw-r--r-- | net/dccp/ackvec.h | 62 | ||||
-rw-r--r-- | net/dccp/ccid.c | 8 | ||||
-rw-r--r-- | net/dccp/ccid.h | 37 | ||||
-rw-r--r-- | net/dccp/ccids/Kconfig | 30 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.c | 228 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.h | 21 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 710 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 41 | ||||
-rw-r--r-- | net/dccp/ccids/lib/Makefile | 2 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 352 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 64 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.c | 599 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 220 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.c | 63 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 29 | ||||
-rw-r--r-- | net/dccp/dccp.h | 35 | ||||
-rw-r--r-- | net/dccp/feat.c | 29 | ||||
-rw-r--r-- | net/dccp/feat.h | 26 | ||||
-rw-r--r-- | net/dccp/input.c | 155 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 12 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 10 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 33 | ||||
-rw-r--r-- | net/dccp/options.c | 139 | ||||
-rw-r--r-- | net/dccp/output.c | 55 | ||||
-rw-r--r-- | net/dccp/proto.c | 194 | ||||
-rw-r--r-- | net/dccp/sysctl.c | 36 | ||||
-rw-r--r-- | net/dccp/timer.c | 5 |
29 files changed, 1777 insertions, 1582 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 0549e4719b13..7aa2a7acc7ec 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | menuconfig IP_DCCP | 1 | menuconfig IP_DCCP |
2 | tristate "The DCCP Protocol (EXPERIMENTAL)" | 2 | tristate "The DCCP Protocol (EXPERIMENTAL)" |
3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
4 | select IP_DCCP_CCID2 | ||
4 | ---help--- | 5 | ---help--- |
5 | Datagram Congestion Control Protocol (RFC 4340) | 6 | Datagram Congestion Control Protocol (RFC 4340) |
6 | 7 | ||
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 83378f379f72..6de4bd195d28 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c | |||
@@ -30,7 +30,7 @@ static struct dccp_ackvec_record *dccp_ackvec_record_new(void) | |||
30 | kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); | 30 | kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); |
31 | 31 | ||
32 | if (avr != NULL) | 32 | if (avr != NULL) |
33 | INIT_LIST_HEAD(&avr->dccpavr_node); | 33 | INIT_LIST_HEAD(&avr->avr_node); |
34 | 34 | ||
35 | return avr; | 35 | return avr; |
36 | } | 36 | } |
@@ -40,7 +40,7 @@ static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr) | |||
40 | if (unlikely(avr == NULL)) | 40 | if (unlikely(avr == NULL)) |
41 | return; | 41 | return; |
42 | /* Check if deleting a linked record */ | 42 | /* Check if deleting a linked record */ |
43 | WARN_ON(!list_empty(&avr->dccpavr_node)); | 43 | WARN_ON(!list_empty(&avr->avr_node)); |
44 | kmem_cache_free(dccp_ackvec_record_slab, avr); | 44 | kmem_cache_free(dccp_ackvec_record_slab, avr); |
45 | } | 45 | } |
46 | 46 | ||
@@ -52,16 +52,15 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av, | |||
52 | * just add the AVR at the head of the list. | 52 | * just add the AVR at the head of the list. |
53 | * -sorbo. | 53 | * -sorbo. |
54 | */ | 54 | */ |
55 | if (!list_empty(&av->dccpav_records)) { | 55 | if (!list_empty(&av->av_records)) { |
56 | const struct dccp_ackvec_record *head = | 56 | const struct dccp_ackvec_record *head = |
57 | list_entry(av->dccpav_records.next, | 57 | list_entry(av->av_records.next, |
58 | struct dccp_ackvec_record, | 58 | struct dccp_ackvec_record, |
59 | dccpavr_node); | 59 | avr_node); |
60 | BUG_ON(before48(avr->dccpavr_ack_seqno, | 60 | BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno)); |
61 | head->dccpavr_ack_seqno)); | ||
62 | } | 61 | } |
63 | 62 | ||
64 | list_add(&avr->dccpavr_node, &av->dccpav_records); | 63 | list_add(&avr->avr_node, &av->av_records); |
65 | } | 64 | } |
66 | 65 | ||
67 | int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | 66 | int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) |
@@ -69,9 +68,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
69 | struct dccp_sock *dp = dccp_sk(sk); | 68 | struct dccp_sock *dp = dccp_sk(sk); |
70 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; | 69 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; |
71 | /* Figure out how many options we need to represent the ackvec */ | 70 | /* Figure out how many options we need to represent the ackvec */ |
72 | const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, | 71 | const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN); |
73 | DCCP_MAX_ACKVEC_OPT_LEN); | 72 | u16 len = av->av_vec_len + 2 * nr_opts, i; |
74 | u16 len = av->dccpav_vec_len + 2 * nr_opts, i; | ||
75 | u32 elapsed_time; | 73 | u32 elapsed_time; |
76 | const unsigned char *tail, *from; | 74 | const unsigned char *tail, *from; |
77 | unsigned char *to; | 75 | unsigned char *to; |
@@ -81,7 +79,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
81 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | 79 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) |
82 | return -1; | 80 | return -1; |
83 | 81 | ||
84 | delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); | 82 | delta = ktime_us_delta(ktime_get_real(), av->av_time); |
85 | elapsed_time = delta / 10; | 83 | elapsed_time = delta / 10; |
86 | 84 | ||
87 | if (elapsed_time != 0 && | 85 | if (elapsed_time != 0 && |
@@ -95,9 +93,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
95 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | 93 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; |
96 | 94 | ||
97 | to = skb_push(skb, len); | 95 | to = skb_push(skb, len); |
98 | len = av->dccpav_vec_len; | 96 | len = av->av_vec_len; |
99 | from = av->dccpav_buf + av->dccpav_buf_head; | 97 | from = av->av_buf + av->av_buf_head; |
100 | tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN; | 98 | tail = av->av_buf + DCCP_MAX_ACKVEC_LEN; |
101 | 99 | ||
102 | for (i = 0; i < nr_opts; ++i) { | 100 | for (i = 0; i < nr_opts; ++i) { |
103 | int copylen = len; | 101 | int copylen = len; |
@@ -116,7 +114,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
116 | to += tailsize; | 114 | to += tailsize; |
117 | len -= tailsize; | 115 | len -= tailsize; |
118 | copylen -= tailsize; | 116 | copylen -= tailsize; |
119 | from = av->dccpav_buf; | 117 | from = av->av_buf; |
120 | } | 118 | } |
121 | 119 | ||
122 | memcpy(to, from, copylen); | 120 | memcpy(to, from, copylen); |
@@ -134,19 +132,19 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
134 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will | 132 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will |
135 | * equal buf_nonce. | 133 | * equal buf_nonce. |
136 | */ | 134 | */ |
137 | avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | 135 | avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; |
138 | avr->dccpavr_ack_ptr = av->dccpav_buf_head; | 136 | avr->avr_ack_ptr = av->av_buf_head; |
139 | avr->dccpavr_ack_ackno = av->dccpav_buf_ackno; | 137 | avr->avr_ack_ackno = av->av_buf_ackno; |
140 | avr->dccpavr_ack_nonce = av->dccpav_buf_nonce; | 138 | avr->avr_ack_nonce = av->av_buf_nonce; |
141 | avr->dccpavr_sent_len = av->dccpav_vec_len; | 139 | avr->avr_sent_len = av->av_vec_len; |
142 | 140 | ||
143 | dccp_ackvec_insert_avr(av, avr); | 141 | dccp_ackvec_insert_avr(av, avr); |
144 | 142 | ||
145 | dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " | 143 | dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " |
146 | "ack_ackno=%llu\n", | 144 | "ack_ackno=%llu\n", |
147 | dccp_role(sk), avr->dccpavr_sent_len, | 145 | dccp_role(sk), avr->avr_sent_len, |
148 | (unsigned long long)avr->dccpavr_ack_seqno, | 146 | (unsigned long long)avr->avr_ack_seqno, |
149 | (unsigned long long)avr->dccpavr_ack_ackno); | 147 | (unsigned long long)avr->avr_ack_ackno); |
150 | return 0; | 148 | return 0; |
151 | } | 149 | } |
152 | 150 | ||
@@ -155,12 +153,12 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) | |||
155 | struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); | 153 | struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); |
156 | 154 | ||
157 | if (av != NULL) { | 155 | if (av != NULL) { |
158 | av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; | 156 | av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1; |
159 | av->dccpav_buf_ackno = UINT48_MAX + 1; | 157 | av->av_buf_ackno = UINT48_MAX + 1; |
160 | av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; | 158 | av->av_buf_nonce = 0; |
161 | av->dccpav_time = ktime_set(0, 0); | 159 | av->av_time = ktime_set(0, 0); |
162 | av->dccpav_vec_len = 0; | 160 | av->av_vec_len = 0; |
163 | INIT_LIST_HEAD(&av->dccpav_records); | 161 | INIT_LIST_HEAD(&av->av_records); |
164 | } | 162 | } |
165 | 163 | ||
166 | return av; | 164 | return av; |
@@ -171,12 +169,11 @@ void dccp_ackvec_free(struct dccp_ackvec *av) | |||
171 | if (unlikely(av == NULL)) | 169 | if (unlikely(av == NULL)) |
172 | return; | 170 | return; |
173 | 171 | ||
174 | if (!list_empty(&av->dccpav_records)) { | 172 | if (!list_empty(&av->av_records)) { |
175 | struct dccp_ackvec_record *avr, *next; | 173 | struct dccp_ackvec_record *avr, *next; |
176 | 174 | ||
177 | list_for_each_entry_safe(avr, next, &av->dccpav_records, | 175 | list_for_each_entry_safe(avr, next, &av->av_records, avr_node) { |
178 | dccpavr_node) { | 176 | list_del_init(&avr->avr_node); |
179 | list_del_init(&avr->dccpavr_node); | ||
180 | dccp_ackvec_record_delete(avr); | 177 | dccp_ackvec_record_delete(avr); |
181 | } | 178 | } |
182 | } | 179 | } |
@@ -187,13 +184,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av) | |||
187 | static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, | 184 | static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, |
188 | const u32 index) | 185 | const u32 index) |
189 | { | 186 | { |
190 | return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; | 187 | return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK; |
191 | } | 188 | } |
192 | 189 | ||
193 | static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, | 190 | static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, |
194 | const u32 index) | 191 | const u32 index) |
195 | { | 192 | { |
196 | return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; | 193 | return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK; |
197 | } | 194 | } |
198 | 195 | ||
199 | /* | 196 | /* |
@@ -208,29 +205,29 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, | |||
208 | unsigned int gap; | 205 | unsigned int gap; |
209 | long new_head; | 206 | long new_head; |
210 | 207 | ||
211 | if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN) | 208 | if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) |
212 | return -ENOBUFS; | 209 | return -ENOBUFS; |
213 | 210 | ||
214 | gap = packets - 1; | 211 | gap = packets - 1; |
215 | new_head = av->dccpav_buf_head - packets; | 212 | new_head = av->av_buf_head - packets; |
216 | 213 | ||
217 | if (new_head < 0) { | 214 | if (new_head < 0) { |
218 | if (gap > 0) { | 215 | if (gap > 0) { |
219 | memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, | 216 | memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, |
220 | gap + new_head + 1); | 217 | gap + new_head + 1); |
221 | gap = -new_head; | 218 | gap = -new_head; |
222 | } | 219 | } |
223 | new_head += DCCP_MAX_ACKVEC_LEN; | 220 | new_head += DCCP_MAX_ACKVEC_LEN; |
224 | } | 221 | } |
225 | 222 | ||
226 | av->dccpav_buf_head = new_head; | 223 | av->av_buf_head = new_head; |
227 | 224 | ||
228 | if (gap > 0) | 225 | if (gap > 0) |
229 | memset(av->dccpav_buf + av->dccpav_buf_head + 1, | 226 | memset(av->av_buf + av->av_buf_head + 1, |
230 | DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); | 227 | DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); |
231 | 228 | ||
232 | av->dccpav_buf[av->dccpav_buf_head] = state; | 229 | av->av_buf[av->av_buf_head] = state; |
233 | av->dccpav_vec_len += packets; | 230 | av->av_vec_len += packets; |
234 | return 0; | 231 | return 0; |
235 | } | 232 | } |
236 | 233 | ||
@@ -243,7 +240,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | |||
243 | /* | 240 | /* |
244 | * Check at the right places if the buffer is full, if it is, tell the | 241 | * Check at the right places if the buffer is full, if it is, tell the |
245 | * caller to start dropping packets till the HC-Sender acks our ACK | 242 | * caller to start dropping packets till the HC-Sender acks our ACK |
246 | * vectors, when we will free up space in dccpav_buf. | 243 | * vectors, when we will free up space in av_buf. |
247 | * | 244 | * |
248 | * We may well decide to do buffer compression, etc., but for now let's | 245 | * We may well decide to do buffer compression, etc., but for now let's |
249 | * just drop. | 246 | * just drop. |
@@ -263,22 +260,20 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | |||
263 | */ | 260 | */ |
264 | 261 | ||
265 | /* See if this is the first ackno being inserted */ | 262 | /* See if this is the first ackno being inserted */ |
266 | if (av->dccpav_vec_len == 0) { | 263 | if (av->av_vec_len == 0) { |
267 | av->dccpav_buf[av->dccpav_buf_head] = state; | 264 | av->av_buf[av->av_buf_head] = state; |
268 | av->dccpav_vec_len = 1; | 265 | av->av_vec_len = 1; |
269 | } else if (after48(ackno, av->dccpav_buf_ackno)) { | 266 | } else if (after48(ackno, av->av_buf_ackno)) { |
270 | const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, | 267 | const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno); |
271 | ackno); | ||
272 | 268 | ||
273 | /* | 269 | /* |
274 | * Look if the state of this packet is the same as the | 270 | * Look if the state of this packet is the same as the |
275 | * previous ackno and if so if we can bump the head len. | 271 | * previous ackno and if so if we can bump the head len. |
276 | */ | 272 | */ |
277 | if (delta == 1 && | 273 | if (delta == 1 && |
278 | dccp_ackvec_state(av, av->dccpav_buf_head) == state && | 274 | dccp_ackvec_state(av, av->av_buf_head) == state && |
279 | (dccp_ackvec_len(av, av->dccpav_buf_head) < | 275 | dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK) |
280 | DCCP_ACKVEC_LEN_MASK)) | 276 | av->av_buf[av->av_buf_head]++; |
281 | av->dccpav_buf[av->dccpav_buf_head]++; | ||
282 | else if (dccp_ackvec_set_buf_head_state(av, delta, state)) | 277 | else if (dccp_ackvec_set_buf_head_state(av, delta, state)) |
283 | return -ENOBUFS; | 278 | return -ENOBUFS; |
284 | } else { | 279 | } else { |
@@ -290,14 +285,14 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | |||
290 | * the byte corresponding to S. (Indexing structures | 285 | * the byte corresponding to S. (Indexing structures |
291 | * could reduce the complexity of this scan.) | 286 | * could reduce the complexity of this scan.) |
292 | */ | 287 | */ |
293 | u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); | 288 | u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno); |
294 | u32 index = av->dccpav_buf_head; | 289 | u32 index = av->av_buf_head; |
295 | 290 | ||
296 | while (1) { | 291 | while (1) { |
297 | const u8 len = dccp_ackvec_len(av, index); | 292 | const u8 len = dccp_ackvec_len(av, index); |
298 | const u8 state = dccp_ackvec_state(av, index); | 293 | const u8 state = dccp_ackvec_state(av, index); |
299 | /* | 294 | /* |
300 | * valid packets not yet in dccpav_buf have a reserved | 295 | * valid packets not yet in av_buf have a reserved |
301 | * entry, with a len equal to 0. | 296 | * entry, with a len equal to 0. |
302 | */ | 297 | */ |
303 | if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && | 298 | if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && |
@@ -305,7 +300,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | |||
305 | reserved seat! */ | 300 | reserved seat! */ |
306 | dccp_pr_debug("Found %llu reserved seat!\n", | 301 | dccp_pr_debug("Found %llu reserved seat!\n", |
307 | (unsigned long long)ackno); | 302 | (unsigned long long)ackno); |
308 | av->dccpav_buf[index] = state; | 303 | av->av_buf[index] = state; |
309 | goto out; | 304 | goto out; |
310 | } | 305 | } |
311 | /* len == 0 means one packet */ | 306 | /* len == 0 means one packet */ |
@@ -318,8 +313,8 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | |||
318 | } | 313 | } |
319 | } | 314 | } |
320 | 315 | ||
321 | av->dccpav_buf_ackno = ackno; | 316 | av->av_buf_ackno = ackno; |
322 | av->dccpav_time = ktime_get_real(); | 317 | av->av_time = ktime_get_real(); |
323 | out: | 318 | out: |
324 | return 0; | 319 | return 0; |
325 | 320 | ||
@@ -349,9 +344,9 @@ void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) | |||
349 | 344 | ||
350 | void dccp_ackvec_print(const struct dccp_ackvec *av) | 345 | void dccp_ackvec_print(const struct dccp_ackvec *av) |
351 | { | 346 | { |
352 | dccp_ackvector_print(av->dccpav_buf_ackno, | 347 | dccp_ackvector_print(av->av_buf_ackno, |
353 | av->dccpav_buf + av->dccpav_buf_head, | 348 | av->av_buf + av->av_buf_head, |
354 | av->dccpav_vec_len); | 349 | av->av_vec_len); |
355 | } | 350 | } |
356 | #endif | 351 | #endif |
357 | 352 | ||
@@ -361,17 +356,15 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av, | |||
361 | struct dccp_ackvec_record *next; | 356 | struct dccp_ackvec_record *next; |
362 | 357 | ||
363 | /* sort out vector length */ | 358 | /* sort out vector length */ |
364 | if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) | 359 | if (av->av_buf_head <= avr->avr_ack_ptr) |
365 | av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; | 360 | av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head; |
366 | else | 361 | else |
367 | av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 | 362 | av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 - |
368 | - av->dccpav_buf_head | 363 | av->av_buf_head + avr->avr_ack_ptr; |
369 | + avr->dccpavr_ack_ptr; | ||
370 | 364 | ||
371 | /* free records */ | 365 | /* free records */ |
372 | list_for_each_entry_safe_from(avr, next, &av->dccpav_records, | 366 | list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { |
373 | dccpavr_node) { | 367 | list_del_init(&avr->avr_node); |
374 | list_del_init(&avr->dccpavr_node); | ||
375 | dccp_ackvec_record_delete(avr); | 368 | dccp_ackvec_record_delete(avr); |
376 | } | 369 | } |
377 | } | 370 | } |
@@ -386,16 +379,16 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, | |||
386 | * windows. We will be receiving ACKs for stuff we sent a while back | 379 | * windows. We will be receiving ACKs for stuff we sent a while back |
387 | * -sorbo. | 380 | * -sorbo. |
388 | */ | 381 | */ |
389 | list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) { | 382 | list_for_each_entry_reverse(avr, &av->av_records, avr_node) { |
390 | if (ackno == avr->dccpavr_ack_seqno) { | 383 | if (ackno == avr->avr_ack_seqno) { |
391 | dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " | 384 | dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " |
392 | "ack_ackno=%llu, ACKED!\n", | 385 | "ack_ackno=%llu, ACKED!\n", |
393 | dccp_role(sk), 1, | 386 | dccp_role(sk), 1, |
394 | (unsigned long long)avr->dccpavr_ack_seqno, | 387 | (unsigned long long)avr->avr_ack_seqno, |
395 | (unsigned long long)avr->dccpavr_ack_ackno); | 388 | (unsigned long long)avr->avr_ack_ackno); |
396 | dccp_ackvec_throw_record(av, avr); | 389 | dccp_ackvec_throw_record(av, avr); |
397 | break; | 390 | break; |
398 | } else if (avr->dccpavr_ack_seqno > ackno) | 391 | } else if (avr->avr_ack_seqno > ackno) |
399 | break; /* old news */ | 392 | break; /* old news */ |
400 | } | 393 | } |
401 | } | 394 | } |
@@ -409,7 +402,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | |||
409 | struct dccp_ackvec_record *avr; | 402 | struct dccp_ackvec_record *avr; |
410 | 403 | ||
411 | /* Check if we actually sent an ACK vector */ | 404 | /* Check if we actually sent an ACK vector */ |
412 | if (list_empty(&av->dccpav_records)) | 405 | if (list_empty(&av->av_records)) |
413 | return; | 406 | return; |
414 | 407 | ||
415 | i = len; | 408 | i = len; |
@@ -418,8 +411,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | |||
418 | * I think it might be more efficient to work backwards. See comment on | 411 | * I think it might be more efficient to work backwards. See comment on |
419 | * rcv_ackno. -sorbo. | 412 | * rcv_ackno. -sorbo. |
420 | */ | 413 | */ |
421 | avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record, | 414 | avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node); |
422 | dccpavr_node); | ||
423 | while (i--) { | 415 | while (i--) { |
424 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | 416 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; |
425 | u64 ackno_end_rl; | 417 | u64 ackno_end_rl; |
@@ -430,15 +422,14 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | |||
430 | * If our AVR sequence number is greater than the ack, go | 422 | * If our AVR sequence number is greater than the ack, go |
431 | * forward in the AVR list until it is not so. | 423 | * forward in the AVR list until it is not so. |
432 | */ | 424 | */ |
433 | list_for_each_entry_from(avr, &av->dccpav_records, | 425 | list_for_each_entry_from(avr, &av->av_records, avr_node) { |
434 | dccpavr_node) { | 426 | if (!after48(avr->avr_ack_seqno, *ackno)) |
435 | if (!after48(avr->dccpavr_ack_seqno, *ackno)) | ||
436 | goto found; | 427 | goto found; |
437 | } | 428 | } |
438 | /* End of the dccpav_records list, not found, exit */ | 429 | /* End of the av_records list, not found, exit */ |
439 | break; | 430 | break; |
440 | found: | 431 | found: |
441 | if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) { | 432 | if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) { |
442 | const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; | 433 | const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; |
443 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { | 434 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { |
444 | dccp_pr_debug("%s ACK vector 0, len=%d, " | 435 | dccp_pr_debug("%s ACK vector 0, len=%d, " |
@@ -446,9 +437,9 @@ found: | |||
446 | "ACKED!\n", | 437 | "ACKED!\n", |
447 | dccp_role(sk), len, | 438 | dccp_role(sk), len, |
448 | (unsigned long long) | 439 | (unsigned long long) |
449 | avr->dccpavr_ack_seqno, | 440 | avr->avr_ack_seqno, |
450 | (unsigned long long) | 441 | (unsigned long long) |
451 | avr->dccpavr_ack_ackno); | 442 | avr->avr_ack_ackno); |
452 | dccp_ackvec_throw_record(av, avr); | 443 | dccp_ackvec_throw_record(av, avr); |
453 | break; | 444 | break; |
454 | } | 445 | } |
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 9ef0737043ee..bcb64fb4acef 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h | |||
@@ -32,54 +32,54 @@ | |||
32 | * | 32 | * |
33 | * This data structure is the one defined in RFC 4340, Appendix A. | 33 | * This data structure is the one defined in RFC 4340, Appendix A. |
34 | * | 34 | * |
35 | * @dccpav_buf_head - circular buffer head | 35 | * @av_buf_head - circular buffer head |
36 | * @dccpav_buf_tail - circular buffer tail | 36 | * @av_buf_tail - circular buffer tail |
37 | * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the | 37 | * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the |
38 | * buffer (i.e. %dccpav_buf_head) | 38 | * buffer (i.e. %av_buf_head) |
39 | * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked | 39 | * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked |
40 | * by the buffer with State 0 | 40 | * by the buffer with State 0 |
41 | * | 41 | * |
42 | * Additionally, the HC-Receiver must keep some information about the | 42 | * Additionally, the HC-Receiver must keep some information about the |
43 | * Ack Vectors it has recently sent. For each packet sent carrying an | 43 | * Ack Vectors it has recently sent. For each packet sent carrying an |
44 | * Ack Vector, it remembers four variables: | 44 | * Ack Vector, it remembers four variables: |
45 | * | 45 | * |
46 | * @dccpav_records - list of dccp_ackvec_record | 46 | * @av_records - list of dccp_ackvec_record |
47 | * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. | 47 | * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. |
48 | * | 48 | * |
49 | * @dccpav_time - the time in usecs | 49 | * @av_time - the time in usecs |
50 | * @dccpav_buf - circular buffer of acknowledgeable packets | 50 | * @av_buf - circular buffer of acknowledgeable packets |
51 | */ | 51 | */ |
52 | struct dccp_ackvec { | 52 | struct dccp_ackvec { |
53 | u64 dccpav_buf_ackno; | 53 | u64 av_buf_ackno; |
54 | struct list_head dccpav_records; | 54 | struct list_head av_records; |
55 | ktime_t dccpav_time; | 55 | ktime_t av_time; |
56 | u16 dccpav_buf_head; | 56 | u16 av_buf_head; |
57 | u16 dccpav_vec_len; | 57 | u16 av_vec_len; |
58 | u8 dccpav_buf_nonce; | 58 | u8 av_buf_nonce; |
59 | u8 dccpav_ack_nonce; | 59 | u8 av_ack_nonce; |
60 | u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN]; | 60 | u8 av_buf[DCCP_MAX_ACKVEC_LEN]; |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /** struct dccp_ackvec_record - ack vector record | 63 | /** struct dccp_ackvec_record - ack vector record |
64 | * | 64 | * |
65 | * ACK vector record as defined in Appendix A of spec. | 65 | * ACK vector record as defined in Appendix A of spec. |
66 | * | 66 | * |
67 | * The list is sorted by dccpavr_ack_seqno | 67 | * The list is sorted by avr_ack_seqno |
68 | * | 68 | * |
69 | * @dccpavr_node - node in dccpav_records | 69 | * @avr_node - node in av_records |
70 | * @dccpavr_ack_seqno - sequence number of the packet this record was sent on | 70 | * @avr_ack_seqno - sequence number of the packet this record was sent on |
71 | * @dccpavr_ack_ackno - sequence number being acknowledged | 71 | * @avr_ack_ackno - sequence number being acknowledged |
72 | * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts | 72 | * @avr_ack_ptr - pointer into av_buf where this record starts |
73 | * @dccpavr_ack_nonce - dccpav_ack_nonce at the time this record was sent | 73 | * @avr_ack_nonce - av_ack_nonce at the time this record was sent |
74 | * @dccpavr_sent_len - length of the record in dccpav_buf | 74 | * @avr_sent_len - length of the record in av_buf |
75 | */ | 75 | */ |
76 | struct dccp_ackvec_record { | 76 | struct dccp_ackvec_record { |
77 | struct list_head dccpavr_node; | 77 | struct list_head avr_node; |
78 | u64 dccpavr_ack_seqno; | 78 | u64 avr_ack_seqno; |
79 | u64 dccpavr_ack_ackno; | 79 | u64 avr_ack_ackno; |
80 | u16 dccpavr_ack_ptr; | 80 | u16 avr_ack_ptr; |
81 | u16 dccpavr_sent_len; | 81 | u16 avr_sent_len; |
82 | u8 dccpavr_ack_nonce; | 82 | u8 avr_ack_nonce; |
83 | }; | 83 | }; |
84 | 84 | ||
85 | struct sock; | 85 | struct sock; |
@@ -105,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); | |||
105 | 105 | ||
106 | static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) | 106 | static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) |
107 | { | 107 | { |
108 | return av->dccpav_vec_len; | 108 | return av->av_vec_len; |
109 | } | 109 | } |
110 | #else /* CONFIG_IP_DCCP_ACKVEC */ | 110 | #else /* CONFIG_IP_DCCP_ACKVEC */ |
111 | static inline int dccp_ackvec_init(void) | 111 | static inline int dccp_ackvec_init(void) |
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index c45088b5e6fb..4809753d12ae 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c | |||
@@ -92,15 +92,15 @@ int ccid_register(struct ccid_operations *ccid_ops) | |||
92 | 92 | ||
93 | ccid_ops->ccid_hc_rx_slab = | 93 | ccid_ops->ccid_hc_rx_slab = |
94 | ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size, | 94 | ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size, |
95 | "%s_hc_rx_sock", | 95 | "ccid%u_hc_rx_sock", |
96 | ccid_ops->ccid_name); | 96 | ccid_ops->ccid_id); |
97 | if (ccid_ops->ccid_hc_rx_slab == NULL) | 97 | if (ccid_ops->ccid_hc_rx_slab == NULL) |
98 | goto out; | 98 | goto out; |
99 | 99 | ||
100 | ccid_ops->ccid_hc_tx_slab = | 100 | ccid_ops->ccid_hc_tx_slab = |
101 | ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size, | 101 | ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size, |
102 | "%s_hc_tx_sock", | 102 | "ccid%u_hc_tx_sock", |
103 | ccid_ops->ccid_name); | 103 | ccid_ops->ccid_id); |
104 | if (ccid_ops->ccid_hc_tx_slab == NULL) | 104 | if (ccid_ops->ccid_hc_tx_slab == NULL) |
105 | goto out_free_rx_slab; | 105 | goto out_free_rx_slab; |
106 | 106 | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c65cb2453e43..fdeae7b57319 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h | |||
@@ -23,14 +23,37 @@ | |||
23 | 23 | ||
24 | struct tcp_info; | 24 | struct tcp_info; |
25 | 25 | ||
26 | /** | ||
27 | * struct ccid_operations - Interface to Congestion-Control Infrastructure | ||
28 | * | ||
29 | * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.) | ||
30 | * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled) | ||
31 | * @ccid_name: alphabetical identifier string for @ccid_id | ||
32 | * @ccid_owner: module which implements/owns this CCID | ||
33 | * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection | ||
34 | * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket | ||
35 | * | ||
36 | * @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup) | ||
37 | * @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction) | ||
38 | * @ccid_hc_rx_packet_recv: implements the HC-receiver side | ||
39 | * @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options | ||
40 | * @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options | ||
41 | * @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender | ||
42 | * @ccid_hc_tx_send_packet: implements the sending part of the HC-sender | ||
43 | * @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender | ||
44 | * @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender | ||
45 | * @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender | ||
46 | */ | ||
26 | struct ccid_operations { | 47 | struct ccid_operations { |
27 | unsigned char ccid_id; | 48 | unsigned char ccid_id; |
28 | const char *ccid_name; | 49 | __u32 ccid_ccmps; |
29 | struct module *ccid_owner; | 50 | const char *ccid_name; |
30 | struct kmem_cache *ccid_hc_rx_slab; | 51 | struct module *ccid_owner; |
31 | __u32 ccid_hc_rx_obj_size; | 52 | struct kmem_cache *ccid_hc_rx_slab, |
32 | struct kmem_cache *ccid_hc_tx_slab; | 53 | *ccid_hc_tx_slab; |
33 | __u32 ccid_hc_tx_obj_size; | 54 | __u32 ccid_hc_rx_obj_size, |
55 | ccid_hc_tx_obj_size; | ||
56 | /* Interface Routines */ | ||
34 | int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); | 57 | int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); |
35 | int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); | 58 | int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); |
36 | void (*ccid_hc_rx_exit)(struct sock *sk); | 59 | void (*ccid_hc_rx_exit)(struct sock *sk); |
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 80f469887691..12275943eab8 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig | |||
@@ -1,9 +1,8 @@ | |||
1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" | 1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" |
2 | depends on IP_DCCP && EXPERIMENTAL | 2 | depends on EXPERIMENTAL |
3 | 3 | ||
4 | config IP_DCCP_CCID2 | 4 | config IP_DCCP_CCID2 |
5 | tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" | 5 | tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" |
6 | depends on IP_DCCP | ||
7 | def_tristate IP_DCCP | 6 | def_tristate IP_DCCP |
8 | select IP_DCCP_ACKVEC | 7 | select IP_DCCP_ACKVEC |
9 | ---help--- | 8 | ---help--- |
@@ -20,18 +19,9 @@ config IP_DCCP_CCID2 | |||
20 | to the user. For example, a hypothetical application that | 19 | to the user. For example, a hypothetical application that |
21 | transferred files over DCCP, using application-level retransmissions | 20 | transferred files over DCCP, using application-level retransmissions |
22 | for lost packets, would prefer CCID 2 to CCID 3. On-line games may | 21 | for lost packets, would prefer CCID 2 to CCID 3. On-line games may |
23 | also prefer CCID 2. | 22 | also prefer CCID 2. See RFC 4341 for further details. |
24 | 23 | ||
25 | CCID 2 is further described in RFC 4341, | 24 | CCID2 is the default CCID used by DCCP. |
26 | http://www.ietf.org/rfc/rfc4341.txt | ||
27 | |||
28 | This text was extracted from RFC 4340 (sec. 10.1), | ||
29 | http://www.ietf.org/rfc/rfc4340.txt | ||
30 | |||
31 | To compile this CCID as a module, choose M here: the module will be | ||
32 | called dccp_ccid2. | ||
33 | |||
34 | If in doubt, say M. | ||
35 | 25 | ||
36 | config IP_DCCP_CCID2_DEBUG | 26 | config IP_DCCP_CCID2_DEBUG |
37 | bool "CCID2 debugging messages" | 27 | bool "CCID2 debugging messages" |
@@ -47,8 +37,8 @@ config IP_DCCP_CCID2_DEBUG | |||
47 | 37 | ||
48 | config IP_DCCP_CCID3 | 38 | config IP_DCCP_CCID3 |
49 | tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" | 39 | tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" |
50 | depends on IP_DCCP | ||
51 | def_tristate IP_DCCP | 40 | def_tristate IP_DCCP |
41 | select IP_DCCP_TFRC_LIB | ||
52 | ---help--- | 42 | ---help--- |
53 | CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based | 43 | CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based |
54 | rate-controlled congestion control mechanism. TFRC is designed to | 44 | rate-controlled congestion control mechanism. TFRC is designed to |
@@ -74,10 +64,6 @@ config IP_DCCP_CCID3 | |||
74 | 64 | ||
75 | If in doubt, say M. | 65 | If in doubt, say M. |
76 | 66 | ||
77 | config IP_DCCP_TFRC_LIB | ||
78 | depends on IP_DCCP_CCID3 | ||
79 | def_tristate IP_DCCP_CCID3 | ||
80 | |||
81 | config IP_DCCP_CCID3_DEBUG | 67 | config IP_DCCP_CCID3_DEBUG |
82 | bool "CCID3 debugging messages" | 68 | bool "CCID3 debugging messages" |
83 | depends on IP_DCCP_CCID3 | 69 | depends on IP_DCCP_CCID3 |
@@ -121,5 +107,13 @@ config IP_DCCP_CCID3_RTO | |||
121 | is serious network congestion: experimenting with larger values should | 107 | is serious network congestion: experimenting with larger values should |
122 | therefore not be performed on WANs. | 108 | therefore not be performed on WANs. |
123 | 109 | ||
110 | config IP_DCCP_TFRC_LIB | ||
111 | tristate | ||
112 | default n | ||
113 | |||
114 | config IP_DCCP_TFRC_DEBUG | ||
115 | bool | ||
116 | depends on IP_DCCP_TFRC_LIB | ||
117 | default y if IP_DCCP_CCID3_DEBUG | ||
124 | 118 | ||
125 | endmenu | 119 | endmenu |
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index d694656b8800..b5b52ebb2693 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -24,9 +24,6 @@ | |||
24 | 24 | ||
25 | /* | 25 | /* |
26 | * This implementation should follow RFC 4341 | 26 | * This implementation should follow RFC 4341 |
27 | * | ||
28 | * BUGS: | ||
29 | * - sequence number wrapping | ||
30 | */ | 27 | */ |
31 | 28 | ||
32 | #include "../ccid.h" | 29 | #include "../ccid.h" |
@@ -129,50 +126,35 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
129 | { | 126 | { |
130 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 127 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
131 | 128 | ||
132 | ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, | 129 | if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) |
133 | hctx->ccid2hctx_cwnd); | 130 | return 0; |
134 | |||
135 | if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) { | ||
136 | /* OK we can send... make sure previous packet was sent off */ | ||
137 | if (!hctx->ccid2hctx_sendwait) { | ||
138 | hctx->ccid2hctx_sendwait = 1; | ||
139 | return 0; | ||
140 | } | ||
141 | } | ||
142 | 131 | ||
143 | return 1; /* XXX CCID should dequeue when ready instead of polling */ | 132 | return 1; /* XXX CCID should dequeue when ready instead of polling */ |
144 | } | 133 | } |
145 | 134 | ||
146 | static void ccid2_change_l_ack_ratio(struct sock *sk, int val) | 135 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) |
147 | { | 136 | { |
148 | struct dccp_sock *dp = dccp_sk(sk); | 137 | struct dccp_sock *dp = dccp_sk(sk); |
138 | u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2); | ||
139 | |||
149 | /* | 140 | /* |
150 | * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio | 141 | * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from |
151 | * should be 1... it shouldn't be allowed to become 2. | 142 | * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always |
152 | * -sorbo. | 143 | * acceptable since this causes starvation/deadlock whenever cwnd < 2. |
144 | * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled). | ||
153 | */ | 145 | */ |
154 | if (val != 2) { | 146 | if (val == 0 || val > max_ratio) { |
155 | const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 147 | DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); |
156 | int max = hctx->ccid2hctx_cwnd / 2; | 148 | val = max_ratio; |
157 | |||
158 | /* round up */ | ||
159 | if (hctx->ccid2hctx_cwnd & 1) | ||
160 | max++; | ||
161 | |||
162 | if (val > max) | ||
163 | val = max; | ||
164 | } | 149 | } |
150 | if (val > 0xFFFF) /* RFC 4340, 11.3 */ | ||
151 | val = 0xFFFF; | ||
165 | 152 | ||
166 | ccid2_pr_debug("changing local ack ratio to %d\n", val); | 153 | if (val == dp->dccps_l_ack_ratio) |
167 | WARN_ON(val <= 0); | 154 | return; |
168 | dp->dccps_l_ack_ratio = val; | ||
169 | } | ||
170 | 155 | ||
171 | static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val) | 156 | ccid2_pr_debug("changing local ack ratio to %u\n", val); |
172 | { | 157 | dp->dccps_l_ack_ratio = val; |
173 | /* XXX do we need to change ack ratio? */ | ||
174 | hctx->ccid2hctx_cwnd = val? : 1; | ||
175 | ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd); | ||
176 | } | 158 | } |
177 | 159 | ||
178 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) | 160 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) |
@@ -181,11 +163,6 @@ static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) | |||
181 | hctx->ccid2hctx_srtt = val; | 163 | hctx->ccid2hctx_srtt = val; |
182 | } | 164 | } |
183 | 165 | ||
184 | static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val) | ||
185 | { | ||
186 | hctx->ccid2hctx_pipe = val; | ||
187 | } | ||
188 | |||
189 | static void ccid2_start_rto_timer(struct sock *sk); | 166 | static void ccid2_start_rto_timer(struct sock *sk); |
190 | 167 | ||
191 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 168 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
@@ -215,21 +192,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
215 | ccid2_start_rto_timer(sk); | 192 | ccid2_start_rto_timer(sk); |
216 | 193 | ||
217 | /* adjust pipe, cwnd etc */ | 194 | /* adjust pipe, cwnd etc */ |
218 | ccid2_change_pipe(hctx, 0); | 195 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2; |
219 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; | ||
220 | if (hctx->ccid2hctx_ssthresh < 2) | 196 | if (hctx->ccid2hctx_ssthresh < 2) |
221 | hctx->ccid2hctx_ssthresh = 2; | 197 | hctx->ccid2hctx_ssthresh = 2; |
222 | ccid2_change_cwnd(hctx, 1); | 198 | hctx->ccid2hctx_cwnd = 1; |
199 | hctx->ccid2hctx_pipe = 0; | ||
223 | 200 | ||
224 | /* clear state about stuff we sent */ | 201 | /* clear state about stuff we sent */ |
225 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; | 202 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; |
226 | hctx->ccid2hctx_ssacks = 0; | 203 | hctx->ccid2hctx_packets_acked = 0; |
227 | hctx->ccid2hctx_acks = 0; | ||
228 | hctx->ccid2hctx_sent = 0; | ||
229 | 204 | ||
230 | /* clear ack ratio state. */ | 205 | /* clear ack ratio state. */ |
231 | hctx->ccid2hctx_arsent = 0; | ||
232 | hctx->ccid2hctx_ackloss = 0; | ||
233 | hctx->ccid2hctx_rpseq = 0; | 206 | hctx->ccid2hctx_rpseq = 0; |
234 | hctx->ccid2hctx_rpdupack = -1; | 207 | hctx->ccid2hctx_rpdupack = -1; |
235 | ccid2_change_l_ack_ratio(sk, 1); | 208 | ccid2_change_l_ack_ratio(sk, 1); |
@@ -255,23 +228,10 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
255 | struct dccp_sock *dp = dccp_sk(sk); | 228 | struct dccp_sock *dp = dccp_sk(sk); |
256 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 229 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
257 | struct ccid2_seq *next; | 230 | struct ccid2_seq *next; |
258 | u64 seq; | ||
259 | |||
260 | ccid2_hc_tx_check_sanity(hctx); | ||
261 | 231 | ||
262 | BUG_ON(!hctx->ccid2hctx_sendwait); | 232 | hctx->ccid2hctx_pipe++; |
263 | hctx->ccid2hctx_sendwait = 0; | ||
264 | ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1); | ||
265 | BUG_ON(hctx->ccid2hctx_pipe < 0); | ||
266 | 233 | ||
267 | /* There is an issue. What if another packet is sent between | 234 | hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss; |
268 | * packet_send() and packet_sent(). Then the sequence number would be | ||
269 | * wrong. | ||
270 | * -sorbo. | ||
271 | */ | ||
272 | seq = dp->dccps_gss; | ||
273 | |||
274 | hctx->ccid2hctx_seqh->ccid2s_seq = seq; | ||
275 | hctx->ccid2hctx_seqh->ccid2s_acked = 0; | 235 | hctx->ccid2hctx_seqh->ccid2s_acked = 0; |
276 | hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; | 236 | hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; |
277 | 237 | ||
@@ -291,8 +251,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
291 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, | 251 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, |
292 | hctx->ccid2hctx_pipe); | 252 | hctx->ccid2hctx_pipe); |
293 | 253 | ||
294 | hctx->ccid2hctx_sent++; | 254 | /* |
295 | 255 | * FIXME: The code below is broken and the variables have been removed | |
256 | * from the socket struct. The `ackloss' variable was always set to 0, | ||
257 | * and with arsent there are several problems: | ||
258 | * (i) it doesn't just count the number of Acks, but all sent packets; | ||
259 | * (ii) it is expressed in # of packets, not # of windows, so the | ||
260 | * comparison below uses the wrong formula: Appendix A of RFC 4341 | ||
261 | * comes up with the number K = cwnd / (R^2 - R) of consecutive windows | ||
262 | * of data with no lost or marked Ack packets. If arsent were the # of | ||
263 | * consecutive Acks received without loss, then Ack Ratio needs to be | ||
264 | * decreased by 1 when | ||
265 | * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2) | ||
266 | * where cwnd / R is the number of Acks received per window of data | ||
267 | * (cf. RFC 4341, App. A). The problems are that | ||
268 | * - arsent counts other packets as well; | ||
269 | * - the comparison uses a formula different from RFC 4341; | ||
270 | * - computing a cubic/quadratic equation each time is too complicated. | ||
271 | * Hence a different algorithm is needed. | ||
272 | */ | ||
273 | #if 0 | ||
296 | /* Ack Ratio. Need to maintain a concept of how many windows we sent */ | 274 | /* Ack Ratio. Need to maintain a concept of how many windows we sent */ |
297 | hctx->ccid2hctx_arsent++; | 275 | hctx->ccid2hctx_arsent++; |
298 | /* We had an ack loss in this window... */ | 276 | /* We had an ack loss in this window... */ |
@@ -320,14 +298,13 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
320 | hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ | 298 | hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ |
321 | } | 299 | } |
322 | } | 300 | } |
301 | #endif | ||
323 | 302 | ||
324 | /* setup RTO timer */ | 303 | /* setup RTO timer */ |
325 | if (!timer_pending(&hctx->ccid2hctx_rtotimer)) | 304 | if (!timer_pending(&hctx->ccid2hctx_rtotimer)) |
326 | ccid2_start_rto_timer(sk); | 305 | ccid2_start_rto_timer(sk); |
327 | 306 | ||
328 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 307 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
329 | ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); | ||
330 | ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq); | ||
331 | do { | 308 | do { |
332 | struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; | 309 | struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; |
333 | 310 | ||
@@ -419,31 +396,15 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
419 | { | 396 | { |
420 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 397 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
421 | 398 | ||
422 | /* slow start */ | ||
423 | if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { | 399 | if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { |
424 | hctx->ccid2hctx_acks = 0; | 400 | if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) { |
425 | 401 | hctx->ccid2hctx_cwnd += 1; | |
426 | /* We can increase cwnd at most maxincr [ack_ratio/2] */ | 402 | *maxincr -= 1; |
427 | if (*maxincr) { | 403 | hctx->ccid2hctx_packets_acked = 0; |
428 | /* increase every 2 acks */ | ||
429 | hctx->ccid2hctx_ssacks++; | ||
430 | if (hctx->ccid2hctx_ssacks == 2) { | ||
431 | ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1); | ||
432 | hctx->ccid2hctx_ssacks = 0; | ||
433 | *maxincr = *maxincr - 1; | ||
434 | } | ||
435 | } else { | ||
436 | /* increased cwnd enough for this single ack */ | ||
437 | hctx->ccid2hctx_ssacks = 0; | ||
438 | } | ||
439 | } else { | ||
440 | hctx->ccid2hctx_ssacks = 0; | ||
441 | hctx->ccid2hctx_acks++; | ||
442 | |||
443 | if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { | ||
444 | ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1); | ||
445 | hctx->ccid2hctx_acks = 0; | ||
446 | } | 404 | } |
405 | } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) { | ||
406 | hctx->ccid2hctx_cwnd += 1; | ||
407 | hctx->ccid2hctx_packets_acked = 0; | ||
447 | } | 408 | } |
448 | 409 | ||
449 | /* update RTO */ | 410 | /* update RTO */ |
@@ -502,7 +463,6 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
502 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", | 463 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", |
503 | hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, | 464 | hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, |
504 | hctx->ccid2hctx_rto, HZ, r); | 465 | hctx->ccid2hctx_rto, HZ, r); |
505 | hctx->ccid2hctx_sent = 0; | ||
506 | } | 466 | } |
507 | 467 | ||
508 | /* we got a new ack, so re-start RTO timer */ | 468 | /* we got a new ack, so re-start RTO timer */ |
@@ -514,16 +474,19 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) | |||
514 | { | 474 | { |
515 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 475 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
516 | 476 | ||
517 | ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); | 477 | if (hctx->ccid2hctx_pipe == 0) |
518 | BUG_ON(hctx->ccid2hctx_pipe < 0); | 478 | DCCP_BUG("pipe == 0"); |
479 | else | ||
480 | hctx->ccid2hctx_pipe--; | ||
519 | 481 | ||
520 | if (hctx->ccid2hctx_pipe == 0) | 482 | if (hctx->ccid2hctx_pipe == 0) |
521 | ccid2_hc_tx_kill_rto_timer(sk); | 483 | ccid2_hc_tx_kill_rto_timer(sk); |
522 | } | 484 | } |
523 | 485 | ||
524 | static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, | 486 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) |
525 | struct ccid2_seq *seqp) | ||
526 | { | 487 | { |
488 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | ||
489 | |||
527 | if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { | 490 | if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { |
528 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); | 491 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); |
529 | return; | 492 | return; |
@@ -531,10 +494,12 @@ static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, | |||
531 | 494 | ||
532 | hctx->ccid2hctx_last_cong = jiffies; | 495 | hctx->ccid2hctx_last_cong = jiffies; |
533 | 496 | ||
534 | ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); | 497 | hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U; |
535 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; | 498 | hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U); |
536 | if (hctx->ccid2hctx_ssthresh < 2) | 499 | |
537 | hctx->ccid2hctx_ssthresh = 2; | 500 | /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ |
501 | if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd) | ||
502 | ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd); | ||
538 | } | 503 | } |
539 | 504 | ||
540 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 505 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
@@ -570,12 +535,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
570 | hctx->ccid2hctx_rpdupack++; | 535 | hctx->ccid2hctx_rpdupack++; |
571 | 536 | ||
572 | /* check if we got enough dupacks */ | 537 | /* check if we got enough dupacks */ |
573 | if (hctx->ccid2hctx_rpdupack >= | 538 | if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) { |
574 | hctx->ccid2hctx_numdupack) { | ||
575 | hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ | 539 | hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ |
576 | hctx->ccid2hctx_rpseq = 0; | 540 | hctx->ccid2hctx_rpseq = 0; |
577 | 541 | ||
578 | ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1); | 542 | ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); |
579 | } | 543 | } |
580 | } | 544 | } |
581 | } | 545 | } |
@@ -606,12 +570,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
606 | } | 570 | } |
607 | } | 571 | } |
608 | 572 | ||
609 | /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for | 573 | /* |
610 | * this single ack. I round up. | 574 | * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2 |
611 | * -sorbo. | 575 | * packets per acknowledgement. Rounding up avoids that cwnd is not |
576 | * advanced when Ack Ratio is 1 and gives a slight edge otherwise. | ||
612 | */ | 577 | */ |
613 | maxincr = dp->dccps_l_ack_ratio >> 1; | 578 | if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) |
614 | maxincr++; | 579 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); |
615 | 580 | ||
616 | /* go through all ack vectors */ | 581 | /* go through all ack vectors */ |
617 | while ((offset = ccid2_ackvector(sk, skb, offset, | 582 | while ((offset = ccid2_ackvector(sk, skb, offset, |
@@ -619,9 +584,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
619 | /* go through this ack vector */ | 584 | /* go through this ack vector */ |
620 | while (veclen--) { | 585 | while (veclen--) { |
621 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | 586 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; |
622 | u64 ackno_end_rl; | 587 | u64 ackno_end_rl = SUB48(ackno, rl); |
623 | 588 | ||
624 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
625 | ccid2_pr_debug("ackvec start:%llu end:%llu\n", | 589 | ccid2_pr_debug("ackvec start:%llu end:%llu\n", |
626 | (unsigned long long)ackno, | 590 | (unsigned long long)ackno, |
627 | (unsigned long long)ackno_end_rl); | 591 | (unsigned long long)ackno_end_rl); |
@@ -651,7 +615,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
651 | !seqp->ccid2s_acked) { | 615 | !seqp->ccid2s_acked) { |
652 | if (state == | 616 | if (state == |
653 | DCCP_ACKVEC_STATE_ECN_MARKED) { | 617 | DCCP_ACKVEC_STATE_ECN_MARKED) { |
654 | ccid2_congestion_event(hctx, | 618 | ccid2_congestion_event(sk, |
655 | seqp); | 619 | seqp); |
656 | } else | 620 | } else |
657 | ccid2_new_ack(sk, seqp, | 621 | ccid2_new_ack(sk, seqp, |
@@ -666,13 +630,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
666 | done = 1; | 630 | done = 1; |
667 | break; | 631 | break; |
668 | } | 632 | } |
669 | seqp = seqp->ccid2s_next; | 633 | seqp = seqp->ccid2s_prev; |
670 | } | 634 | } |
671 | if (done) | 635 | if (done) |
672 | break; | 636 | break; |
673 | 637 | ||
674 | 638 | ackno = SUB48(ackno_end_rl, 1); | |
675 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
676 | vector++; | 639 | vector++; |
677 | } | 640 | } |
678 | if (done) | 641 | if (done) |
@@ -694,7 +657,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
694 | while (1) { | 657 | while (1) { |
695 | if (seqp->ccid2s_acked) { | 658 | if (seqp->ccid2s_acked) { |
696 | done++; | 659 | done++; |
697 | if (done == hctx->ccid2hctx_numdupack) | 660 | if (done == NUMDUPACK) |
698 | break; | 661 | break; |
699 | } | 662 | } |
700 | if (seqp == hctx->ccid2hctx_seqt) | 663 | if (seqp == hctx->ccid2hctx_seqt) |
@@ -705,7 +668,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
705 | /* If there are at least 3 acknowledgements, anything unacknowledged | 668 | /* If there are at least 3 acknowledgements, anything unacknowledged |
706 | * below the last sequence number is considered lost | 669 | * below the last sequence number is considered lost |
707 | */ | 670 | */ |
708 | if (done == hctx->ccid2hctx_numdupack) { | 671 | if (done == NUMDUPACK) { |
709 | struct ccid2_seq *last_acked = seqp; | 672 | struct ccid2_seq *last_acked = seqp; |
710 | 673 | ||
711 | /* check for lost packets */ | 674 | /* check for lost packets */ |
@@ -717,7 +680,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
717 | * order to detect multiple congestion events in | 680 | * order to detect multiple congestion events in |
718 | * one ack vector. | 681 | * one ack vector. |
719 | */ | 682 | */ |
720 | ccid2_congestion_event(hctx, seqp); | 683 | ccid2_congestion_event(sk, seqp); |
721 | ccid2_hc_tx_dec_pipe(sk); | 684 | ccid2_hc_tx_dec_pipe(sk); |
722 | } | 685 | } |
723 | if (seqp == hctx->ccid2hctx_seqt) | 686 | if (seqp == hctx->ccid2hctx_seqt) |
@@ -742,14 +705,23 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
742 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | 705 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
743 | { | 706 | { |
744 | struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); | 707 | struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); |
708 | struct dccp_sock *dp = dccp_sk(sk); | ||
709 | u32 max_ratio; | ||
710 | |||
711 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ | ||
712 | hctx->ccid2hctx_ssthresh = ~0U; | ||
745 | 713 | ||
746 | ccid2_change_cwnd(hctx, 1); | 714 | /* |
747 | /* Initialize ssthresh to infinity. This means that we will exit the | 715 | * RFC 4341, 5: "The cwnd parameter is initialized to at most four |
748 | * initial slow-start after the first packet loss. This is what we | 716 | * packets for new connections, following the rules from [RFC3390]". |
749 | * want. | 717 | * We need to convert the bytes of RFC3390 into the packets of RFC 4341. |
750 | */ | 718 | */ |
751 | hctx->ccid2hctx_ssthresh = ~0; | 719 | hctx->ccid2hctx_cwnd = min(4U, max(2U, 4380U / dp->dccps_mss_cache)); |
752 | hctx->ccid2hctx_numdupack = 3; | 720 | |
721 | /* Make sure that Ack Ratio is enabled and within bounds. */ | ||
722 | max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2); | ||
723 | if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) | ||
724 | dp->dccps_l_ack_ratio = max_ratio; | ||
753 | 725 | ||
754 | /* XXX init ~ to window size... */ | 726 | /* XXX init ~ to window size... */ |
755 | if (ccid2_hc_tx_alloc_seq(hctx)) | 727 | if (ccid2_hc_tx_alloc_seq(hctx)) |
@@ -760,10 +732,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
760 | hctx->ccid2hctx_rttvar = -1; | 732 | hctx->ccid2hctx_rttvar = -1; |
761 | hctx->ccid2hctx_rpdupack = -1; | 733 | hctx->ccid2hctx_rpdupack = -1; |
762 | hctx->ccid2hctx_last_cong = jiffies; | 734 | hctx->ccid2hctx_last_cong = jiffies; |
763 | 735 | setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire, | |
764 | hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; | 736 | (unsigned long)sk); |
765 | hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; | ||
766 | init_timer(&hctx->ccid2hctx_rtotimer); | ||
767 | 737 | ||
768 | ccid2_hc_tx_check_sanity(hctx); | 738 | ccid2_hc_tx_check_sanity(hctx); |
769 | return 0; | 739 | return 0; |
@@ -800,7 +770,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
800 | 770 | ||
801 | static struct ccid_operations ccid2 = { | 771 | static struct ccid_operations ccid2 = { |
802 | .ccid_id = DCCPC_CCID2, | 772 | .ccid_id = DCCPC_CCID2, |
803 | .ccid_name = "ccid2", | 773 | .ccid_name = "TCP-like", |
804 | .ccid_owner = THIS_MODULE, | 774 | .ccid_owner = THIS_MODULE, |
805 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), | 775 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), |
806 | .ccid_hc_tx_init = ccid2_hc_tx_init, | 776 | .ccid_hc_tx_init = ccid2_hc_tx_init, |
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index d9daa534c9be..2c94ca029010 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/timer.h> | 24 | #include <linux/timer.h> |
25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
26 | #include "../ccid.h" | 26 | #include "../ccid.h" |
27 | /* NUMDUPACK parameter from RFC 4341, p. 6 */ | ||
28 | #define NUMDUPACK 3 | ||
27 | 29 | ||
28 | struct sock; | 30 | struct sock; |
29 | 31 | ||
@@ -40,22 +42,17 @@ struct ccid2_seq { | |||
40 | 42 | ||
41 | /** struct ccid2_hc_tx_sock - CCID2 TX half connection | 43 | /** struct ccid2_hc_tx_sock - CCID2 TX half connection |
42 | * | 44 | * |
43 | * @ccid2hctx_ssacks - ACKs recv in slow start | 45 | * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 |
44 | * @ccid2hctx_acks - ACKS recv in AI phase | 46 | * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465) |
45 | * @ccid2hctx_sent - packets sent in this window | ||
46 | * @ccid2hctx_lastrtt -time RTT was last measured | 47 | * @ccid2hctx_lastrtt -time RTT was last measured |
47 | * @ccid2hctx_arsent - packets sent [ack ratio] | ||
48 | * @ccid2hctx_ackloss - ack was lost in this win | ||
49 | * @ccid2hctx_rpseq - last consecutive seqno | 48 | * @ccid2hctx_rpseq - last consecutive seqno |
50 | * @ccid2hctx_rpdupack - dupacks since rpseq | 49 | * @ccid2hctx_rpdupack - dupacks since rpseq |
51 | */ | 50 | */ |
52 | struct ccid2_hc_tx_sock { | 51 | struct ccid2_hc_tx_sock { |
53 | u32 ccid2hctx_cwnd; | 52 | u32 ccid2hctx_cwnd; |
54 | int ccid2hctx_ssacks; | 53 | u32 ccid2hctx_ssthresh; |
55 | int ccid2hctx_acks; | 54 | u32 ccid2hctx_pipe; |
56 | unsigned int ccid2hctx_ssthresh; | 55 | u32 ccid2hctx_packets_acked; |
57 | int ccid2hctx_pipe; | ||
58 | int ccid2hctx_numdupack; | ||
59 | struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; | 56 | struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; |
60 | int ccid2hctx_seqbufc; | 57 | int ccid2hctx_seqbufc; |
61 | struct ccid2_seq *ccid2hctx_seqh; | 58 | struct ccid2_seq *ccid2hctx_seqh; |
@@ -63,14 +60,10 @@ struct ccid2_hc_tx_sock { | |||
63 | long ccid2hctx_rto; | 60 | long ccid2hctx_rto; |
64 | long ccid2hctx_srtt; | 61 | long ccid2hctx_srtt; |
65 | long ccid2hctx_rttvar; | 62 | long ccid2hctx_rttvar; |
66 | int ccid2hctx_sent; | ||
67 | unsigned long ccid2hctx_lastrtt; | 63 | unsigned long ccid2hctx_lastrtt; |
68 | struct timer_list ccid2hctx_rtotimer; | 64 | struct timer_list ccid2hctx_rtotimer; |
69 | unsigned long ccid2hctx_arsent; | ||
70 | int ccid2hctx_ackloss; | ||
71 | u64 ccid2hctx_rpseq; | 65 | u64 ccid2hctx_rpseq; |
72 | int ccid2hctx_rpdupack; | 66 | int ccid2hctx_rpdupack; |
73 | int ccid2hctx_sendwait; | ||
74 | unsigned long ccid2hctx_last_cong; | 67 | unsigned long ccid2hctx_last_cong; |
75 | u64 ccid2hctx_high_ack; | 68 | u64 ccid2hctx_high_ack; |
76 | }; | 69 | }; |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 19b33586333d..e76f460af0ea 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * net/dccp/ccids/ccid3.c | 2 | * net/dccp/ccids/ccid3.c |
3 | * | 3 | * |
4 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK | ||
4 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. | 5 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. |
5 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> | 6 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> |
6 | * | 7 | * |
@@ -33,11 +34,7 @@ | |||
33 | * along with this program; if not, write to the Free Software | 34 | * along with this program; if not, write to the Free Software |
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 35 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
35 | */ | 36 | */ |
36 | #include "../ccid.h" | ||
37 | #include "../dccp.h" | 37 | #include "../dccp.h" |
38 | #include "lib/packet_history.h" | ||
39 | #include "lib/loss_interval.h" | ||
40 | #include "lib/tfrc.h" | ||
41 | #include "ccid3.h" | 38 | #include "ccid3.h" |
42 | 39 | ||
43 | #include <asm/unaligned.h> | 40 | #include <asm/unaligned.h> |
@@ -49,9 +46,6 @@ static int ccid3_debug; | |||
49 | #define ccid3_pr_debug(format, a...) | 46 | #define ccid3_pr_debug(format, a...) |
50 | #endif | 47 | #endif |
51 | 48 | ||
52 | static struct dccp_tx_hist *ccid3_tx_hist; | ||
53 | static struct dccp_rx_hist *ccid3_rx_hist; | ||
54 | |||
55 | /* | 49 | /* |
56 | * Transmitter Half-Connection Routines | 50 | * Transmitter Half-Connection Routines |
57 | */ | 51 | */ |
@@ -83,24 +77,27 @@ static void ccid3_hc_tx_set_state(struct sock *sk, | |||
83 | } | 77 | } |
84 | 78 | ||
85 | /* | 79 | /* |
86 | * Compute the initial sending rate X_init according to RFC 3390: | 80 | * Compute the initial sending rate X_init in the manner of RFC 3390: |
87 | * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes)) | 81 | * |
88 | * X_init = w_init / RTT | 82 | * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT |
83 | * | ||
84 | * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis | ||
85 | * (rev-02) clarifies the use of RFC 3390 with regard to the above formula. | ||
89 | * For consistency with other parts of the code, X_init is scaled by 2^6. | 86 | * For consistency with other parts of the code, X_init is scaled by 2^6. |
90 | */ | 87 | */ |
91 | static inline u64 rfc3390_initial_rate(struct sock *sk) | 88 | static inline u64 rfc3390_initial_rate(struct sock *sk) |
92 | { | 89 | { |
93 | const struct dccp_sock *dp = dccp_sk(sk); | 90 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
94 | const __u32 w_init = min(4 * dp->dccps_mss_cache, | 91 | const __u32 w_init = min_t(__u32, 4 * hctx->ccid3hctx_s, |
95 | max(2 * dp->dccps_mss_cache, 4380U)); | 92 | max_t(__u32, 2 * hctx->ccid3hctx_s, 4380)); |
96 | 93 | ||
97 | return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt); | 94 | return scaled_div(w_init << 6, hctx->ccid3hctx_rtt); |
98 | } | 95 | } |
99 | 96 | ||
100 | /* | 97 | /* |
101 | * Recalculate t_ipi and delta (should be called whenever X changes) | 98 | * Recalculate t_ipi and delta (should be called whenever X changes) |
102 | */ | 99 | */ |
103 | static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) | 100 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) |
104 | { | 101 | { |
105 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ | 102 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ |
106 | hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, | 103 | hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, |
@@ -116,6 +113,13 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) | |||
116 | 113 | ||
117 | } | 114 | } |
118 | 115 | ||
116 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) | ||
117 | { | ||
118 | u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); | ||
119 | |||
120 | return delta / hctx->ccid3hctx_rtt; | ||
121 | } | ||
122 | |||
119 | /** | 123 | /** |
120 | * ccid3_hc_tx_update_x - Update allowed sending rate X | 124 | * ccid3_hc_tx_update_x - Update allowed sending rate X |
121 | * @stamp: most recent time if available - can be left NULL. | 125 | * @stamp: most recent time if available - can be left NULL. |
@@ -127,19 +131,19 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) | |||
127 | * | 131 | * |
128 | */ | 132 | */ |
129 | static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) | 133 | static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) |
130 | |||
131 | { | 134 | { |
132 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 135 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
133 | __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; | 136 | __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; |
134 | const __u64 old_x = hctx->ccid3hctx_x; | 137 | const __u64 old_x = hctx->ccid3hctx_x; |
135 | ktime_t now = stamp? *stamp : ktime_get_real(); | 138 | ktime_t now = stamp ? *stamp : ktime_get_real(); |
136 | 139 | ||
137 | /* | 140 | /* |
138 | * Handle IDLE periods: do not reduce below RFC3390 initial sending rate | 141 | * Handle IDLE periods: do not reduce below RFC3390 initial sending rate |
139 | * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis. | 142 | * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis: |
143 | * a sender is idle if it has not sent anything over a 2-RTT-period. | ||
140 | * For consistency with X and X_recv, min_rate is also scaled by 2^6. | 144 | * For consistency with X and X_recv, min_rate is also scaled by 2^6. |
141 | */ | 145 | */ |
142 | if (unlikely(hctx->ccid3hctx_idle)) { | 146 | if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) { |
143 | min_rate = rfc3390_initial_rate(sk); | 147 | min_rate = rfc3390_initial_rate(sk); |
144 | min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); | 148 | min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); |
145 | } | 149 | } |
@@ -181,7 +185,7 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) | |||
181 | { | 185 | { |
182 | const u16 old_s = hctx->ccid3hctx_s; | 186 | const u16 old_s = hctx->ccid3hctx_s; |
183 | 187 | ||
184 | hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10; | 188 | hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9); |
185 | 189 | ||
186 | if (hctx->ccid3hctx_s != old_s) | 190 | if (hctx->ccid3hctx_s != old_s) |
187 | ccid3_update_send_interval(hctx); | 191 | ccid3_update_send_interval(hctx); |
@@ -225,29 +229,27 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
225 | ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, | 229 | ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, |
226 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | 230 | ccid3_tx_state_name(hctx->ccid3hctx_state)); |
227 | 231 | ||
228 | hctx->ccid3hctx_idle = 1; | 232 | if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK) |
233 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | ||
234 | else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) | ||
235 | goto out; | ||
229 | 236 | ||
230 | switch (hctx->ccid3hctx_state) { | 237 | /* |
231 | case TFRC_SSTATE_NO_FBACK: | 238 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 |
232 | /* RFC 3448, 4.4: Halve send rate directly */ | 239 | */ |
240 | if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */ | ||
241 | hctx->ccid3hctx_p == 0) { | ||
242 | |||
243 | /* halve send rate directly */ | ||
233 | hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, | 244 | hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, |
234 | (((__u64)hctx->ccid3hctx_s) << 6) / | 245 | (((__u64)hctx->ccid3hctx_s) << 6) / |
235 | TFRC_T_MBI); | 246 | TFRC_T_MBI); |
236 | |||
237 | ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u " | ||
238 | "bytes/s\n", dccp_role(sk), sk, | ||
239 | ccid3_tx_state_name(hctx->ccid3hctx_state), | ||
240 | (unsigned)(hctx->ccid3hctx_x >> 6)); | ||
241 | /* The value of R is still undefined and so we can not recompute | ||
242 | * the timout value. Keep initial value as per [RFC 4342, 5]. */ | ||
243 | t_nfb = TFRC_INITIAL_TIMEOUT; | ||
244 | ccid3_update_send_interval(hctx); | 247 | ccid3_update_send_interval(hctx); |
245 | break; | 248 | } else { |
246 | case TFRC_SSTATE_FBACK: | ||
247 | /* | 249 | /* |
248 | * Modify the cached value of X_recv [RFC 3448, 4.4] | 250 | * Modify the cached value of X_recv |
249 | * | 251 | * |
250 | * If (p == 0 || X_calc > 2 * X_recv) | 252 | * If (X_calc > 2 * X_recv) |
251 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); | 253 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); |
252 | * Else | 254 | * Else |
253 | * X_recv = X_calc / 4; | 255 | * X_recv = X_calc / 4; |
@@ -256,32 +258,28 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
256 | */ | 258 | */ |
257 | BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); | 259 | BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); |
258 | 260 | ||
259 | if (hctx->ccid3hctx_p == 0 || | 261 | if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5)) |
260 | (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) { | ||
261 | |||
262 | hctx->ccid3hctx_x_recv = | 262 | hctx->ccid3hctx_x_recv = |
263 | max(hctx->ccid3hctx_x_recv / 2, | 263 | max(hctx->ccid3hctx_x_recv / 2, |
264 | (((__u64)hctx->ccid3hctx_s) << 6) / | 264 | (((__u64)hctx->ccid3hctx_s) << 6) / |
265 | (2 * TFRC_T_MBI)); | 265 | (2 * TFRC_T_MBI)); |
266 | } else { | 266 | else { |
267 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; | 267 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; |
268 | hctx->ccid3hctx_x_recv <<= 4; | 268 | hctx->ccid3hctx_x_recv <<= 4; |
269 | } | 269 | } |
270 | /* Now recalculate X [RFC 3448, 4.3, step (4)] */ | ||
271 | ccid3_hc_tx_update_x(sk, NULL); | 270 | ccid3_hc_tx_update_x(sk, NULL); |
272 | /* | ||
273 | * Schedule no feedback timer to expire in | ||
274 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | ||
275 | * See comments in packet_recv() regarding the value of t_RTO. | ||
276 | */ | ||
277 | t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); | ||
278 | break; | ||
279 | case TFRC_SSTATE_NO_SENT: | ||
280 | DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk); | ||
281 | /* fall through */ | ||
282 | case TFRC_SSTATE_TERM: | ||
283 | goto out; | ||
284 | } | 271 | } |
272 | ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", | ||
273 | (unsigned long long)hctx->ccid3hctx_x); | ||
274 | |||
275 | /* | ||
276 | * Set new timeout for the nofeedback timer. | ||
277 | * See comments in packet_recv() regarding the value of t_RTO. | ||
278 | */ | ||
279 | if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */ | ||
280 | t_nfb = TFRC_INITIAL_TIMEOUT; | ||
281 | else | ||
282 | t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); | ||
285 | 283 | ||
286 | restart_timer: | 284 | restart_timer: |
287 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | 285 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, |
@@ -336,8 +334,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
336 | hctx->ccid3hctx_x = rfc3390_initial_rate(sk); | 334 | hctx->ccid3hctx_x = rfc3390_initial_rate(sk); |
337 | hctx->ccid3hctx_t_ld = now; | 335 | hctx->ccid3hctx_t_ld = now; |
338 | } else { | 336 | } else { |
339 | /* Sender does not have RTT sample: X = MSS/second */ | 337 | /* Sender does not have RTT sample: X_pps = 1 pkt/sec */ |
340 | hctx->ccid3hctx_x = dp->dccps_mss_cache; | 338 | hctx->ccid3hctx_x = hctx->ccid3hctx_s; |
341 | hctx->ccid3hctx_x <<= 6; | 339 | hctx->ccid3hctx_x <<= 6; |
342 | } | 340 | } |
343 | ccid3_update_send_interval(hctx); | 341 | ccid3_update_send_interval(hctx); |
@@ -369,7 +367,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
369 | /* prepare to send now (add options etc.) */ | 367 | /* prepare to send now (add options etc.) */ |
370 | dp->dccps_hc_tx_insert_options = 1; | 368 | dp->dccps_hc_tx_insert_options = 1; |
371 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | 369 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; |
372 | hctx->ccid3hctx_idle = 0; | ||
373 | 370 | ||
374 | /* set the nominal send time for the next following packet */ | 371 | /* set the nominal send time for the next following packet */ |
375 | hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, | 372 | hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, |
@@ -381,28 +378,17 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, | |||
381 | unsigned int len) | 378 | unsigned int len) |
382 | { | 379 | { |
383 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 380 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
384 | struct dccp_tx_hist_entry *packet; | ||
385 | 381 | ||
386 | ccid3_hc_tx_update_s(hctx, len); | 382 | ccid3_hc_tx_update_s(hctx, len); |
387 | 383 | ||
388 | packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); | 384 | if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss)) |
389 | if (unlikely(packet == NULL)) { | ||
390 | DCCP_CRIT("packet history - out of memory!"); | 385 | DCCP_CRIT("packet history - out of memory!"); |
391 | return; | ||
392 | } | ||
393 | dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); | ||
394 | |||
395 | packet->dccphtx_tstamp = ktime_get_real(); | ||
396 | packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; | ||
397 | packet->dccphtx_rtt = hctx->ccid3hctx_rtt; | ||
398 | packet->dccphtx_sent = 1; | ||
399 | } | 386 | } |
400 | 387 | ||
401 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 388 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
402 | { | 389 | { |
403 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | 390 | struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); |
404 | struct ccid3_options_received *opt_recv; | 391 | struct ccid3_options_received *opt_recv; |
405 | struct dccp_tx_hist_entry *packet; | ||
406 | ktime_t now; | 392 | ktime_t now; |
407 | unsigned long t_nfb; | 393 | unsigned long t_nfb; |
408 | u32 pinv, r_sample; | 394 | u32 pinv, r_sample; |
@@ -411,131 +397,112 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
411 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | 397 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || |
412 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | 398 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) |
413 | return; | 399 | return; |
400 | /* ... and only in the established state */ | ||
401 | if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK && | ||
402 | hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) | ||
403 | return; | ||
414 | 404 | ||
415 | opt_recv = &hctx->ccid3hctx_options_received; | 405 | opt_recv = &hctx->ccid3hctx_options_received; |
406 | now = ktime_get_real(); | ||
416 | 407 | ||
417 | switch (hctx->ccid3hctx_state) { | 408 | /* Estimate RTT from history if ACK number is valid */ |
418 | case TFRC_SSTATE_NO_FBACK: | 409 | r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist, |
419 | case TFRC_SSTATE_FBACK: | 410 | DCCP_SKB_CB(skb)->dccpd_ack_seq, now); |
420 | /* get packet from history to look up t_recvdata */ | 411 | if (r_sample == 0) { |
421 | packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, | 412 | DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, |
422 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 413 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), |
423 | if (unlikely(packet == NULL)) { | 414 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); |
424 | DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist " | 415 | return; |
425 | "in history!\n", dccp_role(sk), sk, | 416 | } |
426 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
427 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
428 | return; | ||
429 | } | ||
430 | |||
431 | /* Update receive rate in units of 64 * bytes/second */ | ||
432 | hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; | ||
433 | hctx->ccid3hctx_x_recv <<= 6; | ||
434 | 417 | ||
435 | /* Update loss event rate */ | 418 | /* Update receive rate in units of 64 * bytes/second */ |
436 | pinv = opt_recv->ccid3or_loss_event_rate; | 419 | hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; |
437 | if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ | 420 | hctx->ccid3hctx_x_recv <<= 6; |
438 | hctx->ccid3hctx_p = 0; | ||
439 | else /* can not exceed 100% */ | ||
440 | hctx->ccid3hctx_p = 1000000 / pinv; | ||
441 | 421 | ||
442 | now = ktime_get_real(); | 422 | /* Update loss event rate (which is scaled by 1e6) */ |
443 | /* | 423 | pinv = opt_recv->ccid3or_loss_event_rate; |
444 | * Calculate new round trip sample as per [RFC 3448, 4.3] by | 424 | if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ |
445 | * R_sample = (now - t_recvdata) - t_elapsed | 425 | hctx->ccid3hctx_p = 0; |
446 | */ | 426 | else /* can not exceed 100% */ |
447 | r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp)); | 427 | hctx->ccid3hctx_p = scaled_div(1, pinv); |
428 | /* | ||
429 | * Validate new RTT sample and update moving average | ||
430 | */ | ||
431 | r_sample = dccp_sample_rtt(sk, r_sample); | ||
432 | hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9); | ||
433 | /* | ||
434 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 | ||
435 | */ | ||
436 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | ||
437 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | ||
448 | 438 | ||
449 | /* | 439 | if (hctx->ccid3hctx_t_rto == 0) { |
450 | * Update RTT estimate by | ||
451 | * If (No feedback recv) | ||
452 | * R = R_sample; | ||
453 | * Else | ||
454 | * R = q * R + (1 - q) * R_sample; | ||
455 | * | ||
456 | * q is a constant, RFC 3448 recomments 0.9 | ||
457 | */ | ||
458 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | ||
459 | /* | 440 | /* |
460 | * Larger Initial Windows [RFC 4342, sec. 5] | 441 | * Initial feedback packet: Larger Initial Windows (4.2) |
461 | */ | 442 | */ |
462 | hctx->ccid3hctx_rtt = r_sample; | ||
463 | hctx->ccid3hctx_x = rfc3390_initial_rate(sk); | 443 | hctx->ccid3hctx_x = rfc3390_initial_rate(sk); |
464 | hctx->ccid3hctx_t_ld = now; | 444 | hctx->ccid3hctx_t_ld = now; |
465 | 445 | ||
466 | ccid3_update_send_interval(hctx); | 446 | ccid3_update_send_interval(hctx); |
467 | 447 | ||
468 | ccid3_pr_debug("%s(%p), s=%u, MSS=%u, " | 448 | goto done_computing_x; |
469 | "R_sample=%uus, X=%u\n", dccp_role(sk), | 449 | } else if (hctx->ccid3hctx_p == 0) { |
470 | sk, hctx->ccid3hctx_s, | 450 | /* |
471 | dccp_sk(sk)->dccps_mss_cache, r_sample, | 451 | * First feedback after nofeedback timer expiry (4.3) |
472 | (unsigned)(hctx->ccid3hctx_x >> 6)); | 452 | */ |
473 | 453 | goto done_computing_x; | |
474 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | ||
475 | } else { | ||
476 | hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt + | ||
477 | r_sample) / 10; | ||
478 | |||
479 | /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ | ||
480 | if (hctx->ccid3hctx_p > 0) | ||
481 | hctx->ccid3hctx_x_calc = | ||
482 | tfrc_calc_x(hctx->ccid3hctx_s, | ||
483 | hctx->ccid3hctx_rtt, | ||
484 | hctx->ccid3hctx_p); | ||
485 | ccid3_hc_tx_update_x(sk, &now); | ||
486 | |||
487 | ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " | ||
488 | "p=%u, X_calc=%u, X_recv=%u, X=%u\n", | ||
489 | dccp_role(sk), | ||
490 | sk, hctx->ccid3hctx_rtt, r_sample, | ||
491 | hctx->ccid3hctx_s, hctx->ccid3hctx_p, | ||
492 | hctx->ccid3hctx_x_calc, | ||
493 | (unsigned)(hctx->ccid3hctx_x_recv >> 6), | ||
494 | (unsigned)(hctx->ccid3hctx_x >> 6)); | ||
495 | } | 454 | } |
455 | } | ||
496 | 456 | ||
497 | /* unschedule no feedback timer */ | 457 | /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ |
498 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | 458 | if (hctx->ccid3hctx_p > 0) |
459 | hctx->ccid3hctx_x_calc = | ||
460 | tfrc_calc_x(hctx->ccid3hctx_s, | ||
461 | hctx->ccid3hctx_rtt, | ||
462 | hctx->ccid3hctx_p); | ||
463 | ccid3_hc_tx_update_x(sk, &now); | ||
464 | |||
465 | done_computing_x: | ||
466 | ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " | ||
467 | "p=%u, X_calc=%u, X_recv=%u, X=%u\n", | ||
468 | dccp_role(sk), | ||
469 | sk, hctx->ccid3hctx_rtt, r_sample, | ||
470 | hctx->ccid3hctx_s, hctx->ccid3hctx_p, | ||
471 | hctx->ccid3hctx_x_calc, | ||
472 | (unsigned)(hctx->ccid3hctx_x_recv >> 6), | ||
473 | (unsigned)(hctx->ccid3hctx_x >> 6)); | ||
499 | 474 | ||
500 | /* remove all packets older than the one acked from history */ | 475 | /* unschedule no feedback timer */ |
501 | dccp_tx_hist_purge_older(ccid3_tx_hist, | 476 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); |
502 | &hctx->ccid3hctx_hist, packet); | ||
503 | /* | ||
504 | * As we have calculated new ipi, delta, t_nom it is possible | ||
505 | * that we now can send a packet, so wake up dccp_wait_for_ccid | ||
506 | */ | ||
507 | sk->sk_write_space(sk); | ||
508 | 477 | ||
509 | /* | 478 | /* |
510 | * Update timeout interval for the nofeedback timer. | 479 | * As we have calculated new ipi, delta, t_nom it is possible |
511 | * We use a configuration option to increase the lower bound. | 480 | * that we now can send a packet, so wake up dccp_wait_for_ccid |
512 | * This can help avoid triggering the nofeedback timer too | 481 | */ |
513 | * often ('spinning') on LANs with small RTTs. | 482 | sk->sk_write_space(sk); |
514 | */ | ||
515 | hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, | ||
516 | CONFIG_IP_DCCP_CCID3_RTO * | ||
517 | (USEC_PER_SEC/1000)); | ||
518 | /* | ||
519 | * Schedule no feedback timer to expire in | ||
520 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | ||
521 | */ | ||
522 | t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); | ||
523 | 483 | ||
524 | ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " | 484 | /* |
525 | "expire in %lu jiffies (%luus)\n", | 485 | * Update timeout interval for the nofeedback timer. |
526 | dccp_role(sk), | 486 | * We use a configuration option to increase the lower bound. |
527 | sk, usecs_to_jiffies(t_nfb), t_nfb); | 487 | * This can help avoid triggering the nofeedback timer too |
488 | * often ('spinning') on LANs with small RTTs. | ||
489 | */ | ||
490 | hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, | ||
491 | (CONFIG_IP_DCCP_CCID3_RTO * | ||
492 | (USEC_PER_SEC / 1000))); | ||
493 | /* | ||
494 | * Schedule no feedback timer to expire in | ||
495 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | ||
496 | */ | ||
497 | t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); | ||
528 | 498 | ||
529 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | 499 | ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " |
530 | jiffies + usecs_to_jiffies(t_nfb)); | 500 | "expire in %lu jiffies (%luus)\n", |
501 | dccp_role(sk), | ||
502 | sk, usecs_to_jiffies(t_nfb), t_nfb); | ||
531 | 503 | ||
532 | /* set idle flag */ | 504 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, |
533 | hctx->ccid3hctx_idle = 1; | 505 | jiffies + usecs_to_jiffies(t_nfb)); |
534 | break; | ||
535 | case TFRC_SSTATE_NO_SENT: /* fall through */ | ||
536 | case TFRC_SSTATE_TERM: /* ignore feedback when closing */ | ||
537 | break; | ||
538 | } | ||
539 | } | 506 | } |
540 | 507 | ||
541 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, | 508 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, |
@@ -605,12 +572,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
605 | struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); | 572 | struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); |
606 | 573 | ||
607 | hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; | 574 | hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; |
608 | INIT_LIST_HEAD(&hctx->ccid3hctx_hist); | 575 | hctx->ccid3hctx_hist = NULL; |
609 | 576 | setup_timer(&hctx->ccid3hctx_no_feedback_timer, | |
610 | hctx->ccid3hctx_no_feedback_timer.function = | 577 | ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); |
611 | ccid3_hc_tx_no_feedback_timer; | ||
612 | hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; | ||
613 | init_timer(&hctx->ccid3hctx_no_feedback_timer); | ||
614 | 578 | ||
615 | return 0; | 579 | return 0; |
616 | } | 580 | } |
@@ -622,8 +586,7 @@ static void ccid3_hc_tx_exit(struct sock *sk) | |||
622 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); | 586 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); |
623 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | 587 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); |
624 | 588 | ||
625 | /* Empty packet history */ | 589 | tfrc_tx_hist_purge(&hctx->ccid3hctx_hist); |
626 | dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); | ||
627 | } | 590 | } |
628 | 591 | ||
629 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | 592 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) |
@@ -670,6 +633,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | |||
670 | /* | 633 | /* |
671 | * Receiver Half-Connection Routines | 634 | * Receiver Half-Connection Routines |
672 | */ | 635 | */ |
636 | |||
637 | /* CCID3 feedback types */ | ||
638 | enum ccid3_fback_type { | ||
639 | CCID3_FBACK_NONE = 0, | ||
640 | CCID3_FBACK_INITIAL, | ||
641 | CCID3_FBACK_PERIODIC, | ||
642 | CCID3_FBACK_PARAM_CHANGE | ||
643 | }; | ||
644 | |||
673 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG | 645 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG |
674 | static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) | 646 | static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) |
675 | { | 647 | { |
@@ -696,67 +668,58 @@ static void ccid3_hc_rx_set_state(struct sock *sk, | |||
696 | hcrx->ccid3hcrx_state = state; | 668 | hcrx->ccid3hcrx_state = state; |
697 | } | 669 | } |
698 | 670 | ||
699 | static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) | 671 | static void ccid3_hc_rx_send_feedback(struct sock *sk, |
700 | { | 672 | const struct sk_buff *skb, |
701 | if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */ | 673 | enum ccid3_fback_type fbtype) |
702 | ccid3_pr_debug("Packet payload length is 0 - not updating\n"); | ||
703 | else | ||
704 | hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len : | ||
705 | (9 * hcrx->ccid3hcrx_s + len) / 10; | ||
706 | } | ||
707 | |||
708 | static void ccid3_hc_rx_send_feedback(struct sock *sk) | ||
709 | { | 674 | { |
710 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 675 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
711 | struct dccp_sock *dp = dccp_sk(sk); | 676 | struct dccp_sock *dp = dccp_sk(sk); |
712 | struct dccp_rx_hist_entry *packet; | ||
713 | ktime_t now; | 677 | ktime_t now; |
714 | suseconds_t delta; | 678 | s64 delta = 0; |
715 | 679 | ||
716 | ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); | 680 | if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM)) |
681 | return; | ||
717 | 682 | ||
718 | now = ktime_get_real(); | 683 | now = ktime_get_real(); |
719 | 684 | ||
720 | switch (hcrx->ccid3hcrx_state) { | 685 | switch (fbtype) { |
721 | case TFRC_RSTATE_NO_DATA: | 686 | case CCID3_FBACK_INITIAL: |
722 | hcrx->ccid3hcrx_x_recv = 0; | 687 | hcrx->ccid3hcrx_x_recv = 0; |
688 | hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ | ||
723 | break; | 689 | break; |
724 | case TFRC_RSTATE_DATA: | 690 | case CCID3_FBACK_PARAM_CHANGE: |
725 | delta = ktime_us_delta(now, | 691 | /* |
726 | hcrx->ccid3hcrx_tstamp_last_feedback); | 692 | * When parameters change (new loss or p > p_prev), we do not |
727 | DCCP_BUG_ON(delta < 0); | 693 | * have a reliable estimate for R_m of [RFC 3448, 6.2] and so |
728 | hcrx->ccid3hcrx_x_recv = | 694 | * need to reuse the previous value of X_recv. However, when |
729 | scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); | 695 | * X_recv was 0 (due to early loss), this would kill X down to |
696 | * s/t_mbi (i.e. one packet in 64 seconds). | ||
697 | * To avoid such drastic reduction, we approximate X_recv as | ||
698 | * the number of bytes since last feedback. | ||
699 | * This is a safe fallback, since X is bounded above by X_calc. | ||
700 | */ | ||
701 | if (hcrx->ccid3hcrx_x_recv > 0) | ||
702 | break; | ||
703 | /* fall through */ | ||
704 | case CCID3_FBACK_PERIODIC: | ||
705 | delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback); | ||
706 | if (delta <= 0) | ||
707 | DCCP_BUG("delta (%ld) <= 0", (long)delta); | ||
708 | else | ||
709 | hcrx->ccid3hcrx_x_recv = | ||
710 | scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); | ||
730 | break; | 711 | break; |
731 | case TFRC_RSTATE_TERM: | 712 | default: |
732 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | ||
733 | return; | 713 | return; |
734 | } | 714 | } |
735 | 715 | ||
736 | packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); | 716 | ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, |
737 | if (unlikely(packet == NULL)) { | 717 | hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv); |
738 | DCCP_WARN("%s(%p), no data packet in history!\n", | ||
739 | dccp_role(sk), sk); | ||
740 | return; | ||
741 | } | ||
742 | 718 | ||
743 | hcrx->ccid3hcrx_tstamp_last_feedback = now; | 719 | hcrx->ccid3hcrx_tstamp_last_feedback = now; |
744 | hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; | 720 | hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval; |
745 | hcrx->ccid3hcrx_bytes_recv = 0; | 721 | hcrx->ccid3hcrx_bytes_recv = 0; |
746 | 722 | ||
747 | /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ | ||
748 | delta = ktime_us_delta(now, packet->dccphrx_tstamp); | ||
749 | DCCP_BUG_ON(delta < 0); | ||
750 | hcrx->ccid3hcrx_elapsed_time = delta / 10; | ||
751 | |||
752 | if (hcrx->ccid3hcrx_p == 0) | ||
753 | hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ | ||
754 | else if (hcrx->ccid3hcrx_p > 1000000) { | ||
755 | DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p); | ||
756 | hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */ | ||
757 | } else | ||
758 | hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; | ||
759 | |||
760 | dp->dccps_hc_rx_insert_options = 1; | 723 | dp->dccps_hc_rx_insert_options = 1; |
761 | dccp_send_ack(sk); | 724 | dccp_send_ack(sk); |
762 | } | 725 | } |
@@ -770,7 +733,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | |||
770 | return 0; | 733 | return 0; |
771 | 734 | ||
772 | hcrx = ccid3_hc_rx_sk(sk); | 735 | hcrx = ccid3_hc_rx_sk(sk); |
773 | DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; | ||
774 | 736 | ||
775 | if (dccp_packet_without_ack(skb)) | 737 | if (dccp_packet_without_ack(skb)) |
776 | return 0; | 738 | return 0; |
@@ -778,11 +740,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | |||
778 | x_recv = htonl(hcrx->ccid3hcrx_x_recv); | 740 | x_recv = htonl(hcrx->ccid3hcrx_x_recv); |
779 | pinv = htonl(hcrx->ccid3hcrx_pinv); | 741 | pinv = htonl(hcrx->ccid3hcrx_pinv); |
780 | 742 | ||
781 | if ((hcrx->ccid3hcrx_elapsed_time != 0 && | 743 | if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, |
782 | dccp_insert_option_elapsed_time(sk, skb, | ||
783 | hcrx->ccid3hcrx_elapsed_time)) || | ||
784 | dccp_insert_option_timestamp(sk, skb) || | ||
785 | dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, | ||
786 | &pinv, sizeof(pinv)) || | 744 | &pinv, sizeof(pinv)) || |
787 | dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, | 745 | dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, |
788 | &x_recv, sizeof(x_recv))) | 746 | &x_recv, sizeof(x_recv))) |
@@ -791,180 +749,139 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | |||
791 | return 0; | 749 | return 0; |
792 | } | 750 | } |
793 | 751 | ||
794 | static int ccid3_hc_rx_detect_loss(struct sock *sk, | 752 | /** ccid3_first_li - Implements [RFC 3448, 6.3.1] |
795 | struct dccp_rx_hist_entry *packet) | 753 | * |
754 | * Determine the length of the first loss interval via inverse lookup. | ||
755 | * Assume that X_recv can be computed by the throughput equation | ||
756 | * s | ||
757 | * X_recv = -------- | ||
758 | * R * fval | ||
759 | * Find some p such that f(p) = fval; return 1/p (scaled). | ||
760 | */ | ||
761 | static u32 ccid3_first_li(struct sock *sk) | ||
796 | { | 762 | { |
797 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 763 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
798 | struct dccp_rx_hist_entry *rx_hist = | 764 | u32 x_recv, p, delta; |
799 | dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); | 765 | u64 fval; |
800 | u64 seqno = packet->dccphrx_seqno; | ||
801 | u64 tmp_seqno; | ||
802 | int loss = 0; | ||
803 | u8 ccval; | ||
804 | |||
805 | |||
806 | tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; | ||
807 | 766 | ||
808 | if (!rx_hist || | 767 | if (hcrx->ccid3hcrx_rtt == 0) { |
809 | follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { | 768 | DCCP_WARN("No RTT estimate available, using fallback RTT\n"); |
810 | hcrx->ccid3hcrx_seqno_nonloss = seqno; | 769 | hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT; |
811 | hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; | ||
812 | goto detect_out; | ||
813 | } | 770 | } |
814 | 771 | ||
815 | 772 | delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback)); | |
816 | while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) | 773 | x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); |
817 | > TFRC_RECV_NUM_LATE_LOSS) { | 774 | if (x_recv == 0) { /* would also trigger divide-by-zero */ |
818 | loss = 1; | 775 | DCCP_WARN("X_recv==0\n"); |
819 | dccp_li_update_li(sk, | 776 | if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { |
820 | &hcrx->ccid3hcrx_li_hist, | 777 | DCCP_BUG("stored value of X_recv is zero"); |
821 | &hcrx->ccid3hcrx_hist, | 778 | return ~0U; |
822 | hcrx->ccid3hcrx_tstamp_last_feedback, | ||
823 | hcrx->ccid3hcrx_s, | ||
824 | hcrx->ccid3hcrx_bytes_recv, | ||
825 | hcrx->ccid3hcrx_x_recv, | ||
826 | hcrx->ccid3hcrx_seqno_nonloss, | ||
827 | hcrx->ccid3hcrx_ccval_nonloss); | ||
828 | tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; | ||
829 | dccp_inc_seqno(&tmp_seqno); | ||
830 | hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; | ||
831 | dccp_inc_seqno(&tmp_seqno); | ||
832 | while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, | ||
833 | tmp_seqno, &ccval)) { | ||
834 | hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; | ||
835 | hcrx->ccid3hcrx_ccval_nonloss = ccval; | ||
836 | dccp_inc_seqno(&tmp_seqno); | ||
837 | } | 779 | } |
838 | } | 780 | } |
839 | 781 | ||
840 | /* FIXME - this code could be simplified with above while */ | 782 | fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt); |
841 | /* but works at moment */ | 783 | fval = scaled_div32(fval, x_recv); |
842 | if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { | 784 | p = tfrc_calc_x_reverse_lookup(fval); |
843 | hcrx->ccid3hcrx_seqno_nonloss = seqno; | ||
844 | hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; | ||
845 | } | ||
846 | 785 | ||
847 | detect_out: | 786 | ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " |
848 | dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, | 787 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); |
849 | &hcrx->ccid3hcrx_li_hist, packet, | 788 | |
850 | hcrx->ccid3hcrx_seqno_nonloss); | 789 | return p == 0 ? ~0U : scaled_div(1, p); |
851 | return loss; | ||
852 | } | 790 | } |
853 | 791 | ||
854 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | 792 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
855 | { | 793 | { |
856 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 794 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
857 | const struct dccp_options_received *opt_recv; | 795 | enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; |
858 | struct dccp_rx_hist_entry *packet; | 796 | const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; |
859 | u32 p_prev, r_sample, rtt_prev; | 797 | const bool is_data_packet = dccp_data_packet(skb); |
860 | int loss, payload_size; | 798 | |
861 | ktime_t now; | 799 | if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { |
862 | 800 | if (is_data_packet) { | |
863 | opt_recv = &dccp_sk(sk)->dccps_options_received; | 801 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; |
864 | 802 | do_feedback = CCID3_FBACK_INITIAL; | |
865 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | 803 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); |
866 | case DCCP_PKT_ACK: | 804 | hcrx->ccid3hcrx_s = payload; |
867 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | 805 | /* |
868 | return; | 806 | * Not necessary to update ccid3hcrx_bytes_recv here, |
869 | case DCCP_PKT_DATAACK: | 807 | * since X_recv = 0 for the first feedback packet (cf. |
870 | if (opt_recv->dccpor_timestamp_echo == 0) | 808 | * RFC 3448, 6.3) -- gerrit |
871 | break; | 809 | */ |
872 | r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; | 810 | } |
873 | rtt_prev = hcrx->ccid3hcrx_rtt; | 811 | goto update_records; |
874 | r_sample = dccp_sample_rtt(sk, 10 * r_sample); | 812 | } |
875 | 813 | ||
876 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | 814 | if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb)) |
877 | hcrx->ccid3hcrx_rtt = r_sample; | 815 | return; /* done receiving */ |
878 | else | ||
879 | hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + | ||
880 | r_sample / 10; | ||
881 | 816 | ||
882 | if (rtt_prev != hcrx->ccid3hcrx_rtt) | 817 | if (is_data_packet) { |
883 | ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", | 818 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; |
884 | dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, | 819 | /* |
885 | opt_recv->dccpor_elapsed_time); | 820 | * Update moving-average of s and the sum of received payload bytes |
886 | break; | 821 | */ |
887 | case DCCP_PKT_DATA: | 822 | hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9); |
888 | break; | 823 | hcrx->ccid3hcrx_bytes_recv += payload; |
889 | default: /* We're not interested in other packet types, move along */ | ||
890 | return; | ||
891 | } | 824 | } |
892 | 825 | ||
893 | packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, | 826 | /* |
894 | skb, GFP_ATOMIC); | 827 | * Handle pending losses and otherwise check for new loss |
895 | if (unlikely(packet == NULL)) { | 828 | */ |
896 | DCCP_WARN("%s(%p), Not enough mem to add rx packet " | 829 | if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) && |
897 | "to history, consider it lost!\n", dccp_role(sk), sk); | 830 | tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, |
898 | return; | 831 | &hcrx->ccid3hcrx_li_hist, |
832 | skb, ndp, ccid3_first_li, sk) ) { | ||
833 | do_feedback = CCID3_FBACK_PARAM_CHANGE; | ||
834 | goto done_receiving; | ||
899 | } | 835 | } |
900 | 836 | ||
901 | loss = ccid3_hc_rx_detect_loss(sk, packet); | 837 | if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp)) |
838 | goto update_records; | ||
902 | 839 | ||
903 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) | 840 | /* |
904 | return; | 841 | * Handle data packets: RTT sampling and monitoring p |
905 | 842 | */ | |
906 | payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4; | 843 | if (unlikely(!is_data_packet)) |
907 | ccid3_hc_rx_update_s(hcrx, payload_size); | 844 | goto update_records; |
908 | 845 | ||
909 | switch (hcrx->ccid3hcrx_state) { | 846 | if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) { |
910 | case TFRC_RSTATE_NO_DATA: | 847 | const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb); |
911 | ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " | 848 | /* |
912 | "feedback\n", dccp_role(sk), sk, | 849 | * Empty loss history: no loss so far, hence p stays 0. |
913 | dccp_state_name(sk->sk_state), skb); | 850 | * Sample RTT values, since an RTT estimate is required for the |
914 | ccid3_hc_rx_send_feedback(sk); | 851 | * computation of p when the first loss occurs; RFC 3448, 6.3.1. |
915 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); | 852 | */ |
916 | return; | 853 | if (sample != 0) |
917 | case TFRC_RSTATE_DATA: | 854 | hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9); |
918 | hcrx->ccid3hcrx_bytes_recv += payload_size; | ||
919 | if (loss) | ||
920 | break; | ||
921 | 855 | ||
922 | now = ktime_get_real(); | 856 | } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) { |
923 | if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - | 857 | /* |
924 | (s64)hcrx->ccid3hcrx_rtt) >= 0) { | 858 | * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean |
925 | hcrx->ccid3hcrx_tstamp_last_ack = now; | 859 | * has decreased (resp. p has increased), send feedback now. |
926 | ccid3_hc_rx_send_feedback(sk); | 860 | */ |
927 | } | 861 | do_feedback = CCID3_FBACK_PARAM_CHANGE; |
928 | return; | ||
929 | case TFRC_RSTATE_TERM: | ||
930 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | ||
931 | return; | ||
932 | } | 862 | } |
933 | 863 | ||
934 | /* Dealing with packet loss */ | 864 | /* |
935 | ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", | 865 | * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 |
936 | dccp_role(sk), sk, dccp_state_name(sk->sk_state)); | 866 | */ |
937 | 867 | if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3) | |
938 | p_prev = hcrx->ccid3hcrx_p; | 868 | do_feedback = CCID3_FBACK_PERIODIC; |
939 | |||
940 | /* Calculate loss event rate */ | ||
941 | if (!list_empty(&hcrx->ccid3hcrx_li_hist)) { | ||
942 | u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist); | ||
943 | 869 | ||
944 | /* Scaling up by 1000000 as fixed decimal */ | 870 | update_records: |
945 | if (i_mean != 0) | 871 | tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp); |
946 | hcrx->ccid3hcrx_p = 1000000 / i_mean; | ||
947 | } else | ||
948 | DCCP_BUG("empty loss history"); | ||
949 | 872 | ||
950 | if (hcrx->ccid3hcrx_p > p_prev) { | 873 | done_receiving: |
951 | ccid3_hc_rx_send_feedback(sk); | 874 | if (do_feedback) |
952 | return; | 875 | ccid3_hc_rx_send_feedback(sk, skb, do_feedback); |
953 | } | ||
954 | } | 876 | } |
955 | 877 | ||
956 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) | 878 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) |
957 | { | 879 | { |
958 | struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); | 880 | struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); |
959 | 881 | ||
960 | ccid3_pr_debug("entry\n"); | ||
961 | |||
962 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; | 882 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; |
963 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); | 883 | tfrc_lh_init(&hcrx->ccid3hcrx_li_hist); |
964 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); | 884 | return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist); |
965 | hcrx->ccid3hcrx_tstamp_last_feedback = | ||
966 | hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); | ||
967 | return 0; | ||
968 | } | 885 | } |
969 | 886 | ||
970 | static void ccid3_hc_rx_exit(struct sock *sk) | 887 | static void ccid3_hc_rx_exit(struct sock *sk) |
@@ -973,11 +890,8 @@ static void ccid3_hc_rx_exit(struct sock *sk) | |||
973 | 890 | ||
974 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); | 891 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); |
975 | 892 | ||
976 | /* Empty packet history */ | 893 | tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist); |
977 | dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); | 894 | tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist); |
978 | |||
979 | /* Empty loss interval history */ | ||
980 | dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist); | ||
981 | } | 895 | } |
982 | 896 | ||
983 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) | 897 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) |
@@ -998,6 +912,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | |||
998 | u32 __user *optval, int __user *optlen) | 912 | u32 __user *optval, int __user *optlen) |
999 | { | 913 | { |
1000 | const struct ccid3_hc_rx_sock *hcrx; | 914 | const struct ccid3_hc_rx_sock *hcrx; |
915 | struct tfrc_rx_info rx_info; | ||
1001 | const void *val; | 916 | const void *val; |
1002 | 917 | ||
1003 | /* Listen socks doesn't have a private CCID block */ | 918 | /* Listen socks doesn't have a private CCID block */ |
@@ -1007,10 +922,14 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | |||
1007 | hcrx = ccid3_hc_rx_sk(sk); | 922 | hcrx = ccid3_hc_rx_sk(sk); |
1008 | switch (optname) { | 923 | switch (optname) { |
1009 | case DCCP_SOCKOPT_CCID_RX_INFO: | 924 | case DCCP_SOCKOPT_CCID_RX_INFO: |
1010 | if (len < sizeof(hcrx->ccid3hcrx_tfrc)) | 925 | if (len < sizeof(rx_info)) |
1011 | return -EINVAL; | 926 | return -EINVAL; |
1012 | len = sizeof(hcrx->ccid3hcrx_tfrc); | 927 | rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv; |
1013 | val = &hcrx->ccid3hcrx_tfrc; | 928 | rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt; |
929 | rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U : | ||
930 | scaled_div(1, hcrx->ccid3hcrx_pinv); | ||
931 | len = sizeof(rx_info); | ||
932 | val = &rx_info; | ||
1014 | break; | 933 | break; |
1015 | default: | 934 | default: |
1016 | return -ENOPROTOOPT; | 935 | return -ENOPROTOOPT; |
@@ -1024,7 +943,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | |||
1024 | 943 | ||
1025 | static struct ccid_operations ccid3 = { | 944 | static struct ccid_operations ccid3 = { |
1026 | .ccid_id = DCCPC_CCID3, | 945 | .ccid_id = DCCPC_CCID3, |
1027 | .ccid_name = "ccid3", | 946 | .ccid_name = "TCP-Friendly Rate Control", |
1028 | .ccid_owner = THIS_MODULE, | 947 | .ccid_owner = THIS_MODULE, |
1029 | .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), | 948 | .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), |
1030 | .ccid_hc_tx_init = ccid3_hc_tx_init, | 949 | .ccid_hc_tx_init = ccid3_hc_tx_init, |
@@ -1051,44 +970,13 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | |||
1051 | 970 | ||
1052 | static __init int ccid3_module_init(void) | 971 | static __init int ccid3_module_init(void) |
1053 | { | 972 | { |
1054 | int rc = -ENOBUFS; | 973 | return ccid_register(&ccid3); |
1055 | |||
1056 | ccid3_rx_hist = dccp_rx_hist_new("ccid3"); | ||
1057 | if (ccid3_rx_hist == NULL) | ||
1058 | goto out; | ||
1059 | |||
1060 | ccid3_tx_hist = dccp_tx_hist_new("ccid3"); | ||
1061 | if (ccid3_tx_hist == NULL) | ||
1062 | goto out_free_rx; | ||
1063 | |||
1064 | rc = ccid_register(&ccid3); | ||
1065 | if (rc != 0) | ||
1066 | goto out_free_tx; | ||
1067 | out: | ||
1068 | return rc; | ||
1069 | |||
1070 | out_free_tx: | ||
1071 | dccp_tx_hist_delete(ccid3_tx_hist); | ||
1072 | ccid3_tx_hist = NULL; | ||
1073 | out_free_rx: | ||
1074 | dccp_rx_hist_delete(ccid3_rx_hist); | ||
1075 | ccid3_rx_hist = NULL; | ||
1076 | goto out; | ||
1077 | } | 974 | } |
1078 | module_init(ccid3_module_init); | 975 | module_init(ccid3_module_init); |
1079 | 976 | ||
1080 | static __exit void ccid3_module_exit(void) | 977 | static __exit void ccid3_module_exit(void) |
1081 | { | 978 | { |
1082 | ccid_unregister(&ccid3); | 979 | ccid_unregister(&ccid3); |
1083 | |||
1084 | if (ccid3_tx_hist != NULL) { | ||
1085 | dccp_tx_hist_delete(ccid3_tx_hist); | ||
1086 | ccid3_tx_hist = NULL; | ||
1087 | } | ||
1088 | if (ccid3_rx_hist != NULL) { | ||
1089 | dccp_rx_hist_delete(ccid3_rx_hist); | ||
1090 | ccid3_rx_hist = NULL; | ||
1091 | } | ||
1092 | } | 980 | } |
1093 | module_exit(ccid3_module_exit); | 981 | module_exit(ccid3_module_exit); |
1094 | 982 | ||
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 0cdc982cfe47..49ca32bd7e79 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * net/dccp/ccids/ccid3.h | 2 | * net/dccp/ccids/ccid3.h |
3 | * | 3 | * |
4 | * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. | 4 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. |
5 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK | ||
5 | * | 6 | * |
6 | * An implementation of the DCCP protocol | 7 | * An implementation of the DCCP protocol |
7 | * | 8 | * |
@@ -40,6 +41,7 @@ | |||
40 | #include <linux/list.h> | 41 | #include <linux/list.h> |
41 | #include <linux/types.h> | 42 | #include <linux/types.h> |
42 | #include <linux/tfrc.h> | 43 | #include <linux/tfrc.h> |
44 | #include "lib/tfrc.h" | ||
43 | #include "../ccid.h" | 45 | #include "../ccid.h" |
44 | 46 | ||
45 | /* Two seconds as per RFC 3448 4.2 */ | 47 | /* Two seconds as per RFC 3448 4.2 */ |
@@ -88,7 +90,6 @@ enum ccid3_hc_tx_states { | |||
88 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet | 90 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet |
89 | * with last_win_count value sent | 91 | * with last_win_count value sent |
90 | * @ccid3hctx_no_feedback_timer - Handle to no feedback timer | 92 | * @ccid3hctx_no_feedback_timer - Handle to no feedback timer |
91 | * @ccid3hctx_idle - Flag indicating that sender is idling | ||
92 | * @ccid3hctx_t_ld - Time last doubled during slow start | 93 | * @ccid3hctx_t_ld - Time last doubled during slow start |
93 | * @ccid3hctx_t_nom - Nominal send time of next packet | 94 | * @ccid3hctx_t_nom - Nominal send time of next packet |
94 | * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs | 95 | * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs |
@@ -107,13 +108,12 @@ struct ccid3_hc_tx_sock { | |||
107 | u16 ccid3hctx_s; | 108 | u16 ccid3hctx_s; |
108 | enum ccid3_hc_tx_states ccid3hctx_state:8; | 109 | enum ccid3_hc_tx_states ccid3hctx_state:8; |
109 | u8 ccid3hctx_last_win_count; | 110 | u8 ccid3hctx_last_win_count; |
110 | u8 ccid3hctx_idle; | ||
111 | ktime_t ccid3hctx_t_last_win_count; | 111 | ktime_t ccid3hctx_t_last_win_count; |
112 | struct timer_list ccid3hctx_no_feedback_timer; | 112 | struct timer_list ccid3hctx_no_feedback_timer; |
113 | ktime_t ccid3hctx_t_ld; | 113 | ktime_t ccid3hctx_t_ld; |
114 | ktime_t ccid3hctx_t_nom; | 114 | ktime_t ccid3hctx_t_nom; |
115 | u32 ccid3hctx_delta; | 115 | u32 ccid3hctx_delta; |
116 | struct list_head ccid3hctx_hist; | 116 | struct tfrc_tx_hist_entry *ccid3hctx_hist; |
117 | struct ccid3_options_received ccid3hctx_options_received; | 117 | struct ccid3_options_received ccid3hctx_options_received; |
118 | }; | 118 | }; |
119 | 119 | ||
@@ -135,37 +135,30 @@ enum ccid3_hc_rx_states { | |||
135 | * | 135 | * |
136 | * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) | 136 | * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) |
137 | * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) | 137 | * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) |
138 | * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) | 138 | * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4) |
139 | * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number | 139 | * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1) |
140 | * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal | 140 | * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states |
141 | * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1) | ||
142 | * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states | ||
143 | * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes | 141 | * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes |
142 | * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3) | ||
143 | * @ccid3hcrx_rtt - Receiver estimate of RTT | ||
144 | * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent | 144 | * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent |
145 | * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent | 145 | * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent |
146 | * @ccid3hcrx_hist - Packet history | 146 | * @ccid3hcrx_hist - Packet history (loss detection + RTT sampling) |
147 | * @ccid3hcrx_li_hist - Loss Interval History | 147 | * @ccid3hcrx_li_hist - Loss Interval database |
148 | * @ccid3hcrx_s - Received packet size in bytes | 148 | * @ccid3hcrx_s - Received packet size in bytes |
149 | * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) | 149 | * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) |
150 | * @ccid3hcrx_elapsed_time - Time since packet reception | ||
151 | */ | 150 | */ |
152 | struct ccid3_hc_rx_sock { | 151 | struct ccid3_hc_rx_sock { |
153 | struct tfrc_rx_info ccid3hcrx_tfrc; | 152 | u8 ccid3hcrx_last_counter:4; |
154 | #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv | ||
155 | #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt | ||
156 | #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p | ||
157 | u64 ccid3hcrx_seqno_nonloss:48, | ||
158 | ccid3hcrx_ccval_nonloss:4, | ||
159 | ccid3hcrx_ccval_last_counter:4; | ||
160 | enum ccid3_hc_rx_states ccid3hcrx_state:8; | 153 | enum ccid3_hc_rx_states ccid3hcrx_state:8; |
161 | u32 ccid3hcrx_bytes_recv; | 154 | u32 ccid3hcrx_bytes_recv; |
155 | u32 ccid3hcrx_x_recv; | ||
156 | u32 ccid3hcrx_rtt; | ||
162 | ktime_t ccid3hcrx_tstamp_last_feedback; | 157 | ktime_t ccid3hcrx_tstamp_last_feedback; |
163 | ktime_t ccid3hcrx_tstamp_last_ack; | 158 | struct tfrc_rx_hist ccid3hcrx_hist; |
164 | struct list_head ccid3hcrx_hist; | 159 | struct tfrc_loss_hist ccid3hcrx_li_hist; |
165 | struct list_head ccid3hcrx_li_hist; | ||
166 | u16 ccid3hcrx_s; | 160 | u16 ccid3hcrx_s; |
167 | u32 ccid3hcrx_pinv; | 161 | #define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean |
168 | u32 ccid3hcrx_elapsed_time; | ||
169 | }; | 162 | }; |
170 | 163 | ||
171 | static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) | 164 | static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) |
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile index 5f940a6cbaca..68c93e3d89dc 100644 --- a/net/dccp/ccids/lib/Makefile +++ b/net/dccp/ccids/lib/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o | 1 | obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o |
2 | 2 | ||
3 | dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o | 3 | dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o |
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index d26b88dbbb45..849e181e698f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * net/dccp/ccids/lib/loss_interval.c | 2 | * net/dccp/ccids/lib/loss_interval.c |
3 | * | 3 | * |
4 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK | ||
4 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. | 5 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. |
5 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> | 6 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> |
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | 7 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> |
@@ -10,285 +11,176 @@ | |||
10 | * the Free Software Foundation; either version 2 of the License, or | 11 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. | 12 | * (at your option) any later version. |
12 | */ | 13 | */ |
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <net/sock.h> | 14 | #include <net/sock.h> |
16 | #include "../../dccp.h" | ||
17 | #include "loss_interval.h" | ||
18 | #include "packet_history.h" | ||
19 | #include "tfrc.h" | 15 | #include "tfrc.h" |
20 | 16 | ||
21 | #define DCCP_LI_HIST_IVAL_F_LENGTH 8 | 17 | static struct kmem_cache *tfrc_lh_slab __read_mostly; |
22 | 18 | /* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */ | |
23 | struct dccp_li_hist_entry { | 19 | static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 }; |
24 | struct list_head dccplih_node; | ||
25 | u64 dccplih_seqno:48, | ||
26 | dccplih_win_count:4; | ||
27 | u32 dccplih_interval; | ||
28 | }; | ||
29 | 20 | ||
30 | static struct kmem_cache *dccp_li_cachep __read_mostly; | 21 | /* implements LIFO semantics on the array */ |
31 | 22 | static inline u8 LIH_INDEX(const u8 ctr) | |
32 | static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) | ||
33 | { | 23 | { |
34 | return kmem_cache_alloc(dccp_li_cachep, prio); | 24 | return (LIH_SIZE - 1 - (ctr % LIH_SIZE)); |
35 | } | 25 | } |
36 | 26 | ||
37 | static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) | 27 | /* the `counter' index always points at the next entry to be populated */ |
28 | static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh) | ||
38 | { | 29 | { |
39 | if (entry != NULL) | 30 | return lh->counter ? lh->ring[LIH_INDEX(lh->counter - 1)] : NULL; |
40 | kmem_cache_free(dccp_li_cachep, entry); | ||
41 | } | 31 | } |
42 | 32 | ||
43 | void dccp_li_hist_purge(struct list_head *list) | 33 | /* given i with 0 <= i <= k, return I_i as per the rfc3448bis notation */ |
34 | static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i) | ||
44 | { | 35 | { |
45 | struct dccp_li_hist_entry *entry, *next; | 36 | BUG_ON(i >= lh->counter); |
46 | 37 | return lh->ring[LIH_INDEX(lh->counter - i - 1)]->li_length; | |
47 | list_for_each_entry_safe(entry, next, list, dccplih_node) { | ||
48 | list_del_init(&entry->dccplih_node); | ||
49 | kmem_cache_free(dccp_li_cachep, entry); | ||
50 | } | ||
51 | } | 38 | } |
52 | 39 | ||
53 | EXPORT_SYMBOL_GPL(dccp_li_hist_purge); | ||
54 | |||
55 | /* Weights used to calculate loss event rate */ | ||
56 | /* | 40 | /* |
57 | * These are integers as per section 8 of RFC3448. We can then divide by 4 * | 41 | * On-demand allocation and de-allocation of entries |
58 | * when we use it. | ||
59 | */ | 42 | */ |
60 | static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { | 43 | static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh) |
61 | 4, 4, 4, 4, 3, 2, 1, 1, | ||
62 | }; | ||
63 | |||
64 | u32 dccp_li_hist_calc_i_mean(struct list_head *list) | ||
65 | { | 44 | { |
66 | struct dccp_li_hist_entry *li_entry, *li_next; | 45 | if (lh->ring[LIH_INDEX(lh->counter)] == NULL) |
67 | int i = 0; | 46 | lh->ring[LIH_INDEX(lh->counter)] = kmem_cache_alloc(tfrc_lh_slab, |
68 | u32 i_tot; | 47 | GFP_ATOMIC); |
69 | u32 i_tot0 = 0; | 48 | return lh->ring[LIH_INDEX(lh->counter)]; |
70 | u32 i_tot1 = 0; | ||
71 | u32 w_tot = 0; | ||
72 | |||
73 | list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { | ||
74 | if (li_entry->dccplih_interval != ~0U) { | ||
75 | i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; | ||
76 | w_tot += dccp_li_hist_w[i]; | ||
77 | if (i != 0) | ||
78 | i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; | ||
79 | } | ||
80 | |||
81 | |||
82 | if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) | ||
83 | break; | ||
84 | } | ||
85 | |||
86 | if (i != DCCP_LI_HIST_IVAL_F_LENGTH) | ||
87 | return 0; | ||
88 | |||
89 | i_tot = max(i_tot0, i_tot1); | ||
90 | |||
91 | if (!w_tot) { | ||
92 | DCCP_WARN("w_tot = 0\n"); | ||
93 | return 1; | ||
94 | } | ||
95 | |||
96 | return i_tot / w_tot; | ||
97 | } | 49 | } |
98 | 50 | ||
99 | EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); | 51 | void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) |
100 | |||
101 | static int dccp_li_hist_interval_new(struct list_head *list, | ||
102 | const u64 seq_loss, const u8 win_loss) | ||
103 | { | 52 | { |
104 | struct dccp_li_hist_entry *entry; | 53 | if (!tfrc_lh_is_initialised(lh)) |
105 | int i; | 54 | return; |
106 | 55 | ||
107 | for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { | 56 | for (lh->counter = 0; lh->counter < LIH_SIZE; lh->counter++) |
108 | entry = dccp_li_hist_entry_new(GFP_ATOMIC); | 57 | if (lh->ring[LIH_INDEX(lh->counter)] != NULL) { |
109 | if (entry == NULL) { | 58 | kmem_cache_free(tfrc_lh_slab, |
110 | dccp_li_hist_purge(list); | 59 | lh->ring[LIH_INDEX(lh->counter)]); |
111 | DCCP_BUG("loss interval list entry is NULL"); | 60 | lh->ring[LIH_INDEX(lh->counter)] = NULL; |
112 | return 0; | ||
113 | } | 61 | } |
114 | entry->dccplih_interval = ~0; | ||
115 | list_add(&entry->dccplih_node, list); | ||
116 | } | ||
117 | |||
118 | entry->dccplih_seqno = seq_loss; | ||
119 | entry->dccplih_win_count = win_loss; | ||
120 | return 1; | ||
121 | } | 62 | } |
63 | EXPORT_SYMBOL_GPL(tfrc_lh_cleanup); | ||
122 | 64 | ||
123 | /* calculate first loss interval | 65 | static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) |
124 | * | ||
125 | * returns estimated loss interval in usecs */ | ||
126 | static u32 dccp_li_calc_first_li(struct sock *sk, | ||
127 | struct list_head *hist_list, | ||
128 | ktime_t last_feedback, | ||
129 | u16 s, u32 bytes_recv, | ||
130 | u32 previous_x_recv) | ||
131 | { | 66 | { |
132 | struct dccp_rx_hist_entry *entry, *next, *tail = NULL; | 67 | u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; |
133 | u32 x_recv, p; | 68 | int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ |
134 | suseconds_t rtt, delta; | ||
135 | ktime_t tstamp = ktime_set(0, 0); | ||
136 | int interval = 0; | ||
137 | int win_count = 0; | ||
138 | int step = 0; | ||
139 | u64 fval; | ||
140 | 69 | ||
141 | list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { | 70 | for (i=0; i <= k; i++) { |
142 | if (dccp_rx_hist_entry_data_packet(entry)) { | 71 | i_i = tfrc_lh_get_interval(lh, i); |
143 | tail = entry; | ||
144 | 72 | ||
145 | switch (step) { | 73 | if (i < k) { |
146 | case 0: | 74 | i_tot0 += i_i * tfrc_lh_weights[i]; |
147 | tstamp = entry->dccphrx_tstamp; | 75 | w_tot += tfrc_lh_weights[i]; |
148 | win_count = entry->dccphrx_ccval; | ||
149 | step = 1; | ||
150 | break; | ||
151 | case 1: | ||
152 | interval = win_count - entry->dccphrx_ccval; | ||
153 | if (interval < 0) | ||
154 | interval += TFRC_WIN_COUNT_LIMIT; | ||
155 | if (interval > 4) | ||
156 | goto found; | ||
157 | break; | ||
158 | } | ||
159 | } | 76 | } |
77 | if (i > 0) | ||
78 | i_tot1 += i_i * tfrc_lh_weights[i-1]; | ||
160 | } | 79 | } |
161 | 80 | ||
162 | if (unlikely(step == 0)) { | 81 | BUG_ON(w_tot == 0); |
163 | DCCP_WARN("%s(%p), packet history has no data packets!\n", | 82 | lh->i_mean = max(i_tot0, i_tot1) / w_tot; |
164 | dccp_role(sk), sk); | 83 | } |
165 | return ~0; | ||
166 | } | ||
167 | |||
168 | if (unlikely(interval == 0)) { | ||
169 | DCCP_WARN("%s(%p), Could not find a win_count interval > 0. " | ||
170 | "Defaulting to 1\n", dccp_role(sk), sk); | ||
171 | interval = 1; | ||
172 | } | ||
173 | found: | ||
174 | if (!tail) { | ||
175 | DCCP_CRIT("tail is null\n"); | ||
176 | return ~0; | ||
177 | } | ||
178 | |||
179 | delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp); | ||
180 | DCCP_BUG_ON(delta < 0); | ||
181 | 84 | ||
182 | rtt = delta * 4 / interval; | 85 | /** |
183 | dccp_pr_debug("%s(%p), approximated RTT to %dus\n", | 86 | * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 |
184 | dccp_role(sk), sk, (int)rtt); | 87 | * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev |
88 | */ | ||
89 | u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) | ||
90 | { | ||
91 | struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); | ||
92 | u32 old_i_mean = lh->i_mean; | ||
93 | s64 length; | ||
185 | 94 | ||
186 | /* | 95 | if (cur == NULL) /* not initialised */ |
187 | * Determine the length of the first loss interval via inverse lookup. | 96 | return 0; |
188 | * Assume that X_recv can be computed by the throughput equation | ||
189 | * s | ||
190 | * X_recv = -------- | ||
191 | * R * fval | ||
192 | * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. | ||
193 | */ | ||
194 | if (rtt == 0) { /* would result in divide-by-zero */ | ||
195 | DCCP_WARN("RTT==0\n"); | ||
196 | return ~0; | ||
197 | } | ||
198 | 97 | ||
199 | delta = ktime_us_delta(ktime_get_real(), last_feedback); | 98 | length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq); |
200 | DCCP_BUG_ON(delta <= 0); | ||
201 | 99 | ||
202 | x_recv = scaled_div32(bytes_recv, delta); | 100 | if (length - cur->li_length <= 0) /* duplicate or reordered */ |
203 | if (x_recv == 0) { /* would also trigger divide-by-zero */ | 101 | return 0; |
204 | DCCP_WARN("X_recv==0\n"); | ||
205 | if (previous_x_recv == 0) { | ||
206 | DCCP_BUG("stored value of X_recv is zero"); | ||
207 | return ~0; | ||
208 | } | ||
209 | x_recv = previous_x_recv; | ||
210 | } | ||
211 | 102 | ||
212 | fval = scaled_div(s, rtt); | 103 | if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) |
213 | fval = scaled_div32(fval, x_recv); | 104 | /* |
214 | p = tfrc_calc_x_reverse_lookup(fval); | 105 | * Implements RFC 4342, 10.2: |
106 | * If a packet S (skb) exists whose seqno comes `after' the one | ||
107 | * starting the current loss interval (cur) and if the modulo-16 | ||
108 | * distance from C(cur) to C(S) is greater than 4, consider all | ||
109 | * subsequent packets as belonging to a new loss interval. This | ||
110 | * test is necessary since CCVal may wrap between intervals. | ||
111 | */ | ||
112 | cur->li_is_closed = 1; | ||
113 | |||
114 | if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ | ||
115 | return 0; | ||
215 | 116 | ||
216 | dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " | 117 | cur->li_length = length; |
217 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); | 118 | tfrc_lh_calc_i_mean(lh); |
218 | 119 | ||
219 | if (p == 0) | 120 | return (lh->i_mean < old_i_mean); |
220 | return ~0; | ||
221 | else | ||
222 | return 1000000 / p; | ||
223 | } | 121 | } |
122 | EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); | ||
224 | 123 | ||
225 | void dccp_li_update_li(struct sock *sk, | 124 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ |
226 | struct list_head *li_hist_list, | 125 | static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, |
227 | struct list_head *hist_list, | 126 | struct tfrc_rx_hist_entry *new_loss) |
228 | ktime_t last_feedback, u16 s, u32 bytes_recv, | ||
229 | u32 previous_x_recv, u64 seq_loss, u8 win_loss) | ||
230 | { | 127 | { |
231 | struct dccp_li_hist_entry *head; | 128 | return dccp_delta_seqno(cur->li_seqno, new_loss->tfrchrx_seqno) > 0 && |
232 | u64 seq_temp; | 129 | (cur->li_is_closed || SUB16(new_loss->tfrchrx_ccval, cur->li_ccval) > 4); |
233 | 130 | } | |
234 | if (list_empty(li_hist_list)) { | ||
235 | if (!dccp_li_hist_interval_new(li_hist_list, seq_loss, | ||
236 | win_loss)) | ||
237 | return; | ||
238 | |||
239 | head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, | ||
240 | dccplih_node); | ||
241 | head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list, | ||
242 | last_feedback, | ||
243 | s, bytes_recv, | ||
244 | previous_x_recv); | ||
245 | } else { | ||
246 | struct dccp_li_hist_entry *entry; | ||
247 | struct list_head *tail; | ||
248 | 131 | ||
249 | head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, | 132 | /** tfrc_lh_interval_add - Insert new record into the Loss Interval database |
250 | dccplih_node); | 133 | * @lh: Loss Interval database |
251 | /* FIXME win count check removed as was wrong */ | 134 | * @rh: Receive history containing a fresh loss event |
252 | /* should make this check with receive history */ | 135 | * @calc_first_li: Caller-dependent routine to compute length of first interval |
253 | /* and compare there as per section 10.2 of RFC4342 */ | 136 | * @sk: Used by @calc_first_li in caller-specific way (subtyping) |
137 | * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. | ||
138 | */ | ||
139 | int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, | ||
140 | u32 (*calc_first_li)(struct sock *), struct sock *sk) | ||
141 | { | ||
142 | struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; | ||
254 | 143 | ||
255 | /* new loss event detected */ | 144 | if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) |
256 | /* calculate last interval length */ | 145 | return 0; |
257 | seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); | ||
258 | entry = dccp_li_hist_entry_new(GFP_ATOMIC); | ||
259 | 146 | ||
260 | if (entry == NULL) { | 147 | new = tfrc_lh_demand_next(lh); |
261 | DCCP_BUG("out of memory - can not allocate entry"); | 148 | if (unlikely(new == NULL)) { |
262 | return; | 149 | DCCP_CRIT("Cannot allocate/add loss record."); |
263 | } | 150 | return 0; |
151 | } | ||
264 | 152 | ||
265 | list_add(&entry->dccplih_node, li_hist_list); | 153 | new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; |
154 | new->li_ccval = tfrc_rx_hist_loss_prev(rh)->tfrchrx_ccval; | ||
155 | new->li_is_closed = 0; | ||
266 | 156 | ||
267 | tail = li_hist_list->prev; | 157 | if (++lh->counter == 1) |
268 | list_del(tail); | 158 | lh->i_mean = new->li_length = (*calc_first_li)(sk); |
269 | kmem_cache_free(dccp_li_cachep, tail); | 159 | else { |
160 | cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno); | ||
161 | new->li_length = dccp_delta_seqno(new->li_seqno, | ||
162 | tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno); | ||
163 | if (lh->counter > (2*LIH_SIZE)) | ||
164 | lh->counter -= LIH_SIZE; | ||
270 | 165 | ||
271 | /* Create the newest interval */ | 166 | tfrc_lh_calc_i_mean(lh); |
272 | entry->dccplih_seqno = seq_loss; | ||
273 | entry->dccplih_interval = seq_temp; | ||
274 | entry->dccplih_win_count = win_loss; | ||
275 | } | 167 | } |
168 | return 1; | ||
276 | } | 169 | } |
170 | EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); | ||
277 | 171 | ||
278 | EXPORT_SYMBOL_GPL(dccp_li_update_li); | 172 | int __init tfrc_li_init(void) |
279 | |||
280 | static __init int dccp_li_init(void) | ||
281 | { | 173 | { |
282 | dccp_li_cachep = kmem_cache_create("dccp_li_hist", | 174 | tfrc_lh_slab = kmem_cache_create("tfrc_li_hist", |
283 | sizeof(struct dccp_li_hist_entry), | 175 | sizeof(struct tfrc_loss_interval), 0, |
284 | 0, SLAB_HWCACHE_ALIGN, NULL); | 176 | SLAB_HWCACHE_ALIGN, NULL); |
285 | return dccp_li_cachep == NULL ? -ENOBUFS : 0; | 177 | return tfrc_lh_slab == NULL ? -ENOBUFS : 0; |
286 | } | 178 | } |
287 | 179 | ||
288 | static __exit void dccp_li_exit(void) | 180 | void tfrc_li_exit(void) |
289 | { | 181 | { |
290 | kmem_cache_destroy(dccp_li_cachep); | 182 | if (tfrc_lh_slab != NULL) { |
183 | kmem_cache_destroy(tfrc_lh_slab); | ||
184 | tfrc_lh_slab = NULL; | ||
185 | } | ||
291 | } | 186 | } |
292 | |||
293 | module_init(dccp_li_init); | ||
294 | module_exit(dccp_li_exit); | ||
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 27bee92dae13..246018a3b269 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h | |||
@@ -3,6 +3,7 @@ | |||
3 | /* | 3 | /* |
4 | * net/dccp/ccids/lib/loss_interval.h | 4 | * net/dccp/ccids/lib/loss_interval.h |
5 | * | 5 | * |
6 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK | ||
6 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. | 7 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. |
7 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> | 8 | * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> |
8 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | 9 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> |
@@ -12,18 +13,63 @@ | |||
12 | * Software Foundation; either version 2 of the License, or (at your option) | 13 | * Software Foundation; either version 2 of the License, or (at your option) |
13 | * any later version. | 14 | * any later version. |
14 | */ | 15 | */ |
15 | |||
16 | #include <linux/ktime.h> | 16 | #include <linux/ktime.h> |
17 | #include <linux/list.h> | 17 | #include <linux/list.h> |
18 | #include <linux/slab.h> | ||
19 | |||
20 | /* | ||
21 | * Number of loss intervals (RFC 4342, 8.6.1). The history size is one more than | ||
22 | * NINTERVAL, since the `open' interval I_0 is always stored as the first entry. | ||
23 | */ | ||
24 | #define NINTERVAL 8 | ||
25 | #define LIH_SIZE (NINTERVAL + 1) | ||
26 | |||
27 | /** | ||
28 | * tfrc_loss_interval - Loss history record for TFRC-based protocols | ||
29 | * @li_seqno: Highest received seqno before the start of loss | ||
30 | * @li_ccval: The CCVal belonging to @li_seqno | ||
31 | * @li_is_closed: Whether @li_seqno is older than 1 RTT | ||
32 | * @li_length: Loss interval sequence length | ||
33 | */ | ||
34 | struct tfrc_loss_interval { | ||
35 | u64 li_seqno:48, | ||
36 | li_ccval:4, | ||
37 | li_is_closed:1; | ||
38 | u32 li_length; | ||
39 | }; | ||
40 | |||
41 | /** | ||
42 | * tfrc_loss_hist - Loss record database | ||
43 | * @ring: Circular queue managed in LIFO manner | ||
44 | * @counter: Current count of entries (can be more than %LIH_SIZE) | ||
45 | * @i_mean: Current Average Loss Interval [RFC 3448, 5.4] | ||
46 | */ | ||
47 | struct tfrc_loss_hist { | ||
48 | struct tfrc_loss_interval *ring[LIH_SIZE]; | ||
49 | u8 counter; | ||
50 | u32 i_mean; | ||
51 | }; | ||
52 | |||
53 | static inline void tfrc_lh_init(struct tfrc_loss_hist *lh) | ||
54 | { | ||
55 | memset(lh, 0, sizeof(struct tfrc_loss_hist)); | ||
56 | } | ||
57 | |||
58 | static inline u8 tfrc_lh_is_initialised(struct tfrc_loss_hist *lh) | ||
59 | { | ||
60 | return lh->counter > 0; | ||
61 | } | ||
62 | |||
63 | static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) | ||
64 | { | ||
65 | return min(lh->counter, (u8)LIH_SIZE); | ||
66 | } | ||
18 | 67 | ||
19 | extern void dccp_li_hist_purge(struct list_head *list); | 68 | struct tfrc_rx_hist; |
20 | 69 | ||
21 | extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); | 70 | extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, |
71 | u32 (*first_li)(struct sock *), struct sock *); | ||
72 | extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); | ||
73 | extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); | ||
22 | 74 | ||
23 | extern void dccp_li_update_li(struct sock *sk, | ||
24 | struct list_head *li_hist_list, | ||
25 | struct list_head *hist_list, | ||
26 | ktime_t last_feedback, u16 s, | ||
27 | u32 bytes_recv, u32 previous_x_recv, | ||
28 | u64 seq_loss, u8 win_loss); | ||
29 | #endif /* _DCCP_LI_HIST_ */ | 75 | #endif /* _DCCP_LI_HIST_ */ |
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 34c4f6047724..20af1a693427 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * net/dccp/packet_history.c | 2 | * net/dccp/packet_history.c |
3 | * | 3 | * |
4 | * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. | 4 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK |
5 | * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | 6 | * |
6 | * An implementation of the DCCP protocol | 7 | * An implementation of the DCCP protocol |
7 | * | 8 | * |
@@ -34,267 +35,465 @@ | |||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 35 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
35 | */ | 36 | */ |
36 | 37 | ||
37 | #include <linux/module.h> | ||
38 | #include <linux/string.h> | 38 | #include <linux/string.h> |
39 | #include <linux/slab.h> | ||
39 | #include "packet_history.h" | 40 | #include "packet_history.h" |
41 | #include "../../dccp.h" | ||
42 | |||
43 | /** | ||
44 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
45 | * @next: next oldest entry (LIFO order) | ||
46 | * @seqno: sequence number of this entry | ||
47 | * @stamp: send time of packet with sequence number @seqno | ||
48 | */ | ||
49 | struct tfrc_tx_hist_entry { | ||
50 | struct tfrc_tx_hist_entry *next; | ||
51 | u64 seqno; | ||
52 | ktime_t stamp; | ||
53 | }; | ||
40 | 54 | ||
41 | /* | 55 | /* |
42 | * Transmitter History Routines | 56 | * Transmitter History Routines |
43 | */ | 57 | */ |
44 | struct dccp_tx_hist *dccp_tx_hist_new(const char *name) | 58 | static struct kmem_cache *tfrc_tx_hist_slab; |
59 | |||
60 | int __init tfrc_tx_packet_history_init(void) | ||
45 | { | 61 | { |
46 | struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | 62 | tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist", |
47 | static const char dccp_tx_hist_mask[] = "tx_hist_%s"; | 63 | sizeof(struct tfrc_tx_hist_entry), |
48 | char *slab_name; | 64 | 0, SLAB_HWCACHE_ALIGN, NULL); |
49 | 65 | return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0; | |
50 | if (hist == NULL) | ||
51 | goto out; | ||
52 | |||
53 | slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, | ||
54 | GFP_ATOMIC); | ||
55 | if (slab_name == NULL) | ||
56 | goto out_free_hist; | ||
57 | |||
58 | sprintf(slab_name, dccp_tx_hist_mask, name); | ||
59 | hist->dccptxh_slab = kmem_cache_create(slab_name, | ||
60 | sizeof(struct dccp_tx_hist_entry), | ||
61 | 0, SLAB_HWCACHE_ALIGN, | ||
62 | NULL); | ||
63 | if (hist->dccptxh_slab == NULL) | ||
64 | goto out_free_slab_name; | ||
65 | out: | ||
66 | return hist; | ||
67 | out_free_slab_name: | ||
68 | kfree(slab_name); | ||
69 | out_free_hist: | ||
70 | kfree(hist); | ||
71 | hist = NULL; | ||
72 | goto out; | ||
73 | } | 66 | } |
74 | 67 | ||
75 | EXPORT_SYMBOL_GPL(dccp_tx_hist_new); | 68 | void tfrc_tx_packet_history_exit(void) |
76 | |||
77 | void dccp_tx_hist_delete(struct dccp_tx_hist *hist) | ||
78 | { | 69 | { |
79 | const char* name = kmem_cache_name(hist->dccptxh_slab); | 70 | if (tfrc_tx_hist_slab != NULL) { |
80 | 71 | kmem_cache_destroy(tfrc_tx_hist_slab); | |
81 | kmem_cache_destroy(hist->dccptxh_slab); | 72 | tfrc_tx_hist_slab = NULL; |
82 | kfree(name); | 73 | } |
83 | kfree(hist); | ||
84 | } | 74 | } |
85 | 75 | ||
86 | EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); | 76 | static struct tfrc_tx_hist_entry * |
87 | 77 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | |
88 | struct dccp_tx_hist_entry * | ||
89 | dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) | ||
90 | { | 78 | { |
91 | struct dccp_tx_hist_entry *packet = NULL, *entry; | 79 | while (head != NULL && head->seqno != seqno) |
92 | 80 | head = head->next; | |
93 | list_for_each_entry(entry, list, dccphtx_node) | ||
94 | if (entry->dccphtx_seqno == seq) { | ||
95 | packet = entry; | ||
96 | break; | ||
97 | } | ||
98 | 81 | ||
99 | return packet; | 82 | return head; |
100 | } | 83 | } |
101 | 84 | ||
102 | EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); | 85 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) |
86 | { | ||
87 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); | ||
88 | |||
89 | if (entry == NULL) | ||
90 | return -ENOBUFS; | ||
91 | entry->seqno = seqno; | ||
92 | entry->stamp = ktime_get_real(); | ||
93 | entry->next = *headp; | ||
94 | *headp = entry; | ||
95 | return 0; | ||
96 | } | ||
97 | EXPORT_SYMBOL_GPL(tfrc_tx_hist_add); | ||
103 | 98 | ||
104 | void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) | 99 | void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) |
105 | { | 100 | { |
106 | struct dccp_tx_hist_entry *entry, *next; | 101 | struct tfrc_tx_hist_entry *head = *headp; |
102 | |||
103 | while (head != NULL) { | ||
104 | struct tfrc_tx_hist_entry *next = head->next; | ||
107 | 105 | ||
108 | list_for_each_entry_safe(entry, next, list, dccphtx_node) { | 106 | kmem_cache_free(tfrc_tx_hist_slab, head); |
109 | list_del_init(&entry->dccphtx_node); | 107 | head = next; |
110 | dccp_tx_hist_entry_delete(hist, entry); | ||
111 | } | 108 | } |
112 | } | ||
113 | 109 | ||
114 | EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); | 110 | *headp = NULL; |
111 | } | ||
112 | EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); | ||
115 | 113 | ||
116 | void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, | 114 | u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, |
117 | struct list_head *list, | 115 | const ktime_t now) |
118 | struct dccp_tx_hist_entry *packet) | ||
119 | { | 116 | { |
120 | struct dccp_tx_hist_entry *next; | 117 | u32 rtt = 0; |
118 | struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); | ||
121 | 119 | ||
122 | list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { | 120 | if (packet != NULL) { |
123 | list_del_init(&packet->dccphtx_node); | 121 | rtt = ktime_us_delta(now, packet->stamp); |
124 | dccp_tx_hist_entry_delete(hist, packet); | 122 | /* |
123 | * Garbage-collect older (irrelevant) entries: | ||
124 | */ | ||
125 | tfrc_tx_hist_purge(&packet->next); | ||
125 | } | 126 | } |
127 | |||
128 | return rtt; | ||
126 | } | 129 | } |
130 | EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); | ||
127 | 131 | ||
128 | EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); | ||
129 | 132 | ||
130 | /* | 133 | /* |
131 | * Receiver History Routines | 134 | * Receiver History Routines |
132 | */ | 135 | */ |
133 | struct dccp_rx_hist *dccp_rx_hist_new(const char *name) | 136 | static struct kmem_cache *tfrc_rx_hist_slab; |
137 | |||
138 | int __init tfrc_rx_packet_history_init(void) | ||
134 | { | 139 | { |
135 | struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | 140 | tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache", |
136 | static const char dccp_rx_hist_mask[] = "rx_hist_%s"; | 141 | sizeof(struct tfrc_rx_hist_entry), |
137 | char *slab_name; | 142 | 0, SLAB_HWCACHE_ALIGN, NULL); |
138 | 143 | return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0; | |
139 | if (hist == NULL) | ||
140 | goto out; | ||
141 | |||
142 | slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, | ||
143 | GFP_ATOMIC); | ||
144 | if (slab_name == NULL) | ||
145 | goto out_free_hist; | ||
146 | |||
147 | sprintf(slab_name, dccp_rx_hist_mask, name); | ||
148 | hist->dccprxh_slab = kmem_cache_create(slab_name, | ||
149 | sizeof(struct dccp_rx_hist_entry), | ||
150 | 0, SLAB_HWCACHE_ALIGN, | ||
151 | NULL); | ||
152 | if (hist->dccprxh_slab == NULL) | ||
153 | goto out_free_slab_name; | ||
154 | out: | ||
155 | return hist; | ||
156 | out_free_slab_name: | ||
157 | kfree(slab_name); | ||
158 | out_free_hist: | ||
159 | kfree(hist); | ||
160 | hist = NULL; | ||
161 | goto out; | ||
162 | } | 144 | } |
163 | 145 | ||
164 | EXPORT_SYMBOL_GPL(dccp_rx_hist_new); | 146 | void tfrc_rx_packet_history_exit(void) |
147 | { | ||
148 | if (tfrc_rx_hist_slab != NULL) { | ||
149 | kmem_cache_destroy(tfrc_rx_hist_slab); | ||
150 | tfrc_rx_hist_slab = NULL; | ||
151 | } | ||
152 | } | ||
165 | 153 | ||
166 | void dccp_rx_hist_delete(struct dccp_rx_hist *hist) | 154 | static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry, |
155 | const struct sk_buff *skb, | ||
156 | const u32 ndp) | ||
167 | { | 157 | { |
168 | const char* name = kmem_cache_name(hist->dccprxh_slab); | 158 | const struct dccp_hdr *dh = dccp_hdr(skb); |
169 | 159 | ||
170 | kmem_cache_destroy(hist->dccprxh_slab); | 160 | entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; |
171 | kfree(name); | 161 | entry->tfrchrx_ccval = dh->dccph_ccval; |
172 | kfree(hist); | 162 | entry->tfrchrx_type = dh->dccph_type; |
163 | entry->tfrchrx_ndp = ndp; | ||
164 | entry->tfrchrx_tstamp = ktime_get_real(); | ||
173 | } | 165 | } |
174 | 166 | ||
175 | EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); | 167 | void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, |
168 | const struct sk_buff *skb, | ||
169 | const u32 ndp) | ||
170 | { | ||
171 | struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h); | ||
172 | |||
173 | tfrc_rx_hist_entry_from_skb(entry, skb, ndp); | ||
174 | } | ||
175 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet); | ||
176 | 176 | ||
177 | int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, | 177 | /* has the packet contained in skb been seen before? */ |
178 | u8 *ccval) | 178 | int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) |
179 | { | 179 | { |
180 | struct dccp_rx_hist_entry *packet = NULL, *entry; | 180 | const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq; |
181 | int i; | ||
181 | 182 | ||
182 | list_for_each_entry(entry, list, dccphrx_node) | 183 | if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0) |
183 | if (entry->dccphrx_seqno == seq) { | 184 | return 1; |
184 | packet = entry; | ||
185 | break; | ||
186 | } | ||
187 | 185 | ||
188 | if (packet) | 186 | for (i = 1; i <= h->loss_count; i++) |
189 | *ccval = packet->dccphrx_ccval; | 187 | if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq) |
188 | return 1; | ||
190 | 189 | ||
191 | return packet != NULL; | 190 | return 0; |
192 | } | 191 | } |
192 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); | ||
193 | 193 | ||
194 | EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); | 194 | static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) |
195 | struct dccp_rx_hist_entry * | ||
196 | dccp_rx_hist_find_data_packet(const struct list_head *list) | ||
197 | { | 195 | { |
198 | struct dccp_rx_hist_entry *entry, *packet = NULL; | 196 | const u8 idx_a = tfrc_rx_hist_index(h, a), |
199 | 197 | idx_b = tfrc_rx_hist_index(h, b); | |
200 | list_for_each_entry(entry, list, dccphrx_node) | 198 | struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; |
201 | if (entry->dccphrx_type == DCCP_PKT_DATA || | ||
202 | entry->dccphrx_type == DCCP_PKT_DATAACK) { | ||
203 | packet = entry; | ||
204 | break; | ||
205 | } | ||
206 | 199 | ||
207 | return packet; | 200 | h->ring[idx_a] = h->ring[idx_b]; |
201 | h->ring[idx_b] = tmp; | ||
208 | } | 202 | } |
209 | 203 | ||
210 | EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); | 204 | /* |
205 | * Private helper functions for loss detection. | ||
206 | * | ||
207 | * In the descriptions, `Si' refers to the sequence number of entry number i, | ||
208 | * whose NDP count is `Ni' (lower case is used for variables). | ||
209 | * Note: All __after_loss functions expect that a test against duplicates has | ||
210 | * been performed already: the seqno of the skb must not be less than the | ||
211 | * seqno of loss_prev; and it must not equal that of any valid hist_entry. | ||
212 | */ | ||
213 | static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) | ||
214 | { | ||
215 | u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, | ||
216 | s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, | ||
217 | s2 = DCCP_SKB_CB(skb)->dccpd_seq; | ||
218 | int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp, | ||
219 | d12 = dccp_delta_seqno(s1, s2), d2; | ||
220 | |||
221 | if (d12 > 0) { /* S1 < S2 */ | ||
222 | h->loss_count = 2; | ||
223 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2); | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | /* S0 < S2 < S1 */ | ||
228 | d2 = dccp_delta_seqno(s0, s2); | ||
211 | 229 | ||
212 | void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | 230 | if (d2 == 1 || n2 >= d2) { /* S2 is direct successor of S0 */ |
213 | struct list_head *rx_list, | 231 | int d21 = -d12; |
214 | struct list_head *li_list, | 232 | |
215 | struct dccp_rx_hist_entry *packet, | 233 | if (d21 == 1 || n1 >= d21) { |
216 | u64 nonloss_seqno) | 234 | /* hole is filled: S0, S2, and S1 are consecutive */ |
235 | h->loss_count = 0; | ||
236 | h->loss_start = tfrc_rx_hist_index(h, 1); | ||
237 | } else | ||
238 | /* gap between S2 and S1: just update loss_prev */ | ||
239 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); | ||
240 | |||
241 | } else { /* hole between S0 and S2 */ | ||
242 | /* | ||
243 | * Reorder history to insert S2 between S0 and s1 | ||
244 | */ | ||
245 | tfrc_rx_hist_swap(h, 0, 3); | ||
246 | h->loss_start = tfrc_rx_hist_index(h, 3); | ||
247 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2); | ||
248 | h->loss_count = 2; | ||
249 | } | ||
250 | } | ||
251 | |||
252 | /* return 1 if a new loss event has been identified */ | ||
253 | static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) | ||
217 | { | 254 | { |
218 | struct dccp_rx_hist_entry *entry, *next; | 255 | u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, |
219 | u8 num_later = 0; | 256 | s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno, |
220 | 257 | s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno, | |
221 | list_add(&packet->dccphrx_node, rx_list); | 258 | s3 = DCCP_SKB_CB(skb)->dccpd_seq; |
222 | 259 | int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp, | |
223 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; | 260 | d23 = dccp_delta_seqno(s2, s3), d13, d3, d31; |
224 | 261 | ||
225 | if (!list_empty(li_list)) { | 262 | if (d23 > 0) { /* S2 < S3 */ |
226 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | 263 | h->loss_count = 3; |
227 | if (num_later == 0) { | 264 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3); |
228 | if (after48(nonloss_seqno, | 265 | return 1; |
229 | entry->dccphrx_seqno)) { | 266 | } |
230 | list_del_init(&entry->dccphrx_node); | 267 | |
231 | dccp_rx_hist_entry_delete(hist, entry); | 268 | /* S3 < S2 */ |
232 | } | 269 | d13 = dccp_delta_seqno(s1, s3); |
233 | } else if (dccp_rx_hist_entry_data_packet(entry)) | 270 | |
234 | --num_later; | 271 | if (d13 > 0) { |
235 | } | ||
236 | } else { | ||
237 | int step = 0; | ||
238 | u8 win_count = 0; /* Not needed, but lets shut up gcc */ | ||
239 | int tmp; | ||
240 | /* | 272 | /* |
241 | * We have no loss interval history so we need at least one | 273 | * The sequence number order is S1, S3, S2 |
242 | * rtt:s of data packets to approximate rtt. | 274 | * Reorder history to insert entry between S1 and S2 |
243 | */ | 275 | */ |
244 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | 276 | tfrc_rx_hist_swap(h, 2, 3); |
245 | if (num_later == 0) { | 277 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3); |
246 | switch (step) { | 278 | h->loss_count = 3; |
247 | case 0: | 279 | return 1; |
248 | step = 1; | 280 | } |
249 | /* OK, find next data packet */ | 281 | |
250 | num_later = 1; | 282 | /* S0 < S3 < S1 */ |
251 | break; | 283 | d31 = -d13; |
252 | case 1: | 284 | d3 = dccp_delta_seqno(s0, s3); |
253 | step = 2; | 285 | |
254 | /* OK, find next data packet */ | 286 | if (d3 == 1 || n3 >= d3) { /* S3 is a successor of S0 */ |
255 | num_later = 1; | 287 | |
256 | win_count = entry->dccphrx_ccval; | 288 | if (d31 == 1 || n1 >= d31) { |
257 | break; | 289 | /* hole between S0 and S1 filled by S3 */ |
258 | case 2: | 290 | int d2 = dccp_delta_seqno(s1, s2), |
259 | tmp = win_count - entry->dccphrx_ccval; | 291 | n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp; |
260 | if (tmp < 0) | 292 | |
261 | tmp += TFRC_WIN_COUNT_LIMIT; | 293 | if (d2 == 1 || n2 >= d2) { |
262 | if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { | 294 | /* entire hole filled by S0, S3, S1, S2 */ |
263 | /* | 295 | h->loss_start = tfrc_rx_hist_index(h, 2); |
264 | * We have found a packet older | 296 | h->loss_count = 0; |
265 | * than one rtt remove the rest | 297 | } else { |
266 | */ | 298 | /* gap remains between S1 and S2 */ |
267 | step = 3; | 299 | h->loss_start = tfrc_rx_hist_index(h, 1); |
268 | } else /* OK, find next data packet */ | 300 | h->loss_count = 1; |
269 | num_later = 1; | 301 | } |
270 | break; | 302 | |
271 | case 3: | 303 | } else /* gap exists between S3 and S1, loss_count stays at 2 */ |
272 | list_del_init(&entry->dccphrx_node); | 304 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3); |
273 | dccp_rx_hist_entry_delete(hist, entry); | 305 | |
274 | break; | 306 | return 0; |
275 | } | 307 | } |
276 | } else if (dccp_rx_hist_entry_data_packet(entry)) | 308 | |
277 | --num_later; | 309 | /* |
310 | * The remaining case: S3 is not a successor of S0. | ||
311 | * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1 | ||
312 | */ | ||
313 | tfrc_rx_hist_swap(h, 0, 3); | ||
314 | h->loss_start = tfrc_rx_hist_index(h, 3); | ||
315 | tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3); | ||
316 | h->loss_count = 3; | ||
317 | |||
318 | return 1; | ||
319 | } | ||
320 | |||
321 | /* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */ | ||
322 | static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2) | ||
323 | { | ||
324 | DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count); | ||
325 | |||
326 | return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno, | ||
327 | tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno); | ||
328 | } | ||
329 | |||
330 | /* recycle RX history records to continue loss detection if necessary */ | ||
331 | static void __three_after_loss(struct tfrc_rx_hist *h) | ||
332 | { | ||
333 | /* | ||
334 | * The distance between S0 and S1 is always greater than 1 and the NDP | ||
335 | * count of S1 is smaller than this distance. Otherwise there would | ||
336 | * have been no loss. Hence it is only necessary to see whether there | ||
337 | * are further missing data packets between S1/S2 and S2/S3. | ||
338 | */ | ||
339 | int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2), | ||
340 | d3 = tfrc_rx_hist_delta_seqno(h, 2, 3), | ||
341 | n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp, | ||
342 | n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp; | ||
343 | |||
344 | if (d2 == 1 || n2 >= d2) { /* S2 is successor to S1 */ | ||
345 | |||
346 | if (d3 == 1 || n3 >= d3) { | ||
347 | /* S3 is successor of S2: entire hole is filled */ | ||
348 | h->loss_start = tfrc_rx_hist_index(h, 3); | ||
349 | h->loss_count = 0; | ||
350 | } else { | ||
351 | /* gap between S2 and S3 */ | ||
352 | h->loss_start = tfrc_rx_hist_index(h, 2); | ||
353 | h->loss_count = 1; | ||
278 | } | 354 | } |
355 | |||
356 | } else { /* gap between S1 and S2 */ | ||
357 | h->loss_start = tfrc_rx_hist_index(h, 1); | ||
358 | h->loss_count = 2; | ||
279 | } | 359 | } |
280 | } | 360 | } |
281 | 361 | ||
282 | EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); | 362 | /** |
363 | * tfrc_rx_handle_loss - Loss detection and further processing | ||
364 | * @h: The non-empty RX history object | ||
365 | * @lh: Loss Intervals database to update | ||
366 | * @skb: Currently received packet | ||
367 | * @ndp: The NDP count belonging to @skb | ||
368 | * @calc_first_li: Caller-dependent computation of first loss interval in @lh | ||
369 | * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) | ||
370 | * Chooses action according to pending loss, updates LI database when a new | ||
371 | * loss was detected, and does required post-processing. Returns 1 when caller | ||
372 | * should send feedback, 0 otherwise. | ||
373 | */ | ||
374 | int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, | ||
375 | struct tfrc_loss_hist *lh, | ||
376 | struct sk_buff *skb, u32 ndp, | ||
377 | u32 (*calc_first_li)(struct sock *), struct sock *sk) | ||
378 | { | ||
379 | int is_new_loss = 0; | ||
283 | 380 | ||
284 | void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) | 381 | if (h->loss_count == 1) { |
382 | __one_after_loss(h, skb, ndp); | ||
383 | } else if (h->loss_count != 2) { | ||
384 | DCCP_BUG("invalid loss_count %d", h->loss_count); | ||
385 | } else if (__two_after_loss(h, skb, ndp)) { | ||
386 | /* | ||
387 | * Update Loss Interval database and recycle RX records | ||
388 | */ | ||
389 | is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); | ||
390 | __three_after_loss(h); | ||
391 | } | ||
392 | return is_new_loss; | ||
393 | } | ||
394 | EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); | ||
395 | |||
396 | int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) | ||
285 | { | 397 | { |
286 | struct dccp_rx_hist_entry *entry, *next; | 398 | int i; |
399 | |||
400 | for (i = 0; i <= TFRC_NDUPACK; i++) { | ||
401 | h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); | ||
402 | if (h->ring[i] == NULL) | ||
403 | goto out_free; | ||
404 | } | ||
405 | |||
406 | h->loss_count = h->loss_start = 0; | ||
407 | return 0; | ||
287 | 408 | ||
288 | list_for_each_entry_safe(entry, next, list, dccphrx_node) { | 409 | out_free: |
289 | list_del_init(&entry->dccphrx_node); | 410 | while (i-- != 0) { |
290 | kmem_cache_free(hist->dccprxh_slab, entry); | 411 | kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); |
412 | h->ring[i] = NULL; | ||
291 | } | 413 | } |
414 | return -ENOBUFS; | ||
292 | } | 415 | } |
416 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); | ||
417 | |||
418 | void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) | ||
419 | { | ||
420 | int i; | ||
293 | 421 | ||
294 | EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); | 422 | for (i = 0; i <= TFRC_NDUPACK; ++i) |
423 | if (h->ring[i] != NULL) { | ||
424 | kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); | ||
425 | h->ring[i] = NULL; | ||
426 | } | ||
427 | } | ||
428 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); | ||
295 | 429 | ||
430 | /** | ||
431 | * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against | ||
432 | */ | ||
433 | static inline struct tfrc_rx_hist_entry * | ||
434 | tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) | ||
435 | { | ||
436 | return h->ring[0]; | ||
437 | } | ||
296 | 438 | ||
297 | MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " | 439 | /** |
298 | "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); | 440 | * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry |
299 | MODULE_DESCRIPTION("DCCP TFRC library"); | 441 | */ |
300 | MODULE_LICENSE("GPL"); | 442 | static inline struct tfrc_rx_hist_entry * |
443 | tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) | ||
444 | { | ||
445 | return h->ring[h->rtt_sample_prev]; | ||
446 | } | ||
447 | |||
448 | /** | ||
449 | * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal | ||
450 | * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able | ||
451 | * to compute a sample with given data - calling function should check this. | ||
452 | */ | ||
453 | u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) | ||
454 | { | ||
455 | u32 sample = 0, | ||
456 | delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, | ||
457 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); | ||
458 | |||
459 | if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ | ||
460 | if (h->rtt_sample_prev == 2) { /* previous candidate stored */ | ||
461 | sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, | ||
462 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); | ||
463 | if (sample) | ||
464 | sample = 4 / sample * | ||
465 | ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, | ||
466 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); | ||
467 | else /* | ||
468 | * FIXME: This condition is in principle not | ||
469 | * possible but occurs when CCID is used for | ||
470 | * two-way data traffic. I have tried to trace | ||
471 | * it, but the cause does not seem to be here. | ||
472 | */ | ||
473 | DCCP_BUG("please report to dccp@vger.kernel.org" | ||
474 | " => prev = %u, last = %u", | ||
475 | tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, | ||
476 | tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); | ||
477 | } else if (delta_v < 1) { | ||
478 | h->rtt_sample_prev = 1; | ||
479 | goto keep_ref_for_next_time; | ||
480 | } | ||
481 | |||
482 | } else if (delta_v == 4) /* optimal match */ | ||
483 | sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); | ||
484 | else { /* suboptimal match */ | ||
485 | h->rtt_sample_prev = 2; | ||
486 | goto keep_ref_for_next_time; | ||
487 | } | ||
488 | |||
489 | if (unlikely(sample > DCCP_SANE_RTT_MAX)) { | ||
490 | DCCP_WARN("RTT sample %u too large, using max\n", sample); | ||
491 | sample = DCCP_SANE_RTT_MAX; | ||
492 | } | ||
493 | |||
494 | h->rtt_sample_prev = 0; /* use current entry as next reference */ | ||
495 | keep_ref_for_next_time: | ||
496 | |||
497 | return sample; | ||
498 | } | ||
499 | EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); | ||
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 032bb61c6e39..c7eeda49cb20 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h | |||
@@ -1,10 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * net/dccp/packet_history.h | 2 | * Packet RX/TX history data structures and routines for TFRC-based protocols. |
3 | * | 3 | * |
4 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK | ||
4 | * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. | 5 | * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. |
5 | * | 6 | * |
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | 7 | * This code has been developed by the University of Waikato WAND |
9 | * research group. For further information please see http://www.wand.net.nz/ | 8 | * research group. For further information please see http://www.wand.net.nz/ |
10 | * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz | 9 | * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz |
@@ -37,165 +36,128 @@ | |||
37 | #ifndef _DCCP_PKT_HIST_ | 36 | #ifndef _DCCP_PKT_HIST_ |
38 | #define _DCCP_PKT_HIST_ | 37 | #define _DCCP_PKT_HIST_ |
39 | 38 | ||
40 | #include <linux/ktime.h> | ||
41 | #include <linux/list.h> | 39 | #include <linux/list.h> |
42 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include "tfrc.h" | ||
43 | 42 | ||
44 | #include "../../dccp.h" | 43 | struct tfrc_tx_hist_entry; |
45 | 44 | ||
46 | /* Number of later packets received before one is considered lost */ | 45 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); |
47 | #define TFRC_RECV_NUM_LATE_LOSS 3 | 46 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); |
47 | extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, | ||
48 | const u64 seqno, const ktime_t now); | ||
48 | 49 | ||
49 | #define TFRC_WIN_COUNT_PER_RTT 4 | 50 | /* Subtraction a-b modulo-16, respects circular wrap-around */ |
50 | #define TFRC_WIN_COUNT_LIMIT 16 | 51 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) |
51 | 52 | ||
52 | /* | 53 | /* Number of packets to wait after a missing packet (RFC 4342, 6.1) */ |
53 | * Transmitter History data structures and declarations | 54 | #define TFRC_NDUPACK 3 |
55 | |||
56 | /** | ||
57 | * tfrc_rx_hist_entry - Store information about a single received packet | ||
58 | * @tfrchrx_seqno: DCCP packet sequence number | ||
59 | * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1) | ||
60 | * @tfrchrx_ndp: the NDP count (if any) of the packet | ||
61 | * @tfrchrx_tstamp: actual receive time of packet | ||
54 | */ | 62 | */ |
55 | struct dccp_tx_hist_entry { | 63 | struct tfrc_rx_hist_entry { |
56 | struct list_head dccphtx_node; | 64 | u64 tfrchrx_seqno:48, |
57 | u64 dccphtx_seqno:48, | 65 | tfrchrx_ccval:4, |
58 | dccphtx_sent:1; | 66 | tfrchrx_type:4; |
59 | u32 dccphtx_rtt; | 67 | u32 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */ |
60 | ktime_t dccphtx_tstamp; | 68 | ktime_t tfrchrx_tstamp; |
61 | }; | 69 | }; |
62 | 70 | ||
63 | struct dccp_tx_hist { | 71 | /** |
64 | struct kmem_cache *dccptxh_slab; | 72 | * tfrc_rx_hist - RX history structure for TFRC-based protocols |
73 | * | ||
74 | * @ring: Packet history for RTT sampling and loss detection | ||
75 | * @loss_count: Number of entries in circular history | ||
76 | * @loss_start: Movable index (for loss detection) | ||
77 | * @rtt_sample_prev: Used during RTT sampling, points to candidate entry | ||
78 | */ | ||
79 | struct tfrc_rx_hist { | ||
80 | struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; | ||
81 | u8 loss_count:2, | ||
82 | loss_start:2; | ||
83 | #define rtt_sample_prev loss_start | ||
65 | }; | 84 | }; |
66 | 85 | ||
67 | extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); | 86 | /** |
68 | extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); | 87 | * tfrc_rx_hist_index - index to reach n-th entry after loss_start |
69 | 88 | */ | |
70 | static inline struct dccp_tx_hist_entry * | 89 | static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n) |
71 | dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, | ||
72 | const gfp_t prio) | ||
73 | { | 90 | { |
74 | struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, | 91 | return (h->loss_start + n) & TFRC_NDUPACK; |
75 | prio); | ||
76 | |||
77 | if (entry != NULL) | ||
78 | entry->dccphtx_sent = 0; | ||
79 | |||
80 | return entry; | ||
81 | } | 92 | } |
82 | 93 | ||
83 | static inline struct dccp_tx_hist_entry * | 94 | /** |
84 | dccp_tx_hist_head(struct list_head *list) | 95 | * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far |
96 | */ | ||
97 | static inline struct tfrc_rx_hist_entry * | ||
98 | tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h) | ||
85 | { | 99 | { |
86 | struct dccp_tx_hist_entry *head = NULL; | 100 | return h->ring[tfrc_rx_hist_index(h, h->loss_count)]; |
87 | |||
88 | if (!list_empty(list)) | ||
89 | head = list_entry(list->next, struct dccp_tx_hist_entry, | ||
90 | dccphtx_node); | ||
91 | return head; | ||
92 | } | 101 | } |
93 | 102 | ||
94 | extern struct dccp_tx_hist_entry * | 103 | /** |
95 | dccp_tx_hist_find_entry(const struct list_head *list, | 104 | * tfrc_rx_hist_entry - return the n-th history entry after loss_start |
96 | const u64 seq); | 105 | */ |
97 | 106 | static inline struct tfrc_rx_hist_entry * | |
98 | static inline void dccp_tx_hist_add_entry(struct list_head *list, | 107 | tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n) |
99 | struct dccp_tx_hist_entry *entry) | ||
100 | { | 108 | { |
101 | list_add(&entry->dccphtx_node, list); | 109 | return h->ring[tfrc_rx_hist_index(h, n)]; |
102 | } | 110 | } |
103 | 111 | ||
104 | static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, | 112 | /** |
105 | struct dccp_tx_hist_entry *entry) | 113 | * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected |
114 | */ | ||
115 | static inline struct tfrc_rx_hist_entry * | ||
116 | tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h) | ||
106 | { | 117 | { |
107 | if (entry != NULL) | 118 | return h->ring[h->loss_start]; |
108 | kmem_cache_free(hist->dccptxh_slab, entry); | ||
109 | } | 119 | } |
110 | 120 | ||
111 | extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, | 121 | /* initialise loss detection and disable RTT sampling */ |
112 | struct list_head *list); | 122 | static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h) |
113 | |||
114 | extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, | ||
115 | struct list_head *list, | ||
116 | struct dccp_tx_hist_entry *next); | ||
117 | |||
118 | /* | ||
119 | * Receiver History data structures and declarations | ||
120 | */ | ||
121 | struct dccp_rx_hist_entry { | ||
122 | struct list_head dccphrx_node; | ||
123 | u64 dccphrx_seqno:48, | ||
124 | dccphrx_ccval:4, | ||
125 | dccphrx_type:4; | ||
126 | u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ | ||
127 | ktime_t dccphrx_tstamp; | ||
128 | }; | ||
129 | |||
130 | struct dccp_rx_hist { | ||
131 | struct kmem_cache *dccprxh_slab; | ||
132 | }; | ||
133 | |||
134 | extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); | ||
135 | extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); | ||
136 | |||
137 | static inline struct dccp_rx_hist_entry * | ||
138 | dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, | ||
139 | const u32 ndp, | ||
140 | const struct sk_buff *skb, | ||
141 | const gfp_t prio) | ||
142 | { | 123 | { |
143 | struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, | 124 | h->loss_count = 1; |
144 | prio); | ||
145 | |||
146 | if (entry != NULL) { | ||
147 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
148 | |||
149 | entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
150 | entry->dccphrx_ccval = dh->dccph_ccval; | ||
151 | entry->dccphrx_type = dh->dccph_type; | ||
152 | entry->dccphrx_ndp = ndp; | ||
153 | entry->dccphrx_tstamp = ktime_get_real(); | ||
154 | } | ||
155 | |||
156 | return entry; | ||
157 | } | 125 | } |
158 | 126 | ||
159 | static inline struct dccp_rx_hist_entry * | 127 | /* indicate whether previously a packet was detected missing */ |
160 | dccp_rx_hist_head(struct list_head *list) | 128 | static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) |
161 | { | 129 | { |
162 | struct dccp_rx_hist_entry *head = NULL; | 130 | return h->loss_count; |
163 | |||
164 | if (!list_empty(list)) | ||
165 | head = list_entry(list->next, struct dccp_rx_hist_entry, | ||
166 | dccphrx_node); | ||
167 | return head; | ||
168 | } | 131 | } |
169 | 132 | ||
170 | extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, | 133 | /* any data packets missing between last reception and skb ? */ |
171 | u8 *ccval); | 134 | static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h, |
172 | extern struct dccp_rx_hist_entry * | 135 | const struct sk_buff *skb, |
173 | dccp_rx_hist_find_data_packet(const struct list_head *list); | 136 | u32 ndp) |
174 | |||
175 | extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | ||
176 | struct list_head *rx_list, | ||
177 | struct list_head *li_list, | ||
178 | struct dccp_rx_hist_entry *packet, | ||
179 | u64 nonloss_seqno); | ||
180 | |||
181 | static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, | ||
182 | struct dccp_rx_hist_entry *entry) | ||
183 | { | 137 | { |
184 | if (entry != NULL) | 138 | int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno, |
185 | kmem_cache_free(hist->dccprxh_slab, entry); | 139 | DCCP_SKB_CB(skb)->dccpd_seq); |
186 | } | ||
187 | 140 | ||
188 | extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, | 141 | if (delta > 1 && ndp < delta) |
189 | struct list_head *list); | 142 | tfrc_rx_hist_loss_indicated(h); |
190 | 143 | ||
191 | static inline int | 144 | return tfrc_rx_hist_loss_pending(h); |
192 | dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) | ||
193 | { | ||
194 | return entry->dccphrx_type == DCCP_PKT_DATA || | ||
195 | entry->dccphrx_type == DCCP_PKT_DATAACK; | ||
196 | } | 145 | } |
197 | 146 | ||
198 | extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | 147 | extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, |
199 | struct list_head *li_list, u8 *win_loss); | 148 | const struct sk_buff *skb, const u32 ndp); |
149 | |||
150 | extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); | ||
151 | |||
152 | struct tfrc_loss_hist; | ||
153 | extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, | ||
154 | struct tfrc_loss_hist *lh, | ||
155 | struct sk_buff *skb, u32 ndp, | ||
156 | u32 (*first_li)(struct sock *sk), | ||
157 | struct sock *sk); | ||
158 | extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, | ||
159 | const struct sk_buff *skb); | ||
160 | extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); | ||
161 | extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); | ||
200 | 162 | ||
201 | #endif /* _DCCP_PKT_HIST_ */ | 163 | #endif /* _DCCP_PKT_HIST_ */ |
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c new file mode 100644 index 000000000000..d1dfbb8de64c --- /dev/null +++ b/net/dccp/ccids/lib/tfrc.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * TFRC: main module holding the pieces of the TFRC library together | ||
3 | * | ||
4 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK | ||
5 | * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> | ||
6 | */ | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/moduleparam.h> | ||
9 | #include "tfrc.h" | ||
10 | |||
11 | #ifdef CONFIG_IP_DCCP_TFRC_DEBUG | ||
12 | int tfrc_debug; | ||
13 | module_param(tfrc_debug, bool, 0444); | ||
14 | MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); | ||
15 | #endif | ||
16 | |||
17 | extern int tfrc_tx_packet_history_init(void); | ||
18 | extern void tfrc_tx_packet_history_exit(void); | ||
19 | extern int tfrc_rx_packet_history_init(void); | ||
20 | extern void tfrc_rx_packet_history_exit(void); | ||
21 | |||
22 | extern int tfrc_li_init(void); | ||
23 | extern void tfrc_li_exit(void); | ||
24 | |||
25 | static int __init tfrc_module_init(void) | ||
26 | { | ||
27 | int rc = tfrc_li_init(); | ||
28 | |||
29 | if (rc) | ||
30 | goto out; | ||
31 | |||
32 | rc = tfrc_tx_packet_history_init(); | ||
33 | if (rc) | ||
34 | goto out_free_loss_intervals; | ||
35 | |||
36 | rc = tfrc_rx_packet_history_init(); | ||
37 | if (rc) | ||
38 | goto out_free_tx_history; | ||
39 | return 0; | ||
40 | |||
41 | out_free_tx_history: | ||
42 | tfrc_tx_packet_history_exit(); | ||
43 | out_free_loss_intervals: | ||
44 | tfrc_li_exit(); | ||
45 | out: | ||
46 | return rc; | ||
47 | } | ||
48 | |||
49 | static void __exit tfrc_module_exit(void) | ||
50 | { | ||
51 | tfrc_rx_packet_history_exit(); | ||
52 | tfrc_tx_packet_history_exit(); | ||
53 | tfrc_li_exit(); | ||
54 | } | ||
55 | |||
56 | module_init(tfrc_module_init); | ||
57 | module_exit(tfrc_module_exit); | ||
58 | |||
59 | MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, " | ||
60 | "Ian McDonald <ian.mcdonald@jandi.co.nz>, " | ||
61 | "Arnaldo Carvalho de Melo <acme@redhat.com>"); | ||
62 | MODULE_DESCRIPTION("DCCP TFRC library"); | ||
63 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index faf5f7e219e3..1fb1187bbf1c 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -3,10 +3,11 @@ | |||
3 | /* | 3 | /* |
4 | * net/dccp/ccids/lib/tfrc.h | 4 | * net/dccp/ccids/lib/tfrc.h |
5 | * | 5 | * |
6 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | 6 | * Copyright (c) 2007 The University of Aberdeen, Scotland, UK |
7 | * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> | 7 | * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. |
8 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | 8 | * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> |
9 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | 9 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> |
10 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
10 | * | 11 | * |
11 | * This program is free software; you can redistribute it and/or modify | 12 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | 13 | * it under the terms of the GNU General Public License as published by |
@@ -15,6 +16,17 @@ | |||
15 | */ | 16 | */ |
16 | #include <linux/types.h> | 17 | #include <linux/types.h> |
17 | #include <asm/div64.h> | 18 | #include <asm/div64.h> |
19 | #include "../../dccp.h" | ||
20 | /* internal includes that this module exports: */ | ||
21 | #include "loss_interval.h" | ||
22 | #include "packet_history.h" | ||
23 | |||
24 | #ifdef CONFIG_IP_DCCP_TFRC_DEBUG | ||
25 | extern int tfrc_debug; | ||
26 | #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) | ||
27 | #else | ||
28 | #define tfrc_pr_debug(format, a...) | ||
29 | #endif | ||
18 | 30 | ||
19 | /* integer-arithmetic divisions of type (a * 1000000)/b */ | 31 | /* integer-arithmetic divisions of type (a * 1000000)/b */ |
20 | static inline u64 scaled_div(u64 a, u32 b) | 32 | static inline u64 scaled_div(u64 a, u32 b) |
@@ -37,6 +49,15 @@ static inline u32 scaled_div32(u64 a, u32 b) | |||
37 | return result; | 49 | return result; |
38 | } | 50 | } |
39 | 51 | ||
52 | /** | ||
53 | * tfrc_ewma - Exponentially weighted moving average | ||
54 | * @weight: Weight to be used as damping factor, in units of 1/10 | ||
55 | */ | ||
56 | static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) | ||
57 | { | ||
58 | return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval; | ||
59 | } | ||
60 | |||
40 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); | 61 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); |
41 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); | 62 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); |
42 | 63 | ||
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index ee97950d77d1..ebe59d98721a 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -72,11 +72,21 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); | |||
72 | /* RFC 1122, 4.2.3.1 initial RTO value */ | 72 | /* RFC 1122, 4.2.3.1 initial RTO value */ |
73 | #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) | 73 | #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) |
74 | 74 | ||
75 | #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ | 75 | /* |
76 | * The maximum back-off value for retransmissions. This is needed for | ||
77 | * - retransmitting client-Requests (sec. 8.1.1), | ||
78 | * - retransmitting Close/CloseReq when closing (sec. 8.3), | ||
79 | * - feature-negotiation retransmission (sec. 6.6.3), | ||
80 | * - Acks in client-PARTOPEN state (sec. 8.1.5). | ||
81 | */ | ||
82 | #define DCCP_RTO_MAX ((unsigned)(64 * HZ)) | ||
76 | 83 | ||
77 | /* bounds for sampled RTT values from packet exchanges (in usec) */ | 84 | /* |
85 | * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4 | ||
86 | */ | ||
78 | #define DCCP_SANE_RTT_MIN 100 | 87 | #define DCCP_SANE_RTT_MIN 100 |
79 | #define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC) | 88 | #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) |
89 | #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) | ||
80 | 90 | ||
81 | /* Maximal interval between probes for local resources. */ | 91 | /* Maximal interval between probes for local resources. */ |
82 | #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) | 92 | #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) |
@@ -143,12 +153,6 @@ static inline u64 max48(const u64 seq1, const u64 seq2) | |||
143 | return after48(seq1, seq2) ? seq1 : seq2; | 153 | return after48(seq1, seq2) ? seq1 : seq2; |
144 | } | 154 | } |
145 | 155 | ||
146 | /* is seq1 next seqno after seq2 */ | ||
147 | static inline int follows48(const u64 seq1, const u64 seq2) | ||
148 | { | ||
149 | return dccp_delta_seqno(seq2, seq1) == 1; | ||
150 | } | ||
151 | |||
152 | enum { | 156 | enum { |
153 | DCCP_MIB_NUM = 0, | 157 | DCCP_MIB_NUM = 0, |
154 | DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ | 158 | DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ |
@@ -334,6 +338,7 @@ struct dccp_skb_cb { | |||
334 | 338 | ||
335 | #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) | 339 | #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) |
336 | 340 | ||
341 | /* RFC 4340, sec. 7.7 */ | ||
337 | static inline int dccp_non_data_packet(const struct sk_buff *skb) | 342 | static inline int dccp_non_data_packet(const struct sk_buff *skb) |
338 | { | 343 | { |
339 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | 344 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; |
@@ -346,6 +351,17 @@ static inline int dccp_non_data_packet(const struct sk_buff *skb) | |||
346 | type == DCCP_PKT_SYNCACK; | 351 | type == DCCP_PKT_SYNCACK; |
347 | } | 352 | } |
348 | 353 | ||
354 | /* RFC 4340, sec. 7.7 */ | ||
355 | static inline int dccp_data_packet(const struct sk_buff *skb) | ||
356 | { | ||
357 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
358 | |||
359 | return type == DCCP_PKT_DATA || | ||
360 | type == DCCP_PKT_DATAACK || | ||
361 | type == DCCP_PKT_REQUEST || | ||
362 | type == DCCP_PKT_RESPONSE; | ||
363 | } | ||
364 | |||
349 | static inline int dccp_packet_without_ack(const struct sk_buff *skb) | 365 | static inline int dccp_packet_without_ack(const struct sk_buff *skb) |
350 | { | 366 | { |
351 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | 367 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; |
@@ -406,6 +422,7 @@ static inline int dccp_ack_pending(const struct sock *sk) | |||
406 | } | 422 | } |
407 | 423 | ||
408 | extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); | 424 | extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); |
425 | extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); | ||
409 | extern int dccp_insert_option_elapsed_time(struct sock *sk, | 426 | extern int dccp_insert_option_elapsed_time(struct sock *sk, |
410 | struct sk_buff *skb, | 427 | struct sk_buff *skb, |
411 | u32 elapsed_time); | 428 | u32 elapsed_time); |
diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 5ebdd86c1b99..4a4f6ce4498d 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c | |||
@@ -4,10 +4,16 @@ | |||
4 | * An implementation of the DCCP protocol | 4 | * An implementation of the DCCP protocol |
5 | * Andrea Bittau <a.bittau@cs.ucl.ac.uk> | 5 | * Andrea Bittau <a.bittau@cs.ucl.ac.uk> |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or | 7 | * ASSUMPTIONS |
8 | * modify it under the terms of the GNU General Public License | 8 | * ----------- |
9 | * as published by the Free Software Foundation; either version | 9 | * o All currently known SP features have 1-byte quantities. If in the future |
10 | * 2 of the License, or (at your option) any later version. | 10 | * extensions of RFCs 4340..42 define features with item lengths larger than |
11 | * one byte, a feature-specific extension of the code will be required. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version | ||
16 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | 17 | */ |
12 | 18 | ||
13 | #include <linux/module.h> | 19 | #include <linux/module.h> |
@@ -24,11 +30,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, | |||
24 | 30 | ||
25 | dccp_feat_debug(type, feature, *val); | 31 | dccp_feat_debug(type, feature, *val); |
26 | 32 | ||
27 | if (!dccp_feat_is_valid_type(type)) { | 33 | if (len > 3) { |
28 | DCCP_WARN("option type %d invalid in negotiation\n", type); | ||
29 | return 1; | ||
30 | } | ||
31 | if (!dccp_feat_is_valid_length(type, feature, len)) { | ||
32 | DCCP_WARN("invalid length %d\n", len); | 34 | DCCP_WARN("invalid length %d\n", len); |
33 | return 1; | 35 | return 1; |
34 | } | 36 | } |
@@ -99,7 +101,6 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr) | |||
99 | return 0; | 101 | return 0; |
100 | } | 102 | } |
101 | 103 | ||
102 | /* XXX taking only u8 vals */ | ||
103 | static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) | 104 | static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) |
104 | { | 105 | { |
105 | dccp_feat_debug(type, feat, val); | 106 | dccp_feat_debug(type, feat, val); |
@@ -144,7 +145,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, | |||
144 | /* FIXME sanity check vals */ | 145 | /* FIXME sanity check vals */ |
145 | 146 | ||
146 | /* Are values in any order? XXX Lame "algorithm" here */ | 147 | /* Are values in any order? XXX Lame "algorithm" here */ |
147 | /* XXX assume values are 1 byte */ | ||
148 | for (i = 0; i < slen; i++) { | 148 | for (i = 0; i < slen; i++) { |
149 | for (j = 0; j < rlen; j++) { | 149 | for (j = 0; j < rlen; j++) { |
150 | if (spref[i] == rpref[j]) { | 150 | if (spref[i] == rpref[j]) { |
@@ -179,7 +179,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | /* need to put result and our preference list */ | 181 | /* need to put result and our preference list */ |
182 | /* XXX assume 1 byte vals */ | ||
183 | rlen = 1 + opt->dccpop_len; | 182 | rlen = 1 + opt->dccpop_len; |
184 | rpref = kmalloc(rlen, GFP_ATOMIC); | 183 | rpref = kmalloc(rlen, GFP_ATOMIC); |
185 | if (rpref == NULL) | 184 | if (rpref == NULL) |
@@ -637,12 +636,12 @@ const char *dccp_feat_name(const u8 feat) | |||
637 | [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", | 636 | [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", |
638 | [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", | 637 | [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", |
639 | }; | 638 | }; |
639 | if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) | ||
640 | return feature_names[DCCPF_RESERVED]; | ||
641 | |||
640 | if (feat >= DCCPF_MIN_CCID_SPECIFIC) | 642 | if (feat >= DCCPF_MIN_CCID_SPECIFIC) |
641 | return "CCID-specific"; | 643 | return "CCID-specific"; |
642 | 644 | ||
643 | if (dccp_feat_is_reserved(feat)) | ||
644 | return feature_names[DCCPF_RESERVED]; | ||
645 | |||
646 | return feature_names[feat]; | 645 | return feature_names[feat]; |
647 | } | 646 | } |
648 | 647 | ||
diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 177f7dee4d10..e272222c7ace 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h | |||
@@ -14,32 +14,6 @@ | |||
14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
15 | #include "dccp.h" | 15 | #include "dccp.h" |
16 | 16 | ||
17 | static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len) | ||
18 | { | ||
19 | /* sec. 6.1: Confirm has at least length 3, | ||
20 | * sec. 6.2: Change has at least length 4 */ | ||
21 | if (len < 3) | ||
22 | return 1; | ||
23 | if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R)) | ||
24 | return 1; | ||
25 | /* XXX: add per-feature length validation (sec. 6.6.8) */ | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | static inline int dccp_feat_is_reserved(const u8 feat) | ||
30 | { | ||
31 | return (feat > DCCPF_DATA_CHECKSUM && | ||
32 | feat < DCCPF_MIN_CCID_SPECIFIC) || | ||
33 | feat == DCCPF_RESERVED; | ||
34 | } | ||
35 | |||
36 | /* feature negotiation knows only these four option types (RFC 4340, sec. 6) */ | ||
37 | static inline int dccp_feat_is_valid_type(const u8 optnum) | ||
38 | { | ||
39 | return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R; | ||
40 | |||
41 | } | ||
42 | |||
43 | #ifdef CONFIG_IP_DCCP_DEBUG | 17 | #ifdef CONFIG_IP_DCCP_DEBUG |
44 | extern const char *dccp_feat_typename(const u8 type); | 18 | extern const char *dccp_feat_typename(const u8 type); |
45 | extern const char *dccp_feat_name(const u8 feat); | 19 | extern const char *dccp_feat_name(const u8 feat); |
diff --git a/net/dccp/input.c b/net/dccp/input.c index 1ce101062824..08392ed86c25 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
@@ -22,26 +22,77 @@ | |||
22 | /* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ | 22 | /* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ |
23 | int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; | 23 | int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; |
24 | 24 | ||
25 | static void dccp_fin(struct sock *sk, struct sk_buff *skb) | 25 | static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb) |
26 | { | 26 | { |
27 | sk->sk_shutdown |= RCV_SHUTDOWN; | ||
28 | sock_set_flag(sk, SOCK_DONE); | ||
29 | __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); | 27 | __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); |
30 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 28 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
31 | skb_set_owner_r(skb, sk); | 29 | skb_set_owner_r(skb, sk); |
32 | sk->sk_data_ready(sk, 0); | 30 | sk->sk_data_ready(sk, 0); |
33 | } | 31 | } |
34 | 32 | ||
35 | static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) | 33 | static void dccp_fin(struct sock *sk, struct sk_buff *skb) |
36 | { | 34 | { |
37 | dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); | 35 | /* |
38 | dccp_fin(sk, skb); | 36 | * On receiving Close/CloseReq, both RD/WR shutdown are performed. |
39 | dccp_set_state(sk, DCCP_CLOSED); | 37 | * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after |
40 | sk_wake_async(sk, 1, POLL_HUP); | 38 | * receiving the closing segment, but there is no guarantee that such |
39 | * data will be processed at all. | ||
40 | */ | ||
41 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
42 | sock_set_flag(sk, SOCK_DONE); | ||
43 | dccp_enqueue_skb(sk, skb); | ||
44 | } | ||
45 | |||
46 | static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb) | ||
47 | { | ||
48 | int queued = 0; | ||
49 | |||
50 | switch (sk->sk_state) { | ||
51 | /* | ||
52 | * We ignore Close when received in one of the following states: | ||
53 | * - CLOSED (may be a late or duplicate packet) | ||
54 | * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier) | ||
55 | * - RESPOND (already handled by dccp_check_req) | ||
56 | */ | ||
57 | case DCCP_CLOSING: | ||
58 | /* | ||
59 | * Simultaneous-close: receiving a Close after sending one. This | ||
60 | * can happen if both client and server perform active-close and | ||
61 | * will result in an endless ping-pong of crossing and retrans- | ||
62 | * mitted Close packets, which only terminates when one of the | ||
63 | * nodes times out (min. 64 seconds). Quicker convergence can be | ||
64 | * achieved when one of the nodes acts as tie-breaker. | ||
65 | * This is ok as both ends are done with data transfer and each | ||
66 | * end is just waiting for the other to acknowledge termination. | ||
67 | */ | ||
68 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) | ||
69 | break; | ||
70 | /* fall through */ | ||
71 | case DCCP_REQUESTING: | ||
72 | case DCCP_ACTIVE_CLOSEREQ: | ||
73 | dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); | ||
74 | dccp_done(sk); | ||
75 | break; | ||
76 | case DCCP_OPEN: | ||
77 | case DCCP_PARTOPEN: | ||
78 | /* Give waiting application a chance to read pending data */ | ||
79 | queued = 1; | ||
80 | dccp_fin(sk, skb); | ||
81 | dccp_set_state(sk, DCCP_PASSIVE_CLOSE); | ||
82 | /* fall through */ | ||
83 | case DCCP_PASSIVE_CLOSE: | ||
84 | /* | ||
85 | * Retransmitted Close: we have already enqueued the first one. | ||
86 | */ | ||
87 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); | ||
88 | } | ||
89 | return queued; | ||
41 | } | 90 | } |
42 | 91 | ||
43 | static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) | 92 | static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) |
44 | { | 93 | { |
94 | int queued = 0; | ||
95 | |||
45 | /* | 96 | /* |
46 | * Step 7: Check for unexpected packet types | 97 | * Step 7: Check for unexpected packet types |
47 | * If (S.is_server and P.type == CloseReq) | 98 | * If (S.is_server and P.type == CloseReq) |
@@ -50,12 +101,26 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) | |||
50 | */ | 101 | */ |
51 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { | 102 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { |
52 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); | 103 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); |
53 | return; | 104 | return queued; |
54 | } | 105 | } |
55 | 106 | ||
56 | if (sk->sk_state != DCCP_CLOSING) | 107 | /* Step 13: process relevant Client states < CLOSEREQ */ |
108 | switch (sk->sk_state) { | ||
109 | case DCCP_REQUESTING: | ||
110 | dccp_send_close(sk, 0); | ||
57 | dccp_set_state(sk, DCCP_CLOSING); | 111 | dccp_set_state(sk, DCCP_CLOSING); |
58 | dccp_send_close(sk, 0); | 112 | break; |
113 | case DCCP_OPEN: | ||
114 | case DCCP_PARTOPEN: | ||
115 | /* Give waiting application a chance to read pending data */ | ||
116 | queued = 1; | ||
117 | dccp_fin(sk, skb); | ||
118 | dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ); | ||
119 | /* fall through */ | ||
120 | case DCCP_PASSIVE_CLOSEREQ: | ||
121 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); | ||
122 | } | ||
123 | return queued; | ||
59 | } | 124 | } |
60 | 125 | ||
61 | static u8 dccp_reset_code_convert(const u8 code) | 126 | static u8 dccp_reset_code_convert(const u8 code) |
@@ -90,7 +155,7 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) | |||
90 | dccp_fin(sk, skb); | 155 | dccp_fin(sk, skb); |
91 | 156 | ||
92 | if (err && !sock_flag(sk, SOCK_DEAD)) | 157 | if (err && !sock_flag(sk, SOCK_DEAD)) |
93 | sk_wake_async(sk, 0, POLL_ERR); | 158 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); |
94 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | 159 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); |
95 | } | 160 | } |
96 | 161 | ||
@@ -103,6 +168,21 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | |||
103 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 168 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
104 | } | 169 | } |
105 | 170 | ||
171 | static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) | ||
172 | { | ||
173 | const struct dccp_sock *dp = dccp_sk(sk); | ||
174 | |||
175 | /* Don't deliver to RX CCID when node has shut down read end. */ | ||
176 | if (!(sk->sk_shutdown & RCV_SHUTDOWN)) | ||
177 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
178 | /* | ||
179 | * Until the TX queue has been drained, we can not honour SHUT_WR, since | ||
180 | * we need received feedback as input to adjust congestion control. | ||
181 | */ | ||
182 | if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN)) | ||
183 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
184 | } | ||
185 | |||
106 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | 186 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) |
107 | { | 187 | { |
108 | const struct dccp_hdr *dh = dccp_hdr(skb); | 188 | const struct dccp_hdr *dh = dccp_hdr(skb); |
@@ -209,13 +289,11 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
209 | case DCCP_PKT_DATAACK: | 289 | case DCCP_PKT_DATAACK: |
210 | case DCCP_PKT_DATA: | 290 | case DCCP_PKT_DATA: |
211 | /* | 291 | /* |
212 | * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED | 292 | * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when |
213 | * option if it is. | 293 | * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening" |
294 | * - sk_receive_queue is full, use Code 2, "Receive Buffer" | ||
214 | */ | 295 | */ |
215 | __skb_pull(skb, dh->dccph_doff * 4); | 296 | dccp_enqueue_skb(sk, skb); |
216 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
217 | skb_set_owner_r(skb, sk); | ||
218 | sk->sk_data_ready(sk, 0); | ||
219 | return 0; | 297 | return 0; |
220 | case DCCP_PKT_ACK: | 298 | case DCCP_PKT_ACK: |
221 | goto discard; | 299 | goto discard; |
@@ -231,11 +309,13 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
231 | dccp_rcv_reset(sk, skb); | 309 | dccp_rcv_reset(sk, skb); |
232 | return 0; | 310 | return 0; |
233 | case DCCP_PKT_CLOSEREQ: | 311 | case DCCP_PKT_CLOSEREQ: |
234 | dccp_rcv_closereq(sk, skb); | 312 | if (dccp_rcv_closereq(sk, skb)) |
313 | return 0; | ||
235 | goto discard; | 314 | goto discard; |
236 | case DCCP_PKT_CLOSE: | 315 | case DCCP_PKT_CLOSE: |
237 | dccp_rcv_close(sk, skb); | 316 | if (dccp_rcv_close(sk, skb)) |
238 | return 0; | 317 | return 0; |
318 | goto discard; | ||
239 | case DCCP_PKT_REQUEST: | 319 | case DCCP_PKT_REQUEST: |
240 | /* Step 7 | 320 | /* Step 7 |
241 | * or (S.is_server and P.type == Response) | 321 | * or (S.is_server and P.type == Response) |
@@ -289,7 +369,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
289 | if (dccp_check_seqno(sk, skb)) | 369 | if (dccp_check_seqno(sk, skb)) |
290 | goto discard; | 370 | goto discard; |
291 | 371 | ||
292 | if (dccp_parse_options(sk, skb)) | 372 | if (dccp_parse_options(sk, NULL, skb)) |
293 | goto discard; | 373 | goto discard; |
294 | 374 | ||
295 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 375 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
@@ -300,9 +380,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
300 | DCCP_SKB_CB(skb)->dccpd_seq, | 380 | DCCP_SKB_CB(skb)->dccpd_seq, |
301 | DCCP_ACKVEC_STATE_RECEIVED)) | 381 | DCCP_ACKVEC_STATE_RECEIVED)) |
302 | goto discard; | 382 | goto discard; |
303 | 383 | dccp_deliver_input_to_ccids(sk, skb); | |
304 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
305 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
306 | 384 | ||
307 | return __dccp_rcv_established(sk, skb, dh, len); | 385 | return __dccp_rcv_established(sk, skb, dh, len); |
308 | discard: | 386 | discard: |
@@ -349,7 +427,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
349 | goto out_invalid_packet; | 427 | goto out_invalid_packet; |
350 | } | 428 | } |
351 | 429 | ||
352 | if (dccp_parse_options(sk, skb)) | 430 | if (dccp_parse_options(sk, NULL, skb)) |
353 | goto out_invalid_packet; | 431 | goto out_invalid_packet; |
354 | 432 | ||
355 | /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ | 433 | /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ |
@@ -402,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
402 | 480 | ||
403 | if (!sock_flag(sk, SOCK_DEAD)) { | 481 | if (!sock_flag(sk, SOCK_DEAD)) { |
404 | sk->sk_state_change(sk); | 482 | sk->sk_state_change(sk); |
405 | sk_wake_async(sk, 0, POLL_OUT); | 483 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); |
406 | } | 484 | } |
407 | 485 | ||
408 | if (sk->sk_write_pending || icsk->icsk_ack.pingpong || | 486 | if (sk->sk_write_pending || icsk->icsk_ack.pingpong || |
@@ -531,7 +609,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
531 | /* | 609 | /* |
532 | * Step 8: Process options and mark acknowledgeable | 610 | * Step 8: Process options and mark acknowledgeable |
533 | */ | 611 | */ |
534 | if (dccp_parse_options(sk, skb)) | 612 | if (dccp_parse_options(sk, NULL, skb)) |
535 | goto discard; | 613 | goto discard; |
536 | 614 | ||
537 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 615 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
@@ -543,8 +621,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
543 | DCCP_ACKVEC_STATE_RECEIVED)) | 621 | DCCP_ACKVEC_STATE_RECEIVED)) |
544 | goto discard; | 622 | goto discard; |
545 | 623 | ||
546 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | 624 | dccp_deliver_input_to_ccids(sk, skb); |
547 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
548 | } | 625 | } |
549 | 626 | ||
550 | /* | 627 | /* |
@@ -560,16 +637,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
560 | return 0; | 637 | return 0; |
561 | /* | 638 | /* |
562 | * Step 7: Check for unexpected packet types | 639 | * Step 7: Check for unexpected packet types |
563 | * If (S.is_server and P.type == CloseReq) | 640 | * If (S.is_server and P.type == Response) |
564 | * or (S.is_server and P.type == Response) | ||
565 | * or (S.is_client and P.type == Request) | 641 | * or (S.is_client and P.type == Request) |
566 | * or (S.state == RESPOND and P.type == Data), | 642 | * or (S.state == RESPOND and P.type == Data), |
567 | * Send Sync packet acknowledging P.seqno | 643 | * Send Sync packet acknowledging P.seqno |
568 | * Drop packet and return | 644 | * Drop packet and return |
569 | */ | 645 | */ |
570 | } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && | 646 | } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && |
571 | (dh->dccph_type == DCCP_PKT_RESPONSE || | 647 | dh->dccph_type == DCCP_PKT_RESPONSE) || |
572 | dh->dccph_type == DCCP_PKT_CLOSEREQ)) || | ||
573 | (dp->dccps_role == DCCP_ROLE_CLIENT && | 648 | (dp->dccps_role == DCCP_ROLE_CLIENT && |
574 | dh->dccph_type == DCCP_PKT_REQUEST) || | 649 | dh->dccph_type == DCCP_PKT_REQUEST) || |
575 | (sk->sk_state == DCCP_RESPOND && | 650 | (sk->sk_state == DCCP_RESPOND && |
@@ -577,11 +652,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
577 | dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); | 652 | dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); |
578 | goto discard; | 653 | goto discard; |
579 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { | 654 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { |
580 | dccp_rcv_closereq(sk, skb); | 655 | if (dccp_rcv_closereq(sk, skb)) |
656 | return 0; | ||
581 | goto discard; | 657 | goto discard; |
582 | } else if (dh->dccph_type == DCCP_PKT_CLOSE) { | 658 | } else if (dh->dccph_type == DCCP_PKT_CLOSE) { |
583 | dccp_rcv_close(sk, skb); | 659 | if (dccp_rcv_close(sk, skb)) |
584 | return 0; | 660 | return 0; |
661 | goto discard; | ||
585 | } | 662 | } |
586 | 663 | ||
587 | switch (sk->sk_state) { | 664 | switch (sk->sk_state) { |
@@ -611,7 +688,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
611 | switch (old_state) { | 688 | switch (old_state) { |
612 | case DCCP_PARTOPEN: | 689 | case DCCP_PARTOPEN: |
613 | sk->sk_state_change(sk); | 690 | sk->sk_state_change(sk); |
614 | sk_wake_async(sk, 0, POLL_OUT); | 691 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); |
615 | break; | 692 | break; |
616 | } | 693 | } |
617 | } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { | 694 | } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index db17b83e8d3e..9e38b0d6195c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -408,7 +408,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
408 | 408 | ||
409 | dccp_sync_mss(newsk, dst_mtu(dst)); | 409 | dccp_sync_mss(newsk, dst_mtu(dst)); |
410 | 410 | ||
411 | __inet_hash(&dccp_hashinfo, newsk, 0); | 411 | __inet_hash_nolisten(&dccp_hashinfo, newsk); |
412 | __inet_inherit_port(&dccp_hashinfo, sk, newsk); | 412 | __inet_inherit_port(&dccp_hashinfo, sk, newsk); |
413 | 413 | ||
414 | return newsk; | 414 | return newsk; |
@@ -469,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, | |||
469 | }; | 469 | }; |
470 | 470 | ||
471 | security_skb_classify_flow(skb, &fl); | 471 | security_skb_classify_flow(skb, &fl); |
472 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | 472 | if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) { |
473 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | 473 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); |
474 | return NULL; | 474 | return NULL; |
475 | } | 475 | } |
@@ -600,11 +600,12 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
600 | if (req == NULL) | 600 | if (req == NULL) |
601 | goto drop; | 601 | goto drop; |
602 | 602 | ||
603 | if (dccp_parse_options(sk, skb)) | ||
604 | goto drop_and_free; | ||
605 | |||
606 | dccp_reqsk_init(req, skb); | 603 | dccp_reqsk_init(req, skb); |
607 | 604 | ||
605 | dreq = dccp_rsk(req); | ||
606 | if (dccp_parse_options(sk, dreq, skb)) | ||
607 | goto drop_and_free; | ||
608 | |||
608 | if (security_inet_conn_request(sk, skb, req)) | 609 | if (security_inet_conn_request(sk, skb, req)) |
609 | goto drop_and_free; | 610 | goto drop_and_free; |
610 | 611 | ||
@@ -621,7 +622,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
621 | * In fact we defer setting S.GSR, S.SWL, S.SWH to | 622 | * In fact we defer setting S.GSR, S.SWL, S.SWH to |
622 | * dccp_create_openreq_child. | 623 | * dccp_create_openreq_child. |
623 | */ | 624 | */ |
624 | dreq = dccp_rsk(req); | ||
625 | dreq->dreq_isr = dcb->dccpd_seq; | 625 | dreq->dreq_isr = dcb->dccpd_seq; |
626 | dreq->dreq_iss = dccp_v4_init_sequence(skb); | 626 | dreq->dreq_iss = dccp_v4_init_sequence(skb); |
627 | dreq->dreq_service = service; | 627 | dreq->dreq_service = service; |
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 87c98fb86fa8..f42b75ce7f5c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -415,11 +415,12 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
415 | if (req == NULL) | 415 | if (req == NULL) |
416 | goto drop; | 416 | goto drop; |
417 | 417 | ||
418 | if (dccp_parse_options(sk, skb)) | ||
419 | goto drop_and_free; | ||
420 | |||
421 | dccp_reqsk_init(req, skb); | 418 | dccp_reqsk_init(req, skb); |
422 | 419 | ||
420 | dreq = dccp_rsk(req); | ||
421 | if (dccp_parse_options(sk, dreq, skb)) | ||
422 | goto drop_and_free; | ||
423 | |||
423 | if (security_inet_conn_request(sk, skb, req)) | 424 | if (security_inet_conn_request(sk, skb, req)) |
424 | goto drop_and_free; | 425 | goto drop_and_free; |
425 | 426 | ||
@@ -449,7 +450,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
449 | * In fact we defer setting S.GSR, S.SWL, S.SWH to | 450 | * In fact we defer setting S.GSR, S.SWL, S.SWH to |
450 | * dccp_create_openreq_child. | 451 | * dccp_create_openreq_child. |
451 | */ | 452 | */ |
452 | dreq = dccp_rsk(req); | ||
453 | dreq->dreq_isr = dcb->dccpd_seq; | 453 | dreq->dreq_isr = dcb->dccpd_seq; |
454 | dreq->dreq_iss = dccp_v6_init_sequence(skb); | 454 | dreq->dreq_iss = dccp_v6_init_sequence(skb); |
455 | dreq->dreq_service = service; | 455 | dreq->dreq_service = service; |
@@ -994,7 +994,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
994 | if (final_p) | 994 | if (final_p) |
995 | ipv6_addr_copy(&fl.fl6_dst, final_p); | 995 | ipv6_addr_copy(&fl.fl6_dst, final_p); |
996 | 996 | ||
997 | err = __xfrm_lookup(&dst, &fl, sk, 1); | 997 | err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT); |
998 | if (err < 0) { | 998 | if (err < 0) { |
999 | if (err == -EREMOTE) | 999 | if (err == -EREMOTE) |
1000 | err = ip6_dst_blackhole(sk, &dst, &fl); | 1000 | err = ip6_dst_blackhole(sk, &dst, &fl); |
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 831b76e08d02..027d1814e1ab 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -117,11 +117,13 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
117 | struct dccp_sock *newdp = dccp_sk(newsk); | 117 | struct dccp_sock *newdp = dccp_sk(newsk); |
118 | struct dccp_minisock *newdmsk = dccp_msk(newsk); | 118 | struct dccp_minisock *newdmsk = dccp_msk(newsk); |
119 | 119 | ||
120 | newdp->dccps_role = DCCP_ROLE_SERVER; | 120 | newdp->dccps_role = DCCP_ROLE_SERVER; |
121 | newdp->dccps_hc_rx_ackvec = NULL; | 121 | newdp->dccps_hc_rx_ackvec = NULL; |
122 | newdp->dccps_service_list = NULL; | 122 | newdp->dccps_service_list = NULL; |
123 | newdp->dccps_service = dreq->dreq_service; | 123 | newdp->dccps_service = dreq->dreq_service; |
124 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | 124 | newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; |
125 | newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; | ||
126 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
125 | 127 | ||
126 | if (dccp_feat_clone(sk, newsk)) | 128 | if (dccp_feat_clone(sk, newsk)) |
127 | goto out_free; | 129 | goto out_free; |
@@ -200,10 +202,10 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | |||
200 | struct request_sock **prev) | 202 | struct request_sock **prev) |
201 | { | 203 | { |
202 | struct sock *child = NULL; | 204 | struct sock *child = NULL; |
205 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
203 | 206 | ||
204 | /* Check for retransmitted REQUEST */ | 207 | /* Check for retransmitted REQUEST */ |
205 | if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { | 208 | if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { |
206 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
207 | 209 | ||
208 | if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { | 210 | if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { |
209 | dccp_pr_debug("Retransmitted REQUEST\n"); | 211 | dccp_pr_debug("Retransmitted REQUEST\n"); |
@@ -227,22 +229,22 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | |||
227 | goto drop; | 229 | goto drop; |
228 | 230 | ||
229 | /* Invalid ACK */ | 231 | /* Invalid ACK */ |
230 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { | 232 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) { |
231 | dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " | 233 | dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " |
232 | "dreq_iss=%llu\n", | 234 | "dreq_iss=%llu\n", |
233 | (unsigned long long) | 235 | (unsigned long long) |
234 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | 236 | DCCP_SKB_CB(skb)->dccpd_ack_seq, |
235 | (unsigned long long) | 237 | (unsigned long long) dreq->dreq_iss); |
236 | dccp_rsk(req)->dreq_iss); | ||
237 | goto drop; | 238 | goto drop; |
238 | } | 239 | } |
239 | 240 | ||
241 | if (dccp_parse_options(sk, dreq, skb)) | ||
242 | goto drop; | ||
243 | |||
240 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); | 244 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); |
241 | if (child == NULL) | 245 | if (child == NULL) |
242 | goto listen_overflow; | 246 | goto listen_overflow; |
243 | 247 | ||
244 | /* FIXME: deal with options */ | ||
245 | |||
246 | inet_csk_reqsk_queue_unlink(sk, req, prev); | 248 | inet_csk_reqsk_queue_unlink(sk, req, prev); |
247 | inet_csk_reqsk_queue_removed(sk, req); | 249 | inet_csk_reqsk_queue_removed(sk, req); |
248 | inet_csk_reqsk_queue_add(sk, req, child); | 250 | inet_csk_reqsk_queue_add(sk, req, child); |
@@ -303,9 +305,12 @@ EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); | |||
303 | 305 | ||
304 | void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) | 306 | void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) |
305 | { | 307 | { |
306 | inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; | 308 | struct dccp_request_sock *dreq = dccp_rsk(req); |
307 | inet_rsk(req)->acked = 0; | 309 | |
308 | req->rcv_wnd = sysctl_dccp_feat_sequence_window; | 310 | inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; |
311 | inet_rsk(req)->acked = 0; | ||
312 | req->rcv_wnd = sysctl_dccp_feat_sequence_window; | ||
313 | dreq->dreq_timestamp_echo = 0; | ||
309 | } | 314 | } |
310 | 315 | ||
311 | EXPORT_SYMBOL_GPL(dccp_reqsk_init); | 316 | EXPORT_SYMBOL_GPL(dccp_reqsk_init); |
diff --git a/net/dccp/options.c b/net/dccp/options.c index d286cffe2c49..d2a84a2fecee 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
@@ -46,7 +46,13 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) | |||
46 | return value; | 46 | return value; |
47 | } | 47 | } |
48 | 48 | ||
49 | int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | 49 | /** |
50 | * dccp_parse_options - Parse DCCP options present in @skb | ||
51 | * @sk: client|server|listening dccp socket (when @dreq != NULL) | ||
52 | * @dreq: request socket to use during connection setup, or NULL | ||
53 | */ | ||
54 | int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | ||
55 | struct sk_buff *skb) | ||
50 | { | 56 | { |
51 | struct dccp_sock *dp = dccp_sk(sk); | 57 | struct dccp_sock *dp = dccp_sk(sk); |
52 | const struct dccp_hdr *dh = dccp_hdr(skb); | 58 | const struct dccp_hdr *dh = dccp_hdr(skb); |
@@ -92,6 +98,20 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
92 | goto out_invalid_option; | 98 | goto out_invalid_option; |
93 | } | 99 | } |
94 | 100 | ||
101 | /* | ||
102 | * CCID-Specific Options (from RFC 4340, sec. 10.3): | ||
103 | * | ||
104 | * Option numbers 128 through 191 are for options sent from the | ||
105 | * HC-Sender to the HC-Receiver; option numbers 192 through 255 | ||
106 | * are for options sent from the HC-Receiver to the HC-Sender. | ||
107 | * | ||
108 | * CCID-specific options are ignored during connection setup, as | ||
109 | * negotiation may still be in progress (see RFC 4340, 10.3). | ||
110 | * | ||
111 | */ | ||
112 | if (dreq != NULL && opt >= 128) | ||
113 | goto ignore_option; | ||
114 | |||
95 | switch (opt) { | 115 | switch (opt) { |
96 | case DCCPO_PADDING: | 116 | case DCCPO_PADDING: |
97 | break; | 117 | break; |
@@ -112,6 +132,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
112 | case DCCPO_CHANGE_L: | 132 | case DCCPO_CHANGE_L: |
113 | /* fall through */ | 133 | /* fall through */ |
114 | case DCCPO_CHANGE_R: | 134 | case DCCPO_CHANGE_R: |
135 | if (pkt_type == DCCP_PKT_DATA) | ||
136 | break; | ||
115 | if (len < 2) | 137 | if (len < 2) |
116 | goto out_invalid_option; | 138 | goto out_invalid_option; |
117 | rc = dccp_feat_change_recv(sk, opt, *value, value + 1, | 139 | rc = dccp_feat_change_recv(sk, opt, *value, value + 1, |
@@ -128,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
128 | case DCCPO_CONFIRM_L: | 150 | case DCCPO_CONFIRM_L: |
129 | /* fall through */ | 151 | /* fall through */ |
130 | case DCCPO_CONFIRM_R: | 152 | case DCCPO_CONFIRM_R: |
131 | if (len < 2) | 153 | if (pkt_type == DCCP_PKT_DATA) |
154 | break; | ||
155 | if (len < 2) /* FIXME this disallows empty confirm */ | ||
132 | goto out_invalid_option; | 156 | goto out_invalid_option; |
133 | if (dccp_feat_confirm_recv(sk, opt, *value, | 157 | if (dccp_feat_confirm_recv(sk, opt, *value, |
134 | value + 1, len - 1)) | 158 | value + 1, len - 1)) |
@@ -136,7 +160,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
136 | break; | 160 | break; |
137 | case DCCPO_ACK_VECTOR_0: | 161 | case DCCPO_ACK_VECTOR_0: |
138 | case DCCPO_ACK_VECTOR_1: | 162 | case DCCPO_ACK_VECTOR_1: |
139 | if (pkt_type == DCCP_PKT_DATA) | 163 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ |
140 | break; | 164 | break; |
141 | 165 | ||
142 | if (dccp_msk(sk)->dccpms_send_ack_vector && | 166 | if (dccp_msk(sk)->dccpms_send_ack_vector && |
@@ -146,15 +170,27 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
146 | case DCCPO_TIMESTAMP: | 170 | case DCCPO_TIMESTAMP: |
147 | if (len != 4) | 171 | if (len != 4) |
148 | goto out_invalid_option; | 172 | goto out_invalid_option; |
149 | 173 | /* | |
174 | * RFC 4340 13.1: "The precise time corresponding to | ||
175 | * Timestamp Value zero is not specified". We use | ||
176 | * zero to indicate absence of a meaningful timestamp. | ||
177 | */ | ||
150 | opt_val = get_unaligned((__be32 *)value); | 178 | opt_val = get_unaligned((__be32 *)value); |
151 | opt_recv->dccpor_timestamp = ntohl(opt_val); | 179 | if (unlikely(opt_val == 0)) { |
152 | 180 | DCCP_WARN("Timestamp with zero value\n"); | |
153 | dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; | 181 | break; |
154 | dp->dccps_timestamp_time = ktime_get_real(); | 182 | } |
155 | 183 | ||
184 | if (dreq != NULL) { | ||
185 | dreq->dreq_timestamp_echo = ntohl(opt_val); | ||
186 | dreq->dreq_timestamp_time = dccp_timestamp(); | ||
187 | } else { | ||
188 | opt_recv->dccpor_timestamp = | ||
189 | dp->dccps_timestamp_echo = ntohl(opt_val); | ||
190 | dp->dccps_timestamp_time = dccp_timestamp(); | ||
191 | } | ||
156 | dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", | 192 | dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", |
157 | dccp_role(sk), opt_recv->dccpor_timestamp, | 193 | dccp_role(sk), ntohl(opt_val), |
158 | (unsigned long long) | 194 | (unsigned long long) |
159 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 195 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
160 | break; | 196 | break; |
@@ -194,18 +230,17 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
194 | opt_recv->dccpor_elapsed_time = elapsed_time; | 230 | opt_recv->dccpor_elapsed_time = elapsed_time; |
195 | break; | 231 | break; |
196 | case DCCPO_ELAPSED_TIME: | 232 | case DCCPO_ELAPSED_TIME: |
197 | if (len != 2 && len != 4) | 233 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 13.2 */ |
198 | goto out_invalid_option; | 234 | break; |
199 | |||
200 | if (pkt_type == DCCP_PKT_DATA) | ||
201 | continue; | ||
202 | 235 | ||
203 | if (len == 2) { | 236 | if (len == 2) { |
204 | __be16 opt_val2 = get_unaligned((__be16 *)value); | 237 | __be16 opt_val2 = get_unaligned((__be16 *)value); |
205 | elapsed_time = ntohs(opt_val2); | 238 | elapsed_time = ntohs(opt_val2); |
206 | } else { | 239 | } else if (len == 4) { |
207 | opt_val = get_unaligned((__be32 *)value); | 240 | opt_val = get_unaligned((__be32 *)value); |
208 | elapsed_time = ntohl(opt_val); | 241 | elapsed_time = ntohl(opt_val); |
242 | } else { | ||
243 | goto out_invalid_option; | ||
209 | } | 244 | } |
210 | 245 | ||
211 | if (elapsed_time > opt_recv->dccpor_elapsed_time) | 246 | if (elapsed_time > opt_recv->dccpor_elapsed_time) |
@@ -214,15 +249,6 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
214 | dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", | 249 | dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", |
215 | dccp_role(sk), elapsed_time); | 250 | dccp_role(sk), elapsed_time); |
216 | break; | 251 | break; |
217 | /* | ||
218 | * From RFC 4340, sec. 10.3: | ||
219 | * | ||
220 | * Option numbers 128 through 191 are for | ||
221 | * options sent from the HC-Sender to the | ||
222 | * HC-Receiver; option numbers 192 through 255 | ||
223 | * are for options sent from the HC-Receiver to | ||
224 | * the HC-Sender. | ||
225 | */ | ||
226 | case 128 ... 191: { | 252 | case 128 ... 191: { |
227 | const u16 idx = value - options; | 253 | const u16 idx = value - options; |
228 | 254 | ||
@@ -246,7 +272,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
246 | "implemented, ignoring", sk, opt, len); | 272 | "implemented, ignoring", sk, opt, len); |
247 | break; | 273 | break; |
248 | } | 274 | } |
249 | 275 | ignore_option: | |
250 | if (opt != DCCPO_MANDATORY) | 276 | if (opt != DCCPO_MANDATORY) |
251 | mandatory = 0; | 277 | mandatory = 0; |
252 | } | 278 | } |
@@ -382,16 +408,24 @@ int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) | |||
382 | 408 | ||
383 | EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); | 409 | EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); |
384 | 410 | ||
385 | static int dccp_insert_option_timestamp_echo(struct sock *sk, | 411 | static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, |
412 | struct dccp_request_sock *dreq, | ||
386 | struct sk_buff *skb) | 413 | struct sk_buff *skb) |
387 | { | 414 | { |
388 | struct dccp_sock *dp = dccp_sk(sk); | ||
389 | __be32 tstamp_echo; | 415 | __be32 tstamp_echo; |
390 | int len, elapsed_time_len; | ||
391 | unsigned char *to; | 416 | unsigned char *to; |
392 | const suseconds_t delta = ktime_us_delta(ktime_get_real(), | 417 | u32 elapsed_time, elapsed_time_len, len; |
393 | dp->dccps_timestamp_time); | 418 | |
394 | u32 elapsed_time = delta / 10; | 419 | if (dreq != NULL) { |
420 | elapsed_time = dccp_timestamp() - dreq->dreq_timestamp_time; | ||
421 | tstamp_echo = htonl(dreq->dreq_timestamp_echo); | ||
422 | dreq->dreq_timestamp_echo = 0; | ||
423 | } else { | ||
424 | elapsed_time = dccp_timestamp() - dp->dccps_timestamp_time; | ||
425 | tstamp_echo = htonl(dp->dccps_timestamp_echo); | ||
426 | dp->dccps_timestamp_echo = 0; | ||
427 | } | ||
428 | |||
395 | elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | 429 | elapsed_time_len = dccp_elapsed_time_len(elapsed_time); |
396 | len = 6 + elapsed_time_len; | 430 | len = 6 + elapsed_time_len; |
397 | 431 | ||
@@ -404,7 +438,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, | |||
404 | *to++ = DCCPO_TIMESTAMP_ECHO; | 438 | *to++ = DCCPO_TIMESTAMP_ECHO; |
405 | *to++ = len; | 439 | *to++ = len; |
406 | 440 | ||
407 | tstamp_echo = htonl(dp->dccps_timestamp_echo); | ||
408 | memcpy(to, &tstamp_echo, 4); | 441 | memcpy(to, &tstamp_echo, 4); |
409 | to += 4; | 442 | to += 4; |
410 | 443 | ||
@@ -416,8 +449,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk, | |||
416 | memcpy(to, &var32, 4); | 449 | memcpy(to, &var32, 4); |
417 | } | 450 | } |
418 | 451 | ||
419 | dp->dccps_timestamp_echo = 0; | ||
420 | dp->dccps_timestamp_time = ktime_set(0, 0); | ||
421 | return 0; | 452 | return 0; |
422 | } | 453 | } |
423 | 454 | ||
@@ -510,6 +541,18 @@ static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) | |||
510 | return 0; | 541 | return 0; |
511 | } | 542 | } |
512 | 543 | ||
544 | /* The length of all options needs to be a multiple of 4 (5.8) */ | ||
545 | static void dccp_insert_option_padding(struct sk_buff *skb) | ||
546 | { | ||
547 | int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; | ||
548 | |||
549 | if (padding != 0) { | ||
550 | padding = 4 - padding; | ||
551 | memset(skb_push(skb, padding), 0, padding); | ||
552 | DCCP_SKB_CB(skb)->dccpd_opt_len += padding; | ||
553 | } | ||
554 | } | ||
555 | |||
513 | int dccp_insert_options(struct sock *sk, struct sk_buff *skb) | 556 | int dccp_insert_options(struct sock *sk, struct sk_buff *skb) |
514 | { | 557 | { |
515 | struct dccp_sock *dp = dccp_sk(sk); | 558 | struct dccp_sock *dp = dccp_sk(sk); |
@@ -526,10 +569,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
526 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && | 569 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && |
527 | dccp_insert_option_ackvec(sk, skb)) | 570 | dccp_insert_option_ackvec(sk, skb)) |
528 | return -1; | 571 | return -1; |
529 | |||
530 | if (dp->dccps_timestamp_echo != 0 && | ||
531 | dccp_insert_option_timestamp_echo(sk, skb)) | ||
532 | return -1; | ||
533 | } | 572 | } |
534 | 573 | ||
535 | if (dp->dccps_hc_rx_insert_options) { | 574 | if (dp->dccps_hc_rx_insert_options) { |
@@ -553,18 +592,22 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
553 | dccp_insert_option_timestamp(sk, skb)) | 592 | dccp_insert_option_timestamp(sk, skb)) |
554 | return -1; | 593 | return -1; |
555 | 594 | ||
556 | /* XXX: insert other options when appropriate */ | 595 | if (dp->dccps_timestamp_echo != 0 && |
596 | dccp_insert_option_timestamp_echo(dp, NULL, skb)) | ||
597 | return -1; | ||
598 | |||
599 | dccp_insert_option_padding(skb); | ||
600 | return 0; | ||
601 | } | ||
557 | 602 | ||
558 | if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { | 603 | int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) |
559 | /* The length of all options has to be a multiple of 4 */ | 604 | { |
560 | int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; | 605 | DCCP_SKB_CB(skb)->dccpd_opt_len = 0; |
561 | 606 | ||
562 | if (padding != 0) { | 607 | if (dreq->dreq_timestamp_echo != 0 && |
563 | padding = 4 - padding; | 608 | dccp_insert_option_timestamp_echo(NULL, dreq, skb)) |
564 | memset(skb_push(skb, padding), 0, padding); | 609 | return -1; |
565 | DCCP_SKB_CB(skb)->dccpd_opt_len += padding; | ||
566 | } | ||
567 | } | ||
568 | 610 | ||
611 | dccp_insert_option_padding(skb); | ||
569 | return 0; | 612 | return 0; |
570 | } | 613 | } |
diff --git a/net/dccp/output.c b/net/dccp/output.c index f49544618f20..3b763db3d863 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -133,15 +133,31 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
133 | return -ENOBUFS; | 133 | return -ENOBUFS; |
134 | } | 134 | } |
135 | 135 | ||
136 | /** | ||
137 | * dccp_determine_ccmps - Find out about CCID-specific packet-size limits | ||
138 | * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), | ||
139 | * since the RX CCID is restricted to feedback packets (Acks), which are small | ||
140 | * in comparison with the data traffic. A value of 0 means "no current CCMPS". | ||
141 | */ | ||
142 | static u32 dccp_determine_ccmps(const struct dccp_sock *dp) | ||
143 | { | ||
144 | const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid; | ||
145 | |||
146 | if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL) | ||
147 | return 0; | ||
148 | return tx_ccid->ccid_ops->ccid_ccmps; | ||
149 | } | ||
150 | |||
136 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | 151 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) |
137 | { | 152 | { |
138 | struct inet_connection_sock *icsk = inet_csk(sk); | 153 | struct inet_connection_sock *icsk = inet_csk(sk); |
139 | struct dccp_sock *dp = dccp_sk(sk); | 154 | struct dccp_sock *dp = dccp_sk(sk); |
140 | int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - | 155 | u32 ccmps = dccp_determine_ccmps(dp); |
141 | sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); | 156 | int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; |
142 | 157 | ||
143 | /* Now subtract optional transport overhead */ | 158 | /* Account for header lengths and IPv4/v6 option overhead */ |
144 | mss_now -= icsk->icsk_ext_hdr_len; | 159 | cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + |
160 | sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); | ||
145 | 161 | ||
146 | /* | 162 | /* |
147 | * FIXME: this should come from the CCID infrastructure, where, say, | 163 | * FIXME: this should come from the CCID infrastructure, where, say, |
@@ -151,13 +167,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | |||
151 | * make it a multiple of 4 | 167 | * make it a multiple of 4 |
152 | */ | 168 | */ |
153 | 169 | ||
154 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; | 170 | cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; |
155 | 171 | ||
156 | /* And store cached results */ | 172 | /* And store cached results */ |
157 | icsk->icsk_pmtu_cookie = pmtu; | 173 | icsk->icsk_pmtu_cookie = pmtu; |
158 | dp->dccps_mss_cache = mss_now; | 174 | dp->dccps_mss_cache = cur_mps; |
159 | 175 | ||
160 | return mss_now; | 176 | return cur_mps; |
161 | } | 177 | } |
162 | 178 | ||
163 | EXPORT_SYMBOL_GPL(dccp_sync_mss); | 179 | EXPORT_SYMBOL_GPL(dccp_sync_mss); |
@@ -170,7 +186,7 @@ void dccp_write_space(struct sock *sk) | |||
170 | wake_up_interruptible(sk->sk_sleep); | 186 | wake_up_interruptible(sk->sk_sleep); |
171 | /* Should agree with poll, otherwise some programs break */ | 187 | /* Should agree with poll, otherwise some programs break */ |
172 | if (sock_writeable(sk)) | 188 | if (sock_writeable(sk)) |
173 | sk_wake_async(sk, 2, POLL_OUT); | 189 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); |
174 | 190 | ||
175 | read_unlock(&sk->sk_callback_lock); | 191 | read_unlock(&sk->sk_callback_lock); |
176 | } | 192 | } |
@@ -303,7 +319,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
303 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; | 319 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; |
304 | DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; | 320 | DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; |
305 | 321 | ||
306 | if (dccp_insert_options(sk, skb)) { | 322 | if (dccp_insert_options_rsk(dreq, skb)) { |
307 | kfree_skb(skb); | 323 | kfree_skb(skb); |
308 | return NULL; | 324 | return NULL; |
309 | } | 325 | } |
@@ -391,7 +407,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) | |||
391 | * FIXME: what if rebuild_header fails? | 407 | * FIXME: what if rebuild_header fails? |
392 | * Should we be doing a rebuild_header here? | 408 | * Should we be doing a rebuild_header here? |
393 | */ | 409 | */ |
394 | int err = inet_sk_rebuild_header(sk); | 410 | int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk); |
395 | 411 | ||
396 | if (err != 0) | 412 | if (err != 0) |
397 | return err; | 413 | return err; |
@@ -567,14 +583,27 @@ void dccp_send_close(struct sock *sk, const int active) | |||
567 | 583 | ||
568 | /* Reserve space for headers and prepare control bits. */ | 584 | /* Reserve space for headers and prepare control bits. */ |
569 | skb_reserve(skb, sk->sk_prot->max_header); | 585 | skb_reserve(skb, sk->sk_prot->max_header); |
570 | DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? | 586 | if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait) |
571 | DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; | 587 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ; |
588 | else | ||
589 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; | ||
572 | 590 | ||
573 | if (active) { | 591 | if (active) { |
574 | dccp_write_xmit(sk, 1); | 592 | dccp_write_xmit(sk, 1); |
575 | dccp_skb_entail(sk, skb); | 593 | dccp_skb_entail(sk, skb); |
576 | dccp_transmit_skb(sk, skb_clone(skb, prio)); | 594 | dccp_transmit_skb(sk, skb_clone(skb, prio)); |
577 | /* FIXME do we need a retransmit timer here? */ | 595 | /* |
596 | * Retransmission timer for active-close: RFC 4340, 8.3 requires | ||
597 | * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ | ||
598 | * state can be left. The initial timeout is 2 RTTs. | ||
599 | * Since RTT measurement is done by the CCIDs, there is no easy | ||
600 | * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4 | ||
601 | * is too low (200ms); we use a high value to avoid unnecessary | ||
602 | * retransmissions when the link RTT is > 0.2 seconds. | ||
603 | * FIXME: Let main module sample RTTs and use that instead. | ||
604 | */ | ||
605 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
606 | DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); | ||
578 | } else | 607 | } else |
579 | dccp_transmit_skb(sk, skb); | 608 | dccp_transmit_skb(sk, skb); |
580 | } | 609 | } |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7a3bea9c28c1..0bed4a6095b7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -60,8 +60,7 @@ void dccp_set_state(struct sock *sk, const int state) | |||
60 | { | 60 | { |
61 | const int oldstate = sk->sk_state; | 61 | const int oldstate = sk->sk_state; |
62 | 62 | ||
63 | dccp_pr_debug("%s(%p) %-10.10s -> %s\n", | 63 | dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk, |
64 | dccp_role(sk), sk, | ||
65 | dccp_state_name(oldstate), dccp_state_name(state)); | 64 | dccp_state_name(oldstate), dccp_state_name(state)); |
66 | WARN_ON(state == oldstate); | 65 | WARN_ON(state == oldstate); |
67 | 66 | ||
@@ -72,7 +71,8 @@ void dccp_set_state(struct sock *sk, const int state) | |||
72 | break; | 71 | break; |
73 | 72 | ||
74 | case DCCP_CLOSED: | 73 | case DCCP_CLOSED: |
75 | if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) | 74 | if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ || |
75 | oldstate == DCCP_CLOSING) | ||
76 | DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); | 76 | DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); |
77 | 77 | ||
78 | sk->sk_prot->unhash(sk); | 78 | sk->sk_prot->unhash(sk); |
@@ -93,6 +93,24 @@ void dccp_set_state(struct sock *sk, const int state) | |||
93 | 93 | ||
94 | EXPORT_SYMBOL_GPL(dccp_set_state); | 94 | EXPORT_SYMBOL_GPL(dccp_set_state); |
95 | 95 | ||
96 | static void dccp_finish_passive_close(struct sock *sk) | ||
97 | { | ||
98 | switch (sk->sk_state) { | ||
99 | case DCCP_PASSIVE_CLOSE: | ||
100 | /* Node (client or server) has received Close packet. */ | ||
101 | dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); | ||
102 | dccp_set_state(sk, DCCP_CLOSED); | ||
103 | break; | ||
104 | case DCCP_PASSIVE_CLOSEREQ: | ||
105 | /* | ||
106 | * Client received CloseReq. We set the `active' flag so that | ||
107 | * dccp_send_close() retransmits the Close as per RFC 4340, 8.3. | ||
108 | */ | ||
109 | dccp_send_close(sk, 1); | ||
110 | dccp_set_state(sk, DCCP_CLOSING); | ||
111 | } | ||
112 | } | ||
113 | |||
96 | void dccp_done(struct sock *sk) | 114 | void dccp_done(struct sock *sk) |
97 | { | 115 | { |
98 | dccp_set_state(sk, DCCP_CLOSED); | 116 | dccp_set_state(sk, DCCP_CLOSED); |
@@ -134,14 +152,17 @@ EXPORT_SYMBOL_GPL(dccp_packet_name); | |||
134 | const char *dccp_state_name(const int state) | 152 | const char *dccp_state_name(const int state) |
135 | { | 153 | { |
136 | static char *dccp_state_names[] = { | 154 | static char *dccp_state_names[] = { |
137 | [DCCP_OPEN] = "OPEN", | 155 | [DCCP_OPEN] = "OPEN", |
138 | [DCCP_REQUESTING] = "REQUESTING", | 156 | [DCCP_REQUESTING] = "REQUESTING", |
139 | [DCCP_PARTOPEN] = "PARTOPEN", | 157 | [DCCP_PARTOPEN] = "PARTOPEN", |
140 | [DCCP_LISTEN] = "LISTEN", | 158 | [DCCP_LISTEN] = "LISTEN", |
141 | [DCCP_RESPOND] = "RESPOND", | 159 | [DCCP_RESPOND] = "RESPOND", |
142 | [DCCP_CLOSING] = "CLOSING", | 160 | [DCCP_CLOSING] = "CLOSING", |
143 | [DCCP_TIME_WAIT] = "TIME_WAIT", | 161 | [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", |
144 | [DCCP_CLOSED] = "CLOSED", | 162 | [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", |
163 | [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", | ||
164 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
165 | [DCCP_CLOSED] = "CLOSED", | ||
145 | }; | 166 | }; |
146 | 167 | ||
147 | if (state >= DCCP_MAX_STATES) | 168 | if (state >= DCCP_MAX_STATES) |
@@ -174,6 +195,19 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
174 | 195 | ||
175 | dccp_minisock_init(&dp->dccps_minisock); | 196 | dccp_minisock_init(&dp->dccps_minisock); |
176 | 197 | ||
198 | icsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
199 | icsk->icsk_syn_retries = sysctl_dccp_request_retries; | ||
200 | sk->sk_state = DCCP_CLOSED; | ||
201 | sk->sk_write_space = dccp_write_space; | ||
202 | icsk->icsk_sync_mss = dccp_sync_mss; | ||
203 | dp->dccps_mss_cache = 536; | ||
204 | dp->dccps_rate_last = jiffies; | ||
205 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | ||
206 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | ||
207 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; | ||
208 | |||
209 | dccp_init_xmit_timers(sk); | ||
210 | |||
177 | /* | 211 | /* |
178 | * FIXME: We're hardcoding the CCID, and doing this at this point makes | 212 | * FIXME: We're hardcoding the CCID, and doing this at this point makes |
179 | * the listening (master) sock get CCID control blocks, which is not | 213 | * the listening (master) sock get CCID control blocks, which is not |
@@ -213,18 +247,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
213 | INIT_LIST_HEAD(&dmsk->dccpms_conf); | 247 | INIT_LIST_HEAD(&dmsk->dccpms_conf); |
214 | } | 248 | } |
215 | 249 | ||
216 | dccp_init_xmit_timers(sk); | ||
217 | icsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
218 | icsk->icsk_syn_retries = sysctl_dccp_request_retries; | ||
219 | sk->sk_state = DCCP_CLOSED; | ||
220 | sk->sk_write_space = dccp_write_space; | ||
221 | icsk->icsk_sync_mss = dccp_sync_mss; | ||
222 | dp->dccps_mss_cache = 536; | ||
223 | dp->dccps_rate_last = jiffies; | ||
224 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | ||
225 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | ||
226 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; | ||
227 | |||
228 | return 0; | 250 | return 0; |
229 | } | 251 | } |
230 | 252 | ||
@@ -275,6 +297,12 @@ static inline int dccp_listen_start(struct sock *sk, int backlog) | |||
275 | return inet_csk_listen_start(sk, backlog); | 297 | return inet_csk_listen_start(sk, backlog); |
276 | } | 298 | } |
277 | 299 | ||
300 | static inline int dccp_need_reset(int state) | ||
301 | { | ||
302 | return state != DCCP_CLOSED && state != DCCP_LISTEN && | ||
303 | state != DCCP_REQUESTING; | ||
304 | } | ||
305 | |||
278 | int dccp_disconnect(struct sock *sk, int flags) | 306 | int dccp_disconnect(struct sock *sk, int flags) |
279 | { | 307 | { |
280 | struct inet_connection_sock *icsk = inet_csk(sk); | 308 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -285,10 +313,15 @@ int dccp_disconnect(struct sock *sk, int flags) | |||
285 | if (old_state != DCCP_CLOSED) | 313 | if (old_state != DCCP_CLOSED) |
286 | dccp_set_state(sk, DCCP_CLOSED); | 314 | dccp_set_state(sk, DCCP_CLOSED); |
287 | 315 | ||
288 | /* ABORT function of RFC793 */ | 316 | /* |
317 | * This corresponds to the ABORT function of RFC793, sec. 3.8 | ||
318 | * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted". | ||
319 | */ | ||
289 | if (old_state == DCCP_LISTEN) { | 320 | if (old_state == DCCP_LISTEN) { |
290 | inet_csk_listen_stop(sk); | 321 | inet_csk_listen_stop(sk); |
291 | /* FIXME: do the active reset thing */ | 322 | } else if (dccp_need_reset(old_state)) { |
323 | dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); | ||
324 | sk->sk_err = ECONNRESET; | ||
292 | } else if (old_state == DCCP_REQUESTING) | 325 | } else if (old_state == DCCP_REQUESTING) |
293 | sk->sk_err = ECONNRESET; | 326 | sk->sk_err = ECONNRESET; |
294 | 327 | ||
@@ -518,6 +551,12 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, | |||
518 | (struct dccp_so_feat __user *) | 551 | (struct dccp_so_feat __user *) |
519 | optval); | 552 | optval); |
520 | break; | 553 | break; |
554 | case DCCP_SOCKOPT_SERVER_TIMEWAIT: | ||
555 | if (dp->dccps_role != DCCP_ROLE_SERVER) | ||
556 | err = -EOPNOTSUPP; | ||
557 | else | ||
558 | dp->dccps_server_timewait = (val != 0); | ||
559 | break; | ||
521 | case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ | 560 | case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ |
522 | if (val < 0 || val > 15) | 561 | if (val < 0 || val > 15) |
523 | err = -EINVAL; | 562 | err = -EINVAL; |
@@ -618,15 +657,15 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
618 | (__be32 __user *)optval, optlen); | 657 | (__be32 __user *)optval, optlen); |
619 | case DCCP_SOCKOPT_GET_CUR_MPS: | 658 | case DCCP_SOCKOPT_GET_CUR_MPS: |
620 | val = dp->dccps_mss_cache; | 659 | val = dp->dccps_mss_cache; |
621 | len = sizeof(val); | 660 | break; |
661 | case DCCP_SOCKOPT_SERVER_TIMEWAIT: | ||
662 | val = dp->dccps_server_timewait; | ||
622 | break; | 663 | break; |
623 | case DCCP_SOCKOPT_SEND_CSCOV: | 664 | case DCCP_SOCKOPT_SEND_CSCOV: |
624 | val = dp->dccps_pcslen; | 665 | val = dp->dccps_pcslen; |
625 | len = sizeof(val); | ||
626 | break; | 666 | break; |
627 | case DCCP_SOCKOPT_RECV_CSCOV: | 667 | case DCCP_SOCKOPT_RECV_CSCOV: |
628 | val = dp->dccps_pcrlen; | 668 | val = dp->dccps_pcrlen; |
629 | len = sizeof(val); | ||
630 | break; | 669 | break; |
631 | case 128 ... 191: | 670 | case 128 ... 191: |
632 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | 671 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, |
@@ -638,6 +677,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
638 | return -ENOPROTOOPT; | 677 | return -ENOPROTOOPT; |
639 | } | 678 | } |
640 | 679 | ||
680 | len = sizeof(val); | ||
641 | if (put_user(len, optlen) || copy_to_user(optval, &val, len)) | 681 | if (put_user(len, optlen) || copy_to_user(optval, &val, len)) |
642 | return -EFAULT; | 682 | return -EFAULT; |
643 | 683 | ||
@@ -748,19 +788,26 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
748 | 788 | ||
749 | dh = dccp_hdr(skb); | 789 | dh = dccp_hdr(skb); |
750 | 790 | ||
751 | if (dh->dccph_type == DCCP_PKT_DATA || | 791 | switch (dh->dccph_type) { |
752 | dh->dccph_type == DCCP_PKT_DATAACK) | 792 | case DCCP_PKT_DATA: |
793 | case DCCP_PKT_DATAACK: | ||
753 | goto found_ok_skb; | 794 | goto found_ok_skb; |
754 | 795 | ||
755 | if (dh->dccph_type == DCCP_PKT_RESET || | 796 | case DCCP_PKT_CLOSE: |
756 | dh->dccph_type == DCCP_PKT_CLOSE) { | 797 | case DCCP_PKT_CLOSEREQ: |
757 | dccp_pr_debug("found fin ok!\n"); | 798 | if (!(flags & MSG_PEEK)) |
799 | dccp_finish_passive_close(sk); | ||
800 | /* fall through */ | ||
801 | case DCCP_PKT_RESET: | ||
802 | dccp_pr_debug("found fin (%s) ok!\n", | ||
803 | dccp_packet_name(dh->dccph_type)); | ||
758 | len = 0; | 804 | len = 0; |
759 | goto found_fin_ok; | 805 | goto found_fin_ok; |
806 | default: | ||
807 | dccp_pr_debug("packet_type=%s\n", | ||
808 | dccp_packet_name(dh->dccph_type)); | ||
809 | sk_eat_skb(sk, skb, 0); | ||
760 | } | 810 | } |
761 | dccp_pr_debug("packet_type=%s\n", | ||
762 | dccp_packet_name(dh->dccph_type)); | ||
763 | sk_eat_skb(sk, skb, 0); | ||
764 | verify_sock_status: | 811 | verify_sock_status: |
765 | if (sock_flag(sk, SOCK_DONE)) { | 812 | if (sock_flag(sk, SOCK_DONE)) { |
766 | len = 0; | 813 | len = 0; |
@@ -862,34 +909,38 @@ out: | |||
862 | 909 | ||
863 | EXPORT_SYMBOL_GPL(inet_dccp_listen); | 910 | EXPORT_SYMBOL_GPL(inet_dccp_listen); |
864 | 911 | ||
865 | static const unsigned char dccp_new_state[] = { | 912 | static void dccp_terminate_connection(struct sock *sk) |
866 | /* current state: new state: action: */ | ||
867 | [0] = DCCP_CLOSED, | ||
868 | [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
869 | [DCCP_REQUESTING] = DCCP_CLOSED, | ||
870 | [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
871 | [DCCP_LISTEN] = DCCP_CLOSED, | ||
872 | [DCCP_RESPOND] = DCCP_CLOSED, | ||
873 | [DCCP_CLOSING] = DCCP_CLOSED, | ||
874 | [DCCP_TIME_WAIT] = DCCP_CLOSED, | ||
875 | [DCCP_CLOSED] = DCCP_CLOSED, | ||
876 | }; | ||
877 | |||
878 | static int dccp_close_state(struct sock *sk) | ||
879 | { | 913 | { |
880 | const int next = dccp_new_state[sk->sk_state]; | 914 | u8 next_state = DCCP_CLOSED; |
881 | const int ns = next & DCCP_STATE_MASK; | ||
882 | 915 | ||
883 | if (ns != sk->sk_state) | 916 | switch (sk->sk_state) { |
884 | dccp_set_state(sk, ns); | 917 | case DCCP_PASSIVE_CLOSE: |
918 | case DCCP_PASSIVE_CLOSEREQ: | ||
919 | dccp_finish_passive_close(sk); | ||
920 | break; | ||
921 | case DCCP_PARTOPEN: | ||
922 | dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk); | ||
923 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
924 | /* fall through */ | ||
925 | case DCCP_OPEN: | ||
926 | dccp_send_close(sk, 1); | ||
885 | 927 | ||
886 | return next & DCCP_ACTION_FIN; | 928 | if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER && |
929 | !dccp_sk(sk)->dccps_server_timewait) | ||
930 | next_state = DCCP_ACTIVE_CLOSEREQ; | ||
931 | else | ||
932 | next_state = DCCP_CLOSING; | ||
933 | /* fall through */ | ||
934 | default: | ||
935 | dccp_set_state(sk, next_state); | ||
936 | } | ||
887 | } | 937 | } |
888 | 938 | ||
889 | void dccp_close(struct sock *sk, long timeout) | 939 | void dccp_close(struct sock *sk, long timeout) |
890 | { | 940 | { |
891 | struct dccp_sock *dp = dccp_sk(sk); | 941 | struct dccp_sock *dp = dccp_sk(sk); |
892 | struct sk_buff *skb; | 942 | struct sk_buff *skb; |
943 | u32 data_was_unread = 0; | ||
893 | int state; | 944 | int state; |
894 | 945 | ||
895 | lock_sock(sk); | 946 | lock_sock(sk); |
@@ -912,16 +963,21 @@ void dccp_close(struct sock *sk, long timeout) | |||
912 | * descriptor close, not protocol-sourced closes, because the | 963 | * descriptor close, not protocol-sourced closes, because the |
913 | * reader process may not have drained the data yet! | 964 | * reader process may not have drained the data yet! |
914 | */ | 965 | */ |
915 | /* FIXME: check for unread data */ | ||
916 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { | 966 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { |
967 | data_was_unread += skb->len; | ||
917 | __kfree_skb(skb); | 968 | __kfree_skb(skb); |
918 | } | 969 | } |
919 | 970 | ||
920 | if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | 971 | if (data_was_unread) { |
972 | /* Unread data was tossed, send an appropriate Reset Code */ | ||
973 | DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread); | ||
974 | dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); | ||
975 | dccp_set_state(sk, DCCP_CLOSED); | ||
976 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | ||
921 | /* Check zero linger _after_ checking for unread data. */ | 977 | /* Check zero linger _after_ checking for unread data. */ |
922 | sk->sk_prot->disconnect(sk, 0); | 978 | sk->sk_prot->disconnect(sk, 0); |
923 | } else if (dccp_close_state(sk)) { | 979 | } else if (sk->sk_state != DCCP_CLOSED) { |
924 | dccp_send_close(sk, 1); | 980 | dccp_terminate_connection(sk); |
925 | } | 981 | } |
926 | 982 | ||
927 | sk_stream_wait_close(sk, timeout); | 983 | sk_stream_wait_close(sk, timeout); |
@@ -948,24 +1004,6 @@ adjudge_to_death: | |||
948 | if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) | 1004 | if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) |
949 | goto out; | 1005 | goto out; |
950 | 1006 | ||
951 | /* | ||
952 | * The last release_sock may have processed the CLOSE or RESET | ||
953 | * packet moving sock to CLOSED state, if not we have to fire | ||
954 | * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination" | ||
955 | * in draft-ietf-dccp-spec-11. -acme | ||
956 | */ | ||
957 | if (sk->sk_state == DCCP_CLOSING) { | ||
958 | /* FIXME: should start at 2 * RTT */ | ||
959 | /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */ | ||
960 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
961 | inet_csk(sk)->icsk_rto, | ||
962 | DCCP_RTO_MAX); | ||
963 | #if 0 | ||
964 | /* Yeah, we should use sk->sk_prot->orphan_count, etc */ | ||
965 | dccp_set_state(sk, DCCP_CLOSED); | ||
966 | #endif | ||
967 | } | ||
968 | |||
969 | if (sk->sk_state == DCCP_CLOSED) | 1007 | if (sk->sk_state == DCCP_CLOSED) |
970 | inet_csk_destroy_sock(sk); | 1008 | inet_csk_destroy_sock(sk); |
971 | 1009 | ||
@@ -981,7 +1019,7 @@ EXPORT_SYMBOL_GPL(dccp_close); | |||
981 | 1019 | ||
982 | void dccp_shutdown(struct sock *sk, int how) | 1020 | void dccp_shutdown(struct sock *sk, int how) |
983 | { | 1021 | { |
984 | dccp_pr_debug("entry\n"); | 1022 | dccp_pr_debug("called shutdown(%x)\n", how); |
985 | } | 1023 | } |
986 | 1024 | ||
987 | EXPORT_SYMBOL_GPL(dccp_shutdown); | 1025 | EXPORT_SYMBOL_GPL(dccp_shutdown); |
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index c62c05039f69..21295993fdb8 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c | |||
@@ -100,41 +100,19 @@ static struct ctl_table dccp_default_table[] = { | |||
100 | { .ctl_name = 0, } | 100 | { .ctl_name = 0, } |
101 | }; | 101 | }; |
102 | 102 | ||
103 | static struct ctl_table dccp_table[] = { | 103 | static struct ctl_path dccp_path[] = { |
104 | { | 104 | { .procname = "net", .ctl_name = CTL_NET, }, |
105 | .ctl_name = NET_DCCP_DEFAULT, | 105 | { .procname = "dccp", .ctl_name = NET_DCCP, }, |
106 | .procname = "default", | 106 | { .procname = "default", .ctl_name = NET_DCCP_DEFAULT, }, |
107 | .mode = 0555, | 107 | { } |
108 | .child = dccp_default_table, | ||
109 | }, | ||
110 | { .ctl_name = 0, }, | ||
111 | }; | ||
112 | |||
113 | static struct ctl_table dccp_dir_table[] = { | ||
114 | { | ||
115 | .ctl_name = NET_DCCP, | ||
116 | .procname = "dccp", | ||
117 | .mode = 0555, | ||
118 | .child = dccp_table, | ||
119 | }, | ||
120 | { .ctl_name = 0, }, | ||
121 | }; | ||
122 | |||
123 | static struct ctl_table dccp_root_table[] = { | ||
124 | { | ||
125 | .ctl_name = CTL_NET, | ||
126 | .procname = "net", | ||
127 | .mode = 0555, | ||
128 | .child = dccp_dir_table, | ||
129 | }, | ||
130 | { .ctl_name = 0, }, | ||
131 | }; | 108 | }; |
132 | 109 | ||
133 | static struct ctl_table_header *dccp_table_header; | 110 | static struct ctl_table_header *dccp_table_header; |
134 | 111 | ||
135 | int __init dccp_sysctl_init(void) | 112 | int __init dccp_sysctl_init(void) |
136 | { | 113 | { |
137 | dccp_table_header = register_sysctl_table(dccp_root_table); | 114 | dccp_table_header = register_sysctl_paths(dccp_path, |
115 | dccp_default_table); | ||
138 | 116 | ||
139 | return dccp_table_header != NULL ? 0 : -ENOMEM; | 117 | return dccp_table_header != NULL ? 0 : -ENOMEM; |
140 | } | 118 | } |
diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 3af067354bd4..8703a792b560 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c | |||
@@ -280,9 +280,8 @@ static void dccp_init_write_xmit_timer(struct sock *sk) | |||
280 | { | 280 | { |
281 | struct dccp_sock *dp = dccp_sk(sk); | 281 | struct dccp_sock *dp = dccp_sk(sk); |
282 | 282 | ||
283 | init_timer(&dp->dccps_xmit_timer); | 283 | setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, |
284 | dp->dccps_xmit_timer.data = (unsigned long)sk; | 284 | (unsigned long)sk); |
285 | dp->dccps_xmit_timer.function = dccp_write_xmit_timer; | ||
286 | } | 285 | } |
287 | 286 | ||
288 | void dccp_init_xmit_timers(struct sock *sk) | 287 | void dccp_init_xmit_timers(struct sock *sk) |