diff options
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/Makefile | 4 | ||||
-rw-r--r-- | net/dccp/ackvec.c | 415 | ||||
-rw-r--r-- | net/dccp/ackvec.h | 38 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.c | 134 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.h | 2 | ||||
-rw-r--r-- | net/dccp/dccp.h | 21 | ||||
-rw-r--r-- | net/dccp/input.c | 34 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 13 | ||||
-rw-r--r-- | net/dccp/options.c | 43 | ||||
-rw-r--r-- | net/dccp/output.c | 22 | ||||
-rw-r--r-- | net/dccp/proto.c | 71 | ||||
-rw-r--r-- | net/dccp/qpolicy.c | 137 |
12 files changed, 566 insertions, 368 deletions
diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2991efcc8dea..5c8362b037ed 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o | 1 | obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o |
2 | 2 | ||
3 | dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o | 3 | dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ |
4 | 4 | qpolicy.o | |
5 | # | 5 | # |
6 | # CCID algorithms to be used by dccp.ko | 6 | # CCID algorithms to be used by dccp.ko |
7 | # | 7 | # |
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index abaf241c7353..25b7a8d1ad58 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c | |||
@@ -9,18 +9,10 @@ | |||
9 | * under the terms of the GNU General Public License as published by the | 9 | * under the terms of the GNU General Public License as published by the |
10 | * Free Software Foundation; version 2 of the License; | 10 | * Free Software Foundation; version 2 of the License; |
11 | */ | 11 | */ |
12 | |||
13 | #include "ackvec.h" | ||
14 | #include "dccp.h" | 12 | #include "dccp.h" |
15 | |||
16 | #include <linux/init.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
19 | #include <linux/skbuff.h> | ||
20 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
21 | 15 | ||
22 | #include <net/sock.h> | ||
23 | |||
24 | static struct kmem_cache *dccp_ackvec_slab; | 16 | static struct kmem_cache *dccp_ackvec_slab; |
25 | static struct kmem_cache *dccp_ackvec_record_slab; | 17 | static struct kmem_cache *dccp_ackvec_record_slab; |
26 | 18 | ||
@@ -92,6 +84,24 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum) | |||
92 | return 0; | 84 | return 0; |
93 | } | 85 | } |
94 | 86 | ||
87 | static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list, | ||
88 | const u64 ackno) | ||
89 | { | ||
90 | struct dccp_ackvec_record *avr; | ||
91 | /* | ||
92 | * Exploit that records are inserted in descending order of sequence | ||
93 | * number, start with the oldest record first. If @ackno is `before' | ||
94 | * the earliest ack_ackno, the packet is too old to be considered. | ||
95 | */ | ||
96 | list_for_each_entry_reverse(avr, av_list, avr_node) { | ||
97 | if (avr->avr_ack_seqno == ackno) | ||
98 | return avr; | ||
99 | if (before48(ackno, avr->avr_ack_seqno)) | ||
100 | break; | ||
101 | } | ||
102 | return NULL; | ||
103 | } | ||
104 | |||
95 | /* | 105 | /* |
96 | * Buffer index and length computation using modulo-buffersize arithmetic. | 106 | * Buffer index and length computation using modulo-buffersize arithmetic. |
97 | * Note that, as pointers move from right to left, head is `before' tail. | 107 | * Note that, as pointers move from right to left, head is `before' tail. |
@@ -113,248 +123,253 @@ u16 dccp_ackvec_buflen(const struct dccp_ackvec *av) | |||
113 | return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head); | 123 | return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head); |
114 | } | 124 | } |
115 | 125 | ||
116 | /* | 126 | /** |
117 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | 127 | * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1 |
118 | * bytes with run length 0, rather than a single byte with a larger run length; | 128 | * @av: non-empty buffer to update |
119 | * this simplifies table updates if one of the missing packets arrives. | 129 | * @distance: negative or zero distance of @seqno from buf_ackno downward |
130 | * @seqno: the (old) sequence number whose record is to be updated | ||
131 | * @state: state in which packet carrying @seqno was received | ||
120 | */ | 132 | */ |
121 | static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, | 133 | static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance, |
122 | const unsigned int packets, | 134 | u64 seqno, enum dccp_ackvec_states state) |
123 | const unsigned char state) | ||
124 | { | 135 | { |
125 | long gap; | 136 | u16 ptr = av->av_buf_head; |
126 | long new_head; | ||
127 | 137 | ||
128 | if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN) | 138 | BUG_ON(distance > 0); |
129 | return -ENOBUFS; | 139 | if (unlikely(dccp_ackvec_is_empty(av))) |
140 | return; | ||
130 | 141 | ||
131 | gap = packets - 1; | 142 | do { |
132 | new_head = av->av_buf_head - packets; | 143 | u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr); |
133 | 144 | ||
134 | if (new_head < 0) { | 145 | if (distance + runlen >= 0) { |
135 | if (gap > 0) { | 146 | /* |
136 | memset(av->av_buf, DCCPAV_NOT_RECEIVED, | 147 | * Only update the state if packet has not been received |
137 | gap + new_head + 1); | 148 | * yet. This is OK as per the second table in RFC 4340, |
138 | gap = -new_head; | 149 | * 11.4.1; i.e. here we are using the following table: |
150 | * RECEIVED | ||
151 | * 0 1 3 | ||
152 | * S +---+---+---+ | ||
153 | * T 0 | 0 | 0 | 0 | | ||
154 | * O +---+---+---+ | ||
155 | * R 1 | 1 | 1 | 1 | | ||
156 | * E +---+---+---+ | ||
157 | * D 3 | 0 | 1 | 3 | | ||
158 | * +---+---+---+ | ||
159 | * The "Not Received" state was set by reserve_seats(). | ||
160 | */ | ||
161 | if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED) | ||
162 | av->av_buf[ptr] = state; | ||
163 | else | ||
164 | dccp_pr_debug("Not changing %llu state to %u\n", | ||
165 | (unsigned long long)seqno, state); | ||
166 | break; | ||
139 | } | 167 | } |
140 | new_head += DCCPAV_MAX_ACKVEC_LEN; | ||
141 | } | ||
142 | 168 | ||
143 | av->av_buf_head = new_head; | 169 | distance += runlen + 1; |
170 | ptr = __ackvec_idx_add(ptr, 1); | ||
144 | 171 | ||
145 | if (gap > 0) | 172 | } while (ptr != av->av_buf_tail); |
146 | memset(av->av_buf + av->av_buf_head + 1, | 173 | } |
147 | DCCPAV_NOT_RECEIVED, gap); | ||
148 | 174 | ||
149 | av->av_buf[av->av_buf_head] = state; | 175 | /* Mark @num entries after buf_head as "Not yet received". */ |
150 | av->av_vec_len += packets; | 176 | static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num) |
151 | return 0; | 177 | { |
178 | u16 start = __ackvec_idx_add(av->av_buf_head, 1), | ||
179 | len = DCCPAV_MAX_ACKVEC_LEN - start; | ||
180 | |||
181 | /* check for buffer wrap-around */ | ||
182 | if (num > len) { | ||
183 | memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len); | ||
184 | start = 0; | ||
185 | num -= len; | ||
186 | } | ||
187 | if (num) | ||
188 | memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num); | ||
152 | } | 189 | } |
153 | 190 | ||
154 | /* | 191 | /** |
155 | * Implements the RFC 4340, Appendix A | 192 | * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer |
193 | * @av: container of buffer to update (can be empty or non-empty) | ||
194 | * @num_packets: number of packets to register (must be >= 1) | ||
195 | * @seqno: sequence number of the first packet in @num_packets | ||
196 | * @state: state in which packet carrying @seqno was received | ||
156 | */ | 197 | */ |
157 | int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | 198 | static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, |
158 | const u64 ackno, const u8 state) | 199 | u64 seqno, enum dccp_ackvec_states state) |
159 | { | 200 | { |
160 | u8 *cur_head = av->av_buf + av->av_buf_head, | 201 | u32 num_cells = num_packets; |
161 | *buf_end = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; | ||
162 | /* | ||
163 | * Check at the right places if the buffer is full, if it is, tell the | ||
164 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
165 | * vectors, when we will free up space in av_buf. | ||
166 | * | ||
167 | * We may well decide to do buffer compression, etc, but for now lets | ||
168 | * just drop. | ||
169 | * | ||
170 | * From Appendix A.1.1 (`New Packets'): | ||
171 | * | ||
172 | * Of course, the circular buffer may overflow, either when the | ||
173 | * HC-Sender is sending data at a very high rate, when the | ||
174 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
175 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
176 | * (so the HC-Receiver is unable to clean up old state). In this | ||
177 | * case, the HC-Receiver should either compress the buffer (by | ||
178 | * increasing run lengths when possible), transfer its state to | ||
179 | * a larger buffer, or, as a last resort, drop all received | ||
180 | * packets, without processing them whatsoever, until its buffer | ||
181 | * shrinks again. | ||
182 | */ | ||
183 | 202 | ||
184 | /* See if this is the first ackno being inserted */ | 203 | if (num_packets > DCCPAV_BURST_THRESH) { |
185 | if (av->av_vec_len == 0) { | 204 | u32 lost_packets = num_packets - 1; |
186 | *cur_head = state; | ||
187 | av->av_vec_len = 1; | ||
188 | } else if (after48(ackno, av->av_buf_ackno)) { | ||
189 | const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno); | ||
190 | 205 | ||
206 | DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets); | ||
191 | /* | 207 | /* |
192 | * Look if the state of this packet is the same as the | 208 | * We received 1 packet and have a loss of size "num_packets-1" |
193 | * previous ackno and if so if we can bump the head len. | 209 | * which we squeeze into num_cells-1 rather than reserving an |
210 | * entire byte for each lost packet. | ||
211 | * The reason is that the vector grows in O(burst_length); when | ||
212 | * it grows too large there will be no room left for the payload. | |||
213 | * This is a trade-off: if a few packets out of the burst show | ||
214 | * up later, their state will not be changed; it is simply too | ||
215 | * costly to reshuffle/reallocate/copy the buffer each time. | ||
216 | * Should such problems persist, we will need to switch to a | ||
217 | * different underlying data structure. | ||
194 | */ | 218 | */ |
195 | if (delta == 1 && dccp_ackvec_state(cur_head) == state && | 219 | for (num_packets = num_cells = 1; lost_packets; ++num_cells) { |
196 | dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN) | 220 | u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN); |
197 | *cur_head += 1; | ||
198 | else if (dccp_ackvec_set_buf_head_state(av, delta, state)) | ||
199 | return -ENOBUFS; | ||
200 | } else { | ||
201 | /* | ||
202 | * A.1.2. Old Packets | ||
203 | * | ||
204 | * When a packet with Sequence Number S <= buf_ackno | ||
205 | * arrives, the HC-Receiver will scan the table for | ||
206 | * the byte corresponding to S. (Indexing structures | ||
207 | * could reduce the complexity of this scan.) | ||
208 | */ | ||
209 | u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno); | ||
210 | 221 | ||
211 | while (1) { | 222 | av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1); |
212 | const u8 len = dccp_ackvec_runlen(cur_head); | 223 | av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len; |
213 | /* | 224 | |
214 | * valid packets not yet in av_buf have a reserved | 225 | lost_packets -= len; |
215 | * entry, with a len equal to 0. | ||
216 | */ | ||
217 | if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) { | ||
218 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
219 | (unsigned long long)ackno); | ||
220 | *cur_head = state; | ||
221 | goto out; | ||
222 | } | ||
223 | /* len == 0 means one packet */ | ||
224 | if (delta < len + 1) | ||
225 | goto out_duplicate; | ||
226 | |||
227 | delta -= len + 1; | ||
228 | if (++cur_head == buf_end) | ||
229 | cur_head = av->av_buf; | ||
230 | } | 226 | } |
231 | } | 227 | } |
232 | 228 | ||
233 | av->av_buf_ackno = ackno; | 229 | if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { |
234 | out: | 230 | DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n"); |
235 | return 0; | 231 | av->av_overflow = true; |
232 | } | ||
233 | |||
234 | av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets); | ||
235 | if (av->av_overflow) | ||
236 | av->av_buf_tail = av->av_buf_head; | ||
237 | |||
238 | av->av_buf[av->av_buf_head] = state; | ||
239 | av->av_buf_ackno = seqno; | ||
236 | 240 | ||
237 | out_duplicate: | 241 | if (num_packets > 1) |
238 | /* Duplicate packet */ | 242 | dccp_ackvec_reserve_seats(av, num_packets - 1); |
239 | dccp_pr_debug("Received a dup or already considered lost " | ||
240 | "packet: %llu\n", (unsigned long long)ackno); | ||
241 | return -EILSEQ; | ||
242 | } | 243 | } |
243 | 244 | ||
244 | static void dccp_ackvec_throw_record(struct dccp_ackvec *av, | 245 | /** |
245 | struct dccp_ackvec_record *avr) | 246 | * dccp_ackvec_input - Register incoming packet in the buffer |
247 | */ | ||
248 | void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) | ||
246 | { | 249 | { |
247 | struct dccp_ackvec_record *next; | 250 | u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq; |
251 | enum dccp_ackvec_states state = DCCPAV_RECEIVED; | ||
248 | 252 | ||
249 | /* sort out vector length */ | 253 | if (dccp_ackvec_is_empty(av)) { |
250 | if (av->av_buf_head <= avr->avr_ack_ptr) | 254 | dccp_ackvec_add_new(av, 1, seqno, state); |
251 | av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head; | 255 | av->av_tail_ackno = seqno; |
252 | else | ||
253 | av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 - | ||
254 | av->av_buf_head + avr->avr_ack_ptr; | ||
255 | 256 | ||
256 | /* free records */ | 257 | } else { |
257 | list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { | 258 | s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno); |
258 | list_del(&avr->avr_node); | 259 | u8 *current_head = av->av_buf + av->av_buf_head; |
259 | kmem_cache_free(dccp_ackvec_record_slab, avr); | ||
260 | } | ||
261 | } | ||
262 | 260 | ||
263 | void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, | 261 | if (num_packets == 1 && |
264 | const u64 ackno) | 262 | dccp_ackvec_state(current_head) == state && |
265 | { | 263 | dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) { |
266 | struct dccp_ackvec_record *avr; | ||
267 | 264 | ||
268 | /* | 265 | *current_head += 1; |
269 | * If we traverse backwards, it should be faster when we have large | 266 | av->av_buf_ackno = seqno; |
270 | * windows. We will be receiving ACKs for stuff we sent a while back | 267 | |
271 | * -sorbo. | 268 | } else if (num_packets > 0) { |
272 | */ | 269 | dccp_ackvec_add_new(av, num_packets, seqno, state); |
273 | list_for_each_entry_reverse(avr, &av->av_records, avr_node) { | 270 | } else { |
274 | if (ackno == avr->avr_ack_seqno) { | 271 | dccp_ackvec_update_old(av, num_packets, seqno, state); |
275 | dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " | 272 | } |
276 | "ack_ackno=%llu, ACKED!\n", | ||
277 | dccp_role(sk), avr->avr_ack_runlen, | ||
278 | (unsigned long long)avr->avr_ack_seqno, | ||
279 | (unsigned long long)avr->avr_ack_ackno); | ||
280 | dccp_ackvec_throw_record(av, avr); | ||
281 | break; | ||
282 | } else if (avr->avr_ack_seqno > ackno) | ||
283 | break; /* old news */ | ||
284 | } | 273 | } |
285 | } | 274 | } |
286 | 275 | ||
287 | static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | 276 | /** |
288 | struct sock *sk, u64 *ackno, | 277 | * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection |
289 | const unsigned char len, | 278 | * This routine is called when the peer acknowledges the receipt of Ack Vectors |
290 | const unsigned char *vector) | 279 | * up to and including @ackno. While based on on section A.3 of RFC 4340, here |
280 | * are additional precautions to prevent corrupted buffer state. In particular, | ||
281 | * we use tail_ackno to identify outdated records; it always marks the earliest | ||
282 | * packet of group (2) in 11.4.2. | ||
283 | */ | ||
284 | void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno) | ||
291 | { | 285 | { |
292 | unsigned char i; | 286 | struct dccp_ackvec_record *avr, *next; |
293 | struct dccp_ackvec_record *avr; | 287 | u8 runlen_now, eff_runlen; |
288 | s64 delta; | ||
294 | 289 | ||
295 | /* Check if we actually sent an ACK vector */ | 290 | avr = dccp_ackvec_lookup(&av->av_records, ackno); |
296 | if (list_empty(&av->av_records)) | 291 | if (avr == NULL) |
297 | return; | 292 | return; |
293 | /* | ||
294 | * Deal with outdated acknowledgments: this arises when e.g. there are | ||
295 | * several old records and the acks from the peer come in slowly. In | ||
296 | * that case we may still have records that pre-date tail_ackno. | ||
297 | */ | ||
298 | delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno); | ||
299 | if (delta < 0) | ||
300 | goto free_records; | ||
301 | /* | ||
302 | * Deal with overlapping Ack Vectors: don't subtract more than the | ||
303 | * number of packets between tail_ackno and ack_ackno. | ||
304 | */ | ||
305 | eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen; | ||
298 | 306 | ||
299 | i = len; | 307 | runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr); |
300 | /* | 308 | /* |
301 | * XXX | 309 | * The run length of Ack Vector cells does not decrease over time. If |
302 | * I think it might be more efficient to work backwards. See comment on | 310 | * the run length is the same as at the time the Ack Vector was sent, we |
303 | * rcv_ackno. -sorbo. | 311 | * free the ack_ptr cell. That cell can however not be freed if the run |
312 | * length has increased: in this case we need to move the tail pointer | ||
313 | * backwards (towards higher indices), to its next-oldest neighbour. | ||
304 | */ | 314 | */ |
305 | avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node); | 315 | if (runlen_now > eff_runlen) { |
306 | while (i--) { | ||
307 | const u8 rl = dccp_ackvec_runlen(vector); | ||
308 | u64 ackno_end_rl; | ||
309 | 316 | ||
310 | dccp_set_seqno(&ackno_end_rl, *ackno - rl); | 317 | av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1; |
318 | av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1); | ||
311 | 319 | ||
320 | /* This move may not have cleared the overflow flag. */ | ||
321 | if (av->av_overflow) | ||
322 | av->av_overflow = (av->av_buf_head == av->av_buf_tail); | ||
323 | } else { | ||
324 | av->av_buf_tail = avr->avr_ack_ptr; | ||
312 | /* | 325 | /* |
313 | * If our AVR sequence number is greater than the ack, go | 326 | * We have made sure that avr points to a valid cell within the |
314 | * forward in the AVR list until it is not so. | 327 | * buffer. This cell is either older than head, or equals head |
328 | * (empty buffer): in both cases we no longer have any overflow. | ||
315 | */ | 329 | */ |
316 | list_for_each_entry_from(avr, &av->av_records, avr_node) { | 330 | av->av_overflow = 0; |
317 | if (!after48(avr->avr_ack_seqno, *ackno)) | 331 | } |
318 | goto found; | ||
319 | } | ||
320 | /* End of the av_records list, not found, exit */ | ||
321 | break; | ||
322 | found: | ||
323 | if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) { | ||
324 | if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) { | ||
325 | dccp_pr_debug("%s ACK vector 0, len=%d, " | ||
326 | "ack_seqno=%llu, ack_ackno=%llu, " | ||
327 | "ACKED!\n", | ||
328 | dccp_role(sk), len, | ||
329 | (unsigned long long) | ||
330 | avr->avr_ack_seqno, | ||
331 | (unsigned long long) | ||
332 | avr->avr_ack_ackno); | ||
333 | dccp_ackvec_throw_record(av, avr); | ||
334 | break; | ||
335 | } | ||
336 | /* | ||
337 | * If it wasn't received, continue scanning... we might | ||
338 | * find another one. | ||
339 | */ | ||
340 | } | ||
341 | 332 | ||
342 | dccp_set_seqno(ackno, ackno_end_rl - 1); | 333 | /* |
343 | ++vector; | 334 | * The peer has acknowledged up to and including ack_ackno. Hence the |
335 | * first packet in group (2) of 11.4.2 is the successor of ack_ackno. | ||
336 | */ | ||
337 | av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1); | ||
338 | |||
339 | free_records: | ||
340 | list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { | ||
341 | list_del(&avr->avr_node); | ||
342 | kmem_cache_free(dccp_ackvec_record_slab, avr); | ||
344 | } | 343 | } |
345 | } | 344 | } |
346 | 345 | ||
347 | int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | 346 | /* |
348 | u64 *ackno, const u8 opt, const u8 *value, const u8 len) | 347 | * Routines to keep track of Ack Vectors received in an skb |
348 | */ | ||
349 | int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce) | ||
349 | { | 350 | { |
350 | if (len > DCCP_SINGLE_OPT_MAXLEN) | 351 | struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC); |
351 | return -1; | 352 | |
353 | if (new == NULL) | ||
354 | return -ENOBUFS; | ||
355 | new->vec = vec; | ||
356 | new->len = len; | ||
357 | new->nonce = nonce; | ||
352 | 358 | ||
353 | /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ | 359 | list_add_tail(&new->node, head); |
354 | dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, | ||
355 | ackno, len, value); | ||
356 | return 0; | 360 | return 0; |
357 | } | 361 | } |
362 | EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add); | ||
363 | |||
364 | void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks) | ||
365 | { | ||
366 | struct dccp_ackvec_parsed *cur, *next; | ||
367 | |||
368 | list_for_each_entry_safe(cur, next, parsed_chunks, node) | ||
369 | kfree(cur); | ||
370 | INIT_LIST_HEAD(parsed_chunks); | ||
371 | } | ||
372 | EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup); | ||
358 | 373 | ||
359 | int __init dccp_ackvec_init(void) | 374 | int __init dccp_ackvec_init(void) |
360 | { | 375 | { |
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 23880be8fc29..e2ab0627a5ff 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h | |||
@@ -29,6 +29,9 @@ | |||
29 | /* Estimated minimum average Ack Vector length - used for updating MPS */ | 29 | /* Estimated minimum average Ack Vector length - used for updating MPS */ |
30 | #define DCCPAV_MIN_OPTLEN 16 | 30 | #define DCCPAV_MIN_OPTLEN 16 |
31 | 31 | ||
32 | /* Threshold for coping with large bursts of losses */ | ||
33 | #define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8) | ||
34 | |||
32 | enum dccp_ackvec_states { | 35 | enum dccp_ackvec_states { |
33 | DCCPAV_RECEIVED = 0x00, | 36 | DCCPAV_RECEIVED = 0x00, |
34 | DCCPAV_ECN_MARKED = 0x40, | 37 | DCCPAV_ECN_MARKED = 0x40, |
@@ -61,7 +64,6 @@ static inline u8 dccp_ackvec_state(const u8 *cell) | |||
61 | * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf | 64 | * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf |
62 | * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound | 65 | * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound |
63 | * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously) | 66 | * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously) |
64 | * @av_veclen: length of the live portion of @av_buf | ||
65 | */ | 67 | */ |
66 | struct dccp_ackvec { | 68 | struct dccp_ackvec { |
67 | u8 av_buf[DCCPAV_MAX_ACKVEC_LEN]; | 69 | u8 av_buf[DCCPAV_MAX_ACKVEC_LEN]; |
@@ -72,7 +74,6 @@ struct dccp_ackvec { | |||
72 | bool av_buf_nonce[DCCPAV_NUM_ACKVECS]; | 74 | bool av_buf_nonce[DCCPAV_NUM_ACKVECS]; |
73 | u8 av_overflow:1; | 75 | u8 av_overflow:1; |
74 | struct list_head av_records; | 76 | struct list_head av_records; |
75 | u16 av_vec_len; | ||
76 | }; | 77 | }; |
77 | 78 | ||
78 | /** struct dccp_ackvec_record - Records information about sent Ack Vectors | 79 | /** struct dccp_ackvec_record - Records information about sent Ack Vectors |
@@ -98,29 +99,38 @@ struct dccp_ackvec_record { | |||
98 | u8 avr_ack_nonce:1; | 99 | u8 avr_ack_nonce:1; |
99 | }; | 100 | }; |
100 | 101 | ||
101 | struct sock; | ||
102 | struct sk_buff; | ||
103 | |||
104 | extern int dccp_ackvec_init(void); | 102 | extern int dccp_ackvec_init(void); |
105 | extern void dccp_ackvec_exit(void); | 103 | extern void dccp_ackvec_exit(void); |
106 | 104 | ||
107 | extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); | 105 | extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); |
108 | extern void dccp_ackvec_free(struct dccp_ackvec *av); | 106 | extern void dccp_ackvec_free(struct dccp_ackvec *av); |
109 | 107 | ||
110 | extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | 108 | extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); |
111 | const u64 ackno, const u8 state); | ||
112 | |||
113 | extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, | ||
114 | struct sock *sk, const u64 ackno); | ||
115 | extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | ||
116 | u64 *ackno, const u8 opt, | ||
117 | const u8 *value, const u8 len); | ||
118 | |||
119 | extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); | 109 | extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); |
110 | extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); | ||
120 | extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); | 111 | extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); |
121 | 112 | ||
122 | static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) | 113 | static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) |
123 | { | 114 | { |
124 | return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail; | 115 | return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail; |
125 | } | 116 | } |
117 | |||
118 | /** | ||
119 | * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb | ||
120 | * @vec: start of vector (offset into skb) | ||
121 | * @len: length of @vec | ||
122 | * @nonce: whether @vec had an ECN nonce of 0 or 1 | ||
123 | * @node: FIFO - arranged in descending order of ack_ackno | ||
124 | * This structure is used by CCIDs to access Ack Vectors in a received skb. | ||
125 | */ | ||
126 | struct dccp_ackvec_parsed { | ||
127 | u8 *vec, | ||
128 | len, | ||
129 | nonce:1; | ||
130 | struct list_head node; | ||
131 | }; | ||
132 | |||
133 | extern int dccp_ackvec_parsed_add(struct list_head *head, | ||
134 | u8 *vec, u8 len, u8 nonce); | ||
135 | extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); | ||
126 | #endif /* _ACKVEC_H */ | 136 | #endif /* _ACKVEC_H */ |
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index cb1b4a0d1877..e96d5e810039 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) | |||
246 | #endif | 246 | #endif |
247 | } | 247 | } |
248 | 248 | ||
249 | /* XXX Lame code duplication! | ||
250 | * returns -1 if none was found. | ||
251 | * else returns the next offset to use in the function call. | ||
252 | */ | ||
253 | static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, | ||
254 | unsigned char **vec, unsigned char *veclen) | ||
255 | { | ||
256 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
257 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | ||
258 | unsigned char *opt_ptr; | ||
259 | const unsigned char *opt_end = (unsigned char *)dh + | ||
260 | (dh->dccph_doff * 4); | ||
261 | unsigned char opt, len; | ||
262 | unsigned char *value; | ||
263 | |||
264 | BUG_ON(offset < 0); | ||
265 | options += offset; | ||
266 | opt_ptr = options; | ||
267 | if (opt_ptr >= opt_end) | ||
268 | return -1; | ||
269 | |||
270 | while (opt_ptr != opt_end) { | ||
271 | opt = *opt_ptr++; | ||
272 | len = 0; | ||
273 | value = NULL; | ||
274 | |||
275 | /* Check if this isn't a single byte option */ | ||
276 | if (opt > DCCPO_MAX_RESERVED) { | ||
277 | if (opt_ptr == opt_end) | ||
278 | goto out_invalid_option; | ||
279 | |||
280 | len = *opt_ptr++; | ||
281 | if (len < 3) | ||
282 | goto out_invalid_option; | ||
283 | /* | ||
284 | * Remove the type and len fields, leaving | ||
285 | * just the value size | ||
286 | */ | ||
287 | len -= 2; | ||
288 | value = opt_ptr; | ||
289 | opt_ptr += len; | ||
290 | |||
291 | if (opt_ptr > opt_end) | ||
292 | goto out_invalid_option; | ||
293 | } | ||
294 | |||
295 | switch (opt) { | ||
296 | case DCCPO_ACK_VECTOR_0: | ||
297 | case DCCPO_ACK_VECTOR_1: | ||
298 | *vec = value; | ||
299 | *veclen = len; | ||
300 | return offset + (opt_ptr - options); | ||
301 | } | ||
302 | } | ||
303 | |||
304 | return -1; | ||
305 | |||
306 | out_invalid_option: | ||
307 | DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); | ||
308 | return -1; | ||
309 | } | ||
310 | |||
311 | /** | 249 | /** |
312 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm | 250 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm |
313 | * This code is almost identical with TCP's tcp_rtt_estimator(), since | 251 | * This code is almost identical with TCP's tcp_rtt_estimator(), since |
@@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | |||
432 | ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); | 370 | ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); |
433 | } | 371 | } |
434 | 372 | ||
373 | static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, | ||
374 | u8 option, u8 *optval, u8 optlen) | ||
375 | { | ||
376 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
377 | |||
378 | switch (option) { | ||
379 | case DCCPO_ACK_VECTOR_0: | ||
380 | case DCCPO_ACK_VECTOR_1: | ||
381 | return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, | ||
382 | option - DCCPO_ACK_VECTOR_0); | ||
383 | } | ||
384 | return 0; | ||
385 | } | ||
386 | |||
435 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 387 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
436 | { | 388 | { |
437 | struct dccp_sock *dp = dccp_sk(sk); | 389 | struct dccp_sock *dp = dccp_sk(sk); |
438 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 390 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
439 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); | 391 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); |
392 | struct dccp_ackvec_parsed *avp; | ||
440 | u64 ackno, seqno; | 393 | u64 ackno, seqno; |
441 | struct ccid2_seq *seqp; | 394 | struct ccid2_seq *seqp; |
442 | unsigned char *vector; | ||
443 | unsigned char veclen; | ||
444 | int offset = 0; | ||
445 | int done = 0; | 395 | int done = 0; |
446 | unsigned int maxincr = 0; | 396 | unsigned int maxincr = 0; |
447 | 397 | ||
@@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
475 | } | 425 | } |
476 | 426 | ||
477 | /* check forward path congestion */ | 427 | /* check forward path congestion */ |
478 | /* still didn't send out new data packets */ | 428 | if (dccp_packet_without_ack(skb)) |
479 | if (hc->tx_seqh == hc->tx_seqt) | ||
480 | return; | 429 | return; |
481 | 430 | ||
482 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | 431 | /* still didn't send out new data packets */ |
483 | case DCCP_PKT_ACK: | 432 | if (hc->tx_seqh == hc->tx_seqt) |
484 | case DCCP_PKT_DATAACK: | 433 | goto done; |
485 | break; | ||
486 | default: | ||
487 | return; | ||
488 | } | ||
489 | 434 | ||
490 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | 435 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; |
491 | if (after48(ackno, hc->tx_high_ack)) | 436 | if (after48(ackno, hc->tx_high_ack)) |
@@ -509,15 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
509 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); | 454 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); |
510 | 455 | ||
511 | /* go through all ack vectors */ | 456 | /* go through all ack vectors */ |
512 | while ((offset = ccid2_ackvector(sk, skb, offset, | 457 | list_for_each_entry(avp, &hc->tx_av_chunks, node) { |
513 | &vector, &veclen)) != -1) { | ||
514 | /* go through this ack vector */ | 458 | /* go through this ack vector */ |
515 | while (veclen--) { | 459 | for (; avp->len--; avp->vec++) { |
516 | u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector)); | 460 | u64 ackno_end_rl = SUB48(ackno, |
461 | dccp_ackvec_runlen(avp->vec)); | ||
517 | 462 | ||
518 | ccid2_pr_debug("ackvec start:%llu end:%llu\n", | 463 | ccid2_pr_debug("ackvec %llu |%u,%u|\n", |
519 | (unsigned long long)ackno, | 464 | (unsigned long long)ackno, |
520 | (unsigned long long)ackno_end_rl); | 465 | dccp_ackvec_state(avp->vec) >> 6, |
466 | dccp_ackvec_runlen(avp->vec)); | ||
521 | /* if the seqno we are analyzing is larger than the | 467 | /* if the seqno we are analyzing is larger than the |
522 | * current ackno, then move towards the tail of our | 468 | * current ackno, then move towards the tail of our |
523 | * seqnos. | 469 | * seqnos. |
@@ -536,7 +482,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
536 | * run length | 482 | * run length |
537 | */ | 483 | */ |
538 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { | 484 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { |
539 | const u8 state = dccp_ackvec_state(vector); | 485 | const u8 state = dccp_ackvec_state(avp->vec); |
540 | 486 | ||
541 | /* new packet received or marked */ | 487 | /* new packet received or marked */ |
542 | if (state != DCCPAV_NOT_RECEIVED && | 488 | if (state != DCCPAV_NOT_RECEIVED && |
@@ -563,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
563 | break; | 509 | break; |
564 | 510 | ||
565 | ackno = SUB48(ackno_end_rl, 1); | 511 | ackno = SUB48(ackno_end_rl, 1); |
566 | vector++; | ||
567 | } | 512 | } |
568 | if (done) | 513 | if (done) |
569 | break; | 514 | break; |
@@ -631,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
631 | sk_stop_timer(sk, &hc->tx_rtotimer); | 576 | sk_stop_timer(sk, &hc->tx_rtotimer); |
632 | else | 577 | else |
633 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | 578 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); |
634 | 579 | done: | |
635 | /* check if incoming Acks allow pending packets to be sent */ | 580 | /* check if incoming Acks allow pending packets to be sent */ |
636 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) | 581 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) |
637 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | 582 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); |
583 | dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); | ||
638 | } | 584 | } |
639 | 585 | ||
640 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | 586 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
@@ -663,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
663 | hc->tx_last_cong = ccid2_time_stamp; | 609 | hc->tx_last_cong = ccid2_time_stamp; |
664 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, | 610 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, |
665 | (unsigned long)sk); | 611 | (unsigned long)sk); |
612 | INIT_LIST_HEAD(&hc->tx_av_chunks); | ||
666 | return 0; | 613 | return 0; |
667 | } | 614 | } |
668 | 615 | ||
@@ -696,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
696 | } | 643 | } |
697 | 644 | ||
698 | struct ccid_operations ccid2_ops = { | 645 | struct ccid_operations ccid2_ops = { |
699 | .ccid_id = DCCPC_CCID2, | 646 | .ccid_id = DCCPC_CCID2, |
700 | .ccid_name = "TCP-like", | 647 | .ccid_name = "TCP-like", |
701 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), | 648 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), |
702 | .ccid_hc_tx_init = ccid2_hc_tx_init, | 649 | .ccid_hc_tx_init = ccid2_hc_tx_init, |
703 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, | 650 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, |
704 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, | 651 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, |
705 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, | 652 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, |
706 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, | 653 | .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, |
707 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), | 654 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, |
708 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | 655 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), |
656 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | ||
709 | }; | 657 | }; |
710 | 658 | ||
711 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 659 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 25cb6b216eda..e9985dafc2c7 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h | |||
@@ -55,6 +55,7 @@ struct ccid2_seq { | |||
55 | * @tx_rtt_seq: to decay RTTVAR at most once per flight | 55 | * @tx_rtt_seq: to decay RTTVAR at most once per flight |
56 | * @tx_rpseq: last consecutive seqno | 56 | * @tx_rpseq: last consecutive seqno |
57 | * @tx_rpdupack: dupacks since rpseq | 57 | * @tx_rpdupack: dupacks since rpseq |
58 | * @tx_av_chunks: list of Ack Vectors received on current skb | ||
58 | */ | 59 | */ |
59 | struct ccid2_hc_tx_sock { | 60 | struct ccid2_hc_tx_sock { |
60 | u32 tx_cwnd; | 61 | u32 tx_cwnd; |
@@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock { | |||
79 | int tx_rpdupack; | 80 | int tx_rpdupack; |
80 | u32 tx_last_cong; | 81 | u32 tx_last_cong; |
81 | u64 tx_high_ack; | 82 | u64 tx_high_ack; |
83 | struct list_head tx_av_chunks; | ||
82 | }; | 84 | }; |
83 | 85 | ||
84 | static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) | 86 | static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 19fafd597465..45087052d894 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -93,9 +93,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); | |||
93 | #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) | 93 | #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) |
94 | #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) | 94 | #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) |
95 | 95 | ||
96 | /* Maximal interval between probes for local resources. */ | ||
97 | #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) | ||
98 | |||
99 | /* sysctl variables for DCCP */ | 96 | /* sysctl variables for DCCP */ |
100 | extern int sysctl_dccp_request_retries; | 97 | extern int sysctl_dccp_request_retries; |
101 | extern int sysctl_dccp_retries1; | 98 | extern int sysctl_dccp_retries1; |
@@ -203,12 +200,7 @@ struct dccp_mib { | |||
203 | DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); | 200 | DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); |
204 | #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) | 201 | #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) |
205 | #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) | 202 | #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) |
206 | #define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) | ||
207 | #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) | 203 | #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) |
208 | #define DCCP_ADD_STATS_BH(field, val) \ | ||
209 | SNMP_ADD_STATS_BH(dccp_statistics, field, val) | ||
210 | #define DCCP_ADD_STATS_USER(field, val) \ | ||
211 | SNMP_ADD_STATS_USER(dccp_statistics, field, val) | ||
212 | 204 | ||
213 | /* | 205 | /* |
214 | * Checksumming routines | 206 | * Checksumming routines |
@@ -243,6 +235,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
243 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | 235 | extern void dccp_send_sync(struct sock *sk, const u64 seq, |
244 | const enum dccp_pkt_type pkt_type); | 236 | const enum dccp_pkt_type pkt_type); |
245 | 237 | ||
238 | /* | ||
239 | * TX Packet Dequeueing Interface | ||
240 | */ | ||
241 | extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); | ||
242 | extern bool dccp_qpolicy_full(struct sock *sk); | ||
243 | extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); | ||
244 | extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); | ||
245 | extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); | ||
246 | extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); | ||
247 | |||
248 | /* | ||
249 | * TX Packet Output and TX Timers | ||
250 | */ | ||
246 | extern void dccp_write_xmit(struct sock *sk); | 251 | extern void dccp_write_xmit(struct sock *sk); |
247 | extern void dccp_write_space(struct sock *sk); | 252 | extern void dccp_write_space(struct sock *sk); |
248 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); | 253 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); |
diff --git a/net/dccp/input.c b/net/dccp/input.c index c7aeeba859d4..15af247ea007 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) | |||
160 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | 160 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); |
161 | } | 161 | } |
162 | 162 | ||
163 | static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | 163 | static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) |
164 | { | 164 | { |
165 | struct dccp_sock *dp = dccp_sk(sk); | 165 | struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; |
166 | 166 | ||
167 | if (dp->dccps_hc_rx_ackvec != NULL) | 167 | if (av == NULL) |
168 | dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, | 168 | return; |
169 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 169 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
170 | dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
171 | dccp_ackvec_input(av, skb); | ||
170 | } | 172 | } |
171 | 173 | ||
172 | static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) | 174 | static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) |
@@ -239,7 +241,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | |||
239 | dccp_update_gsr(sk, seqno); | 241 | dccp_update_gsr(sk, seqno); |
240 | 242 | ||
241 | if (dh->dccph_type != DCCP_PKT_SYNC && | 243 | if (dh->dccph_type != DCCP_PKT_SYNC && |
242 | (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) | 244 | ackno != DCCP_PKT_WITHOUT_ACK_SEQ && |
245 | after48(ackno, dp->dccps_gar)) | ||
243 | dp->dccps_gar = ackno; | 246 | dp->dccps_gar = ackno; |
244 | } else { | 247 | } else { |
245 | unsigned long now = jiffies; | 248 | unsigned long now = jiffies; |
@@ -365,21 +368,13 @@ discard: | |||
365 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | 368 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, |
366 | const struct dccp_hdr *dh, const unsigned len) | 369 | const struct dccp_hdr *dh, const unsigned len) |
367 | { | 370 | { |
368 | struct dccp_sock *dp = dccp_sk(sk); | ||
369 | |||
370 | if (dccp_check_seqno(sk, skb)) | 371 | if (dccp_check_seqno(sk, skb)) |
371 | goto discard; | 372 | goto discard; |
372 | 373 | ||
373 | if (dccp_parse_options(sk, NULL, skb)) | 374 | if (dccp_parse_options(sk, NULL, skb)) |
374 | return 1; | 375 | return 1; |
375 | 376 | ||
376 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 377 | dccp_handle_ackvec_processing(sk, skb); |
377 | dccp_event_ack_recv(sk, skb); | ||
378 | |||
379 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
380 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
381 | DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED)) | ||
382 | goto discard; | ||
383 | dccp_deliver_input_to_ccids(sk, skb); | 378 | dccp_deliver_input_to_ccids(sk, skb); |
384 | 379 | ||
385 | return __dccp_rcv_established(sk, skb, dh, len); | 380 | return __dccp_rcv_established(sk, skb, dh, len); |
@@ -631,14 +626,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
631 | if (dccp_parse_options(sk, NULL, skb)) | 626 | if (dccp_parse_options(sk, NULL, skb)) |
632 | return 1; | 627 | return 1; |
633 | 628 | ||
634 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 629 | dccp_handle_ackvec_processing(sk, skb); |
635 | dccp_event_ack_recv(sk, skb); | ||
636 | |||
637 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
638 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
639 | DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED)) | ||
640 | goto discard; | ||
641 | |||
642 | dccp_deliver_input_to_ccids(sk, skb); | 630 | dccp_deliver_input_to_ccids(sk, skb); |
643 | } | 631 | } |
644 | 632 | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f69ea114829..45a434f94169 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, | |||
462 | { | 462 | { |
463 | struct rtable *rt; | 463 | struct rtable *rt; |
464 | struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, | 464 | struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, |
465 | .nl_u = { .ip4_u = | 465 | .fl4_dst = ip_hdr(skb)->saddr, |
466 | { .daddr = ip_hdr(skb)->saddr, | 466 | .fl4_src = ip_hdr(skb)->daddr, |
467 | .saddr = ip_hdr(skb)->daddr, | 467 | .fl4_tos = RT_CONN_FLAGS(sk), |
468 | .tos = RT_CONN_FLAGS(sk) } }, | ||
469 | .proto = sk->sk_protocol, | 468 | .proto = sk->sk_protocol, |
470 | .uli_u = { .ports = | 469 | .fl_ip_sport = dccp_hdr(skb)->dccph_dport, |
471 | { .sport = dccp_hdr(skb)->dccph_dport, | 470 | .fl_ip_dport = dccp_hdr(skb)->dccph_sport |
472 | .dport = dccp_hdr(skb)->dccph_sport } | ||
473 | } | ||
474 | }; | 471 | }; |
475 | 472 | ||
476 | security_skb_classify_flow(skb, &fl); | 473 | security_skb_classify_flow(skb, &fl); |
diff --git a/net/dccp/options.c b/net/dccp/options.c index 5adeeed5e0d2..f06ffcfc8d71 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
54 | struct dccp_sock *dp = dccp_sk(sk); | 54 | struct dccp_sock *dp = dccp_sk(sk); |
55 | const struct dccp_hdr *dh = dccp_hdr(skb); | 55 | const struct dccp_hdr *dh = dccp_hdr(skb); |
56 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; | 56 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; |
57 | u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | ||
58 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | 57 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); |
59 | unsigned char *opt_ptr = options; | 58 | unsigned char *opt_ptr = options; |
60 | const unsigned char *opt_end = (unsigned char *)dh + | 59 | const unsigned char *opt_end = (unsigned char *)dh + |
@@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
129 | if (rc) | 128 | if (rc) |
130 | goto out_featneg_failed; | 129 | goto out_featneg_failed; |
131 | break; | 130 | break; |
132 | case DCCPO_ACK_VECTOR_0: | ||
133 | case DCCPO_ACK_VECTOR_1: | ||
134 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ | ||
135 | break; | ||
136 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
137 | dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) | ||
138 | goto out_invalid_option; | ||
139 | break; | ||
140 | case DCCPO_TIMESTAMP: | 131 | case DCCPO_TIMESTAMP: |
141 | if (len != 4) | 132 | if (len != 4) |
142 | goto out_invalid_option; | 133 | goto out_invalid_option; |
@@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
226 | pkt_type, opt, value, len)) | 217 | pkt_type, opt, value, len)) |
227 | goto out_invalid_option; | 218 | goto out_invalid_option; |
228 | break; | 219 | break; |
220 | case DCCPO_ACK_VECTOR_0: | ||
221 | case DCCPO_ACK_VECTOR_1: | ||
222 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ | ||
223 | break; | ||
224 | /* | ||
225 | * Ack vectors are processed by the TX CCID if it is | ||
226 | * interested. The RX CCID need not parse Ack Vectors, | ||
227 | * since it is only interested in clearing old state. | ||
228 | * Fall through. | ||
229 | */ | ||
229 | case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: | 230 | case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: |
230 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, | 231 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, |
231 | pkt_type, opt, value, len)) | 232 | pkt_type, opt, value, len)) |
@@ -429,6 +430,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
429 | { | 430 | { |
430 | struct dccp_sock *dp = dccp_sk(sk); | 431 | struct dccp_sock *dp = dccp_sk(sk); |
431 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; | 432 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; |
433 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
432 | const u16 buflen = dccp_ackvec_buflen(av); | 434 | const u16 buflen = dccp_ackvec_buflen(av); |
433 | /* Figure out how many options do we need to represent the ackvec */ | 435 | /* Figure out how many options do we need to represent the ackvec */ |
434 | const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); | 436 | const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); |
@@ -437,10 +439,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
437 | const unsigned char *tail, *from; | 439 | const unsigned char *tail, *from; |
438 | unsigned char *to; | 440 | unsigned char *to; |
439 | 441 | ||
440 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | 442 | if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { |
443 | DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, | ||
444 | dccp_packet_name(dcb->dccpd_type)); | ||
441 | return -1; | 445 | return -1; |
442 | 446 | } | |
443 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | 447 | /* |
448 | * Since Ack Vectors are variable-length, we can not always predict | ||
449 | * their size. To catch exception cases where the space is running out | ||
450 | * on the skb, a separate Sync is scheduled to carry the Ack Vector. | ||
451 | */ | ||
452 | if (len > DCCPAV_MIN_OPTLEN && | ||
453 | len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { | ||
454 | DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " | ||
455 | "MPS=%u ==> reduce payload size?\n", len, skb->len, | ||
456 | dcb->dccpd_opt_len, dp->dccps_mss_cache); | ||
457 | dp->dccps_sync_scheduled = 1; | ||
458 | return 0; | ||
459 | } | ||
460 | dcb->dccpd_opt_len += len; | ||
444 | 461 | ||
445 | to = skb_push(skb, len); | 462 | to = skb_push(skb, len); |
446 | len = buflen; | 463 | len = buflen; |
@@ -481,7 +498,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
481 | /* | 498 | /* |
482 | * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. | 499 | * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. |
483 | */ | 500 | */ |
484 | if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce)) | 501 | if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) |
485 | return -ENOBUFS; | 502 | return -ENOBUFS; |
486 | return 0; | 503 | return 0; |
487 | } | 504 | } |
diff --git a/net/dccp/output.c b/net/dccp/output.c index 45b91853f5ae..784d30210543 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk) | |||
242 | { | 242 | { |
243 | int err, len; | 243 | int err, len; |
244 | struct dccp_sock *dp = dccp_sk(sk); | 244 | struct dccp_sock *dp = dccp_sk(sk); |
245 | struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); | 245 | struct sk_buff *skb = dccp_qpolicy_pop(sk); |
246 | 246 | ||
247 | if (unlikely(skb == NULL)) | 247 | if (unlikely(skb == NULL)) |
248 | return; | 248 | return; |
@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk) | |||
283 | * any local drop will eventually be reported via receiver feedback. | 283 | * any local drop will eventually be reported via receiver feedback. |
284 | */ | 284 | */ |
285 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); | 285 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); |
286 | |||
287 | /* | ||
288 | * If the CCID needs to transfer additional header options out-of-band | ||
289 | * (e.g. Ack Vectors or feature-negotiation options), it activates this | ||
290 | * flag to schedule a Sync. The Sync will automatically incorporate all | ||
291 | * currently pending header options, thus clearing the backlog. | ||
292 | */ | ||
293 | if (dp->dccps_sync_scheduled) | ||
294 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); | ||
286 | } | 295 | } |
287 | 296 | ||
288 | /** | 297 | /** |
@@ -336,7 +345,7 @@ void dccp_write_xmit(struct sock *sk) | |||
336 | struct dccp_sock *dp = dccp_sk(sk); | 345 | struct dccp_sock *dp = dccp_sk(sk); |
337 | struct sk_buff *skb; | 346 | struct sk_buff *skb; |
338 | 347 | ||
339 | while ((skb = skb_peek(&sk->sk_write_queue))) { | 348 | while ((skb = dccp_qpolicy_top(sk))) { |
340 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 349 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); |
341 | 350 | ||
342 | switch (ccid_packet_dequeue_eval(rc)) { | 351 | switch (ccid_packet_dequeue_eval(rc)) { |
@@ -350,8 +359,7 @@ void dccp_write_xmit(struct sock *sk) | |||
350 | dccp_xmit_packet(sk); | 359 | dccp_xmit_packet(sk); |
351 | break; | 360 | break; |
352 | case CCID_PACKET_ERR: | 361 | case CCID_PACKET_ERR: |
353 | skb_dequeue(&sk->sk_write_queue); | 362 | dccp_qpolicy_drop(sk, skb); |
354 | kfree_skb(skb); | ||
355 | dccp_pr_debug("packet discarded due to err=%d\n", rc); | 363 | dccp_pr_debug("packet discarded due to err=%d\n", rc); |
356 | } | 364 | } |
357 | } | 365 | } |
@@ -636,6 +644,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, | |||
636 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; | 644 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; |
637 | DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; | 645 | DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; |
638 | 646 | ||
647 | /* | ||
648 | * Clear the flag in case the Sync was scheduled for out-of-band data, | ||
649 | * such as carrying a long Ack Vector. | ||
650 | */ | ||
651 | dccp_sk(sk)->dccps_sync_scheduled = 0; | ||
652 | |||
639 | dccp_transmit_skb(sk, skb); | 653 | dccp_transmit_skb(sk, skb); |
640 | } | 654 | } |
641 | 655 | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fcea..152975d942d9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | 186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; |
187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; | 187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; |
188 | dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; | ||
188 | 189 | ||
189 | dccp_init_xmit_timers(sk); | 190 | dccp_init_xmit_timers(sk); |
190 | 191 | ||
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, | |||
532 | case DCCP_SOCKOPT_RECV_CSCOV: | 533 | case DCCP_SOCKOPT_RECV_CSCOV: |
533 | err = dccp_setsockopt_cscov(sk, val, true); | 534 | err = dccp_setsockopt_cscov(sk, val, true); |
534 | break; | 535 | break; |
536 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
537 | if (sk->sk_state != DCCP_CLOSED) | ||
538 | err = -EISCONN; | ||
539 | else if (val < 0 || val >= DCCPQ_POLICY_MAX) | ||
540 | err = -EINVAL; | ||
541 | else | ||
542 | dp->dccps_qpolicy = val; | ||
543 | break; | ||
544 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
545 | if (val < 0) | ||
546 | err = -EINVAL; | ||
547 | else | ||
548 | dp->dccps_tx_qlen = val; | ||
549 | break; | ||
535 | default: | 550 | default: |
536 | err = -ENOPROTOOPT; | 551 | err = -ENOPROTOOPT; |
537 | break; | 552 | break; |
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
639 | case DCCP_SOCKOPT_RECV_CSCOV: | 654 | case DCCP_SOCKOPT_RECV_CSCOV: |
640 | val = dp->dccps_pcrlen; | 655 | val = dp->dccps_pcrlen; |
641 | break; | 656 | break; |
657 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
658 | val = dp->dccps_qpolicy; | ||
659 | break; | ||
660 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
661 | val = dp->dccps_tx_qlen; | ||
662 | break; | ||
642 | case 128 ... 191: | 663 | case 128 ... 191: |
643 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | 664 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, |
644 | len, (u32 __user *)optval, optlen); | 665 | len, (u32 __user *)optval, optlen); |
@@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
681 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); | 702 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); |
682 | #endif | 703 | #endif |
683 | 704 | ||
705 | static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) | ||
706 | { | ||
707 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); | ||
708 | |||
709 | /* | ||
710 | * Assign an (opaque) qpolicy priority value to skb->priority. | ||
711 | * | ||
712 | * We are overloading this skb field for use with the qpolicy subsystem. | ||
713 | * The skb->priority is normally used for the SO_PRIORITY option, which | ||
714 | * is initialised from sk_priority. Since the assignment of sk_priority | ||
715 | * to skb->priority happens later (on layer 3), we overload this field | ||
716 | * for use with queueing priorities as long as the skb is on layer 4. | ||
717 | * The default priority value (if nothing is set) is 0. | ||
718 | */ | ||
719 | skb->priority = 0; | ||
720 | |||
721 | for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
722 | |||
723 | if (!CMSG_OK(msg, cmsg)) | ||
724 | return -EINVAL; | ||
725 | |||
726 | if (cmsg->cmsg_level != SOL_DCCP) | ||
727 | continue; | ||
728 | |||
729 | if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && | ||
730 | !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) | ||
731 | return -EINVAL; | ||
732 | |||
733 | switch (cmsg->cmsg_type) { | ||
734 | case DCCP_SCM_PRIORITY: | ||
735 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) | ||
736 | return -EINVAL; | ||
737 | skb->priority = *(__u32 *)CMSG_DATA(cmsg); | ||
738 | break; | ||
739 | default: | ||
740 | return -EINVAL; | ||
741 | } | ||
742 | } | ||
743 | return 0; | ||
744 | } | ||
745 | |||
684 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 746 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
685 | size_t len) | 747 | size_t len) |
686 | { | 748 | { |
@@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
696 | 758 | ||
697 | lock_sock(sk); | 759 | lock_sock(sk); |
698 | 760 | ||
699 | if (sysctl_dccp_tx_qlen && | 761 | if (dccp_qpolicy_full(sk)) { |
700 | (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { | ||
701 | rc = -EAGAIN; | 762 | rc = -EAGAIN; |
702 | goto out_release; | 763 | goto out_release; |
703 | } | 764 | } |
@@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
725 | if (rc != 0) | 786 | if (rc != 0) |
726 | goto out_discard; | 787 | goto out_discard; |
727 | 788 | ||
728 | skb_queue_tail(&sk->sk_write_queue, skb); | 789 | rc = dccp_msghdr_parse(msg, skb); |
790 | if (rc != 0) | ||
791 | goto out_discard; | ||
792 | |||
793 | dccp_qpolicy_push(sk, skb); | ||
729 | /* | 794 | /* |
730 | * The xmit_timer is set if the TX CCID is rate-based and will expire | 795 | * The xmit_timer is set if the TX CCID is rate-based and will expire |
731 | * when congestion control permits to release further packets into the | 796 | * when congestion control permits to release further packets into the |
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..63c30bfa4703 --- /dev/null +++ b/net/dccp/qpolicy.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * net/dccp/qpolicy.c | ||
3 | * | ||
4 | * Policy-based packet dequeueing interface for DCCP. | ||
5 | * | ||
6 | * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License v2 | ||
10 | * as published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include "dccp.h" | ||
13 | |||
14 | /* | ||
15 | * Simple Dequeueing Policy: | ||
16 | * If tx_qlen is different from 0, enqueue up to tx_qlen elements. | ||
17 | */ | ||
18 | static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) | ||
19 | { | ||
20 | skb_queue_tail(&sk->sk_write_queue, skb); | ||
21 | } | ||
22 | |||
23 | static bool qpolicy_simple_full(struct sock *sk) | ||
24 | { | ||
25 | return dccp_sk(sk)->dccps_tx_qlen && | ||
26 | sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; | ||
27 | } | ||
28 | |||
29 | static struct sk_buff *qpolicy_simple_top(struct sock *sk) | ||
30 | { | ||
31 | return skb_peek(&sk->sk_write_queue); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * Priority-based Dequeueing Policy: | ||
36 | * If tx_qlen is different from 0 and the queue has reached its upper bound | ||
37 | * of tx_qlen elements, replace older packets lowest-priority-first. | ||
38 | */ | ||
39 | static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) | ||
40 | { | ||
41 | struct sk_buff *skb, *best = NULL; | ||
42 | |||
43 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
44 | if (best == NULL || skb->priority > best->priority) | ||
45 | best = skb; | ||
46 | return best; | ||
47 | } | ||
48 | |||
49 | static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) | ||
50 | { | ||
51 | struct sk_buff *skb, *worst = NULL; | ||
52 | |||
53 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
54 | if (worst == NULL || skb->priority < worst->priority) | ||
55 | worst = skb; | ||
56 | return worst; | ||
57 | } | ||
58 | |||
59 | static bool qpolicy_prio_full(struct sock *sk) | ||
60 | { | ||
61 | if (qpolicy_simple_full(sk)) | ||
62 | dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); | ||
63 | return false; | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface | ||
68 | * @push: add a new @skb to the write queue | ||
69 | * @full: indicates that no more packets will be admitted | ||
70 | * @top: peeks at whatever the queueing policy defines as its `top' | ||
71 | */ | ||
72 | static struct dccp_qpolicy_operations { | ||
73 | void (*push) (struct sock *sk, struct sk_buff *skb); | ||
74 | bool (*full) (struct sock *sk); | ||
75 | struct sk_buff* (*top) (struct sock *sk); | ||
76 | __be32 params; | ||
77 | |||
78 | } qpol_table[DCCPQ_POLICY_MAX] = { | ||
79 | [DCCPQ_POLICY_SIMPLE] = { | ||
80 | .push = qpolicy_simple_push, | ||
81 | .full = qpolicy_simple_full, | ||
82 | .top = qpolicy_simple_top, | ||
83 | .params = 0, | ||
84 | }, | ||
85 | [DCCPQ_POLICY_PRIO] = { | ||
86 | .push = qpolicy_simple_push, | ||
87 | .full = qpolicy_prio_full, | ||
88 | .top = qpolicy_prio_best_skb, | ||
89 | .params = DCCP_SCM_PRIORITY, | ||
90 | }, | ||
91 | }; | ||
92 | |||
93 | /* | ||
94 | * Externally visible interface | ||
95 | */ | ||
96 | void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) | ||
97 | { | ||
98 | qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); | ||
99 | } | ||
100 | |||
101 | bool dccp_qpolicy_full(struct sock *sk) | ||
102 | { | ||
103 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); | ||
104 | } | ||
105 | |||
106 | void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) | ||
107 | { | ||
108 | if (skb != NULL) { | ||
109 | skb_unlink(skb, &sk->sk_write_queue); | ||
110 | kfree_skb(skb); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | struct sk_buff *dccp_qpolicy_top(struct sock *sk) | ||
115 | { | ||
116 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); | ||
117 | } | ||
118 | |||
119 | struct sk_buff *dccp_qpolicy_pop(struct sock *sk) | ||
120 | { | ||
121 | struct sk_buff *skb = dccp_qpolicy_top(sk); | ||
122 | |||
123 | if (skb != NULL) { | ||
124 | /* Clear any skb fields that we used internally */ | ||
125 | skb->priority = 0; | ||
126 | skb_unlink(skb, &sk->sk_write_queue); | ||
127 | } | ||
128 | return skb; | ||
129 | } | ||
130 | |||
131 | bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) | ||
132 | { | ||
133 | /* check if exactly one bit is set */ | ||
134 | if (!param || (param & (param - 1))) | ||
135 | return false; | ||
136 | return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; | ||
137 | } | ||