Diffstat (limited to 'net/dccp')
 net/dccp/Makefile      |   4
 net/dccp/ackvec.c      | 616
 net/dccp/ackvec.h      | 151
 net/dccp/ccids/ccid2.c | 143
 net/dccp/ccids/ccid2.h |   2
 net/dccp/dccp.h        |  24
 net/dccp/input.c       |  33
 net/dccp/ipv4.c        |  13
 net/dccp/options.c     | 100
 net/dccp/output.c      |  22
 net/dccp/proto.c       |  71
 net/dccp/qpolicy.c     | 137
 12 files changed, 755 insertions(+), 561 deletions(-)
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
-
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
+	  qpolicy.o
 #
 # CCID algorithms to be used by dccp.ko
 #
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 92a6fcb40d7d..25b7a8d1ad58 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,444 +1,375 @@
 /*
  *  net/dccp/ackvec.c
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *
  *      This program is free software; you can redistribute it and/or modify it
  *      under the terms of the GNU General Public License as published by the
  *      Free Software Foundation; version 2 of the License;
  */
-
-#include "ackvec.h"
 #include "dccp.h"
-
-#include <linux/init.h>
-#include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>
 
-#include <net/sock.h>
-
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
-static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 {
-	struct dccp_ackvec_record *avr =
-			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
 
-	if (avr != NULL)
-		INIT_LIST_HEAD(&avr->avr_node);
-
-	return avr;
+	if (av != NULL) {
+		av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
+		INIT_LIST_HEAD(&av->av_records);
+	}
+	return av;
 }
 
-static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
 {
-	if (unlikely(avr == NULL))
-		return;
-	/* Check if deleting a linked record */
-	WARN_ON(!list_empty(&avr->avr_node));
-	kmem_cache_free(dccp_ackvec_record_slab, avr);
+	struct dccp_ackvec_record *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
+		kmem_cache_free(dccp_ackvec_record_slab, cur);
+	INIT_LIST_HEAD(&av->av_records);
 }
 
-static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
-				   struct dccp_ackvec_record *avr)
+void dccp_ackvec_free(struct dccp_ackvec *av)
 {
-	/*
-	 * AVRs are sorted by seqno. Since we are sending them in order, we
-	 * just add the AVR at the head of the list.
-	 * -sorbo.
-	 */
-	if (!list_empty(&av->av_records)) {
-		const struct dccp_ackvec_record *head =
-					list_entry(av->av_records.next,
-						   struct dccp_ackvec_record,
-						   avr_node);
-		BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
+	if (likely(av != NULL)) {
+		dccp_ackvec_purge_records(av);
+		kmem_cache_free(dccp_ackvec_slab, av);
 	}
-
-	list_add(&avr->avr_node, &av->av_records);
 }
 
-int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_ackvec_update_records  -  Record information about sent Ack Vectors
+ * @av:		Ack Vector records to update
+ * @seqno:	Sequence number of the packet carrying the Ack Vector just sent
+ * @nonce_sum:	The sum of all buffer nonces contained in the Ack Vector
+ */
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
-	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts, i;
-	u32 elapsed_time;
-	const unsigned char *tail, *from;
-	unsigned char *to;
 	struct dccp_ackvec_record *avr;
-	suseconds_t delta;
-
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
-		return -1;
-
-	delta = ktime_us_delta(ktime_get_real(), av->av_time);
-	elapsed_time = delta / 10;
 
-	if (elapsed_time != 0 &&
-	    dccp_insert_option_elapsed_time(skb, elapsed_time))
-		return -1;
-
-	avr = dccp_ackvec_record_new();
+	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
-		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
-	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
-	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
-
-	for (i = 0; i < nr_opts; ++i) {
-		int copylen = len;
-
-		if (len > DCCP_SINGLE_OPT_MAXLEN)
-			copylen = DCCP_SINGLE_OPT_MAXLEN;
-
-		*to++ = DCCPO_ACK_VECTOR_0;
-		*to++ = copylen + 2;
-
-		/* Check if buf_head wraps */
-		if (from + copylen > tail) {
-			const u16 tailsize = tail - from;
-
-			memcpy(to, from, tailsize);
-			to      += tailsize;
-			len     -= tailsize;
-			copylen -= tailsize;
-			from     = av->av_buf;
-		}
-
-		memcpy(to, from, copylen);
-		from += copylen;
-		to   += copylen;
-		len  -= copylen;
-	}
+		return -ENOBUFS;
 
+	avr->avr_ack_seqno  = seqno;
+	avr->avr_ack_ptr    = av->av_buf_head;
+	avr->avr_ack_ackno  = av->av_buf_ackno;
+	avr->avr_ack_nonce  = nonce_sum;
+	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
 	/*
-	 * From RFC 4340, A.2:
-	 *
-	 *	For each acknowledgement it sends, the HC-Receiver will add an
-	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
-	 *	sequence number it used for the ack packet; ack_ptr will equal
-	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 *	equal buf_nonce.
+	 * When the buffer overflows, we keep no more than one record. This is
+	 * the simplest way of disambiguating sender-Acks dating from before the
+	 * overflow from sender-Acks which refer to after the overflow; a simple
+	 * solution is preferable here since we are handling an exception.
 	 */
-	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	avr->avr_ack_ptr   = av->av_buf_head;
-	avr->avr_ack_ackno = av->av_buf_ackno;
-	avr->avr_ack_nonce = av->av_buf_nonce;
-	avr->avr_sent_len  = av->av_vec_len;
-
-	dccp_ackvec_insert_avr(av, avr);
+	if (av->av_overflow)
+		dccp_ackvec_purge_records(av);
+	/*
+	 * Since GSS is incremented for each packet, the list is automatically
+	 * arranged in descending order of @ack_seqno.
+	 */
+	list_add(&avr->avr_node, &av->av_records);
 
-	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
-		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_sent_len,
+	dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
 		      (unsigned long long)avr->avr_ack_seqno,
-		      (unsigned long long)avr->avr_ack_ackno);
+		      (unsigned long long)avr->avr_ack_ackno,
+		      avr->avr_ack_runlen);
 	return 0;
 }
 
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
+static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
+						     const u64 ackno)
 {
-	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
-
-	if (av != NULL) {
-		av->av_buf_head  = DCCP_MAX_ACKVEC_LEN - 1;
-		av->av_buf_ackno = UINT48_MAX + 1;
-		av->av_buf_nonce = 0;
-		av->av_time      = ktime_set(0, 0);
-		av->av_vec_len   = 0;
-		INIT_LIST_HEAD(&av->av_records);
+	struct dccp_ackvec_record *avr;
+	/*
+	 * Exploit that records are inserted in descending order of sequence
+	 * number, start with the oldest record first. If @ackno is `before'
+	 * the earliest ack_ackno, the packet is too old to be considered.
+	 */
+	list_for_each_entry_reverse(avr, av_list, avr_node) {
+		if (avr->avr_ack_seqno == ackno)
+			return avr;
+		if (before48(ackno, avr->avr_ack_seqno))
+			break;
 	}
-
-	return av;
+	return NULL;
 }
 
-void dccp_ackvec_free(struct dccp_ackvec *av)
+/*
+ * Buffer index and length computation using modulo-buffersize arithmetic.
+ * Note that, as pointers move from right to left, head is `before' tail.
+ */
+static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
 {
-	if (unlikely(av == NULL))
-		return;
-
-	if (!list_empty(&av->av_records)) {
-		struct dccp_ackvec_record *avr, *next;
-
-		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
-			list_del_init(&avr->avr_node);
-			dccp_ackvec_record_delete(avr);
-		}
-	}
-
-	kmem_cache_free(dccp_ackvec_slab, av);
+	return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
 }
 
-static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const u32 index)
+static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
 {
-	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
+	return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
 }
 
-static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const u32 index)
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
 {
-	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
+	if (unlikely(av->av_overflow))
+		return DCCPAV_MAX_ACKVEC_LEN;
+	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
 }
 
-/*
- * If several packets are missing, the HC-Receiver may prefer to enter multiple
- * bytes with run length 0, rather than a single byte with a larger run length;
- * this simplifies table updates if one of the missing packets arrives.
+/**
+ * dccp_ackvec_update_old  -  Update previous state as per RFC 4340, 11.4.1
+ * @av:		non-empty buffer to update
+ * @distance:	negative or zero distance of @seqno from buf_ackno downward
+ * @seqno:	the (old) sequence number whose record is to be updated
+ * @state:	state in which packet carrying @seqno was received
  */
-static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
-						 const unsigned int packets,
-						 const unsigned char state)
+static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
+				   u64 seqno, enum dccp_ackvec_states state)
 {
-	long gap;
-	long new_head;
+	u16 ptr = av->av_buf_head;
 
-	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
-		return -ENOBUFS;
+	BUG_ON(distance > 0);
+	if (unlikely(dccp_ackvec_is_empty(av)))
+		return;
 
-	gap      = packets - 1;
-	new_head = av->av_buf_head - packets;
+	do {
+		u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
 
-	if (new_head < 0) {
-		if (gap > 0) {
-			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
-			       gap + new_head + 1);
-			gap = -new_head;
+		if (distance + runlen >= 0) {
+			/*
+			 * Only update the state if packet has not been received
+			 * yet. This is OK as per the second table in RFC 4340,
+			 * 11.4.1; i.e. here we are using the following table:
+			 *                     RECEIVED
+			 *                      0   1   3
+			 *              S     +---+---+---+
+			 *              T   0 | 0 | 0 | 0 |
+			 *              O     +---+---+---+
+			 *              R   1 | 1 | 1 | 1 |
+			 *              E     +---+---+---+
+			 *              D   3 | 0 | 1 | 3 |
+			 *                    +---+---+---+
+			 * The "Not Received" state was set by reserve_seats().
+			 */
+			if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
+				av->av_buf[ptr] = state;
+			else
+				dccp_pr_debug("Not changing %llu state to %u\n",
+					      (unsigned long long)seqno, state);
+			break;
 		}
-		new_head += DCCP_MAX_ACKVEC_LEN;
-	}
 
-	av->av_buf_head = new_head;
+		distance += runlen + 1;
+		ptr	  = __ackvec_idx_add(ptr, 1);
 
-	if (gap > 0)
-		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+	} while (ptr != av->av_buf_tail);
+}
 
-	av->av_buf[av->av_buf_head] = state;
-	av->av_vec_len += packets;
-	return 0;
+/* Mark @num entries after buf_head as "Not yet received". */
+static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
+{
+	u16 start = __ackvec_idx_add(av->av_buf_head, 1),
+	    len   = DCCPAV_MAX_ACKVEC_LEN - start;
+
+	/* check for buffer wrap-around */
+	if (num > len) {
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
+		start = 0;
+		num  -= len;
+	}
+	if (num)
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
 }
 
-/*
- * Implements the RFC 4340, Appendix A
+/**
+ * dccp_ackvec_add_new  -  Record one or more new entries in Ack Vector buffer
+ * @av:		 container of buffer to update (can be empty or non-empty)
+ * @num_packets: number of packets to register (must be >= 1)
+ * @seqno:	 sequence number of the first packet in @num_packets
+ * @state:	 state in which packet carrying @seqno was received
  */
-int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-		    const u64 ackno, const u8 state)
+static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
+				u64 seqno, enum dccp_ackvec_states state)
 {
-	/*
-	 * Check at the right places if the buffer is full, if it is, tell the
-	 * caller to start dropping packets till the HC-Sender acks our ACK
-	 * vectors, when we will free up space in av_buf.
-	 *
-	 * We may well decide to do buffer compression, etc, but for now lets
-	 * just drop.
-	 *
-	 * From Appendix A.1.1 (`New Packets'):
-	 *
-	 *	Of course, the circular buffer may overflow, either when the
-	 *	HC-Sender is sending data at a very high rate, when the
-	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
-	 *	or when the HC-Sender is forgetting to acknowledge those acks
-	 *	(so the HC-Receiver is unable to clean up old state). In this
-	 *	case, the HC-Receiver should either compress the buffer (by
-	 *	increasing run lengths when possible), transfer its state to
-	 *	a larger buffer, or, as a last resort, drop all received
-	 *	packets, without processing them whatsoever, until its buffer
-	 *	shrinks again.
-	 */
+	u32 num_cells = num_packets;
 
-	/* See if this is the first ackno being inserted */
-	if (av->av_vec_len == 0) {
-		av->av_buf[av->av_buf_head] = state;
-		av->av_vec_len = 1;
-	} else if (after48(ackno, av->av_buf_ackno)) {
-		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
+	if (num_packets > DCCPAV_BURST_THRESH) {
+		u32 lost_packets = num_packets - 1;
 
+		DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
 		/*
-		 * Look if the state of this packet is the same as the
-		 * previous ackno and if so if we can bump the head len.
+		 * We received 1 packet and have a loss of size "num_packets-1"
+		 * which we squeeze into num_cells-1 rather than reserving an
+		 * entire byte for each lost packet.
+		 * The reason is that the vector grows in O(burst_length); when
+		 * it grows too large there will no room left for the payload.
+		 * This is a trade-off: if a few packets out of the burst show
+		 * up later, their state will not be changed; it is simply too
+		 * costly to reshuffle/reallocate/copy the buffer each time.
+		 * Should such problems persist, we will need to switch to a
+		 * different underlying data structure.
 		 */
-		if (delta == 1 &&
-		    dccp_ackvec_state(av, av->av_buf_head) == state &&
-		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
-			av->av_buf[av->av_buf_head]++;
-		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
-			return -ENOBUFS;
-	} else {
-		/*
-		 * A.1.2.  Old Packets
-		 *
-		 *	When a packet with Sequence Number S <= buf_ackno
-		 *	arrives, the HC-Receiver will scan the table for
-		 *	the byte corresponding to S. (Indexing structures
-		 *	could reduce the complexity of this scan.)
-		 */
-		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-		u32 index = av->av_buf_head;
+		for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
+			u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
 
-		while (1) {
-			const u8 len = dccp_ackvec_len(av, index);
-			const u8 av_state = dccp_ackvec_state(av, index);
-			/*
-			 * valid packets not yet in av_buf have a reserved
-			 * entry, with a len equal to 0.
-			 */
-			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
-			    len == 0 && delta == 0) { /* Found our
-							 reserved seat! */
-				dccp_pr_debug("Found %llu reserved seat!\n",
-					      (unsigned long long)ackno);
-				av->av_buf[index] = state;
-				goto out;
-			}
-			/* len == 0 means one packet */
-			if (delta < len + 1)
-				goto out_duplicate;
-
-			delta -= len + 1;
-			if (++index == DCCP_MAX_ACKVEC_LEN)
-				index = 0;
+			av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
+			av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
+
+			lost_packets -= len;
 		}
 	}
 
-	av->av_buf_ackno = ackno;
-	av->av_time = ktime_get_real();
-out:
-	return 0;
+	if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
+		DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
+		av->av_overflow = true;
+	}
+
+	av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
+	if (av->av_overflow)
+		av->av_buf_tail = av->av_buf_head;
 
-out_duplicate:
-	/* Duplicate packet */
-	dccp_pr_debug("Received a dup or already considered lost "
-		      "packet: %llu\n", (unsigned long long)ackno);
-	return -EILSEQ;
+	av->av_buf[av->av_buf_head] = state;
+	av->av_buf_ackno	    = seqno;
+
+	if (num_packets > 1)
+		dccp_ackvec_reserve_seats(av, num_packets - 1);
 }
 
-static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
-				     struct dccp_ackvec_record *avr)
+/**
+ * dccp_ackvec_input  -  Register incoming packet in the buffer
+ */
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
 {
-	struct dccp_ackvec_record *next;
+	u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	enum dccp_ackvec_states state = DCCPAV_RECEIVED;
 
-	/* sort out vector length */
-	if (av->av_buf_head <= avr->avr_ack_ptr)
-		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
-	else
-		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
-				 av->av_buf_head + avr->avr_ack_ptr;
+	if (dccp_ackvec_is_empty(av)) {
+		dccp_ackvec_add_new(av, 1, seqno, state);
+		av->av_tail_ackno = seqno;
 
-	/* free records */
-	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del_init(&avr->avr_node);
-		dccp_ackvec_record_delete(avr);
-	}
-}
+	} else {
+		s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
+		u8 *current_head = av->av_buf + av->av_buf_head;
 
-void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
-				 const u64 ackno)
-{
-	struct dccp_ackvec_record *avr;
+		if (num_packets == 1 &&
+		    dccp_ackvec_state(current_head) == state &&
+		    dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
 
-	/*
-	 * If we traverse backwards, it should be faster when we have large
-	 * windows. We will be receiving ACKs for stuff we sent a while back
-	 * -sorbo.
-	 */
-	list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
-		if (ackno == avr->avr_ack_seqno) {
-			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
-				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), 1,
-				      (unsigned long long)avr->avr_ack_seqno,
-				      (unsigned long long)avr->avr_ack_ackno);
-			dccp_ackvec_throw_record(av, avr);
-			break;
-		} else if (avr->avr_ack_seqno > ackno)
-			break; /* old news */
+			*current_head	+= 1;
+			av->av_buf_ackno = seqno;
+
+		} else if (num_packets > 0) {
+			dccp_ackvec_add_new(av, num_packets, seqno, state);
+		} else {
+			dccp_ackvec_update_old(av, num_packets, seqno, state);
+		}
 	}
 }
 
-static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
-					    struct sock *sk, u64 *ackno,
-					    const unsigned char len,
-					    const unsigned char *vector)
+/**
+ * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
+ * This routine is called when the peer acknowledges the receipt of Ack Vectors
+ * up to and including @ackno. While based on section A.3 of RFC 4340, here
+ * are additional precautions to prevent corrupted buffer state. In particular,
+ * we use tail_ackno to identify outdated records; it always marks the earliest
+ * packet of group (2) in 11.4.2.
+ */
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
 {
-	unsigned char i;
-	struct dccp_ackvec_record *avr;
+	struct dccp_ackvec_record *avr, *next;
+	u8 runlen_now, eff_runlen;
+	s64 delta;
 
-	/* Check if we actually sent an ACK vector */
-	if (list_empty(&av->av_records))
+	avr = dccp_ackvec_lookup(&av->av_records, ackno);
+	if (avr == NULL)
 		return;
+	/*
+	 * Deal with outdated acknowledgments: this arises when e.g. there are
+	 * several old records and the acks from the peer come in slowly. In
+	 * that case we may still have records that pre-date tail_ackno.
+	 */
+	delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
+	if (delta < 0)
+		goto free_records;
+	/*
+	 * Deal with overlapping Ack Vectors: don't subtract more than the
+	 * number of packets between tail_ackno and ack_ackno.
+	 */
+	eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
 
-	i = len;
+	runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
 	/*
-	 * XXX
-	 * I think it might be more efficient to work backwards. See comment on
-	 * rcv_ackno. -sorbo.
+	 * The run length of Ack Vector cells does not decrease over time. If
+	 * the run length is the same as at the time the Ack Vector was sent, we
+	 * free the ack_ptr cell. That cell can however not be freed if the run
+	 * length has increased: in this case we need to move the tail pointer
+	 * backwards (towards higher indices), to its next-oldest neighbour.
 	 */
-	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
-	while (i--) {
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-		u64 ackno_end_rl;
+	if (runlen_now > eff_runlen) {
 
-		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
+		av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
+		av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
 
+		/* This move may not have cleared the overflow flag. */
+		if (av->av_overflow)
+			av->av_overflow = (av->av_buf_head == av->av_buf_tail);
+	} else {
+		av->av_buf_tail = avr->avr_ack_ptr;
 		/*
-		 * If our AVR sequence number is greater than the ack, go
-		 * forward in the AVR list until it is not so.
+		 * We have made sure that avr points to a valid cell within the
+		 * buffer. This cell is either older than head, or equals head
+		 * (empty buffer): in both cases we no longer have any overflow.
 		 */
-		list_for_each_entry_from(avr, &av->av_records, avr_node) {
-			if (!after48(avr->avr_ack_seqno, *ackno))
-				goto found;
-		}
-		/* End of the av_records list, not found, exit */
-		break;
-found:
-		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
-			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
-				dccp_pr_debug("%s ACK vector 0, len=%d, "
-					      "ack_seqno=%llu, ack_ackno=%llu, "
-					      "ACKED!\n",
-					      dccp_role(sk), len,
-					      (unsigned long long)
-					      avr->avr_ack_seqno,
-					      (unsigned long long)
-					      avr->avr_ack_ackno);
-				dccp_ackvec_throw_record(av, avr);
-				break;
-			}
-			/*
-			 * If it wasn't received, continue scanning... we might
-			 * find another one.
-			 */
-		}
+		av->av_overflow = 0;
+	}
 
-		dccp_set_seqno(ackno, ackno_end_rl - 1);
-		++vector;
+	/*
+	 * The peer has acknowledged up to and including ack_ackno. Hence the
+	 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
+	 */
+	av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
+
+free_records:
+	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
 	}
 }
 
-int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
+/*
+ * Routines to keep track of Ack Vectors received in an skb
+ */
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
 {
-	if (len > DCCP_SINGLE_OPT_MAXLEN)
-		return -1;
+	struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
+
+	if (new == NULL)
+		return -ENOBUFS;
+	new->vec   = vec;
+	new->len   = len;
+	new->nonce = nonce;
 
-	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
-	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
-					ackno, len, value);
+	list_add_tail(&new->node, head);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
+
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
+{
+	struct dccp_ackvec_parsed *cur, *next;
+
+	list_for_each_entry_safe(cur, next, parsed_chunks, node)
+		kfree(cur);
+	INIT_LIST_HEAD(parsed_chunks);
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
 
 int __init dccp_ackvec_init(void)
 {
@@ -448,10 +379,9 @@ int __init dccp_ackvec_init(void)
 	if (dccp_ackvec_slab == NULL)
 		goto out_err;
 
-	dccp_ackvec_record_slab =
-			kmem_cache_create("dccp_ackvec_record",
-					  sizeof(struct dccp_ackvec_record),
-					  0, SLAB_HWCACHE_ALIGN, NULL);
+	dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
+					     sizeof(struct dccp_ackvec_record),
+					     0, SLAB_HWCACHE_ALIGN, NULL);
 	if (dccp_ackvec_record_slab == NULL)
 		goto out_destroy_slab;
 
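The patch above replaces the old head/length bookkeeping in ackvec.c with plain modulo arithmetic over a fixed-size array (__ackvec_idx_add, __ackvec_idx_sub and dccp_ackvec_buflen), with the head moving towards lower indices as new entries are registered. The following standalone C sketch is not part of the patch; it only illustrates how those index helpers behave, using an assumed small buffer size in place of DCCPAV_MAX_ACKVEC_LEN.

/*
 * Standalone sketch (not part of the patch): modulo index arithmetic of the
 * new Ack Vector buffer. AV_LEN is an illustrative stand-in; the kernel value
 * is DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS (see ackvec.h below).
 */
#include <stdio.h>
#include <stdint.h>

#define AV_LEN 16

static uint16_t idx_add(uint16_t a, uint16_t b)
{
	return (a + b) % AV_LEN;
}

static uint16_t idx_sub(uint16_t a, uint16_t b)
{
	return idx_add(a, AV_LEN - b);
}

int main(void)
{
	uint16_t tail = AV_LEN - 1;	/* head and tail both start at AV_LEN - 1, as in dccp_ackvec_alloc() */
	uint16_t head = tail;

	printf("empty: live length = %u\n", idx_sub(tail, head));	/* prints 0 */

	head = idx_sub(head, 5);	/* register 5 cells: head moves "left" */
	printf("after 5 cells: head = %u, live length = %u\n",
	       head, idx_sub(tail, head));				/* head = 10, length = 5 */
	return 0;
}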
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 7ea557b7c6b1..e2ab0627a5ff 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
 /*
  *  net/dccp/ackvec.h
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
  *  This program is free software; you can redistribute it and/or modify it
  *  under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
@@ -13,99 +13,124 @@
 
 #include <linux/dccp.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
+/*
+ * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
+ * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
+ * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
+ * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ * The maximum value is bounded by the u16 types for indices and functions.
+ */
+#define DCCPAV_NUM_ACKVECS	2
+#define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
 
 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16
 
-#define DCCP_ACKVEC_STATE_RECEIVED	0
-#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+/* Threshold for coping with large bursts of losses */
+#define DCCPAV_BURST_THRESH	(DCCPAV_MAX_ACKVEC_LEN / 8)
 
-#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+enum dccp_ackvec_states {
+	DCCPAV_RECEIVED =	0x00,
+	DCCPAV_ECN_MARKED =	0x40,
+	DCCPAV_RESERVED =	0x80,
+	DCCPAV_NOT_RECEIVED =	0xC0
+};
+#define DCCPAV_MAX_RUNLEN	0x3F
 
-/** struct dccp_ackvec - ack vector
- *
- * This data structure is the one defined in RFC 4340, Appendix A.
- *
- * @av_buf_head - circular buffer head
- * @av_buf_tail - circular buffer tail
- * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
- *		   buffer (i.e. %av_buf_head)
- * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- *		   by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
+static inline u8 dccp_ackvec_runlen(const u8 *cell)
+{
+	return *cell & DCCPAV_MAX_RUNLEN;
+}
+
+static inline u8 dccp_ackvec_state(const u8 *cell)
+{
+	return *cell & ~DCCPAV_MAX_RUNLEN;
+}
+
+/** struct dccp_ackvec - Ack Vector main data structure
  *
- * @av_records - list of dccp_ackvec_record
- * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * This implements a fixed-size circular buffer within an array and is largely
+ * based on Appendix A of RFC 4340.
  *
- * @av_time - the time in usecs
- * @av_buf - circular buffer of acknowledgeable packets
+ * @av_buf:	   circular buffer storage area
+ * @av_buf_head:   head index; begin of live portion in @av_buf
+ * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
+ * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_tail_ackno: lowest  seqno of acknowledgeable packet recorded in @av_buf
+ * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
+ *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
+ * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
  */
 struct dccp_ackvec {
-	u64			av_buf_ackno;
-	struct list_head	av_records;
-	ktime_t			av_time;
+	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
 	u16			av_buf_head;
-	u16			av_vec_len;
-	u8			av_buf_nonce;
-	u8			av_ack_nonce;
-	u8			av_buf[DCCP_MAX_ACKVEC_LEN];
+	u16			av_buf_tail;
+	u64			av_buf_ackno:48;
+	u64			av_tail_ackno:48;
+	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	u8			av_overflow:1;
+	struct list_head	av_records;
 };
 
-/** struct dccp_ackvec_record - ack vector record
+/** struct dccp_ackvec_record - Records information about sent Ack Vectors
  *
- * ACK vector record as defined in Appendix A of spec.
+ * These list entries define the additional information which the HC-Receiver
+ * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
  *
- * The list is sorted by avr_ack_seqno
+ * @avr_node:	    the list node in @av_records
+ * @avr_ack_seqno:  sequence number of the packet the Ack Vector was sent on
+ * @avr_ack_ackno:  the Ack number that this record/Ack Vector refers to
+ * @avr_ack_ptr:    pointer into @av_buf where this record starts
+ * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
+ * @avr_ack_nonce:  the sum of @av_buf_nonce's at the time this record was sent
  *
- * @avr_node - node in av_records
- * @avr_ack_seqno - sequence number of the packet this record was sent on
- * @avr_ack_ackno - sequence number being acknowledged
- * @avr_ack_ptr - pointer into av_buf where this record starts
- * @avr_ack_nonce - av_ack_nonce at the time this record was sent
- * @avr_sent_len - lenght of the record in av_buf
+ * The list as a whole is sorted in descending order by @avr_ack_seqno.
  */
 struct dccp_ackvec_record {
 	struct list_head avr_node;
-	u64		 avr_ack_seqno;
-	u64		 avr_ack_ackno;
+	u64		 avr_ack_seqno:48;
+	u64		 avr_ack_ackno:48;
 	u16		 avr_ack_ptr;
-	u16		 avr_sent_len;
-	u8		 avr_ack_nonce;
+	u8		 avr_ack_runlen;
+	u8		 avr_ack_nonce:1;
 };
 
-struct sock;
-struct sk_buff;
-
 extern int dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);
 
 extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
 extern void dccp_ackvec_free(struct dccp_ackvec *av);
 
-extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-			   const u64 ackno, const u8 state);
-
-extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
-					struct sock *sk, const u64 ackno);
-extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-			     u64 *ackno, const u8 opt,
-			     const u8 *value, const u8 len);
+extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
+extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
+extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
 
-extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
-
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
-	return av->av_vec_len;
+	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
+
+/**
+ * struct dccp_ackvec_parsed  -  Record offsets of Ack Vectors in skb
+ * @vec:	start of vector (offset into skb)
+ * @len:	length of @vec
+ * @nonce:	whether @vec had an ECN nonce of 0 or 1
+ * @node:	FIFO - arranged in descending order of ack_ackno
+ * This structure is used by CCIDs to access Ack Vectors in a received skb.
+ */
+struct dccp_ackvec_parsed {
+	u8		 *vec,
+			 len,
+			 nonce:1;
+	struct list_head node;
+};
+
+extern int dccp_ackvec_parsed_add(struct list_head *head,
+				  u8 *vec, u8 len, u8 nonce);
+extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
 #endif /* _ACKVEC_H */
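The new ackvec.h above packs each buffer cell into one byte: the upper two bits hold the state (enum dccp_ackvec_states) and the lower six bits a run length, so one cell can stand for up to 64 consecutive packets in the same state (a run length of N covers N+1 packets). The short C sketch below is not part of the patch; it only demonstrates that encoding using the constants defined above.

/* Standalone sketch (not part of the patch): the one-byte cell encoding. */
#include <stdio.h>
#include <stdint.h>

#define DCCPAV_MAX_RUNLEN	0x3F
#define DCCPAV_RECEIVED		0x00
#define DCCPAV_ECN_MARKED	0x40
#define DCCPAV_NOT_RECEIVED	0xC0

static uint8_t cell_state(uint8_t cell)  { return cell & ~DCCPAV_MAX_RUNLEN; }
static uint8_t cell_runlen(uint8_t cell) { return cell &  DCCPAV_MAX_RUNLEN; }

int main(void)
{
	/* "received", run length 3: this cell covers 4 consecutive packets */
	uint8_t cell = DCCPAV_RECEIVED | 3;

	printf("state=0x%02x runlen=%u\n", cell_state(cell), cell_runlen(cell));

	/* a burst-loss cell as written by dccp_ackvec_add_new() above:
	 * "not yet received" with the maximum run length */
	cell = DCCPAV_NOT_RECEIVED | DCCPAV_MAX_RUNLEN;
	printf("state=0x%02x runlen=%u\n", cell_state(cell), cell_runlen(cell));
	return 0;
}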
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 6576eae9e779..e96d5e810039 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 #endif
 }
 
-/* XXX Lame code duplication!
- * returns -1 if none was found.
- * else returns the next offset to use in the function call.
- */
-static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
-			   unsigned char **vec, unsigned char *veclen)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
-	unsigned char *opt_ptr;
-	const unsigned char *opt_end = (unsigned char *)dh +
-					(dh->dccph_doff * 4);
-	unsigned char opt, len;
-	unsigned char *value;
-
-	BUG_ON(offset < 0);
-	options += offset;
-	opt_ptr = options;
-	if (opt_ptr >= opt_end)
-		return -1;
-
-	while (opt_ptr != opt_end) {
-		opt   = *opt_ptr++;
-		len   = 0;
-		value = NULL;
-
-		/* Check if this isn't a single byte option */
-		if (opt > DCCPO_MAX_RESERVED) {
-			if (opt_ptr == opt_end)
-				goto out_invalid_option;
-
-			len = *opt_ptr++;
-			if (len < 3)
-				goto out_invalid_option;
-			/*
-			 * Remove the type and len fields, leaving
-			 * just the value size
-			 */
-			len     -= 2;
-			value    = opt_ptr;
-			opt_ptr += len;
-
-			if (opt_ptr > opt_end)
-				goto out_invalid_option;
-		}
-
-		switch (opt) {
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			*vec	= value;
-			*veclen = len;
-			return offset + (opt_ptr - options);
-		}
-	}
-
-	return -1;
-
-out_invalid_option:
-	DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
-	return -1;
-}
-
 /**
  * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
  * This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 	ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
 }
 
+static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+				     u8 option, u8 *optval, u8 optlen)
+{
+	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+
+	switch (option) {
+	case DCCPO_ACK_VECTOR_0:
+	case DCCPO_ACK_VECTOR_1:
+		return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
+					      option - DCCPO_ACK_VECTOR_0);
+	}
+	return 0;
+}
+
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
+	struct dccp_ackvec_parsed *avp;
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
-	unsigned char *vector;
-	unsigned char veclen;
-	int offset = 0;
 	int done = 0;
 	unsigned int maxincr = 0;
 
@@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* check forward path congestion */
-	/* still didn't send out new data packets */
-	if (hc->tx_seqh == hc->tx_seqt)
+	if (dccp_packet_without_ack(skb))
 		return;
 
-	switch (DCCP_SKB_CB(skb)->dccpd_type) {
-	case DCCP_PKT_ACK:
-	case DCCP_PKT_DATAACK:
-		break;
-	default:
-		return;
-	}
+	/* still didn't send out new data packets */
+	if (hc->tx_seqh == hc->tx_seqt)
+		goto done;
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	if (after48(ackno, hc->tx_high_ack))
@@ -509,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
-	while ((offset = ccid2_ackvector(sk, skb, offset,
-					 &vector, &veclen)) != -1) {
+	list_for_each_entry(avp, &hc->tx_av_chunks, node) {
 		/* go through this ack vector */
-		while (veclen--) {
-			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-			u64 ackno_end_rl = SUB48(ackno, rl);
+		for (; avp->len--; avp->vec++) {
+			u64 ackno_end_rl = SUB48(ackno,
+						 dccp_ackvec_runlen(avp->vec));
 
-			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+			ccid2_pr_debug("ackvec %llu |%u,%u|\n",
 				       (unsigned long long)ackno,
-				       (unsigned long long)ackno_end_rl);
+				       dccp_ackvec_state(avp->vec) >> 6,
+				       dccp_ackvec_runlen(avp->vec));
 			/* if the seqno we are analyzing is larger than the
 			 * current ackno, then move towards the tail of our
 			 * seqnos.
@@ -537,17 +482,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = *vector &
-						 DCCP_ACKVEC_STATE_MASK;
+				const u8 state = dccp_ackvec_state(avp->vec);
 
 				/* new packet received or marked */
-				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+				if (state != DCCPAV_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
-					if (state ==
-					    DCCP_ACKVEC_STATE_ECN_MARKED) {
+					if (state == DCCPAV_ECN_MARKED)
 						ccid2_congestion_event(sk,
 								       seqp);
-					} else
+					else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
 
@@ -566,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				break;
 
 			ackno = SUB48(ackno_end_rl, 1);
-			vector++;
 		}
 		if (done)
 			break;
@@ -634,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		sk_stop_timer(sk, &hc->tx_rtotimer);
 	else
 		sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
+done:
 	/* check if incoming Acks allow pending packets to be sent */
 	if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
 		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -666,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hc->tx_last_cong = ccid2_time_stamp;
 	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 			(unsigned long)sk);
+	INIT_LIST_HEAD(&hc->tx_av_chunks);
 	return 0;
 }
 
@@ -699,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 struct ccid_operations ccid2_ops = {
-	.ccid_id		= DCCPC_CCID2,
-	.ccid_name		= "TCP-like",
-	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
-	.ccid_hc_tx_init	= ccid2_hc_tx_init,
-	.ccid_hc_tx_exit	= ccid2_hc_tx_exit,
-	.ccid_hc_tx_send_packet	= ccid2_hc_tx_send_packet,
-	.ccid_hc_tx_packet_sent	= ccid2_hc_tx_packet_sent,
-	.ccid_hc_tx_packet_recv	= ccid2_hc_tx_packet_recv,
-	.ccid_hc_rx_obj_size	= sizeof(struct ccid2_hc_rx_sock),
-	.ccid_hc_rx_packet_recv	= ccid2_hc_rx_packet_recv,
+	.ccid_id		  = DCCPC_CCID2,
+	.ccid_name		  = "TCP-like",
+	.ccid_hc_tx_obj_size	  = sizeof(struct ccid2_hc_tx_sock),
+	.ccid_hc_tx_init	  = ccid2_hc_tx_init,
+	.ccid_hc_tx_exit	  = ccid2_hc_tx_exit,
+	.ccid_hc_tx_send_packet	  = ccid2_hc_tx_send_packet,
+	.ccid_hc_tx_packet_sent	  = ccid2_hc_tx_packet_sent,
+	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
+	.ccid_hc_tx_packet_recv	  = ccid2_hc_tx_packet_recv,
+	.ccid_hc_rx_obj_size	  = sizeof(struct ccid2_hc_rx_sock),
+	.ccid_hc_rx_packet_recv	  = ccid2_hc_rx_packet_recv,
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 25cb6b216eda..e9985dafc2c7 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -55,6 +55,7 @@ struct ccid2_seq {
  * @tx_rtt_seq: to decay RTTVAR at most once per flight
  * @tx_rpseq: last consecutive seqno
  * @tx_rpdupack: dupacks since rpseq
+ * @tx_av_chunks: list of Ack Vectors received on current skb
  */
 struct ccid2_hc_tx_sock {
 	u32			tx_cwnd;
@@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock {
 	int			tx_rpdupack;
 	u32			tx_last_cong;
 	u64			tx_high_ack;
+	struct list_head	tx_av_chunks;
 };
 
 static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a8ed459508b2..48ad5d9da7cb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
+/*
+ * TX Packet Dequeueing Interface
+ */
+extern void		dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+extern bool		dccp_qpolicy_full(struct sock *sk);
+extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
+extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
+extern bool		dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
+
+/*
+ * TX Packet Output and TX Timers
+ */
 extern void dccp_write_xmit(struct sock *sk);
 extern void dccp_write_space(struct sock *sk);
 extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
@@ -457,12 +470,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 	dp->dccps_awh = dp->dccps_gss;
 }
 
+static inline int dccp_ackvec_pending(const struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
+	       !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
+}
+
 static inline int dccp_ack_pending(const struct sock *sk)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	return (dp->dccps_hc_rx_ackvec != NULL &&
-		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-	       inet_csk_ack_scheduled(sk);
+	return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
 }
 
 extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index e424a09e83f6..15af247ea007 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
 	dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
 }
 
-static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
 
-	if (dp->dccps_hc_rx_ackvec != NULL)
-		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
-					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	if (av == NULL)
+		return;
+	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	dccp_ackvec_input(av, skb);
 }
 
 static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
@@ -366,22 +368,13 @@ discard:
 int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 const struct dccp_hdr *dh, const unsigned len)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-
 	if (dccp_check_seqno(sk, skb))
 		goto discard;
 
 	if (dccp_parse_options(sk, NULL, skb))
 		return 1;
 
-	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-		dccp_event_ack_recv(sk, skb);
-
-	if (dp->dccps_hc_rx_ackvec != NULL &&
-	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq,
-			    DCCP_ACKVEC_STATE_RECEIVED))
-		goto discard;
+	dccp_handle_ackvec_processing(sk, skb);
 	dccp_deliver_input_to_ccids(sk, skb);
 
 	return __dccp_rcv_established(sk, skb, dh, len);
@@ -633,15 +626,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dccp_parse_options(sk, NULL, skb))
 			return 1;
 
-		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-			dccp_event_ack_recv(sk, skb);
-
-		if (dp->dccps_hc_rx_ackvec != NULL &&
-		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq,
-				    DCCP_ACKVEC_STATE_RECEIVED))
-			goto discard;
-
+		dccp_handle_ackvec_processing(sk, skb);
 		dccp_deliver_input_to_ccids(sk, skb);
 	}
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f69ea114829..45a434f94169 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, | |||
462 | { | 462 | { |
463 | struct rtable *rt; | 463 | struct rtable *rt; |
464 | struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, | 464 | struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, |
465 | .nl_u = { .ip4_u = | 465 | .fl4_dst = ip_hdr(skb)->saddr, |
466 | { .daddr = ip_hdr(skb)->saddr, | 466 | .fl4_src = ip_hdr(skb)->daddr, |
467 | .saddr = ip_hdr(skb)->daddr, | 467 | .fl4_tos = RT_CONN_FLAGS(sk), |
468 | .tos = RT_CONN_FLAGS(sk) } }, | ||
469 | .proto = sk->sk_protocol, | 468 | .proto = sk->sk_protocol, |
470 | .uli_u = { .ports = | 469 | .fl_ip_sport = dccp_hdr(skb)->dccph_dport, |
471 | { .sport = dccp_hdr(skb)->dccph_dport, | 470 | .fl_ip_dport = dccp_hdr(skb)->dccph_sport |
472 | .dport = dccp_hdr(skb)->dccph_sport } | ||
473 | } | ||
474 | }; | 471 | }; |
475 | 472 | ||
476 | security_skb_classify_flow(skb, &fl); | 473 | security_skb_classify_flow(skb, &fl); |
diff --git a/net/dccp/options.c b/net/dccp/options.c index cd3061813009..f06ffcfc8d71 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
54 | struct dccp_sock *dp = dccp_sk(sk); | 54 | struct dccp_sock *dp = dccp_sk(sk); |
55 | const struct dccp_hdr *dh = dccp_hdr(skb); | 55 | const struct dccp_hdr *dh = dccp_hdr(skb); |
56 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; | 56 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; |
57 | u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | ||
58 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | 57 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); |
59 | unsigned char *opt_ptr = options; | 58 | unsigned char *opt_ptr = options; |
60 | const unsigned char *opt_end = (unsigned char *)dh + | 59 | const unsigned char *opt_end = (unsigned char *)dh + |
@@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
129 | if (rc) | 128 | if (rc) |
130 | goto out_featneg_failed; | 129 | goto out_featneg_failed; |
131 | break; | 130 | break; |
132 | case DCCPO_ACK_VECTOR_0: | ||
133 | case DCCPO_ACK_VECTOR_1: | ||
134 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ | ||
135 | break; | ||
136 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
137 | dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) | ||
138 | goto out_invalid_option; | ||
139 | break; | ||
140 | case DCCPO_TIMESTAMP: | 131 | case DCCPO_TIMESTAMP: |
141 | if (len != 4) | 132 | if (len != 4) |
142 | goto out_invalid_option; | 133 | goto out_invalid_option; |
@@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
226 | pkt_type, opt, value, len)) | 217 | pkt_type, opt, value, len)) |
227 | goto out_invalid_option; | 218 | goto out_invalid_option; |
228 | break; | 219 | break; |
220 | case DCCPO_ACK_VECTOR_0: | ||
221 | case DCCPO_ACK_VECTOR_1: | ||
222 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ | ||
223 | break; | ||
224 | /* | ||
225 | * Ack vectors are processed by the TX CCID if it is | ||
226 | * interested. The RX CCID need not parse Ack Vectors, | ||
227 | * since it is only interested in clearing old state. | ||
228 | * Fall through. | ||
229 | */ | ||
229 | case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: | 230 | case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: |
230 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, | 231 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, |
231 | pkt_type, opt, value, len)) | 232 | pkt_type, opt, value, len)) |
@@ -340,6 +341,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) | |||
340 | return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; | 341 | return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; |
341 | } | 342 | } |
342 | 343 | ||
344 | /* FIXME: This function is currently not used anywhere */ | ||
343 | int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) | 345 | int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) |
344 | { | 346 | { |
345 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | 347 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); |
@@ -424,6 +426,83 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, | |||
424 | return 0; | 426 | return 0; |
425 | } | 427 | } |
426 | 428 | ||
429 | static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | ||
430 | { | ||
431 | struct dccp_sock *dp = dccp_sk(sk); | ||
432 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; | ||
433 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
434 | const u16 buflen = dccp_ackvec_buflen(av); | ||
435 | /* Figure out how many options we need to represent the Ack Vector */ | ||
436 | const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); | ||
437 | u16 len = buflen + 2 * nr_opts; | ||
438 | u8 i, nonce = 0; | ||
439 | const unsigned char *tail, *from; | ||
440 | unsigned char *to; | ||
441 | |||
442 | if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
443 | DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, | ||
444 | dccp_packet_name(dcb->dccpd_type)); | ||
445 | return -1; | ||
446 | } | ||
447 | /* | ||
448 | * Since Ack Vectors are variable-length, we cannot always predict | ||
449 | * their size. To catch exception cases where the space is running out | ||
450 | * on the skb, a separate Sync is scheduled to carry the Ack Vector. | ||
451 | */ | ||
452 | if (len > DCCPAV_MIN_OPTLEN && | ||
453 | len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { | ||
454 | DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " | ||
455 | "MPS=%u ==> reduce payload size?\n", len, skb->len, | ||
456 | dcb->dccpd_opt_len, dp->dccps_mss_cache); | ||
457 | dp->dccps_sync_scheduled = 1; | ||
458 | return 0; | ||
459 | } | ||
460 | dcb->dccpd_opt_len += len; | ||
461 | |||
462 | to = skb_push(skb, len); | ||
463 | len = buflen; | ||
464 | from = av->av_buf + av->av_buf_head; | ||
465 | tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; | ||
466 | |||
467 | for (i = 0; i < nr_opts; ++i) { | ||
468 | int copylen = len; | ||
469 | |||
470 | if (len > DCCP_SINGLE_OPT_MAXLEN) | ||
471 | copylen = DCCP_SINGLE_OPT_MAXLEN; | ||
472 | |||
473 | /* | ||
474 | * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via | ||
475 | * its type; ack_nonce is the one-bit sum (XOR) of all individual buf_nonce's. | ||
476 | */ | ||
477 | nonce ^= av->av_buf_nonce[i]; | ||
478 | |||
479 | *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i]; | ||
480 | *to++ = copylen + 2; | ||
481 | |||
482 | /* Check if buf_head wraps */ | ||
483 | if (from + copylen > tail) { | ||
484 | const u16 tailsize = tail - from; | ||
485 | |||
486 | memcpy(to, from, tailsize); | ||
487 | to += tailsize; | ||
488 | len -= tailsize; | ||
489 | copylen -= tailsize; | ||
490 | from = av->av_buf; | ||
491 | } | ||
492 | |||
493 | memcpy(to, from, copylen); | ||
494 | from += copylen; | ||
495 | to += copylen; | ||
496 | len -= copylen; | ||
497 | } | ||
498 | /* | ||
499 | * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. | ||
500 | */ | ||
501 | if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) | ||
502 | return -ENOBUFS; | ||
503 | return 0; | ||
504 | } | ||
505 | |||
427 | /** | 506 | /** |
428 | * dccp_insert_option_mandatory - Mandatory option (5.8.2) | 507 | * dccp_insert_option_mandatory - Mandatory option (5.8.2) |
429 | * Note that since we are using skb_push, this function needs to be called | 508 | * Note that since we are using skb_push, this function needs to be called |
@@ -519,8 +598,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
519 | if (dccp_insert_option_timestamp(skb)) | 598 | if (dccp_insert_option_timestamp(skb)) |
520 | return -1; | 599 | return -1; |
521 | 600 | ||
522 | } else if (dp->dccps_hc_rx_ackvec != NULL && | 601 | } else if (dccp_ackvec_pending(sk) && |
523 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && | ||
524 | dccp_insert_option_ackvec(sk, skb)) { | 602 | dccp_insert_option_ackvec(sk, skb)) { |
525 | return -1; | 603 | return -1; |
526 | } | 604 | } |
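Most of dccp_insert_option_ackvec() above is the two-piece copy needed because the Ack Vector lives in a circular buffer whose live region can wrap past the end of av_buf. Below is a small self-contained illustration of that wrap-around copy, simplified to a single copy and using a plain byte array in place of the Ack Vector buffer; all names in it are illustrative, not kernel symbols.

#include <stdio.h>
#include <string.h>

#define BUF_LEN 8	/* stands in for DCCPAV_MAX_ACKVEC_LEN */

/* Copy 'len' bytes starting at index 'head' out of a circular buffer. */
static void copy_from_ring(const unsigned char *buf, size_t head,
			   size_t len, unsigned char *to)
{
	const unsigned char *from = buf + head;
	const unsigned char *tail = buf + BUF_LEN;

	if (from + len > tail) {	/* live region wraps past the buffer end */
		size_t tailsize = tail - from;

		memcpy(to, from, tailsize);
		to += tailsize;
		len -= tailsize;
		from = buf;		/* continue at the start of the buffer */
	}
	memcpy(to, from, len);
}

int main(void)
{
	unsigned char ring[BUF_LEN] = { 'E', 'F', 'G', 'H', 'A', 'B', 'C', 'D' };
	unsigned char out[BUF_LEN + 1] = { 0 };

	copy_from_ring(ring, 4, 8, out);	/* head at index 4, copy wraps */
	printf("%s\n", out);			/* prints ABCDEFGH */
	return 0;
}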
diff --git a/net/dccp/output.c b/net/dccp/output.c index 45b91853f5ae..784d30210543 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk) | |||
242 | { | 242 | { |
243 | int err, len; | 243 | int err, len; |
244 | struct dccp_sock *dp = dccp_sk(sk); | 244 | struct dccp_sock *dp = dccp_sk(sk); |
245 | struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); | 245 | struct sk_buff *skb = dccp_qpolicy_pop(sk); |
246 | 246 | ||
247 | if (unlikely(skb == NULL)) | 247 | if (unlikely(skb == NULL)) |
248 | return; | 248 | return; |
@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk) | |||
283 | * any local drop will eventually be reported via receiver feedback. | 283 | * any local drop will eventually be reported via receiver feedback. |
284 | */ | 284 | */ |
285 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); | 285 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); |
286 | |||
287 | /* | ||
288 | * If the CCID needs to transfer additional header options out-of-band | ||
289 | * (e.g. Ack Vectors or feature-negotiation options), it activates this | ||
290 | * flag to schedule a Sync. The Sync will automatically incorporate all | ||
291 | * currently pending header options, thus clearing the backlog. | ||
292 | */ | ||
293 | if (dp->dccps_sync_scheduled) | ||
294 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); | ||
286 | } | 295 | } |
287 | 296 | ||
288 | /** | 297 | /** |
@@ -336,7 +345,7 @@ void dccp_write_xmit(struct sock *sk) | |||
336 | struct dccp_sock *dp = dccp_sk(sk); | 345 | struct dccp_sock *dp = dccp_sk(sk); |
337 | struct sk_buff *skb; | 346 | struct sk_buff *skb; |
338 | 347 | ||
339 | while ((skb = skb_peek(&sk->sk_write_queue))) { | 348 | while ((skb = dccp_qpolicy_top(sk))) { |
340 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 349 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); |
341 | 350 | ||
342 | switch (ccid_packet_dequeue_eval(rc)) { | 351 | switch (ccid_packet_dequeue_eval(rc)) { |
@@ -350,8 +359,7 @@ void dccp_write_xmit(struct sock *sk) | |||
350 | dccp_xmit_packet(sk); | 359 | dccp_xmit_packet(sk); |
351 | break; | 360 | break; |
352 | case CCID_PACKET_ERR: | 361 | case CCID_PACKET_ERR: |
353 | skb_dequeue(&sk->sk_write_queue); | 362 | dccp_qpolicy_drop(sk, skb); |
354 | kfree_skb(skb); | ||
355 | dccp_pr_debug("packet discarded due to err=%d\n", rc); | 363 | dccp_pr_debug("packet discarded due to err=%d\n", rc); |
356 | } | 364 | } |
357 | } | 365 | } |
@@ -636,6 +644,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, | |||
636 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; | 644 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; |
637 | DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; | 645 | DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; |
638 | 646 | ||
647 | /* | ||
648 | * Clear the flag in case the Sync was scheduled for out-of-band data, | ||
649 | * such as carrying a long Ack Vector. | ||
650 | */ | ||
651 | dccp_sk(sk)->dccps_sync_scheduled = 0; | ||
652 | |||
639 | dccp_transmit_skb(sk, skb); | 653 | dccp_transmit_skb(sk, skb); |
640 | } | 654 | } |
641 | 655 | ||
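The dccps_sync_scheduled handling added here is split across three files; the comment block below is a compressed sketch of the round trip, restating only what the hunks above and in options.c already do.

/*
 * dccps_sync_scheduled round trip (sketch of the patched flow):
 *
 *   dccp_insert_option_ackvec()    Ack Vector does not fit on this skb
 *       dp->dccps_sync_scheduled = 1;
 *
 *   dccp_xmit_packet()             after the data packet has been sent
 *       if (dp->dccps_sync_scheduled)
 *               dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
 *
 *   dccp_send_sync()               the Sync carries the pending options
 *       dccp_sk(sk)->dccps_sync_scheduled = 0;
 */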
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fcea..152975d942d9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | 186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; |
187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; | 187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; |
188 | dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; | ||
188 | 189 | ||
189 | dccp_init_xmit_timers(sk); | 190 | dccp_init_xmit_timers(sk); |
190 | 191 | ||
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, | |||
532 | case DCCP_SOCKOPT_RECV_CSCOV: | 533 | case DCCP_SOCKOPT_RECV_CSCOV: |
533 | err = dccp_setsockopt_cscov(sk, val, true); | 534 | err = dccp_setsockopt_cscov(sk, val, true); |
534 | break; | 535 | break; |
536 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
537 | if (sk->sk_state != DCCP_CLOSED) | ||
538 | err = -EISCONN; | ||
539 | else if (val < 0 || val >= DCCPQ_POLICY_MAX) | ||
540 | err = -EINVAL; | ||
541 | else | ||
542 | dp->dccps_qpolicy = val; | ||
543 | break; | ||
544 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
545 | if (val < 0) | ||
546 | err = -EINVAL; | ||
547 | else | ||
548 | dp->dccps_tx_qlen = val; | ||
549 | break; | ||
535 | default: | 550 | default: |
536 | err = -ENOPROTOOPT; | 551 | err = -ENOPROTOOPT; |
537 | break; | 552 | break; |
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
639 | case DCCP_SOCKOPT_RECV_CSCOV: | 654 | case DCCP_SOCKOPT_RECV_CSCOV: |
640 | val = dp->dccps_pcrlen; | 655 | val = dp->dccps_pcrlen; |
641 | break; | 656 | break; |
657 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
658 | val = dp->dccps_qpolicy; | ||
659 | break; | ||
660 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
661 | val = dp->dccps_tx_qlen; | ||
662 | break; | ||
642 | case 128 ... 191: | 663 | case 128 ... 191: |
643 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | 664 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, |
644 | len, (u32 __user *)optval, optlen); | 665 | len, (u32 __user *)optval, optlen); |
@@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
681 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); | 702 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); |
682 | #endif | 703 | #endif |
683 | 704 | ||
705 | static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) | ||
706 | { | ||
707 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); | ||
708 | |||
709 | /* | ||
710 | * Assign an (opaque) qpolicy priority value to skb->priority. | ||
711 | * | ||
712 | * We are overloading this skb field for use with the qpolicy subsystem. | ||
713 | * The skb->priority is normally used for the SO_PRIORITY option, which | ||
714 | * is initialised from sk_priority. Since the assignment of sk_priority | ||
715 | * to skb->priority happens later (on layer 3), we overload this field | ||
716 | * for use with queueing priorities as long as the skb is on layer 4. | ||
717 | * The default priority value (if nothing is set) is 0. | ||
718 | */ | ||
719 | skb->priority = 0; | ||
720 | |||
721 | for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
722 | |||
723 | if (!CMSG_OK(msg, cmsg)) | ||
724 | return -EINVAL; | ||
725 | |||
726 | if (cmsg->cmsg_level != SOL_DCCP) | ||
727 | continue; | ||
728 | |||
729 | if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && | ||
730 | !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) | ||
731 | return -EINVAL; | ||
732 | |||
733 | switch (cmsg->cmsg_type) { | ||
734 | case DCCP_SCM_PRIORITY: | ||
735 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) | ||
736 | return -EINVAL; | ||
737 | skb->priority = *(__u32 *)CMSG_DATA(cmsg); | ||
738 | break; | ||
739 | default: | ||
740 | return -EINVAL; | ||
741 | } | ||
742 | } | ||
743 | return 0; | ||
744 | } | ||
745 | |||
684 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 746 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
685 | size_t len) | 747 | size_t len) |
686 | { | 748 | { |
@@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
696 | 758 | ||
697 | lock_sock(sk); | 759 | lock_sock(sk); |
698 | 760 | ||
699 | if (sysctl_dccp_tx_qlen && | 761 | if (dccp_qpolicy_full(sk)) { |
700 | (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { | ||
701 | rc = -EAGAIN; | 762 | rc = -EAGAIN; |
702 | goto out_release; | 763 | goto out_release; |
703 | } | 764 | } |
@@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
725 | if (rc != 0) | 786 | if (rc != 0) |
726 | goto out_discard; | 787 | goto out_discard; |
727 | 788 | ||
728 | skb_queue_tail(&sk->sk_write_queue, skb); | 789 | rc = dccp_msghdr_parse(msg, skb); |
790 | if (rc != 0) | ||
791 | goto out_discard; | ||
792 | |||
793 | dccp_qpolicy_push(sk, skb); | ||
729 | /* | 794 | /* |
730 | * The xmit_timer is set if the TX CCID is rate-based and will expire | 795 | * The xmit_timer is set if the TX CCID is rate-based and will expire |
731 | * when congestion control permits to release further packets into the | 796 | * when congestion control permits to release further packets into the |
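From userspace, the new socket options and ancillary data above would be used roughly as in the sketch below. It assumes the matching <linux/dccp.h> update of this patch set (defining DCCPQ_POLICY_PRIO, DCCP_SOCKOPT_QPOLICY_ID, DCCP_SOCKOPT_QPOLICY_TXQLEN and DCCP_SCM_PRIORITY) is installed, and that fd is a DCCP socket created and connected elsewhere.

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/types.h>
#include <linux/dccp.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269		/* from <linux/socket.h> */
#endif

/* Select the priority-based policy; only allowed while the socket is still closed. */
static int use_prio_policy(int fd)
{
	int policy = DCCPQ_POLICY_PRIO, qlen = 16;

	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_ID,
		       &policy, sizeof(policy)) < 0)
		return -1;
	return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_TXQLEN,
			  &qlen, sizeof(qlen));
}

/* Send one message, tagging it with a qpolicy priority via a SOL_DCCP cmsg. */
static ssize_t send_with_priority(int fd, const void *data, size_t len, __u32 prio)
{
	union {
		char buf[CMSG_SPACE(sizeof(__u32))];
		struct cmsghdr align;
	} u;
	struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= u.buf,
		.msg_controllen	= sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_DCCP;
	cmsg->cmsg_type  = DCCP_SCM_PRIORITY;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(prio));
	memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));

	return sendmsg(fd, &msg, 0);
}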
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..63c30bfa4703 --- /dev/null +++ b/net/dccp/qpolicy.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * net/dccp/qpolicy.c | ||
3 | * | ||
4 | * Policy-based packet dequeueing interface for DCCP. | ||
5 | * | ||
6 | * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License v2 | ||
10 | * as published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include "dccp.h" | ||
13 | |||
14 | /* | ||
15 | * Simple Dequeueing Policy: | ||
16 | * If tx_qlen is different from 0, enqueue up to tx_qlen elements. | ||
17 | */ | ||
18 | static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) | ||
19 | { | ||
20 | skb_queue_tail(&sk->sk_write_queue, skb); | ||
21 | } | ||
22 | |||
23 | static bool qpolicy_simple_full(struct sock *sk) | ||
24 | { | ||
25 | return dccp_sk(sk)->dccps_tx_qlen && | ||
26 | sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; | ||
27 | } | ||
28 | |||
29 | static struct sk_buff *qpolicy_simple_top(struct sock *sk) | ||
30 | { | ||
31 | return skb_peek(&sk->sk_write_queue); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * Priority-based Dequeueing Policy: | ||
36 | * If tx_qlen is different from 0 and the queue has reached its upper bound | ||
37 | * of tx_qlen elements, drop the lowest-priority queued packet to make room. | ||
38 | */ | ||
39 | static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) | ||
40 | { | ||
41 | struct sk_buff *skb, *best = NULL; | ||
42 | |||
43 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
44 | if (best == NULL || skb->priority > best->priority) | ||
45 | best = skb; | ||
46 | return best; | ||
47 | } | ||
48 | |||
49 | static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) | ||
50 | { | ||
51 | struct sk_buff *skb, *worst = NULL; | ||
52 | |||
53 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
54 | if (worst == NULL || skb->priority < worst->priority) | ||
55 | worst = skb; | ||
56 | return worst; | ||
57 | } | ||
58 | |||
59 | static bool qpolicy_prio_full(struct sock *sk) | ||
60 | { | ||
61 | if (qpolicy_simple_full(sk)) | ||
62 | dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); | ||
63 | return false; | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface | ||
68 | * @push: add a new @skb to the write queue | ||
69 | * @full: indicates that no more packets will be admitted | ||
70 | * @top: peeks at whatever the queueing policy defines as its `top' | ||
71 | */ | ||
72 | static struct dccp_qpolicy_operations { | ||
73 | void (*push) (struct sock *sk, struct sk_buff *skb); | ||
74 | bool (*full) (struct sock *sk); | ||
75 | struct sk_buff* (*top) (struct sock *sk); | ||
76 | __be32 params; | ||
77 | |||
78 | } qpol_table[DCCPQ_POLICY_MAX] = { | ||
79 | [DCCPQ_POLICY_SIMPLE] = { | ||
80 | .push = qpolicy_simple_push, | ||
81 | .full = qpolicy_simple_full, | ||
82 | .top = qpolicy_simple_top, | ||
83 | .params = 0, | ||
84 | }, | ||
85 | [DCCPQ_POLICY_PRIO] = { | ||
86 | .push = qpolicy_simple_push, | ||
87 | .full = qpolicy_prio_full, | ||
88 | .top = qpolicy_prio_best_skb, | ||
89 | .params = DCCP_SCM_PRIORITY, | ||
90 | }, | ||
91 | }; | ||
92 | |||
93 | /* | ||
94 | * Externally visible interface | ||
95 | */ | ||
96 | void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) | ||
97 | { | ||
98 | qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); | ||
99 | } | ||
100 | |||
101 | bool dccp_qpolicy_full(struct sock *sk) | ||
102 | { | ||
103 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); | ||
104 | } | ||
105 | |||
106 | void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) | ||
107 | { | ||
108 | if (skb != NULL) { | ||
109 | skb_unlink(skb, &sk->sk_write_queue); | ||
110 | kfree_skb(skb); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | struct sk_buff *dccp_qpolicy_top(struct sock *sk) | ||
115 | { | ||
116 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); | ||
117 | } | ||
118 | |||
119 | struct sk_buff *dccp_qpolicy_pop(struct sock *sk) | ||
120 | { | ||
121 | struct sk_buff *skb = dccp_qpolicy_top(sk); | ||
122 | |||
123 | if (skb != NULL) { | ||
124 | /* Clear any skb fields that we used internally */ | ||
125 | skb->priority = 0; | ||
126 | skb_unlink(skb, &sk->sk_write_queue); | ||
127 | } | ||
128 | return skb; | ||
129 | } | ||
130 | |||
131 | bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) | ||
132 | { | ||
133 | /* check if exactly one bit is set */ | ||
134 | if (!param || (param & (param - 1))) | ||
135 | return false; | ||
136 | return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; | ||
137 | } | ||
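dccp_qpolicy_param_ok() relies on the standard x & (x - 1) trick: that expression clears the lowest set bit, so it is zero exactly when at most one bit is set. A tiny standalone check of the same test, outside the kernel and for illustration only:

#include <assert.h>
#include <stdbool.h>

/* True iff 'param' has exactly one bit set, as tested in dccp_qpolicy_param_ok(). */
static bool exactly_one_bit_set(unsigned int param)
{
	return param != 0 && (param & (param - 1)) == 0;
}

int main(void)
{
	assert(!exactly_one_bit_set(0));	/* no bit set */
	assert( exactly_one_bit_set(1));	/* single bit */
	assert( exactly_one_bit_set(8));	/* single bit */
	assert(!exactly_one_bit_set(6));	/* two bits set */
	return 0;
}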