diff options
-rw-r--r-- | include/linux/dccp.h | 432 | ||||
-rw-r--r-- | include/linux/in.h | 1 | ||||
-rw-r--r-- | include/linux/net.h | 1 | ||||
-rw-r--r-- | include/linux/socket.h | 1 | ||||
-rw-r--r-- | net/Kconfig | 1 | ||||
-rw-r--r-- | net/Makefile | 1 | ||||
-rw-r--r-- | net/dccp/Kconfig | 24 | ||||
-rw-r--r-- | net/dccp/Makefile | 5 | ||||
-rw-r--r-- | net/dccp/ccid.c | 139 | ||||
-rw-r--r-- | net/dccp/ccid.h | 156 | ||||
-rw-r--r-- | net/dccp/ccids/Kconfig | 25 | ||||
-rw-r--r-- | net/dccp/ccids/Makefile | 3 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 2164 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 137 | ||||
-rw-r--r-- | net/dccp/dccp.h | 422 | ||||
-rw-r--r-- | net/dccp/input.c | 510 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 1289 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 199 | ||||
-rw-r--r-- | net/dccp/options.c | 763 | ||||
-rw-r--r-- | net/dccp/output.c | 406 | ||||
-rw-r--r-- | net/dccp/proto.c | 818 | ||||
-rw-r--r-- | net/dccp/timer.c | 249 |
22 files changed, 7746 insertions, 0 deletions
diff --git a/include/linux/dccp.h b/include/linux/dccp.h new file mode 100644 index 000000000000..e3b4bf7346bb --- /dev/null +++ b/include/linux/dccp.h | |||
@@ -0,0 +1,432 @@ | |||
1 | #ifndef _LINUX_DCCP_H | ||
2 | #define _LINUX_DCCP_H | ||
3 | |||
4 | #include <linux/in.h> | ||
5 | #include <linux/list.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/uio.h> | ||
8 | #include <linux/workqueue.h> | ||
9 | |||
10 | #include <net/inet_connection_sock.h> | ||
11 | #include <net/sock.h> | ||
12 | #include <net/tcp_states.h> | ||
13 | #include <net/tcp.h> | ||
14 | |||
15 | /* Socket address for DCCP: a struct sockaddr_in followed by a service value. FIXME (original author): this is utterly wrong */ | ||
16 | struct sockaddr_dccp { | ||
17 | struct sockaddr_in in; | ||
18 | unsigned int service; | ||
19 | }; | ||
20 | |||
21 | enum dccp_state { /* DCCP connection states, each aliased to a TCP state constant */ | ||
22 | DCCP_OPEN = TCP_ESTABLISHED, | ||
23 | DCCP_REQUESTING = TCP_SYN_SENT, | ||
24 | DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: | ||
25 | This mapping is horrible: TCP has no | ||
26 | state matching DCCP_PARTOPEN, and | ||
27 | TCP_SYN_RECV is already used by | ||
28 | DCCP_RESPOND. Why not stop mapping | ||
29 | onto TCP states? Now that we no longer | ||
30 | use sk_stream_sendmsg, there does not | ||
31 | seem to be any reason left for us to | ||
32 | do the TCP mapping here */ | ||
33 | DCCP_LISTEN = TCP_LISTEN, | ||
34 | DCCP_RESPOND = TCP_SYN_RECV, | ||
35 | DCCP_CLOSING = TCP_CLOSING, | ||
36 | DCCP_TIME_WAIT = TCP_TIME_WAIT, | ||
37 | DCCP_CLOSED = TCP_CLOSE, | ||
38 | DCCP_MAX_STATES = TCP_MAX_STATES, | ||
39 | }; | ||
40 | |||
41 | #define DCCP_STATE_MASK 0xf | ||
42 | #define DCCP_ACTION_FIN (1<<7) | ||
43 | |||
44 | enum { /* flag counterparts of the dccp_state values, each aliased to the matching TCPF_* constant */ | ||
45 | DCCPF_OPEN = TCPF_ESTABLISHED, | ||
46 | DCCPF_REQUESTING = TCPF_SYN_SENT, | ||
47 | DCCPF_PARTOPEN = TCPF_FIN_WAIT1, | ||
48 | DCCPF_LISTEN = TCPF_LISTEN, | ||
49 | DCCPF_RESPOND = TCPF_SYN_RECV, | ||
50 | DCCPF_CLOSING = TCPF_CLOSING, | ||
51 | DCCPF_TIME_WAIT = TCPF_TIME_WAIT, | ||
52 | DCCPF_CLOSED = TCPF_CLOSE, | ||
53 | }; | ||
54 | |||
55 | /** | ||
56 | * struct dccp_hdr - generic part of DCCP packet header | ||
57 | * | ||
58 | * @dccph_sport - Relevant port on the endpoint that sent this packet | ||
59 | * @dccph_dport - Relevant port on the other endpoint | ||
60 | * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words | ||
61 | * @dccph_ccval - Used by the HC-Sender CCID | ||
62 | * @dccph_cscov - Parts of the packet that are covered by the Checksum field | ||
63 | * @dccph_checksum - Internet checksum, depends on dccph_cscov | ||
64 | * @dccph_x - 0 = 24 bit sequence number, 1 = 48 | ||
65 | * @dccph_type - packet type, one of the DCCP_PKT_ prefixed enum dccp_pkt_type constants | ||
   | * @dccph_reserved - reserved bits | ||
66 | * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x | ||
   | * | ||
   | * The bit-fields are declared in opposite order under __LITTLE_ENDIAN_BITFIELD | ||
   | * and __BIG_ENDIAN_BITFIELD; any other configuration is a build error. | ||
67 | */ | ||
68 | struct dccp_hdr { | ||
69 | __u16 dccph_sport, | ||
70 | dccph_dport; | ||
71 | __u8 dccph_doff; | ||
72 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
73 | __u8 dccph_cscov:4, | ||
74 | dccph_ccval:4; | ||
75 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
76 | __u8 dccph_ccval:4, | ||
77 | dccph_cscov:4; | ||
78 | #else | ||
79 | #error "Adjust your <asm/byteorder.h> defines" | ||
80 | #endif | ||
81 | __u16 dccph_checksum; | ||
82 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
83 | __u32 dccph_x:1, | ||
84 | dccph_type:4, | ||
85 | dccph_reserved:3, | ||
86 | dccph_seq:24; | ||
87 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
88 | __u32 dccph_reserved:3, | ||
89 | dccph_type:4, | ||
90 | dccph_x:1, | ||
91 | dccph_seq:24; | ||
92 | #else | ||
93 | #error "Adjust your <asm/byteorder.h> defines" | ||
94 | #endif | ||
95 | }; | ||
96 | |||
97 | static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) | ||
98 | { | ||
99 | return (struct dccp_hdr *)skb->h.raw; | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * struct dccp_hdr_ext - the low bits of a 48 bit seq packet | ||
104 | * | ||
105 | * @dccph_seq_low - low order bits of a 48 bit seq packet, network byte order | ||
    | * | ||
    | * NOTE(review): the original text said "low 24 bits", but the field is a full | ||
    | * 32-bit word and dccp_hdr_seq() shifts the high part left by 32 before adding | ||
    | * ntohl() of this field - confirm the intended width. | ||
106 | */ | ||
107 | struct dccp_hdr_ext { | ||
108 | __u32 dccph_seq_low; | ||
109 | }; | ||
110 | |||
/* Return the extension word that immediately follows the generic header. */
static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
{
	/* one struct dccp_hdr past the start of the transport header */
	return (struct dccp_hdr_ext *)(dccp_hdr(skb) + 1);
}
115 | |||
116 | static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) | ||
117 | { | ||
118 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
119 | return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); | ||
120 | } | ||
121 | |||
122 | static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) | ||
123 | { | ||
124 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
125 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
126 | __u64 seq_nr = ntohl(dh->dccph_seq << 8); | ||
127 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
128 | __u64 seq_nr = ntohl(dh->dccph_seq); | ||
129 | #else | ||
130 | #error "Adjust your <asm/byteorder.h> defines" | ||
131 | #endif | ||
132 | |||
133 | if (dh->dccph_x != 0) | ||
134 | seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); | ||
135 | |||
136 | return seq_nr; | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * struct dccp_hdr_request - Connection initiation request header | ||
141 | * | ||
142 | * @dccph_req_service - Service to which the client app wants to connect | ||
143 | * The list of options (must be a multiple of 32 bits) is not a member of this struct. | ||
144 | */ | ||
145 | struct dccp_hdr_request { | ||
146 | __u32 dccph_req_service; | ||
147 | }; | ||
148 | |||
149 | static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) | ||
150 | { | ||
151 | return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets | ||
156 | * | ||
    | * @dccph_reserved1 - reserved bits | ||
157 | * @dccph_ack_nr_high - 48 bit ack number high order bits, contains GSR | ||
158 | * @dccph_ack_nr_low - 48 bit ack number low order bits, contains GSR | ||
159 | */ | ||
160 | struct dccp_hdr_ack_bits { | ||
161 | __u32 dccph_reserved1:8, | ||
162 | dccph_ack_nr_high:24; | ||
163 | __u32 dccph_ack_nr_low; | ||
164 | }; | ||
165 | |||
166 | static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) | ||
167 | { | ||
168 | return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
169 | } | ||
170 | |||
171 | static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) | ||
172 | { | ||
173 | const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); | ||
174 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
175 | return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); | ||
176 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
177 | return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); | ||
178 | #else | ||
179 | #error "Adjust your <asm/byteorder.h> defines" | ||
180 | #endif | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * struct dccp_hdr_response - Connection initiation response header | ||
185 | * | ||
186 | * @dccph_resp_ack - acknowledgment bits, see struct dccp_hdr_ack_bits | ||
187 | * (48 bit ack number, contains GSR) | ||
188 | * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request | ||
189 | * The list of options (must be a multiple of 32 bits) is not a member of this struct. | ||
190 | */ | ||
191 | struct dccp_hdr_response { | ||
192 | struct dccp_hdr_ack_bits dccph_resp_ack; | ||
193 | __u32 dccph_resp_service; | ||
194 | }; | ||
195 | |||
196 | static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) | ||
197 | { | ||
198 | return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
199 | } | ||
200 | |||
201 | /** | ||
202 | * struct dccp_hdr_reset - Unconditionally shut down a connection | ||
203 | * | ||
204 | * @dccph_reset_ack - acknowledgment bits, see struct dccp_hdr_ack_bits | ||
205 | * @dccph_reset_code - reset code (presumably one of enum dccp_reset_codes - confirm) | ||
    | * @dccph_reset_data - three data bytes following the reset code | ||
    | * The list of options (must be a multiple of 32 bits) is not a member of this struct. | ||
206 | */ | ||
207 | struct dccp_hdr_reset { | ||
208 | struct dccp_hdr_ack_bits dccph_reset_ack; | ||
209 | __u8 dccph_reset_code, | ||
210 | dccph_reset_data[3]; | ||
211 | }; | ||
212 | |||
213 | static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) | ||
214 | { | ||
215 | return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
216 | } | ||
217 | |||
218 | enum dccp_pkt_type { /* packet types (dccph_type); DCCP_PKT_INVALID follows the last real type and is reused as DCCP_NR_PKT_TYPES */ | ||
219 | DCCP_PKT_REQUEST = 0, | ||
220 | DCCP_PKT_RESPONSE, | ||
221 | DCCP_PKT_DATA, | ||
222 | DCCP_PKT_ACK, | ||
223 | DCCP_PKT_DATAACK, | ||
224 | DCCP_PKT_CLOSEREQ, | ||
225 | DCCP_PKT_CLOSE, | ||
226 | DCCP_PKT_RESET, | ||
227 | DCCP_PKT_SYNC, | ||
228 | DCCP_PKT_SYNCACK, | ||
229 | DCCP_PKT_INVALID, | ||
230 | }; | ||
231 | |||
232 | #define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID | ||
233 | |||
234 | static inline unsigned int dccp_packet_hdr_len(const __u8 type) | ||
235 | { | ||
236 | if (type == DCCP_PKT_DATA) | ||
237 | return 0; | ||
238 | if (type == DCCP_PKT_DATAACK || | ||
239 | type == DCCP_PKT_ACK || | ||
240 | type == DCCP_PKT_SYNC || | ||
241 | type == DCCP_PKT_SYNCACK || | ||
242 | type == DCCP_PKT_CLOSE || | ||
243 | type == DCCP_PKT_CLOSEREQ) | ||
244 | return sizeof(struct dccp_hdr_ack_bits); | ||
245 | if (type == DCCP_PKT_REQUEST) | ||
246 | return sizeof(struct dccp_hdr_request); | ||
247 | if (type == DCCP_PKT_RESPONSE) | ||
248 | return sizeof(struct dccp_hdr_response); | ||
249 | return sizeof(struct dccp_hdr_reset); | ||
250 | } | ||
251 | |||
252 | static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) | ||
253 | { | ||
254 | return dccp_basic_hdr_len(skb) + | ||
255 | dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); | ||
256 | } | ||
257 | |||
258 | enum dccp_reset_codes { /* reset codes; presumably the values carried in dccph_reset_code - confirm */ | ||
259 | DCCP_RESET_CODE_UNSPECIFIED = 0, | ||
260 | DCCP_RESET_CODE_CLOSED, | ||
261 | DCCP_RESET_CODE_ABORTED, | ||
262 | DCCP_RESET_CODE_NO_CONNECTION, | ||
263 | DCCP_RESET_CODE_PACKET_ERROR, | ||
264 | DCCP_RESET_CODE_OPTION_ERROR, | ||
265 | DCCP_RESET_CODE_MANDATORY_ERROR, | ||
266 | DCCP_RESET_CODE_CONNECTION_REFUSED, | ||
267 | DCCP_RESET_CODE_BAD_SERVICE_CODE, | ||
268 | DCCP_RESET_CODE_TOO_BUSY, | ||
269 | DCCP_RESET_CODE_BAD_INIT_COOKIE, | ||
270 | DCCP_RESET_CODE_AGGRESSION_PENALTY, | ||
271 | }; | ||
272 | |||
273 | /* DCCP options: option numbers; 3-31 are marked reserved and 128-255 are CCID specific */ | ||
274 | enum { | ||
275 | DCCPO_PADDING = 0, | ||
276 | DCCPO_MANDATORY = 1, | ||
277 | DCCPO_MIN_RESERVED = 3, | ||
278 | DCCPO_MAX_RESERVED = 31, | ||
279 | DCCPO_NDP_COUNT = 37, | ||
280 | DCCPO_ACK_VECTOR_0 = 38, | ||
281 | DCCPO_ACK_VECTOR_1 = 39, | ||
282 | DCCPO_TIMESTAMP = 41, | ||
283 | DCCPO_TIMESTAMP_ECHO = 42, | ||
284 | DCCPO_ELAPSED_TIME = 43, | ||
285 | DCCPO_MAX = 45, | ||
286 | DCCPO_MIN_CCID_SPECIFIC = 128, | ||
287 | DCCPO_MAX_CCID_SPECIFIC = 255, | ||
288 | }; | ||
289 | |||
290 | /* DCCP features: feature numbers; 128-255 are CCID specific */ | ||
291 | enum { | ||
292 | DCCPF_RESERVED = 0, | ||
293 | DCCPF_SEQUENCE_WINDOW = 3, | ||
294 | DCCPF_SEND_ACK_VECTOR = 6, | ||
295 | DCCPF_SEND_NDP_COUNT = 7, | ||
296 | /* 10-127 reserved */ | ||
297 | DCCPF_MIN_CCID_SPECIFIC = 128, | ||
298 | DCCPF_MAX_CCID_SPECIFIC = 255, | ||
299 | }; | ||
300 | |||
301 | /* initial values for each feature */ | ||
302 | #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 | ||
303 | /* FIXME: for now we're using CCID 3 (TFRC) */ | ||
304 | #define DCCPF_INITIAL_CCID 3 | ||
305 | #define DCCPF_INITIAL_SEND_ACK_VECTOR 0 | ||
306 | /* FIXME: for now we default to 1 but it should really be 0 */ | ||
307 | #define DCCPF_INITIAL_SEND_NDP_COUNT 1 | ||
308 | |||
309 | #define DCCP_NDP_LIMIT 0xFFFFFF | ||
310 | |||
311 | /** | ||
312 | * struct dccp_options - option values for a DCCP connection | ||
313 | * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2) | ||
314 | * @dccpo_ccid - Congestion Control Id (CCID) (section 10) | ||
315 | * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5) | ||
316 | * @dccpo_send_ndp_count - Send NDP Count Feature (section 7.7.2) | ||
317 | */ | ||
318 | struct dccp_options { | ||
319 | __u64 dccpo_sequence_window; | ||
320 | __u8 dccpo_ccid; | ||
321 | __u8 dccpo_send_ack_vector; | ||
322 | __u8 dccpo_send_ndp_count; | ||
323 | }; | ||
324 | |||
325 | extern void __dccp_options_init(struct dccp_options *dccpo); | ||
326 | extern void dccp_options_init(struct dccp_options *dccpo); | ||
327 | extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb); | ||
328 | |||
329 | struct dccp_request_sock { /* dccp_rsk() casts a struct request_sock pointer to this type, so dreq_inet_rsk presumably has to stay the first member - confirm */ | ||
330 | struct inet_request_sock dreq_inet_rsk; | ||
331 | __u64 dreq_iss; /* presumably initial sequence number sent, cf. dccps_iss - confirm */ | ||
332 | __u64 dreq_isr; /* presumably initial sequence number received, cf. dccps_isr - confirm */ | ||
333 | __u32 dreq_service; | ||
334 | }; | ||
335 | |||
/* View a generic request_sock as the DCCP request sock (plain pointer cast). */
static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
{
	struct dccp_request_sock *dreq = (struct dccp_request_sock *)req;

	return dreq;
}
340 | |||
341 | /* Read about the ECN nonce to see why it is 253 */ | ||
342 | #define DCCP_MAX_ACK_VECTOR_LEN 253 | ||
343 | |||
344 | struct dccp_options_received { /* option values kept per socket in dccps_options_received; presumably filled in by dccp_parse_options() - confirm */ | ||
345 | u32 dccpor_ndp:24, | ||
346 | dccpor_ack_vector_len:8; | ||
347 | u32 dccpor_ack_vector_idx:10; | ||
348 | /* 22 bits hole, try to pack */ | ||
349 | u32 dccpor_timestamp; | ||
350 | u32 dccpor_timestamp_echo; | ||
351 | u32 dccpor_elapsed_time; | ||
352 | }; | ||
353 | |||
354 | struct ccid; | ||
355 | |||
356 | enum dccp_role { /* role of a socket; stored in the 2-bit dccps_role field and turned into a string by dccp_role() */ | ||
357 | DCCP_ROLE_UNDEFINED, | ||
358 | DCCP_ROLE_LISTEN, | ||
359 | DCCP_ROLE_CLIENT, | ||
360 | DCCP_ROLE_SERVER, | ||
361 | }; | ||
362 | |||
363 | /** | ||
364 | * struct dccp_sock - DCCP socket state | ||
365 | * | ||
    | * @dccps_inet_connection - embedded inet_connection_sock; has to be the first member | ||
366 | * @dccps_swl - sequence number window low | ||
367 | * @dccps_swh - sequence number window high | ||
368 | * @dccps_awl - acknowledgement number window low | ||
369 | * @dccps_awh - acknowledgement number window high | ||
370 | * @dccps_iss - initial sequence number sent | ||
371 | * @dccps_isr - initial sequence number received | ||
372 | * @dccps_osr - first OPEN sequence number received | ||
373 | * @dccps_gss - greatest sequence number sent | ||
374 | * @dccps_gsr - greatest valid sequence number received | ||
375 | * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss | ||
    | * @dccps_service - presumably the service code of the connection - TODO confirm | ||
376 | * @dccps_timestamp_time - time of latest TIMESTAMP option | ||
377 | * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option | ||
378 | * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) | ||
379 | * @dccps_pmtu_cookie - Last pmtu seen by socket | ||
    | * @dccps_mss_cache - presumably the cached maximum segment size - TODO confirm | ||
380 | * @dccps_avg_packet_size - FIXME: has to be set by the app through some setsockopt or ioctl, CCID3 uses it | ||
381 | * @dccps_role - Role of this sock, one of %dccp_role | ||
382 | * @dccps_ndp_count - number of Non Data Packets since last data packet | ||
    | * @dccps_options - option values for this connection, see struct dccp_options | ||
383 | * @dccps_hc_rx_ackpkts - receiver half connection acked packets | ||
    | * @dccps_hc_rx_ccid_private - opaque pointer, presumably receiver half connection CCID state - confirm | ||
    | * @dccps_hc_tx_ccid_private - opaque pointer, presumably sender half connection CCID state - confirm | ||
    | * @dccps_hc_rx_ccid - struct ccid in use for the receiver half connection | ||
    | * @dccps_hc_tx_ccid - struct ccid in use for the sender half connection | ||
    | * @dccps_options_received - see struct dccp_options_received | ||
384 | */ | ||
385 | struct dccp_sock { | ||
386 | /* inet_connection_sock has to be the first member of dccp_sock */ | ||
387 | struct inet_connection_sock dccps_inet_connection; | ||
388 | __u64 dccps_swl; | ||
389 | __u64 dccps_swh; | ||
390 | __u64 dccps_awl; | ||
391 | __u64 dccps_awh; | ||
392 | __u64 dccps_iss; | ||
393 | __u64 dccps_isr; | ||
394 | __u64 dccps_osr; | ||
395 | __u64 dccps_gss; | ||
396 | __u64 dccps_gsr; | ||
397 | __u64 dccps_gar; | ||
398 | unsigned long dccps_service; | ||
399 | unsigned long dccps_timestamp_time; | ||
400 | __u32 dccps_timestamp_echo; | ||
401 | __u32 dccps_avg_packet_size; | ||
402 | unsigned long dccps_ndp_count; | ||
403 | __u16 dccps_ext_header_len; | ||
404 | __u32 dccps_pmtu_cookie; | ||
405 | __u32 dccps_mss_cache; | ||
406 | struct dccp_options dccps_options; | ||
407 | struct dccp_ackpkts *dccps_hc_rx_ackpkts; | ||
408 | void *dccps_hc_rx_ccid_private; | ||
409 | void *dccps_hc_tx_ccid_private; | ||
410 | struct ccid *dccps_hc_rx_ccid; | ||
411 | struct ccid *dccps_hc_tx_ccid; | ||
412 | struct dccp_options_received dccps_options_received; | ||
413 | enum dccp_role dccps_role:2; | ||
414 | }; | ||
415 | |||
/*
 * View a generic sock as a DCCP sock (plain pointer cast; the embedded
 * inet_connection_sock is the first member of struct dccp_sock).
 */
static inline struct dccp_sock *dccp_sk(const struct sock *sk)
{
	struct dccp_sock *dp = (struct dccp_sock *)sk;

	return dp;
}
420 | |||
421 | static inline const char *dccp_role(const struct sock *sk) | ||
422 | { | ||
423 | switch (dccp_sk(sk)->dccps_role) { | ||
424 | case DCCP_ROLE_UNDEFINED: return "undefined"; | ||
425 | case DCCP_ROLE_LISTEN: return "listen"; | ||
426 | case DCCP_ROLE_SERVER: return "server"; | ||
427 | case DCCP_ROLE_CLIENT: return "client"; | ||
428 | } | ||
429 | return NULL; | ||
430 | } | ||
431 | |||
432 | #endif /* _LINUX_DCCP_H */ | ||
diff --git a/include/linux/in.h b/include/linux/in.h index fb88c66d748d..ba355384016a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h | |||
@@ -32,6 +32,7 @@ enum { | |||
32 | IPPROTO_PUP = 12, /* PUP protocol */ | 32 | IPPROTO_PUP = 12, /* PUP protocol */ |
33 | IPPROTO_UDP = 17, /* User Datagram Protocol */ | 33 | IPPROTO_UDP = 17, /* User Datagram Protocol */ |
34 | IPPROTO_IDP = 22, /* XNS IDP protocol */ | 34 | IPPROTO_IDP = 22, /* XNS IDP protocol */ |
35 | IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ | ||
35 | IPPROTO_RSVP = 46, /* RSVP protocol */ | 36 | IPPROTO_RSVP = 46, /* RSVP protocol */ |
36 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ | 37 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ |
37 | 38 | ||
diff --git a/include/linux/net.h b/include/linux/net.h index 39906619b9d7..5f8b632ff653 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -84,6 +84,7 @@ enum sock_type { | |||
84 | SOCK_RAW = 3, | 84 | SOCK_RAW = 3, |
85 | SOCK_RDM = 4, | 85 | SOCK_RDM = 4, |
86 | SOCK_SEQPACKET = 5, | 86 | SOCK_SEQPACKET = 5, |
87 | SOCK_DCCP = 6, | ||
87 | SOCK_PACKET = 10, | 88 | SOCK_PACKET = 10, |
88 | }; | 89 | }; |
89 | 90 | ||
diff --git a/include/linux/socket.h b/include/linux/socket.h index a5c7d96e4d2e..ddf22559f484 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h | |||
@@ -271,6 +271,7 @@ struct ucred { | |||
271 | #define SOL_IRDA 266 | 271 | #define SOL_IRDA 266 |
272 | #define SOL_NETBEUI 267 | 272 | #define SOL_NETBEUI 267 |
273 | #define SOL_LLC 268 | 273 | #define SOL_LLC 268 |
274 | #define SOL_DCCP 269 | ||
274 | 275 | ||
275 | /* IPX options */ | 276 | /* IPX options */ |
276 | #define IPX_TYPE 1 | 277 | #define IPX_TYPE 1 |
diff --git a/net/Kconfig b/net/Kconfig index 02877ac0f2f4..c07aafb59a0f 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig" | |||
147 | 147 | ||
148 | endif | 148 | endif |
149 | 149 | ||
150 | source "net/dccp/Kconfig" | ||
150 | source "net/sctp/Kconfig" | 151 | source "net/sctp/Kconfig" |
151 | source "net/atm/Kconfig" | 152 | source "net/atm/Kconfig" |
152 | source "net/bridge/Kconfig" | 153 | source "net/bridge/Kconfig" |
diff --git a/net/Makefile b/net/Makefile index 4a01be8d3e1e..7e6eff206c81 100644 --- a/net/Makefile +++ b/net/Makefile | |||
@@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/ | |||
42 | obj-$(CONFIG_DECNET) += decnet/ | 42 | obj-$(CONFIG_DECNET) += decnet/ |
43 | obj-$(CONFIG_ECONET) += econet/ | 43 | obj-$(CONFIG_ECONET) += econet/ |
44 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ | 44 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ |
45 | obj-$(CONFIG_IP_DCCP) += dccp/ | ||
45 | obj-$(CONFIG_IP_SCTP) += sctp/ | 46 | obj-$(CONFIG_IP_SCTP) += sctp/ |
46 | 47 | ||
47 | ifeq ($(CONFIG_NET),y) | 48 | ifeq ($(CONFIG_NET),y) |
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 000000000000..90460bc629b3 --- /dev/null +++ b/net/dccp/Kconfig | |||
@@ -0,0 +1,24 @@ | |||
1 | menu "DCCP Configuration (EXPERIMENTAL)" | ||
2 | depends on INET && EXPERIMENTAL | ||
3 | |||
4 | config IP_DCCP | ||
5 | tristate "The DCCP Protocol (EXPERIMENTAL)" | ||
6 | ---help--- | ||
7 | Datagram Congestion Control Protocol | ||
8 | |||
9 | From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>. | ||
10 | |||
11 | The Datagram Congestion Control Protocol (DCCP) is a transport | ||
12 | protocol that implements bidirectional, unicast connections of | ||
13 | congestion-controlled, unreliable datagrams. It should be suitable | ||
14 | for use by applications such as streaming media, Internet telephony, | ||
15 | and on-line games. | ||
16 | |||
17 | To compile this protocol support as a module, choose M here: the | ||
18 | module will be called dccp. | ||
19 | |||
20 | If in doubt, say N. | ||
21 | |||
22 | source "net/dccp/ccids/Kconfig" | ||
23 | |||
24 | endmenu | ||
diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 000000000000..c6e6ba55c36b --- /dev/null +++ b/net/dccp/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o | ||
2 | |||
3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o | ||
4 | |||
5 | obj-y += ccids/ | ||
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 000000000000..9d8fc0e289ea --- /dev/null +++ b/net/dccp/ccid.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * net/dccp/ccid.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * CCID infrastructure | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include "ccid.h" | ||
15 | |||
16 | static struct ccid *ccids[CCID_MAX]; | ||
17 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
18 | static atomic_t ccids_lockct = ATOMIC_INIT(0); | ||
19 | static DEFINE_SPINLOCK(ccids_lock); | ||
20 | |||
21 | /* | ||
22 | * The strategy is: modifications ccids vector are short, do not sleep and | ||
23 | * veeery rare, but read access should be free of any exclusive locks. | ||
24 | */ | ||
25 | static void ccids_write_lock(void) | ||
26 | { | ||
27 | spin_lock(&ccids_lock); | ||
28 | while (atomic_read(&ccids_lockct) != 0) { | ||
29 | spin_unlock(&ccids_lock); | ||
30 | yield(); | ||
31 | spin_lock(&ccids_lock); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | static inline void ccids_write_unlock(void) | ||
36 | { | ||
37 | spin_unlock(&ccids_lock); | ||
38 | } | ||
39 | |||
40 | static inline void ccids_read_lock(void) | ||
41 | { | ||
42 | atomic_inc(&ccids_lockct); | ||
43 | spin_unlock_wait(&ccids_lock); | ||
44 | } | ||
45 | |||
46 | static inline void ccids_read_unlock(void) | ||
47 | { | ||
48 | atomic_dec(&ccids_lockct); | ||
49 | } | ||
50 | |||
51 | #else | ||
52 | #define ccids_write_lock() do { } while(0) | ||
53 | #define ccids_write_unlock() do { } while(0) | ||
54 | #define ccids_read_lock() do { } while(0) | ||
55 | #define ccids_read_unlock() do { } while(0) | ||
56 | #endif | ||
57 | |||
58 | int ccid_register(struct ccid *ccid) | ||
59 | { | ||
60 | int err; | ||
61 | |||
62 | if (ccid->ccid_init == NULL) | ||
63 | return -1; | ||
64 | |||
65 | ccids_write_lock(); | ||
66 | err = -EEXIST; | ||
67 | if (ccids[ccid->ccid_id] == NULL) { | ||
68 | ccids[ccid->ccid_id] = ccid; | ||
69 | err = 0; | ||
70 | } | ||
71 | ccids_write_unlock(); | ||
72 | if (err == 0) | ||
73 | pr_info("CCID: Registered CCID %d (%s)\n", | ||
74 | ccid->ccid_id, ccid->ccid_name); | ||
75 | return err; | ||
76 | } | ||
77 | |||
78 | EXPORT_SYMBOL_GPL(ccid_register); | ||
79 | |||
80 | int ccid_unregister(struct ccid *ccid) | ||
81 | { | ||
82 | ccids_write_lock(); | ||
83 | ccids[ccid->ccid_id] = NULL; | ||
84 | ccids_write_unlock(); | ||
85 | pr_info("CCID: Unregistered CCID %d (%s)\n", | ||
86 | ccid->ccid_id, ccid->ccid_name); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | EXPORT_SYMBOL_GPL(ccid_unregister); | ||
91 | |||
92 | struct ccid *ccid_init(unsigned char id, struct sock *sk) | ||
93 | { | ||
94 | struct ccid *ccid; | ||
95 | |||
96 | #ifdef CONFIG_KMOD | ||
97 | if (ccids[id] == NULL) | ||
98 | request_module("net-dccp-ccid-%d", id); | ||
99 | #endif | ||
100 | ccids_read_lock(); | ||
101 | |||
102 | ccid = ccids[id]; | ||
103 | if (ccid == NULL) | ||
104 | goto out; | ||
105 | |||
106 | if (!try_module_get(ccid->ccid_owner)) | ||
107 | goto out_err; | ||
108 | |||
109 | if (ccid->ccid_init(sk) != 0) | ||
110 | goto out_module_put; | ||
111 | out: | ||
112 | ccids_read_unlock(); | ||
113 | return ccid; | ||
114 | out_module_put: | ||
115 | module_put(ccid->ccid_owner); | ||
116 | out_err: | ||
117 | ccid = NULL; | ||
118 | goto out; | ||
119 | } | ||
120 | |||
121 | EXPORT_SYMBOL_GPL(ccid_init); | ||
122 | |||
123 | void ccid_exit(struct ccid *ccid, struct sock *sk) | ||
124 | { | ||
125 | if (ccid == NULL) | ||
126 | return; | ||
127 | |||
128 | ccids_read_lock(); | ||
129 | |||
130 | if (ccids[ccid->ccid_id] != NULL) { | ||
131 | if (ccid->ccid_exit != NULL) | ||
132 | ccid->ccid_exit(sk); | ||
133 | module_put(ccid->ccid_owner); | ||
134 | } | ||
135 | |||
136 | ccids_read_unlock(); | ||
137 | } | ||
138 | |||
139 | EXPORT_SYMBOL_GPL(ccid_exit); | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 000000000000..06105b2a613c --- /dev/null +++ b/net/dccp/ccid.h | |||
@@ -0,0 +1,156 @@ | |||
1 | #ifndef _CCID_H | ||
2 | #define _CCID_H | ||
3 | /* | ||
4 | * net/dccp/ccid.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
8 | * | ||
9 | * CCID infrastructure | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License version 2 as | ||
13 | * published by the Free Software Foundation. | ||
14 | */ | ||
15 | |||
16 | #include <net/sock.h> | ||
17 | #include <linux/dccp.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/module.h> | ||
20 | |||
21 | #define CCID_MAX 255 | ||
22 | |||
23 | struct ccid { /* CCID descriptor: id, name, owner module and hooks; ccid_register() rejects a NULL ccid_init, the other hooks are NULL-checked before being called */ | ||
24 | unsigned char ccid_id; | ||
25 | const char *ccid_name; | ||
26 | struct module *ccid_owner; | ||
27 | int (*ccid_init)(struct sock *sk); | ||
28 | void (*ccid_exit)(struct sock *sk); | ||
29 | int (*ccid_hc_rx_init)(struct sock *sk); | ||
30 | int (*ccid_hc_tx_init)(struct sock *sk); | ||
31 | void (*ccid_hc_rx_exit)(struct sock *sk); | ||
32 | void (*ccid_hc_tx_exit)(struct sock *sk); | ||
33 | void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); | ||
34 | int (*ccid_hc_rx_parse_options)(struct sock *sk, | ||
35 | unsigned char option, | ||
36 | unsigned char len, u16 idx, | ||
37 | unsigned char* value); | ||
38 | void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); | ||
39 | void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); | ||
40 | void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); | ||
41 | int (*ccid_hc_tx_parse_options)(struct sock *sk, | ||
42 | unsigned char option, | ||
43 | unsigned char len, u16 idx, | ||
44 | unsigned char* value); | ||
45 | int (*ccid_hc_tx_send_packet)(struct sock *sk, | ||
46 | struct sk_buff *skb, int len, | ||
47 | long *delay); | ||
48 | void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); | ||
49 | }; | ||
50 | |||
51 | extern int ccid_register(struct ccid *ccid); | ||
52 | extern int ccid_unregister(struct ccid *ccid); | ||
53 | |||
54 | extern struct ccid *ccid_init(unsigned char id, struct sock *sk); | ||
55 | extern void ccid_exit(struct ccid *ccid, struct sock *sk); | ||
56 | |||
57 | static inline void __ccid_get(struct ccid *ccid) | ||
58 | { | ||
59 | __module_get(ccid->ccid_owner); | ||
60 | } | ||
61 | |||
62 | static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, | ||
63 | struct sk_buff *skb, int len, | ||
64 | long *delay) | ||
65 | { | ||
66 | int rc = 0; | ||
67 | if (ccid->ccid_hc_tx_send_packet != NULL) | ||
68 | rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); | ||
69 | return rc; | ||
70 | } | ||
71 | |||
72 | static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, | ||
73 | int more, int len) | ||
74 | { | ||
75 | if (ccid->ccid_hc_tx_packet_sent != NULL) | ||
76 | ccid->ccid_hc_tx_packet_sent(sk, more, len); | ||
77 | } | ||
78 | |||
79 | static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) | ||
80 | { | ||
81 | int rc = 0; | ||
82 | if (ccid->ccid_hc_rx_init != NULL) | ||
83 | rc = ccid->ccid_hc_rx_init(sk); | ||
84 | return rc; | ||
85 | } | ||
86 | |||
87 | static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) | ||
88 | { | ||
89 | int rc = 0; | ||
90 | if (ccid->ccid_hc_tx_init != NULL) | ||
91 | rc = ccid->ccid_hc_tx_init(sk); | ||
92 | return rc; | ||
93 | } | ||
94 | |||
95 | static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) | ||
96 | { | ||
97 | if (ccid->ccid_hc_rx_exit != NULL) | ||
98 | ccid->ccid_hc_rx_exit(sk); | ||
99 | } | ||
100 | |||
101 | static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) | ||
102 | { | ||
103 | if (ccid->ccid_hc_tx_exit != NULL) | ||
104 | ccid->ccid_hc_tx_exit(sk); | ||
105 | } | ||
106 | |||
107 | static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
108 | struct sk_buff *skb) | ||
109 | { | ||
110 | if (ccid->ccid_hc_rx_packet_recv != NULL) | ||
111 | ccid->ccid_hc_rx_packet_recv(sk, skb); | ||
112 | } | ||
113 | |||
114 | static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
115 | struct sk_buff *skb) | ||
116 | { | ||
117 | if (ccid->ccid_hc_tx_packet_recv != NULL) | ||
118 | ccid->ccid_hc_tx_packet_recv(sk, skb); | ||
119 | } | ||
120 | |||
121 | static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, | ||
122 | unsigned char option, | ||
123 | unsigned char len, u16 idx, | ||
124 | unsigned char* value) | ||
125 | { | ||
126 | int rc = 0; | ||
127 | if (ccid->ccid_hc_tx_parse_options != NULL) | ||
128 | rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); | ||
129 | return rc; | ||
130 | } | ||
131 | |||
132 | static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, | ||
133 | unsigned char option, | ||
134 | unsigned char len, u16 idx, | ||
135 | unsigned char* value) | ||
136 | { | ||
137 | int rc = 0; | ||
138 | if (ccid->ccid_hc_rx_parse_options != NULL) | ||
139 | rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); | ||
140 | return rc; | ||
141 | } | ||
142 | |||
143 | static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, | ||
144 | struct sk_buff *skb) | ||
145 | { | ||
146 | if (ccid->ccid_hc_tx_insert_options != NULL) | ||
147 | ccid->ccid_hc_tx_insert_options(sk, skb); | ||
148 | } | ||
149 | |||
150 | static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, | ||
151 | struct sk_buff *skb) | ||
152 | { | ||
153 | if (ccid->ccid_hc_rx_insert_options != NULL) | ||
154 | ccid->ccid_hc_rx_insert_options(sk, skb); | ||
155 | } | ||
156 | #endif /* _CCID_H */ | ||
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 000000000000..67f9c06bd179 --- /dev/null +++ b/net/dccp/ccids/Kconfig | |||
@@ -0,0 +1,25 @@ | |||
1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" | ||
2 | depends on IP_DCCP && EXPERIMENTAL | ||
3 | |||
4 | config IP_DCCP_CCID3 | ||
5 | tristate "CCID3 (TFRC) (EXPERIMENTAL)" | ||
6 | depends on IP_DCCP | ||
7 | ---help--- | ||
8 | CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based | ||
9 | rate-controlled congestion control mechanism. TFRC is designed to | ||
10 | be reasonably fair when competing for bandwidth with TCP-like flows, | ||
11 | where a flow is "reasonably fair" if its sending rate is generally | ||
12 | within a factor of two of the sending rate of a TCP flow under the | ||
13 | same conditions. However, TFRC has a much lower variation of | ||
14 | throughput over time compared with TCP, which makes CCID 3 more | ||
15 | suitable than CCID 2 for applications such as streaming media where a | ||
16 | relatively smooth sending rate is of importance. | ||
17 | |||
18 | CCID 3 is further described in [CCID 3 PROFILE]. The TFRC | ||
19 | congestion control algorithms were initially described in RFC 3448. | ||
20 | |||
21 | This text was extracted from draft-ietf-dccp-spec-11.txt. | ||
22 | |||
23 | If in doubt, say M. | ||
24 | |||
25 | endmenu | ||
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 000000000000..1c720131c5db --- /dev/null +++ b/net/dccp/ccids/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o | ||
2 | |||
3 | dccp_ccid3-y := ccid3.o | ||
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 000000000000..4f45902cb55e --- /dev/null +++ b/net/dccp/ccids/ccid3.c | |||
@@ -0,0 +1,2164 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/ccid3.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #include "../ccid.h" | ||
38 | #include "../dccp.h" | ||
39 | #include "ccid3.h" | ||
40 | |||
/*
 * Debug tracing.
 *
 * With CCID3_DEBUG defined, ccid3_pr_debug() emits a KERN_DEBUG message
 * prefixed with the name of the calling function, but only while the
 * ccid3_debug flag is non-zero.  Without CCID3_DEBUG the macro expands to
 * nothing, so its arguments are never evaluated (callers rely on this, e.g.
 * when passing helpers that only exist in debug builds).
 *
 * ccid3_debug is defined as a static variable further down in this file and
 * is visible wherever the macro is expanded.  It must not additionally be
 * declared extern here: an identifier declared extern and then static in the
 * same translation unit does not compile.
 */
#ifdef CCID3_DEBUG
#define ccid3_pr_debug(format, a...) \
	do { if (ccid3_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
	} while (0)
#else
#define ccid3_pr_debug(format, a...)
#endif
51 | |||
/*
 * Packet size limits.
 * NOTE(review): presumably in bytes, matching the "s" parameter of
 * ccid3_calc_x() - confirm against the users of these constants.
 */
#define TFRC_MIN_PACKET_SIZE 16
#define TFRC_STD_PACKET_SIZE 256
#define TFRC_MAX_PACKET_SIZE 65535

/* Microseconds per second; the time arithmetic in this file is in usecs */
#define USEC_IN_SEC 1000000

#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC)
/* two seconds (expressed in usecs) as per CCID3 spec 11 */

#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ))
/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */

/*
 * Window counter parameters.
 * NOTE(review): their users are outside this part of the file - confirm
 * the exact semantics there.
 */
#define TFRC_WIN_COUNT_PER_RTT 4
#define TFRC_WIN_COUNT_LIMIT 16

#define TFRC_MAX_BACK_OFF_TIME 64
/* above is in seconds; s / TFRC_MAX_BACK_OFF_TIME is the lower bound on X */

/*
 * Smallest loss event rate for which the TFRC equation is evaluated.
 * p is scaled by 1,000,000 (see ccid3_calc_x()), so 40 means 0.00004.
 */
#define TFRC_SMALLEST_P 40

#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */

/* Number of later packets received before one is considered lost */
#define TFRC_RECV_NUM_LATE_LOSS 3
76 | |||
/*
 * Option type codes specific to CCID3.
 * NOTE(review): presumably the values carried in the DCCP option type
 * field and dispatched on by the *_parse_options hooks - confirm there.
 */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS = 193,
	TFRC_OPT_RECEIVE_RATE = 194,
};
82 | |||
/* Run-time switch tested by ccid3_pr_debug(); non-zero enables the output */
static int ccid3_debug;

/*
 * Slab caches backing the three history entry types; the *_new() and
 * *_delete() helpers below allocate from and free to them.
 */
static kmem_cache_t *ccid3_tx_hist_slab;
static kmem_cache_t *ccid3_rx_hist_slab;
static kmem_cache_t *ccid3_loss_interval_hist_slab;
88 | |||
89 | static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) | ||
90 | { | ||
91 | struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); | ||
92 | |||
93 | if (entry != NULL) | ||
94 | entry->ccid3htx_sent = 0; | ||
95 | |||
96 | return entry; | ||
97 | } | ||
98 | |||
99 | static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) | ||
100 | { | ||
101 | if (entry != NULL) | ||
102 | kmem_cache_free(ccid3_tx_hist_slab, entry); | ||
103 | } | ||
104 | |||
105 | static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, | ||
106 | struct sk_buff *skb, | ||
107 | int prio) | ||
108 | { | ||
109 | struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); | ||
110 | |||
111 | if (entry != NULL) { | ||
112 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
113 | |||
114 | entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
115 | entry->ccid3hrx_win_count = dh->dccph_ccval; | ||
116 | entry->ccid3hrx_type = dh->dccph_type; | ||
117 | entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; | ||
118 | do_gettimeofday(&(entry->ccid3hrx_tstamp)); | ||
119 | } | ||
120 | |||
121 | return entry; | ||
122 | } | ||
123 | |||
124 | static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) | ||
125 | { | ||
126 | if (entry != NULL) | ||
127 | kmem_cache_free(ccid3_rx_hist_slab, entry); | ||
128 | } | ||
129 | |||
130 | static void ccid3_rx_history_delete(struct list_head *hist) | ||
131 | { | ||
132 | struct ccid3_rx_hist_entry *entry, *next; | ||
133 | |||
134 | list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { | ||
135 | list_del_init(&entry->ccid3hrx_node); | ||
136 | kmem_cache_free(ccid3_rx_hist_slab, entry); | ||
137 | } | ||
138 | } | ||
139 | |||
140 | static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) | ||
141 | { | ||
142 | return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); | ||
143 | } | ||
144 | |||
145 | static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) | ||
146 | { | ||
147 | if (entry != NULL) | ||
148 | kmem_cache_free(ccid3_loss_interval_hist_slab, entry); | ||
149 | } | ||
150 | |||
151 | static void ccid3_loss_interval_history_delete(struct list_head *hist) | ||
152 | { | ||
153 | struct ccid3_loss_interval_hist_entry *entry, *next; | ||
154 | |||
155 | list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { | ||
156 | list_del_init(&entry->ccid3lih_node); | ||
157 | kmem_cache_free(ccid3_loss_interval_hist_slab, entry); | ||
158 | } | ||
159 | } | ||
160 | |||
/*
 * CCID3 init routine for a socket.  Nothing is set up here: the function
 * only logs the socket's role and address (in debug builds) and reports
 * success.
 *
 * Returns 0 always.
 */
static int ccid3_init(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
	return 0;
}
166 | |||
/*
 * CCID3 exit routine for a socket.  Nothing is torn down here: the function
 * only logs the socket's role and address (in debug builds).
 */
static void ccid3_exit(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
}
171 | |||
/*
 * TFRC sender states.
 *
 * Numbering starts at 1, so 0 is not a valid state; the name table in
 * ccid3_tx_state_name() is indexed by these values.
 * NOTE(review): going by the names - nothing sent yet, sent but no feedback
 * received, feedback received, terminated - confirm against the state
 * machine further down in the file.
 */
enum ccid3_hc_tx_states {
	TFRC_SSTATE_NO_SENT = 1,
	TFRC_SSTATE_NO_FBACK,
	TFRC_SSTATE_FBACK,
	TFRC_SSTATE_TERM,
};
179 | |||
#ifdef CCID3_DEBUG
/*
 * Map a TFRC sender state to a short printable name for debug messages.
 *
 * Only built when CCID3_DEBUG is defined.  The table is indexed directly by
 * the enum value; the states start at 1, so slot 0 is an unfilled (NULL)
 * entry.  Any value that has no name - 0, or anything outside the table -
 * yields "UNKNOWN" instead of a NULL or out-of-bounds pointer, so the result
 * can always be passed to a "%s" conversion.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static const char * const ccid3_state_names[] = {
		[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
		[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
		[TFRC_SSTATE_FBACK]    = "FBACK",
		[TFRC_SSTATE_TERM]     = "TERM",
	};
	const unsigned int nr_names = sizeof(ccid3_state_names) /
				      sizeof(ccid3_state_names[0]);
	/* unsigned so that a negative enum value also fails the range check */
	const unsigned int idx = state;

	if (idx >= nr_names || ccid3_state_names[idx] == NULL)
		return "UNKNOWN";

	return ccid3_state_names[idx];
}
#endif
193 | |||
194 | static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) | ||
195 | { | ||
196 | struct dccp_sock *dp = dccp_sk(sk); | ||
197 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
198 | enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; | ||
199 | |||
200 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
201 | dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); | ||
202 | WARN_ON(state == oldstate); | ||
203 | hctx->ccid3hctx_state = state; | ||
204 | } | ||
205 | |||
206 | static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { | ||
207 | |||
208 | result->tv_sec = large.tv_sec-small.tv_sec; | ||
209 | if (large.tv_usec < small.tv_usec) { | ||
210 | (result->tv_sec)--; | ||
211 | result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; | ||
212 | } else | ||
213 | result->tv_usec = large.tv_usec-small.tv_usec; | ||
214 | } | ||
215 | |||
216 | static inline void timeval_fix(struct timeval *tv) { | ||
217 | if (tv->tv_usec >= USEC_IN_SEC) { | ||
218 | tv->tv_sec++; | ||
219 | tv->tv_usec -= USEC_IN_SEC; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | /* returns the difference in usecs between timeval passed in and current time */ | ||
224 | static inline u32 now_delta(struct timeval tv) { | ||
225 | struct timeval now; | ||
226 | |||
227 | do_gettimeofday(&now); | ||
228 | return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); | ||
229 | } | ||
230 | |||
/*
 * Geometry of calcx_lookup[], the pre-computed table of f(p) values used by
 * ccid3_calc_x().  p is the loss event rate scaled by 1,000,000.
 *
 * The table has CALCX_ARRSIZE rows of two columns:
 *   [i][1] is used for p <  CALCX_SPLIT, in steps of
 *          CALCX_SPLIT / CALCX_ARRSIZE (= 100, i.e. 0.0001);
 *   [i][0] is used for p >= CALCX_SPLIT, in steps of
 *          1000000 / CALCX_ARRSIZE (= 2000, i.e. 0.002).
 */
#define CALCX_ARRSIZE 500

#define CALCX_SPLIT 50000
/* equivalent to 0.05 */
235 | |||
236 | static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { | ||
237 | { 37172 , 8172 }, | ||
238 | { 53499 , 11567 }, | ||
239 | { 66664 , 14180 }, | ||
240 | { 78298 , 16388 }, | ||
241 | { 89021 , 18339 }, | ||
242 | { 99147 , 20108 }, | ||
243 | { 108858 , 21738 }, | ||
244 | { 118273 , 23260 }, | ||
245 | { 127474 , 24693 }, | ||
246 | { 136520 , 26052 }, | ||
247 | { 145456 , 27348 }, | ||
248 | { 154316 , 28589 }, | ||
249 | { 163130 , 29783 }, | ||
250 | { 171919 , 30935 }, | ||
251 | { 180704 , 32049 }, | ||
252 | { 189502 , 33130 }, | ||
253 | { 198328 , 34180 }, | ||
254 | { 207194 , 35202 }, | ||
255 | { 216114 , 36198 }, | ||
256 | { 225097 , 37172 }, | ||
257 | { 234153 , 38123 }, | ||
258 | { 243294 , 39055 }, | ||
259 | { 252527 , 39968 }, | ||
260 | { 261861 , 40864 }, | ||
261 | { 271305 , 41743 }, | ||
262 | { 280866 , 42607 }, | ||
263 | { 290553 , 43457 }, | ||
264 | { 300372 , 44293 }, | ||
265 | { 310333 , 45117 }, | ||
266 | { 320441 , 45929 }, | ||
267 | { 330705 , 46729 }, | ||
268 | { 341131 , 47518 }, | ||
269 | { 351728 , 48297 }, | ||
270 | { 362501 , 49066 }, | ||
271 | { 373460 , 49826 }, | ||
272 | { 384609 , 50577 }, | ||
273 | { 395958 , 51320 }, | ||
274 | { 407513 , 52054 }, | ||
275 | { 419281 , 52780 }, | ||
276 | { 431270 , 53499 }, | ||
277 | { 443487 , 54211 }, | ||
278 | { 455940 , 54916 }, | ||
279 | { 468635 , 55614 }, | ||
280 | { 481581 , 56306 }, | ||
281 | { 494785 , 56991 }, | ||
282 | { 508254 , 57671 }, | ||
283 | { 521996 , 58345 }, | ||
284 | { 536019 , 59014 }, | ||
285 | { 550331 , 59677 }, | ||
286 | { 564939 , 60335 }, | ||
287 | { 579851 , 60988 }, | ||
288 | { 595075 , 61636 }, | ||
289 | { 610619 , 62279 }, | ||
290 | { 626491 , 62918 }, | ||
291 | { 642700 , 63553 }, | ||
292 | { 659253 , 64183 }, | ||
293 | { 676158 , 64809 }, | ||
294 | { 693424 , 65431 }, | ||
295 | { 711060 , 66050 }, | ||
296 | { 729073 , 66664 }, | ||
297 | { 747472 , 67275 }, | ||
298 | { 766266 , 67882 }, | ||
299 | { 785464 , 68486 }, | ||
300 | { 805073 , 69087 }, | ||
301 | { 825103 , 69684 }, | ||
302 | { 845562 , 70278 }, | ||
303 | { 866460 , 70868 }, | ||
304 | { 887805 , 71456 }, | ||
305 | { 909606 , 72041 }, | ||
306 | { 931873 , 72623 }, | ||
307 | { 954614 , 73202 }, | ||
308 | { 977839 , 73778 }, | ||
309 | { 1001557 , 74352 }, | ||
310 | { 1025777 , 74923 }, | ||
311 | { 1050508 , 75492 }, | ||
312 | { 1075761 , 76058 }, | ||
313 | { 1101544 , 76621 }, | ||
314 | { 1127867 , 77183 }, | ||
315 | { 1154739 , 77741 }, | ||
316 | { 1182172 , 78298 }, | ||
317 | { 1210173 , 78852 }, | ||
318 | { 1238753 , 79405 }, | ||
319 | { 1267922 , 79955 }, | ||
320 | { 1297689 , 80503 }, | ||
321 | { 1328066 , 81049 }, | ||
322 | { 1359060 , 81593 }, | ||
323 | { 1390684 , 82135 }, | ||
324 | { 1422947 , 82675 }, | ||
325 | { 1455859 , 83213 }, | ||
326 | { 1489430 , 83750 }, | ||
327 | { 1523671 , 84284 }, | ||
328 | { 1558593 , 84817 }, | ||
329 | { 1594205 , 85348 }, | ||
330 | { 1630518 , 85878 }, | ||
331 | { 1667543 , 86406 }, | ||
332 | { 1705290 , 86932 }, | ||
333 | { 1743770 , 87457 }, | ||
334 | { 1782994 , 87980 }, | ||
335 | { 1822973 , 88501 }, | ||
336 | { 1863717 , 89021 }, | ||
337 | { 1905237 , 89540 }, | ||
338 | { 1947545 , 90057 }, | ||
339 | { 1990650 , 90573 }, | ||
340 | { 2034566 , 91087 }, | ||
341 | { 2079301 , 91600 }, | ||
342 | { 2124869 , 92111 }, | ||
343 | { 2171279 , 92622 }, | ||
344 | { 2218543 , 93131 }, | ||
345 | { 2266673 , 93639 }, | ||
346 | { 2315680 , 94145 }, | ||
347 | { 2365575 , 94650 }, | ||
348 | { 2416371 , 95154 }, | ||
349 | { 2468077 , 95657 }, | ||
350 | { 2520707 , 96159 }, | ||
351 | { 2574271 , 96660 }, | ||
352 | { 2628782 , 97159 }, | ||
353 | { 2684250 , 97658 }, | ||
354 | { 2740689 , 98155 }, | ||
355 | { 2798110 , 98651 }, | ||
356 | { 2856524 , 99147 }, | ||
357 | { 2915944 , 99641 }, | ||
358 | { 2976382 , 100134 }, | ||
359 | { 3037850 , 100626 }, | ||
360 | { 3100360 , 101117 }, | ||
361 | { 3163924 , 101608 }, | ||
362 | { 3228554 , 102097 }, | ||
363 | { 3294263 , 102586 }, | ||
364 | { 3361063 , 103073 }, | ||
365 | { 3428966 , 103560 }, | ||
366 | { 3497984 , 104045 }, | ||
367 | { 3568131 , 104530 }, | ||
368 | { 3639419 , 105014 }, | ||
369 | { 3711860 , 105498 }, | ||
370 | { 3785467 , 105980 }, | ||
371 | { 3860253 , 106462 }, | ||
372 | { 3936229 , 106942 }, | ||
373 | { 4013410 , 107422 }, | ||
374 | { 4091808 , 107902 }, | ||
375 | { 4171435 , 108380 }, | ||
376 | { 4252306 , 108858 }, | ||
377 | { 4334431 , 109335 }, | ||
378 | { 4417825 , 109811 }, | ||
379 | { 4502501 , 110287 }, | ||
380 | { 4588472 , 110762 }, | ||
381 | { 4675750 , 111236 }, | ||
382 | { 4764349 , 111709 }, | ||
383 | { 4854283 , 112182 }, | ||
384 | { 4945564 , 112654 }, | ||
385 | { 5038206 , 113126 }, | ||
386 | { 5132223 , 113597 }, | ||
387 | { 5227627 , 114067 }, | ||
388 | { 5324432 , 114537 }, | ||
389 | { 5422652 , 115006 }, | ||
390 | { 5522299 , 115474 }, | ||
391 | { 5623389 , 115942 }, | ||
392 | { 5725934 , 116409 }, | ||
393 | { 5829948 , 116876 }, | ||
394 | { 5935446 , 117342 }, | ||
395 | { 6042439 , 117808 }, | ||
396 | { 6150943 , 118273 }, | ||
397 | { 6260972 , 118738 }, | ||
398 | { 6372538 , 119202 }, | ||
399 | { 6485657 , 119665 }, | ||
400 | { 6600342 , 120128 }, | ||
401 | { 6716607 , 120591 }, | ||
402 | { 6834467 , 121053 }, | ||
403 | { 6953935 , 121514 }, | ||
404 | { 7075025 , 121976 }, | ||
405 | { 7197752 , 122436 }, | ||
406 | { 7322131 , 122896 }, | ||
407 | { 7448175 , 123356 }, | ||
408 | { 7575898 , 123815 }, | ||
409 | { 7705316 , 124274 }, | ||
410 | { 7836442 , 124733 }, | ||
411 | { 7969291 , 125191 }, | ||
412 | { 8103877 , 125648 }, | ||
413 | { 8240216 , 126105 }, | ||
414 | { 8378321 , 126562 }, | ||
415 | { 8518208 , 127018 }, | ||
416 | { 8659890 , 127474 }, | ||
417 | { 8803384 , 127930 }, | ||
418 | { 8948702 , 128385 }, | ||
419 | { 9095861 , 128840 }, | ||
420 | { 9244875 , 129294 }, | ||
421 | { 9395760 , 129748 }, | ||
422 | { 9548529 , 130202 }, | ||
423 | { 9703198 , 130655 }, | ||
424 | { 9859782 , 131108 }, | ||
425 | { 10018296 , 131561 }, | ||
426 | { 10178755 , 132014 }, | ||
427 | { 10341174 , 132466 }, | ||
428 | { 10505569 , 132917 }, | ||
429 | { 10671954 , 133369 }, | ||
430 | { 10840345 , 133820 }, | ||
431 | { 11010757 , 134271 }, | ||
432 | { 11183206 , 134721 }, | ||
433 | { 11357706 , 135171 }, | ||
434 | { 11534274 , 135621 }, | ||
435 | { 11712924 , 136071 }, | ||
436 | { 11893673 , 136520 }, | ||
437 | { 12076536 , 136969 }, | ||
438 | { 12261527 , 137418 }, | ||
439 | { 12448664 , 137867 }, | ||
440 | { 12637961 , 138315 }, | ||
441 | { 12829435 , 138763 }, | ||
442 | { 13023101 , 139211 }, | ||
443 | { 13218974 , 139658 }, | ||
444 | { 13417071 , 140106 }, | ||
445 | { 13617407 , 140553 }, | ||
446 | { 13819999 , 140999 }, | ||
447 | { 14024862 , 141446 }, | ||
448 | { 14232012 , 141892 }, | ||
449 | { 14441465 , 142339 }, | ||
450 | { 14653238 , 142785 }, | ||
451 | { 14867346 , 143230 }, | ||
452 | { 15083805 , 143676 }, | ||
453 | { 15302632 , 144121 }, | ||
454 | { 15523842 , 144566 }, | ||
455 | { 15747453 , 145011 }, | ||
456 | { 15973479 , 145456 }, | ||
457 | { 16201939 , 145900 }, | ||
458 | { 16432847 , 146345 }, | ||
459 | { 16666221 , 146789 }, | ||
460 | { 16902076 , 147233 }, | ||
461 | { 17140429 , 147677 }, | ||
462 | { 17381297 , 148121 }, | ||
463 | { 17624696 , 148564 }, | ||
464 | { 17870643 , 149007 }, | ||
465 | { 18119154 , 149451 }, | ||
466 | { 18370247 , 149894 }, | ||
467 | { 18623936 , 150336 }, | ||
468 | { 18880241 , 150779 }, | ||
469 | { 19139176 , 151222 }, | ||
470 | { 19400759 , 151664 }, | ||
471 | { 19665007 , 152107 }, | ||
472 | { 19931936 , 152549 }, | ||
473 | { 20201564 , 152991 }, | ||
474 | { 20473907 , 153433 }, | ||
475 | { 20748982 , 153875 }, | ||
476 | { 21026807 , 154316 }, | ||
477 | { 21307399 , 154758 }, | ||
478 | { 21590773 , 155199 }, | ||
479 | { 21876949 , 155641 }, | ||
480 | { 22165941 , 156082 }, | ||
481 | { 22457769 , 156523 }, | ||
482 | { 22752449 , 156964 }, | ||
483 | { 23049999 , 157405 }, | ||
484 | { 23350435 , 157846 }, | ||
485 | { 23653774 , 158287 }, | ||
486 | { 23960036 , 158727 }, | ||
487 | { 24269236 , 159168 }, | ||
488 | { 24581392 , 159608 }, | ||
489 | { 24896521 , 160049 }, | ||
490 | { 25214642 , 160489 }, | ||
491 | { 25535772 , 160929 }, | ||
492 | { 25859927 , 161370 }, | ||
493 | { 26187127 , 161810 }, | ||
494 | { 26517388 , 162250 }, | ||
495 | { 26850728 , 162690 }, | ||
496 | { 27187165 , 163130 }, | ||
497 | { 27526716 , 163569 }, | ||
498 | { 27869400 , 164009 }, | ||
499 | { 28215234 , 164449 }, | ||
500 | { 28564236 , 164889 }, | ||
501 | { 28916423 , 165328 }, | ||
502 | { 29271815 , 165768 }, | ||
503 | { 29630428 , 166208 }, | ||
504 | { 29992281 , 166647 }, | ||
505 | { 30357392 , 167087 }, | ||
506 | { 30725779 , 167526 }, | ||
507 | { 31097459 , 167965 }, | ||
508 | { 31472452 , 168405 }, | ||
509 | { 31850774 , 168844 }, | ||
510 | { 32232445 , 169283 }, | ||
511 | { 32617482 , 169723 }, | ||
512 | { 33005904 , 170162 }, | ||
513 | { 33397730 , 170601 }, | ||
514 | { 33792976 , 171041 }, | ||
515 | { 34191663 , 171480 }, | ||
516 | { 34593807 , 171919 }, | ||
517 | { 34999428 , 172358 }, | ||
518 | { 35408544 , 172797 }, | ||
519 | { 35821174 , 173237 }, | ||
520 | { 36237335 , 173676 }, | ||
521 | { 36657047 , 174115 }, | ||
522 | { 37080329 , 174554 }, | ||
523 | { 37507197 , 174993 }, | ||
524 | { 37937673 , 175433 }, | ||
525 | { 38371773 , 175872 }, | ||
526 | { 38809517 , 176311 }, | ||
527 | { 39250924 , 176750 }, | ||
528 | { 39696012 , 177190 }, | ||
529 | { 40144800 , 177629 }, | ||
530 | { 40597308 , 178068 }, | ||
531 | { 41053553 , 178507 }, | ||
532 | { 41513554 , 178947 }, | ||
533 | { 41977332 , 179386 }, | ||
534 | { 42444904 , 179825 }, | ||
535 | { 42916290 , 180265 }, | ||
536 | { 43391509 , 180704 }, | ||
537 | { 43870579 , 181144 }, | ||
538 | { 44353520 , 181583 }, | ||
539 | { 44840352 , 182023 }, | ||
540 | { 45331092 , 182462 }, | ||
541 | { 45825761 , 182902 }, | ||
542 | { 46324378 , 183342 }, | ||
543 | { 46826961 , 183781 }, | ||
544 | { 47333531 , 184221 }, | ||
545 | { 47844106 , 184661 }, | ||
546 | { 48358706 , 185101 }, | ||
547 | { 48877350 , 185541 }, | ||
548 | { 49400058 , 185981 }, | ||
549 | { 49926849 , 186421 }, | ||
550 | { 50457743 , 186861 }, | ||
551 | { 50992759 , 187301 }, | ||
552 | { 51531916 , 187741 }, | ||
553 | { 52075235 , 188181 }, | ||
554 | { 52622735 , 188622 }, | ||
555 | { 53174435 , 189062 }, | ||
556 | { 53730355 , 189502 }, | ||
557 | { 54290515 , 189943 }, | ||
558 | { 54854935 , 190383 }, | ||
559 | { 55423634 , 190824 }, | ||
560 | { 55996633 , 191265 }, | ||
561 | { 56573950 , 191706 }, | ||
562 | { 57155606 , 192146 }, | ||
563 | { 57741621 , 192587 }, | ||
564 | { 58332014 , 193028 }, | ||
565 | { 58926806 , 193470 }, | ||
566 | { 59526017 , 193911 }, | ||
567 | { 60129666 , 194352 }, | ||
568 | { 60737774 , 194793 }, | ||
569 | { 61350361 , 195235 }, | ||
570 | { 61967446 , 195677 }, | ||
571 | { 62589050 , 196118 }, | ||
572 | { 63215194 , 196560 }, | ||
573 | { 63845897 , 197002 }, | ||
574 | { 64481179 , 197444 }, | ||
575 | { 65121061 , 197886 }, | ||
576 | { 65765563 , 198328 }, | ||
577 | { 66414705 , 198770 }, | ||
578 | { 67068508 , 199213 }, | ||
579 | { 67726992 , 199655 }, | ||
580 | { 68390177 , 200098 }, | ||
581 | { 69058085 , 200540 }, | ||
582 | { 69730735 , 200983 }, | ||
583 | { 70408147 , 201426 }, | ||
584 | { 71090343 , 201869 }, | ||
585 | { 71777343 , 202312 }, | ||
586 | { 72469168 , 202755 }, | ||
587 | { 73165837 , 203199 }, | ||
588 | { 73867373 , 203642 }, | ||
589 | { 74573795 , 204086 }, | ||
590 | { 75285124 , 204529 }, | ||
591 | { 76001380 , 204973 }, | ||
592 | { 76722586 , 205417 }, | ||
593 | { 77448761 , 205861 }, | ||
594 | { 78179926 , 206306 }, | ||
595 | { 78916102 , 206750 }, | ||
596 | { 79657310 , 207194 }, | ||
597 | { 80403571 , 207639 }, | ||
598 | { 81154906 , 208084 }, | ||
599 | { 81911335 , 208529 }, | ||
600 | { 82672880 , 208974 }, | ||
601 | { 83439562 , 209419 }, | ||
602 | { 84211402 , 209864 }, | ||
603 | { 84988421 , 210309 }, | ||
604 | { 85770640 , 210755 }, | ||
605 | { 86558080 , 211201 }, | ||
606 | { 87350762 , 211647 }, | ||
607 | { 88148708 , 212093 }, | ||
608 | { 88951938 , 212539 }, | ||
609 | { 89760475 , 212985 }, | ||
610 | { 90574339 , 213432 }, | ||
611 | { 91393551 , 213878 }, | ||
612 | { 92218133 , 214325 }, | ||
613 | { 93048107 , 214772 }, | ||
614 | { 93883493 , 215219 }, | ||
615 | { 94724314 , 215666 }, | ||
616 | { 95570590 , 216114 }, | ||
617 | { 96422343 , 216561 }, | ||
618 | { 97279594 , 217009 }, | ||
619 | { 98142366 , 217457 }, | ||
620 | { 99010679 , 217905 }, | ||
621 | { 99884556 , 218353 }, | ||
622 | { 100764018 , 218801 }, | ||
623 | { 101649086 , 219250 }, | ||
624 | { 102539782 , 219698 }, | ||
625 | { 103436128 , 220147 }, | ||
626 | { 104338146 , 220596 }, | ||
627 | { 105245857 , 221046 }, | ||
628 | { 106159284 , 221495 }, | ||
629 | { 107078448 , 221945 }, | ||
630 | { 108003370 , 222394 }, | ||
631 | { 108934074 , 222844 }, | ||
632 | { 109870580 , 223294 }, | ||
633 | { 110812910 , 223745 }, | ||
634 | { 111761087 , 224195 }, | ||
635 | { 112715133 , 224646 }, | ||
636 | { 113675069 , 225097 }, | ||
637 | { 114640918 , 225548 }, | ||
638 | { 115612702 , 225999 }, | ||
639 | { 116590442 , 226450 }, | ||
640 | { 117574162 , 226902 }, | ||
641 | { 118563882 , 227353 }, | ||
642 | { 119559626 , 227805 }, | ||
643 | { 120561415 , 228258 }, | ||
644 | { 121569272 , 228710 }, | ||
645 | { 122583219 , 229162 }, | ||
646 | { 123603278 , 229615 }, | ||
647 | { 124629471 , 230068 }, | ||
648 | { 125661822 , 230521 }, | ||
649 | { 126700352 , 230974 }, | ||
650 | { 127745083 , 231428 }, | ||
651 | { 128796039 , 231882 }, | ||
652 | { 129853241 , 232336 }, | ||
653 | { 130916713 , 232790 }, | ||
654 | { 131986475 , 233244 }, | ||
655 | { 133062553 , 233699 }, | ||
656 | { 134144966 , 234153 }, | ||
657 | { 135233739 , 234608 }, | ||
658 | { 136328894 , 235064 }, | ||
659 | { 137430453 , 235519 }, | ||
660 | { 138538440 , 235975 }, | ||
661 | { 139652876 , 236430 }, | ||
662 | { 140773786 , 236886 }, | ||
663 | { 141901190 , 237343 }, | ||
664 | { 143035113 , 237799 }, | ||
665 | { 144175576 , 238256 }, | ||
666 | { 145322604 , 238713 }, | ||
667 | { 146476218 , 239170 }, | ||
668 | { 147636442 , 239627 }, | ||
669 | { 148803298 , 240085 }, | ||
670 | { 149976809 , 240542 }, | ||
671 | { 151156999 , 241000 }, | ||
672 | { 152343890 , 241459 }, | ||
673 | { 153537506 , 241917 }, | ||
674 | { 154737869 , 242376 }, | ||
675 | { 155945002 , 242835 }, | ||
676 | { 157158929 , 243294 }, | ||
677 | { 158379673 , 243753 }, | ||
678 | { 159607257 , 244213 }, | ||
679 | { 160841704 , 244673 }, | ||
680 | { 162083037 , 245133 }, | ||
681 | { 163331279 , 245593 }, | ||
682 | { 164586455 , 246054 }, | ||
683 | { 165848586 , 246514 }, | ||
684 | { 167117696 , 246975 }, | ||
685 | { 168393810 , 247437 }, | ||
686 | { 169676949 , 247898 }, | ||
687 | { 170967138 , 248360 }, | ||
688 | { 172264399 , 248822 }, | ||
689 | { 173568757 , 249284 }, | ||
690 | { 174880235 , 249747 }, | ||
691 | { 176198856 , 250209 }, | ||
692 | { 177524643 , 250672 }, | ||
693 | { 178857621 , 251136 }, | ||
694 | { 180197813 , 251599 }, | ||
695 | { 181545242 , 252063 }, | ||
696 | { 182899933 , 252527 }, | ||
697 | { 184261908 , 252991 }, | ||
698 | { 185631191 , 253456 }, | ||
699 | { 187007807 , 253920 }, | ||
700 | { 188391778 , 254385 }, | ||
701 | { 189783129 , 254851 }, | ||
702 | { 191181884 , 255316 }, | ||
703 | { 192588065 , 255782 }, | ||
704 | { 194001698 , 256248 }, | ||
705 | { 195422805 , 256714 }, | ||
706 | { 196851411 , 257181 }, | ||
707 | { 198287540 , 257648 }, | ||
708 | { 199731215 , 258115 }, | ||
709 | { 201182461 , 258582 }, | ||
710 | { 202641302 , 259050 }, | ||
711 | { 204107760 , 259518 }, | ||
712 | { 205581862 , 259986 }, | ||
713 | { 207063630 , 260454 }, | ||
714 | { 208553088 , 260923 }, | ||
715 | { 210050262 , 261392 }, | ||
716 | { 211555174 , 261861 }, | ||
717 | { 213067849 , 262331 }, | ||
718 | { 214588312 , 262800 }, | ||
719 | { 216116586 , 263270 }, | ||
720 | { 217652696 , 263741 }, | ||
721 | { 219196666 , 264211 }, | ||
722 | { 220748520 , 264682 }, | ||
723 | { 222308282 , 265153 }, | ||
724 | { 223875978 , 265625 }, | ||
725 | { 225451630 , 266097 }, | ||
726 | { 227035265 , 266569 }, | ||
727 | { 228626905 , 267041 }, | ||
728 | { 230226576 , 267514 }, | ||
729 | { 231834302 , 267986 }, | ||
730 | { 233450107 , 268460 }, | ||
731 | { 235074016 , 268933 }, | ||
732 | { 236706054 , 269407 }, | ||
733 | { 238346244 , 269881 }, | ||
734 | { 239994613 , 270355 }, | ||
735 | { 241651183 , 270830 }, | ||
736 | { 243315981 , 271305 } | ||
737 | }; | ||
738 | |||
739 | /* Calculate the send rate as per section 3.1 of RFC3448 | ||
740 | |||
741 | Returns send rate in bytes per second | ||
742 | |||
743 | Integer maths and lookups are used as not allowed floating point in kernel | ||
744 | |||
745 | The function for Xcalc as per section 3.1 of RFC3448 is: | ||
746 | |||
747 | X = s | ||
748 | ------------------------------------------------------------- | ||
749 | R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) | ||
750 | |||
751 | where | ||
752 | X is the trasmit rate in bytes/second | ||
753 | s is the packet size in bytes | ||
754 | R is the round trip time in seconds | ||
755 | p is the loss event rate, between 0 and 1.0, of the number of loss events | ||
756 | as a fraction of the number of packets transmitted | ||
757 | t_RTO is the TCP retransmission timeout value in seconds | ||
758 | b is the number of packets acknowledged by a single TCP acknowledgement | ||
759 | |||
760 | we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: | ||
761 | |||
762 | X = s | ||
763 | ----------------------------------------------------------------------- | ||
764 | R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) | ||
765 | |||
766 | |||
767 | which we can break down into: | ||
768 | |||
769 | X = s | ||
770 | -------- | ||
771 | R * f(p) | ||
772 | |||
773 | where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) | ||
774 | |||
775 | Function parameters: | ||
776 | s - bytes | ||
777 | R - RTT in usecs | ||
778 | p - loss rate (decimal fraction multiplied by 1,000,000) | ||
779 | |||
780 | Returns Xcalc in bytes per second | ||
781 | |||
782 | DON'T alter this code unless you run test cases against it as the code | ||
783 | has been manipulated to stop underflow/overlow. | ||
784 | |||
785 | */ | ||
786 | static u32 ccid3_calc_x(u16 s, u32 R, u32 p) | ||
787 | { | ||
788 | int index; | ||
789 | u32 f; | ||
790 | u64 tmp1, tmp2; | ||
791 | |||
792 | if (p < CALCX_SPLIT) | ||
793 | index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; | ||
794 | else | ||
795 | index = (p / (1000000 / CALCX_ARRSIZE)) - 1; | ||
796 | |||
797 | if (index < 0) | ||
798 | /* p should be 0 unless there is a bug in my code */ | ||
799 | index = 0; | ||
800 | |||
801 | if (R == 0) | ||
802 | R = 1; /* RTT can't be zero or else divide by zero */ | ||
803 | |||
804 | BUG_ON(index >= CALCX_ARRSIZE); | ||
805 | |||
806 | if (p >= CALCX_SPLIT) | ||
807 | f = calcx_lookup[index][0]; | ||
808 | else | ||
809 | f = calcx_lookup[index][1]; | ||
810 | |||
811 | tmp1 = ((u64)s * 100000000); | ||
812 | tmp2 = ((u64)R * (u64)f); | ||
813 | do_div(tmp2,10000); | ||
814 | do_div(tmp1,tmp2); | ||
815 | /* don't alter above math unless you test due to overflow on 32 bit */ | ||
816 | |||
817 | return (u32)tmp1; | ||
818 | } | ||
819 | |||
820 | /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ | ||
821 | static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) | ||
822 | { | ||
823 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) | ||
824 | return; | ||
825 | /* if no feedback spec says t_ipi is 1 second (set elsewhere and then | ||
826 | * doubles after every no feedback timer (separate function) */ | ||
827 | |||
828 | if (hctx->ccid3hctx_x < 10) { | ||
829 | ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); | ||
830 | hctx->ccid3hctx_x = 10; | ||
831 | } | ||
832 | hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) | ||
833 | / (hctx->ccid3hctx_x / 10); | ||
834 | /* reason for above maths with 10 in there is to avoid 32 bit | ||
835 | * overflow for jumbo packets */ | ||
836 | |||
837 | } | ||
838 | |||
839 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | ||
840 | static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) | ||
841 | { | ||
842 | hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); | ||
843 | |||
844 | } | ||
845 | |||
846 | /* | ||
847 | * Update X by | ||
848 | * If (p > 0) | ||
849 | * x_calc = calcX(s, R, p); | ||
850 | * X = max(min(X_calc, 2 * X_recv), s / t_mbi); | ||
851 | * Else | ||
852 | * If (now - tld >= R) | ||
853 | * X = max(min(2 * X, 2 * X_recv), s / R); | ||
854 | * tld = now; | ||
855 | */ | ||
856 | static void ccid3_hc_tx_update_x(struct sock *sk) | ||
857 | { | ||
858 | struct dccp_sock *dp = dccp_sk(sk); | ||
859 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
860 | |||
861 | if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ | ||
862 | hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, | ||
863 | hctx->ccid3hctx_rtt, | ||
864 | hctx->ccid3hctx_p); | ||
865 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), | ||
866 | hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); | ||
867 | } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { | ||
868 | u32 rtt = hctx->ccid3hctx_rtt; | ||
869 | if (rtt < 10) { | ||
870 | rtt = 10; | ||
871 | } /* avoid divide by zero below */ | ||
872 | |||
873 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), | ||
874 | (hctx->ccid3hctx_s * 100000) / (rtt / 10)); | ||
875 | /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ | ||
876 | do_gettimeofday(&hctx->ccid3hctx_t_ld); | ||
877 | } | ||
878 | |||
879 | if (hctx->ccid3hctx_x == 0) { | ||
880 | ccid3_pr_debug("ccid3hctx_x = 0!\n"); | ||
881 | hctx->ccid3hctx_x = 1; | ||
882 | } | ||
883 | } | ||
884 | |||
885 | static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | ||
886 | { | ||
887 | struct sock *sk = (struct sock *)data; | ||
888 | struct dccp_sock *dp = dccp_sk(sk); | ||
889 | unsigned long next_tmout = 0; | ||
890 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
891 | u32 rtt; | ||
892 | |||
893 | bh_lock_sock(sk); | ||
894 | if (sock_owned_by_user(sk)) { | ||
895 | /* Try again later. */ | ||
896 | /* XXX: set some sensible MIB */ | ||
897 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); | ||
898 | goto out; | ||
899 | } | ||
900 | |||
901 | ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, | ||
902 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
903 | |||
904 | if (hctx->ccid3hctx_x < 10) { | ||
905 | ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); | ||
906 | hctx->ccid3hctx_x = 10; | ||
907 | } | ||
908 | |||
909 | switch (hctx->ccid3hctx_state) { | ||
910 | case TFRC_SSTATE_TERM: | ||
911 | goto out; | ||
912 | case TFRC_SSTATE_NO_FBACK: | ||
913 | /* Halve send rate */ | ||
914 | hctx->ccid3hctx_x /= 2; | ||
915 | if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) | ||
916 | hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; | ||
917 | |||
918 | ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", | ||
919 | dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), | ||
920 | hctx->ccid3hctx_x); | ||
921 | next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) | ||
922 | / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); | ||
923 | /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ | ||
924 | /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 | ||
925 | * should adjust tx_t_ipi and double that to achieve it really */ | ||
926 | break; | ||
927 | case TFRC_SSTATE_FBACK: | ||
928 | /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ | ||
929 | rtt = hctx->ccid3hctx_rtt; | ||
930 | if (rtt < 10) | ||
931 | rtt = 10; | ||
932 | /* stop divide by zero below */ | ||
933 | if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= | ||
934 | 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { | ||
935 | ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, | ||
936 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
937 | /* Halve sending rate */ | ||
938 | |||
939 | /* If (X_calc > 2 * X_recv) | ||
940 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); | ||
941 | * Else | ||
942 | * X_recv = X_calc / 4; | ||
943 | */ | ||
944 | BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); | ||
945 | |||
946 | /* check also if p is zero -> x_calc is infinity? */ | ||
947 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || | ||
948 | hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) | ||
949 | hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, | ||
950 | hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); | ||
951 | else | ||
952 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; | ||
953 | |||
954 | /* Update sending rate */ | ||
955 | ccid3_hc_tx_update_x(sk); | ||
956 | } | ||
957 | if (hctx->ccid3hctx_x == 0) { | ||
958 | ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); | ||
959 | hctx->ccid3hctx_x = 10; | ||
960 | } | ||
961 | /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ | ||
962 | next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, | ||
963 | 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); | ||
964 | break; | ||
965 | default: | ||
966 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
967 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
968 | dump_stack(); | ||
969 | goto out; | ||
970 | } | ||
971 | |||
972 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
973 | jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); | ||
974 | hctx->ccid3hctx_idle = 1; | ||
975 | out: | ||
976 | bh_unlock_sock(sk); | ||
977 | sock_put(sk); | ||
978 | } | ||
979 | |||
980 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, | ||
981 | int len, long *delay) | ||
982 | { | ||
983 | struct dccp_sock *dp = dccp_sk(sk); | ||
984 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
985 | struct ccid3_tx_hist_entry *new_packet = NULL; | ||
986 | struct timeval now; | ||
987 | int rc = -ENOTCONN; | ||
988 | |||
989 | // ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); | ||
990 | /* | ||
991 | * check if pure ACK or Terminating */ | ||
992 | /* XXX: We only call this function for DATA and DATAACK, on, these packets can have | ||
993 | * zero length, but why the comment about "pure ACK"? | ||
994 | */ | ||
995 | if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) | ||
996 | goto out; | ||
997 | |||
998 | /* See if last packet allocated was not sent */ | ||
999 | if (!list_empty(&hctx->ccid3hctx_hist)) | ||
1000 | new_packet = list_entry(hctx->ccid3hctx_hist.next, | ||
1001 | struct ccid3_tx_hist_entry, ccid3htx_node); | ||
1002 | |||
1003 | if (new_packet == NULL || new_packet->ccid3htx_sent) { | ||
1004 | new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); | ||
1005 | |||
1006 | rc = -ENOBUFS; | ||
1007 | if (new_packet == NULL) { | ||
1008 | ccid3_pr_debug("%s, sk=%p, not enough mem to add " | ||
1009 | "to history, send refused\n", dccp_role(sk), sk); | ||
1010 | goto out; | ||
1011 | } | ||
1012 | |||
1013 | list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); | ||
1014 | } | ||
1015 | |||
1016 | do_gettimeofday(&now); | ||
1017 | |||
1018 | switch (hctx->ccid3hctx_state) { | ||
1019 | case TFRC_SSTATE_NO_SENT: | ||
1020 | ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, | ||
1021 | dp->dccps_gss); | ||
1022 | |||
1023 | hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; | ||
1024 | hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; | ||
1025 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); | ||
1026 | hctx->ccid3hctx_last_win_count = 0; | ||
1027 | hctx->ccid3hctx_t_last_win_count = now; | ||
1028 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | ||
1029 | hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; | ||
1030 | |||
1031 | /* Set nominal send time for initial packet */ | ||
1032 | hctx->ccid3hctx_t_nom = now; | ||
1033 | (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; | ||
1034 | timeval_fix(&(hctx->ccid3hctx_t_nom)); | ||
1035 | ccid3_calc_new_delta(hctx); | ||
1036 | rc = 0; | ||
1037 | break; | ||
1038 | case TFRC_SSTATE_NO_FBACK: | ||
1039 | case TFRC_SSTATE_FBACK: | ||
1040 | *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); | ||
1041 | ccid3_pr_debug("send_packet delay=%ld\n",*delay); | ||
1042 | *delay /= -1000; | ||
1043 | /* divide by -1000 is to convert to ms and get sign right */ | ||
1044 | rc = *delay > 0 ? -EAGAIN : 0; | ||
1045 | break; | ||
1046 | default: | ||
1047 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
1048 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
1049 | dump_stack(); | ||
1050 | rc = -EINVAL; | ||
1051 | break; | ||
1052 | } | ||
1053 | |||
1054 | /* Can we send? if so add options and add to packet history */ | ||
1055 | if (rc == 0) | ||
1056 | new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | ||
1057 | out: | ||
1058 | return rc; | ||
1059 | } | ||
1060 | |||
1061 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) | ||
1062 | { | ||
1063 | struct dccp_sock *dp = dccp_sk(sk); | ||
1064 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1065 | struct ccid3_tx_hist_entry *packet = NULL; | ||
1066 | struct timeval now; | ||
1067 | |||
1068 | // ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); | ||
1069 | BUG_ON(hctx == NULL); | ||
1070 | |||
1071 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
1072 | ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", | ||
1073 | dccp_role(sk), sk); | ||
1074 | return; | ||
1075 | } | ||
1076 | |||
1077 | do_gettimeofday(&now); | ||
1078 | |||
1079 | /* check if we have sent a data packet */ | ||
1080 | if (len > 0) { | ||
1081 | unsigned long quarter_rtt; | ||
1082 | |||
1083 | if (list_empty(&hctx->ccid3hctx_hist)) { | ||
1084 | printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); | ||
1085 | return; | ||
1086 | } | ||
1087 | packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); | ||
1088 | if (packet->ccid3htx_sent) { | ||
1089 | printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); | ||
1090 | return; | ||
1091 | } | ||
1092 | packet->ccid3htx_tstamp = now; | ||
1093 | packet->ccid3htx_seqno = dp->dccps_gss; | ||
1094 | // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); | ||
1095 | |||
1096 | /* | ||
1097 | * Check if win_count have changed */ | ||
1098 | /* COMPLIANCE_BEGIN | ||
1099 | * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt | ||
1100 | */ | ||
1101 | quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); | ||
1102 | if (quarter_rtt > 0) { | ||
1103 | hctx->ccid3hctx_t_last_win_count = now; | ||
1104 | hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + | ||
1105 | min_t(unsigned long, quarter_rtt, 5)) % 16; | ||
1106 | ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", | ||
1107 | dccp_role(sk), sk, | ||
1108 | packet->ccid3htx_win_count, | ||
1109 | hctx->ccid3hctx_last_win_count); | ||
1110 | } | ||
1111 | /* COMPLIANCE_END */ | ||
1112 | #if 0 | ||
1113 | ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", | ||
1114 | dccp_role(sk), sk, | ||
1115 | packet->ccid3htx_seqno, | ||
1116 | packet->ccid3htx_win_count); | ||
1117 | #endif | ||
1118 | hctx->ccid3hctx_idle = 0; | ||
1119 | packet->ccid3htx_sent = 1; | ||
1120 | } else | ||
1121 | ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", | ||
1122 | dccp_role(sk), sk, dp->dccps_gss); | ||
1123 | |||
1124 | switch (hctx->ccid3hctx_state) { | ||
1125 | case TFRC_SSTATE_NO_SENT: | ||
1126 | /* if first wasn't pure ack */ | ||
1127 | if (len != 0) | ||
1128 | printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", | ||
1129 | __FUNCTION__, dccp_role(sk)); | ||
1130 | return; | ||
1131 | case TFRC_SSTATE_NO_FBACK: | ||
1132 | case TFRC_SSTATE_FBACK: | ||
1133 | if (len > 0) { | ||
1134 | hctx->ccid3hctx_t_nom = now; | ||
1135 | ccid3_calc_new_t_ipi(hctx); | ||
1136 | ccid3_calc_new_delta(hctx); | ||
1137 | (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; | ||
1138 | timeval_fix(&(hctx->ccid3hctx_t_nom)); | ||
1139 | } | ||
1140 | break; | ||
1141 | default: | ||
1142 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
1143 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
1144 | dump_stack(); | ||
1145 | break; | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1149 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
1150 | { | ||
1151 | struct dccp_sock *dp = dccp_sk(sk); | ||
1152 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1153 | struct ccid3_options_received *opt_recv; | ||
1154 | struct ccid3_tx_hist_entry *entry, *next, *packet; | ||
1155 | unsigned long next_tmout; | ||
1156 | u16 t_elapsed; | ||
1157 | u32 pinv; | ||
1158 | u32 x_recv; | ||
1159 | u32 r_sample; | ||
1160 | #if 0 | ||
1161 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", | ||
1162 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), | ||
1163 | skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
1164 | #endif | ||
1165 | if (hctx == NULL) | ||
1166 | return; | ||
1167 | |||
1168 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
1169 | ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); | ||
1170 | return; | ||
1171 | } | ||
1172 | |||
1173 | /* we are only interested in ACKs */ | ||
1174 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | ||
1175 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | ||
1176 | return; | ||
1177 | |||
1178 | opt_recv = &hctx->ccid3hctx_options_received; | ||
1179 | |||
1180 | t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; | ||
1181 | x_recv = opt_recv->ccid3or_receive_rate; | ||
1182 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
1183 | |||
1184 | switch (hctx->ccid3hctx_state) { | ||
1185 | case TFRC_SSTATE_NO_SENT: | ||
1186 | /* FIXME: what to do here? */ | ||
1187 | return; | ||
1188 | case TFRC_SSTATE_NO_FBACK: | ||
1189 | case TFRC_SSTATE_FBACK: | ||
1190 | /* Calculate new round trip sample by | ||
1191 | * R_sample = (now - t_recvdata) - t_delay */ | ||
1192 | /* get t_recvdata from history */ | ||
1193 | packet = NULL; | ||
1194 | list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) | ||
1195 | if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { | ||
1196 | packet = entry; | ||
1197 | break; | ||
1198 | } | ||
1199 | |||
1200 | if (packet == NULL) { | ||
1201 | ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", | ||
1202 | dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
1203 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
1204 | return; | ||
1205 | } | ||
1206 | |||
1207 | /* Update RTT */ | ||
1208 | r_sample = now_delta(packet->ccid3htx_tstamp); | ||
1209 | /* FIXME: */ | ||
1210 | // r_sample -= usecs_to_jiffies(t_elapsed * 10); | ||
1211 | |||
1212 | /* Update RTT estimate by | ||
1213 | * If (No feedback recv) | ||
1214 | * R = R_sample; | ||
1215 | * Else | ||
1216 | * R = q * R + (1 - q) * R_sample; | ||
1217 | * | ||
1218 | * q is a constant, RFC 3448 recomments 0.9 | ||
1219 | */ | ||
1220 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | ||
1221 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | ||
1222 | hctx->ccid3hctx_rtt = r_sample; | ||
1223 | } else | ||
1224 | hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; | ||
1225 | |||
1226 | /* | ||
1227 | * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent | ||
1228 | * implemention of the new window count. | ||
1229 | */ | ||
1230 | if (hctx->ccid3hctx_rtt < 4) | ||
1231 | hctx->ccid3hctx_rtt = 4; | ||
1232 | |||
1233 | ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", | ||
1234 | dccp_role(sk), sk, | ||
1235 | hctx->ccid3hctx_rtt, | ||
1236 | r_sample); | ||
1237 | |||
1238 | /* Update timeout interval */ | ||
1239 | inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); | ||
1240 | |||
1241 | /* Update receive rate */ | ||
1242 | hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ | ||
1243 | |||
1244 | /* Update loss event rate */ | ||
1245 | if (pinv == ~0 || pinv == 0) | ||
1246 | hctx->ccid3hctx_p = 0; | ||
1247 | else { | ||
1248 | hctx->ccid3hctx_p = 1000000 / pinv; | ||
1249 | |||
1250 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { | ||
1251 | hctx->ccid3hctx_p = TFRC_SMALLEST_P; | ||
1252 | ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); | ||
1253 | } | ||
1254 | } | ||
1255 | |||
1256 | /* unschedule no feedback timer */ | ||
1257 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
1258 | |||
1259 | /* Update sending rate */ | ||
1260 | ccid3_hc_tx_update_x(sk); | ||
1261 | |||
1262 | /* Update next send time */ | ||
1263 | if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { | ||
1264 | (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; | ||
1265 | (hctx->ccid3hctx_t_nom).tv_sec--; | ||
1266 | } | ||
1267 | /* FIXME - if no feedback then t_ipi can go > 1 second */ | ||
1268 | (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; | ||
1269 | ccid3_calc_new_t_ipi(hctx); | ||
1270 | (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; | ||
1271 | timeval_fix(&(hctx->ccid3hctx_t_nom)); | ||
1272 | ccid3_calc_new_delta(hctx); | ||
1273 | |||
1274 | /* remove all packets older than the one acked from history */ | ||
1275 | #if 0 | ||
1276 | FIXME! | ||
1277 | list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { | ||
1278 | list_del_init(&entry->ccid3htx_node); | ||
1279 | ccid3_tx_hist_entry_delete(entry); | ||
1280 | } | ||
1281 | #endif | ||
1282 | if (hctx->ccid3hctx_x < 10) { | ||
1283 | ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); | ||
1284 | hctx->ccid3hctx_x = 10; | ||
1285 | } | ||
1286 | /* to prevent divide by zero below */ | ||
1287 | |||
1288 | /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ | ||
1289 | next_tmout = max(inet_csk(sk)->icsk_rto, | ||
1290 | 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); | ||
1291 | /* maths with 100000 and 10 is to prevent overflow with 32 bit */ | ||
1292 | |||
1293 | ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", | ||
1294 | dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); | ||
1295 | |||
1296 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
1297 | jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); | ||
1298 | |||
1299 | /* set idle flag */ | ||
1300 | hctx->ccid3hctx_idle = 1; | ||
1301 | break; | ||
1302 | default: | ||
1303 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
1304 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
1305 | dump_stack(); | ||
1306 | break; | ||
1307 | } | ||
1308 | } | ||
1309 | |||
1310 | static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
1311 | { | ||
1312 | const struct dccp_sock *dp = dccp_sk(sk); | ||
1313 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1314 | |||
1315 | if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | ||
1316 | return; | ||
1317 | |||
1318 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | ||
1319 | } | ||
1320 | |||
1321 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, | ||
1322 | unsigned char len, u16 idx, unsigned char *value) | ||
1323 | { | ||
1324 | int rc = 0; | ||
1325 | struct dccp_sock *dp = dccp_sk(sk); | ||
1326 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1327 | struct ccid3_options_received *opt_recv; | ||
1328 | |||
1329 | if (hctx == NULL) | ||
1330 | return 0; | ||
1331 | |||
1332 | opt_recv = &hctx->ccid3hctx_options_received; | ||
1333 | |||
1334 | if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { | ||
1335 | opt_recv->ccid3or_seqno = dp->dccps_gsr; | ||
1336 | opt_recv->ccid3or_loss_event_rate = ~0; | ||
1337 | opt_recv->ccid3or_loss_intervals_idx = 0; | ||
1338 | opt_recv->ccid3or_loss_intervals_len = 0; | ||
1339 | opt_recv->ccid3or_receive_rate = 0; | ||
1340 | } | ||
1341 | |||
1342 | switch (option) { | ||
1343 | case TFRC_OPT_LOSS_EVENT_RATE: | ||
1344 | if (len != 4) { | ||
1345 | ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", | ||
1346 | dccp_role(sk), sk); | ||
1347 | rc = -EINVAL; | ||
1348 | } else { | ||
1349 | opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); | ||
1350 | ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", | ||
1351 | dccp_role(sk), sk, | ||
1352 | opt_recv->ccid3or_loss_event_rate); | ||
1353 | } | ||
1354 | break; | ||
1355 | case TFRC_OPT_LOSS_INTERVALS: | ||
1356 | opt_recv->ccid3or_loss_intervals_idx = idx; | ||
1357 | opt_recv->ccid3or_loss_intervals_len = len; | ||
1358 | ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", | ||
1359 | dccp_role(sk), sk, | ||
1360 | opt_recv->ccid3or_loss_intervals_idx, | ||
1361 | opt_recv->ccid3or_loss_intervals_len); | ||
1362 | break; | ||
1363 | case TFRC_OPT_RECEIVE_RATE: | ||
1364 | if (len != 4) { | ||
1365 | ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", | ||
1366 | dccp_role(sk), sk); | ||
1367 | rc = -EINVAL; | ||
1368 | } else { | ||
1369 | opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); | ||
1370 | ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", | ||
1371 | dccp_role(sk), sk, | ||
1372 | opt_recv->ccid3or_receive_rate); | ||
1373 | } | ||
1374 | break; | ||
1375 | } | ||
1376 | |||
1377 | return rc; | ||
1378 | } | ||
1379 | |||
1380 | static int ccid3_hc_tx_init(struct sock *sk) | ||
1381 | { | ||
1382 | struct dccp_sock *dp = dccp_sk(sk); | ||
1383 | struct ccid3_hc_tx_sock *hctx; | ||
1384 | |||
1385 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
1386 | |||
1387 | hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); | ||
1388 | if (hctx == NULL) | ||
1389 | return -ENOMEM; | ||
1390 | |||
1391 | memset(hctx, 0, sizeof(*hctx)); | ||
1392 | |||
1393 | if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && | ||
1394 | dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) | ||
1395 | hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; | ||
1396 | else | ||
1397 | hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; | ||
1398 | |||
1399 | hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ | ||
1400 | hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ | ||
1401 | inet_csk(sk)->icsk_rto = USEC_IN_SEC; | ||
1402 | hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; | ||
1403 | INIT_LIST_HEAD(&hctx->ccid3hctx_hist); | ||
1404 | init_timer(&hctx->ccid3hctx_no_feedback_timer); | ||
1405 | |||
1406 | return 0; | ||
1407 | } | ||
1408 | |||
1409 | static void ccid3_hc_tx_exit(struct sock *sk) | ||
1410 | { | ||
1411 | struct dccp_sock *dp = dccp_sk(sk); | ||
1412 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1413 | struct ccid3_tx_hist_entry *entry, *next; | ||
1414 | |||
1415 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
1416 | BUG_ON(hctx == NULL); | ||
1417 | |||
1418 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); | ||
1419 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
1420 | |||
1421 | /* Empty packet history */ | ||
1422 | list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { | ||
1423 | list_del_init(&entry->ccid3htx_node); | ||
1424 | ccid3_tx_hist_entry_delete(entry); | ||
1425 | } | ||
1426 | |||
1427 | kfree(dp->dccps_hc_tx_ccid_private); | ||
1428 | dp->dccps_hc_tx_ccid_private = NULL; | ||
1429 | } | ||
1430 | |||
1431 | /* | ||
1432 | * RX Half Connection methods | ||
1433 | */ | ||
1434 | |||
/*
 * TFRC receiver states.
 *
 * The numeric values also serve as indices into the debug name table in
 * ccid3_rx_state_name().
 */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA = 1,	/* feedback reports a receive rate of 0 */
	TFRC_RSTATE_DATA    = 2,	/* receive rate is measured from bytes received */
	TFRC_RSTATE_TERM    = 127,
};
1441 | |||
#ifdef CCID3_DEBUG
/*
 * Map a receiver state to its name for debug output.
 *
 * Returns NULL for values that have no name, whether they fall into a gap of
 * the table or lie outside of it. The index is range-checked so that a
 * corrupted state value cannot cause an out-of-bounds read.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static const char * const ccid3_rx_state_names[] = {
	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
	[TFRC_RSTATE_DATA]    = "DATA",
	[TFRC_RSTATE_TERM]    = "TERM",
	};
	const unsigned int idx = (unsigned int)state;

	if (idx >= sizeof(ccid3_rx_state_names) / sizeof(ccid3_rx_state_names[0]))
		return NULL;

	return ccid3_rx_state_names[idx];
}
#endif
1454 | |||
1455 | static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) | ||
1456 | { | ||
1457 | struct dccp_sock *dp = dccp_sk(sk); | ||
1458 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1459 | enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; | ||
1460 | |||
1461 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
1462 | dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); | ||
1463 | WARN_ON(state == oldstate); | ||
1464 | hcrx->ccid3hcrx_state = state; | ||
1465 | } | ||
1466 | |||
1467 | static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) | ||
1468 | { | ||
1469 | struct dccp_sock *dp = dccp_sk(sk); | ||
1470 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1471 | struct ccid3_rx_hist_entry *entry, *next; | ||
1472 | u8 num_later = 0; | ||
1473 | |||
1474 | if (list_empty(&hcrx->ccid3hcrx_hist)) | ||
1475 | list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); | ||
1476 | else { | ||
1477 | u64 seqno = packet->ccid3hrx_seqno; | ||
1478 | struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, | ||
1479 | struct ccid3_rx_hist_entry, | ||
1480 | ccid3hrx_node); | ||
1481 | if (after48(seqno, iter->ccid3hrx_seqno)) | ||
1482 | list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); | ||
1483 | else { | ||
1484 | if (iter->ccid3hrx_type == DCCP_PKT_DATA || | ||
1485 | iter->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1486 | num_later = 1; | ||
1487 | |||
1488 | list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1489 | if (after48(seqno, iter->ccid3hrx_seqno)) { | ||
1490 | list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); | ||
1491 | goto trim_history; | ||
1492 | } | ||
1493 | |||
1494 | if (iter->ccid3hrx_type == DCCP_PKT_DATA || | ||
1495 | iter->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1496 | num_later++; | ||
1497 | |||
1498 | if (num_later == TFRC_RECV_NUM_LATE_LOSS) { | ||
1499 | ccid3_rx_hist_entry_delete(packet); | ||
1500 | ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", | ||
1501 | dccp_role(sk), sk, seqno); | ||
1502 | return 1; | ||
1503 | } | ||
1504 | } | ||
1505 | |||
1506 | if (num_later < TFRC_RECV_NUM_LATE_LOSS) | ||
1507 | list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); | ||
1508 | /* FIXME: else what? should we destroy the packet like above? */ | ||
1509 | } | ||
1510 | } | ||
1511 | |||
1512 | trim_history: | ||
1513 | /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ | ||
1514 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; | ||
1515 | |||
1516 | if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { | ||
1517 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1518 | if (num_later == 0) { | ||
1519 | list_del_init(&entry->ccid3hrx_node); | ||
1520 | ccid3_rx_hist_entry_delete(entry); | ||
1521 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
1522 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1523 | --num_later; | ||
1524 | } | ||
1525 | } else { | ||
1526 | int step = 0; | ||
1527 | u8 win_count = 0; /* Not needed, but lets shut up gcc */ | ||
1528 | int tmp; | ||
1529 | /* | ||
1530 | * We have no loss interval history so we need at least one | ||
1531 | * rtt:s of data packets to approximate rtt. | ||
1532 | */ | ||
1533 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1534 | if (num_later == 0) { | ||
1535 | switch (step) { | ||
1536 | case 0: | ||
1537 | step = 1; | ||
1538 | /* OK, find next data packet */ | ||
1539 | num_later = 1; | ||
1540 | break; | ||
1541 | case 1: | ||
1542 | step = 2; | ||
1543 | /* OK, find next data packet */ | ||
1544 | num_later = 1; | ||
1545 | win_count = entry->ccid3hrx_win_count; | ||
1546 | break; | ||
1547 | case 2: | ||
1548 | tmp = win_count - entry->ccid3hrx_win_count; | ||
1549 | if (tmp < 0) | ||
1550 | tmp += TFRC_WIN_COUNT_LIMIT; | ||
1551 | if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { | ||
1552 | /* we have found a packet older than one rtt | ||
1553 | * remove the rest */ | ||
1554 | step = 3; | ||
1555 | } else /* OK, find next data packet */ | ||
1556 | num_later = 1; | ||
1557 | break; | ||
1558 | case 3: | ||
1559 | list_del_init(&entry->ccid3hrx_node); | ||
1560 | ccid3_rx_hist_entry_delete(entry); | ||
1561 | break; | ||
1562 | } | ||
1563 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
1564 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1565 | --num_later; | ||
1566 | } | ||
1567 | } | ||
1568 | |||
1569 | return 0; | ||
1570 | } | ||
1571 | |||
1572 | static void ccid3_hc_rx_send_feedback(struct sock *sk) | ||
1573 | { | ||
1574 | struct dccp_sock *dp = dccp_sk(sk); | ||
1575 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1576 | struct ccid3_rx_hist_entry *entry, *packet; | ||
1577 | |||
1578 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
1579 | |||
1580 | switch (hcrx->ccid3hcrx_state) { | ||
1581 | case TFRC_RSTATE_NO_DATA: | ||
1582 | hcrx->ccid3hcrx_x_recv = 0; | ||
1583 | break; | ||
1584 | case TFRC_RSTATE_DATA: { | ||
1585 | u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); | ||
1586 | |||
1587 | if (delta == 0) | ||
1588 | delta = 1; /* to prevent divide by zero */ | ||
1589 | hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; | ||
1590 | } | ||
1591 | break; | ||
1592 | default: | ||
1593 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
1594 | __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); | ||
1595 | dump_stack(); | ||
1596 | return; | ||
1597 | } | ||
1598 | |||
1599 | packet = NULL; | ||
1600 | list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) | ||
1601 | if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
1602 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) { | ||
1603 | packet = entry; | ||
1604 | break; | ||
1605 | } | ||
1606 | |||
1607 | if (packet == NULL) { | ||
1608 | printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", | ||
1609 | __FUNCTION__, dccp_role(sk), sk); | ||
1610 | dump_stack(); | ||
1611 | return; | ||
1612 | } | ||
1613 | |||
1614 | do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); | ||
1615 | hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; | ||
1616 | hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; | ||
1617 | hcrx->ccid3hcrx_bytes_recv = 0; | ||
1618 | |||
1619 | /* Convert to multiples of 10us */ | ||
1620 | hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; | ||
1621 | if (hcrx->ccid3hcrx_p == 0) | ||
1622 | hcrx->ccid3hcrx_pinv = ~0; | ||
1623 | else | ||
1624 | hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; | ||
1625 | dccp_send_ack(sk); | ||
1626 | } | ||
1627 | |||
1628 | static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
1629 | { | ||
1630 | const struct dccp_sock *dp = dccp_sk(sk); | ||
1631 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1632 | |||
1633 | if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | ||
1634 | return; | ||
1635 | |||
1636 | if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) | ||
1637 | dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); | ||
1638 | |||
1639 | if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { | ||
1640 | const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); | ||
1641 | const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); | ||
1642 | |||
1643 | dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); | ||
1644 | dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); | ||
1645 | } | ||
1646 | |||
1647 | DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; | ||
1648 | } | ||
1649 | |||
1650 | /* Weights used to calculate loss event rate */ | ||
1651 | /* | ||
1652 | * These are integers as per section 8 of RFC3448. We can then divide by 4 * | ||
1653 | * when we use it. | ||
1654 | */ | ||
1655 | const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; | ||
1656 | |||
1657 | /* | ||
1658 | * args: fvalue - function value to match | ||
1659 | * returns: p closest to that value | ||
1660 | * | ||
1661 | * both fvalue and p are multiplied by 1,000,000 to use ints | ||
1662 | */ | ||
1663 | u32 calcx_reverse_lookup(u32 fvalue) { | ||
1664 | int ctr = 0; | ||
1665 | int small; | ||
1666 | |||
1667 | if (fvalue < calcx_lookup[0][1]) | ||
1668 | return 0; | ||
1669 | if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) | ||
1670 | small = 1; | ||
1671 | else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0]) | ||
1672 | return 1000000; | ||
1673 | else | ||
1674 | small = 0; | ||
1675 | while (fvalue > calcx_lookup[ctr][small]) | ||
1676 | ctr++; | ||
1677 | if (small) | ||
1678 | return (CALCX_SPLIT * ctr / CALCX_ARRSIZE); | ||
1679 | else | ||
1680 | return (1000000 * ctr / CALCX_ARRSIZE) ; | ||
1681 | } | ||
1682 | |||
1683 | /* calculate first loss interval | ||
1684 | * | ||
1685 | * returns estimated loss interval in usecs */ | ||
1686 | |||
1687 | static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) | ||
1688 | { | ||
1689 | struct dccp_sock *dp = dccp_sk(sk); | ||
1690 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1691 | struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; | ||
1692 | u32 rtt, delta, x_recv, fval, p, tmp2; | ||
1693 | struct timeval tstamp, tmp_tv; | ||
1694 | int interval = 0; | ||
1695 | int win_count = 0; | ||
1696 | int step = 0; | ||
1697 | u64 tmp1; | ||
1698 | |||
1699 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1700 | if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
1701 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) { | ||
1702 | tail = entry; | ||
1703 | |||
1704 | switch (step) { | ||
1705 | case 0: | ||
1706 | tstamp = entry->ccid3hrx_tstamp; | ||
1707 | win_count = entry->ccid3hrx_win_count; | ||
1708 | step = 1; | ||
1709 | break; | ||
1710 | case 1: | ||
1711 | interval = win_count - entry->ccid3hrx_win_count; | ||
1712 | if (interval < 0) | ||
1713 | interval += TFRC_WIN_COUNT_LIMIT; | ||
1714 | if (interval > 4) | ||
1715 | goto found; | ||
1716 | break; | ||
1717 | } | ||
1718 | } | ||
1719 | } | ||
1720 | |||
1721 | if (step == 0) { | ||
1722 | printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n", | ||
1723 | __FUNCTION__, dccp_role(sk), sk); | ||
1724 | return ~0; | ||
1725 | } | ||
1726 | |||
1727 | if (interval == 0) { | ||
1728 | ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n", | ||
1729 | dccp_role(sk), sk); | ||
1730 | interval = 1; | ||
1731 | } | ||
1732 | found: | ||
1733 | timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); | ||
1734 | rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; | ||
1735 | ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", | ||
1736 | dccp_role(sk), sk, rtt); | ||
1737 | if (rtt == 0) | ||
1738 | rtt = 1; | ||
1739 | |||
1740 | delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); | ||
1741 | if (delta == 0) | ||
1742 | delta = 1; | ||
1743 | |||
1744 | x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; | ||
1745 | |||
1746 | tmp1 = (u64)x_recv * (u64)rtt; | ||
1747 | do_div(tmp1,10000000); | ||
1748 | tmp2 = (u32)tmp1; | ||
1749 | fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; | ||
1750 | /* do not alter order above or you will get overflow on 32 bit */ | ||
1751 | p = calcx_reverse_lookup(fval); | ||
1752 | ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\ | ||
1753 | dccp_role(sk), sk, x_recv, p); | ||
1754 | |||
1755 | if (p == 0) | ||
1756 | return ~0; | ||
1757 | else | ||
1758 | return 1000000 / p; | ||
1759 | } | ||
1760 | |||
1761 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) | ||
1762 | { | ||
1763 | struct dccp_sock *dp = dccp_sk(sk); | ||
1764 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1765 | struct ccid3_loss_interval_hist_entry *li_entry; | ||
1766 | |||
1767 | if (seq_loss != DCCP_MAX_SEQNO + 1) { | ||
1768 | ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n", | ||
1769 | dccp_role(sk), sk, seq_loss, win_loss); | ||
1770 | |||
1771 | if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { | ||
1772 | struct ccid3_loss_interval_hist_entry *li_tail = NULL; | ||
1773 | int i; | ||
1774 | |||
1775 | ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk); | ||
1776 | for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { | ||
1777 | li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); | ||
1778 | if (li_entry == NULL) { | ||
1779 | ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); | ||
1780 | ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n", | ||
1781 | dccp_role(sk), sk); | ||
1782 | return; | ||
1783 | } | ||
1784 | if (li_tail == NULL) | ||
1785 | li_tail = li_entry; | ||
1786 | list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist); | ||
1787 | } | ||
1788 | |||
1789 | li_entry->ccid3lih_seqno = seq_loss; | ||
1790 | li_entry->ccid3lih_win_count = win_loss; | ||
1791 | |||
1792 | li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); | ||
1793 | } | ||
1794 | } | ||
1795 | /* FIXME: find end of interval */ | ||
1796 | } | ||
1797 | |||
1798 | static void ccid3_hc_rx_detect_loss(struct sock *sk) | ||
1799 | { | ||
1800 | struct dccp_sock *dp = dccp_sk(sk); | ||
1801 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1802 | struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; | ||
1803 | struct ccid3_rx_hist_entry *a_loss = NULL; | ||
1804 | struct ccid3_rx_hist_entry *b_loss = NULL; | ||
1805 | u64 seq_loss = DCCP_MAX_SEQNO + 1; | ||
1806 | u8 win_loss = 0; | ||
1807 | u8 num_later = TFRC_RECV_NUM_LATE_LOSS; | ||
1808 | |||
1809 | list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1810 | if (num_later == 0) { | ||
1811 | b_loss = entry; | ||
1812 | break; | ||
1813 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
1814 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1815 | --num_later; | ||
1816 | } | ||
1817 | |||
1818 | if (b_loss == NULL) | ||
1819 | goto out_update_li; | ||
1820 | |||
1821 | a_next = b_next; | ||
1822 | num_later = 1; | ||
1823 | #if 0 | ||
1824 | FIXME MERGE GIT! | ||
1825 | list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1826 | if (num_later == 0) { | ||
1827 | a_loss = entry; | ||
1828 | break; | ||
1829 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
1830 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1831 | --num_later; | ||
1832 | } | ||
1833 | #endif | ||
1834 | |||
1835 | if (a_loss == NULL) { | ||
1836 | if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { | ||
1837 | /* no loss event have occured yet */ | ||
1838 | ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " | ||
1839 | "packet by comparing to initial seqno\n", | ||
1840 | dccp_role(sk), sk); | ||
1841 | goto out_update_li; | ||
1842 | } else { | ||
1843 | pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", | ||
1844 | __FUNCTION__, dccp_role(sk), sk); | ||
1845 | return; | ||
1846 | } | ||
1847 | } | ||
1848 | |||
1849 | /* Locate a lost data packet */ | ||
1850 | entry = packet = b_loss; | ||
1851 | #if 0 | ||
1852 | FIXME MERGE GIT! | ||
1853 | list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
1854 | u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); | ||
1855 | |||
1856 | if (delta != 0) { | ||
1857 | if (packet->ccid3hrx_type == DCCP_PKT_DATA || | ||
1858 | packet->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
1859 | --delta; | ||
1860 | /* | ||
1861 | * FIXME: check this, probably this % usage is because | ||
1862 | * in earlier drafts the ndp count was just 8 bits | ||
1863 | * long, but now it cam be up to 24 bits long. | ||
1864 | */ | ||
1865 | #if 0 | ||
1866 | if (delta % DCCP_NDP_LIMIT != | ||
1867 | (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) | ||
1868 | #endif | ||
1869 | if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { | ||
1870 | seq_loss = entry->ccid3hrx_seqno; | ||
1871 | dccp_inc_seqno(&seq_loss); | ||
1872 | } | ||
1873 | } | ||
1874 | packet = entry; | ||
1875 | if (packet == a_loss) | ||
1876 | break; | ||
1877 | } | ||
1878 | #endif | ||
1879 | |||
1880 | if (seq_loss != DCCP_MAX_SEQNO + 1) | ||
1881 | win_loss = a_loss->ccid3hrx_win_count; | ||
1882 | |||
1883 | out_update_li: | ||
1884 | ccid3_hc_rx_update_li(sk, seq_loss, win_loss); | ||
1885 | } | ||
1886 | |||
1887 | static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) | ||
1888 | { | ||
1889 | struct dccp_sock *dp = dccp_sk(sk); | ||
1890 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1891 | struct ccid3_loss_interval_hist_entry *li_entry, *li_next; | ||
1892 | int i = 0; | ||
1893 | u32 i_tot; | ||
1894 | u32 i_tot0 = 0; | ||
1895 | u32 i_tot1 = 0; | ||
1896 | u32 w_tot = 0; | ||
1897 | |||
1898 | list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { | ||
1899 | if (i < TFRC_RECV_IVAL_F_LENGTH) { | ||
1900 | i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; | ||
1901 | w_tot += ccid3_hc_rx_w[i]; | ||
1902 | } | ||
1903 | |||
1904 | if (i != 0) | ||
1905 | i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; | ||
1906 | |||
1907 | if (++i > TFRC_RECV_IVAL_F_LENGTH) | ||
1908 | break; | ||
1909 | } | ||
1910 | |||
1911 | if (i != TFRC_RECV_IVAL_F_LENGTH) { | ||
1912 | pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", | ||
1913 | __FUNCTION__, dccp_role(sk), sk); | ||
1914 | return 0; | ||
1915 | } | ||
1916 | |||
1917 | i_tot = max(i_tot0, i_tot1); | ||
1918 | |||
1919 | /* FIXME: Why do we do this? -Ian McDonald */ | ||
1920 | if (i_tot * 4 < w_tot) | ||
1921 | i_tot = w_tot * 4; | ||
1922 | |||
1923 | return i_tot * 4 / w_tot; | ||
1924 | } | ||
1925 | |||
1926 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
1927 | { | ||
1928 | struct dccp_sock *dp = dccp_sk(sk); | ||
1929 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1930 | struct ccid3_rx_hist_entry *packet; | ||
1931 | struct timeval now; | ||
1932 | u8 win_count; | ||
1933 | u32 p_prev; | ||
1934 | int ins; | ||
1935 | #if 0 | ||
1936 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", | ||
1937 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), | ||
1938 | skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
1939 | #endif | ||
1940 | if (hcrx == NULL) | ||
1941 | return; | ||
1942 | |||
1943 | BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || | ||
1944 | hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); | ||
1945 | |||
1946 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | ||
1947 | case DCCP_PKT_ACK: | ||
1948 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | ||
1949 | return; | ||
1950 | case DCCP_PKT_DATAACK: | ||
1951 | if (dp->dccps_options_received.dccpor_timestamp_echo == 0) | ||
1952 | break; | ||
1953 | p_prev = hcrx->ccid3hcrx_rtt; | ||
1954 | do_gettimeofday(&now); | ||
1955 | /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - | ||
1956 | usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); | ||
1957 | FIXME - I think above code is broken - have to look at options more, will also need | ||
1958 | to fix pr_debug below */ | ||
1959 | if (p_prev != hcrx->ccid3hcrx_rtt) | ||
1960 | ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", | ||
1961 | dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, | ||
1962 | dp->dccps_options_received.dccpor_timestamp_echo, | ||
1963 | dp->dccps_options_received.dccpor_elapsed_time); | ||
1964 | break; | ||
1965 | case DCCP_PKT_DATA: | ||
1966 | break; | ||
1967 | default: | ||
1968 | ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", | ||
1969 | dccp_role(sk), sk, | ||
1970 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
1971 | return; | ||
1972 | } | ||
1973 | |||
1974 | packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); | ||
1975 | if (packet == NULL) { | ||
1976 | ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", | ||
1977 | dccp_role(sk), sk); | ||
1978 | return; | ||
1979 | } | ||
1980 | |||
1981 | win_count = packet->ccid3hrx_win_count; | ||
1982 | |||
1983 | ins = ccid3_hc_rx_add_hist(sk, packet); | ||
1984 | |||
1985 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) | ||
1986 | return; | ||
1987 | |||
1988 | switch (hcrx->ccid3hcrx_state) { | ||
1989 | case TFRC_RSTATE_NO_DATA: | ||
1990 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", | ||
1991 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); | ||
1992 | ccid3_hc_rx_send_feedback(sk); | ||
1993 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); | ||
1994 | return; | ||
1995 | case TFRC_RSTATE_DATA: | ||
1996 | hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; | ||
1997 | if (ins == 0) { | ||
1998 | do_gettimeofday(&now); | ||
1999 | if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { | ||
2000 | hcrx->ccid3hcrx_tstamp_last_ack = now; | ||
2001 | ccid3_hc_rx_send_feedback(sk); | ||
2002 | } | ||
2003 | return; | ||
2004 | } | ||
2005 | break; | ||
2006 | default: | ||
2007 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
2008 | __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); | ||
2009 | dump_stack(); | ||
2010 | return; | ||
2011 | } | ||
2012 | |||
2013 | /* Dealing with packet loss */ | ||
2014 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n", | ||
2015 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); | ||
2016 | |||
2017 | ccid3_hc_rx_detect_loss(sk); | ||
2018 | p_prev = hcrx->ccid3hcrx_p; | ||
2019 | |||
2020 | /* Calculate loss event rate */ | ||
2021 | if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) | ||
2022 | /* Scaling up by 1000000 as fixed decimal */ | ||
2023 | hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); | ||
2024 | |||
2025 | if (hcrx->ccid3hcrx_p > p_prev) { | ||
2026 | ccid3_hc_rx_send_feedback(sk); | ||
2027 | return; | ||
2028 | } | ||
2029 | } | ||
2030 | |||
2031 | static int ccid3_hc_rx_init(struct sock *sk) | ||
2032 | { | ||
2033 | struct dccp_sock *dp = dccp_sk(sk); | ||
2034 | struct ccid3_hc_rx_sock *hcrx; | ||
2035 | |||
2036 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
2037 | |||
2038 | hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); | ||
2039 | if (hcrx == NULL) | ||
2040 | return -ENOMEM; | ||
2041 | |||
2042 | memset(hcrx, 0, sizeof(*hcrx)); | ||
2043 | |||
2044 | if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && | ||
2045 | dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) | ||
2046 | hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; | ||
2047 | else | ||
2048 | hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; | ||
2049 | |||
2050 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; | ||
2051 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); | ||
2052 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); | ||
2053 | |||
2054 | return 0; | ||
2055 | } | ||
2056 | |||
2057 | static void ccid3_hc_rx_exit(struct sock *sk) | ||
2058 | { | ||
2059 | struct dccp_sock *dp = dccp_sk(sk); | ||
2060 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
2061 | |||
2062 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
2063 | |||
2064 | if (hcrx == NULL) | ||
2065 | return; | ||
2066 | |||
2067 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); | ||
2068 | |||
2069 | /* Empty packet history */ | ||
2070 | ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); | ||
2071 | |||
2072 | /* Empty loss interval history */ | ||
2073 | ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); | ||
2074 | |||
2075 | kfree(dp->dccps_hc_rx_ccid_private); | ||
2076 | dp->dccps_hc_rx_ccid_private = NULL; | ||
2077 | } | ||
2078 | |||
2079 | static struct ccid ccid3 = { | ||
2080 | .ccid_id = 3, | ||
2081 | .ccid_name = "ccid3", | ||
2082 | .ccid_owner = THIS_MODULE, | ||
2083 | .ccid_init = ccid3_init, | ||
2084 | .ccid_exit = ccid3_exit, | ||
2085 | .ccid_hc_tx_init = ccid3_hc_tx_init, | ||
2086 | .ccid_hc_tx_exit = ccid3_hc_tx_exit, | ||
2087 | .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, | ||
2088 | .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, | ||
2089 | .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, | ||
2090 | .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, | ||
2091 | .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, | ||
2092 | .ccid_hc_rx_init = ccid3_hc_rx_init, | ||
2093 | .ccid_hc_rx_exit = ccid3_hc_rx_exit, | ||
2094 | .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, | ||
2095 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, | ||
2096 | }; | ||
2097 | |||
2098 | module_param(ccid3_debug, int, 0444); | ||
2099 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | ||
2100 | |||
2101 | static __init int ccid3_module_init(void) | ||
2102 | { | ||
2103 | int rc = -ENOMEM; | ||
2104 | |||
2105 | ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", | ||
2106 | sizeof(struct ccid3_tx_hist_entry), 0, | ||
2107 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
2108 | if (ccid3_tx_hist_slab == NULL) | ||
2109 | goto out; | ||
2110 | |||
2111 | ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", | ||
2112 | sizeof(struct ccid3_rx_hist_entry), 0, | ||
2113 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
2114 | if (ccid3_rx_hist_slab == NULL) | ||
2115 | goto out_free_tx_history; | ||
2116 | |||
2117 | ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", | ||
2118 | sizeof(struct ccid3_loss_interval_hist_entry), 0, | ||
2119 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
2120 | if (ccid3_loss_interval_hist_slab == NULL) | ||
2121 | goto out_free_rx_history; | ||
2122 | |||
2123 | rc = ccid_register(&ccid3); | ||
2124 | if (rc != 0) | ||
2125 | goto out_free_loss_interval_history; | ||
2126 | |||
2127 | out: | ||
2128 | return rc; | ||
2129 | out_free_loss_interval_history: | ||
2130 | kmem_cache_destroy(ccid3_loss_interval_hist_slab); | ||
2131 | ccid3_loss_interval_hist_slab = NULL; | ||
2132 | out_free_rx_history: | ||
2133 | kmem_cache_destroy(ccid3_rx_hist_slab); | ||
2134 | ccid3_rx_hist_slab = NULL; | ||
2135 | out_free_tx_history: | ||
2136 | kmem_cache_destroy(ccid3_tx_hist_slab); | ||
2137 | ccid3_tx_hist_slab = NULL; | ||
2138 | goto out; | ||
2139 | } | ||
2140 | module_init(ccid3_module_init); | ||
2141 | |||
2142 | static __exit void ccid3_module_exit(void) | ||
2143 | { | ||
2144 | ccid_unregister(&ccid3); | ||
2145 | |||
2146 | if (ccid3_tx_hist_slab != NULL) { | ||
2147 | kmem_cache_destroy(ccid3_tx_hist_slab); | ||
2148 | ccid3_tx_hist_slab = NULL; | ||
2149 | } | ||
2150 | if (ccid3_rx_hist_slab != NULL) { | ||
2151 | kmem_cache_destroy(ccid3_rx_hist_slab); | ||
2152 | ccid3_rx_hist_slab = NULL; | ||
2153 | } | ||
2154 | if (ccid3_loss_interval_hist_slab != NULL) { | ||
2155 | kmem_cache_destroy(ccid3_loss_interval_hist_slab); | ||
2156 | ccid3_loss_interval_hist_slab = NULL; | ||
2157 | } | ||
2158 | } | ||
2159 | module_exit(ccid3_module_exit); | ||
2160 | |||
2161 | MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz> & Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); | ||
2162 | MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); | ||
2163 | MODULE_LICENSE("GPL"); | ||
2164 | MODULE_ALIAS("net-dccp-ccid-3"); | ||
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h new file mode 100644 index 000000000000..5d6b623e64da --- /dev/null +++ b/net/dccp/ccids/ccid3.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/ccid3.h | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | #ifndef _DCCP_CCID3_H_ | ||
37 | #define _DCCP_CCID3_H_ | ||
38 | |||
39 | #include <linux/types.h> | ||
40 | #include <linux/list.h> | ||
41 | #include <linux/timer.h> | ||
42 | |||
43 | struct ccid3_tx_hist_entry { | ||
44 | struct list_head ccid3htx_node; | ||
45 | u64 ccid3htx_seqno:48, | ||
46 | ccid3htx_win_count:8, | ||
47 | ccid3htx_sent:1; | ||
48 | struct timeval ccid3htx_tstamp; | ||
49 | }; | ||
50 | |||
51 | struct ccid3_options_received { | ||
52 | u64 ccid3or_seqno:48, | ||
53 | ccid3or_loss_intervals_idx:16; | ||
54 | u16 ccid3or_loss_intervals_len; | ||
55 | u32 ccid3or_loss_event_rate; | ||
56 | u32 ccid3or_receive_rate; | ||
57 | }; | ||
58 | |||
59 | /** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block | ||
60 | * | ||
61 | * @ccid3hctx_state - Sender state | ||
62 | * @ccid3hctx_x - Current sending rate | ||
63 | * @ccid3hctx_x_recv - Receive rate | ||
64 | * @ccid3hctx_x_calc - Calculated send (?) rate | ||
65 | * @ccid3hctx_s - Packet size | ||
66 | * @ccid3hctx_rtt - Estimate of current round trip time in usecs | ||
67 | * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 | ||
68 | * @ccid3hctx_last_win_count - Last window counter sent | ||
69 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent | ||
70 | * @ccid3hctx_no_feedback_timer - Handle to no feedback timer | ||
71 | * @ccid3hctx_idle - FIXME | ||
72 | * @ccid3hctx_t_ld - Time last doubled during slow start | ||
73 | * @ccid3hctx_t_nom - Nominal send time of next packet | ||
74 | * @ccid3hctx_t_ipi - Interpacket (send) interval | ||
75 | * @ccid3hctx_delta - Send timer delta | ||
76 | * @ccid3hctx_hist - Packet history | ||
77 | */ | ||
78 | struct ccid3_hc_tx_sock { | ||
79 | u32 ccid3hctx_x; | ||
80 | u32 ccid3hctx_x_recv; | ||
81 | u32 ccid3hctx_x_calc; | ||
82 | u16 ccid3hctx_s; | ||
83 | u32 ccid3hctx_rtt; | ||
84 | u32 ccid3hctx_p; | ||
85 | u8 ccid3hctx_state; | ||
86 | u8 ccid3hctx_last_win_count; | ||
87 | u8 ccid3hctx_idle; | ||
88 | struct timeval ccid3hctx_t_last_win_count; | ||
89 | struct timer_list ccid3hctx_no_feedback_timer; | ||
90 | struct timeval ccid3hctx_t_ld; | ||
91 | struct timeval ccid3hctx_t_nom; | ||
92 | u32 ccid3hctx_t_ipi; | ||
93 | u32 ccid3hctx_delta; | ||
94 | struct list_head ccid3hctx_hist; | ||
95 | struct ccid3_options_received ccid3hctx_options_received; | ||
96 | }; | ||
97 | |||
98 | struct ccid3_loss_interval_hist_entry { | ||
99 | struct list_head ccid3lih_node; | ||
100 | u64 ccid3lih_seqno:48, | ||
101 | ccid3lih_win_count:4; | ||
102 | u32 ccid3lih_interval; | ||
103 | }; | ||
104 | |||
105 | struct ccid3_rx_hist_entry { | ||
106 | struct list_head ccid3hrx_node; | ||
107 | u64 ccid3hrx_seqno:48, | ||
108 | ccid3hrx_win_count:4, | ||
109 | ccid3hrx_type:4; | ||
110 | u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ | ||
111 | struct timeval ccid3hrx_tstamp; | ||
112 | }; | ||
113 | |||
114 | struct ccid3_hc_rx_sock { | ||
115 | u64 ccid3hcrx_seqno_last_counter:48, | ||
116 | ccid3hcrx_state:8, | ||
117 | ccid3hcrx_last_counter:4; | ||
118 | unsigned long ccid3hcrx_rtt; | ||
119 | u32 ccid3hcrx_p; | ||
120 | u32 ccid3hcrx_bytes_recv; | ||
121 | struct timeval ccid3hcrx_tstamp_last_feedback; | ||
122 | struct timeval ccid3hcrx_tstamp_last_ack; | ||
123 | struct list_head ccid3hcrx_hist; | ||
124 | struct list_head ccid3hcrx_loss_interval_hist; | ||
125 | u16 ccid3hcrx_s; | ||
126 | u32 ccid3hcrx_pinv; | ||
127 | u32 ccid3hcrx_elapsed_time; | ||
128 | u32 ccid3hcrx_x_recv; | ||
129 | }; | ||
130 | |||
/*
 * ccid3_hc_tx_field - read member ccid3hctx_<field> of the sender state
 * @s: pointer to the dccp socket holding dccps_hc_tx_ccid_private
 * @field: member name without the ccid3hctx_ prefix
 *
 * Evaluates to 0 when no sender state is attached.  @s is parenthesized so
 * that any pointer expression can be passed; it is evaluated twice, so it
 * must not have side effects.
 */
#define ccid3_hc_tx_field(s, field) \
	((s)->dccps_hc_tx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_tx_sock *)(s)->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
133 | |||
/*
 * ccid3_hc_rx_field - read member ccid3hcrx_<field> of the receiver state
 * @s: pointer to the dccp socket holding dccps_hc_rx_ccid_private
 * @field: member name without the ccid3hcrx_ prefix
 *
 * Evaluates to 0 when no receiver state is attached.  @s is parenthesized
 * so that any pointer expression can be passed; it is evaluated twice, so
 * it must not have side effects.
 */
#define ccid3_hc_rx_field(s, field) \
	((s)->dccps_hc_rx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_rx_sock *)(s)->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
136 | |||
137 | #endif /* _DCCP_CCID3_H_ */ | ||
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 000000000000..fb83454102c1 --- /dev/null +++ b/net/dccp/dccp.h | |||
@@ -0,0 +1,422 @@ | |||
1 | #ifndef _DCCP_H | ||
2 | #define _DCCP_H | ||
3 | /* | ||
4 | * net/dccp/dccp.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include <linux/dccp.h> | ||
15 | #include <net/snmp.h> | ||
16 | #include <net/sock.h> | ||
17 | #include <net/tcp.h> | ||
18 | |||
/*
 * Debug output support.  DCCP_DEBUG is defined unconditionally here, so the
 * printing variants below are the ones normally compiled in; output is then
 * gated at run time by the dccp_debug flag.
 */
#define DCCP_DEBUG

#ifdef DCCP_DEBUG
extern int dccp_debug;

/* Emit a KERN_DEBUG message prefixed with the calling function's name. */
#define dccp_pr_debug(format, a...) \
	do { if (dccp_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
	} while (0)
/* Emit text as-is (no log level, no prefix), e.g. to continue a line. */
#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0)
#else
/*
 * Disabled variants still expand to a single statement so that they stay
 * safe inside unbraced if/else bodies.
 */
#define dccp_pr_debug(format, a...)	do { } while (0)
#define dccp_pr_debug_cat(format, a...)	do { } while (0)
#endif
33 | |||
34 | extern struct inet_hashinfo dccp_hashinfo; | ||
35 | |||
36 | extern atomic_t dccp_orphan_count; | ||
37 | extern int dccp_tw_count; | ||
38 | extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); | ||
39 | |||
40 | extern void dccp_time_wait(struct sock *sk, int state, int timeo); | ||
41 | |||
/* FIXME: Right size this */
#define DCCP_MAX_OPT_LEN 128

#define DCCP_MAX_PACKET_HDR 32

/* Sum of the packet header, option and MAX_HEADER allowances above. */
#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)

#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
				     * state, about 60 seconds */

/* draft-ietf-dccp-spec-11.txt initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))

/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))

/* Upper bound for the retransmission timeout, in jiffies (120 seconds). */
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
59 | |||
60 | extern struct proto dccp_v4_prot; | ||
61 | |||
/*
 * before48 - is seq1 < seq2 in 48-bit circular sequence space?
 *
 * Both operands are shifted left by 16 so that the 48 significant bits sit
 * at the top of a 64-bit word; the sign of the wrapped difference then gives
 * the ordering.  Bits above bit 47 of either input are discarded by the
 * shift.
 *
 * Returns non-zero when seq1 precedes seq2, and 0 otherwise (also when the
 * two are equal).
 */
static inline int before48(const u64 seq1, const u64 seq2)
{
	return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
}
67 | |||
/*
 * after48 - is seq1 > seq2 in 48-bit circular sequence space?
 *
 * Mirror image of the "before" test: the operands are shifted left by 16 so
 * that the sign of the wrapped 64-bit difference reflects 48-bit ordering.
 *
 * Returns non-zero when seq1 follows seq2, and 0 otherwise (also when the
 * two are equal).
 */
static inline int after48(const u64 seq1, const u64 seq2)
{
	return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
}
73 | |||
/*
 * between48 - is seq2 <= seq1 <= seq3 in 48-bit circular sequence space?
 *
 * Measures, modulo 2^48, how far seq1 and seq3 lie ahead of seq2 and checks
 * that seq1 is no further ahead than seq3.  Both bounds are inclusive.
 *
 * Returns non-zero when seq1 lies inside [seq2, seq3], 0 otherwise.
 */
static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
{
	return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
}
79 | |||
80 | static inline u64 max48(const u64 seq1, const u64 seq2) | ||
81 | { | ||
82 | return after48(seq1, seq2) ? seq1 : seq2; | ||
83 | } | ||
84 | |||
/*
 * Indices of the DCCP statistics counters kept in struct dccp_mib.
 * Only the counters whose users are visible in this header and in input.c
 * are annotated beyond their original names.
 */
enum {
	DCCP_MIB_NUM = 0,
	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
	DCCP_MIB_ESTABRESETS,			/* EstabResets: bumped by dccp_set_state() on OPEN/CLOSING -> CLOSED */
	DCCP_MIB_CURRESTAB,			/* CurrEstab: inc/dec by dccp_set_state() on entering/leaving OPEN */
	DCCP_MIB_OUTSEGS,			/* OutSegs */
	DCCP_MIB_OUTRSTS,
	DCCP_MIB_ABORTONTIMEOUT,
	DCCP_MIB_TIMEOUTS,
	DCCP_MIB_ABORTFAILED,
	DCCP_MIB_PASSIVEOPENS,
	DCCP_MIB_ATTEMPTFAILS,
	DCCP_MIB_OUTDATAGRAMS,
	DCCP_MIB_INERRS,			/* bumped by dccp_rcv_established() for unexpected packet types */
	DCCP_MIB_OPTMANDATORYERROR,
	DCCP_MIB_INVALIDOPT,
	__DCCP_MIB_MAX				/* number of counters, not a counter itself */
};
103 | |||
#define DCCP_MIB_MAX	__DCCP_MIB_MAX
/* Counter array indexed by the DCCP_MIB_* values. */
struct dccp_mib {
	unsigned long	mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;

DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
/*
 * Thin wrappers that apply the generic SNMP_*_STATS* helpers to
 * dccp_statistics; each suffix is passed straight through to the helper of
 * the same name.
 */
#define DCCP_INC_STATS(field)		SNMP_INC_STATS(dccp_statistics, field)
#define DCCP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(dccp_statistics, field)
#define DCCP_INC_STATS_USER(field)	SNMP_INC_STATS_USER(dccp_statistics, field)
#define DCCP_DEC_STATS(field)		SNMP_DEC_STATS(dccp_statistics, field)
#define DCCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(dccp_statistics, field, val)
#define DCCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(dccp_statistics, field, val)
116 | |||
117 | extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); | ||
118 | extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); | ||
119 | |||
120 | extern int dccp_send_response(struct sock *sk); | ||
121 | extern void dccp_send_ack(struct sock *sk); | ||
122 | extern void dccp_send_delayed_ack(struct sock *sk); | ||
123 | extern void dccp_send_sync(struct sock *sk, u64 seq); | ||
124 | |||
125 | extern void dccp_init_xmit_timers(struct sock *sk); | ||
/*
 * dccp_clear_xmit_timers - clear the socket's transmit timers.
 * DCCP keeps no timers of its own here; this simply forwards to the generic
 * inet_connection_sock helper.
 */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
130 | |||
131 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); | ||
132 | |||
133 | extern const char *dccp_packet_name(const int type); | ||
134 | extern const char *dccp_state_name(const int state); | ||
135 | |||
136 | static inline void dccp_set_state(struct sock *sk, const int state) | ||
137 | { | ||
138 | const int oldstate = sk->sk_state; | ||
139 | |||
140 | dccp_pr_debug("%s(%p) %-10.10s -> %s\n", | ||
141 | dccp_role(sk), sk, | ||
142 | dccp_state_name(oldstate), dccp_state_name(state)); | ||
143 | WARN_ON(state == oldstate); | ||
144 | |||
145 | switch (state) { | ||
146 | case DCCP_OPEN: | ||
147 | if (oldstate != DCCP_OPEN) | ||
148 | DCCP_INC_STATS(DCCP_MIB_CURRESTAB); | ||
149 | break; | ||
150 | |||
151 | case DCCP_CLOSED: | ||
152 | if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) | ||
153 | DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); | ||
154 | |||
155 | sk->sk_prot->unhash(sk); | ||
156 | if (inet_csk(sk)->icsk_bind_hash != NULL && | ||
157 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | ||
158 | inet_put_port(&dccp_hashinfo, sk); | ||
159 | /* fall through */ | ||
160 | default: | ||
161 | if (oldstate == DCCP_OPEN) | ||
162 | DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); | ||
163 | } | ||
164 | |||
165 | /* Change state AFTER socket is unhashed to avoid closed | ||
166 | * socket sitting in hash tables. | ||
167 | */ | ||
168 | sk->sk_state = state; | ||
169 | } | ||
170 | |||
171 | static inline void dccp_done(struct sock *sk) | ||
172 | { | ||
173 | dccp_set_state(sk, DCCP_CLOSED); | ||
174 | dccp_clear_xmit_timers(sk); | ||
175 | |||
176 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
177 | |||
178 | if (!sock_flag(sk, SOCK_DEAD)) | ||
179 | sk->sk_state_change(sk); | ||
180 | else | ||
181 | inet_csk_destroy_sock(sk); | ||
182 | } | ||
183 | |||
184 | static inline void dccp_openreq_init(struct request_sock *req, | ||
185 | struct dccp_sock *dp, | ||
186 | struct sk_buff *skb) | ||
187 | { | ||
188 | /* | ||
189 | * FIXME: fill in the other req fields from the DCCP options | ||
190 | * received | ||
191 | */ | ||
192 | inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; | ||
193 | inet_rsk(req)->acked = 0; | ||
194 | req->rcv_wnd = 0; | ||
195 | } | ||
196 | |||
197 | extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, | ||
198 | struct sk_buff *skb); | ||
199 | extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); | ||
200 | |||
201 | extern struct sock *dccp_create_openreq_child(struct sock *sk, | ||
202 | const struct request_sock *req, | ||
203 | const struct sk_buff *skb); | ||
204 | |||
205 | extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); | ||
206 | |||
207 | extern void dccp_v4_err(struct sk_buff *skb, u32); | ||
208 | |||
209 | extern int dccp_v4_rcv(struct sk_buff *skb); | ||
210 | |||
211 | extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, | ||
212 | struct sk_buff *skb, | ||
213 | struct request_sock *req, | ||
214 | struct dst_entry *dst); | ||
215 | extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
216 | struct request_sock *req, | ||
217 | struct request_sock **prev); | ||
218 | |||
219 | extern int dccp_child_process(struct sock *parent, struct sock *child, | ||
220 | struct sk_buff *skb); | ||
221 | extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
222 | struct dccp_hdr *dh, unsigned len); | ||
223 | extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
224 | const struct dccp_hdr *dh, const unsigned len); | ||
225 | |||
226 | extern void dccp_close(struct sock *sk, long timeout); | ||
227 | extern struct sk_buff *dccp_make_response(struct sock *sk, | ||
228 | struct dst_entry *dst, | ||
229 | struct request_sock *req); | ||
230 | |||
231 | extern int dccp_connect(struct sock *sk); | ||
232 | extern int dccp_disconnect(struct sock *sk, int flags); | ||
233 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
234 | char *optval, int *optlen); | ||
235 | extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); | ||
236 | extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
237 | size_t size); | ||
238 | extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, | ||
239 | struct msghdr *msg, size_t len, int nonblock, | ||
240 | int flags, int *addr_len); | ||
241 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
242 | char *optval, int optlen); | ||
243 | extern void dccp_shutdown(struct sock *sk, int how); | ||
244 | |||
245 | extern int dccp_v4_checksum(struct sk_buff *skb); | ||
246 | |||
247 | extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); | ||
248 | extern void dccp_send_close(struct sock *sk); | ||
249 | |||
/*
 * Per-packet DCCP control data.  It is overlaid on the start of the skb's
 * cb[] area and accessed through DCCP_SKB_CB().
 */
struct dccp_skb_cb {
	__u8	dccpd_type;		/* packet type, compared against DCCP_PKT_* */
	__u8	dccpd_reset_code;
	__u8	dccpd_service;		/* NOTE(review): sockaddr_dccp carries an unsigned int service -- confirm 8 bits suffice */
	__u8	dccpd_ccval;
	__u64	dccpd_seq;		/* sequence number of the packet */
	__u64	dccpd_ack_seq;		/* acknowledgement number, or DCCP_PKT_WITHOUT_ACK_SEQ */
	int	dccpd_opt_len;
};

/* View the skb's control buffer as a struct dccp_skb_cb. */
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
261 | |||
262 | static inline int dccp_non_data_packet(const struct sk_buff *skb) | ||
263 | { | ||
264 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
265 | |||
266 | return type == DCCP_PKT_ACK || | ||
267 | type == DCCP_PKT_CLOSE || | ||
268 | type == DCCP_PKT_CLOSEREQ || | ||
269 | type == DCCP_PKT_RESET || | ||
270 | type == DCCP_PKT_SYNC || | ||
271 | type == DCCP_PKT_SYNCACK; | ||
272 | } | ||
273 | |||
274 | static inline int dccp_packet_without_ack(const struct sk_buff *skb) | ||
275 | { | ||
276 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
277 | |||
278 | return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; | ||
279 | } | ||
280 | |||
/* Largest valid sequence number: DCCP sequence numbers are 48 bits wide. */
#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
/*
 * Marker compared against dccpd_ack_seq to recognise packets that carry no
 * acknowledgement number.  It lies outside the 48-bit range, so it can never
 * collide with a real sequence number.
 */
#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)

/*
 * dccp_set_seqno - store a sequence number, reduced modulo 2^48
 * @seqno: destination
 * @value: result of 64-bit arithmetic on sequence numbers
 *
 * Masking handles every out-of-range input: sums that ran past
 * DCCP_MAX_SEQNO by any amount as well as differences that wrapped below
 * zero in u64 arithmetic (e.g. "gsr + 1 - window / 4" with a small gsr).
 * Subtracting 2^48 once only repaired overflow by less than one full period
 * and left underflowed values outside the 48-bit range.
 */
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
	*seqno = value & DCCP_MAX_SEQNO;
}
290 | |||
/*
 * dccp_delta_seqno - distance from seqno1 forward to seqno2, modulo 2^48
 *
 * The operands are moved into the top 48 bits of a 64-bit word so that the
 * subtraction wraps at 2^48; shifting back yields a value in [0, 2^48).
 */
static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
{
	const u64 from = seqno1 << 16;
	const u64 to = seqno2 << 16;

	return (to - from) >> 16;
}
295 | |||
296 | static inline void dccp_inc_seqno(u64 *seqno) | ||
297 | { | ||
298 | if (++*seqno > DCCP_MAX_SEQNO) | ||
299 | *seqno = 0; | ||
300 | } | ||
301 | |||
/*
 * dccp_hdr_set_seq - write a sequence number into a DCCP header
 * @dh:  generic header; the extended header is taken to start directly
 *       behind it
 * @gss: sequence number to store
 *
 * The bits of @gss above bit 31 go into dh->dccph_seq and the low 32 bits
 * into dccph_seq_low of the extended header, both after htonl().
 */
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
	struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh));

#if defined(__LITTLE_ENDIAN_BITFIELD)
	/* NOTE(review): the extra >> 8 presumably matches the bitfield layout of dccph_seq -- confirm */
	dh->dccph_seq = htonl((gss >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
	dh->dccph_seq = htonl((gss >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
315 | |||
/*
 * dccp_hdr_set_ack - write an acknowledgement number into an ack sub-header
 * @dhack: acknowledgement bits of the packet being built
 * @gsr:   acknowledgement number to store
 *
 * The bits of @gsr above bit 31 go into dccph_ack_nr_high and the low 32
 * bits into dccph_ack_nr_low, both after htonl().
 */
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr)
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
	/* NOTE(review): the extra >> 8 presumably matches the bitfield layout of dccph_ack_nr_high -- confirm */
	dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
	dhack->dccph_ack_nr_high = htonl((gsr >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
}
327 | |||
328 | static inline void dccp_update_gsr(struct sock *sk, u64 seq) | ||
329 | { | ||
330 | struct dccp_sock *dp = dccp_sk(sk); | ||
331 | u64 tmp_gsr; | ||
332 | |||
333 | dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); | ||
334 | dp->dccps_gsr = seq; | ||
335 | dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); | ||
336 | dccp_set_seqno(&dp->dccps_swh, | ||
337 | dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); | ||
338 | } | ||
339 | |||
340 | static inline void dccp_update_gss(struct sock *sk, u64 seq) | ||
341 | { | ||
342 | struct dccp_sock *dp = dccp_sk(sk); | ||
343 | u64 tmp_gss; | ||
344 | |||
345 | dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); | ||
346 | dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); | ||
347 | dp->dccps_awh = dp->dccps_gss = seq; | ||
348 | } | ||
349 | |||
350 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); | ||
351 | extern void dccp_insert_option_elapsed_time(struct sock *sk, | ||
352 | struct sk_buff *skb, | ||
353 | u32 elapsed_time); | ||
354 | extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
355 | unsigned char option, | ||
356 | const void *value, unsigned char len); | ||
357 | |||
358 | extern struct socket *dccp_ctl_socket; | ||
359 | |||
/*
 * Packet states recorded in the acknowledgeable-packets buffer.  The state
 * occupies the top two bits of a byte (see DCCP_ACKPKTS_STATE_MASK); the
 * low six bits are covered by DCCP_ACKPKTS_LEN_MASK.
 */
#define DCCP_ACKPKTS_STATE_RECEIVED	0
#define DCCP_ACKPKTS_STATE_ECN_MARKED	(1 << 6)
#define DCCP_ACKPKTS_STATE_NOT_RECEIVED	(3 << 6)

#define DCCP_ACKPKTS_STATE_MASK		0xC0 /* 11000000 */
#define DCCP_ACKPKTS_LEN_MASK		0x3F /* 00111111 */
366 | |||
/** struct dccp_ackpkts - acknowledgeable packets
 *
 * This data structure is the one defined in the DCCP draft
 * Appendix A.
 *
 * @dccpap_buf_head - circular buffer head
 * @dccpap_buf_tail - circular buffer tail
 * @dccpap_buf_ackno - ack # of the most recent acknowledgeable packet in the
 *		       buffer (i.e. %dccpap_buf_head)
 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
 *		       by the buffer with State 0
 *
 * Additionally, the HC-Receiver must keep some information about the
 * Ack Vectors it has recently sent. For each packet sent carrying an
 * Ack Vector, it remembers four variables:
 *
 * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver
 *		       seqno).  input.c also stores and tests the
 *		       out-of-range value DCCP_MAX_SEQNO + 1 here.
 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
 *		       (HC-Sender seqno)
 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
 *
 * @dccpap_buf_len - circular buffer length
 * @dccpap_buf - circular buffer of acknowledgeable packets
 *
 * NOTE(review): @dccpap_buf_vector_len, @dccpap_ack_vector_len and
 * @dccpap_time are not described by the original comment; their meaning
 * must be taken from the dccp_ackpkts_*() implementation.
 */
struct dccp_ackpkts {
	unsigned int	dccpap_buf_head;
	unsigned int	dccpap_buf_tail;
	u64		dccpap_buf_ackno;
	u64		dccpap_ack_seqno;
	u64		dccpap_ack_ackno;
	unsigned int	dccpap_ack_ptr;
	unsigned int	dccpap_buf_vector_len;
	unsigned int	dccpap_ack_vector_len;
	unsigned int	dccpap_buf_len;
	unsigned long	dccpap_time;
	u8		dccpap_buf_nonce;
	u8		dccpap_ack_nonce;
	u8		dccpap_buf[0];	/* trailing storage, allocated together with the struct */
};
404 | |||
405 | extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); | ||
406 | extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); | ||
407 | extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); | ||
408 | extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, | ||
409 | struct sock *sk, u64 ackno); | ||
410 | |||
/*
 * Ack vector dump helpers: real functions when DCCP_DEBUG is defined,
 * empty inline stubs otherwise so that callers need no #ifdefs.
 */
#ifdef DCCP_DEBUG
extern void dccp_ackvector_print(const u64 ackno,
				 const unsigned char *vector, int len);
extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
#else
static inline void dccp_ackvector_print(const u64 ackno,
					const unsigned char *vector,
					int len) { }
static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
#endif
421 | |||
422 | #endif /* _DCCP_H */ | ||
diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 000000000000..622e976a51fe --- /dev/null +++ b/net/dccp/input.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* | ||
2 | * net/dccp/input.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "ccid.h" | ||
20 | #include "dccp.h" | ||
21 | |||
22 | static void dccp_fin(struct sock *sk, struct sk_buff *skb) | ||
23 | { | ||
24 | sk->sk_shutdown |= RCV_SHUTDOWN; | ||
25 | sock_set_flag(sk, SOCK_DONE); | ||
26 | __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); | ||
27 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
28 | skb_set_owner_r(skb, sk); | ||
29 | sk->sk_data_ready(sk, 0); | ||
30 | } | ||
31 | |||
32 | static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) | ||
33 | { | ||
34 | switch (sk->sk_state) { | ||
35 | case DCCP_PARTOPEN: | ||
36 | case DCCP_OPEN: | ||
37 | dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); | ||
38 | dccp_fin(sk, skb); | ||
39 | dccp_set_state(sk, DCCP_CLOSED); | ||
40 | break; | ||
41 | } | ||
42 | } | ||
43 | |||
44 | static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) | ||
45 | { | ||
46 | /* | ||
47 | * Step 7: Check for unexpected packet types | ||
48 | * If (S.is_server and P.type == CloseReq) | ||
49 | * Send Sync packet acknowledging P.seqno | ||
50 | * Drop packet and return | ||
51 | */ | ||
52 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { | ||
53 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | switch (sk->sk_state) { | ||
58 | case DCCP_PARTOPEN: | ||
59 | case DCCP_OPEN: | ||
60 | dccp_set_state(sk, DCCP_CLOSING); | ||
61 | dccp_send_close(sk); | ||
62 | break; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | ||
67 | { | ||
68 | struct dccp_sock *dp = dccp_sk(sk); | ||
69 | |||
70 | if (dp->dccps_options.dccpo_send_ack_vector) | ||
71 | dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, | ||
72 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
73 | } | ||
74 | |||
75 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | ||
76 | { | ||
77 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
78 | struct dccp_sock *dp = dccp_sk(sk); | ||
79 | u64 lswl = dp->dccps_swl; | ||
80 | u64 lawl = dp->dccps_awl; | ||
81 | |||
82 | /* | ||
83 | * Step 5: Prepare sequence numbers for Sync | ||
84 | * If P.type == Sync or P.type == SyncAck, | ||
85 | * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, | ||
86 | * / * P is valid, so update sequence number variables | ||
87 | * accordingly. After this update, P will pass the tests | ||
88 | * in Step 6. A SyncAck is generated if necessary in | ||
89 | * Step 15 * / | ||
90 | * Update S.GSR, S.SWL, S.SWH | ||
91 | * Otherwise, | ||
92 | * Drop packet and return | ||
93 | */ | ||
94 | if (dh->dccph_type == DCCP_PKT_SYNC || | ||
95 | dh->dccph_type == DCCP_PKT_SYNCACK) { | ||
96 | if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && | ||
97 | !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) | ||
98 | dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
99 | else | ||
100 | return -1; | ||
101 | /* | ||
102 | * Step 6: Check sequence numbers | ||
103 | * Let LSWL = S.SWL and LAWL = S.AWL | ||
104 | * If P.type == CloseReq or P.type == Close or P.type == Reset, | ||
105 | * LSWL := S.GSR + 1, LAWL := S.GAR | ||
106 | * If LSWL <= P.seqno <= S.SWH | ||
107 | * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), | ||
108 | * Update S.GSR, S.SWL, S.SWH | ||
109 | * If P.type != Sync, | ||
110 | * Update S.GAR | ||
111 | * Otherwise, | ||
112 | * Send Sync packet acknowledging P.seqno | ||
113 | * Drop packet and return | ||
114 | */ | ||
115 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || | ||
116 | dh->dccph_type == DCCP_PKT_CLOSE || | ||
117 | dh->dccph_type == DCCP_PKT_RESET) { | ||
118 | lswl = dp->dccps_gsr; | ||
119 | dccp_inc_seqno(&lswl); | ||
120 | lawl = dp->dccps_gar; | ||
121 | } | ||
122 | |||
123 | if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && | ||
124 | (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || | ||
125 | between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { | ||
126 | dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
127 | |||
128 | if (dh->dccph_type != DCCP_PKT_SYNC && | ||
129 | DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
130 | dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; | ||
131 | } else { | ||
132 | dccp_pr_debug("Step 6 failed, sending SYNC...\n"); | ||
133 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
134 | return -1; | ||
135 | } | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
141 | const struct dccp_hdr *dh, const unsigned len) | ||
142 | { | ||
143 | struct dccp_sock *dp = dccp_sk(sk); | ||
144 | |||
145 | if (dccp_check_seqno(sk, skb)) | ||
146 | goto discard; | ||
147 | |||
148 | if (dccp_parse_options(sk, skb)) | ||
149 | goto discard; | ||
150 | |||
151 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
152 | dccp_event_ack_recv(sk, skb); | ||
153 | |||
154 | /* | ||
155 | * FIXME: check ECN to see if we should use | ||
156 | * DCCP_ACKPKTS_STATE_ECN_MARKED | ||
157 | */ | ||
158 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
159 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
160 | |||
161 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, | ||
162 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
163 | DCCP_ACKPKTS_STATE_RECEIVED)) { | ||
164 | LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); | ||
165 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
166 | inet_csk_schedule_ack(sk); | ||
167 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); | ||
168 | goto discard; | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * FIXME: this activation is probably wrong, have to study more | ||
173 | * TCP delack machinery and how it fits into DCCP draft, but | ||
174 | * for now it kinda "works" 8) | ||
175 | */ | ||
176 | if (!inet_csk_ack_scheduled(sk)) { | ||
177 | inet_csk_schedule_ack(sk); | ||
178 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); | ||
179 | } | ||
180 | } | ||
181 | |||
182 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
183 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
184 | |||
185 | switch (dccp_hdr(skb)->dccph_type) { | ||
186 | case DCCP_PKT_DATAACK: | ||
187 | case DCCP_PKT_DATA: | ||
188 | /* | ||
189 | * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option | ||
190 | * if it is. | ||
191 | */ | ||
192 | __skb_pull(skb, dh->dccph_doff * 4); | ||
193 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
194 | skb_set_owner_r(skb, sk); | ||
195 | sk->sk_data_ready(sk, 0); | ||
196 | return 0; | ||
197 | case DCCP_PKT_ACK: | ||
198 | goto discard; | ||
199 | case DCCP_PKT_RESET: | ||
200 | /* | ||
201 | * Step 9: Process Reset | ||
202 | * If P.type == Reset, | ||
203 | * Tear down connection | ||
204 | * S.state := TIMEWAIT | ||
205 | * Set TIMEWAIT timer | ||
206 | * Drop packet and return | ||
207 | */ | ||
208 | dccp_fin(sk, skb); | ||
209 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | ||
210 | return 0; | ||
211 | case DCCP_PKT_CLOSEREQ: | ||
212 | dccp_rcv_closereq(sk, skb); | ||
213 | goto discard; | ||
214 | case DCCP_PKT_CLOSE: | ||
215 | dccp_rcv_close(sk, skb); | ||
216 | return 0; | ||
217 | case DCCP_PKT_REQUEST: | ||
218 | /* Step 7 | ||
219 | * or (S.is_server and P.type == Response) | ||
220 | * or (S.is_client and P.type == Request) | ||
221 | * or (S.state >= OPEN and P.type == Request | ||
222 | * and P.seqno >= S.OSR) | ||
223 | * or (S.state >= OPEN and P.type == Response | ||
224 | * and P.seqno >= S.OSR) | ||
225 | * or (S.state == RESPOND and P.type == Data), | ||
226 | * Send Sync packet acknowledging P.seqno | ||
227 | * Drop packet and return | ||
228 | */ | ||
229 | if (dp->dccps_role != DCCP_ROLE_LISTEN) | ||
230 | goto send_sync; | ||
231 | goto check_seq; | ||
232 | case DCCP_PKT_RESPONSE: | ||
233 | if (dp->dccps_role != DCCP_ROLE_CLIENT) | ||
234 | goto send_sync; | ||
235 | check_seq: | ||
236 | if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { | ||
237 | send_sync: | ||
238 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
239 | } | ||
240 | break; | ||
241 | } | ||
242 | |||
243 | DCCP_INC_STATS_BH(DCCP_MIB_INERRS); | ||
244 | discard: | ||
245 | __kfree_skb(skb); | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | static int dccp_rcv_request_sent_state_process(struct sock *sk, | ||
250 | struct sk_buff *skb, | ||
251 | const struct dccp_hdr *dh, | ||
252 | const unsigned len) | ||
253 | { | ||
254 | /* | ||
255 | * Step 4: Prepare sequence numbers in REQUEST | ||
256 | * If S.state == REQUEST, | ||
257 | * If (P.type == Response or P.type == Reset) | ||
258 | * and S.AWL <= P.ackno <= S.AWH, | ||
259 | * / * Set sequence number variables corresponding to the | ||
260 | * other endpoint, so P will pass the tests in Step 6 * / | ||
261 | * Set S.GSR, S.ISR, S.SWL, S.SWH | ||
262 | * / * Response processing continues in Step 10; Reset | ||
263 | * processing continues in Step 9 * / | ||
264 | */ | ||
265 | if (dh->dccph_type == DCCP_PKT_RESPONSE) { | ||
266 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
267 | struct dccp_sock *dp = dccp_sk(sk); | ||
268 | |||
269 | /* Stop the REQUEST timer */ | ||
270 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | ||
271 | BUG_TRAP(sk->sk_send_head != NULL); | ||
272 | __kfree_skb(sk->sk_send_head); | ||
273 | sk->sk_send_head = NULL; | ||
274 | |||
275 | if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { | ||
276 | dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", | ||
277 | dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); | ||
278 | goto out_invalid_packet; | ||
279 | } | ||
280 | |||
281 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
282 | dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
283 | |||
284 | if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || | ||
285 | ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { | ||
286 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
287 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
288 | /* FIXME: send appropriate RESET code */ | ||
289 | goto out_invalid_packet; | ||
290 | } | ||
291 | |||
292 | dccp_sync_mss(sk, dp->dccps_pmtu_cookie); | ||
293 | |||
294 | /* | ||
295 | * Step 10: Process REQUEST state (second part) | ||
296 | * If S.state == REQUEST, | ||
297 | * / * If we get here, P is a valid Response from the server (see | ||
298 | * Step 4), and we should move to PARTOPEN state. PARTOPEN | ||
299 | * means send an Ack, don't send Data packets, retransmit | ||
300 | * Acks periodically, and always include any Init Cookie from | ||
301 | * the Response * / | ||
302 | * S.state := PARTOPEN | ||
303 | * Set PARTOPEN timer | ||
304 | * Continue with S.state == PARTOPEN | ||
305 | * / * Step 12 will send the Ack completing the three-way | ||
306 | * handshake * / | ||
307 | */ | ||
308 | dccp_set_state(sk, DCCP_PARTOPEN); | ||
309 | |||
310 | /* Make sure socket is routed, for correct metrics. */ | ||
311 | inet_sk_rebuild_header(sk); | ||
312 | |||
313 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
314 | sk->sk_state_change(sk); | ||
315 | sk_wake_async(sk, 0, POLL_OUT); | ||
316 | } | ||
317 | |||
318 | if (sk->sk_write_pending || icsk->icsk_ack.pingpong || | ||
319 | icsk->icsk_accept_queue.rskq_defer_accept) { | ||
320 | /* Save one ACK. Data will be ready after | ||
321 | * several ticks, if write_pending is set. | ||
322 | * | ||
323 | * It may be deleted, but with this feature tcpdumps | ||
324 | * look so _wonderfully_ clever, that I was not able | ||
325 | * to stand against the temptation 8) --ANK | ||
326 | */ | ||
327 | /* | ||
328 | * OK, in DCCP we can as well do a similar trick, its | ||
329 | * even in the draft, but there is no need for us to | ||
330 | * schedule an ack here, as dccp_sendmsg does this for | ||
331 | * us, also stated in the draft. -acme | ||
332 | */ | ||
333 | __kfree_skb(skb); | ||
334 | return 0; | ||
335 | } | ||
336 | dccp_send_ack(sk); | ||
337 | return -1; | ||
338 | } | ||
339 | |||
340 | out_invalid_packet: | ||
341 | return 1; /* dccp_v4_do_rcv will send a reset, but... | ||
342 | FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ | ||
343 | } | ||
344 | |||
345 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, | ||
346 | struct sk_buff *skb, | ||
347 | const struct dccp_hdr *dh, | ||
348 | const unsigned len) | ||
349 | { | ||
350 | int queued = 0; | ||
351 | |||
352 | switch (dh->dccph_type) { | ||
353 | case DCCP_PKT_RESET: | ||
354 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
355 | break; | ||
356 | case DCCP_PKT_DATAACK: | ||
357 | case DCCP_PKT_ACK: | ||
358 | /* | ||
359 | * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, | ||
360 | * but only if we haven't used the DELACK timer for something else, | ||
361 | * like sending a delayed ack for a TIMESTAMP echo, etc, for now | ||
362 | * were not clearing it, sending an extra ACK when there is nothing | ||
363 | * else to do in DELACK is not a big deal after all. | ||
364 | */ | ||
365 | |||
366 | /* Stop the PARTOPEN timer */ | ||
367 | if (sk->sk_state == DCCP_PARTOPEN) | ||
368 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
369 | |||
370 | dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
371 | dccp_set_state(sk, DCCP_OPEN); | ||
372 | |||
373 | if (dh->dccph_type == DCCP_PKT_DATAACK) { | ||
374 | dccp_rcv_established(sk, skb, dh, len); | ||
375 | queued = 1; /* packet was queued (by dccp_rcv_established) */ | ||
376 | } | ||
377 | break; | ||
378 | } | ||
379 | |||
380 | return queued; | ||
381 | } | ||
382 | |||
383 | int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
384 | struct dccp_hdr *dh, unsigned len) | ||
385 | { | ||
386 | struct dccp_sock *dp = dccp_sk(sk); | ||
387 | const int old_state = sk->sk_state; | ||
388 | int queued = 0; | ||
389 | |||
390 | if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { | ||
391 | if (dccp_check_seqno(sk, skb)) | ||
392 | goto discard; | ||
393 | |||
394 | /* | ||
395 | * Step 8: Process options and mark acknowledgeable | ||
396 | */ | ||
397 | if (dccp_parse_options(sk, skb)) | ||
398 | goto discard; | ||
399 | |||
400 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
401 | dccp_event_ack_recv(sk, skb); | ||
402 | |||
403 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
404 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
405 | |||
406 | /* | ||
407 | * FIXME: check ECN to see if we should use | ||
408 | * DCCP_ACKPKTS_STATE_ECN_MARKED | ||
409 | */ | ||
410 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
411 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, | ||
412 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
413 | DCCP_ACKPKTS_STATE_RECEIVED)) | ||
414 | goto discard; | ||
415 | /* | ||
416 | * FIXME: this activation is probably wrong, have to study more | ||
417 | * TCP delack machinery and how it fits into DCCP draft, but | ||
418 | * for now it kinda "works" 8) | ||
419 | */ | ||
420 | if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && | ||
421 | !inet_csk_ack_scheduled(sk)) { | ||
422 | inet_csk_schedule_ack(sk); | ||
423 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); | ||
424 | } | ||
425 | } | ||
426 | } | ||
427 | |||
428 | /* | ||
429 | * Step 9: Process Reset | ||
430 | * If P.type == Reset, | ||
431 | * Tear down connection | ||
432 | * S.state := TIMEWAIT | ||
433 | * Set TIMEWAIT timer | ||
434 | * Drop packet and return | ||
435 | */ | ||
436 | if (dh->dccph_type == DCCP_PKT_RESET) { | ||
437 | /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ | ||
438 | dccp_fin(sk, skb); | ||
439 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | ||
440 | return 0; | ||
441 | /* | ||
442 | * Step 7: Check for unexpected packet types | ||
443 | * If (S.is_server and P.type == CloseReq) | ||
444 | * or (S.is_server and P.type == Response) | ||
445 | * or (S.is_client and P.type == Request) | ||
446 | * or (S.state == RESPOND and P.type == Data), | ||
447 | * Send Sync packet acknowledging P.seqno | ||
448 | * Drop packet and return | ||
449 | */ | ||
450 | } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && | ||
451 | (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || | ||
452 | (dp->dccps_role == DCCP_ROLE_CLIENT && | ||
453 | dh->dccph_type == DCCP_PKT_REQUEST) || | ||
454 | (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { | ||
455 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
456 | goto discard; | ||
457 | } | ||
458 | |||
459 | switch (sk->sk_state) { | ||
460 | case DCCP_CLOSED: | ||
461 | return 1; | ||
462 | |||
463 | case DCCP_LISTEN: | ||
464 | if (dh->dccph_type == DCCP_PKT_ACK || | ||
465 | dh->dccph_type == DCCP_PKT_DATAACK) | ||
466 | return 1; | ||
467 | |||
468 | if (dh->dccph_type == DCCP_PKT_RESET) | ||
469 | goto discard; | ||
470 | |||
471 | if (dh->dccph_type == DCCP_PKT_REQUEST) { | ||
472 | if (dccp_v4_conn_request(sk, skb) < 0) | ||
473 | return 1; | ||
474 | |||
475 | /* FIXME: do congestion control initialization */ | ||
476 | goto discard; | ||
477 | } | ||
478 | goto discard; | ||
479 | |||
480 | case DCCP_REQUESTING: | ||
481 | /* FIXME: do congestion control initialization */ | ||
482 | |||
483 | queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); | ||
484 | if (queued >= 0) | ||
485 | return queued; | ||
486 | |||
487 | __kfree_skb(skb); | ||
488 | return 0; | ||
489 | |||
490 | case DCCP_RESPOND: | ||
491 | case DCCP_PARTOPEN: | ||
492 | queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); | ||
493 | break; | ||
494 | } | ||
495 | |||
496 | if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { | ||
497 | switch (old_state) { | ||
498 | case DCCP_PARTOPEN: | ||
499 | sk->sk_state_change(sk); | ||
500 | sk_wake_async(sk, 0, POLL_OUT); | ||
501 | break; | ||
502 | } | ||
503 | } | ||
504 | |||
505 | if (!queued) { | ||
506 | discard: | ||
507 | __kfree_skb(skb); | ||
508 | } | ||
509 | return 0; | ||
510 | } | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 000000000000..083bacaecb3b --- /dev/null +++ b/net/dccp/ipv4.c | |||
@@ -0,0 +1,1289 @@ | |||
1 | /* | ||
2 | * net/dccp/ipv4.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/icmp.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/random.h> | ||
19 | |||
20 | #include <net/icmp.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/sock.h> | ||
23 | #include <net/tcp_states.h> | ||
24 | #include <net/xfrm.h> | ||
25 | |||
26 | #include "ccid.h" | ||
27 | #include "dccp.h" | ||
28 | |||
29 | struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { | ||
30 | .lhash_lock = RW_LOCK_UNLOCKED, | ||
31 | .lhash_users = ATOMIC_INIT(0), | ||
32 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), | ||
33 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
34 | .port_rover = 1024 - 1, | ||
35 | }; | ||
36 | |||
37 | static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) | ||
38 | { | ||
39 | return inet_csk_get_port(&dccp_hashinfo, sk, snum); | ||
40 | } | ||
41 | |||
42 | static void dccp_v4_hash(struct sock *sk) | ||
43 | { | ||
44 | inet_hash(&dccp_hashinfo, sk); | ||
45 | } | ||
46 | |||
47 | static void dccp_v4_unhash(struct sock *sk) | ||
48 | { | ||
49 | inet_unhash(&dccp_hashinfo, sk); | ||
50 | } | ||
51 | |||
52 | /* called with local bh disabled */ | ||
53 | static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, | ||
54 | struct inet_timewait_sock **twp) | ||
55 | { | ||
56 | struct inet_sock *inet = inet_sk(sk); | ||
57 | const u32 daddr = inet->rcv_saddr; | ||
58 | const u32 saddr = inet->daddr; | ||
59 | const int dif = sk->sk_bound_dev_if; | ||
60 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
61 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
62 | const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); | ||
63 | struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; | ||
64 | const struct sock *sk2; | ||
65 | const struct hlist_node *node; | ||
66 | struct inet_timewait_sock *tw; | ||
67 | |||
68 | write_lock(&head->lock); | ||
69 | |||
70 | /* Check TIME-WAIT sockets first. */ | ||
71 | sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { | ||
72 | tw = inet_twsk(sk2); | ||
73 | |||
74 | if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | ||
75 | goto not_unique; | ||
76 | } | ||
77 | tw = NULL; | ||
78 | |||
79 | /* And established part... */ | ||
80 | sk_for_each(sk2, node, &head->chain) { | ||
81 | if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | ||
82 | goto not_unique; | ||
83 | } | ||
84 | |||
85 | /* Must record num and sport now. Otherwise we will see | ||
86 | * in hash table socket with a funny identity. */ | ||
87 | inet->num = lport; | ||
88 | inet->sport = htons(lport); | ||
89 | sk->sk_hashent = hash; | ||
90 | BUG_TRAP(sk_unhashed(sk)); | ||
91 | __sk_add_node(sk, &head->chain); | ||
92 | sock_prot_inc_use(sk->sk_prot); | ||
93 | write_unlock(&head->lock); | ||
94 | |||
95 | if (twp != NULL) { | ||
96 | *twp = tw; | ||
97 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
98 | } else if (tw != NULL) { | ||
99 | /* Silly. Should hash-dance instead... */ | ||
100 | dccp_tw_deschedule(tw); | ||
101 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
102 | |||
103 | inet_twsk_put(tw); | ||
104 | } | ||
105 | |||
106 | return 0; | ||
107 | |||
108 | not_unique: | ||
109 | write_unlock(&head->lock); | ||
110 | return -EADDRNOTAVAIL; | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * Bind a port for a connect operation and hash it. | ||
115 | */ | ||
116 | static int dccp_v4_hash_connect(struct sock *sk) | ||
117 | { | ||
118 | const unsigned short snum = inet_sk(sk)->num; | ||
119 | struct inet_bind_hashbucket *head; | ||
120 | struct inet_bind_bucket *tb; | ||
121 | int ret; | ||
122 | |||
123 | if (snum == 0) { | ||
124 | int rover; | ||
125 | int low = sysctl_local_port_range[0]; | ||
126 | int high = sysctl_local_port_range[1]; | ||
127 | int remaining = (high - low) + 1; | ||
128 | struct hlist_node *node; | ||
129 | struct inet_timewait_sock *tw = NULL; | ||
130 | |||
131 | local_bh_disable(); | ||
132 | |||
133 | /* TODO. Actually it is not so bad idea to remove | ||
134 | * dccp_hashinfo.portalloc_lock before next submission to Linus. | ||
135 | * As soon as we touch this place at all it is time to think. | ||
136 | * | ||
137 | * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, | ||
138 | * hence it is mostly useless. | ||
139 | * Code will work nicely if we just delete it, but | ||
140 | * I am afraid in contented case it will work not better or | ||
141 | * even worse: another cpu just will hit the same bucket | ||
142 | * and spin there. | ||
143 | * So some cpu salt could remove both contention and | ||
144 | * memory pingpong. Any ideas how to do this in a nice way? | ||
145 | */ | ||
146 | spin_lock(&dccp_hashinfo.portalloc_lock); | ||
147 | rover = dccp_hashinfo.port_rover; | ||
148 | |||
149 | do { | ||
150 | rover++; | ||
151 | if ((rover < low) || (rover > high)) | ||
152 | rover = low; | ||
153 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; | ||
154 | spin_lock(&head->lock); | ||
155 | |||
156 | /* Does not bother with rcv_saddr checks, | ||
157 | * because the established check is already | ||
158 | * unique enough. | ||
159 | */ | ||
160 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
161 | if (tb->port == rover) { | ||
162 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
163 | if (tb->fastreuse >= 0) | ||
164 | goto next_port; | ||
165 | if (!__dccp_v4_check_established(sk, | ||
166 | rover, | ||
167 | &tw)) | ||
168 | goto ok; | ||
169 | goto next_port; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); | ||
174 | if (tb == NULL) { | ||
175 | spin_unlock(&head->lock); | ||
176 | break; | ||
177 | } | ||
178 | tb->fastreuse = -1; | ||
179 | goto ok; | ||
180 | |||
181 | next_port: | ||
182 | spin_unlock(&head->lock); | ||
183 | } while (--remaining > 0); | ||
184 | dccp_hashinfo.port_rover = rover; | ||
185 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
186 | |||
187 | local_bh_enable(); | ||
188 | |||
189 | return -EADDRNOTAVAIL; | ||
190 | |||
191 | ok: | ||
192 | /* All locks still held and bhs disabled */ | ||
193 | dccp_hashinfo.port_rover = rover; | ||
194 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
195 | |||
196 | inet_bind_hash(sk, tb, rover); | ||
197 | if (sk_unhashed(sk)) { | ||
198 | inet_sk(sk)->sport = htons(rover); | ||
199 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
200 | } | ||
201 | spin_unlock(&head->lock); | ||
202 | |||
203 | if (tw != NULL) { | ||
204 | dccp_tw_deschedule(tw); | ||
205 | inet_twsk_put(tw); | ||
206 | } | ||
207 | |||
208 | ret = 0; | ||
209 | goto out; | ||
210 | } | ||
211 | |||
212 | head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; | ||
213 | tb = inet_csk(sk)->icsk_bind_hash; | ||
214 | spin_lock_bh(&head->lock); | ||
215 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | ||
216 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
217 | spin_unlock_bh(&head->lock); | ||
218 | return 0; | ||
219 | } else { | ||
220 | spin_unlock(&head->lock); | ||
221 | /* No definite answer... Walk to established hash table */ | ||
222 | ret = __dccp_v4_check_established(sk, snum, NULL); | ||
223 | out: | ||
224 | local_bh_enable(); | ||
225 | return ret; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | ||
230 | int addr_len) | ||
231 | { | ||
232 | struct inet_sock *inet = inet_sk(sk); | ||
233 | struct dccp_sock *dp = dccp_sk(sk); | ||
234 | const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
235 | struct rtable *rt; | ||
236 | u32 daddr, nexthop; | ||
237 | int tmp; | ||
238 | int err; | ||
239 | |||
240 | dp->dccps_role = DCCP_ROLE_CLIENT; | ||
241 | |||
242 | if (addr_len < sizeof(struct sockaddr_in)) | ||
243 | return -EINVAL; | ||
244 | |||
245 | if (usin->sin_family != AF_INET) | ||
246 | return -EAFNOSUPPORT; | ||
247 | |||
248 | nexthop = daddr = usin->sin_addr.s_addr; | ||
249 | if (inet->opt != NULL && inet->opt->srr) { | ||
250 | if (daddr == 0) | ||
251 | return -EINVAL; | ||
252 | nexthop = inet->opt->faddr; | ||
253 | } | ||
254 | |||
255 | tmp = ip_route_connect(&rt, nexthop, inet->saddr, | ||
256 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | ||
257 | IPPROTO_DCCP, | ||
258 | inet->sport, usin->sin_port, sk); | ||
259 | if (tmp < 0) | ||
260 | return tmp; | ||
261 | |||
262 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | ||
263 | ip_rt_put(rt); | ||
264 | return -ENETUNREACH; | ||
265 | } | ||
266 | |||
267 | if (inet->opt == NULL || !inet->opt->srr) | ||
268 | daddr = rt->rt_dst; | ||
269 | |||
270 | if (inet->saddr == 0) | ||
271 | inet->saddr = rt->rt_src; | ||
272 | inet->rcv_saddr = inet->saddr; | ||
273 | |||
274 | inet->dport = usin->sin_port; | ||
275 | inet->daddr = daddr; | ||
276 | |||
277 | dp->dccps_ext_header_len = 0; | ||
278 | if (inet->opt != NULL) | ||
279 | dp->dccps_ext_header_len = inet->opt->optlen; | ||
280 | /* | ||
281 | * Socket identity is still unknown (sport may be zero). | ||
282 | * However we set state to DCCP_REQUESTING and not releasing socket | ||
283 | * lock select source port, enter ourselves into the hash tables and | ||
284 | * complete initialization after this. | ||
285 | */ | ||
286 | dccp_set_state(sk, DCCP_REQUESTING); | ||
287 | err = dccp_v4_hash_connect(sk); | ||
288 | if (err != 0) | ||
289 | goto failure; | ||
290 | |||
291 | err = ip_route_newports(&rt, inet->sport, inet->dport, sk); | ||
292 | if (err != 0) | ||
293 | goto failure; | ||
294 | |||
295 | /* OK, now commit destination to socket. */ | ||
296 | sk_setup_caps(sk, &rt->u.dst); | ||
297 | |||
298 | dp->dccps_gar = | ||
299 | dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, | ||
300 | inet->daddr, | ||
301 | inet->sport, | ||
302 | usin->sin_port); | ||
303 | dccp_update_gss(sk, dp->dccps_iss); | ||
304 | |||
305 | inet->id = dp->dccps_iss ^ jiffies; | ||
306 | |||
307 | err = dccp_connect(sk); | ||
308 | rt = NULL; | ||
309 | if (err != 0) | ||
310 | goto failure; | ||
311 | out: | ||
312 | return err; | ||
313 | failure: | ||
314 | /* This unhashes the socket and releases the local port, if necessary. */ | ||
315 | dccp_set_state(sk, DCCP_CLOSED); | ||
316 | ip_rt_put(rt); | ||
317 | sk->sk_route_caps = 0; | ||
318 | inet->dport = 0; | ||
319 | goto out; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * This routine does path mtu discovery as defined in RFC1191. | ||
324 | */ | ||
325 | static inline void dccp_do_pmtu_discovery(struct sock *sk, | ||
326 | const struct iphdr *iph, | ||
327 | u32 mtu) | ||
328 | { | ||
329 | struct dst_entry *dst; | ||
330 | const struct inet_sock *inet = inet_sk(sk); | ||
331 | const struct dccp_sock *dp = dccp_sk(sk); | ||
332 | |||
333 | /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs | ||
334 | * send out by Linux are always < 576bytes so they should go through | ||
335 | * unfragmented). | ||
336 | */ | ||
337 | if (sk->sk_state == DCCP_LISTEN) | ||
338 | return; | ||
339 | |||
340 | /* We don't check in the destentry if pmtu discovery is forbidden | ||
341 | * on this route. We just assume that no packet_to_big packets | ||
342 | * are send back when pmtu discovery is not active. | ||
343 | * There is a small race when the user changes this flag in the | ||
344 | * route, but I think that's acceptable. | ||
345 | */ | ||
346 | if ((dst = __sk_dst_check(sk, 0)) == NULL) | ||
347 | return; | ||
348 | |||
349 | dst->ops->update_pmtu(dst, mtu); | ||
350 | |||
351 | /* Something is about to be wrong... Remember soft error | ||
352 | * for the case, if this connection will not able to recover. | ||
353 | */ | ||
354 | if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) | ||
355 | sk->sk_err_soft = EMSGSIZE; | ||
356 | |||
357 | mtu = dst_mtu(dst); | ||
358 | |||
359 | if (inet->pmtudisc != IP_PMTUDISC_DONT && | ||
360 | dp->dccps_pmtu_cookie > mtu) { | ||
361 | dccp_sync_mss(sk, mtu); | ||
362 | |||
363 | /* | ||
364 | * From: draft-ietf-dccp-spec-11.txt | ||
365 | * | ||
366 | * DCCP-Sync packets are the best choice for upward probing, | ||
367 | * since DCCP-Sync probes do not risk application data loss. | ||
368 | */ | ||
369 | dccp_send_sync(sk, dp->dccps_gsr); | ||
370 | } /* else let the usual retransmit timer handle it */ | ||
371 | } | ||
372 | |||
373 | static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) | ||
374 | { | ||
375 | int err; | ||
376 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
377 | const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + | ||
378 | sizeof(struct dccp_hdr_ext) + | ||
379 | sizeof(struct dccp_hdr_ack_bits); | ||
380 | struct sk_buff *skb; | ||
381 | |||
382 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
383 | return; | ||
384 | |||
385 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
386 | if (skb == NULL) | ||
387 | return; | ||
388 | |||
389 | /* Reserve space for headers. */ | ||
390 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
391 | |||
392 | skb->dst = dst_clone(rxskb->dst); | ||
393 | |||
394 | skb->h.raw = skb_push(skb, dccp_hdr_ack_len); | ||
395 | dh = dccp_hdr(skb); | ||
396 | memset(dh, 0, dccp_hdr_ack_len); | ||
397 | |||
398 | /* Build DCCP header and checksum it. */ | ||
399 | dh->dccph_type = DCCP_PKT_ACK; | ||
400 | dh->dccph_sport = rxdh->dccph_dport; | ||
401 | dh->dccph_dport = rxdh->dccph_sport; | ||
402 | dh->dccph_doff = dccp_hdr_ack_len / 4; | ||
403 | dh->dccph_x = 1; | ||
404 | |||
405 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
406 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
407 | |||
408 | bh_lock_sock(dccp_ctl_socket->sk); | ||
409 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
410 | rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); | ||
411 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
412 | |||
413 | if (err == NET_XMIT_CN || err == 0) { | ||
414 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
415 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
416 | } | ||
417 | } | ||
418 | |||
/*
 * Request-sock "send ack" hook: answers @skb with a control-socket Ack.
 * @req is not needed for that and is ignored.
 */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	struct sk_buff *rxskb = skb;

	dccp_v4_ctl_send_ack(rxskb);
}
423 | |||
424 | static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, | ||
425 | struct dst_entry *dst) | ||
426 | { | ||
427 | int err = -1; | ||
428 | struct sk_buff *skb; | ||
429 | |||
430 | /* First, grab a route. */ | ||
431 | |||
432 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
433 | goto out; | ||
434 | |||
435 | skb = dccp_make_response(sk, dst, req); | ||
436 | if (skb != NULL) { | ||
437 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
438 | |||
439 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, | ||
440 | ireq->rmt_addr, | ||
441 | ireq->opt); | ||
442 | if (err == NET_XMIT_CN) | ||
443 | err = 0; | ||
444 | } | ||
445 | |||
446 | out: | ||
447 | dst_release(dst); | ||
448 | return err; | ||
449 | } | ||
450 | |||
451 | /* | ||
452 | * This routine is called by the ICMP module when it gets some sort of error | ||
453 | * condition. If err < 0 then the socket should be closed and the error | ||
454 | * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. | ||
455 | * After adjustment header points to the first 8 bytes of the DCCP header. We | ||
456 | * need to find the appropriate port. | ||
457 | * | ||
458 | * The locking strategy used here is very "optimistic". When someone else | ||
459 | * accesses the socket the ICMP is just dropped and for some paths there is no | ||
460 | * check at all. A more general error queue to queue errors for later handling | ||
461 | * is probably better. | ||
462 | */ | ||
463 | void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
464 | { | ||
465 | const struct iphdr *iph = (struct iphdr *)skb->data; | ||
466 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); | ||
467 | struct dccp_sock *dp; | ||
468 | struct inet_sock *inet; | ||
469 | const int type = skb->h.icmph->type; | ||
470 | const int code = skb->h.icmph->code; | ||
471 | struct sock *sk; | ||
472 | __u64 seq; | ||
473 | int err; | ||
474 | |||
475 | if (skb->len < (iph->ihl << 2) + 8) { | ||
476 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
477 | return; | ||
478 | } | ||
479 | |||
480 | sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, | ||
481 | iph->saddr, dh->dccph_sport, inet_iif(skb)); | ||
482 | if (sk == NULL) { | ||
483 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
484 | return; | ||
485 | } | ||
486 | |||
487 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
488 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
489 | return; | ||
490 | } | ||
491 | |||
492 | bh_lock_sock(sk); | ||
493 | /* If too many ICMPs get dropped on busy | ||
494 | * servers this needs to be solved differently. | ||
495 | */ | ||
496 | if (sock_owned_by_user(sk)) | ||
497 | NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); | ||
498 | |||
499 | if (sk->sk_state == DCCP_CLOSED) | ||
500 | goto out; | ||
501 | |||
502 | dp = dccp_sk(sk); | ||
503 | seq = dccp_hdr_seq(skb); | ||
504 | if (sk->sk_state != DCCP_LISTEN && | ||
505 | !between48(seq, dp->dccps_swl, dp->dccps_swh)) { | ||
506 | NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); | ||
507 | goto out; | ||
508 | } | ||
509 | |||
510 | switch (type) { | ||
511 | case ICMP_SOURCE_QUENCH: | ||
512 | /* Just silently ignore these. */ | ||
513 | goto out; | ||
514 | case ICMP_PARAMETERPROB: | ||
515 | err = EPROTO; | ||
516 | break; | ||
517 | case ICMP_DEST_UNREACH: | ||
518 | if (code > NR_ICMP_UNREACH) | ||
519 | goto out; | ||
520 | |||
521 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ | ||
522 | if (!sock_owned_by_user(sk)) | ||
523 | dccp_do_pmtu_discovery(sk, iph, info); | ||
524 | goto out; | ||
525 | } | ||
526 | |||
527 | err = icmp_err_convert[code].errno; | ||
528 | break; | ||
529 | case ICMP_TIME_EXCEEDED: | ||
530 | err = EHOSTUNREACH; | ||
531 | break; | ||
532 | default: | ||
533 | goto out; | ||
534 | } | ||
535 | |||
536 | switch (sk->sk_state) { | ||
537 | struct request_sock *req , **prev; | ||
538 | case DCCP_LISTEN: | ||
539 | if (sock_owned_by_user(sk)) | ||
540 | goto out; | ||
541 | req = inet_csk_search_req(sk, &prev, dh->dccph_dport, | ||
542 | iph->daddr, iph->saddr); | ||
543 | if (!req) | ||
544 | goto out; | ||
545 | |||
546 | /* | ||
547 | * ICMPs are not backlogged, hence we cannot get an established | ||
548 | * socket here. | ||
549 | */ | ||
550 | BUG_TRAP(!req->sk); | ||
551 | |||
552 | if (seq != dccp_rsk(req)->dreq_iss) { | ||
553 | NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); | ||
554 | goto out; | ||
555 | } | ||
556 | /* | ||
557 | * Still in RESPOND, just remove it silently. | ||
558 | * There is no good way to pass the error to the newly | ||
559 | * created socket, and POSIX does not want network | ||
560 | * errors returned from accept(). | ||
561 | */ | ||
562 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
563 | goto out; | ||
564 | |||
565 | case DCCP_REQUESTING: | ||
566 | case DCCP_RESPOND: | ||
567 | if (!sock_owned_by_user(sk)) { | ||
568 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
569 | sk->sk_err = err; | ||
570 | |||
571 | sk->sk_error_report(sk); | ||
572 | |||
573 | dccp_done(sk); | ||
574 | } else | ||
575 | sk->sk_err_soft = err; | ||
576 | goto out; | ||
577 | } | ||
578 | |||
579 | /* If we've already connected we will keep trying | ||
580 | * until we time out, or the user gives up. | ||
581 | * | ||
582 | * rfc1122 4.2.3.9 allows to consider as hard errors | ||
583 | * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, | ||
584 | * but it is obsoleted by pmtu discovery). | ||
585 | * | ||
586 | * Note, that in modern internet, where routing is unreliable | ||
587 | * and in each dark corner broken firewalls sit, sending random | ||
588 | * errors ordered by their masters even this two messages finally lose | ||
589 | * their original sense (even Linux sends invalid PORT_UNREACHs) | ||
590 | * | ||
591 | * Now we are in compliance with RFCs. | ||
592 | * --ANK (980905) | ||
593 | */ | ||
594 | |||
595 | inet = inet_sk(sk); | ||
596 | if (!sock_owned_by_user(sk) && inet->recverr) { | ||
597 | sk->sk_err = err; | ||
598 | sk->sk_error_report(sk); | ||
599 | } else /* Only an error on timeout */ | ||
600 | sk->sk_err_soft = err; | ||
601 | out: | ||
602 | bh_unlock_sock(sk); | ||
603 | sock_put(sk); | ||
604 | } | ||
605 | |||
606 | extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); | ||
607 | |||
608 | int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) | ||
609 | { | ||
610 | struct sk_buff *skb; | ||
611 | /* | ||
612 | * FIXME: what if rebuild_header fails? | ||
613 | * Should we be doing a rebuild_header here? | ||
614 | */ | ||
615 | int err = inet_sk_rebuild_header(sk); | ||
616 | |||
617 | if (err != 0) | ||
618 | return err; | ||
619 | |||
620 | skb = dccp_make_reset(sk, sk->sk_dst_cache, code); | ||
621 | if (skb != NULL) { | ||
622 | const struct dccp_sock *dp = dccp_sk(sk); | ||
623 | const struct inet_sock *inet = inet_sk(sk); | ||
624 | |||
625 | err = ip_build_and_send_pkt(skb, sk, | ||
626 | inet->saddr, inet->daddr, NULL); | ||
627 | if (err == NET_XMIT_CN) | ||
628 | err = 0; | ||
629 | |||
630 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
631 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
632 | } | ||
633 | |||
634 | return err; | ||
635 | } | ||
636 | |||
637 | static inline u64 dccp_v4_init_sequence(const struct sock *sk, | ||
638 | const struct sk_buff *skb) | ||
639 | { | ||
640 | return secure_dccp_sequence_number(skb->nh.iph->daddr, | ||
641 | skb->nh.iph->saddr, | ||
642 | dccp_hdr(skb)->dccph_dport, | ||
643 | dccp_hdr(skb)->dccph_sport); | ||
644 | } | ||
645 | |||
646 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | ||
647 | { | ||
648 | struct inet_request_sock *ireq; | ||
649 | struct dccp_sock dp; | ||
650 | struct request_sock *req; | ||
651 | struct dccp_request_sock *dreq; | ||
652 | const __u32 saddr = skb->nh.iph->saddr; | ||
653 | const __u32 daddr = skb->nh.iph->daddr; | ||
654 | struct dst_entry *dst = NULL; | ||
655 | |||
656 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ | ||
657 | if (((struct rtable *)skb->dst)->rt_flags & | ||
658 | (RTCF_BROADCAST | RTCF_MULTICAST)) | ||
659 | goto drop; | ||
660 | |||
661 | /* | ||
662 | * TW buckets are converted to open requests without | ||
663 | * limitations, they conserve resources and peer is | ||
664 | * evidently real one. | ||
665 | */ | ||
666 | if (inet_csk_reqsk_queue_is_full(sk)) | ||
667 | goto drop; | ||
668 | |||
669 | /* | ||
670 | * Accept backlog is full. If we have already queued enough | ||
671 | * of warm entries in syn queue, drop request. It is better than | ||
672 | * clogging syn queue with openreqs with exponentially increasing | ||
673 | * timeout. | ||
674 | */ | ||
675 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) | ||
676 | goto drop; | ||
677 | |||
678 | req = reqsk_alloc(sk->sk_prot->rsk_prot); | ||
679 | if (req == NULL) | ||
680 | goto drop; | ||
681 | |||
682 | /* FIXME: process options */ | ||
683 | |||
684 | dccp_openreq_init(req, &dp, skb); | ||
685 | |||
686 | ireq = inet_rsk(req); | ||
687 | ireq->loc_addr = daddr; | ||
688 | ireq->rmt_addr = saddr; | ||
689 | /* FIXME: Merge Aristeu's option parsing code when ready */ | ||
690 | req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ | ||
691 | ireq->opt = NULL; | ||
692 | |||
693 | /* | ||
694 | * Step 3: Process LISTEN state | ||
695 | * | ||
696 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
697 | * | ||
698 | * In fact we defer setting S.GSR, S.SWL, S.SWH to | ||
699 | * dccp_create_openreq_child. | ||
700 | */ | ||
701 | dreq = dccp_rsk(req); | ||
702 | dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
703 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); | ||
704 | dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; | ||
705 | |||
706 | if (dccp_v4_send_response(sk, req, dst)) | ||
707 | goto drop_and_free; | ||
708 | |||
709 | inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); | ||
710 | return 0; | ||
711 | |||
712 | drop_and_free: | ||
713 | /* | ||
714 | * FIXME: should be reqsk_free after implementing req->rsk_ops | ||
715 | */ | ||
716 | __reqsk_free(req); | ||
717 | drop: | ||
718 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
719 | return -1; | ||
720 | } | ||
721 | |||
722 | /* | ||
723 | * The three way handshake has completed - we got a valid ACK or DATAACK - | ||
724 | * now create the new socket. | ||
725 | * | ||
726 | * This is the equivalent of TCP's tcp_v4_syn_recv_sock | ||
727 | */ | ||
728 | struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | ||
729 | struct request_sock *req, | ||
730 | struct dst_entry *dst) | ||
731 | { | ||
732 | struct inet_request_sock *ireq; | ||
733 | struct inet_sock *newinet; | ||
734 | struct dccp_sock *newdp; | ||
735 | struct sock *newsk; | ||
736 | |||
737 | if (sk_acceptq_is_full(sk)) | ||
738 | goto exit_overflow; | ||
739 | |||
740 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
741 | goto exit; | ||
742 | |||
743 | newsk = dccp_create_openreq_child(sk, req, skb); | ||
744 | if (newsk == NULL) | ||
745 | goto exit; | ||
746 | |||
747 | sk_setup_caps(newsk, dst); | ||
748 | |||
749 | newdp = dccp_sk(newsk); | ||
750 | newinet = inet_sk(newsk); | ||
751 | ireq = inet_rsk(req); | ||
752 | newinet->daddr = ireq->rmt_addr; | ||
753 | newinet->rcv_saddr = ireq->loc_addr; | ||
754 | newinet->saddr = ireq->loc_addr; | ||
755 | newinet->opt = ireq->opt; | ||
756 | ireq->opt = NULL; | ||
757 | newinet->mc_index = inet_iif(skb); | ||
758 | newinet->mc_ttl = skb->nh.iph->ttl; | ||
759 | newinet->id = jiffies; | ||
760 | |||
761 | dccp_sync_mss(newsk, dst_mtu(dst)); | ||
762 | |||
763 | __inet_hash(&dccp_hashinfo, newsk, 0); | ||
764 | __inet_inherit_port(&dccp_hashinfo, sk, newsk); | ||
765 | |||
766 | return newsk; | ||
767 | |||
768 | exit_overflow: | ||
769 | NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); | ||
770 | exit: | ||
771 | NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); | ||
772 | dst_release(dst); | ||
773 | return NULL; | ||
774 | } | ||
775 | |||
776 | static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | ||
777 | { | ||
778 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
779 | const struct iphdr *iph = skb->nh.iph; | ||
780 | struct sock *nsk; | ||
781 | struct request_sock **prev; | ||
782 | /* Find possible connection requests. */ | ||
783 | struct request_sock *req = inet_csk_search_req(sk, &prev, | ||
784 | dh->dccph_sport, | ||
785 | iph->saddr, iph->daddr); | ||
786 | if (req != NULL) | ||
787 | return dccp_check_req(sk, skb, req, prev); | ||
788 | |||
789 | nsk = __inet_lookup_established(&dccp_hashinfo, | ||
790 | iph->saddr, dh->dccph_sport, | ||
791 | iph->daddr, ntohs(dh->dccph_dport), | ||
792 | inet_iif(skb)); | ||
793 | if (nsk != NULL) { | ||
794 | if (nsk->sk_state != DCCP_TIME_WAIT) { | ||
795 | bh_lock_sock(nsk); | ||
796 | return nsk; | ||
797 | } | ||
798 | inet_twsk_put((struct inet_timewait_sock *)nsk); | ||
799 | return NULL; | ||
800 | } | ||
801 | |||
802 | return sk; | ||
803 | } | ||
804 | |||
805 | int dccp_v4_checksum(struct sk_buff *skb) | ||
806 | { | ||
807 | struct dccp_hdr* dh = dccp_hdr(skb); | ||
808 | int checksum_len; | ||
809 | u32 tmp; | ||
810 | |||
811 | if (dh->dccph_cscov == 0) | ||
812 | checksum_len = skb->len; | ||
813 | else { | ||
814 | checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); | ||
815 | checksum_len = checksum_len < skb->len ? checksum_len : skb->len; | ||
816 | } | ||
817 | |||
818 | tmp = csum_partial((unsigned char *)dh, checksum_len, 0); | ||
819 | return csum_fold(tmp); | ||
820 | } | ||
821 | |||
822 | static int dccp_v4_verify_checksum(struct sk_buff *skb) | ||
823 | { | ||
824 | struct dccp_hdr *th = dccp_hdr(skb); | ||
825 | const u16 remote_checksum = th->dccph_checksum; | ||
826 | u16 local_checksum; | ||
827 | |||
828 | /* FIXME: don't mess with skb payload */ | ||
829 | th->dccph_checksum = 0; /* zero it for computation */ | ||
830 | |||
831 | local_checksum = dccp_v4_checksum(skb); | ||
832 | |||
833 | /* FIXME: don't mess with skb payload */ | ||
834 | th->dccph_checksum = remote_checksum; /* put it back */ | ||
835 | |||
836 | return remote_checksum == local_checksum ? 0 : -1; | ||
837 | } | ||
838 | |||
839 | static struct dst_entry* dccp_v4_route_skb(struct sock *sk, | ||
840 | struct sk_buff *skb) | ||
841 | { | ||
842 | struct rtable *rt; | ||
843 | struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, | ||
844 | .nl_u = { .ip4_u = | ||
845 | { .daddr = skb->nh.iph->saddr, | ||
846 | .saddr = skb->nh.iph->daddr, | ||
847 | .tos = RT_CONN_FLAGS(sk) } }, | ||
848 | .proto = sk->sk_protocol, | ||
849 | .uli_u = { .ports = | ||
850 | { .sport = dccp_hdr(skb)->dccph_dport, | ||
851 | .dport = dccp_hdr(skb)->dccph_sport } } }; | ||
852 | |||
853 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
854 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
855 | return NULL; | ||
856 | } | ||
857 | |||
858 | return &rt->u.dst; | ||
859 | } | ||
860 | |||
861 | void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) | ||
862 | { | ||
863 | int err; | ||
864 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
865 | const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + | ||
866 | sizeof(struct dccp_hdr_ext) + | ||
867 | sizeof(struct dccp_hdr_reset); | ||
868 | struct sk_buff *skb; | ||
869 | struct dst_entry *dst; | ||
870 | |||
871 | /* Never send a reset in response to a reset. */ | ||
872 | if (rxdh->dccph_type == DCCP_PKT_RESET) | ||
873 | return; | ||
874 | |||
875 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
876 | return; | ||
877 | |||
878 | dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); | ||
879 | if (dst == NULL) | ||
880 | return; | ||
881 | |||
882 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
883 | if (skb == NULL) | ||
884 | goto out; | ||
885 | |||
886 | /* Reserve space for headers. */ | ||
887 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
888 | skb->dst = dst_clone(dst); | ||
889 | |||
890 | skb->h.raw = skb_push(skb, dccp_hdr_reset_len); | ||
891 | dh = dccp_hdr(skb); | ||
892 | memset(dh, 0, dccp_hdr_reset_len); | ||
893 | |||
894 | /* Build DCCP header and checksum it. */ | ||
895 | dh->dccph_type = DCCP_PKT_RESET; | ||
896 | dh->dccph_sport = rxdh->dccph_dport; | ||
897 | dh->dccph_dport = rxdh->dccph_sport; | ||
898 | dh->dccph_doff = dccp_hdr_reset_len / 4; | ||
899 | dh->dccph_x = 1; | ||
900 | dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; | ||
901 | |||
902 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
903 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
904 | |||
905 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
906 | |||
907 | bh_lock_sock(dccp_ctl_socket->sk); | ||
908 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
909 | rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); | ||
910 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
911 | |||
912 | if (err == NET_XMIT_CN || err == 0) { | ||
913 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
914 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
915 | } | ||
916 | out: | ||
917 | dst_release(dst); | ||
918 | } | ||
919 | |||
920 | int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | ||
921 | { | ||
922 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
923 | |||
924 | if (sk->sk_state == DCCP_OPEN) { /* Fast path */ | ||
925 | if (dccp_rcv_established(sk, skb, dh, skb->len)) | ||
926 | goto reset; | ||
927 | return 0; | ||
928 | } | ||
929 | |||
930 | /* | ||
931 | * Step 3: Process LISTEN state | ||
932 | * If S.state == LISTEN, | ||
933 | * If P.type == Request or P contains a valid Init Cookie option, | ||
934 | * * Must scan the packet's options to check for an Init | ||
935 | * Cookie. Only the Init Cookie is processed here, | ||
936 | * however; other options are processed in Step 8. This | ||
937 | * scan need only be performed if the endpoint uses Init | ||
938 | * Cookies * | ||
939 | * * Generate a new socket and switch to that socket * | ||
940 | * Set S := new socket for this port pair | ||
941 | * S.state = RESPOND | ||
942 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
943 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
944 | * Continue with S.state == RESPOND | ||
945 | * * A Response packet will be generated in Step 11 * | ||
946 | * Otherwise, | ||
947 | * Generate Reset(No Connection) unless P.type == Reset | ||
948 | * Drop packet and return | ||
949 | * | ||
950 | * NOTE: the check for the packet types is done in dccp_rcv_state_process | ||
951 | */ | ||
952 | if (sk->sk_state == DCCP_LISTEN) { | ||
953 | struct sock *nsk = dccp_v4_hnd_req(sk, skb); | ||
954 | |||
955 | if (nsk == NULL) | ||
956 | goto discard; | ||
957 | |||
958 | if (nsk != sk) { | ||
959 | if (dccp_child_process(sk, nsk, skb)) | ||
960 | goto reset; | ||
961 | return 0; | ||
962 | } | ||
963 | } | ||
964 | |||
965 | if (dccp_rcv_state_process(sk, skb, dh, skb->len)) | ||
966 | goto reset; | ||
967 | return 0; | ||
968 | |||
969 | reset: | ||
970 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
971 | dccp_v4_ctl_send_reset(skb); | ||
972 | discard: | ||
973 | kfree_skb(skb); | ||
974 | return 0; | ||
975 | } | ||
976 | |||
977 | static inline int dccp_invalid_packet(struct sk_buff *skb) | ||
978 | { | ||
979 | const struct dccp_hdr *dh; | ||
980 | |||
981 | if (skb->pkt_type != PACKET_HOST) | ||
982 | return 1; | ||
983 | |||
984 | if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { | ||
985 | dccp_pr_debug("pskb_may_pull failed\n"); | ||
986 | return 1; | ||
987 | } | ||
988 | |||
989 | dh = dccp_hdr(skb); | ||
990 | |||
991 | /* If the packet type is not understood, drop packet and return */ | ||
992 | if (dh->dccph_type >= DCCP_PKT_INVALID) { | ||
993 | dccp_pr_debug("invalid packet type\n"); | ||
994 | return 1; | ||
995 | } | ||
996 | |||
997 | /* | ||
998 | * If P.Data Offset is too small for packet type, or too large for | ||
999 | * packet, drop packet and return | ||
1000 | */ | ||
1001 | if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { | ||
1002 | dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); | ||
1003 | return 1; | ||
1004 | } | ||
1005 | |||
1006 | if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { | ||
1007 | dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); | ||
1008 | return 1; | ||
1009 | } | ||
1010 | |||
1011 | dh = dccp_hdr(skb); | ||
1012 | |||
1013 | /* | ||
1014 | * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet | ||
1015 | * has short sequence numbers), drop packet and return | ||
1016 | */ | ||
1017 | if (dh->dccph_x == 0 && | ||
1018 | dh->dccph_type != DCCP_PKT_DATA && | ||
1019 | dh->dccph_type != DCCP_PKT_ACK && | ||
1020 | dh->dccph_type != DCCP_PKT_DATAACK) { | ||
1021 | dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", | ||
1022 | dccp_packet_name(dh->dccph_type)); | ||
1023 | return 1; | ||
1024 | } | ||
1025 | |||
1026 | /* If the header checksum is incorrect, drop packet and return */ | ||
1027 | if (dccp_v4_verify_checksum(skb) < 0) { | ||
1028 | dccp_pr_debug("header checksum is incorrect\n"); | ||
1029 | return 1; | ||
1030 | } | ||
1031 | |||
1032 | return 0; | ||
1033 | } | ||
1034 | |||
1035 | /* this is called when real data arrives */ | ||
1036 | int dccp_v4_rcv(struct sk_buff *skb) | ||
1037 | { | ||
1038 | const struct dccp_hdr *dh; | ||
1039 | struct sock *sk; | ||
1040 | int rc; | ||
1041 | |||
1042 | /* Step 1: Check header basics: */ | ||
1043 | |||
1044 | if (dccp_invalid_packet(skb)) | ||
1045 | goto discard_it; | ||
1046 | |||
1047 | dh = dccp_hdr(skb); | ||
1048 | #if 0 | ||
1049 | /* | ||
1050 | * Use something like this to simulate some DATA/DATAACK loss to test | ||
1051 | * dccp_ackpkts_add, you'll get something like this on a session that | ||
1052 | * sends 10 DATA/DATAACK packets: | ||
1053 | * | ||
1054 | * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| | ||
1055 | * | ||
1056 | * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet | ||
1057 | * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state | ||
1058 | * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet | ||
1059 | * | ||
1060 | * So... | ||
1061 | * | ||
1062 | * 281473596467422 was received | ||
1063 | * 281473596467421 was not received | ||
1064 | * 281473596467420 was received | ||
1065 | * 281473596467419 was not received | ||
1066 | * 281473596467418 was received | ||
1067 | * 281473596467417 was not received | ||
1068 | * 281473596467416 was received | ||
1069 | * 281473596467415 was not received | ||
1070 | * 281473596467414 was received | ||
1071 | * 281473596467413 was received (this one was the 3way handshake RESPONSE) | ||
1072 | * | ||
1073 | */ | ||
1074 | if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { | ||
1075 | static int discard = 0; | ||
1076 | |||
1077 | if (discard) { | ||
1078 | discard = 0; | ||
1079 | goto discard_it; | ||
1080 | } | ||
1081 | discard = 1; | ||
1082 | } | ||
1083 | #endif | ||
1084 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | ||
1085 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | ||
1086 | |||
1087 | dccp_pr_debug("%8.8s " | ||
1088 | "src=%u.%u.%u.%u@%-5d " | ||
1089 | "dst=%u.%u.%u.%u@%-5d seq=%llu", | ||
1090 | dccp_packet_name(dh->dccph_type), | ||
1091 | NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), | ||
1092 | NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), | ||
1093 | DCCP_SKB_CB(skb)->dccpd_seq); | ||
1094 | |||
1095 | if (dccp_packet_without_ack(skb)) { | ||
1096 | DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; | ||
1097 | dccp_pr_debug_cat("\n"); | ||
1098 | } else { | ||
1099 | DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); | ||
1100 | dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
1101 | } | ||
1102 | |||
1103 | /* Step 2: | ||
1104 | * Look up flow ID in table and get corresponding socket */ | ||
1105 | sk = __inet_lookup(&dccp_hashinfo, | ||
1106 | skb->nh.iph->saddr, dh->dccph_sport, | ||
1107 | skb->nh.iph->daddr, ntohs(dh->dccph_dport), | ||
1108 | inet_iif(skb)); | ||
1109 | |||
1110 | /* | ||
1111 | * Step 2: | ||
1112 | * If no socket ... | ||
1113 | * Generate Reset(No Connection) unless P.type == Reset | ||
1114 | * Drop packet and return | ||
1115 | */ | ||
1116 | if (sk == NULL) { | ||
1117 | dccp_pr_debug("failed to look up flow ID in table and " | ||
1118 | "get corresponding socket\n"); | ||
1119 | goto no_dccp_socket; | ||
1120 | } | ||
1121 | |||
1122 | /* | ||
1123 | * Step 2: | ||
1124 | * ... or S.state == TIMEWAIT, | ||
1125 | * Generate Reset(No Connection) unless P.type == Reset | ||
1126 | * Drop packet and return | ||
1127 | */ | ||
1128 | |||
1129 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
1130 | dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); | ||
1131 | goto discard_and_relse; | ||
1132 | } | ||
1133 | |||
1134 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | ||
1135 | dccp_pr_debug("xfrm4_policy_check failed\n"); | ||
1136 | goto discard_and_relse; | ||
1137 | } | ||
1138 | |||
1139 | if (sk_filter(sk, skb, 0)) { | ||
1140 | dccp_pr_debug("sk_filter failed\n"); | ||
1141 | goto discard_and_relse; | ||
1142 | } | ||
1143 | |||
1144 | skb->dev = NULL; | ||
1145 | |||
1146 | bh_lock_sock(sk); | ||
1147 | rc = 0; | ||
1148 | if (!sock_owned_by_user(sk)) | ||
1149 | rc = dccp_v4_do_rcv(sk, skb); | ||
1150 | else | ||
1151 | sk_add_backlog(sk, skb); | ||
1152 | bh_unlock_sock(sk); | ||
1153 | |||
1154 | sock_put(sk); | ||
1155 | return rc; | ||
1156 | |||
1157 | no_dccp_socket: | ||
1158 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
1159 | goto discard_it; | ||
1160 | /* | ||
1161 | * Step 2: | ||
1162 | * Generate Reset(No Connection) unless P.type == Reset | ||
1163 | * Drop packet and return | ||
1164 | */ | ||
1165 | if (dh->dccph_type != DCCP_PKT_RESET) { | ||
1166 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
1167 | dccp_v4_ctl_send_reset(skb); | ||
1168 | } | ||
1169 | |||
1170 | discard_it: | ||
1171 | /* Discard frame. */ | ||
1172 | kfree_skb(skb); | ||
1173 | return 0; | ||
1174 | |||
1175 | discard_and_relse: | ||
1176 | sock_put(sk); | ||
1177 | goto discard_it; | ||
1178 | } | ||
1179 | |||
1180 | static int dccp_v4_init_sock(struct sock *sk) | ||
1181 | { | ||
1182 | struct dccp_sock *dp = dccp_sk(sk); | ||
1183 | static int dccp_ctl_socket_init = 1; | ||
1184 | |||
1185 | dccp_options_init(&dp->dccps_options); | ||
1186 | |||
1187 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
1188 | dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
1189 | GFP_KERNEL); | ||
1190 | |||
1191 | if (dp->dccps_hc_rx_ackpkts == NULL) | ||
1192 | return -ENOMEM; | ||
1193 | } | ||
1194 | |||
1195 | /* | ||
1196 | * FIXME: We're hardcoding the CCID, and doing this at this point makes | ||
1197 | * the listening (master) sock get CCID control blocks, which is not | ||
1198 | * necessary, but for now, to not mess with the test userspace apps, | ||
1199 | * lets leave it here, later the real solution is to do this in a | ||
1200 | * setsockopt(CCIDs-I-want/accept). -acme | ||
1201 | */ | ||
1202 | if (likely(!dccp_ctl_socket_init)) { | ||
1203 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); | ||
1204 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); | ||
1205 | if (dp->dccps_hc_rx_ccid == NULL || | ||
1206 | dp->dccps_hc_tx_ccid == NULL) { | ||
1207 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | ||
1208 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | ||
1209 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | ||
1210 | dp->dccps_hc_rx_ackpkts = NULL; | ||
1211 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | ||
1212 | return -ENOMEM; | ||
1213 | } | ||
1214 | } else | ||
1215 | dccp_ctl_socket_init = 0; | ||
1216 | |||
1217 | dccp_init_xmit_timers(sk); | ||
1218 | sk->sk_state = DCCP_CLOSED; | ||
1219 | dp->dccps_mss_cache = 536; | ||
1220 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | ||
1221 | |||
1222 | return 0; | ||
1223 | } | ||
1224 | |||
1225 | int dccp_v4_destroy_sock(struct sock *sk) | ||
1226 | { | ||
1227 | struct dccp_sock *dp = dccp_sk(sk); | ||
1228 | |||
1229 | /* | ||
1230 | * DCCP doesn't use sk_qrite_queue, just sk_send_head | ||
1231 | * for retransmissions | ||
1232 | */ | ||
1233 | if (sk->sk_send_head != NULL) { | ||
1234 | kfree_skb(sk->sk_send_head); | ||
1235 | sk->sk_send_head = NULL; | ||
1236 | } | ||
1237 | |||
1238 | /* Clean up a referenced DCCP bind bucket. */ | ||
1239 | if (inet_csk(sk)->icsk_bind_hash != NULL) | ||
1240 | inet_put_port(&dccp_hashinfo, sk); | ||
1241 | |||
1242 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | ||
1243 | dp->dccps_hc_rx_ackpkts = NULL; | ||
1244 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | ||
1245 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | ||
1246 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | ||
1247 | |||
1248 | return 0; | ||
1249 | } | ||
1250 | |||
1251 | static void dccp_v4_reqsk_destructor(struct request_sock *req) | ||
1252 | { | ||
1253 | kfree(inet_rsk(req)->opt); | ||
1254 | } | ||
1255 | |||
1256 | static struct request_sock_ops dccp_request_sock_ops = { | ||
1257 | .family = PF_INET, | ||
1258 | .obj_size = sizeof(struct dccp_request_sock), | ||
1259 | .rtx_syn_ack = dccp_v4_send_response, | ||
1260 | .send_ack = dccp_v4_reqsk_send_ack, | ||
1261 | .destructor = dccp_v4_reqsk_destructor, | ||
1262 | .send_reset = dccp_v4_ctl_send_reset, | ||
1263 | }; | ||
1264 | |||
1265 | struct proto dccp_v4_prot = { | ||
1266 | .name = "DCCP", | ||
1267 | .owner = THIS_MODULE, | ||
1268 | .close = dccp_close, | ||
1269 | .connect = dccp_v4_connect, | ||
1270 | .disconnect = dccp_disconnect, | ||
1271 | .ioctl = dccp_ioctl, | ||
1272 | .init = dccp_v4_init_sock, | ||
1273 | .setsockopt = dccp_setsockopt, | ||
1274 | .getsockopt = dccp_getsockopt, | ||
1275 | .sendmsg = dccp_sendmsg, | ||
1276 | .recvmsg = dccp_recvmsg, | ||
1277 | .backlog_rcv = dccp_v4_do_rcv, | ||
1278 | .hash = dccp_v4_hash, | ||
1279 | .unhash = dccp_v4_unhash, | ||
1280 | .accept = inet_csk_accept, | ||
1281 | .get_port = dccp_v4_get_port, | ||
1282 | .shutdown = dccp_shutdown, | ||
1283 | .destroy = dccp_v4_destroy_sock, | ||
1284 | .orphan_count = &dccp_orphan_count, | ||
1285 | .max_header = MAX_DCCP_HEADER, | ||
1286 | .obj_size = sizeof(struct dccp_sock), | ||
1287 | .rsk_prot = &dccp_request_sock_ops, | ||
1288 | .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ | ||
1289 | }; | ||
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 000000000000..810f0c293b85 --- /dev/null +++ b/net/dccp/minisocks.c | |||
@@ -0,0 +1,199 @@ | |||
1 | /* | ||
2 | * net/dccp/minisocks.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <linux/timer.h> | ||
17 | |||
18 | #include <net/sock.h> | ||
19 | #include <net/xfrm.h> | ||
20 | #include <net/inet_timewait_sock.h> | ||
21 | |||
22 | #include "ccid.h" | ||
23 | #include "dccp.h" | ||
24 | |||
/*
 * dccp_time_wait  -  placeholder for entering a TIME_WAIT-like state
 *
 * Not implemented yet: it only logs a debug message and moves @sk to
 * @state. @timeo is currently unused.
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
	/* FIXME: Implement */
	dccp_pr_debug("Want to help? Start here\n");

	dccp_set_state(sk, state);
}
31 | |||
32 | /* This is for handling early-kills of TIME_WAIT sockets. */ | ||
33 | void dccp_tw_deschedule(struct inet_timewait_sock *tw) | ||
34 | { | ||
35 | dccp_pr_debug("Want to help? Start here\n"); | ||
36 | __inet_twsk_kill(tw, &dccp_hashinfo); | ||
37 | } | ||
38 | |||
39 | struct sock *dccp_create_openreq_child(struct sock *sk, | ||
40 | const struct request_sock *req, | ||
41 | const struct sk_buff *skb) | ||
42 | { | ||
43 | /* | ||
44 | * Step 3: Process LISTEN state | ||
45 | * | ||
46 | * // Generate a new socket and switch to that socket | ||
47 | * Set S := new socket for this port pair | ||
48 | */ | ||
49 | struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); | ||
50 | |||
51 | if (newsk != NULL) { | ||
52 | const struct dccp_request_sock *dreq = dccp_rsk(req); | ||
53 | struct inet_connection_sock *newicsk = inet_csk(sk); | ||
54 | struct dccp_sock *newdp = dccp_sk(newsk); | ||
55 | |||
56 | newdp->dccps_hc_rx_ackpkts = NULL; | ||
57 | newdp->dccps_role = DCCP_ROLE_SERVER; | ||
58 | newicsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
59 | |||
60 | if (newdp->dccps_options.dccpo_send_ack_vector) { | ||
61 | newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
62 | GFP_ATOMIC); | ||
63 | /* | ||
64 | * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone | ||
65 | * copied the master sock and left the CCID pointers for this child, | ||
66 | * that is why we do the __ccid_get calls. | ||
67 | */ | ||
68 | if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) | ||
69 | goto out_free; | ||
70 | } | ||
71 | |||
72 | if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || | ||
73 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { | ||
74 | dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); | ||
75 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); | ||
76 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); | ||
77 | out_free: | ||
78 | /* It is still raw copy of parent, so invalidate | ||
79 | * destructor and make plain sk_free() */ | ||
80 | newsk->sk_destruct = NULL; | ||
81 | sk_free(newsk); | ||
82 | return NULL; | ||
83 | } | ||
84 | |||
85 | __ccid_get(newdp->dccps_hc_rx_ccid); | ||
86 | __ccid_get(newdp->dccps_hc_tx_ccid); | ||
87 | |||
88 | /* | ||
89 | * Step 3: Process LISTEN state | ||
90 | * | ||
91 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
92 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
93 | */ | ||
94 | |||
95 | /* See dccp_v4_conn_request */ | ||
96 | newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; | ||
97 | |||
98 | newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; | ||
99 | dccp_update_gsr(newsk, dreq->dreq_isr); | ||
100 | |||
101 | newdp->dccps_iss = dreq->dreq_iss; | ||
102 | dccp_update_gss(newsk, dreq->dreq_iss); | ||
103 | |||
104 | dccp_init_xmit_timers(newsk); | ||
105 | |||
106 | DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); | ||
107 | } | ||
108 | return newsk; | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Process an incoming packet for RESPOND sockets represented | ||
113 | * as an request_sock. | ||
114 | */ | ||
115 | struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
116 | struct request_sock *req, | ||
117 | struct request_sock **prev) | ||
118 | { | ||
119 | struct sock *child = NULL; | ||
120 | |||
121 | /* Check for retransmitted REQUEST */ | ||
122 | if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { | ||
123 | if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { | ||
124 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
125 | |||
126 | dccp_pr_debug("Retransmitted REQUEST\n"); | ||
127 | /* Send another RESPONSE packet */ | ||
128 | dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); | ||
129 | dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); | ||
130 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | ||
131 | } | ||
132 | /* Network Duplicate, discard packet */ | ||
133 | return NULL; | ||
134 | } | ||
135 | |||
136 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; | ||
137 | |||
138 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && | ||
139 | dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) | ||
140 | goto drop; | ||
141 | |||
142 | /* Invalid ACK */ | ||
143 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { | ||
144 | dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", | ||
145 | DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); | ||
146 | goto drop; | ||
147 | } | ||
148 | |||
149 | child = dccp_v4_request_recv_sock(sk, skb, req, NULL); | ||
150 | if (child == NULL) | ||
151 | goto listen_overflow; | ||
152 | |||
153 | /* FIXME: deal with options */ | ||
154 | |||
155 | inet_csk_reqsk_queue_unlink(sk, req, prev); | ||
156 | inet_csk_reqsk_queue_removed(sk, req); | ||
157 | inet_csk_reqsk_queue_add(sk, req, child); | ||
158 | out: | ||
159 | return child; | ||
160 | listen_overflow: | ||
161 | dccp_pr_debug("listen_overflow!\n"); | ||
162 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
163 | drop: | ||
164 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) | ||
165 | req->rsk_ops->send_reset(skb); | ||
166 | |||
167 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
168 | goto out; | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Queue segment on the new socket if the new socket is active, | ||
173 | * otherwise we just shortcircuit this and continue with | ||
174 | * the new socket. | ||
175 | */ | ||
176 | int dccp_child_process(struct sock *parent, struct sock *child, | ||
177 | struct sk_buff *skb) | ||
178 | { | ||
179 | int ret = 0; | ||
180 | const int state = child->sk_state; | ||
181 | |||
182 | if (!sock_owned_by_user(child)) { | ||
183 | ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); | ||
184 | |||
185 | /* Wakeup parent, send SIGIO */ | ||
186 | if (state == DCCP_RESPOND && child->sk_state != state) | ||
187 | parent->sk_data_ready(parent, 0); | ||
188 | } else { | ||
189 | /* Alas, it is possible again, because we do lookup | ||
190 | * in main socket hash table and lock on listening | ||
191 | * socket does not protect us more. | ||
192 | */ | ||
193 | sk_add_backlog(child, skb); | ||
194 | } | ||
195 | |||
196 | bh_unlock_sock(child); | ||
197 | sock_put(child); | ||
198 | return ret; | ||
199 | } | ||
diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 000000000000..e1867767946c --- /dev/null +++ b/net/dccp/options.c | |||
@@ -0,0 +1,763 @@ | |||
1 | /* | ||
2 | * net/dccp/options.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> | ||
6 | * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/types.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | |||
20 | #include "ccid.h" | ||
21 | #include "dccp.h" | ||
22 | |||
23 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
24 | struct sock *sk, | ||
25 | const u64 ackno, | ||
26 | const unsigned char len, | ||
27 | const unsigned char *vector); | ||
28 | |||
29 | /* stores the default values for new connection. may be changed with sysctl */ | ||
30 | static const struct dccp_options dccpo_default_values = { | ||
31 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, | ||
32 | .dccpo_ccid = DCCPF_INITIAL_CCID, | ||
33 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, | ||
34 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, | ||
35 | }; | ||
36 | |||
37 | void dccp_options_init(struct dccp_options *dccpo) | ||
38 | { | ||
39 | memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); | ||
40 | } | ||
41 | |||
42 | static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) | ||
43 | { | ||
44 | u32 value = 0; | ||
45 | |||
46 | if (len > 3) | ||
47 | value += *bf++ << 24; | ||
48 | if (len > 2) | ||
49 | value += *bf++ << 16; | ||
50 | if (len > 1) | ||
51 | value += *bf++ << 8; | ||
52 | if (len > 0) | ||
53 | value += *bf; | ||
54 | |||
55 | return value; | ||
56 | } | ||
57 | |||
58 | int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | ||
59 | { | ||
60 | struct dccp_sock *dp = dccp_sk(sk); | ||
61 | #ifdef DCCP_DEBUG | ||
62 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : | ||
63 | "server rx opt: "; | ||
64 | #endif | ||
65 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
66 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; | ||
67 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | ||
68 | unsigned char *opt_ptr = options; | ||
69 | const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); | ||
70 | struct dccp_options_received *opt_recv = &dp->dccps_options_received; | ||
71 | unsigned char opt, len; | ||
72 | unsigned char *value; | ||
73 | |||
74 | memset(opt_recv, 0, sizeof(*opt_recv)); | ||
75 | |||
76 | while (opt_ptr != opt_end) { | ||
77 | opt = *opt_ptr++; | ||
78 | len = 0; | ||
79 | value = NULL; | ||
80 | |||
81 | /* Check if this isn't a single byte option */ | ||
82 | if (opt > DCCPO_MAX_RESERVED) { | ||
83 | if (opt_ptr == opt_end) | ||
84 | goto out_invalid_option; | ||
85 | |||
86 | len = *opt_ptr++; | ||
87 | if (len < 3) | ||
88 | goto out_invalid_option; | ||
89 | /* | ||
90 | * Remove the type and len fields, leaving | ||
91 | * just the value size | ||
92 | */ | ||
93 | len -= 2; | ||
94 | value = opt_ptr; | ||
95 | opt_ptr += len; | ||
96 | |||
97 | if (opt_ptr > opt_end) | ||
98 | goto out_invalid_option; | ||
99 | } | ||
100 | |||
101 | switch (opt) { | ||
102 | case DCCPO_PADDING: | ||
103 | break; | ||
104 | case DCCPO_NDP_COUNT: | ||
105 | if (len > 3) | ||
106 | goto out_invalid_option; | ||
107 | |||
108 | opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); | ||
109 | dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); | ||
110 | break; | ||
111 | case DCCPO_ACK_VECTOR_0: | ||
112 | if (len > DCCP_MAX_ACK_VECTOR_LEN) | ||
113 | goto out_invalid_option; | ||
114 | |||
115 | if (pkt_type == DCCP_PKT_DATA) | ||
116 | continue; | ||
117 | |||
118 | opt_recv->dccpor_ack_vector_len = len; | ||
119 | opt_recv->dccpor_ack_vector_idx = value - options; | ||
120 | |||
121 | dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", | ||
122 | debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
123 | dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
124 | value, len); | ||
125 | dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, | ||
126 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
127 | len, value); | ||
128 | break; | ||
129 | case DCCPO_TIMESTAMP: | ||
130 | if (len != 4) | ||
131 | goto out_invalid_option; | ||
132 | |||
133 | opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); | ||
134 | |||
135 | dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; | ||
136 | dp->dccps_timestamp_time = jiffies; | ||
137 | |||
138 | dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", | ||
139 | debug_prefix, opt_recv->dccpor_timestamp, | ||
140 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
141 | break; | ||
142 | case DCCPO_TIMESTAMP_ECHO: | ||
143 | if (len < 4 || len > 8) | ||
144 | goto out_invalid_option; | ||
145 | |||
146 | opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); | ||
147 | |||
148 | dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", | ||
149 | debug_prefix, opt_recv->dccpor_timestamp_echo, | ||
150 | len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
151 | tcp_time_stamp - opt_recv->dccpor_timestamp_echo); | ||
152 | |||
153 | opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); | ||
154 | dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, | ||
155 | opt_recv->dccpor_elapsed_time); | ||
156 | break; | ||
157 | case DCCPO_ELAPSED_TIME: | ||
158 | if (len > 4) | ||
159 | goto out_invalid_option; | ||
160 | |||
161 | if (pkt_type == DCCP_PKT_DATA) | ||
162 | continue; | ||
163 | opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); | ||
164 | dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, | ||
165 | opt_recv->dccpor_elapsed_time); | ||
166 | break; | ||
167 | /* | ||
168 | * From draft-ietf-dccp-spec-11.txt: | ||
169 | * | ||
170 | * Option numbers 128 through 191 are for options sent from the HC- | ||
171 | * Sender to the HC-Receiver; option numbers 192 through 255 are for | ||
172 | * options sent from the HC-Receiver to the HC-Sender. | ||
173 | */ | ||
174 | case 128 ... 191: { | ||
175 | const u16 idx = value - options; | ||
176 | |||
177 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) | ||
178 | goto out_invalid_option; | ||
179 | } | ||
180 | break; | ||
181 | case 192 ... 255: { | ||
182 | const u16 idx = value - options; | ||
183 | |||
184 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) | ||
185 | goto out_invalid_option; | ||
186 | } | ||
187 | break; | ||
188 | default: | ||
189 | pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", | ||
190 | sk, opt, len); | ||
191 | break; | ||
192 | } | ||
193 | } | ||
194 | |||
195 | return 0; | ||
196 | |||
197 | out_invalid_option: | ||
198 | DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); | ||
199 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; | ||
200 | pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); | ||
201 | return -1; | ||
202 | } | ||
203 | |||
204 | static void dccp_encode_value_var(const u32 value, unsigned char *to, | ||
205 | const unsigned int len) | ||
206 | { | ||
207 | if (len > 3) | ||
208 | *to++ = (value & 0xFF000000) >> 24; | ||
209 | if (len > 2) | ||
210 | *to++ = (value & 0xFF0000) >> 16; | ||
211 | if (len > 1) | ||
212 | *to++ = (value & 0xFF00) >> 8; | ||
213 | if (len > 0) | ||
214 | *to++ = (value & 0xFF); | ||
215 | } | ||
216 | |||
/*
 * Number of value bytes (1 to 3) used to encode the NDP count @ndp.
 * The one byte case is flagged as the likely one.
 */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}
221 | |||
222 | void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
223 | const unsigned char option, | ||
224 | const void *value, const unsigned char len) | ||
225 | { | ||
226 | unsigned char *to; | ||
227 | |||
228 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { | ||
229 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); | ||
230 | return; | ||
231 | } | ||
232 | |||
233 | DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; | ||
234 | |||
235 | to = skb_push(skb, len + 2); | ||
236 | *to++ = option; | ||
237 | *to++ = len + 2; | ||
238 | |||
239 | memcpy(to, value, len); | ||
240 | } | ||
241 | |||
242 | EXPORT_SYMBOL_GPL(dccp_insert_option); | ||
243 | |||
244 | static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) | ||
245 | { | ||
246 | struct dccp_sock *dp = dccp_sk(sk); | ||
247 | int ndp = dp->dccps_ndp_count; | ||
248 | |||
249 | if (dccp_non_data_packet(skb)) | ||
250 | ++dp->dccps_ndp_count; | ||
251 | else | ||
252 | dp->dccps_ndp_count = 0; | ||
253 | |||
254 | if (ndp > 0) { | ||
255 | unsigned char *ptr; | ||
256 | const int ndp_len = dccp_ndp_len(ndp); | ||
257 | const int len = ndp_len + 2; | ||
258 | |||
259 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | ||
260 | return; | ||
261 | |||
262 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
263 | |||
264 | ptr = skb_push(skb, len); | ||
265 | *ptr++ = DCCPO_NDP_COUNT; | ||
266 | *ptr++ = len; | ||
267 | dccp_encode_value_var(ndp, ptr, ndp_len); | ||
268 | } | ||
269 | } | ||
270 | |||
271 | static inline int dccp_elapsed_time_len(const u32 elapsed_time) | ||
272 | { | ||
273 | return elapsed_time == 0 ? 0 : | ||
274 | elapsed_time <= 0xFF ? 1 : | ||
275 | elapsed_time <= 0xFFFF ? 2 : | ||
276 | elapsed_time <= 0xFFFFFF ? 3 : 4; | ||
277 | } | ||
278 | |||
279 | void dccp_insert_option_elapsed_time(struct sock *sk, | ||
280 | struct sk_buff *skb, | ||
281 | u32 elapsed_time) | ||
282 | { | ||
283 | #ifdef DCCP_DEBUG | ||
284 | struct dccp_sock *dp = dccp_sk(sk); | ||
285 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : | ||
286 | "server TX opt: "; | ||
287 | #endif | ||
288 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | ||
289 | const int len = 2 + elapsed_time_len; | ||
290 | unsigned char *to; | ||
291 | |||
292 | /* If elapsed_time == 0... */ | ||
293 | if (elapsed_time_len == 2) | ||
294 | return; | ||
295 | |||
296 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
297 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); | ||
298 | return; | ||
299 | } | ||
300 | |||
301 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
302 | |||
303 | to = skb_push(skb, len); | ||
304 | *to++ = DCCPO_ELAPSED_TIME; | ||
305 | *to++ = len; | ||
306 | |||
307 | dccp_encode_value_var(elapsed_time, to, elapsed_time_len); | ||
308 | |||
309 | dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", | ||
310 | debug_prefix, elapsed_time, | ||
311 | len, DCCP_SKB_CB(skb)->dccpd_seq); | ||
312 | } | ||
313 | |||
314 | EXPORT_SYMBOL(dccp_insert_option_elapsed_time); | ||
315 | |||
316 | static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) | ||
317 | { | ||
318 | struct dccp_sock *dp = dccp_sk(sk); | ||
319 | #ifdef DCCP_DEBUG | ||
320 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : | ||
321 | "server TX opt: "; | ||
322 | #endif | ||
323 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
324 | int len = ap->dccpap_buf_vector_len + 2; | ||
325 | const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; | ||
326 | unsigned char *to, *from; | ||
327 | |||
328 | if (elapsed_time != 0) | ||
329 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
330 | |||
331 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
332 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); | ||
333 | return; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * XXX: now we have just one ack vector sent record, so | ||
338 | * we have to wait for it to be cleared. | ||
339 | * | ||
340 | * Of course this is not acceptable, but this is just for | ||
341 | * basic testing now. | ||
342 | */ | ||
343 | if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
344 | return; | ||
345 | |||
346 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
347 | |||
348 | to = skb_push(skb, len); | ||
349 | *to++ = DCCPO_ACK_VECTOR_0; | ||
350 | *to++ = len; | ||
351 | |||
352 | len = ap->dccpap_buf_vector_len; | ||
353 | from = ap->dccpap_buf + ap->dccpap_buf_head; | ||
354 | |||
355 | /* Check if buf_head wraps */ | ||
356 | if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { | ||
357 | const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; | ||
358 | |||
359 | memcpy(to, from, tailsize); | ||
360 | to += tailsize; | ||
361 | len -= tailsize; | ||
362 | from = ap->dccpap_buf; | ||
363 | } | ||
364 | |||
365 | memcpy(to, from, len); | ||
366 | /* | ||
367 | * From draft-ietf-dccp-spec-11.txt: | ||
368 | * | ||
369 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
370 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
371 | * sequence number it used for the ack packet; ack_ptr will equal | ||
372 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal | ||
373 | * buf_nonce. | ||
374 | * | ||
375 | * This implemention uses just one ack record for now. | ||
376 | */ | ||
377 | ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
378 | ap->dccpap_ack_ptr = ap->dccpap_buf_head; | ||
379 | ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; | ||
380 | ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; | ||
381 | ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; | ||
382 | |||
383 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", | ||
384 | debug_prefix, ap->dccpap_ack_vector_len, | ||
385 | ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); | ||
386 | } | ||
387 | |||
388 | static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) | ||
389 | { | ||
390 | const u32 now = htonl(tcp_time_stamp); | ||
391 | dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); | ||
392 | } | ||
393 | |||
394 | static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) | ||
395 | { | ||
396 | struct dccp_sock *dp = dccp_sk(sk); | ||
397 | #ifdef DCCP_DEBUG | ||
398 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : | ||
399 | "server TX opt: "; | ||
400 | #endif | ||
401 | u32 tstamp_echo; | ||
402 | const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; | ||
403 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | ||
404 | const int len = 6 + elapsed_time_len; | ||
405 | unsigned char *to; | ||
406 | |||
407 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
408 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); | ||
409 | return; | ||
410 | } | ||
411 | |||
412 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
413 | |||
414 | to = skb_push(skb, len); | ||
415 | *to++ = DCCPO_TIMESTAMP_ECHO; | ||
416 | *to++ = len; | ||
417 | |||
418 | tstamp_echo = htonl(dp->dccps_timestamp_echo); | ||
419 | memcpy(to, &tstamp_echo, 4); | ||
420 | to += 4; | ||
421 | dccp_encode_value_var(elapsed_time, to, elapsed_time_len); | ||
422 | |||
423 | dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", | ||
424 | debug_prefix, dp->dccps_timestamp_echo, | ||
425 | len, DCCP_SKB_CB(skb)->dccpd_seq); | ||
426 | |||
427 | dp->dccps_timestamp_echo = 0; | ||
428 | dp->dccps_timestamp_time = 0; | ||
429 | } | ||
430 | |||
431 | void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | ||
432 | { | ||
433 | struct dccp_sock *dp = dccp_sk(sk); | ||
434 | |||
435 | DCCP_SKB_CB(skb)->dccpd_opt_len = 0; | ||
436 | |||
437 | if (dp->dccps_options.dccpo_send_ndp_count) | ||
438 | dccp_insert_option_ndp(sk, skb); | ||
439 | |||
440 | if (!dccp_packet_without_ack(skb)) { | ||
441 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
442 | dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) | ||
443 | dccp_insert_option_ack_vector(sk, skb); | ||
444 | |||
445 | dccp_insert_option_timestamp(sk, skb); | ||
446 | if (dp->dccps_timestamp_echo != 0) | ||
447 | dccp_insert_option_timestamp_echo(sk, skb); | ||
448 | } | ||
449 | |||
450 | ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); | ||
451 | ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); | ||
452 | |||
453 | /* XXX: insert other options when appropriate */ | ||
454 | |||
455 | if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { | ||
456 | /* The length of all options has to be a multiple of 4 */ | ||
457 | int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; | ||
458 | |||
459 | if (padding != 0) { | ||
460 | padding = 4 - padding; | ||
461 | memset(skb_push(skb, padding), 0, padding); | ||
462 | DCCP_SKB_CB(skb)->dccpd_opt_len += padding; | ||
463 | } | ||
464 | } | ||
465 | } | ||
466 | |||
467 | struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) | ||
468 | { | ||
469 | struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); | ||
470 | |||
471 | if (ap != NULL) { | ||
472 | #ifdef DCCP_DEBUG | ||
473 | memset(ap->dccpap_buf, 0xFF, len); | ||
474 | #endif | ||
475 | ap->dccpap_buf_len = len; | ||
476 | ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; | ||
477 | ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
478 | ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; | ||
479 | ap->dccpap_ack_ptr = 0; | ||
480 | ap->dccpap_time = 0; | ||
481 | ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; | ||
482 | } | ||
483 | |||
484 | return ap; | ||
485 | } | ||
486 | |||
487 | void dccp_ackpkts_free(struct dccp_ackpkts *ap) | ||
488 | { | ||
489 | if (ap != NULL) { | ||
490 | #ifdef DCCP_DEBUG | ||
491 | memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); | ||
492 | #endif | ||
493 | kfree(ap); | ||
494 | } | ||
495 | } | ||
496 | |||
497 | static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, | ||
498 | const unsigned int index) | ||
499 | { | ||
500 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; | ||
501 | } | ||
502 | |||
503 | static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, | ||
504 | const unsigned int index) | ||
505 | { | ||
506 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; | ||
507 | } | ||
508 | |||
509 | /* | ||
510 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
511 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
512 | * this simplifies table updates if one of the missing packets arrives. | ||
513 | */ | ||
514 | static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, | ||
515 | const unsigned int packets, | ||
516 | const unsigned char state) | ||
517 | { | ||
518 | unsigned int gap; | ||
519 | signed long new_head; | ||
520 | |||
521 | if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) | ||
522 | return -ENOBUFS; | ||
523 | |||
524 | gap = packets - 1; | ||
525 | new_head = ap->dccpap_buf_head - packets; | ||
526 | |||
527 | if (new_head < 0) { | ||
528 | if (gap > 0) { | ||
529 | memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, | ||
530 | gap + new_head + 1); | ||
531 | gap = -new_head; | ||
532 | } | ||
533 | new_head += ap->dccpap_buf_len; | ||
534 | } | ||
535 | |||
536 | ap->dccpap_buf_head = new_head; | ||
537 | |||
538 | if (gap > 0) | ||
539 | memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, | ||
540 | DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); | ||
541 | |||
542 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
543 | ap->dccpap_buf_vector_len += packets; | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | /* | ||
548 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
549 | */ | ||
550 | int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) | ||
551 | { | ||
552 | /* | ||
553 | * Check at the right places if the buffer is full, if it is, tell the | ||
554 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
555 | * vectors, when we will free up space in dccpap_buf. | ||
556 | * | ||
557 | * We may well decide to do buffer compression, etc, but for now lets | ||
558 | * just drop. | ||
559 | * | ||
560 | * From Appendix A: | ||
561 | * | ||
562 | * Of course, the circular buffer may overflow, either when the HC- | ||
563 | * Sender is sending data at a very high rate, when the HC-Receiver's | ||
564 | * acknowledgements are not reaching the HC-Sender, or when the HC- | ||
565 | * Sender is forgetting to acknowledge those acks (so the HC-Receiver | ||
566 | * is unable to clean up old state). In this case, the HC-Receiver | ||
567 | * should either compress the buffer (by increasing run lengths when | ||
568 | * possible), transfer its state to a larger buffer, or, as a last | ||
569 | * resort, drop all received packets, without processing them | ||
570 | * whatsoever, until its buffer shrinks again. | ||
571 | */ | ||
572 | |||
573 | /* See if this is the first ackno being inserted */ | ||
574 | if (ap->dccpap_buf_vector_len == 0) { | ||
575 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
576 | ap->dccpap_buf_vector_len = 1; | ||
577 | } else if (after48(ackno, ap->dccpap_buf_ackno)) { | ||
578 | const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); | ||
579 | |||
580 | /* | ||
581 | * Look if the state of this packet is the same as the previous ackno | ||
582 | * and if so if we can bump the head len. | ||
583 | */ | ||
584 | if (delta == 1 && | ||
585 | dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && | ||
586 | dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) | ||
587 | ap->dccpap_buf[ap->dccpap_buf_head]++; | ||
588 | else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) | ||
589 | return -ENOBUFS; | ||
590 | } else { | ||
591 | /* | ||
592 | * A.1.2. Old Packets | ||
593 | * | ||
594 | * When a packet with Sequence Number S arrives, and S <= buf_ackno, | ||
595 | * the HC-Receiver will scan the table for the byte corresponding to S. | ||
596 | * (Indexing structures could reduce the complexity of this scan.) | ||
597 | */ | ||
598 | u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); | ||
599 | unsigned int index = ap->dccpap_buf_head; | ||
600 | |||
601 | while (1) { | ||
602 | const u8 len = dccp_ackpkts_len(ap, index); | ||
603 | const u8 state = dccp_ackpkts_state(ap, index); | ||
604 | /* | ||
605 | * valid packets not yet in dccpap_buf have a reserved entry, with | ||
606 | * a len equal to 0 | ||
607 | */ | ||
608 | if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && | ||
609 | len == 0 && delta == 0) { /* Found our reserved seat! */ | ||
610 | dccp_pr_debug("Found %llu reserved seat!\n", ackno); | ||
611 | ap->dccpap_buf[index] = state; | ||
612 | goto out; | ||
613 | } | ||
614 | /* len == 0 means one packet */ | ||
615 | if (delta < len + 1) | ||
616 | goto out_duplicate; | ||
617 | |||
618 | delta -= len + 1; | ||
619 | if (++index == ap->dccpap_buf_len) | ||
620 | index = 0; | ||
621 | } | ||
622 | } | ||
623 | |||
624 | ap->dccpap_buf_ackno = ackno; | ||
625 | ap->dccpap_time = jiffies; | ||
626 | out: | ||
627 | dccp_pr_debug(""); | ||
628 | dccp_ackpkts_print(ap); | ||
629 | return 0; | ||
630 | |||
631 | out_duplicate: | ||
632 | /* Duplicate packet */ | ||
633 | dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); | ||
634 | return -EILSEQ; | ||
635 | } | ||
636 | |||
637 | #ifdef DCCP_DEBUG | ||
638 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) | ||
639 | { | ||
640 | if (!dccp_debug) | ||
641 | return; | ||
642 | |||
643 | printk("ACK vector len=%d, ackno=%llu |", len, ackno); | ||
644 | |||
645 | while (len--) { | ||
646 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
647 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
648 | |||
649 | printk("%d,%d|", state, rl); | ||
650 | ++vector; | ||
651 | } | ||
652 | |||
653 | printk("\n"); | ||
654 | } | ||
655 | |||
656 | void dccp_ackpkts_print(const struct dccp_ackpkts *ap) | ||
657 | { | ||
658 | dccp_ackvector_print(ap->dccpap_buf_ackno, | ||
659 | ap->dccpap_buf + ap->dccpap_buf_head, | ||
660 | ap->dccpap_buf_vector_len); | ||
661 | } | ||
662 | #endif | ||
663 | |||
664 | static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) | ||
665 | { | ||
666 | /* | ||
667 | * As we're keeping track of the ack vector size | ||
668 | * (dccpap_buf_vector_len) and the sent ack vector size | ||
669 | * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but | ||
670 | * keep this code here as in the future we'll implement a vector of ack | ||
671 | * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme | ||
672 | */ | ||
673 | #if 0 | ||
674 | ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; | ||
675 | if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) | ||
676 | ap->dccpap_buf_tail -= ap->dccpap_buf_len; | ||
677 | #endif | ||
678 | ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; | ||
679 | } | ||
680 | |||
681 | void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, | ||
682 | u64 ackno) | ||
683 | { | ||
684 | /* Check if we actually sent an ACK vector */ | ||
685 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
686 | return; | ||
687 | |||
688 | if (ackno == ap->dccpap_ack_seqno) { | ||
689 | #ifdef DCCP_DEBUG | ||
690 | struct dccp_sock *dp = dccp_sk(sk); | ||
691 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : | ||
692 | "server rx ack: "; | ||
693 | #endif | ||
694 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", | ||
695 | debug_prefix, 1, | ||
696 | ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); | ||
697 | dccp_ackpkts_trow_away_ack_record(ap); | ||
698 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
699 | } | ||
700 | } | ||
701 | |||
702 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
703 | struct sock *sk, u64 ackno, | ||
704 | const unsigned char len, | ||
705 | const unsigned char *vector) | ||
706 | { | ||
707 | unsigned char i; | ||
708 | |||
709 | /* Check if we actually sent an ACK vector */ | ||
710 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
711 | return; | ||
712 | /* | ||
713 | * We're in the receiver half connection, so if the received an ACK vector | ||
714 | * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. | ||
715 | * | ||
716 | * Extra explanation with example: | ||
717 | * | ||
718 | * if we received an ACK vector with ackno 50, it can only be acking | ||
719 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
720 | */ | ||
721 | // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); | ||
722 | if (before48(ackno, ap->dccpap_ack_seqno)) { | ||
723 | // dccp_pr_debug_cat("yes\n"); | ||
724 | return; | ||
725 | } | ||
726 | // dccp_pr_debug_cat("no\n"); | ||
727 | |||
728 | i = len; | ||
729 | while (i--) { | ||
730 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
731 | u64 ackno_end_rl; | ||
732 | |||
733 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
734 | |||
735 | // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno); | ||
736 | if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { | ||
737 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
738 | // dccp_pr_debug_cat("yes\n"); | ||
739 | |||
740 | if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { | ||
741 | #ifdef DCCP_DEBUG | ||
742 | struct dccp_sock *dp = dccp_sk(sk); | ||
743 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : | ||
744 | "server rx ack: "; | ||
745 | #endif | ||
746 | dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", | ||
747 | debug_prefix, len, | ||
748 | ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); | ||
749 | dccp_ackpkts_trow_away_ack_record(ap); | ||
750 | } | ||
751 | /* | ||
752 | * If dccpap_ack_seqno was not received, no problem we'll | ||
753 | * send another ACK vector. | ||
754 | */ | ||
755 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
756 | break; | ||
757 | } | ||
758 | // dccp_pr_debug_cat("no\n"); | ||
759 | |||
760 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
761 | ++vector; | ||
762 | } | ||
763 | } | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 000000000000..22ca2910d4f2 --- /dev/null +++ b/net/dccp/output.c | |||
@@ -0,0 +1,406 @@ | |||
1 | /* | ||
2 | * net/dccp/output.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "ccid.h" | ||
20 | #include "dccp.h" | ||
21 | |||
22 | static inline void dccp_event_ack_sent(struct sock *sk) | ||
23 | { | ||
24 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
25 | } | ||
26 | |||
27 | /* | ||
28 | * All SKB's seen here are completely headerless. It is our | ||
29 | * job to build the DCCP header, and pass the packet down to | ||
30 | * IP so it can do the same plus pass the packet off to the | ||
31 | * device. | ||
32 | */ | ||
33 | int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | ||
34 | { | ||
35 | if (likely(skb != NULL)) { | ||
36 | const struct inet_sock *inet = inet_sk(sk); | ||
37 | struct dccp_sock *dp = dccp_sk(sk); | ||
38 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
39 | struct dccp_hdr *dh; | ||
40 | /* XXX For now we're using only 48 bits sequence numbers */ | ||
41 | const int dccp_header_size = sizeof(*dh) + | ||
42 | sizeof(struct dccp_hdr_ext) + | ||
43 | dccp_packet_hdr_len(dcb->dccpd_type); | ||
44 | int err, set_ack = 1; | ||
45 | u64 ackno = dp->dccps_gsr; | ||
46 | |||
47 | /* | ||
48 | * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing | ||
49 | * to do here... | ||
50 | */ | ||
51 | dccp_inc_seqno(&dp->dccps_gss); | ||
52 | |||
53 | dcb->dccpd_seq = dp->dccps_gss; | ||
54 | dccp_insert_options(sk, skb); | ||
55 | |||
56 | switch (dcb->dccpd_type) { | ||
57 | case DCCP_PKT_DATA: | ||
58 | set_ack = 0; | ||
59 | break; | ||
60 | case DCCP_PKT_SYNC: | ||
61 | case DCCP_PKT_SYNCACK: | ||
62 | ackno = dcb->dccpd_seq; | ||
63 | break; | ||
64 | } | ||
65 | |||
66 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
67 | dh = dccp_hdr(skb); | ||
68 | /* Data packets are not cloned as they are never retransmitted */ | ||
69 | if (skb_cloned(skb)) | ||
70 | skb_set_owner_w(skb, sk); | ||
71 | |||
72 | /* Build DCCP header and checksum it. */ | ||
73 | memset(dh, 0, dccp_header_size); | ||
74 | dh->dccph_type = dcb->dccpd_type; | ||
75 | dh->dccph_sport = inet->sport; | ||
76 | dh->dccph_dport = inet->dport; | ||
77 | dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; | ||
78 | dh->dccph_ccval = dcb->dccpd_ccval; | ||
79 | /* XXX For now we're using only 48 bits sequence numbers */ | ||
80 | dh->dccph_x = 1; | ||
81 | |||
82 | dp->dccps_awh = dp->dccps_gss; | ||
83 | dccp_hdr_set_seq(dh, dp->dccps_gss); | ||
84 | if (set_ack) | ||
85 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); | ||
86 | |||
87 | switch (dcb->dccpd_type) { | ||
88 | case DCCP_PKT_REQUEST: | ||
89 | dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; | ||
90 | break; | ||
91 | case DCCP_PKT_RESET: | ||
92 | dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; | ||
93 | break; | ||
94 | } | ||
95 | |||
96 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
97 | |||
98 | if (dcb->dccpd_type == DCCP_PKT_ACK || | ||
99 | dcb->dccpd_type == DCCP_PKT_DATAACK) | ||
100 | dccp_event_ack_sent(sk); | ||
101 | |||
102 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
103 | |||
104 | err = ip_queue_xmit(skb, 0); | ||
105 | if (err <= 0) | ||
106 | return err; | ||
107 | |||
108 | /* NET_XMIT_CN is special. It does not guarantee, | ||
109 | * that this packet is lost. It tells that device | ||
110 | * is about to start to drop packets or already | ||
111 | * drops some packets of the same priority and | ||
112 | * invokes us to send less aggressively. | ||
113 | */ | ||
114 | return err == NET_XMIT_CN ? 0 : err; | ||
115 | } | ||
116 | return -ENOBUFS; | ||
117 | } | ||
118 | |||
119 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | ||
120 | { | ||
121 | struct dccp_sock *dp = dccp_sk(sk); | ||
122 | int mss_now; | ||
123 | |||
124 | /* | ||
125 | * FIXME: we really should be using the af_specific thing to support IPv6. | ||
126 | * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
127 | */ | ||
128 | mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
129 | |||
130 | /* Now subtract optional transport overhead */ | ||
131 | mss_now -= dp->dccps_ext_header_len; | ||
132 | |||
133 | /* | ||
134 | * FIXME: this should come from the CCID infrastructure, where, say, | ||
135 | * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets | ||
136 | * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED | ||
137 | * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to | ||
138 | * make it a multiple of 4 | ||
139 | */ | ||
140 | |||
141 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; | ||
142 | |||
143 | /* And store cached results */ | ||
144 | dp->dccps_pmtu_cookie = pmtu; | ||
145 | dp->dccps_mss_cache = mss_now; | ||
146 | |||
147 | return mss_now; | ||
148 | } | ||
149 | |||
150 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | ||
151 | { | ||
152 | if (inet_sk_rebuild_header(sk) != 0) | ||
153 | return -EHOSTUNREACH; /* Routing failure or similar. */ | ||
154 | |||
155 | return dccp_transmit_skb(sk, (skb_cloned(skb) ? | ||
156 | pskb_copy(skb, GFP_ATOMIC): | ||
157 | skb_clone(skb, GFP_ATOMIC))); | ||
158 | } | ||
159 | |||
160 | struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | ||
161 | struct request_sock *req) | ||
162 | { | ||
163 | struct dccp_hdr *dh; | ||
164 | const int dccp_header_size = sizeof(struct dccp_hdr) + | ||
165 | sizeof(struct dccp_hdr_ext) + | ||
166 | sizeof(struct dccp_hdr_response); | ||
167 | struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + | ||
168 | dccp_header_size, 1, | ||
169 | GFP_ATOMIC); | ||
170 | if (skb == NULL) | ||
171 | return NULL; | ||
172 | |||
173 | /* Reserve space for headers. */ | ||
174 | skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); | ||
175 | |||
176 | skb->dst = dst_clone(dst); | ||
177 | skb->csum = 0; | ||
178 | |||
179 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; | ||
180 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; | ||
181 | dccp_insert_options(sk, skb); | ||
182 | |||
183 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
184 | |||
185 | dh = dccp_hdr(skb); | ||
186 | memset(dh, 0, dccp_header_size); | ||
187 | |||
188 | dh->dccph_sport = inet_sk(sk)->sport; | ||
189 | dh->dccph_dport = inet_rsk(req)->rmt_port; | ||
190 | dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | ||
191 | dh->dccph_type = DCCP_PKT_RESPONSE; | ||
192 | dh->dccph_x = 1; | ||
193 | dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); | ||
194 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); | ||
195 | |||
196 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
197 | |||
198 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
199 | return skb; | ||
200 | } | ||
201 | |||
202 | struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, | ||
203 | const enum dccp_reset_codes code) | ||
204 | |||
205 | { | ||
206 | struct dccp_hdr *dh; | ||
207 | struct dccp_sock *dp = dccp_sk(sk); | ||
208 | const int dccp_header_size = sizeof(struct dccp_hdr) + | ||
209 | sizeof(struct dccp_hdr_ext) + | ||
210 | sizeof(struct dccp_hdr_reset); | ||
211 | struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + | ||
212 | dccp_header_size, 1, | ||
213 | GFP_ATOMIC); | ||
214 | if (skb == NULL) | ||
215 | return NULL; | ||
216 | |||
217 | /* Reserve space for headers. */ | ||
218 | skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); | ||
219 | |||
220 | skb->dst = dst_clone(dst); | ||
221 | skb->csum = 0; | ||
222 | |||
223 | dccp_inc_seqno(&dp->dccps_gss); | ||
224 | |||
225 | DCCP_SKB_CB(skb)->dccpd_reset_code = code; | ||
226 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; | ||
227 | DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; | ||
228 | dccp_insert_options(sk, skb); | ||
229 | |||
230 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
231 | |||
232 | dh = dccp_hdr(skb); | ||
233 | memset(dh, 0, dccp_header_size); | ||
234 | |||
235 | dh->dccph_sport = inet_sk(sk)->sport; | ||
236 | dh->dccph_dport = inet_sk(sk)->dport; | ||
237 | dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | ||
238 | dh->dccph_type = DCCP_PKT_RESET; | ||
239 | dh->dccph_x = 1; | ||
240 | dccp_hdr_set_seq(dh, dp->dccps_gss); | ||
241 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); | ||
242 | |||
243 | dccp_hdr_reset(skb)->dccph_reset_code = code; | ||
244 | |||
245 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
246 | |||
247 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
248 | return skb; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Do all connect socket setups that can be done AF independent. | ||
253 | */ | ||
254 | static inline void dccp_connect_init(struct sock *sk) | ||
255 | { | ||
256 | struct dst_entry *dst = __sk_dst_get(sk); | ||
257 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
258 | |||
259 | sk->sk_err = 0; | ||
260 | sock_reset_flag(sk, SOCK_DONE); | ||
261 | |||
262 | dccp_sync_mss(sk, dst_mtu(dst)); | ||
263 | |||
264 | /* | ||
265 | * FIXME: set dp->{dccps_swh,dccps_swl}, with | ||
266 | * something like dccp_inc_seq | ||
267 | */ | ||
268 | |||
269 | icsk->icsk_retransmits = 0; | ||
270 | } | ||
271 | |||
272 | int dccp_connect(struct sock *sk) | ||
273 | { | ||
274 | struct sk_buff *skb; | ||
275 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
276 | |||
277 | dccp_connect_init(sk); | ||
278 | |||
279 | skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation); | ||
280 | if (unlikely(skb == NULL)) | ||
281 | return -ENOBUFS; | ||
282 | |||
283 | /* Reserve space for headers. */ | ||
284 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
285 | |||
286 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; | ||
287 | /* FIXME: set service to something meaningful, coming | ||
288 | * from userspace*/ | ||
289 | DCCP_SKB_CB(skb)->dccpd_service = 0; | ||
290 | skb->csum = 0; | ||
291 | skb_set_owner_w(skb, sk); | ||
292 | |||
293 | BUG_TRAP(sk->sk_send_head == NULL); | ||
294 | sk->sk_send_head = skb; | ||
295 | dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); | ||
296 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); | ||
297 | |||
298 | /* Timer for repeating the REQUEST until an answer. */ | ||
299 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | ||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | void dccp_send_ack(struct sock *sk) | ||
304 | { | ||
305 | /* If we have been reset, we may not send again. */ | ||
306 | if (sk->sk_state != DCCP_CLOSED) { | ||
307 | struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); | ||
308 | |||
309 | if (skb == NULL) { | ||
310 | inet_csk_schedule_ack(sk); | ||
311 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; | ||
312 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); | ||
313 | return; | ||
314 | } | ||
315 | |||
316 | /* Reserve space for headers */ | ||
317 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
318 | skb->csum = 0; | ||
319 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; | ||
320 | skb_set_owner_w(skb, sk); | ||
321 | dccp_transmit_skb(sk, skb); | ||
322 | } | ||
323 | } | ||
324 | |||
325 | EXPORT_SYMBOL_GPL(dccp_send_ack); | ||
326 | |||
327 | void dccp_send_delayed_ack(struct sock *sk) | ||
328 | { | ||
329 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
330 | /* | ||
331 | * FIXME: tune this timer. elapsed time fixes the skew, so no problem | ||
332 | * with using 2s, and active senders also piggyback the ACK into a | ||
333 | * DATAACK packet, so this is really for quiescent senders. | ||
334 | */ | ||
335 | unsigned long timeout = jiffies + 2 * HZ; | ||
336 | |||
337 | /* Use new timeout only if there wasn't a older one earlier. */ | ||
338 | if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { | ||
339 | /* If delack timer was blocked or is about to expire, | ||
340 | * send ACK now. | ||
341 | * | ||
342 | * FIXME: check the "about to expire" part | ||
343 | */ | ||
344 | if (icsk->icsk_ack.blocked) { | ||
345 | dccp_send_ack(sk); | ||
346 | return; | ||
347 | } | ||
348 | |||
349 | if (!time_before(timeout, icsk->icsk_ack.timeout)) | ||
350 | timeout = icsk->icsk_ack.timeout; | ||
351 | } | ||
352 | icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; | ||
353 | icsk->icsk_ack.timeout = timeout; | ||
354 | sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); | ||
355 | } | ||
356 | |||
357 | void dccp_send_sync(struct sock *sk, u64 seq) | ||
358 | { | ||
359 | /* | ||
360 | * We are not putting this on the write queue, so | ||
361 | * dccp_transmit_skb() will set the ownership to this | ||
362 | * sock. | ||
363 | */ | ||
364 | struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); | ||
365 | |||
366 | if (skb == NULL) | ||
367 | /* FIXME: how to make sure the sync is sent? */ | ||
368 | return; | ||
369 | |||
370 | /* Reserve space for headers and prepare control bits. */ | ||
371 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
372 | skb->csum = 0; | ||
373 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; | ||
374 | DCCP_SKB_CB(skb)->dccpd_seq = seq; | ||
375 | |||
376 | skb_set_owner_w(skb, sk); | ||
377 | dccp_transmit_skb(sk, skb); | ||
378 | } | ||
379 | |||
380 | /* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be | ||
381 | * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. | ||
382 | */ | ||
383 | void dccp_send_close(struct sock *sk) | ||
384 | { | ||
385 | struct dccp_sock *dp = dccp_sk(sk); | ||
386 | struct sk_buff *skb; | ||
387 | |||
388 | /* Socket is locked, keep trying until memory is available. */ | ||
389 | for (;;) { | ||
390 | skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); | ||
391 | if (skb != NULL) | ||
392 | break; | ||
393 | yield(); | ||
394 | } | ||
395 | |||
396 | /* Reserve space for headers and prepare control bits. */ | ||
397 | skb_reserve(skb, sk->sk_prot->max_header); | ||
398 | skb->csum = 0; | ||
399 | DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; | ||
400 | |||
401 | skb_set_owner_w(skb, sk); | ||
402 | dccp_transmit_skb(sk, skb); | ||
403 | |||
404 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
405 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
406 | } | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..70284e6afe05 --- /dev/null +++ b/net/dccp/proto.c | |||
@@ -0,0 +1,818 @@ | |||
1 | /* | ||
2 | * net/dccp/proto.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | #include <linux/dccp.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/netdevice.h> | ||
20 | #include <linux/in.h> | ||
21 | #include <linux/if_arp.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/random.h> | ||
24 | #include <net/checksum.h> | ||
25 | |||
26 | #include <net/inet_common.h> | ||
27 | #include <net/ip.h> | ||
28 | #include <net/protocol.h> | ||
29 | #include <net/sock.h> | ||
30 | #include <net/xfrm.h> | ||
31 | |||
32 | #include <asm/semaphore.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/timer.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <linux/poll.h> | ||
37 | #include <linux/dccp.h> | ||
38 | |||
39 | #include "ccid.h" | ||
40 | #include "dccp.h" | ||
41 | |||
42 | DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); | ||
43 | |||
44 | atomic_t dccp_orphan_count = ATOMIC_INIT(0); | ||
45 | |||
46 | static struct net_protocol dccp_protocol = { | ||
47 | .handler = dccp_v4_rcv, | ||
48 | .err_handler = dccp_v4_err, | ||
49 | }; | ||
50 | |||
51 | const char *dccp_packet_name(const int type) | ||
52 | { | ||
53 | static const char *dccp_packet_names[] = { | ||
54 | [DCCP_PKT_REQUEST] = "REQUEST", | ||
55 | [DCCP_PKT_RESPONSE] = "RESPONSE", | ||
56 | [DCCP_PKT_DATA] = "DATA", | ||
57 | [DCCP_PKT_ACK] = "ACK", | ||
58 | [DCCP_PKT_DATAACK] = "DATAACK", | ||
59 | [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", | ||
60 | [DCCP_PKT_CLOSE] = "CLOSE", | ||
61 | [DCCP_PKT_RESET] = "RESET", | ||
62 | [DCCP_PKT_SYNC] = "SYNC", | ||
63 | [DCCP_PKT_SYNCACK] = "SYNCACK", | ||
64 | }; | ||
65 | |||
66 | if (type >= DCCP_NR_PKT_TYPES) | ||
67 | return "INVALID"; | ||
68 | else | ||
69 | return dccp_packet_names[type]; | ||
70 | } | ||
71 | |||
72 | EXPORT_SYMBOL_GPL(dccp_packet_name); | ||
73 | |||
74 | const char *dccp_state_name(const int state) | ||
75 | { | ||
76 | static char *dccp_state_names[] = { | ||
77 | [DCCP_OPEN] = "OPEN", | ||
78 | [DCCP_REQUESTING] = "REQUESTING", | ||
79 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
80 | [DCCP_LISTEN] = "LISTEN", | ||
81 | [DCCP_RESPOND] = "RESPOND", | ||
82 | [DCCP_CLOSING] = "CLOSING", | ||
83 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
84 | [DCCP_CLOSED] = "CLOSED", | ||
85 | }; | ||
86 | |||
87 | if (state >= DCCP_MAX_STATES) | ||
88 | return "INVALID STATE!"; | ||
89 | else | ||
90 | return dccp_state_names[state]; | ||
91 | } | ||
92 | |||
93 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
94 | |||
95 | static inline int dccp_listen_start(struct sock *sk) | ||
96 | { | ||
97 | dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; | ||
98 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); | ||
99 | } | ||
100 | |||
101 | int dccp_disconnect(struct sock *sk, int flags) | ||
102 | { | ||
103 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
104 | struct inet_sock *inet = inet_sk(sk); | ||
105 | int err = 0; | ||
106 | const int old_state = sk->sk_state; | ||
107 | |||
108 | if (old_state != DCCP_CLOSED) | ||
109 | dccp_set_state(sk, DCCP_CLOSED); | ||
110 | |||
111 | /* ABORT function of RFC793 */ | ||
112 | if (old_state == DCCP_LISTEN) { | ||
113 | inet_csk_listen_stop(sk); | ||
114 | /* FIXME: do the active reset thing */ | ||
115 | } else if (old_state == DCCP_REQUESTING) | ||
116 | sk->sk_err = ECONNRESET; | ||
117 | |||
118 | dccp_clear_xmit_timers(sk); | ||
119 | __skb_queue_purge(&sk->sk_receive_queue); | ||
120 | if (sk->sk_send_head != NULL) { | ||
121 | __kfree_skb(sk->sk_send_head); | ||
122 | sk->sk_send_head = NULL; | ||
123 | } | ||
124 | |||
125 | inet->dport = 0; | ||
126 | |||
127 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | ||
128 | inet_reset_saddr(sk); | ||
129 | |||
130 | sk->sk_shutdown = 0; | ||
131 | sock_reset_flag(sk, SOCK_DONE); | ||
132 | |||
133 | icsk->icsk_backoff = 0; | ||
134 | inet_csk_delack_init(sk); | ||
135 | __sk_dst_reset(sk); | ||
136 | |||
137 | BUG_TRAP(!inet->num || icsk->icsk_bind_hash); | ||
138 | |||
139 | sk->sk_error_report(sk); | ||
140 | return err; | ||
141 | } | ||
142 | |||
143 | int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) | ||
144 | { | ||
145 | dccp_pr_debug("entry\n"); | ||
146 | return -ENOIOCTLCMD; | ||
147 | } | ||
148 | |||
149 | int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
150 | char *optval, int optlen) | ||
151 | { | ||
152 | dccp_pr_debug("entry\n"); | ||
153 | |||
154 | if (level != SOL_DCCP) | ||
155 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
156 | |||
157 | return -EOPNOTSUPP; | ||
158 | } | ||
159 | |||
160 | int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
161 | char *optval, int *optlen) | ||
162 | { | ||
163 | dccp_pr_debug("entry\n"); | ||
164 | |||
165 | if (level != SOL_DCCP) | ||
166 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
167 | |||
168 | return -EOPNOTSUPP; | ||
169 | } | ||
170 | |||
171 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
172 | size_t len) | ||
173 | { | ||
174 | const struct dccp_sock *dp = dccp_sk(sk); | ||
175 | const int flags = msg->msg_flags; | ||
176 | const int noblock = flags & MSG_DONTWAIT; | ||
177 | struct sk_buff *skb; | ||
178 | int rc, size; | ||
179 | long timeo; | ||
180 | |||
181 | if (len > dp->dccps_mss_cache) | ||
182 | return -EMSGSIZE; | ||
183 | |||
184 | lock_sock(sk); | ||
185 | |||
186 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | ||
187 | |||
188 | /* | ||
189 | * We have to use sk_stream_wait_connect here to set sk_write_pending, | ||
190 | * so that the trick in dccp_rcv_request_sent_state_process. | ||
191 | */ | ||
192 | /* Wait for a connection to finish. */ | ||
193 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) | ||
194 | if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) | ||
195 | goto out_err; | ||
196 | |||
197 | size = sk->sk_prot->max_header + len; | ||
198 | release_sock(sk); | ||
199 | skb = sock_alloc_send_skb(sk, size, noblock, &rc); | ||
200 | lock_sock(sk); | ||
201 | |||
202 | if (skb == NULL) | ||
203 | goto out_release; | ||
204 | |||
205 | skb_reserve(skb, sk->sk_prot->max_header); | ||
206 | rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | ||
207 | if (rc == 0) { | ||
208 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
209 | const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
210 | long delay; | ||
211 | |||
212 | /* | ||
213 | * XXX: This is just to match the Waikato tree CA interaction | ||
214 | * points, after the CCID3 code is stable and I have a better | ||
215 | * understanding of behaviour I'll change this to look more like | ||
216 | * TCP. | ||
217 | */ | ||
218 | while (1) { | ||
219 | rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, | ||
220 | skb, len, &delay); | ||
221 | if (rc == 0) | ||
222 | break; | ||
223 | if (rc != -EAGAIN) | ||
224 | goto out_discard; | ||
225 | if (delay > timeo) | ||
226 | goto out_discard; | ||
227 | release_sock(sk); | ||
228 | delay = schedule_timeout(delay); | ||
229 | lock_sock(sk); | ||
230 | timeo -= delay; | ||
231 | if (signal_pending(current)) | ||
232 | goto out_interrupted; | ||
233 | rc = -EPIPE; | ||
234 | if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) | ||
235 | goto out_discard; | ||
236 | } | ||
237 | |||
238 | if (sk->sk_state == DCCP_PARTOPEN) { | ||
239 | /* See 8.1.5. Handshake Completion */ | ||
240 | inet_csk_schedule_ack(sk); | ||
241 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
242 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
243 | /* FIXME: we really should have a dccps_ack_pending or use icsk */ | ||
244 | } else if (inet_csk_ack_scheduled(sk) || | ||
245 | (dp->dccps_options.dccpo_send_ack_vector && | ||
246 | ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && | ||
247 | ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) | ||
248 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
249 | else | ||
250 | dcb->dccpd_type = DCCP_PKT_DATA; | ||
251 | dccp_transmit_skb(sk, skb); | ||
252 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); | ||
253 | } else { | ||
254 | out_discard: | ||
255 | kfree_skb(skb); | ||
256 | } | ||
257 | out_release: | ||
258 | release_sock(sk); | ||
259 | return rc ? : len; | ||
260 | out_err: | ||
261 | rc = sk_stream_error(sk, flags, rc); | ||
262 | goto out_release; | ||
263 | out_interrupted: | ||
264 | rc = sock_intr_errno(timeo); | ||
265 | goto out_discard; | ||
266 | } | ||
267 | |||
268 | EXPORT_SYMBOL(dccp_sendmsg); | ||
269 | |||
270 | int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
271 | size_t len, int nonblock, int flags, int *addr_len) | ||
272 | { | ||
273 | const struct dccp_hdr *dh; | ||
274 | int copied = 0; | ||
275 | unsigned long used; | ||
276 | int err; | ||
277 | int target; /* Read at least this many bytes */ | ||
278 | long timeo; | ||
279 | |||
280 | lock_sock(sk); | ||
281 | |||
282 | err = -ENOTCONN; | ||
283 | if (sk->sk_state == DCCP_LISTEN) | ||
284 | goto out; | ||
285 | |||
286 | timeo = sock_rcvtimeo(sk, nonblock); | ||
287 | |||
288 | /* Urgent data needs to be handled specially. */ | ||
289 | if (flags & MSG_OOB) | ||
290 | goto recv_urg; | ||
291 | |||
292 | /* FIXME */ | ||
293 | #if 0 | ||
294 | seq = &tp->copied_seq; | ||
295 | if (flags & MSG_PEEK) { | ||
296 | peek_seq = tp->copied_seq; | ||
297 | seq = &peek_seq; | ||
298 | } | ||
299 | #endif | ||
300 | |||
301 | target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); | ||
302 | |||
303 | do { | ||
304 | struct sk_buff *skb; | ||
305 | u32 offset; | ||
306 | |||
307 | /* FIXME */ | ||
308 | #if 0 | ||
309 | /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ | ||
310 | if (tp->urg_data && tp->urg_seq == *seq) { | ||
311 | if (copied) | ||
312 | break; | ||
313 | if (signal_pending(current)) { | ||
314 | copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; | ||
315 | break; | ||
316 | } | ||
317 | } | ||
318 | #endif | ||
319 | |||
320 | /* Next get a buffer. */ | ||
321 | |||
322 | skb = skb_peek(&sk->sk_receive_queue); | ||
323 | do { | ||
324 | if (!skb) | ||
325 | break; | ||
326 | |||
327 | offset = 0; | ||
328 | dh = dccp_hdr(skb); | ||
329 | |||
330 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
331 | dh->dccph_type == DCCP_PKT_DATAACK) | ||
332 | goto found_ok_skb; | ||
333 | |||
334 | if (dh->dccph_type == DCCP_PKT_RESET || | ||
335 | dh->dccph_type == DCCP_PKT_CLOSE) { | ||
336 | dccp_pr_debug("found fin ok!\n"); | ||
337 | goto found_fin_ok; | ||
338 | } | ||
339 | dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); | ||
340 | BUG_TRAP(flags & MSG_PEEK); | ||
341 | skb = skb->next; | ||
342 | } while (skb != (struct sk_buff *)&sk->sk_receive_queue); | ||
343 | |||
344 | /* Well, if we have backlog, try to process it now yet. */ | ||
345 | if (copied >= target && !sk->sk_backlog.tail) | ||
346 | break; | ||
347 | |||
348 | if (copied) { | ||
349 | if (sk->sk_err || | ||
350 | sk->sk_state == DCCP_CLOSED || | ||
351 | (sk->sk_shutdown & RCV_SHUTDOWN) || | ||
352 | !timeo || | ||
353 | signal_pending(current) || | ||
354 | (flags & MSG_PEEK)) | ||
355 | break; | ||
356 | } else { | ||
357 | if (sock_flag(sk, SOCK_DONE)) | ||
358 | break; | ||
359 | |||
360 | if (sk->sk_err) { | ||
361 | copied = sock_error(sk); | ||
362 | break; | ||
363 | } | ||
364 | |||
365 | if (sk->sk_shutdown & RCV_SHUTDOWN) | ||
366 | break; | ||
367 | |||
368 | if (sk->sk_state == DCCP_CLOSED) { | ||
369 | if (!sock_flag(sk, SOCK_DONE)) { | ||
370 | /* This occurs when user tries to read | ||
371 | * from never connected socket. | ||
372 | */ | ||
373 | copied = -ENOTCONN; | ||
374 | break; | ||
375 | } | ||
376 | break; | ||
377 | } | ||
378 | |||
379 | if (!timeo) { | ||
380 | copied = -EAGAIN; | ||
381 | break; | ||
382 | } | ||
383 | |||
384 | if (signal_pending(current)) { | ||
385 | copied = sock_intr_errno(timeo); | ||
386 | break; | ||
387 | } | ||
388 | } | ||
389 | |||
390 | /* FIXME: cleanup_rbuf(sk, copied); */ | ||
391 | |||
392 | if (copied >= target) { | ||
393 | /* Do not sleep, just process backlog. */ | ||
394 | release_sock(sk); | ||
395 | lock_sock(sk); | ||
396 | } else | ||
397 | sk_wait_data(sk, &timeo); | ||
398 | |||
399 | continue; | ||
400 | |||
401 | found_ok_skb: | ||
402 | /* Ok so how much can we use? */ | ||
403 | used = skb->len - offset; | ||
404 | if (len < used) | ||
405 | used = len; | ||
406 | |||
407 | if (!(flags & MSG_TRUNC)) { | ||
408 | err = skb_copy_datagram_iovec(skb, offset, | ||
409 | msg->msg_iov, used); | ||
410 | if (err) { | ||
411 | /* Exception. Bailout! */ | ||
412 | if (!copied) | ||
413 | copied = -EFAULT; | ||
414 | break; | ||
415 | } | ||
416 | } | ||
417 | |||
418 | copied += used; | ||
419 | len -= used; | ||
420 | |||
421 | /* FIXME: tcp_rcv_space_adjust(sk); */ | ||
422 | |||
423 | //skip_copy: | ||
424 | if (used + offset < skb->len) | ||
425 | continue; | ||
426 | |||
427 | if (!(flags & MSG_PEEK)) | ||
428 | sk_eat_skb(sk, skb); | ||
429 | continue; | ||
430 | found_fin_ok: | ||
431 | if (!(flags & MSG_PEEK)) | ||
432 | sk_eat_skb(sk, skb); | ||
433 | break; | ||
434 | |||
435 | } while (len > 0); | ||
436 | |||
437 | /* According to UNIX98, msg_name/msg_namelen are ignored | ||
438 | * on connected socket. I was just happy when found this 8) --ANK | ||
439 | */ | ||
440 | |||
441 | /* Clean up data we have read: This will do ACK frames. */ | ||
442 | /* FIXME: cleanup_rbuf(sk, copied); */ | ||
443 | |||
444 | release_sock(sk); | ||
445 | return copied; | ||
446 | |||
447 | out: | ||
448 | release_sock(sk); | ||
449 | return err; | ||
450 | |||
451 | recv_urg: | ||
452 | /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ | ||
453 | goto out; | ||
454 | } | ||
455 | |||
456 | static int inet_dccp_listen(struct socket *sock, int backlog) | ||
457 | { | ||
458 | struct sock *sk = sock->sk; | ||
459 | unsigned char old_state; | ||
460 | int err; | ||
461 | |||
462 | lock_sock(sk); | ||
463 | |||
464 | err = -EINVAL; | ||
465 | if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) | ||
466 | goto out; | ||
467 | |||
468 | old_state = sk->sk_state; | ||
469 | if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) | ||
470 | goto out; | ||
471 | |||
472 | /* Really, if the socket is already in listen state | ||
473 | * we can only allow the backlog to be adjusted. | ||
474 | */ | ||
475 | if (old_state != DCCP_LISTEN) { | ||
476 | /* | ||
477 | * FIXME: here it probably should be sk->sk_prot->listen_start | ||
478 | * see tcp_listen_start | ||
479 | */ | ||
480 | err = dccp_listen_start(sk); | ||
481 | if (err) | ||
482 | goto out; | ||
483 | } | ||
484 | sk->sk_max_ack_backlog = backlog; | ||
485 | err = 0; | ||
486 | |||
487 | out: | ||
488 | release_sock(sk); | ||
489 | return err; | ||
490 | } | ||
491 | |||
492 | static const unsigned char dccp_new_state[] = { | ||
493 | /* current state: new state: action: */ | ||
494 | [0] = DCCP_CLOSED, | ||
495 | [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
496 | [DCCP_REQUESTING] = DCCP_CLOSED, | ||
497 | [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
498 | [DCCP_LISTEN] = DCCP_CLOSED, | ||
499 | [DCCP_RESPOND] = DCCP_CLOSED, | ||
500 | [DCCP_CLOSING] = DCCP_CLOSED, | ||
501 | [DCCP_TIME_WAIT] = DCCP_CLOSED, | ||
502 | [DCCP_CLOSED] = DCCP_CLOSED, | ||
503 | }; | ||
504 | |||
505 | static int dccp_close_state(struct sock *sk) | ||
506 | { | ||
507 | const int next = dccp_new_state[sk->sk_state]; | ||
508 | const int ns = next & DCCP_STATE_MASK; | ||
509 | |||
510 | if (ns != sk->sk_state) | ||
511 | dccp_set_state(sk, ns); | ||
512 | |||
513 | return next & DCCP_ACTION_FIN; | ||
514 | } | ||
515 | |||
516 | void dccp_close(struct sock *sk, long timeout) | ||
517 | { | ||
518 | struct sk_buff *skb; | ||
519 | |||
520 | lock_sock(sk); | ||
521 | |||
522 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
523 | |||
524 | if (sk->sk_state == DCCP_LISTEN) { | ||
525 | dccp_set_state(sk, DCCP_CLOSED); | ||
526 | |||
527 | /* Special case. */ | ||
528 | inet_csk_listen_stop(sk); | ||
529 | |||
530 | goto adjudge_to_death; | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * We need to flush the recv. buffs. We do this only on the | ||
535 | * descriptor close, not protocol-sourced closes, because the | ||
536 | *reader process may not have drained the data yet! | ||
537 | */ | ||
538 | /* FIXME: check for unread data */ | ||
539 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { | ||
540 | __kfree_skb(skb); | ||
541 | } | ||
542 | |||
543 | if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | ||
544 | /* Check zero linger _after_ checking for unread data. */ | ||
545 | sk->sk_prot->disconnect(sk, 0); | ||
546 | } else if (dccp_close_state(sk)) { | ||
547 | dccp_send_close(sk); | ||
548 | } | ||
549 | |||
550 | sk_stream_wait_close(sk, timeout); | ||
551 | |||
552 | adjudge_to_death: | ||
553 | release_sock(sk); | ||
554 | /* | ||
555 | * Now socket is owned by kernel and we acquire BH lock | ||
556 | * to finish close. No need to check for user refs. | ||
557 | */ | ||
558 | local_bh_disable(); | ||
559 | bh_lock_sock(sk); | ||
560 | BUG_TRAP(!sock_owned_by_user(sk)); | ||
561 | |||
562 | sock_hold(sk); | ||
563 | sock_orphan(sk); | ||
564 | |||
565 | if (sk->sk_state != DCCP_CLOSED) | ||
566 | dccp_set_state(sk, DCCP_CLOSED); | ||
567 | |||
568 | atomic_inc(&dccp_orphan_count); | ||
569 | if (sk->sk_state == DCCP_CLOSED) | ||
570 | inet_csk_destroy_sock(sk); | ||
571 | |||
572 | /* Otherwise, socket is reprieved until protocol close. */ | ||
573 | |||
574 | bh_unlock_sock(sk); | ||
575 | local_bh_enable(); | ||
576 | sock_put(sk); | ||
577 | } | ||
578 | |||
/*
 * dccp_shutdown - placeholder for the shutdown() operation
 *
 * Currently only emits a debug message; @sk and @how are not used.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
583 | |||
584 | struct proto_ops inet_dccp_ops = { | ||
585 | .family = PF_INET, | ||
586 | .owner = THIS_MODULE, | ||
587 | .release = inet_release, | ||
588 | .bind = inet_bind, | ||
589 | .connect = inet_stream_connect, | ||
590 | .socketpair = sock_no_socketpair, | ||
591 | .accept = inet_accept, | ||
592 | .getname = inet_getname, | ||
593 | .poll = sock_no_poll, | ||
594 | .ioctl = inet_ioctl, | ||
595 | .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ | ||
596 | .shutdown = inet_shutdown, | ||
597 | .setsockopt = sock_common_setsockopt, | ||
598 | .getsockopt = sock_common_getsockopt, | ||
599 | .sendmsg = inet_sendmsg, | ||
600 | .recvmsg = sock_common_recvmsg, | ||
601 | .mmap = sock_no_mmap, | ||
602 | .sendpage = sock_no_sendpage, | ||
603 | }; | ||
604 | |||
605 | extern struct net_proto_family inet_family_ops; | ||
606 | |||
607 | static struct inet_protosw dccp_v4_protosw = { | ||
608 | .type = SOCK_DCCP, | ||
609 | .protocol = IPPROTO_DCCP, | ||
610 | .prot = &dccp_v4_prot, | ||
611 | .ops = &inet_dccp_ops, | ||
612 | .capability = -1, | ||
613 | .no_check = 0, | ||
614 | .flags = 0, | ||
615 | }; | ||
616 | |||
617 | /* | ||
618 | * This is the global socket data structure used for responding to | ||
619 | * the Out-of-the-blue (OOTB) packets. A control sock will be created | ||
620 | * for this socket at the initialization time. | ||
621 | */ | ||
622 | struct socket *dccp_ctl_socket; | ||
623 | |||
624 | static char dccp_ctl_socket_err_msg[] __initdata = | ||
625 | KERN_ERR "DCCP: Failed to create the control socket.\n"; | ||
626 | |||
627 | static int __init dccp_ctl_sock_init(void) | ||
628 | { | ||
629 | int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, | ||
630 | &dccp_ctl_socket); | ||
631 | if (rc < 0) | ||
632 | printk(dccp_ctl_socket_err_msg); | ||
633 | else { | ||
634 | dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; | ||
635 | inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; | ||
636 | |||
637 | /* Unhash it so that IP input processing does not even | ||
638 | * see it, we do not wish this socket to see incoming | ||
639 | * packets. | ||
640 | */ | ||
641 | dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); | ||
642 | } | ||
643 | |||
644 | return rc; | ||
645 | } | ||
646 | |||
647 | static void __exit dccp_ctl_sock_exit(void) | ||
648 | { | ||
649 | if (dccp_ctl_socket != NULL) | ||
650 | sock_release(dccp_ctl_socket); | ||
651 | } | ||
652 | |||
653 | static int __init init_dccp_v4_mibs(void) | ||
654 | { | ||
655 | int rc = -ENOMEM; | ||
656 | |||
657 | dccp_statistics[0] = alloc_percpu(struct dccp_mib); | ||
658 | if (dccp_statistics[0] == NULL) | ||
659 | goto out; | ||
660 | |||
661 | dccp_statistics[1] = alloc_percpu(struct dccp_mib); | ||
662 | if (dccp_statistics[1] == NULL) | ||
663 | goto out_free_one; | ||
664 | |||
665 | rc = 0; | ||
666 | out: | ||
667 | return rc; | ||
668 | out_free_one: | ||
669 | free_percpu(dccp_statistics[0]); | ||
670 | dccp_statistics[0] = NULL; | ||
671 | goto out; | ||
672 | |||
673 | } | ||
674 | |||
675 | static int thash_entries; | ||
676 | module_param(thash_entries, int, 0444); | ||
677 | MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); | ||
678 | |||
679 | int dccp_debug; | ||
680 | module_param(dccp_debug, int, 0444); | ||
681 | MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); | ||
682 | |||
683 | static int __init dccp_init(void) | ||
684 | { | ||
685 | unsigned long goal; | ||
686 | int ehash_order, bhash_order, i; | ||
687 | int rc = proto_register(&dccp_v4_prot, 1); | ||
688 | |||
689 | if (rc) | ||
690 | goto out; | ||
691 | |||
692 | dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", | ||
693 | sizeof(struct inet_bind_bucket), | ||
694 | 0, SLAB_HWCACHE_ALIGN, | ||
695 | NULL, NULL); | ||
696 | if (!dccp_hashinfo.bind_bucket_cachep) | ||
697 | goto out_proto_unregister; | ||
698 | |||
699 | /* | ||
700 | * Size and allocate the main established and bind bucket | ||
701 | * hash tables. | ||
702 | * | ||
703 | * The methodology is similar to that of the buffer cache. | ||
704 | */ | ||
705 | if (num_physpages >= (128 * 1024)) | ||
706 | goal = num_physpages >> (21 - PAGE_SHIFT); | ||
707 | else | ||
708 | goal = num_physpages >> (23 - PAGE_SHIFT); | ||
709 | |||
710 | if (thash_entries) | ||
711 | goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; | ||
712 | for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) | ||
713 | ; | ||
714 | do { | ||
715 | dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / | ||
716 | sizeof(struct inet_ehash_bucket); | ||
717 | dccp_hashinfo.ehash_size >>= 1; | ||
718 | while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) | ||
719 | dccp_hashinfo.ehash_size--; | ||
720 | dccp_hashinfo.ehash = (struct inet_ehash_bucket *) | ||
721 | __get_free_pages(GFP_ATOMIC, ehash_order); | ||
722 | } while (!dccp_hashinfo.ehash && --ehash_order > 0); | ||
723 | |||
724 | if (!dccp_hashinfo.ehash) { | ||
725 | printk(KERN_CRIT "Failed to allocate DCCP " | ||
726 | "established hash table\n"); | ||
727 | goto out_free_bind_bucket_cachep; | ||
728 | } | ||
729 | |||
730 | for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { | ||
731 | rwlock_init(&dccp_hashinfo.ehash[i].lock); | ||
732 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); | ||
733 | } | ||
734 | |||
735 | bhash_order = ehash_order; | ||
736 | |||
737 | do { | ||
738 | dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / | ||
739 | sizeof(struct inet_bind_hashbucket); | ||
740 | if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) | ||
741 | continue; | ||
742 | dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) | ||
743 | __get_free_pages(GFP_ATOMIC, bhash_order); | ||
744 | } while (!dccp_hashinfo.bhash && --bhash_order >= 0); | ||
745 | |||
746 | if (!dccp_hashinfo.bhash) { | ||
747 | printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); | ||
748 | goto out_free_dccp_ehash; | ||
749 | } | ||
750 | |||
751 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { | ||
752 | spin_lock_init(&dccp_hashinfo.bhash[i].lock); | ||
753 | INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); | ||
754 | } | ||
755 | |||
756 | if (init_dccp_v4_mibs()) | ||
757 | goto out_free_dccp_bhash; | ||
758 | |||
759 | rc = -EAGAIN; | ||
760 | if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) | ||
761 | goto out_free_dccp_v4_mibs; | ||
762 | |||
763 | inet_register_protosw(&dccp_v4_protosw); | ||
764 | |||
765 | rc = dccp_ctl_sock_init(); | ||
766 | if (rc) | ||
767 | goto out_unregister_protosw; | ||
768 | out: | ||
769 | return rc; | ||
770 | out_unregister_protosw: | ||
771 | inet_unregister_protosw(&dccp_v4_protosw); | ||
772 | inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); | ||
773 | out_free_dccp_v4_mibs: | ||
774 | free_percpu(dccp_statistics[0]); | ||
775 | free_percpu(dccp_statistics[1]); | ||
776 | dccp_statistics[0] = dccp_statistics[1] = NULL; | ||
777 | out_free_dccp_bhash: | ||
778 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); | ||
779 | dccp_hashinfo.bhash = NULL; | ||
780 | out_free_dccp_ehash: | ||
781 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); | ||
782 | dccp_hashinfo.ehash = NULL; | ||
783 | out_free_bind_bucket_cachep: | ||
784 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
785 | dccp_hashinfo.bind_bucket_cachep = NULL; | ||
786 | out_proto_unregister: | ||
787 | proto_unregister(&dccp_v4_prot); | ||
788 | goto out; | ||
789 | } | ||
790 | |||
791 | static const char dccp_del_proto_err_msg[] __exitdata = | ||
792 | KERN_ERR "can't remove dccp net_protocol\n"; | ||
793 | |||
794 | static void __exit dccp_fini(void) | ||
795 | { | ||
796 | dccp_ctl_sock_exit(); | ||
797 | |||
798 | inet_unregister_protosw(&dccp_v4_protosw); | ||
799 | |||
800 | if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) | ||
801 | printk(dccp_del_proto_err_msg); | ||
802 | |||
803 | /* Free the control endpoint. */ | ||
804 | sock_release(dccp_ctl_socket); | ||
805 | |||
806 | proto_unregister(&dccp_v4_prot); | ||
807 | |||
808 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
809 | } | ||
810 | |||
811 | module_init(dccp_init); | ||
812 | module_exit(dccp_fini); | ||
813 | |||
814 | /* __stringify doesn't like enums, so use the SOCK_DCCP (6) value directly */ | ||
815 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); | ||
816 | MODULE_LICENSE("GPL"); | ||
817 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); | ||
818 | MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); | ||
diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 000000000000..8c396ee01aac --- /dev/null +++ b/net/dccp/timer.c | |||
@@ -0,0 +1,249 @@ | |||
1 | /* | ||
2 | * net/dccp/timer.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include "dccp.h" | ||
18 | |||
static void dccp_write_timer(unsigned long data);
static void dccp_keepalive_timer(unsigned long data);
static void dccp_delack_timer(unsigned long data);

/*
 * Install the DCCP handlers for the three connection-sock timers:
 * retransmit (write), delayed ACK and keepalive.
 */
void dccp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk,
				  dccp_write_timer,
				  dccp_delack_timer,
				  dccp_keepalive_timer);
}
28 | |||
29 | static void dccp_write_err(struct sock *sk) | ||
30 | { | ||
31 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; | ||
32 | sk->sk_error_report(sk); | ||
33 | |||
34 | dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); | ||
35 | dccp_done(sk); | ||
36 | DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); | ||
37 | } | ||
38 | |||
39 | /* A write timeout has occurred. Process the after effects. */ | ||
40 | static int dccp_write_timeout(struct sock *sk) | ||
41 | { | ||
42 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
43 | int retry_until; | ||
44 | |||
45 | if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { | ||
46 | if (icsk->icsk_retransmits != 0) | ||
47 | dst_negative_advice(&sk->sk_dst_cache); | ||
48 | retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; | ||
49 | } else { | ||
50 | if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { | ||
51 | /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black | ||
52 | hole detection. :-( | ||
53 | |||
54 | It is place to make it. It is not made. I do not want | ||
55 | to make it. It is disguisting. It does not work in any | ||
56 | case. Let me to cite the same draft, which requires for | ||
57 | us to implement this: | ||
58 | |||
59 | "The one security concern raised by this memo is that ICMP black holes | ||
60 | are often caused by over-zealous security administrators who block | ||
61 | all ICMP messages. It is vitally important that those who design and | ||
62 | deploy security systems understand the impact of strict filtering on | ||
63 | upper-layer protocols. The safest web site in the world is worthless | ||
64 | if most TCP implementations cannot transfer data from it. It would | ||
65 | be far nicer to have all of the black holes fixed rather than fixing | ||
66 | all of the TCP implementations." | ||
67 | |||
68 | Golden words :-). | ||
69 | */ | ||
70 | |||
71 | dst_negative_advice(&sk->sk_dst_cache); | ||
72 | } | ||
73 | |||
74 | retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; | ||
75 | /* | ||
76 | * FIXME: see tcp_write_timout and tcp_out_of_resources | ||
77 | */ | ||
78 | } | ||
79 | |||
80 | if (icsk->icsk_retransmits >= retry_until) { | ||
81 | /* Has it gone just too far? */ | ||
82 | dccp_write_err(sk); | ||
83 | return 1; | ||
84 | } | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ | ||
89 | static void dccp_delack_timer(unsigned long data) | ||
90 | { | ||
91 | struct sock *sk = (struct sock *)data; | ||
92 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
93 | |||
94 | bh_lock_sock(sk); | ||
95 | if (sock_owned_by_user(sk)) { | ||
96 | /* Try again later. */ | ||
97 | icsk->icsk_ack.blocked = 1; | ||
98 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); | ||
99 | sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); | ||
100 | goto out; | ||
101 | } | ||
102 | |||
103 | if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) | ||
104 | goto out; | ||
105 | if (time_after(icsk->icsk_ack.timeout, jiffies)) { | ||
106 | sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); | ||
107 | goto out; | ||
108 | } | ||
109 | |||
110 | icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; | ||
111 | |||
112 | if (inet_csk_ack_scheduled(sk)) { | ||
113 | if (!icsk->icsk_ack.pingpong) { | ||
114 | /* Delayed ACK missed: inflate ATO. */ | ||
115 | icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); | ||
116 | } else { | ||
117 | /* Delayed ACK missed: leave pingpong mode and | ||
118 | * deflate ATO. | ||
119 | */ | ||
120 | icsk->icsk_ack.pingpong = 0; | ||
121 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
122 | } | ||
123 | dccp_send_ack(sk); | ||
124 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); | ||
125 | } | ||
126 | out: | ||
127 | bh_unlock_sock(sk); | ||
128 | sock_put(sk); | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * The DCCP retransmit timer. | ||
133 | */ | ||
134 | static void dccp_retransmit_timer(struct sock *sk) | ||
135 | { | ||
136 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
137 | |||
138 | /* | ||
139 | * sk->sk_send_head has to have one skb with | ||
140 | * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP | ||
141 | * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake | ||
142 | * (PARTOPEN timer), etc). | ||
143 | */ | ||
144 | BUG_TRAP(sk->sk_send_head != NULL); | ||
145 | |||
146 | /* | ||
147 | * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was | ||
148 | * sent, no need to retransmit, this sock is dead. | ||
149 | */ | ||
150 | if (dccp_write_timeout(sk)) | ||
151 | goto out; | ||
152 | |||
153 | /* | ||
154 | * We want to know the number of packets retransmitted, not the | ||
155 | * total number of retransmissions of clones of original packets. | ||
156 | */ | ||
157 | if (icsk->icsk_retransmits == 0) | ||
158 | DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); | ||
159 | |||
160 | if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { | ||
161 | /* | ||
162 | * Retransmission failed because of local congestion, | ||
163 | * do not backoff. | ||
164 | */ | ||
165 | if (icsk->icsk_retransmits == 0) | ||
166 | icsk->icsk_retransmits = 1; | ||
167 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
168 | min(icsk->icsk_rto, | ||
169 | TCP_RESOURCE_PROBE_INTERVAL), | ||
170 | TCP_RTO_MAX); | ||
171 | goto out; | ||
172 | } | ||
173 | |||
174 | icsk->icsk_backoff++; | ||
175 | icsk->icsk_retransmits++; | ||
176 | |||
177 | icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); | ||
178 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | ||
179 | if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) | ||
180 | __sk_dst_reset(sk); | ||
181 | out:; | ||
182 | } | ||
183 | |||
184 | static void dccp_write_timer(unsigned long data) | ||
185 | { | ||
186 | struct sock *sk = (struct sock *)data; | ||
187 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
188 | int event = 0; | ||
189 | |||
190 | bh_lock_sock(sk); | ||
191 | if (sock_owned_by_user(sk)) { | ||
192 | /* Try again later */ | ||
193 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); | ||
194 | goto out; | ||
195 | } | ||
196 | |||
197 | if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) | ||
198 | goto out; | ||
199 | |||
200 | if (time_after(icsk->icsk_timeout, jiffies)) { | ||
201 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); | ||
202 | goto out; | ||
203 | } | ||
204 | |||
205 | event = icsk->icsk_pending; | ||
206 | icsk->icsk_pending = 0; | ||
207 | |||
208 | switch (event) { | ||
209 | case ICSK_TIME_RETRANS: | ||
210 | dccp_retransmit_timer(sk); | ||
211 | break; | ||
212 | } | ||
213 | out: | ||
214 | bh_unlock_sock(sk); | ||
215 | sock_put(sk); | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Timer for listening sockets | ||
220 | */ | ||
221 | static void dccp_response_timer(struct sock *sk) | ||
222 | { | ||
223 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
224 | const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; | ||
225 | |||
226 | reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, | ||
227 | DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); | ||
228 | } | ||
229 | |||
230 | static void dccp_keepalive_timer(unsigned long data) | ||
231 | { | ||
232 | struct sock *sk = (struct sock *)data; | ||
233 | |||
234 | /* Only process if socket is not in use. */ | ||
235 | bh_lock_sock(sk); | ||
236 | if (sock_owned_by_user(sk)) { | ||
237 | /* Try again later. */ | ||
238 | inet_csk_reset_keepalive_timer(sk, HZ / 20); | ||
239 | goto out; | ||
240 | } | ||
241 | |||
242 | if (sk->sk_state == DCCP_LISTEN) { | ||
243 | dccp_response_timer(sk); | ||
244 | goto out; | ||
245 | } | ||
246 | out: | ||
247 | bh_unlock_sock(sk); | ||
248 | sock_put(sk); | ||
249 | } | ||