diff options
| -rw-r--r-- | include/linux/dccp.h | 432 | ||||
| -rw-r--r-- | include/linux/in.h | 1 | ||||
| -rw-r--r-- | include/linux/net.h | 1 | ||||
| -rw-r--r-- | include/linux/socket.h | 1 | ||||
| -rw-r--r-- | net/Kconfig | 1 | ||||
| -rw-r--r-- | net/Makefile | 1 | ||||
| -rw-r--r-- | net/dccp/Kconfig | 24 | ||||
| -rw-r--r-- | net/dccp/Makefile | 5 | ||||
| -rw-r--r-- | net/dccp/ccid.c | 139 | ||||
| -rw-r--r-- | net/dccp/ccid.h | 156 | ||||
| -rw-r--r-- | net/dccp/ccids/Kconfig | 25 | ||||
| -rw-r--r-- | net/dccp/ccids/Makefile | 3 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid3.c | 2164 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid3.h | 137 | ||||
| -rw-r--r-- | net/dccp/dccp.h | 422 | ||||
| -rw-r--r-- | net/dccp/input.c | 510 | ||||
| -rw-r--r-- | net/dccp/ipv4.c | 1289 | ||||
| -rw-r--r-- | net/dccp/minisocks.c | 199 | ||||
| -rw-r--r-- | net/dccp/options.c | 763 | ||||
| -rw-r--r-- | net/dccp/output.c | 406 | ||||
| -rw-r--r-- | net/dccp/proto.c | 818 | ||||
| -rw-r--r-- | net/dccp/timer.c | 249 |
22 files changed, 7746 insertions, 0 deletions
diff --git a/include/linux/dccp.h b/include/linux/dccp.h new file mode 100644 index 000000000000..e3b4bf7346bb --- /dev/null +++ b/include/linux/dccp.h | |||
| @@ -0,0 +1,432 @@ | |||
| 1 | #ifndef _LINUX_DCCP_H | ||
| 2 | #define _LINUX_DCCP_H | ||
| 3 | |||
| 4 | #include <linux/in.h> | ||
| 5 | #include <linux/list.h> | ||
| 6 | #include <linux/types.h> | ||
| 7 | #include <linux/uio.h> | ||
| 8 | #include <linux/workqueue.h> | ||
| 9 | |||
| 10 | #include <net/inet_connection_sock.h> | ||
| 11 | #include <net/sock.h> | ||
| 12 | #include <net/tcp_states.h> | ||
| 13 | #include <net/tcp.h> | ||
| 14 | |||
| 15 | /* FIXME: this is utterly wrong */ | ||
| 16 | struct sockaddr_dccp { /* DCCP socket address: an IPv4 sockaddr followed by a service value */ | ||
| 17 | struct sockaddr_in in; /* embedded IPv4 socket address */ | ||
| 18 | unsigned int service; /* NOTE(review): presumably the DCCP service code -- confirm */ | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum dccp_state { /* DCCP connection states; every value is an alias of a TCP_* state constant */ | ||
| 22 | DCCP_OPEN = TCP_ESTABLISHED, | ||
| 23 | DCCP_REQUESTING = TCP_SYN_SENT, | ||
| 24 | DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: | ||
| 25 | This mapping is horrible, but TCP has | ||
| 26 | no matching state for DCCP_PARTOPEN, | ||
| 27 | as TCP_SYN_RECV is already used by | ||
| 28 | DCCP_RESPOND. Why not stop using the TCP | ||
| 29 | mapping of states? Now that we don't use | ||
| 30 | sk_stream_sendmsg anymore, there doesn't | ||
| 31 | seem to be any reason for us to | ||
| 32 | do the TCP mapping here */ | ||
| 33 | DCCP_LISTEN = TCP_LISTEN, | ||
| 34 | DCCP_RESPOND = TCP_SYN_RECV, | ||
| 35 | DCCP_CLOSING = TCP_CLOSING, | ||
| 36 | DCCP_TIME_WAIT = TCP_TIME_WAIT, | ||
| 37 | DCCP_CLOSED = TCP_CLOSE, | ||
| 38 | DCCP_MAX_STATES = TCP_MAX_STATES, /* same upper bound as the TCP state enumeration */ | ||
| 39 | }; | ||
| 40 | |||
| 41 | #define DCCP_STATE_MASK 0xf /* low four bits; NOTE(review): presumably selects the state part of a state/action word -- confirm */ | ||
| 42 | #define DCCP_ACTION_FIN (1<<7) /* bit 7, outside DCCP_STATE_MASK */ | ||
| 43 | |||
| 44 | enum { /* DCCPF_* state constants, each an alias of the matching TCPF_* value (same pairing as enum dccp_state) */ | ||
| 45 | DCCPF_OPEN = TCPF_ESTABLISHED, | ||
| 46 | DCCPF_REQUESTING = TCPF_SYN_SENT, | ||
| 47 | DCCPF_PARTOPEN = TCPF_FIN_WAIT1, /* same borrowed TCP state as DCCP_PARTOPEN */ | ||
| 48 | DCCPF_LISTEN = TCPF_LISTEN, | ||
| 49 | DCCPF_RESPOND = TCPF_SYN_RECV, | ||
| 50 | DCCPF_CLOSING = TCPF_CLOSING, | ||
| 51 | DCCPF_TIME_WAIT = TCPF_TIME_WAIT, | ||
| 52 | DCCPF_CLOSED = TCPF_CLOSE, | ||
| 53 | }; | ||
| 54 | |||
| 55 | /** | ||
| 56 | * struct dccp_hdr - generic part of DCCP packet header | ||
| 57 | * | ||
| 58 | * @dccph_sport - Relevant port on the endpoint that sent this packet | ||
| 59 | * @dccph_dport - Relevant port on the other endpoint | ||
| 60 | * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words | ||
| 61 | * @dccph_ccval - Used by the HC-Sender CCID | ||
| 62 | * @dccph_cscov - Parts of the packet that are covered by the Checksum field | ||
| 63 | * @dccph_checksum - Internet checksum, depends on dccph_cscov | ||
| 64 | * @dccph_x - 0 = 24 bit sequence number, 1 = 48 | ||
| 65 | * @dccph_type - packet type, see DCCP_PKT_ prefixed macros | ||
| 66 | * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x | ||
| 67 | */ | ||
| 68 | struct dccp_hdr { /* bitfield declaration order below is chosen by the __*_ENDIAN_BITFIELD macros */ | ||
| 69 | __u16 dccph_sport, | ||
| 70 | dccph_dport; | ||
| 71 | __u8 dccph_doff; | ||
| 72 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
| 73 | __u8 dccph_cscov:4, | ||
| 74 | dccph_ccval:4; | ||
| 75 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
| 76 | __u8 dccph_ccval:4, | ||
| 77 | dccph_cscov:4; | ||
| 78 | #else | ||
| 79 | #error "Adjust your <asm/byteorder.h> defines" | ||
| 80 | #endif | ||
| 81 | __u16 dccph_checksum; | ||
| 82 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
| 83 | __u32 dccph_x:1, | ||
| 84 | dccph_type:4, | ||
| 85 | dccph_reserved:3, /* three bits with no kerneldoc entry; named as reserved */ | ||
| 86 | dccph_seq:24; /* 24-bit field; dccp_hdr_seq() shifts it by 8 before ntohl() on this branch */ | ||
| 87 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
| 88 | __u32 dccph_reserved:3, | ||
| 89 | dccph_type:4, | ||
| 90 | dccph_x:1, | ||
| 91 | dccph_seq:24; | ||
| 92 | #else | ||
| 93 | #error "Adjust your <asm/byteorder.h> defines" | ||
| 94 | #endif | ||
| 95 | }; | ||
| 96 | |||
| 97 | static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) | ||
| 98 | { | ||
| 99 | return (struct dccp_hdr *)skb->h.raw; | ||
| 100 | } | ||
| 101 | |||
| 102 | /** | ||
| 103 | * struct dccp_hdr_ext - the low bits of a 48 bit seq packet | ||
| 104 | * | ||
| 105 | * @dccph_seq_low - low 24 bits of a 48 bit seq packet | ||
| 106 | */ | ||
| 107 | struct dccp_hdr_ext { /* located directly after struct dccp_hdr (see dccp_hdrx()) */ | ||
| 108 | __u32 dccph_seq_low; /* read through ntohl() by dccp_hdr_seq() */ | ||
| 109 | }; | ||
| 110 | |||
| 111 | static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) | ||
| 112 | { | ||
| 113 | return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); | ||
| 114 | } | ||
| 115 | |||
| 116 | static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) | ||
| 117 | { | ||
| 118 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
| 119 | return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); | ||
| 120 | } | ||
| 121 | |||
| 122 | static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) /* Build the packet's sequence number from the generic (and, if dccph_x is set, extended) header */ | ||
| 123 | { | ||
| 124 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
| 125 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
| 126 | __u64 seq_nr = ntohl(dh->dccph_seq << 8); /* NOTE(review): the << 8 presumably re-aligns the 24-bit field inside the 32-bit word before the byte swap -- confirm */ | ||
| 127 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
| 128 | __u64 seq_nr = ntohl(dh->dccph_seq); | ||
| 129 | #else | ||
| 130 | #error "Adjust your <asm/byteorder.h> defines" | ||
| 131 | #endif | ||
| 132 | |||
| 133 | if (dh->dccph_x != 0) /* extended header in use: the value so far becomes the high part */ | ||
| 134 | seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); /* NOTE(review): shifts by 32 and adds a full 32-bit word, while the kerneldoc calls seq_low "24 bits" -- confirm intended layout */ | ||
| 135 | |||
| 136 | return seq_nr; | ||
| 137 | } | ||
| 138 | |||
| 139 | /** | ||
| 140 | * struct dccp_hdr_request - Connection initiation request header | ||
| 141 | * | ||
| 142 | * @dccph_req_service - Service to which the client app wants to connect | ||
| 143 | * @dccph_req_options - list of options (must be a multiple of 32 bits) | ||
| 144 | */ | ||
| 145 | struct dccp_hdr_request { /* follows the generic header (see dccp_hdr_request()) */ | ||
| 146 | __u32 dccph_req_service; | ||
| 147 | }; | ||
| 148 | |||
| 149 | static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) | ||
| 150 | { | ||
| 151 | return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
| 152 | } | ||
| 153 | |||
| 154 | /** | ||
| 155 | * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets | ||
| 156 | * | ||
| 157 | * @dccph_ack_nr_high - 48 bit ack number high order bits, contains GSR | ||
| 158 | * @dccph_ack_nr_low - 48 bit ack number low order bits, contains GSR | ||
| 159 | */ | ||
| 160 | struct dccp_hdr_ack_bits { /* follows the generic header (see dccp_hdr_ack_bits()) */ | ||
| 161 | __u32 dccph_reserved1:8, /* eight bits, named as reserved */ | ||
| 162 | dccph_ack_nr_high:24; /* NOTE(review): unlike struct dccp_hdr, this bitfield order has no endianness #if -- confirm intended */ | ||
| 163 | __u32 dccph_ack_nr_low; | ||
| 164 | }; | ||
| 165 | |||
| 166 | static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) | ||
| 167 | { | ||
| 168 | return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
| 169 | } | ||
| 170 | |||
| 171 | static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) /* Combine the high and low ack-number fields of the ack sub-header into one u64 */ | ||
| 172 | { | ||
| 173 | const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); | ||
| 174 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
| 175 | return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); /* same << 8 before ntohl() as dccp_hdr_seq() uses on this branch */ | ||
| 176 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
| 177 | return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); | ||
| 178 | #else | ||
| 179 | #error "Adjust your <asm/byteorder.h> defines" | ||
| 180 | #endif | ||
| 181 | } | ||
| 182 | |||
| 183 | /** | ||
| 184 | * struct dccp_hdr_response - Connection initiation response header | ||
| 185 | * | ||
| 186 | * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR | ||
| 187 | * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR | ||
| 188 | * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request | ||
| 189 | * @dccph_resp_options - list of options (must be a multiple of 32 bits) | ||
| 190 | */ | ||
| 191 | struct dccp_hdr_response { /* follows the generic header (see dccp_hdr_response()) */ | ||
| 192 | struct dccp_hdr_ack_bits dccph_resp_ack; /* holds the ack number high/low fields */ | ||
| 193 | __u32 dccph_resp_service; | ||
| 194 | }; | ||
| 195 | |||
| 196 | static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) | ||
| 197 | { | ||
| 198 | return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
| 199 | } | ||
| 200 | |||
| 201 | /** | ||
| 202 | * struct dccp_hdr_reset - Unconditionally shut down a connection | ||
| 203 | * | ||
| 204 | * @dccph_reset_service - Echoes the Service Code on a received DCCP-Request | ||
| 205 | * @dccph_reset_options - list of options (must be a multiple of 32 bits) | ||
| 206 | */ | ||
| 207 | struct dccp_hdr_reset { /* follows the generic header (see dccp_hdr_reset()) */ | ||
| 208 | struct dccp_hdr_ack_bits dccph_reset_ack; /* holds the ack number high/low fields */ | ||
| 209 | __u8 dccph_reset_code, /* NOTE(review): presumably one of enum dccp_reset_codes -- confirm */ | ||
| 210 | dccph_reset_data[3]; /* three bytes accompanying the code */ | ||
| 211 | }; | ||
| 212 | |||
| 213 | static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) | ||
| 214 | { | ||
| 215 | return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
| 216 | } | ||
| 217 | |||
| 218 | enum dccp_pkt_type { /* packet types carried in the 4-bit dccph_type field of struct dccp_hdr */ | ||
| 219 | DCCP_PKT_REQUEST = 0, | ||
| 220 | DCCP_PKT_RESPONSE, | ||
| 221 | DCCP_PKT_DATA, | ||
| 222 | DCCP_PKT_ACK, | ||
| 223 | DCCP_PKT_DATAACK, | ||
| 224 | DCCP_PKT_CLOSEREQ, | ||
| 225 | DCCP_PKT_CLOSE, | ||
| 226 | DCCP_PKT_RESET, | ||
| 227 | DCCP_PKT_SYNC, | ||
| 228 | DCCP_PKT_SYNCACK, | ||
| 229 | DCCP_PKT_INVALID, /* first value past the real types; reused below as the type count */ | ||
| 230 | }; | ||
| 231 | |||
| 232 | #define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID | ||
| 233 | |||
| 234 | static inline unsigned int dccp_packet_hdr_len(const __u8 type) | ||
| 235 | { | ||
| 236 | if (type == DCCP_PKT_DATA) | ||
| 237 | return 0; | ||
| 238 | if (type == DCCP_PKT_DATAACK || | ||
| 239 | type == DCCP_PKT_ACK || | ||
| 240 | type == DCCP_PKT_SYNC || | ||
| 241 | type == DCCP_PKT_SYNCACK || | ||
| 242 | type == DCCP_PKT_CLOSE || | ||
| 243 | type == DCCP_PKT_CLOSEREQ) | ||
| 244 | return sizeof(struct dccp_hdr_ack_bits); | ||
| 245 | if (type == DCCP_PKT_REQUEST) | ||
| 246 | return sizeof(struct dccp_hdr_request); | ||
| 247 | if (type == DCCP_PKT_RESPONSE) | ||
| 248 | return sizeof(struct dccp_hdr_response); | ||
| 249 | return sizeof(struct dccp_hdr_reset); | ||
| 250 | } | ||
| 251 | |||
| 252 | static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) | ||
| 253 | { | ||
| 254 | return dccp_basic_hdr_len(skb) + | ||
| 255 | dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); | ||
| 256 | } | ||
| 257 | |||
| 258 | enum dccp_reset_codes { /* NOTE(review): presumably the values carried in dccph_reset_code of struct dccp_hdr_reset -- confirm */ | ||
| 259 | DCCP_RESET_CODE_UNSPECIFIED = 0, | ||
| 260 | DCCP_RESET_CODE_CLOSED, | ||
| 261 | DCCP_RESET_CODE_ABORTED, | ||
| 262 | DCCP_RESET_CODE_NO_CONNECTION, | ||
| 263 | DCCP_RESET_CODE_PACKET_ERROR, | ||
| 264 | DCCP_RESET_CODE_OPTION_ERROR, | ||
| 265 | DCCP_RESET_CODE_MANDATORY_ERROR, | ||
| 266 | DCCP_RESET_CODE_CONNECTION_REFUSED, | ||
| 267 | DCCP_RESET_CODE_BAD_SERVICE_CODE, | ||
| 268 | DCCP_RESET_CODE_TOO_BUSY, | ||
| 269 | DCCP_RESET_CODE_BAD_INIT_COOKIE, | ||
| 270 | DCCP_RESET_CODE_AGGRESSION_PENALTY, /* last code defined here; value 11 by enumeration order */ | ||
| 271 | }; | ||
| 272 | |||
| 273 | /* DCCP options */ | ||
| 274 | enum { /* option type numbers; the values are not contiguous */ | ||
| 275 | DCCPO_PADDING = 0, | ||
| 276 | DCCPO_MANDATORY = 1, | ||
| 277 | DCCPO_MIN_RESERVED = 3, /* lower bound of the reserved range */ | ||
| 278 | DCCPO_MAX_RESERVED = 31, /* upper bound of the reserved range */ | ||
| 279 | DCCPO_NDP_COUNT = 37, | ||
| 280 | DCCPO_ACK_VECTOR_0 = 38, | ||
| 281 | DCCPO_ACK_VECTOR_1 = 39, | ||
| 282 | DCCPO_TIMESTAMP = 41, | ||
| 283 | DCCPO_TIMESTAMP_ECHO = 42, | ||
| 284 | DCCPO_ELAPSED_TIME = 43, | ||
| 285 | DCCPO_MAX = 45, | ||
| 286 | DCCPO_MIN_CCID_SPECIFIC = 128, /* 128..255: range named as CCID-specific */ | ||
| 287 | DCCPO_MAX_CCID_SPECIFIC = 255, | ||
| 288 | }; | ||
| 289 | |||
| 290 | /* DCCP features */ | ||
| 291 | enum { /* feature numbers; note these share the DCCPF_ prefix with the state constants defined earlier */ | ||
| 292 | DCCPF_RESERVED = 0, | ||
| 293 | DCCPF_SEQUENCE_WINDOW = 3, | ||
| 294 | DCCPF_SEND_ACK_VECTOR = 6, | ||
| 295 | DCCPF_SEND_NDP_COUNT = 7, | ||
| 296 | /* 10-127 reserved */ | ||
| 297 | DCCPF_MIN_CCID_SPECIFIC = 128, /* 128..255: range named as CCID-specific */ | ||
| 298 | DCCPF_MAX_CCID_SPECIFIC = 255, | ||
| 299 | }; | ||
| 300 | |||
| 301 | /* initial values for each feature */ | ||
| 302 | #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 | ||
| 303 | /* FIXME: for now we're using CCID 3 (TFRC) */ | ||
| 304 | #define DCCPF_INITIAL_CCID 3 | ||
| 305 | #define DCCPF_INITIAL_SEND_ACK_VECTOR 0 | ||
| 306 | /* FIXME: for now we default to 1, but it should really be 0 */ | ||
| 307 | #define DCCPF_INITIAL_SEND_NDP_COUNT 1 | ||
| 308 | |||
| 309 | #define DCCP_NDP_LIMIT 0xFFFFFF | ||
| 310 | |||
| 311 | /** | ||
| 312 | * struct dccp_options - option values for a DCCP connection | ||
| 313 | * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2) | ||
| 314 | * @dccpo_ccid - Congestion Control Id (CCID) (section 10) | ||
| 315 | * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5) | ||
| 316 | * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2) | ||
| 317 | */ | ||
| 318 | struct dccp_options { /* per-connection feature values; embedded in struct dccp_sock as dccps_options */ | ||
| 319 | __u64 dccpo_sequence_window; | ||
| 320 | __u8 dccpo_ccid; | ||
| 321 | __u8 dccpo_send_ack_vector; | ||
| 322 | __u8 dccpo_send_ndp_count; | ||
| 323 | }; | ||
| 324 | |||
| 325 | extern void __dccp_options_init(struct dccp_options *dccpo); | ||
| 326 | extern void dccp_options_init(struct dccp_options *dccpo); | ||
| 327 | extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb); | ||
| 328 | |||
| 329 | struct dccp_request_sock { /* DCCP request sock; dccp_rsk() obtains it by casting a struct request_sock pointer */ | ||
| 330 | struct inet_request_sock dreq_inet_rsk; /* first member, which is what makes the dccp_rsk() cast line up */ | ||
| 331 | __u64 dreq_iss; /* NOTE(review): presumably initial sequence number sent, cf. dccps_iss -- confirm */ | ||
| 332 | __u64 dreq_isr; /* NOTE(review): presumably initial sequence number received, cf. dccps_isr -- confirm */ | ||
| 333 | __u32 dreq_service; | ||
| 334 | }; | ||
| 335 | |||
/*
 * dccp_rsk - view a generic request sock as a DCCP request sock
 * @req: request sock to reinterpret
 *
 * Pure pointer cast; no type check is performed.
 */
static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
{
	struct dccp_request_sock *dreq = (struct dccp_request_sock *)req;

	return dreq;
}
| 340 | |||
| 341 | /* Read about the ECN nonce to see why it is 253 */ | ||
| 342 | #define DCCP_MAX_ACK_VECTOR_LEN 253 | ||
| 343 | |||
| 344 | struct dccp_options_received { /* values recorded from received options; embedded in struct dccp_sock */ | ||
| 345 | u32 dccpor_ndp:24, /* 24 bits wide, matching DCCP_NDP_LIMIT (0xFFFFFF) */ | ||
| 346 | dccpor_ack_vector_len:8; /* 8 bits, enough for DCCP_MAX_ACK_VECTOR_LEN (253) */ | ||
| 347 | u32 dccpor_ack_vector_idx:10; | ||
| 348 | /* 22 bits hole, try to pack */ | ||
| 349 | u32 dccpor_timestamp; | ||
| 350 | u32 dccpor_timestamp_echo; | ||
| 351 | u32 dccpor_elapsed_time; | ||
| 352 | }; | ||
| 353 | |||
| 354 | struct ccid; | ||
| 355 | |||
| 356 | enum dccp_role { /* four values (0..3); stored in the 2-bit dccps_role field of struct dccp_sock */ | ||
| 357 | DCCP_ROLE_UNDEFINED, | ||
| 358 | DCCP_ROLE_LISTEN, | ||
| 359 | DCCP_ROLE_CLIENT, | ||
| 360 | DCCP_ROLE_SERVER, | ||
| 361 | }; | ||
| 362 | |||
| 363 | /** | ||
| 364 | * struct dccp_sock - DCCP socket state | ||
| 365 | * | ||
| 366 | * @dccps_swl - sequence number window low | ||
| 367 | * @dccps_swh - sequence number window high | ||
| 368 | * @dccps_awl - acknowledgement number window low | ||
| 369 | * @dccps_awh - acknowledgement number window high | ||
| 370 | * @dccps_iss - initial sequence number sent | ||
| 371 | * @dccps_isr - initial sequence number received | ||
| 372 | * @dccps_osr - first OPEN sequence number received | ||
| 373 | * @dccps_gss - greatest sequence number sent | ||
| 374 | * @dccps_gsr - greatest valid sequence number received | ||
| 375 | * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss | ||
| 376 | * @dccps_timestamp_time - time of latest TIMESTAMP option | ||
| 377 | * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option | ||
| 378 | * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) | ||
| 379 | * @dccps_pmtu_cookie - Last pmtu seen by socket | ||
| 380 | * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it | ||
| 381 | * @dccps_role - Role of this sock, one of %dccp_role | ||
| 382 | * @dccps_ndp_count - number of Non Data Packets since last data packet | ||
| 383 | * @dccps_hc_rx_ackpkts - receiver half connection acked packets | ||
| 384 | */ | ||
| 385 | struct dccp_sock { | ||
| 386 | /* inet_connection_sock has to be the first member of dccp_sock */ | ||
| 387 | struct inet_connection_sock dccps_inet_connection; | ||
| 388 | __u64 dccps_swl; | ||
| 389 | __u64 dccps_swh; | ||
| 390 | __u64 dccps_awl; | ||
| 391 | __u64 dccps_awh; | ||
| 392 | __u64 dccps_iss; | ||
| 393 | __u64 dccps_isr; | ||
| 394 | __u64 dccps_osr; | ||
| 395 | __u64 dccps_gss; | ||
| 396 | __u64 dccps_gsr; | ||
| 397 | __u64 dccps_gar; | ||
| 398 | unsigned long dccps_service; /* no kerneldoc entry; NOTE(review): presumably the service code -- confirm */ | ||
| 399 | unsigned long dccps_timestamp_time; | ||
| 400 | __u32 dccps_timestamp_echo; | ||
| 401 | __u32 dccps_avg_packet_size; | ||
| 402 | unsigned long dccps_ndp_count; | ||
| 403 | __u16 dccps_ext_header_len; | ||
| 404 | __u32 dccps_pmtu_cookie; | ||
| 405 | __u32 dccps_mss_cache; /* no kerneldoc entry; NOTE(review): presumably a cached MSS -- confirm */ | ||
| 406 | struct dccp_options dccps_options; /* feature values for this connection */ | ||
| 407 | struct dccp_ackpkts *dccps_hc_rx_ackpkts; | ||
| 408 | void *dccps_hc_rx_ccid_private; /* opaque pointer; NOTE(review): presumably owned by the rx CCID -- confirm */ | ||
| 409 | void *dccps_hc_tx_ccid_private; /* opaque pointer; NOTE(review): presumably owned by the tx CCID -- confirm */ | ||
| 410 | struct ccid *dccps_hc_rx_ccid; /* CCID used by the receiver half connection */ | ||
| 411 | struct ccid *dccps_hc_tx_ccid; /* CCID used by the sender half connection */ | ||
| 412 | struct dccp_options_received dccps_options_received; | ||
| 413 | enum dccp_role dccps_role:2; /* two bits hold the four enum dccp_role values */ | ||
| 414 | }; | ||
| 415 | |||
/*
 * dccp_sk - view a generic sock as a DCCP sock
 * @sk: socket to reinterpret
 *
 * Pure pointer cast; no type check is performed.
 */
static inline struct dccp_sock *dccp_sk(const struct sock *sk)
{
	struct dccp_sock *dp = (struct dccp_sock *)sk;

	return dp;
}
| 420 | |||
| 421 | static inline const char *dccp_role(const struct sock *sk) | ||
| 422 | { | ||
| 423 | switch (dccp_sk(sk)->dccps_role) { | ||
| 424 | case DCCP_ROLE_UNDEFINED: return "undefined"; | ||
| 425 | case DCCP_ROLE_LISTEN: return "listen"; | ||
| 426 | case DCCP_ROLE_SERVER: return "server"; | ||
| 427 | case DCCP_ROLE_CLIENT: return "client"; | ||
| 428 | } | ||
| 429 | return NULL; | ||
| 430 | } | ||
| 431 | |||
| 432 | #endif /* _LINUX_DCCP_H */ | ||
diff --git a/include/linux/in.h b/include/linux/in.h index fb88c66d748d..ba355384016a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h | |||
| @@ -32,6 +32,7 @@ enum { | |||
| 32 | IPPROTO_PUP = 12, /* PUP protocol */ | 32 | IPPROTO_PUP = 12, /* PUP protocol */ |
| 33 | IPPROTO_UDP = 17, /* User Datagram Protocol */ | 33 | IPPROTO_UDP = 17, /* User Datagram Protocol */ |
| 34 | IPPROTO_IDP = 22, /* XNS IDP protocol */ | 34 | IPPROTO_IDP = 22, /* XNS IDP protocol */ |
| 35 | IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ | ||
| 35 | IPPROTO_RSVP = 46, /* RSVP protocol */ | 36 | IPPROTO_RSVP = 46, /* RSVP protocol */ |
| 36 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ | 37 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ |
| 37 | 38 | ||
diff --git a/include/linux/net.h b/include/linux/net.h index 39906619b9d7..5f8b632ff653 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
| @@ -84,6 +84,7 @@ enum sock_type { | |||
| 84 | SOCK_RAW = 3, | 84 | SOCK_RAW = 3, |
| 85 | SOCK_RDM = 4, | 85 | SOCK_RDM = 4, |
| 86 | SOCK_SEQPACKET = 5, | 86 | SOCK_SEQPACKET = 5, |
| 87 | SOCK_DCCP = 6, | ||
| 87 | SOCK_PACKET = 10, | 88 | SOCK_PACKET = 10, |
| 88 | }; | 89 | }; |
| 89 | 90 | ||
diff --git a/include/linux/socket.h b/include/linux/socket.h index a5c7d96e4d2e..ddf22559f484 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h | |||
| @@ -271,6 +271,7 @@ struct ucred { | |||
| 271 | #define SOL_IRDA 266 | 271 | #define SOL_IRDA 266 |
| 272 | #define SOL_NETBEUI 267 | 272 | #define SOL_NETBEUI 267 |
| 273 | #define SOL_LLC 268 | 273 | #define SOL_LLC 268 |
| 274 | #define SOL_DCCP 269 | ||
| 274 | 275 | ||
| 275 | /* IPX options */ | 276 | /* IPX options */ |
| 276 | #define IPX_TYPE 1 | 277 | #define IPX_TYPE 1 |
diff --git a/net/Kconfig b/net/Kconfig index 02877ac0f2f4..c07aafb59a0f 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
| @@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig" | |||
| 147 | 147 | ||
| 148 | endif | 148 | endif |
| 149 | 149 | ||
| 150 | source "net/dccp/Kconfig" | ||
| 150 | source "net/sctp/Kconfig" | 151 | source "net/sctp/Kconfig" |
| 151 | source "net/atm/Kconfig" | 152 | source "net/atm/Kconfig" |
| 152 | source "net/bridge/Kconfig" | 153 | source "net/bridge/Kconfig" |
diff --git a/net/Makefile b/net/Makefile index 4a01be8d3e1e..7e6eff206c81 100644 --- a/net/Makefile +++ b/net/Makefile | |||
| @@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/ | |||
| 42 | obj-$(CONFIG_DECNET) += decnet/ | 42 | obj-$(CONFIG_DECNET) += decnet/ |
| 43 | obj-$(CONFIG_ECONET) += econet/ | 43 | obj-$(CONFIG_ECONET) += econet/ |
| 44 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ | 44 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ |
| 45 | obj-$(CONFIG_IP_DCCP) += dccp/ | ||
| 45 | obj-$(CONFIG_IP_SCTP) += sctp/ | 46 | obj-$(CONFIG_IP_SCTP) += sctp/ |
| 46 | 47 | ||
| 47 | ifeq ($(CONFIG_NET),y) | 48 | ifeq ($(CONFIG_NET),y) |
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 000000000000..90460bc629b3 --- /dev/null +++ b/net/dccp/Kconfig | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | menu "DCCP Configuration (EXPERIMENTAL)" | ||
| 2 | depends on INET && EXPERIMENTAL | ||
| 3 | |||
| 4 | config IP_DCCP | ||
| 5 | tristate "The DCCP Protocol (EXPERIMENTAL)" | ||
| 6 | ---help--- | ||
| 7 | Datagram Congestion Control Protocol | ||
| 8 | |||
| 9 | From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>. | ||
| 10 | |||
| 11 | The Datagram Congestion Control Protocol (DCCP) is a transport | ||
| 12 | protocol that implements bidirectional, unicast connections of | ||
| 13 | congestion-controlled, unreliable datagrams. It should be suitable | ||
| 14 | for use by applications such as streaming media, Internet telephony, | ||
| 15 | and on-line games. | ||
| 16 | |||
| 17 | To compile this protocol support as a module, choose M here: the | ||
| 18 | module will be called dccp. | ||
| 19 | |||
| 20 | If in doubt, say N. | ||
| 21 | |||
| 22 | source "net/dccp/ccids/Kconfig" | ||
| 23 | |||
| 24 | endmenu | ||
diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 000000000000..c6e6ba55c36b --- /dev/null +++ b/net/dccp/Makefile | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | obj-$(CONFIG_IP_DCCP) += dccp.o | ||
| 2 | |||
| 3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o | ||
| 4 | |||
| 5 | obj-y += ccids/ | ||
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 000000000000..9d8fc0e289ea --- /dev/null +++ b/net/dccp/ccid.c | |||
| @@ -0,0 +1,139 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/ccid.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * CCID infrastructure | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify it | ||
| 10 | * under the terms of the GNU General Public License version 2 as | ||
| 11 | * published by the Free Software Foundation. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include "ccid.h" | ||
| 15 | |||
| 16 | static struct ccid *ccids[CCID_MAX]; | ||
| 17 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
| 18 | static atomic_t ccids_lockct = ATOMIC_INIT(0); | ||
| 19 | static DEFINE_SPINLOCK(ccids_lock); | ||
| 20 | |||
| 21 | /* | ||
| 22 | * The strategy is: modifications to the ccids vector are short, do not sleep | ||
| 23 | * and are very rare, but read access should be free of any exclusive locks. | ||
| 24 | */ | ||
| 25 | static void ccids_write_lock(void) | ||
| 26 | { | ||
| 27 | spin_lock(&ccids_lock); | ||
| 28 | while (atomic_read(&ccids_lockct) != 0) { | ||
| 29 | spin_unlock(&ccids_lock); | ||
| 30 | yield(); | ||
| 31 | spin_lock(&ccids_lock); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline void ccids_write_unlock(void) /* Release ccids_lock taken by ccids_write_lock() */ | ||
| 36 | { | ||
| 37 | spin_unlock(&ccids_lock); | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline void ccids_read_lock(void) /* Enter a read section: announce the reader first, then wait out any writer */ | ||
| 41 | { | ||
| 42 | atomic_inc(&ccids_lockct); /* a non-zero count makes ccids_write_lock() back off and retry */ | ||
| 43 | spin_unlock_wait(&ccids_lock); /* wait until ccids_lock is not held; the lock itself is never taken here */ | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void ccids_read_unlock(void) /* Leave a read section by dropping the reader count */ | ||
| 47 | { | ||
| 48 | atomic_dec(&ccids_lockct); | ||
| 49 | } | ||
| 50 | |||
| 51 | #else | ||
| 52 | #define ccids_write_lock() do { } while(0) | ||
| 53 | #define ccids_write_unlock() do { } while(0) | ||
| 54 | #define ccids_read_lock() do { } while(0) | ||
| 55 | #define ccids_read_unlock() do { } while(0) | ||
| 56 | #endif | ||
| 57 | |||
| 58 | int ccid_register(struct ccid *ccid) | ||
| 59 | { | ||
| 60 | int err; | ||
| 61 | |||
| 62 | if (ccid->ccid_init == NULL) | ||
| 63 | return -1; | ||
| 64 | |||
| 65 | ccids_write_lock(); | ||
| 66 | err = -EEXIST; | ||
| 67 | if (ccids[ccid->ccid_id] == NULL) { | ||
| 68 | ccids[ccid->ccid_id] = ccid; | ||
| 69 | err = 0; | ||
| 70 | } | ||
| 71 | ccids_write_unlock(); | ||
| 72 | if (err == 0) | ||
| 73 | pr_info("CCID: Registered CCID %d (%s)\n", | ||
| 74 | ccid->ccid_id, ccid->ccid_name); | ||
| 75 | return err; | ||
| 76 | } | ||
| 77 | |||
| 78 | EXPORT_SYMBOL_GPL(ccid_register); | ||
| 79 | |||
| 80 | int ccid_unregister(struct ccid *ccid) | ||
| 81 | { | ||
| 82 | ccids_write_lock(); | ||
| 83 | ccids[ccid->ccid_id] = NULL; | ||
| 84 | ccids_write_unlock(); | ||
| 85 | pr_info("CCID: Unregistered CCID %d (%s)\n", | ||
| 86 | ccid->ccid_id, ccid->ccid_name); | ||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | EXPORT_SYMBOL_GPL(ccid_unregister); | ||
| 91 | |||
| 92 | struct ccid *ccid_init(unsigned char id, struct sock *sk) | ||
| 93 | { | ||
| 94 | struct ccid *ccid; | ||
| 95 | |||
| 96 | #ifdef CONFIG_KMOD | ||
| 97 | if (ccids[id] == NULL) | ||
| 98 | request_module("net-dccp-ccid-%d", id); | ||
| 99 | #endif | ||
| 100 | ccids_read_lock(); | ||
| 101 | |||
| 102 | ccid = ccids[id]; | ||
| 103 | if (ccid == NULL) | ||
| 104 | goto out; | ||
| 105 | |||
| 106 | if (!try_module_get(ccid->ccid_owner)) | ||
| 107 | goto out_err; | ||
| 108 | |||
| 109 | if (ccid->ccid_init(sk) != 0) | ||
| 110 | goto out_module_put; | ||
| 111 | out: | ||
| 112 | ccids_read_unlock(); | ||
| 113 | return ccid; | ||
| 114 | out_module_put: | ||
| 115 | module_put(ccid->ccid_owner); | ||
| 116 | out_err: | ||
| 117 | ccid = NULL; | ||
| 118 | goto out; | ||
| 119 | } | ||
| 120 | |||
| 121 | EXPORT_SYMBOL_GPL(ccid_init); | ||
| 122 | |||
| 123 | void ccid_exit(struct ccid *ccid, struct sock *sk) | ||
| 124 | { | ||
| 125 | if (ccid == NULL) | ||
| 126 | return; | ||
| 127 | |||
| 128 | ccids_read_lock(); | ||
| 129 | |||
| 130 | if (ccids[ccid->ccid_id] != NULL) { | ||
| 131 | if (ccid->ccid_exit != NULL) | ||
| 132 | ccid->ccid_exit(sk); | ||
| 133 | module_put(ccid->ccid_owner); | ||
| 134 | } | ||
| 135 | |||
| 136 | ccids_read_unlock(); | ||
| 137 | } | ||
| 138 | |||
| 139 | EXPORT_SYMBOL_GPL(ccid_exit); | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 000000000000..06105b2a613c --- /dev/null +++ b/net/dccp/ccid.h | |||
| @@ -0,0 +1,156 @@ | |||
| 1 | #ifndef _CCID_H | ||
| 2 | #define _CCID_H | ||
| 3 | /* | ||
| 4 | * net/dccp/ccid.h | ||
| 5 | * | ||
| 6 | * An implementation of the DCCP protocol | ||
| 7 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 8 | * | ||
| 9 | * CCID infrastructure | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify it | ||
| 12 | * under the terms of the GNU General Public License version 2 as | ||
| 13 | * published by the Free Software Foundation. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <net/sock.h> | ||
| 17 | #include <linux/dccp.h> | ||
| 18 | #include <linux/list.h> | ||
| 19 | #include <linux/module.h> | ||
| 20 | |||
| 21 | #define CCID_MAX 255 | ||
| 22 | |||
| 23 | struct ccid { /* descriptor of one CCID implementation: identity plus a table of hooks */ | ||
| 24 | unsigned char ccid_id; /* index of this CCID in the ccids[] table in ccid.c */ | ||
| 25 | const char *ccid_name; /* printed in the register/unregister log messages */ | ||
| 26 | struct module *ccid_owner; /* module pinned with try_module_get()/module_put() by ccid_init()/ccid_exit() */ | ||
| 27 | int (*ccid_init)(struct sock *sk); /* mandatory: ccid_register() rejects a NULL ccid_init */ | ||
| 28 | void (*ccid_exit)(struct sock *sk); /* optional: NULL-checked in ccid_exit() */ | ||
| 29 | int (*ccid_hc_rx_init)(struct sock *sk); /* this and all hooks below are optional: the inline wrappers in this header skip NULL hooks */ | ||
| 30 | int (*ccid_hc_tx_init)(struct sock *sk); | ||
| 31 | void (*ccid_hc_rx_exit)(struct sock *sk); | ||
| 32 | void (*ccid_hc_tx_exit)(struct sock *sk); | ||
| 33 | void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); | ||
| 34 | int (*ccid_hc_rx_parse_options)(struct sock *sk, | ||
| 35 | unsigned char option, | ||
| 36 | unsigned char len, u16 idx, | ||
| 37 | unsigned char* value); | ||
| 38 | void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); | ||
| 39 | void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); | ||
| 40 | void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); | ||
| 41 | int (*ccid_hc_tx_parse_options)(struct sock *sk, | ||
| 42 | unsigned char option, | ||
| 43 | unsigned char len, u16 idx, | ||
| 44 | unsigned char* value); | ||
| 45 | int (*ccid_hc_tx_send_packet)(struct sock *sk, | ||
| 46 | struct sk_buff *skb, int len, | ||
| 47 | long *delay); | ||
| 48 | void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); | ||
| 49 | }; | ||
| 50 | |||
| 51 | extern int ccid_register(struct ccid *ccid); | ||
| 52 | extern int ccid_unregister(struct ccid *ccid); | ||
| 53 | |||
| 54 | extern struct ccid *ccid_init(unsigned char id, struct sock *sk); | ||
| 55 | extern void ccid_exit(struct ccid *ccid, struct sock *sk); | ||
| 56 | |||
| 57 | static inline void __ccid_get(struct ccid *ccid) | ||
| 58 | { | ||
| 59 | __module_get(ccid->ccid_owner); | ||
| 60 | } | ||
| 61 | |||
| 62 | static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, | ||
| 63 | struct sk_buff *skb, int len, | ||
| 64 | long *delay) | ||
| 65 | { | ||
| 66 | int rc = 0; | ||
| 67 | if (ccid->ccid_hc_tx_send_packet != NULL) | ||
| 68 | rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); | ||
| 69 | return rc; | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, | ||
| 73 | int more, int len) | ||
| 74 | { | ||
| 75 | if (ccid->ccid_hc_tx_packet_sent != NULL) | ||
| 76 | ccid->ccid_hc_tx_packet_sent(sk, more, len); | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) | ||
| 80 | { | ||
| 81 | int rc = 0; | ||
| 82 | if (ccid->ccid_hc_rx_init != NULL) | ||
| 83 | rc = ccid->ccid_hc_rx_init(sk); | ||
| 84 | return rc; | ||
| 85 | } | ||
| 86 | |||
| 87 | static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) | ||
| 88 | { | ||
| 89 | int rc = 0; | ||
| 90 | if (ccid->ccid_hc_tx_init != NULL) | ||
| 91 | rc = ccid->ccid_hc_tx_init(sk); | ||
| 92 | return rc; | ||
| 93 | } | ||
| 94 | |||
| 95 | static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) | ||
| 96 | { | ||
| 97 | if (ccid->ccid_hc_rx_exit != NULL) | ||
| 98 | ccid->ccid_hc_rx_exit(sk); | ||
| 99 | } | ||
| 100 | |||
| 101 | static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) | ||
| 102 | { | ||
| 103 | if (ccid->ccid_hc_tx_exit != NULL) | ||
| 104 | ccid->ccid_hc_tx_exit(sk); | ||
| 105 | } | ||
| 106 | |||
| 107 | static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
| 108 | struct sk_buff *skb) | ||
| 109 | { | ||
| 110 | if (ccid->ccid_hc_rx_packet_recv != NULL) | ||
| 111 | ccid->ccid_hc_rx_packet_recv(sk, skb); | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
| 115 | struct sk_buff *skb) | ||
| 116 | { | ||
| 117 | if (ccid->ccid_hc_tx_packet_recv != NULL) | ||
| 118 | ccid->ccid_hc_tx_packet_recv(sk, skb); | ||
| 119 | } | ||
| 120 | |||
| 121 | static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, | ||
| 122 | unsigned char option, | ||
| 123 | unsigned char len, u16 idx, | ||
| 124 | unsigned char* value) | ||
| 125 | { | ||
| 126 | int rc = 0; | ||
| 127 | if (ccid->ccid_hc_tx_parse_options != NULL) | ||
| 128 | rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); | ||
| 129 | return rc; | ||
| 130 | } | ||
| 131 | |||
| 132 | static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, | ||
| 133 | unsigned char option, | ||
| 134 | unsigned char len, u16 idx, | ||
| 135 | unsigned char* value) | ||
| 136 | { | ||
| 137 | int rc = 0; | ||
| 138 | if (ccid->ccid_hc_rx_parse_options != NULL) | ||
| 139 | rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); | ||
| 140 | return rc; | ||
| 141 | } | ||
| 142 | |||
| 143 | static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, | ||
| 144 | struct sk_buff *skb) | ||
| 145 | { | ||
| 146 | if (ccid->ccid_hc_tx_insert_options != NULL) | ||
| 147 | ccid->ccid_hc_tx_insert_options(sk, skb); | ||
| 148 | } | ||
| 149 | |||
| 150 | static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, | ||
| 151 | struct sk_buff *skb) | ||
| 152 | { | ||
| 153 | if (ccid->ccid_hc_rx_insert_options != NULL) | ||
| 154 | ccid->ccid_hc_rx_insert_options(sk, skb); | ||
| 155 | } | ||
| 156 | #endif /* _CCID_H */ | ||
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 000000000000..67f9c06bd179 --- /dev/null +++ b/net/dccp/ccids/Kconfig | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" | ||
| 2 | depends on IP_DCCP && EXPERIMENTAL | ||
| 3 | |||
| 4 | config IP_DCCP_CCID3 | ||
| 5 | tristate "CCID3 (TFRC) (EXPERIMENTAL)" | ||
| 6 | depends on IP_DCCP | ||
| 7 | ---help--- | ||
| 8 | CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based | ||
| 9 | rate-controlled congestion control mechanism. TFRC is designed to | ||
| 10 | be reasonably fair when competing for bandwidth with TCP-like flows, | ||
| 11 | where a flow is "reasonably fair" if its sending rate is generally | ||
| 12 | within a factor of two of the sending rate of a TCP flow under the | ||
| 13 | same conditions. However, TFRC has a much lower variation of | ||
| 14 | throughput over time compared with TCP, which makes CCID 3 more | ||
| 15 | suitable than CCID 2 for applications such as streaming media where a | ||
| 16 | relatively smooth sending rate is of importance. | ||
| 17 | |||
| 18 | CCID 3 is further described in [CCID 3 PROFILE]. The TFRC | ||
| 19 | congestion control algorithms were initially described in RFC 3448. | ||
| 20 | |||
| 21 | This text was extracted from draft-ietf-dccp-spec-11.txt. | ||
| 22 | |||
| 23 | If in doubt, say M. | ||
| 24 | |||
| 25 | endmenu | ||
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 000000000000..1c720131c5db --- /dev/null +++ b/net/dccp/ccids/Makefile | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o | ||
| 2 | |||
| 3 | dccp_ccid3-y := ccid3.o | ||
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 000000000000..4f45902cb55e --- /dev/null +++ b/net/dccp/ccids/ccid3.c | |||
| @@ -0,0 +1,2164 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/ccids/ccid3.c | ||
| 3 | * | ||
| 4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
| 5 | * | ||
| 6 | * An implementation of the DCCP protocol | ||
| 7 | * | ||
| 8 | * This code has been developed by the University of Waikato WAND | ||
| 9 | * research group. For further information please see http://www.wand.net.nz/ | ||
| 10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
| 11 | * | ||
| 12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
| 13 | * authors: | ||
| 14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
| 15 | * | ||
| 16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
| 17 | * and to make it work as a loadable module in the DCCP stack written by | ||
| 18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
| 19 | * | ||
| 20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 21 | * | ||
| 22 | * This program is free software; you can redistribute it and/or modify | ||
| 23 | * it under the terms of the GNU General Public License as published by | ||
| 24 | * the Free Software Foundation; either version 2 of the License, or | ||
| 25 | * (at your option) any later version. | ||
| 26 | * | ||
| 27 | * This program is distributed in the hope that it will be useful, | ||
| 28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 30 | * GNU General Public License for more details. | ||
| 31 | * | ||
| 32 | * You should have received a copy of the GNU General Public License | ||
| 33 | * along with this program; if not, write to the Free Software | ||
| 34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 35 | */ | ||
| 36 | |||
| 37 | #include "../ccid.h" | ||
| 38 | #include "../dccp.h" | ||
| 39 | #include "ccid3.h" | ||
| 40 | |||
#ifdef CCID3_DEBUG
/*
 * ccid3_pr_debug - printk(KERN_DEBUG ...) prefixed with the calling
 * function's name, emitted only while ccid3_debug is non-zero.
 *
 * ccid3_debug is defined in this file with internal linkage (static), after
 * this macro.  It must not be declared "extern" here: a static definition
 * that follows an extern declaration of the same identifier is a linkage
 * conflict and fails to compile.  The macro body is only expanded where the
 * macro is used, so it must be used after that definition.
 */
#define ccid3_pr_debug(format, a...) \
	do { if (ccid3_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
	} while (0)
#else
/* Debugging compiled out: expands to nothing, arguments are not evaluated. */
#define ccid3_pr_debug(format, a...)
#endif
| 51 | |||
/*
 * Packet size bounds and default.  NOTE(review): presumably in bytes, like
 * the u16 packet size taken by ccid3_calc_x() -- confirm at the users.
 */
#define TFRC_MIN_PACKET_SIZE 16
#define TFRC_STD_PACKET_SIZE 256
#define TFRC_MAX_PACKET_SIZE 65535

/* Microseconds per second. */
#define USEC_IN_SEC 1000000

/* In usecs: two seconds as per CCID3 spec 11. */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC)

/*
 * In usecs: half the scheduling granularity as per RFC3448 4.6, i.e. half
 * of one tick (USEC_IN_SEC / HZ).
 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ))

/*
 * NOTE(review): presumably the window counter steps per RTT and the value
 * at which the counter wraps; the users are outside this part of the file.
 */
#define TFRC_WIN_COUNT_PER_RTT 4
#define TFRC_WIN_COUNT_LIMIT 16

/* In seconds. */
#define TFRC_MAX_BACK_OFF_TIME 64

/*
 * NOTE(review): presumably a lower bound on the loss event rate p, in the
 * same 1,000,000-scaled units ccid3_calc_x() takes -- TODO confirm.
 */
#define TFRC_SMALLEST_P 40

#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */

/* Number of later packets received before one is considered lost */
#define TFRC_RECV_NUM_LATE_LOSS 3
| 76 | |||
/*
 * Option type codes used by CCID3.  Going by the names they identify the
 * loss event rate, loss intervals and receive rate options; how each one is
 * encoded is not visible in this part of the file.
 */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS = 193,
	TFRC_OPT_RECEIVE_RATE = 194,
};
| 82 | |||
/* Run-time switch tested by ccid3_pr_debug() when CCID3_DEBUG is defined. */
static int ccid3_debug;

/*
 * Slab caches that the ccid3_*_hist_entry_new()/_delete() helpers in this
 * file allocate from and free to.  NOTE(review): presumably created during
 * module init, which is outside this part of the file.
 */
static kmem_cache_t *ccid3_tx_hist_slab;
static kmem_cache_t *ccid3_rx_hist_slab;
static kmem_cache_t *ccid3_loss_interval_hist_slab;
| 88 | |||
| 89 | static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) | ||
| 90 | { | ||
| 91 | struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); | ||
| 92 | |||
| 93 | if (entry != NULL) | ||
| 94 | entry->ccid3htx_sent = 0; | ||
| 95 | |||
| 96 | return entry; | ||
| 97 | } | ||
| 98 | |||
| 99 | static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) | ||
| 100 | { | ||
| 101 | if (entry != NULL) | ||
| 102 | kmem_cache_free(ccid3_tx_hist_slab, entry); | ||
| 103 | } | ||
| 104 | |||
| 105 | static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, | ||
| 106 | struct sk_buff *skb, | ||
| 107 | int prio) | ||
| 108 | { | ||
| 109 | struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); | ||
| 110 | |||
| 111 | if (entry != NULL) { | ||
| 112 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
| 113 | |||
| 114 | entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 115 | entry->ccid3hrx_win_count = dh->dccph_ccval; | ||
| 116 | entry->ccid3hrx_type = dh->dccph_type; | ||
| 117 | entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; | ||
| 118 | do_gettimeofday(&(entry->ccid3hrx_tstamp)); | ||
| 119 | } | ||
| 120 | |||
| 121 | return entry; | ||
| 122 | } | ||
| 123 | |||
| 124 | static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) | ||
| 125 | { | ||
| 126 | if (entry != NULL) | ||
| 127 | kmem_cache_free(ccid3_rx_hist_slab, entry); | ||
| 128 | } | ||
| 129 | |||
| 130 | static void ccid3_rx_history_delete(struct list_head *hist) | ||
| 131 | { | ||
| 132 | struct ccid3_rx_hist_entry *entry, *next; | ||
| 133 | |||
| 134 | list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { | ||
| 135 | list_del_init(&entry->ccid3hrx_node); | ||
| 136 | kmem_cache_free(ccid3_rx_hist_slab, entry); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) | ||
| 141 | { | ||
| 142 | return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); | ||
| 143 | } | ||
| 144 | |||
| 145 | static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) | ||
| 146 | { | ||
| 147 | if (entry != NULL) | ||
| 148 | kmem_cache_free(ccid3_loss_interval_hist_slab, entry); | ||
| 149 | } | ||
| 150 | |||
| 151 | static void ccid3_loss_interval_history_delete(struct list_head *hist) | ||
| 152 | { | ||
| 153 | struct ccid3_loss_interval_hist_entry *entry, *next; | ||
| 154 | |||
| 155 | list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { | ||
| 156 | list_del_init(&entry->ccid3lih_node); | ||
| 157 | kmem_cache_free(ccid3_loss_interval_hist_slab, entry); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
/*
 * ccid3_init - CCID3 init entry point for socket @sk.
 *
 * Only emits a debug trace with the socket's role and address; nothing is
 * allocated or modified here.  Always returns 0.
 * NOTE(review): presumably registered as the CCID's init callback; the
 * registration is outside this part of the file.
 */
static int ccid3_init(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
	return 0;
}
| 166 | |||
/*
 * ccid3_exit - CCID3 exit entry point for socket @sk.
 *
 * Only emits a debug trace with the socket's role and address; nothing is
 * released here.
 */
static void ccid3_exit(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
}
| 171 | |||
/*
 * TFRC sender states.
 *
 * Numbering starts at 1, so a zero value does not correspond to any state.
 * NOTE(review): going by the names, these track whether a packet has been
 * sent yet and whether feedback has been received; confirm against the
 * state transitions, which are outside this part of the file.
 */
enum ccid3_hc_tx_states {
	TFRC_SSTATE_NO_SENT = 1,
	TFRC_SSTATE_NO_FBACK,
	TFRC_SSTATE_FBACK,
	TFRC_SSTATE_TERM,
};
| 179 | |||
#ifdef CCID3_DEBUG
/*
 * ccid3_tx_state_name - printable name of a TFRC sender state, for debug
 * output.
 *
 * Returns a pointer to a constant string.  Values that do not name a state
 * (including 0, since the enum starts at 1, and anything past the end of
 * the table) yield "UNKNOWN" instead of a NULL pointer or an out-of-bounds
 * read, so the result is always safe to hand to a "%s" conversion.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static const char * const ccid3_state_names[] = {
		[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
		[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
		[TFRC_SSTATE_FBACK]    = "FBACK",
		[TFRC_SSTATE_TERM]     = "TERM",
	};
	const unsigned int nr_names = sizeof(ccid3_state_names) /
				      sizeof(ccid3_state_names[0]);
	/* Unsigned index: a negative enum value also fails the range check. */
	const unsigned int idx = (unsigned int)state;

	if (idx >= nr_names || ccid3_state_names[idx] == NULL)
		return "UNKNOWN";

	return ccid3_state_names[idx];
}
#endif
| 193 | |||
| 194 | static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) | ||
| 195 | { | ||
| 196 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 197 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 198 | enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; | ||
| 199 | |||
| 200 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
| 201 | dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); | ||
| 202 | WARN_ON(state == oldstate); | ||
| 203 | hctx->ccid3hctx_state = state; | ||
| 204 | } | ||
| 205 | |||
| 206 | static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { | ||
| 207 | |||
| 208 | result->tv_sec = large.tv_sec-small.tv_sec; | ||
| 209 | if (large.tv_usec < small.tv_usec) { | ||
| 210 | (result->tv_sec)--; | ||
| 211 | result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; | ||
| 212 | } else | ||
| 213 | result->tv_usec = large.tv_usec-small.tv_usec; | ||
| 214 | } | ||
| 215 | |||
| 216 | static inline void timeval_fix(struct timeval *tv) { | ||
| 217 | if (tv->tv_usec >= USEC_IN_SEC) { | ||
| 218 | tv->tv_sec++; | ||
| 219 | tv->tv_usec -= USEC_IN_SEC; | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | /* returns the difference in usecs between timeval passed in and current time */ | ||
| 224 | static inline u32 now_delta(struct timeval tv) { | ||
| 225 | struct timeval now; | ||
| 226 | |||
| 227 | do_gettimeofday(&now); | ||
| 228 | return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); | ||
| 229 | } | ||
| 230 | |||
/* Number of rows in calcx_lookup[]. */
#define CALCX_ARRSIZE 500

/*
 * Loss rate at which ccid3_calc_x() switches lookup columns.  p is scaled
 * by 1,000,000, so this is equivalent to 0.05.
 */
#define CALCX_SPLIT 50000

/*
 * Layout of calcx_lookup[], as indexed by ccid3_calc_x():
 *
 *   [i][0] is used for p >= CALCX_SPLIT, i = p / (1000000 / CALCX_ARRSIZE) - 1
 *          (one row per 2000 units of p, i.e. steps of 0.002 up to 1.0)
 *   [i][1] is used for p <  CALCX_SPLIT, i = p / (CALCX_SPLIT / CALCX_ARRSIZE) - 1
 *          (one row per 100 units of p, i.e. steps of 0.0001 up to 0.05)
 *
 * The entries are values of f(p) from the ccid3_calc_x() comment.  They
 * appear to be scaled by 1,000,000, which is what the arithmetic in
 * ccid3_calc_x() implies.
 */
| 235 | |||
| 236 | static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { | ||
| 237 | { 37172 , 8172 }, | ||
| 238 | { 53499 , 11567 }, | ||
| 239 | { 66664 , 14180 }, | ||
| 240 | { 78298 , 16388 }, | ||
| 241 | { 89021 , 18339 }, | ||
| 242 | { 99147 , 20108 }, | ||
| 243 | { 108858 , 21738 }, | ||
| 244 | { 118273 , 23260 }, | ||
| 245 | { 127474 , 24693 }, | ||
| 246 | { 136520 , 26052 }, | ||
| 247 | { 145456 , 27348 }, | ||
| 248 | { 154316 , 28589 }, | ||
| 249 | { 163130 , 29783 }, | ||
| 250 | { 171919 , 30935 }, | ||
| 251 | { 180704 , 32049 }, | ||
| 252 | { 189502 , 33130 }, | ||
| 253 | { 198328 , 34180 }, | ||
| 254 | { 207194 , 35202 }, | ||
| 255 | { 216114 , 36198 }, | ||
| 256 | { 225097 , 37172 }, | ||
| 257 | { 234153 , 38123 }, | ||
| 258 | { 243294 , 39055 }, | ||
| 259 | { 252527 , 39968 }, | ||
| 260 | { 261861 , 40864 }, | ||
| 261 | { 271305 , 41743 }, | ||
| 262 | { 280866 , 42607 }, | ||
| 263 | { 290553 , 43457 }, | ||
| 264 | { 300372 , 44293 }, | ||
| 265 | { 310333 , 45117 }, | ||
| 266 | { 320441 , 45929 }, | ||
| 267 | { 330705 , 46729 }, | ||
| 268 | { 341131 , 47518 }, | ||
| 269 | { 351728 , 48297 }, | ||
| 270 | { 362501 , 49066 }, | ||
| 271 | { 373460 , 49826 }, | ||
| 272 | { 384609 , 50577 }, | ||
| 273 | { 395958 , 51320 }, | ||
| 274 | { 407513 , 52054 }, | ||
| 275 | { 419281 , 52780 }, | ||
| 276 | { 431270 , 53499 }, | ||
| 277 | { 443487 , 54211 }, | ||
| 278 | { 455940 , 54916 }, | ||
| 279 | { 468635 , 55614 }, | ||
| 280 | { 481581 , 56306 }, | ||
| 281 | { 494785 , 56991 }, | ||
| 282 | { 508254 , 57671 }, | ||
| 283 | { 521996 , 58345 }, | ||
| 284 | { 536019 , 59014 }, | ||
| 285 | { 550331 , 59677 }, | ||
| 286 | { 564939 , 60335 }, | ||
| 287 | { 579851 , 60988 }, | ||
| 288 | { 595075 , 61636 }, | ||
| 289 | { 610619 , 62279 }, | ||
| 290 | { 626491 , 62918 }, | ||
| 291 | { 642700 , 63553 }, | ||
| 292 | { 659253 , 64183 }, | ||
| 293 | { 676158 , 64809 }, | ||
| 294 | { 693424 , 65431 }, | ||
| 295 | { 711060 , 66050 }, | ||
| 296 | { 729073 , 66664 }, | ||
| 297 | { 747472 , 67275 }, | ||
| 298 | { 766266 , 67882 }, | ||
| 299 | { 785464 , 68486 }, | ||
| 300 | { 805073 , 69087 }, | ||
| 301 | { 825103 , 69684 }, | ||
| 302 | { 845562 , 70278 }, | ||
| 303 | { 866460 , 70868 }, | ||
| 304 | { 887805 , 71456 }, | ||
| 305 | { 909606 , 72041 }, | ||
| 306 | { 931873 , 72623 }, | ||
| 307 | { 954614 , 73202 }, | ||
| 308 | { 977839 , 73778 }, | ||
| 309 | { 1001557 , 74352 }, | ||
| 310 | { 1025777 , 74923 }, | ||
| 311 | { 1050508 , 75492 }, | ||
| 312 | { 1075761 , 76058 }, | ||
| 313 | { 1101544 , 76621 }, | ||
| 314 | { 1127867 , 77183 }, | ||
| 315 | { 1154739 , 77741 }, | ||
| 316 | { 1182172 , 78298 }, | ||
| 317 | { 1210173 , 78852 }, | ||
| 318 | { 1238753 , 79405 }, | ||
| 319 | { 1267922 , 79955 }, | ||
| 320 | { 1297689 , 80503 }, | ||
| 321 | { 1328066 , 81049 }, | ||
| 322 | { 1359060 , 81593 }, | ||
| 323 | { 1390684 , 82135 }, | ||
| 324 | { 1422947 , 82675 }, | ||
| 325 | { 1455859 , 83213 }, | ||
| 326 | { 1489430 , 83750 }, | ||
| 327 | { 1523671 , 84284 }, | ||
| 328 | { 1558593 , 84817 }, | ||
| 329 | { 1594205 , 85348 }, | ||
| 330 | { 1630518 , 85878 }, | ||
| 331 | { 1667543 , 86406 }, | ||
| 332 | { 1705290 , 86932 }, | ||
| 333 | { 1743770 , 87457 }, | ||
| 334 | { 1782994 , 87980 }, | ||
| 335 | { 1822973 , 88501 }, | ||
| 336 | { 1863717 , 89021 }, | ||
| 337 | { 1905237 , 89540 }, | ||
| 338 | { 1947545 , 90057 }, | ||
| 339 | { 1990650 , 90573 }, | ||
| 340 | { 2034566 , 91087 }, | ||
| 341 | { 2079301 , 91600 }, | ||
| 342 | { 2124869 , 92111 }, | ||
| 343 | { 2171279 , 92622 }, | ||
| 344 | { 2218543 , 93131 }, | ||
| 345 | { 2266673 , 93639 }, | ||
| 346 | { 2315680 , 94145 }, | ||
| 347 | { 2365575 , 94650 }, | ||
| 348 | { 2416371 , 95154 }, | ||
| 349 | { 2468077 , 95657 }, | ||
| 350 | { 2520707 , 96159 }, | ||
| 351 | { 2574271 , 96660 }, | ||
| 352 | { 2628782 , 97159 }, | ||
| 353 | { 2684250 , 97658 }, | ||
| 354 | { 2740689 , 98155 }, | ||
| 355 | { 2798110 , 98651 }, | ||
| 356 | { 2856524 , 99147 }, | ||
| 357 | { 2915944 , 99641 }, | ||
| 358 | { 2976382 , 100134 }, | ||
| 359 | { 3037850 , 100626 }, | ||
| 360 | { 3100360 , 101117 }, | ||
| 361 | { 3163924 , 101608 }, | ||
| 362 | { 3228554 , 102097 }, | ||
| 363 | { 3294263 , 102586 }, | ||
| 364 | { 3361063 , 103073 }, | ||
| 365 | { 3428966 , 103560 }, | ||
| 366 | { 3497984 , 104045 }, | ||
| 367 | { 3568131 , 104530 }, | ||
| 368 | { 3639419 , 105014 }, | ||
| 369 | { 3711860 , 105498 }, | ||
| 370 | { 3785467 , 105980 }, | ||
| 371 | { 3860253 , 106462 }, | ||
| 372 | { 3936229 , 106942 }, | ||
| 373 | { 4013410 , 107422 }, | ||
| 374 | { 4091808 , 107902 }, | ||
| 375 | { 4171435 , 108380 }, | ||
| 376 | { 4252306 , 108858 }, | ||
| 377 | { 4334431 , 109335 }, | ||
| 378 | { 4417825 , 109811 }, | ||
| 379 | { 4502501 , 110287 }, | ||
| 380 | { 4588472 , 110762 }, | ||
| 381 | { 4675750 , 111236 }, | ||
| 382 | { 4764349 , 111709 }, | ||
| 383 | { 4854283 , 112182 }, | ||
| 384 | { 4945564 , 112654 }, | ||
| 385 | { 5038206 , 113126 }, | ||
| 386 | { 5132223 , 113597 }, | ||
| 387 | { 5227627 , 114067 }, | ||
| 388 | { 5324432 , 114537 }, | ||
| 389 | { 5422652 , 115006 }, | ||
| 390 | { 5522299 , 115474 }, | ||
| 391 | { 5623389 , 115942 }, | ||
| 392 | { 5725934 , 116409 }, | ||
| 393 | { 5829948 , 116876 }, | ||
| 394 | { 5935446 , 117342 }, | ||
| 395 | { 6042439 , 117808 }, | ||
| 396 | { 6150943 , 118273 }, | ||
| 397 | { 6260972 , 118738 }, | ||
| 398 | { 6372538 , 119202 }, | ||
| 399 | { 6485657 , 119665 }, | ||
| 400 | { 6600342 , 120128 }, | ||
| 401 | { 6716607 , 120591 }, | ||
| 402 | { 6834467 , 121053 }, | ||
| 403 | { 6953935 , 121514 }, | ||
| 404 | { 7075025 , 121976 }, | ||
| 405 | { 7197752 , 122436 }, | ||
| 406 | { 7322131 , 122896 }, | ||
| 407 | { 7448175 , 123356 }, | ||
| 408 | { 7575898 , 123815 }, | ||
| 409 | { 7705316 , 124274 }, | ||
| 410 | { 7836442 , 124733 }, | ||
| 411 | { 7969291 , 125191 }, | ||
| 412 | { 8103877 , 125648 }, | ||
| 413 | { 8240216 , 126105 }, | ||
| 414 | { 8378321 , 126562 }, | ||
| 415 | { 8518208 , 127018 }, | ||
| 416 | { 8659890 , 127474 }, | ||
| 417 | { 8803384 , 127930 }, | ||
| 418 | { 8948702 , 128385 }, | ||
| 419 | { 9095861 , 128840 }, | ||
| 420 | { 9244875 , 129294 }, | ||
| 421 | { 9395760 , 129748 }, | ||
| 422 | { 9548529 , 130202 }, | ||
| 423 | { 9703198 , 130655 }, | ||
| 424 | { 9859782 , 131108 }, | ||
| 425 | { 10018296 , 131561 }, | ||
| 426 | { 10178755 , 132014 }, | ||
| 427 | { 10341174 , 132466 }, | ||
| 428 | { 10505569 , 132917 }, | ||
| 429 | { 10671954 , 133369 }, | ||
| 430 | { 10840345 , 133820 }, | ||
| 431 | { 11010757 , 134271 }, | ||
| 432 | { 11183206 , 134721 }, | ||
| 433 | { 11357706 , 135171 }, | ||
| 434 | { 11534274 , 135621 }, | ||
| 435 | { 11712924 , 136071 }, | ||
| 436 | { 11893673 , 136520 }, | ||
| 437 | { 12076536 , 136969 }, | ||
| 438 | { 12261527 , 137418 }, | ||
| 439 | { 12448664 , 137867 }, | ||
| 440 | { 12637961 , 138315 }, | ||
| 441 | { 12829435 , 138763 }, | ||
| 442 | { 13023101 , 139211 }, | ||
| 443 | { 13218974 , 139658 }, | ||
| 444 | { 13417071 , 140106 }, | ||
| 445 | { 13617407 , 140553 }, | ||
| 446 | { 13819999 , 140999 }, | ||
| 447 | { 14024862 , 141446 }, | ||
| 448 | { 14232012 , 141892 }, | ||
| 449 | { 14441465 , 142339 }, | ||
| 450 | { 14653238 , 142785 }, | ||
| 451 | { 14867346 , 143230 }, | ||
| 452 | { 15083805 , 143676 }, | ||
| 453 | { 15302632 , 144121 }, | ||
| 454 | { 15523842 , 144566 }, | ||
| 455 | { 15747453 , 145011 }, | ||
| 456 | { 15973479 , 145456 }, | ||
| 457 | { 16201939 , 145900 }, | ||
| 458 | { 16432847 , 146345 }, | ||
| 459 | { 16666221 , 146789 }, | ||
| 460 | { 16902076 , 147233 }, | ||
| 461 | { 17140429 , 147677 }, | ||
| 462 | { 17381297 , 148121 }, | ||
| 463 | { 17624696 , 148564 }, | ||
| 464 | { 17870643 , 149007 }, | ||
| 465 | { 18119154 , 149451 }, | ||
| 466 | { 18370247 , 149894 }, | ||
| 467 | { 18623936 , 150336 }, | ||
| 468 | { 18880241 , 150779 }, | ||
| 469 | { 19139176 , 151222 }, | ||
| 470 | { 19400759 , 151664 }, | ||
| 471 | { 19665007 , 152107 }, | ||
| 472 | { 19931936 , 152549 }, | ||
| 473 | { 20201564 , 152991 }, | ||
| 474 | { 20473907 , 153433 }, | ||
| 475 | { 20748982 , 153875 }, | ||
| 476 | { 21026807 , 154316 }, | ||
| 477 | { 21307399 , 154758 }, | ||
| 478 | { 21590773 , 155199 }, | ||
| 479 | { 21876949 , 155641 }, | ||
| 480 | { 22165941 , 156082 }, | ||
| 481 | { 22457769 , 156523 }, | ||
| 482 | { 22752449 , 156964 }, | ||
| 483 | { 23049999 , 157405 }, | ||
| 484 | { 23350435 , 157846 }, | ||
| 485 | { 23653774 , 158287 }, | ||
| 486 | { 23960036 , 158727 }, | ||
| 487 | { 24269236 , 159168 }, | ||
| 488 | { 24581392 , 159608 }, | ||
| 489 | { 24896521 , 160049 }, | ||
| 490 | { 25214642 , 160489 }, | ||
| 491 | { 25535772 , 160929 }, | ||
| 492 | { 25859927 , 161370 }, | ||
| 493 | { 26187127 , 161810 }, | ||
| 494 | { 26517388 , 162250 }, | ||
| 495 | { 26850728 , 162690 }, | ||
| 496 | { 27187165 , 163130 }, | ||
| 497 | { 27526716 , 163569 }, | ||
| 498 | { 27869400 , 164009 }, | ||
| 499 | { 28215234 , 164449 }, | ||
| 500 | { 28564236 , 164889 }, | ||
| 501 | { 28916423 , 165328 }, | ||
| 502 | { 29271815 , 165768 }, | ||
| 503 | { 29630428 , 166208 }, | ||
| 504 | { 29992281 , 166647 }, | ||
| 505 | { 30357392 , 167087 }, | ||
| 506 | { 30725779 , 167526 }, | ||
| 507 | { 31097459 , 167965 }, | ||
| 508 | { 31472452 , 168405 }, | ||
| 509 | { 31850774 , 168844 }, | ||
| 510 | { 32232445 , 169283 }, | ||
| 511 | { 32617482 , 169723 }, | ||
| 512 | { 33005904 , 170162 }, | ||
| 513 | { 33397730 , 170601 }, | ||
| 514 | { 33792976 , 171041 }, | ||
| 515 | { 34191663 , 171480 }, | ||
| 516 | { 34593807 , 171919 }, | ||
| 517 | { 34999428 , 172358 }, | ||
| 518 | { 35408544 , 172797 }, | ||
| 519 | { 35821174 , 173237 }, | ||
| 520 | { 36237335 , 173676 }, | ||
| 521 | { 36657047 , 174115 }, | ||
| 522 | { 37080329 , 174554 }, | ||
| 523 | { 37507197 , 174993 }, | ||
| 524 | { 37937673 , 175433 }, | ||
| 525 | { 38371773 , 175872 }, | ||
| 526 | { 38809517 , 176311 }, | ||
| 527 | { 39250924 , 176750 }, | ||
| 528 | { 39696012 , 177190 }, | ||
| 529 | { 40144800 , 177629 }, | ||
| 530 | { 40597308 , 178068 }, | ||
| 531 | { 41053553 , 178507 }, | ||
| 532 | { 41513554 , 178947 }, | ||
| 533 | { 41977332 , 179386 }, | ||
| 534 | { 42444904 , 179825 }, | ||
| 535 | { 42916290 , 180265 }, | ||
| 536 | { 43391509 , 180704 }, | ||
| 537 | { 43870579 , 181144 }, | ||
| 538 | { 44353520 , 181583 }, | ||
| 539 | { 44840352 , 182023 }, | ||
| 540 | { 45331092 , 182462 }, | ||
| 541 | { 45825761 , 182902 }, | ||
| 542 | { 46324378 , 183342 }, | ||
| 543 | { 46826961 , 183781 }, | ||
| 544 | { 47333531 , 184221 }, | ||
| 545 | { 47844106 , 184661 }, | ||
| 546 | { 48358706 , 185101 }, | ||
| 547 | { 48877350 , 185541 }, | ||
| 548 | { 49400058 , 185981 }, | ||
| 549 | { 49926849 , 186421 }, | ||
| 550 | { 50457743 , 186861 }, | ||
| 551 | { 50992759 , 187301 }, | ||
| 552 | { 51531916 , 187741 }, | ||
| 553 | { 52075235 , 188181 }, | ||
| 554 | { 52622735 , 188622 }, | ||
| 555 | { 53174435 , 189062 }, | ||
| 556 | { 53730355 , 189502 }, | ||
| 557 | { 54290515 , 189943 }, | ||
| 558 | { 54854935 , 190383 }, | ||
| 559 | { 55423634 , 190824 }, | ||
| 560 | { 55996633 , 191265 }, | ||
| 561 | { 56573950 , 191706 }, | ||
| 562 | { 57155606 , 192146 }, | ||
| 563 | { 57741621 , 192587 }, | ||
| 564 | { 58332014 , 193028 }, | ||
| 565 | { 58926806 , 193470 }, | ||
| 566 | { 59526017 , 193911 }, | ||
| 567 | { 60129666 , 194352 }, | ||
| 568 | { 60737774 , 194793 }, | ||
| 569 | { 61350361 , 195235 }, | ||
| 570 | { 61967446 , 195677 }, | ||
| 571 | { 62589050 , 196118 }, | ||
| 572 | { 63215194 , 196560 }, | ||
| 573 | { 63845897 , 197002 }, | ||
| 574 | { 64481179 , 197444 }, | ||
| 575 | { 65121061 , 197886 }, | ||
| 576 | { 65765563 , 198328 }, | ||
| 577 | { 66414705 , 198770 }, | ||
| 578 | { 67068508 , 199213 }, | ||
| 579 | { 67726992 , 199655 }, | ||
| 580 | { 68390177 , 200098 }, | ||
| 581 | { 69058085 , 200540 }, | ||
| 582 | { 69730735 , 200983 }, | ||
| 583 | { 70408147 , 201426 }, | ||
| 584 | { 71090343 , 201869 }, | ||
| 585 | { 71777343 , 202312 }, | ||
| 586 | { 72469168 , 202755 }, | ||
| 587 | { 73165837 , 203199 }, | ||
| 588 | { 73867373 , 203642 }, | ||
| 589 | { 74573795 , 204086 }, | ||
| 590 | { 75285124 , 204529 }, | ||
| 591 | { 76001380 , 204973 }, | ||
| 592 | { 76722586 , 205417 }, | ||
| 593 | { 77448761 , 205861 }, | ||
| 594 | { 78179926 , 206306 }, | ||
| 595 | { 78916102 , 206750 }, | ||
| 596 | { 79657310 , 207194 }, | ||
| 597 | { 80403571 , 207639 }, | ||
| 598 | { 81154906 , 208084 }, | ||
| 599 | { 81911335 , 208529 }, | ||
| 600 | { 82672880 , 208974 }, | ||
| 601 | { 83439562 , 209419 }, | ||
| 602 | { 84211402 , 209864 }, | ||
| 603 | { 84988421 , 210309 }, | ||
| 604 | { 85770640 , 210755 }, | ||
| 605 | { 86558080 , 211201 }, | ||
| 606 | { 87350762 , 211647 }, | ||
| 607 | { 88148708 , 212093 }, | ||
| 608 | { 88951938 , 212539 }, | ||
| 609 | { 89760475 , 212985 }, | ||
| 610 | { 90574339 , 213432 }, | ||
| 611 | { 91393551 , 213878 }, | ||
| 612 | { 92218133 , 214325 }, | ||
| 613 | { 93048107 , 214772 }, | ||
| 614 | { 93883493 , 215219 }, | ||
| 615 | { 94724314 , 215666 }, | ||
| 616 | { 95570590 , 216114 }, | ||
| 617 | { 96422343 , 216561 }, | ||
| 618 | { 97279594 , 217009 }, | ||
| 619 | { 98142366 , 217457 }, | ||
| 620 | { 99010679 , 217905 }, | ||
| 621 | { 99884556 , 218353 }, | ||
| 622 | { 100764018 , 218801 }, | ||
| 623 | { 101649086 , 219250 }, | ||
| 624 | { 102539782 , 219698 }, | ||
| 625 | { 103436128 , 220147 }, | ||
| 626 | { 104338146 , 220596 }, | ||
| 627 | { 105245857 , 221046 }, | ||
| 628 | { 106159284 , 221495 }, | ||
| 629 | { 107078448 , 221945 }, | ||
| 630 | { 108003370 , 222394 }, | ||
| 631 | { 108934074 , 222844 }, | ||
| 632 | { 109870580 , 223294 }, | ||
| 633 | { 110812910 , 223745 }, | ||
| 634 | { 111761087 , 224195 }, | ||
| 635 | { 112715133 , 224646 }, | ||
| 636 | { 113675069 , 225097 }, | ||
| 637 | { 114640918 , 225548 }, | ||
| 638 | { 115612702 , 225999 }, | ||
| 639 | { 116590442 , 226450 }, | ||
| 640 | { 117574162 , 226902 }, | ||
| 641 | { 118563882 , 227353 }, | ||
| 642 | { 119559626 , 227805 }, | ||
| 643 | { 120561415 , 228258 }, | ||
| 644 | { 121569272 , 228710 }, | ||
| 645 | { 122583219 , 229162 }, | ||
| 646 | { 123603278 , 229615 }, | ||
| 647 | { 124629471 , 230068 }, | ||
| 648 | { 125661822 , 230521 }, | ||
| 649 | { 126700352 , 230974 }, | ||
| 650 | { 127745083 , 231428 }, | ||
| 651 | { 128796039 , 231882 }, | ||
| 652 | { 129853241 , 232336 }, | ||
| 653 | { 130916713 , 232790 }, | ||
| 654 | { 131986475 , 233244 }, | ||
| 655 | { 133062553 , 233699 }, | ||
| 656 | { 134144966 , 234153 }, | ||
| 657 | { 135233739 , 234608 }, | ||
| 658 | { 136328894 , 235064 }, | ||
| 659 | { 137430453 , 235519 }, | ||
| 660 | { 138538440 , 235975 }, | ||
| 661 | { 139652876 , 236430 }, | ||
| 662 | { 140773786 , 236886 }, | ||
| 663 | { 141901190 , 237343 }, | ||
| 664 | { 143035113 , 237799 }, | ||
| 665 | { 144175576 , 238256 }, | ||
| 666 | { 145322604 , 238713 }, | ||
| 667 | { 146476218 , 239170 }, | ||
| 668 | { 147636442 , 239627 }, | ||
| 669 | { 148803298 , 240085 }, | ||
| 670 | { 149976809 , 240542 }, | ||
| 671 | { 151156999 , 241000 }, | ||
| 672 | { 152343890 , 241459 }, | ||
| 673 | { 153537506 , 241917 }, | ||
| 674 | { 154737869 , 242376 }, | ||
| 675 | { 155945002 , 242835 }, | ||
| 676 | { 157158929 , 243294 }, | ||
| 677 | { 158379673 , 243753 }, | ||
| 678 | { 159607257 , 244213 }, | ||
| 679 | { 160841704 , 244673 }, | ||
| 680 | { 162083037 , 245133 }, | ||
| 681 | { 163331279 , 245593 }, | ||
| 682 | { 164586455 , 246054 }, | ||
| 683 | { 165848586 , 246514 }, | ||
| 684 | { 167117696 , 246975 }, | ||
| 685 | { 168393810 , 247437 }, | ||
| 686 | { 169676949 , 247898 }, | ||
| 687 | { 170967138 , 248360 }, | ||
| 688 | { 172264399 , 248822 }, | ||
| 689 | { 173568757 , 249284 }, | ||
| 690 | { 174880235 , 249747 }, | ||
| 691 | { 176198856 , 250209 }, | ||
| 692 | { 177524643 , 250672 }, | ||
| 693 | { 178857621 , 251136 }, | ||
| 694 | { 180197813 , 251599 }, | ||
| 695 | { 181545242 , 252063 }, | ||
| 696 | { 182899933 , 252527 }, | ||
| 697 | { 184261908 , 252991 }, | ||
| 698 | { 185631191 , 253456 }, | ||
| 699 | { 187007807 , 253920 }, | ||
| 700 | { 188391778 , 254385 }, | ||
| 701 | { 189783129 , 254851 }, | ||
| 702 | { 191181884 , 255316 }, | ||
| 703 | { 192588065 , 255782 }, | ||
| 704 | { 194001698 , 256248 }, | ||
| 705 | { 195422805 , 256714 }, | ||
| 706 | { 196851411 , 257181 }, | ||
| 707 | { 198287540 , 257648 }, | ||
| 708 | { 199731215 , 258115 }, | ||
| 709 | { 201182461 , 258582 }, | ||
| 710 | { 202641302 , 259050 }, | ||
| 711 | { 204107760 , 259518 }, | ||
| 712 | { 205581862 , 259986 }, | ||
| 713 | { 207063630 , 260454 }, | ||
| 714 | { 208553088 , 260923 }, | ||
| 715 | { 210050262 , 261392 }, | ||
| 716 | { 211555174 , 261861 }, | ||
| 717 | { 213067849 , 262331 }, | ||
| 718 | { 214588312 , 262800 }, | ||
| 719 | { 216116586 , 263270 }, | ||
| 720 | { 217652696 , 263741 }, | ||
| 721 | { 219196666 , 264211 }, | ||
| 722 | { 220748520 , 264682 }, | ||
| 723 | { 222308282 , 265153 }, | ||
| 724 | { 223875978 , 265625 }, | ||
| 725 | { 225451630 , 266097 }, | ||
| 726 | { 227035265 , 266569 }, | ||
| 727 | { 228626905 , 267041 }, | ||
| 728 | { 230226576 , 267514 }, | ||
| 729 | { 231834302 , 267986 }, | ||
| 730 | { 233450107 , 268460 }, | ||
| 731 | { 235074016 , 268933 }, | ||
| 732 | { 236706054 , 269407 }, | ||
| 733 | { 238346244 , 269881 }, | ||
| 734 | { 239994613 , 270355 }, | ||
| 735 | { 241651183 , 270830 }, | ||
| 736 | { 243315981 , 271305 } | ||
| 737 | }; | ||
| 738 | |||
| 739 | /* Calculate the send rate as per section 3.1 of RFC3448 | ||
| 740 | |||
| 741 | Returns send rate in bytes per second | ||
| 742 | |||
| 743 | Integer maths and lookups are used as not allowed floating point in kernel | ||
| 744 | |||
| 745 | The function for Xcalc as per section 3.1 of RFC3448 is: | ||
| 746 | |||
| 747 | X = s | ||
| 748 | ------------------------------------------------------------- | ||
| 749 | R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) | ||
| 750 | |||
| 751 | where | ||
| 752 | X is the trasmit rate in bytes/second | ||
| 753 | s is the packet size in bytes | ||
| 754 | R is the round trip time in seconds | ||
| 755 | p is the loss event rate, between 0 and 1.0, of the number of loss events | ||
| 756 | as a fraction of the number of packets transmitted | ||
| 757 | t_RTO is the TCP retransmission timeout value in seconds | ||
| 758 | b is the number of packets acknowledged by a single TCP acknowledgement | ||
| 759 | |||
| 760 | we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: | ||
| 761 | |||
| 762 | X = s | ||
| 763 | ----------------------------------------------------------------------- | ||
| 764 | R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) | ||
| 765 | |||
| 766 | |||
| 767 | which we can break down into: | ||
| 768 | |||
| 769 | X = s | ||
| 770 | -------- | ||
| 771 | R * f(p) | ||
| 772 | |||
| 773 | where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) | ||
| 774 | |||
| 775 | Function parameters: | ||
| 776 | s - bytes | ||
| 777 | R - RTT in usecs | ||
| 778 | p - loss rate (decimal fraction multiplied by 1,000,000) | ||
| 779 | |||
| 780 | Returns Xcalc in bytes per second | ||
| 781 | |||
| 782 | DON'T alter this code unless you run test cases against it as the code | ||
| 783 | has been manipulated to stop underflow/overlow. | ||
| 784 | |||
| 785 | */ | ||
| 786 | static u32 ccid3_calc_x(u16 s, u32 R, u32 p) | ||
| 787 | { | ||
| 788 | int index; | ||
| 789 | u32 f; | ||
| 790 | u64 tmp1, tmp2; | ||
| 791 | |||
| 792 | if (p < CALCX_SPLIT) | ||
| 793 | index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; | ||
| 794 | else | ||
| 795 | index = (p / (1000000 / CALCX_ARRSIZE)) - 1; | ||
| 796 | |||
| 797 | if (index < 0) | ||
| 798 | /* p should be 0 unless there is a bug in my code */ | ||
| 799 | index = 0; | ||
| 800 | |||
| 801 | if (R == 0) | ||
| 802 | R = 1; /* RTT can't be zero or else divide by zero */ | ||
| 803 | |||
| 804 | BUG_ON(index >= CALCX_ARRSIZE); | ||
| 805 | |||
| 806 | if (p >= CALCX_SPLIT) | ||
| 807 | f = calcx_lookup[index][0]; | ||
| 808 | else | ||
| 809 | f = calcx_lookup[index][1]; | ||
| 810 | |||
| 811 | tmp1 = ((u64)s * 100000000); | ||
| 812 | tmp2 = ((u64)R * (u64)f); | ||
| 813 | do_div(tmp2,10000); | ||
| 814 | do_div(tmp1,tmp2); | ||
| 815 | /* don't alter above math unless you test due to overflow on 32 bit */ | ||
| 816 | |||
| 817 | return (u32)tmp1; | ||
| 818 | } | ||
| 819 | |||
| 820 | /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ | ||
| 821 | static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) | ||
| 822 | { | ||
| 823 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) | ||
| 824 | return; | ||
| 825 | /* if no feedback spec says t_ipi is 1 second (set elsewhere and then | ||
| 826 | * doubles after every no feedback timer (separate function) */ | ||
| 827 | |||
| 828 | if (hctx->ccid3hctx_x < 10) { | ||
| 829 | ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); | ||
| 830 | hctx->ccid3hctx_x = 10; | ||
| 831 | } | ||
| 832 | hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) | ||
| 833 | / (hctx->ccid3hctx_x / 10); | ||
| 834 | /* reason for above maths with 10 in there is to avoid 32 bit | ||
| 835 | * overflow for jumbo packets */ | ||
| 836 | |||
| 837 | } | ||
| 838 | |||
| 839 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | ||
| 840 | static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) | ||
| 841 | { | ||
| 842 | hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); | ||
| 843 | |||
| 844 | } | ||
| 845 | |||
| 846 | /* | ||
| 847 | * Update X by | ||
| 848 | * If (p > 0) | ||
| 849 | * x_calc = calcX(s, R, p); | ||
| 850 | * X = max(min(X_calc, 2 * X_recv), s / t_mbi); | ||
| 851 | * Else | ||
| 852 | * If (now - tld >= R) | ||
| 853 | * X = max(min(2 * X, 2 * X_recv), s / R); | ||
| 854 | * tld = now; | ||
| 855 | */ | ||
| 856 | static void ccid3_hc_tx_update_x(struct sock *sk) | ||
| 857 | { | ||
| 858 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 859 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 860 | |||
| 861 | if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ | ||
| 862 | hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, | ||
| 863 | hctx->ccid3hctx_rtt, | ||
| 864 | hctx->ccid3hctx_p); | ||
| 865 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), | ||
| 866 | hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); | ||
| 867 | } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { | ||
| 868 | u32 rtt = hctx->ccid3hctx_rtt; | ||
| 869 | if (rtt < 10) { | ||
| 870 | rtt = 10; | ||
| 871 | } /* avoid divide by zero below */ | ||
| 872 | |||
| 873 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), | ||
| 874 | (hctx->ccid3hctx_s * 100000) / (rtt / 10)); | ||
| 875 | /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ | ||
| 876 | do_gettimeofday(&hctx->ccid3hctx_t_ld); | ||
| 877 | } | ||
| 878 | |||
| 879 | if (hctx->ccid3hctx_x == 0) { | ||
| 880 | ccid3_pr_debug("ccid3hctx_x = 0!\n"); | ||
| 881 | hctx->ccid3hctx_x = 1; | ||
| 882 | } | ||
| 883 | } | ||
| 884 | |||
| 885 | static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | ||
| 886 | { | ||
| 887 | struct sock *sk = (struct sock *)data; | ||
| 888 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 889 | unsigned long next_tmout = 0; | ||
| 890 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 891 | u32 rtt; | ||
| 892 | |||
| 893 | bh_lock_sock(sk); | ||
| 894 | if (sock_owned_by_user(sk)) { | ||
| 895 | /* Try again later. */ | ||
| 896 | /* XXX: set some sensible MIB */ | ||
| 897 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); | ||
| 898 | goto out; | ||
| 899 | } | ||
| 900 | |||
| 901 | ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, | ||
| 902 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
| 903 | |||
| 904 | if (hctx->ccid3hctx_x < 10) { | ||
| 905 | ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); | ||
| 906 | hctx->ccid3hctx_x = 10; | ||
| 907 | } | ||
| 908 | |||
| 909 | switch (hctx->ccid3hctx_state) { | ||
| 910 | case TFRC_SSTATE_TERM: | ||
| 911 | goto out; | ||
| 912 | case TFRC_SSTATE_NO_FBACK: | ||
| 913 | /* Halve send rate */ | ||
| 914 | hctx->ccid3hctx_x /= 2; | ||
| 915 | if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) | ||
| 916 | hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; | ||
| 917 | |||
| 918 | ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", | ||
| 919 | dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), | ||
| 920 | hctx->ccid3hctx_x); | ||
| 921 | next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) | ||
| 922 | / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); | ||
| 923 | /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ | ||
| 924 | /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 | ||
| 925 | * should adjust tx_t_ipi and double that to achieve it really */ | ||
| 926 | break; | ||
| 927 | case TFRC_SSTATE_FBACK: | ||
| 928 | /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ | ||
| 929 | rtt = hctx->ccid3hctx_rtt; | ||
| 930 | if (rtt < 10) | ||
| 931 | rtt = 10; | ||
| 932 | /* stop divide by zero below */ | ||
| 933 | if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= | ||
| 934 | 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { | ||
| 935 | ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, | ||
| 936 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
| 937 | /* Halve sending rate */ | ||
| 938 | |||
| 939 | /* If (X_calc > 2 * X_recv) | ||
| 940 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); | ||
| 941 | * Else | ||
| 942 | * X_recv = X_calc / 4; | ||
| 943 | */ | ||
| 944 | BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); | ||
| 945 | |||
| 946 | /* check also if p is zero -> x_calc is infinity? */ | ||
| 947 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || | ||
| 948 | hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) | ||
| 949 | hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, | ||
| 950 | hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); | ||
| 951 | else | ||
| 952 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; | ||
| 953 | |||
| 954 | /* Update sending rate */ | ||
| 955 | ccid3_hc_tx_update_x(sk); | ||
| 956 | } | ||
| 957 | if (hctx->ccid3hctx_x == 0) { | ||
| 958 | ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); | ||
| 959 | hctx->ccid3hctx_x = 10; | ||
| 960 | } | ||
| 961 | /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ | ||
| 962 | next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, | ||
| 963 | 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); | ||
| 964 | break; | ||
| 965 | default: | ||
| 966 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
| 967 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
| 968 | dump_stack(); | ||
| 969 | goto out; | ||
| 970 | } | ||
| 971 | |||
| 972 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
| 973 | jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); | ||
| 974 | hctx->ccid3hctx_idle = 1; | ||
| 975 | out: | ||
| 976 | bh_unlock_sock(sk); | ||
| 977 | sock_put(sk); | ||
| 978 | } | ||
| 979 | |||
| 980 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, | ||
| 981 | int len, long *delay) | ||
| 982 | { | ||
| 983 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 984 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 985 | struct ccid3_tx_hist_entry *new_packet = NULL; | ||
| 986 | struct timeval now; | ||
| 987 | int rc = -ENOTCONN; | ||
| 988 | |||
| 989 | // ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); | ||
| 990 | /* | ||
| 991 | * check if pure ACK or Terminating */ | ||
| 992 | /* XXX: We only call this function for DATA and DATAACK, on, these packets can have | ||
| 993 | * zero length, but why the comment about "pure ACK"? | ||
| 994 | */ | ||
| 995 | if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) | ||
| 996 | goto out; | ||
| 997 | |||
| 998 | /* See if last packet allocated was not sent */ | ||
| 999 | if (!list_empty(&hctx->ccid3hctx_hist)) | ||
| 1000 | new_packet = list_entry(hctx->ccid3hctx_hist.next, | ||
| 1001 | struct ccid3_tx_hist_entry, ccid3htx_node); | ||
| 1002 | |||
| 1003 | if (new_packet == NULL || new_packet->ccid3htx_sent) { | ||
| 1004 | new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); | ||
| 1005 | |||
| 1006 | rc = -ENOBUFS; | ||
| 1007 | if (new_packet == NULL) { | ||
| 1008 | ccid3_pr_debug("%s, sk=%p, not enough mem to add " | ||
| 1009 | "to history, send refused\n", dccp_role(sk), sk); | ||
| 1010 | goto out; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | do_gettimeofday(&now); | ||
| 1017 | |||
| 1018 | switch (hctx->ccid3hctx_state) { | ||
| 1019 | case TFRC_SSTATE_NO_SENT: | ||
| 1020 | ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, | ||
| 1021 | dp->dccps_gss); | ||
| 1022 | |||
| 1023 | hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; | ||
| 1024 | hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; | ||
| 1025 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); | ||
| 1026 | hctx->ccid3hctx_last_win_count = 0; | ||
| 1027 | hctx->ccid3hctx_t_last_win_count = now; | ||
| 1028 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | ||
| 1029 | hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; | ||
| 1030 | |||
| 1031 | /* Set nominal send time for initial packet */ | ||
| 1032 | hctx->ccid3hctx_t_nom = now; | ||
| 1033 | (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; | ||
| 1034 | timeval_fix(&(hctx->ccid3hctx_t_nom)); | ||
| 1035 | ccid3_calc_new_delta(hctx); | ||
| 1036 | rc = 0; | ||
| 1037 | break; | ||
| 1038 | case TFRC_SSTATE_NO_FBACK: | ||
| 1039 | case TFRC_SSTATE_FBACK: | ||
| 1040 | *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); | ||
| 1041 | ccid3_pr_debug("send_packet delay=%ld\n",*delay); | ||
| 1042 | *delay /= -1000; | ||
| 1043 | /* divide by -1000 is to convert to ms and get sign right */ | ||
| 1044 | rc = *delay > 0 ? -EAGAIN : 0; | ||
| 1045 | break; | ||
| 1046 | default: | ||
| 1047 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
| 1048 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
| 1049 | dump_stack(); | ||
| 1050 | rc = -EINVAL; | ||
| 1051 | break; | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | /* Can we send? if so add options and add to packet history */ | ||
| 1055 | if (rc == 0) | ||
| 1056 | new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | ||
| 1057 | out: | ||
| 1058 | return rc; | ||
| 1059 | } | ||
| 1060 | |||
| 1061 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) | ||
| 1062 | { | ||
| 1063 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1064 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 1065 | struct ccid3_tx_hist_entry *packet = NULL; | ||
| 1066 | struct timeval now; | ||
| 1067 | |||
| 1068 | // ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); | ||
| 1069 | BUG_ON(hctx == NULL); | ||
| 1070 | |||
| 1071 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
| 1072 | ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", | ||
| 1073 | dccp_role(sk), sk); | ||
| 1074 | return; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | do_gettimeofday(&now); | ||
| 1078 | |||
| 1079 | /* check if we have sent a data packet */ | ||
| 1080 | if (len > 0) { | ||
| 1081 | unsigned long quarter_rtt; | ||
| 1082 | |||
| 1083 | if (list_empty(&hctx->ccid3hctx_hist)) { | ||
| 1084 | printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); | ||
| 1085 | return; | ||
| 1086 | } | ||
| 1087 | packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); | ||
| 1088 | if (packet->ccid3htx_sent) { | ||
| 1089 | printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); | ||
| 1090 | return; | ||
| 1091 | } | ||
| 1092 | packet->ccid3htx_tstamp = now; | ||
| 1093 | packet->ccid3htx_seqno = dp->dccps_gss; | ||
| 1094 | // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); | ||
| 1095 | |||
| 1096 | /* | ||
| 1097 | * Check if win_count have changed */ | ||
| 1098 | /* COMPLIANCE_BEGIN | ||
| 1099 | * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt | ||
| 1100 | */ | ||
| 1101 | quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); | ||
| 1102 | if (quarter_rtt > 0) { | ||
| 1103 | hctx->ccid3hctx_t_last_win_count = now; | ||
| 1104 | hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + | ||
| 1105 | min_t(unsigned long, quarter_rtt, 5)) % 16; | ||
| 1106 | ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", | ||
| 1107 | dccp_role(sk), sk, | ||
| 1108 | packet->ccid3htx_win_count, | ||
| 1109 | hctx->ccid3hctx_last_win_count); | ||
| 1110 | } | ||
| 1111 | /* COMPLIANCE_END */ | ||
| 1112 | #if 0 | ||
| 1113 | ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", | ||
| 1114 | dccp_role(sk), sk, | ||
| 1115 | packet->ccid3htx_seqno, | ||
| 1116 | packet->ccid3htx_win_count); | ||
| 1117 | #endif | ||
| 1118 | hctx->ccid3hctx_idle = 0; | ||
| 1119 | packet->ccid3htx_sent = 1; | ||
| 1120 | } else | ||
| 1121 | ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", | ||
| 1122 | dccp_role(sk), sk, dp->dccps_gss); | ||
| 1123 | |||
| 1124 | switch (hctx->ccid3hctx_state) { | ||
| 1125 | case TFRC_SSTATE_NO_SENT: | ||
| 1126 | /* if first wasn't pure ack */ | ||
| 1127 | if (len != 0) | ||
| 1128 | printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", | ||
| 1129 | __FUNCTION__, dccp_role(sk)); | ||
| 1130 | return; | ||
| 1131 | case TFRC_SSTATE_NO_FBACK: | ||
| 1132 | case TFRC_SSTATE_FBACK: | ||
| 1133 | if (len > 0) { | ||
| 1134 | hctx->ccid3hctx_t_nom = now; | ||
| 1135 | ccid3_calc_new_t_ipi(hctx); | ||
| 1136 | ccid3_calc_new_delta(hctx); | ||
| 1137 | (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; | ||
| 1138 | timeval_fix(&(hctx->ccid3hctx_t_nom)); | ||
| 1139 | } | ||
| 1140 | break; | ||
| 1141 | default: | ||
| 1142 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
| 1143 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
| 1144 | dump_stack(); | ||
| 1145 | break; | ||
| 1146 | } | ||
| 1147 | } | ||
| 1148 | |||
| 1149 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
| 1150 | { | ||
| 1151 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1152 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 1153 | struct ccid3_options_received *opt_recv; | ||
| 1154 | struct ccid3_tx_hist_entry *entry, *next, *packet; | ||
| 1155 | unsigned long next_tmout; | ||
| 1156 | u16 t_elapsed; | ||
| 1157 | u32 pinv; | ||
| 1158 | u32 x_recv; | ||
| 1159 | u32 r_sample; | ||
| 1160 | #if 0 | ||
| 1161 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", | ||
| 1162 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), | ||
| 1163 | skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
| 1164 | #endif | ||
| 1165 | if (hctx == NULL) | ||
| 1166 | return; | ||
| 1167 | |||
| 1168 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
| 1169 | ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); | ||
| 1170 | return; | ||
| 1171 | } | ||
| 1172 | |||
| 1173 | /* we are only interested in ACKs */ | ||
| 1174 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | ||
| 1175 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | ||
| 1176 | return; | ||
| 1177 | |||
| 1178 | opt_recv = &hctx->ccid3hctx_options_received; | ||
| 1179 | |||
| 1180 | t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; | ||
| 1181 | x_recv = opt_recv->ccid3or_receive_rate; | ||
| 1182 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
| 1183 | |||
| 1184 | switch (hctx->ccid3hctx_state) { | ||
| 1185 | case TFRC_SSTATE_NO_SENT: | ||
| 1186 | /* FIXME: what to do here? */ | ||
| 1187 | return; | ||
| 1188 | case TFRC_SSTATE_NO_FBACK: | ||
| 1189 | case TFRC_SSTATE_FBACK: | ||
| 1190 | /* Calculate new round trip sample by | ||
| 1191 | * R_sample = (now - t_recvdata) - t_delay */ | ||
| 1192 | /* get t_recvdata from history */ | ||
| 1193 | packet = NULL; | ||
| 1194 | list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) | ||
| 1195 | if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { | ||
| 1196 | packet = entry; | ||
| 1197 | break; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | if (packet == NULL) { | ||
| 1201 | ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", | ||
| 1202 | dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 1203 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
| 1204 | return; | ||
| 1205 | } | ||
| 1206 | |||
| 1207 | /* Update RTT */ | ||
| 1208 | r_sample = now_delta(packet->ccid3htx_tstamp); | ||
| 1209 | /* FIXME: */ | ||
| 1210 | // r_sample -= usecs_to_jiffies(t_elapsed * 10); | ||
| 1211 | |||
| 1212 | /* Update RTT estimate by | ||
| 1213 | * If (No feedback recv) | ||
| 1214 | * R = R_sample; | ||
| 1215 | * Else | ||
| 1216 | * R = q * R + (1 - q) * R_sample; | ||
| 1217 | * | ||
| 1218 | * q is a constant, RFC 3448 recomments 0.9 | ||
| 1219 | */ | ||
| 1220 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | ||
| 1221 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | ||
| 1222 | hctx->ccid3hctx_rtt = r_sample; | ||
| 1223 | } else | ||
| 1224 | hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; | ||
| 1225 | |||
| 1226 | /* | ||
| 1227 | * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent | ||
| 1228 | * implemention of the new window count. | ||
| 1229 | */ | ||
| 1230 | if (hctx->ccid3hctx_rtt < 4) | ||
| 1231 | hctx->ccid3hctx_rtt = 4; | ||
| 1232 | |||
| 1233 | ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", | ||
| 1234 | dccp_role(sk), sk, | ||
| 1235 | hctx->ccid3hctx_rtt, | ||
| 1236 | r_sample); | ||
| 1237 | |||
| 1238 | /* Update timeout interval */ | ||
| 1239 | inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); | ||
| 1240 | |||
| 1241 | /* Update receive rate */ | ||
| 1242 | hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ | ||
| 1243 | |||
| 1244 | /* Update loss event rate */ | ||
| 1245 | if (pinv == ~0 || pinv == 0) | ||
| 1246 | hctx->ccid3hctx_p = 0; | ||
| 1247 | else { | ||
| 1248 | hctx->ccid3hctx_p = 1000000 / pinv; | ||
| 1249 | |||
| 1250 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { | ||
| 1251 | hctx->ccid3hctx_p = TFRC_SMALLEST_P; | ||
| 1252 | ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); | ||
| 1253 | } | ||
| 1254 | } | ||
| 1255 | |||
| 1256 | /* unschedule no feedback timer */ | ||
| 1257 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
| 1258 | |||
| 1259 | /* Update sending rate */ | ||
| 1260 | ccid3_hc_tx_update_x(sk); | ||
| 1261 | |||
| 1262 | /* Update next send time */ | ||
| 1263 | if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { | ||
| 1264 | (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; | ||
| 1265 | (hctx->ccid3hctx_t_nom).tv_sec--; | ||
| 1266 | } | ||
| 1267 | /* FIXME - if no feedback then t_ipi can go > 1 second */ | ||
| 1268 | (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; | ||
| 1269 | ccid3_calc_new_t_ipi(hctx); | ||
| 1270 | (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; | ||
| 1271 | timeval_fix(&(hctx->ccid3hctx_t_nom)); | ||
| 1272 | ccid3_calc_new_delta(hctx); | ||
| 1273 | |||
| 1274 | /* remove all packets older than the one acked from history */ | ||
| 1275 | #if 0 | ||
| 1276 | FIXME! | ||
| 1277 | list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { | ||
| 1278 | list_del_init(&entry->ccid3htx_node); | ||
| 1279 | ccid3_tx_hist_entry_delete(entry); | ||
| 1280 | } | ||
| 1281 | #endif | ||
| 1282 | if (hctx->ccid3hctx_x < 10) { | ||
| 1283 | ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); | ||
| 1284 | hctx->ccid3hctx_x = 10; | ||
| 1285 | } | ||
| 1286 | /* to prevent divide by zero below */ | ||
| 1287 | |||
| 1288 | /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ | ||
| 1289 | next_tmout = max(inet_csk(sk)->icsk_rto, | ||
| 1290 | 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); | ||
| 1291 | /* maths with 100000 and 10 is to prevent overflow with 32 bit */ | ||
| 1292 | |||
| 1293 | ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", | ||
| 1294 | dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); | ||
| 1295 | |||
| 1296 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
| 1297 | jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); | ||
| 1298 | |||
| 1299 | /* set idle flag */ | ||
| 1300 | hctx->ccid3hctx_idle = 1; | ||
| 1301 | break; | ||
| 1302 | default: | ||
| 1303 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
| 1304 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
| 1305 | dump_stack(); | ||
| 1306 | break; | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
| 1311 | { | ||
| 1312 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 1313 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 1314 | |||
| 1315 | if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | ||
| 1316 | return; | ||
| 1317 | |||
| 1318 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, | ||
| 1322 | unsigned char len, u16 idx, unsigned char *value) | ||
| 1323 | { | ||
| 1324 | int rc = 0; | ||
| 1325 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1326 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 1327 | struct ccid3_options_received *opt_recv; | ||
| 1328 | |||
| 1329 | if (hctx == NULL) | ||
| 1330 | return 0; | ||
| 1331 | |||
| 1332 | opt_recv = &hctx->ccid3hctx_options_received; | ||
| 1333 | |||
| 1334 | if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { | ||
| 1335 | opt_recv->ccid3or_seqno = dp->dccps_gsr; | ||
| 1336 | opt_recv->ccid3or_loss_event_rate = ~0; | ||
| 1337 | opt_recv->ccid3or_loss_intervals_idx = 0; | ||
| 1338 | opt_recv->ccid3or_loss_intervals_len = 0; | ||
| 1339 | opt_recv->ccid3or_receive_rate = 0; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | switch (option) { | ||
| 1343 | case TFRC_OPT_LOSS_EVENT_RATE: | ||
| 1344 | if (len != 4) { | ||
| 1345 | ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", | ||
| 1346 | dccp_role(sk), sk); | ||
| 1347 | rc = -EINVAL; | ||
| 1348 | } else { | ||
| 1349 | opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); | ||
| 1350 | ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", | ||
| 1351 | dccp_role(sk), sk, | ||
| 1352 | opt_recv->ccid3or_loss_event_rate); | ||
| 1353 | } | ||
| 1354 | break; | ||
| 1355 | case TFRC_OPT_LOSS_INTERVALS: | ||
| 1356 | opt_recv->ccid3or_loss_intervals_idx = idx; | ||
| 1357 | opt_recv->ccid3or_loss_intervals_len = len; | ||
| 1358 | ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", | ||
| 1359 | dccp_role(sk), sk, | ||
| 1360 | opt_recv->ccid3or_loss_intervals_idx, | ||
| 1361 | opt_recv->ccid3or_loss_intervals_len); | ||
| 1362 | break; | ||
| 1363 | case TFRC_OPT_RECEIVE_RATE: | ||
| 1364 | if (len != 4) { | ||
| 1365 | ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", | ||
| 1366 | dccp_role(sk), sk); | ||
| 1367 | rc = -EINVAL; | ||
| 1368 | } else { | ||
| 1369 | opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); | ||
| 1370 | ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", | ||
| 1371 | dccp_role(sk), sk, | ||
| 1372 | opt_recv->ccid3or_receive_rate); | ||
| 1373 | } | ||
| 1374 | break; | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | return rc; | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | static int ccid3_hc_tx_init(struct sock *sk) | ||
| 1381 | { | ||
| 1382 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1383 | struct ccid3_hc_tx_sock *hctx; | ||
| 1384 | |||
| 1385 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
| 1386 | |||
| 1387 | hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); | ||
| 1388 | if (hctx == NULL) | ||
| 1389 | return -ENOMEM; | ||
| 1390 | |||
| 1391 | memset(hctx, 0, sizeof(*hctx)); | ||
| 1392 | |||
| 1393 | if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && | ||
| 1394 | dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) | ||
| 1395 | hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; | ||
| 1396 | else | ||
| 1397 | hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; | ||
| 1398 | |||
| 1399 | hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ | ||
| 1400 | hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ | ||
| 1401 | inet_csk(sk)->icsk_rto = USEC_IN_SEC; | ||
| 1402 | hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; | ||
| 1403 | INIT_LIST_HEAD(&hctx->ccid3hctx_hist); | ||
| 1404 | init_timer(&hctx->ccid3hctx_no_feedback_timer); | ||
| 1405 | |||
| 1406 | return 0; | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | static void ccid3_hc_tx_exit(struct sock *sk) | ||
| 1410 | { | ||
| 1411 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1412 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
| 1413 | struct ccid3_tx_hist_entry *entry, *next; | ||
| 1414 | |||
| 1415 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
| 1416 | BUG_ON(hctx == NULL); | ||
| 1417 | |||
| 1418 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); | ||
| 1419 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
| 1420 | |||
| 1421 | /* Empty packet history */ | ||
| 1422 | list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { | ||
| 1423 | list_del_init(&entry->ccid3htx_node); | ||
| 1424 | ccid3_tx_hist_entry_delete(entry); | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | kfree(dp->dccps_hc_tx_ccid_private); | ||
| 1428 | dp->dccps_hc_tx_ccid_private = NULL; | ||
| 1429 | } | ||
| 1430 | |||
| 1431 | /* | ||
| 1432 | * RX Half Connection methods | ||
| 1433 | */ | ||
| 1434 | |||
/* States of the TFRC receiver (RX half connection) */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA	= 1,
	TFRC_RSTATE_DATA	= 2,
	TFRC_RSTATE_TERM	= 127,
};
| 1441 | |||
#ifdef CCID3_DEBUG
/*
 * Debug helper: printable name of a TFRC receiver state.  The state is used
 * directly as an index into the name table, so only the three states of
 * enum ccid3_hc_rx_states have a name.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static char *names[] = {
		[TFRC_RSTATE_NO_DATA] = "NO_DATA",
		[TFRC_RSTATE_DATA]    = "DATA",
		[TFRC_RSTATE_TERM]    = "TERM",
	};
	const char *label = names[state];

	return label;
}
#endif
| 1454 | |||
| 1455 | static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) | ||
| 1456 | { | ||
| 1457 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1458 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1459 | enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; | ||
| 1460 | |||
| 1461 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
| 1462 | dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); | ||
| 1463 | WARN_ON(state == oldstate); | ||
| 1464 | hcrx->ccid3hcrx_state = state; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) | ||
| 1468 | { | ||
| 1469 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1470 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1471 | struct ccid3_rx_hist_entry *entry, *next; | ||
| 1472 | u8 num_later = 0; | ||
| 1473 | |||
| 1474 | if (list_empty(&hcrx->ccid3hcrx_hist)) | ||
| 1475 | list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); | ||
| 1476 | else { | ||
| 1477 | u64 seqno = packet->ccid3hrx_seqno; | ||
| 1478 | struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, | ||
| 1479 | struct ccid3_rx_hist_entry, | ||
| 1480 | ccid3hrx_node); | ||
| 1481 | if (after48(seqno, iter->ccid3hrx_seqno)) | ||
| 1482 | list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); | ||
| 1483 | else { | ||
| 1484 | if (iter->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1485 | iter->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1486 | num_later = 1; | ||
| 1487 | |||
| 1488 | list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1489 | if (after48(seqno, iter->ccid3hrx_seqno)) { | ||
| 1490 | list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); | ||
| 1491 | goto trim_history; | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | if (iter->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1495 | iter->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1496 | num_later++; | ||
| 1497 | |||
| 1498 | if (num_later == TFRC_RECV_NUM_LATE_LOSS) { | ||
| 1499 | ccid3_rx_hist_entry_delete(packet); | ||
| 1500 | ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", | ||
| 1501 | dccp_role(sk), sk, seqno); | ||
| 1502 | return 1; | ||
| 1503 | } | ||
| 1504 | } | ||
| 1505 | |||
| 1506 | if (num_later < TFRC_RECV_NUM_LATE_LOSS) | ||
| 1507 | list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); | ||
| 1508 | /* FIXME: else what? should we destroy the packet like above? */ | ||
| 1509 | } | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | trim_history: | ||
| 1513 | /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ | ||
| 1514 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; | ||
| 1515 | |||
| 1516 | if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { | ||
| 1517 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1518 | if (num_later == 0) { | ||
| 1519 | list_del_init(&entry->ccid3hrx_node); | ||
| 1520 | ccid3_rx_hist_entry_delete(entry); | ||
| 1521 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1522 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1523 | --num_later; | ||
| 1524 | } | ||
| 1525 | } else { | ||
| 1526 | int step = 0; | ||
| 1527 | u8 win_count = 0; /* Not needed, but lets shut up gcc */ | ||
| 1528 | int tmp; | ||
| 1529 | /* | ||
| 1530 | * We have no loss interval history so we need at least one | ||
| 1531 | * rtt:s of data packets to approximate rtt. | ||
| 1532 | */ | ||
| 1533 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1534 | if (num_later == 0) { | ||
| 1535 | switch (step) { | ||
| 1536 | case 0: | ||
| 1537 | step = 1; | ||
| 1538 | /* OK, find next data packet */ | ||
| 1539 | num_later = 1; | ||
| 1540 | break; | ||
| 1541 | case 1: | ||
| 1542 | step = 2; | ||
| 1543 | /* OK, find next data packet */ | ||
| 1544 | num_later = 1; | ||
| 1545 | win_count = entry->ccid3hrx_win_count; | ||
| 1546 | break; | ||
| 1547 | case 2: | ||
| 1548 | tmp = win_count - entry->ccid3hrx_win_count; | ||
| 1549 | if (tmp < 0) | ||
| 1550 | tmp += TFRC_WIN_COUNT_LIMIT; | ||
| 1551 | if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { | ||
| 1552 | /* we have found a packet older than one rtt | ||
| 1553 | * remove the rest */ | ||
| 1554 | step = 3; | ||
| 1555 | } else /* OK, find next data packet */ | ||
| 1556 | num_later = 1; | ||
| 1557 | break; | ||
| 1558 | case 3: | ||
| 1559 | list_del_init(&entry->ccid3hrx_node); | ||
| 1560 | ccid3_rx_hist_entry_delete(entry); | ||
| 1561 | break; | ||
| 1562 | } | ||
| 1563 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1564 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1565 | --num_later; | ||
| 1566 | } | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | return 0; | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | static void ccid3_hc_rx_send_feedback(struct sock *sk) | ||
| 1573 | { | ||
| 1574 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1575 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1576 | struct ccid3_rx_hist_entry *entry, *packet; | ||
| 1577 | |||
| 1578 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
| 1579 | |||
| 1580 | switch (hcrx->ccid3hcrx_state) { | ||
| 1581 | case TFRC_RSTATE_NO_DATA: | ||
| 1582 | hcrx->ccid3hcrx_x_recv = 0; | ||
| 1583 | break; | ||
| 1584 | case TFRC_RSTATE_DATA: { | ||
| 1585 | u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); | ||
| 1586 | |||
| 1587 | if (delta == 0) | ||
| 1588 | delta = 1; /* to prevent divide by zero */ | ||
| 1589 | hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; | ||
| 1590 | } | ||
| 1591 | break; | ||
| 1592 | default: | ||
| 1593 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
| 1594 | __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); | ||
| 1595 | dump_stack(); | ||
| 1596 | return; | ||
| 1597 | } | ||
| 1598 | |||
| 1599 | packet = NULL; | ||
| 1600 | list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) | ||
| 1601 | if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1602 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) { | ||
| 1603 | packet = entry; | ||
| 1604 | break; | ||
| 1605 | } | ||
| 1606 | |||
| 1607 | if (packet == NULL) { | ||
| 1608 | printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", | ||
| 1609 | __FUNCTION__, dccp_role(sk), sk); | ||
| 1610 | dump_stack(); | ||
| 1611 | return; | ||
| 1612 | } | ||
| 1613 | |||
| 1614 | do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); | ||
| 1615 | hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; | ||
| 1616 | hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; | ||
| 1617 | hcrx->ccid3hcrx_bytes_recv = 0; | ||
| 1618 | |||
| 1619 | /* Convert to multiples of 10us */ | ||
| 1620 | hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; | ||
| 1621 | if (hcrx->ccid3hcrx_p == 0) | ||
| 1622 | hcrx->ccid3hcrx_pinv = ~0; | ||
| 1623 | else | ||
| 1624 | hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; | ||
| 1625 | dccp_send_ack(sk); | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
| 1629 | { | ||
| 1630 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 1631 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1632 | |||
| 1633 | if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | ||
| 1634 | return; | ||
| 1635 | |||
| 1636 | if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) | ||
| 1637 | dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); | ||
| 1638 | |||
| 1639 | if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { | ||
| 1640 | const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); | ||
| 1641 | const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); | ||
| 1642 | |||
| 1643 | dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); | ||
| 1644 | dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); | ||
| 1645 | } | ||
| 1646 | |||
| 1647 | DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; | ||
| 1648 | } | ||
| 1649 | |||
| 1650 | /* Weights used to calculate loss event rate */ | ||
| 1651 | /* | ||
| 1652 | * These are integers as per section 8 of RFC3448. We can then divide by 4 * | ||
| 1653 | * when we use it. | ||
| 1654 | */ | ||
| 1655 | const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; | ||
| 1656 | |||
| 1657 | /* | ||
| 1658 | * args: fvalue - function value to match | ||
| 1659 | * returns: p closest to that value | ||
| 1660 | * | ||
| 1661 | * both fvalue and p are multiplied by 1,000,000 to use ints | ||
| 1662 | */ | ||
| 1663 | u32 calcx_reverse_lookup(u32 fvalue) { | ||
| 1664 | int ctr = 0; | ||
| 1665 | int small; | ||
| 1666 | |||
| 1667 | if (fvalue < calcx_lookup[0][1]) | ||
| 1668 | return 0; | ||
| 1669 | if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) | ||
| 1670 | small = 1; | ||
| 1671 | else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0]) | ||
| 1672 | return 1000000; | ||
| 1673 | else | ||
| 1674 | small = 0; | ||
| 1675 | while (fvalue > calcx_lookup[ctr][small]) | ||
| 1676 | ctr++; | ||
| 1677 | if (small) | ||
| 1678 | return (CALCX_SPLIT * ctr / CALCX_ARRSIZE); | ||
| 1679 | else | ||
| 1680 | return (1000000 * ctr / CALCX_ARRSIZE) ; | ||
| 1681 | } | ||
| 1682 | |||
| 1683 | /* calculate first loss interval | ||
| 1684 | * | ||
| 1685 | * returns estimated loss interval in usecs */ | ||
| 1686 | |||
| 1687 | static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) | ||
| 1688 | { | ||
| 1689 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1690 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1691 | struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; | ||
| 1692 | u32 rtt, delta, x_recv, fval, p, tmp2; | ||
| 1693 | struct timeval tstamp, tmp_tv; | ||
| 1694 | int interval = 0; | ||
| 1695 | int win_count = 0; | ||
| 1696 | int step = 0; | ||
| 1697 | u64 tmp1; | ||
| 1698 | |||
| 1699 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1700 | if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1701 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) { | ||
| 1702 | tail = entry; | ||
| 1703 | |||
| 1704 | switch (step) { | ||
| 1705 | case 0: | ||
| 1706 | tstamp = entry->ccid3hrx_tstamp; | ||
| 1707 | win_count = entry->ccid3hrx_win_count; | ||
| 1708 | step = 1; | ||
| 1709 | break; | ||
| 1710 | case 1: | ||
| 1711 | interval = win_count - entry->ccid3hrx_win_count; | ||
| 1712 | if (interval < 0) | ||
| 1713 | interval += TFRC_WIN_COUNT_LIMIT; | ||
| 1714 | if (interval > 4) | ||
| 1715 | goto found; | ||
| 1716 | break; | ||
| 1717 | } | ||
| 1718 | } | ||
| 1719 | } | ||
| 1720 | |||
| 1721 | if (step == 0) { | ||
| 1722 | printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n", | ||
| 1723 | __FUNCTION__, dccp_role(sk), sk); | ||
| 1724 | return ~0; | ||
| 1725 | } | ||
| 1726 | |||
| 1727 | if (interval == 0) { | ||
| 1728 | ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n", | ||
| 1729 | dccp_role(sk), sk); | ||
| 1730 | interval = 1; | ||
| 1731 | } | ||
| 1732 | found: | ||
| 1733 | timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); | ||
| 1734 | rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; | ||
| 1735 | ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", | ||
| 1736 | dccp_role(sk), sk, rtt); | ||
| 1737 | if (rtt == 0) | ||
| 1738 | rtt = 1; | ||
| 1739 | |||
| 1740 | delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); | ||
| 1741 | if (delta == 0) | ||
| 1742 | delta = 1; | ||
| 1743 | |||
| 1744 | x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; | ||
| 1745 | |||
| 1746 | tmp1 = (u64)x_recv * (u64)rtt; | ||
| 1747 | do_div(tmp1,10000000); | ||
| 1748 | tmp2 = (u32)tmp1; | ||
| 1749 | fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; | ||
| 1750 | /* do not alter order above or you will get overflow on 32 bit */ | ||
| 1751 | p = calcx_reverse_lookup(fval); | ||
| 1752 | ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\ | ||
| 1753 | dccp_role(sk), sk, x_recv, p); | ||
| 1754 | |||
| 1755 | if (p == 0) | ||
| 1756 | return ~0; | ||
| 1757 | else | ||
| 1758 | return 1000000 / p; | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) | ||
| 1762 | { | ||
| 1763 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1764 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1765 | struct ccid3_loss_interval_hist_entry *li_entry; | ||
| 1766 | |||
| 1767 | if (seq_loss != DCCP_MAX_SEQNO + 1) { | ||
| 1768 | ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n", | ||
| 1769 | dccp_role(sk), sk, seq_loss, win_loss); | ||
| 1770 | |||
| 1771 | if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { | ||
| 1772 | struct ccid3_loss_interval_hist_entry *li_tail = NULL; | ||
| 1773 | int i; | ||
| 1774 | |||
| 1775 | ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk); | ||
| 1776 | for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { | ||
| 1777 | li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); | ||
| 1778 | if (li_entry == NULL) { | ||
| 1779 | ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); | ||
| 1780 | ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n", | ||
| 1781 | dccp_role(sk), sk); | ||
| 1782 | return; | ||
| 1783 | } | ||
| 1784 | if (li_tail == NULL) | ||
| 1785 | li_tail = li_entry; | ||
| 1786 | list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist); | ||
| 1787 | } | ||
| 1788 | |||
| 1789 | li_entry->ccid3lih_seqno = seq_loss; | ||
| 1790 | li_entry->ccid3lih_win_count = win_loss; | ||
| 1791 | |||
| 1792 | li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); | ||
| 1793 | } | ||
| 1794 | } | ||
| 1795 | /* FIXME: find end of interval */ | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | static void ccid3_hc_rx_detect_loss(struct sock *sk) | ||
| 1799 | { | ||
| 1800 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1801 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1802 | struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; | ||
| 1803 | struct ccid3_rx_hist_entry *a_loss = NULL; | ||
| 1804 | struct ccid3_rx_hist_entry *b_loss = NULL; | ||
| 1805 | u64 seq_loss = DCCP_MAX_SEQNO + 1; | ||
| 1806 | u8 win_loss = 0; | ||
| 1807 | u8 num_later = TFRC_RECV_NUM_LATE_LOSS; | ||
| 1808 | |||
| 1809 | list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1810 | if (num_later == 0) { | ||
| 1811 | b_loss = entry; | ||
| 1812 | break; | ||
| 1813 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1814 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1815 | --num_later; | ||
| 1816 | } | ||
| 1817 | |||
| 1818 | if (b_loss == NULL) | ||
| 1819 | goto out_update_li; | ||
| 1820 | |||
| 1821 | a_next = b_next; | ||
| 1822 | num_later = 1; | ||
| 1823 | #if 0 | ||
| 1824 | FIXME MERGE GIT! | ||
| 1825 | list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1826 | if (num_later == 0) { | ||
| 1827 | a_loss = entry; | ||
| 1828 | break; | ||
| 1829 | } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1830 | entry->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1831 | --num_later; | ||
| 1832 | } | ||
| 1833 | #endif | ||
| 1834 | |||
| 1835 | if (a_loss == NULL) { | ||
| 1836 | if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { | ||
| 1837 | /* no loss event have occured yet */ | ||
| 1838 | ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " | ||
| 1839 | "packet by comparing to initial seqno\n", | ||
| 1840 | dccp_role(sk), sk); | ||
| 1841 | goto out_update_li; | ||
| 1842 | } else { | ||
| 1843 | pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", | ||
| 1844 | __FUNCTION__, dccp_role(sk), sk); | ||
| 1845 | return; | ||
| 1846 | } | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | /* Locate a lost data packet */ | ||
| 1850 | entry = packet = b_loss; | ||
| 1851 | #if 0 | ||
| 1852 | FIXME MERGE GIT! | ||
| 1853 | list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { | ||
| 1854 | u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); | ||
| 1855 | |||
| 1856 | if (delta != 0) { | ||
| 1857 | if (packet->ccid3hrx_type == DCCP_PKT_DATA || | ||
| 1858 | packet->ccid3hrx_type == DCCP_PKT_DATAACK) | ||
| 1859 | --delta; | ||
| 1860 | /* | ||
| 1861 | * FIXME: check this, probably this % usage is because | ||
| 1862 | * in earlier drafts the ndp count was just 8 bits | ||
| 1863 | * long, but now it cam be up to 24 bits long. | ||
| 1864 | */ | ||
| 1865 | #if 0 | ||
| 1866 | if (delta % DCCP_NDP_LIMIT != | ||
| 1867 | (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) | ||
| 1868 | #endif | ||
| 1869 | if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { | ||
| 1870 | seq_loss = entry->ccid3hrx_seqno; | ||
| 1871 | dccp_inc_seqno(&seq_loss); | ||
| 1872 | } | ||
| 1873 | } | ||
| 1874 | packet = entry; | ||
| 1875 | if (packet == a_loss) | ||
| 1876 | break; | ||
| 1877 | } | ||
| 1878 | #endif | ||
| 1879 | |||
| 1880 | if (seq_loss != DCCP_MAX_SEQNO + 1) | ||
| 1881 | win_loss = a_loss->ccid3hrx_win_count; | ||
| 1882 | |||
| 1883 | out_update_li: | ||
| 1884 | ccid3_hc_rx_update_li(sk, seq_loss, win_loss); | ||
| 1885 | } | ||
| 1886 | |||
| 1887 | static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) | ||
| 1888 | { | ||
| 1889 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1890 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1891 | struct ccid3_loss_interval_hist_entry *li_entry, *li_next; | ||
| 1892 | int i = 0; | ||
| 1893 | u32 i_tot; | ||
| 1894 | u32 i_tot0 = 0; | ||
| 1895 | u32 i_tot1 = 0; | ||
| 1896 | u32 w_tot = 0; | ||
| 1897 | |||
| 1898 | list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { | ||
| 1899 | if (i < TFRC_RECV_IVAL_F_LENGTH) { | ||
| 1900 | i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; | ||
| 1901 | w_tot += ccid3_hc_rx_w[i]; | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | if (i != 0) | ||
| 1905 | i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; | ||
| 1906 | |||
| 1907 | if (++i > TFRC_RECV_IVAL_F_LENGTH) | ||
| 1908 | break; | ||
| 1909 | } | ||
| 1910 | |||
| 1911 | if (i != TFRC_RECV_IVAL_F_LENGTH) { | ||
| 1912 | pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", | ||
| 1913 | __FUNCTION__, dccp_role(sk), sk); | ||
| 1914 | return 0; | ||
| 1915 | } | ||
| 1916 | |||
| 1917 | i_tot = max(i_tot0, i_tot1); | ||
| 1918 | |||
| 1919 | /* FIXME: Why do we do this? -Ian McDonald */ | ||
| 1920 | if (i_tot * 4 < w_tot) | ||
| 1921 | i_tot = w_tot * 4; | ||
| 1922 | |||
| 1923 | return i_tot * 4 / w_tot; | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
| 1927 | { | ||
| 1928 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1929 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 1930 | struct ccid3_rx_hist_entry *packet; | ||
| 1931 | struct timeval now; | ||
| 1932 | u8 win_count; | ||
| 1933 | u32 p_prev; | ||
| 1934 | int ins; | ||
| 1935 | #if 0 | ||
| 1936 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", | ||
| 1937 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), | ||
| 1938 | skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
| 1939 | #endif | ||
| 1940 | if (hcrx == NULL) | ||
| 1941 | return; | ||
| 1942 | |||
| 1943 | BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || | ||
| 1944 | hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); | ||
| 1945 | |||
| 1946 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | ||
| 1947 | case DCCP_PKT_ACK: | ||
| 1948 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | ||
| 1949 | return; | ||
| 1950 | case DCCP_PKT_DATAACK: | ||
| 1951 | if (dp->dccps_options_received.dccpor_timestamp_echo == 0) | ||
| 1952 | break; | ||
| 1953 | p_prev = hcrx->ccid3hcrx_rtt; | ||
| 1954 | do_gettimeofday(&now); | ||
| 1955 | /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - | ||
| 1956 | usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); | ||
| 1957 | FIXME - I think above code is broken - have to look at options more, will also need | ||
| 1958 | to fix pr_debug below */ | ||
| 1959 | if (p_prev != hcrx->ccid3hcrx_rtt) | ||
| 1960 | ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", | ||
| 1961 | dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, | ||
| 1962 | dp->dccps_options_received.dccpor_timestamp_echo, | ||
| 1963 | dp->dccps_options_received.dccpor_elapsed_time); | ||
| 1964 | break; | ||
| 1965 | case DCCP_PKT_DATA: | ||
| 1966 | break; | ||
| 1967 | default: | ||
| 1968 | ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", | ||
| 1969 | dccp_role(sk), sk, | ||
| 1970 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
| 1971 | return; | ||
| 1972 | } | ||
| 1973 | |||
| 1974 | packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); | ||
| 1975 | if (packet == NULL) { | ||
| 1976 | ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", | ||
| 1977 | dccp_role(sk), sk); | ||
| 1978 | return; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | win_count = packet->ccid3hrx_win_count; | ||
| 1982 | |||
| 1983 | ins = ccid3_hc_rx_add_hist(sk, packet); | ||
| 1984 | |||
| 1985 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) | ||
| 1986 | return; | ||
| 1987 | |||
| 1988 | switch (hcrx->ccid3hcrx_state) { | ||
| 1989 | case TFRC_RSTATE_NO_DATA: | ||
| 1990 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", | ||
| 1991 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); | ||
| 1992 | ccid3_hc_rx_send_feedback(sk); | ||
| 1993 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); | ||
| 1994 | return; | ||
| 1995 | case TFRC_RSTATE_DATA: | ||
| 1996 | hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; | ||
| 1997 | if (ins == 0) { | ||
| 1998 | do_gettimeofday(&now); | ||
| 1999 | if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { | ||
| 2000 | hcrx->ccid3hcrx_tstamp_last_ack = now; | ||
| 2001 | ccid3_hc_rx_send_feedback(sk); | ||
| 2002 | } | ||
| 2003 | return; | ||
| 2004 | } | ||
| 2005 | break; | ||
| 2006 | default: | ||
| 2007 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
| 2008 | __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); | ||
| 2009 | dump_stack(); | ||
| 2010 | return; | ||
| 2011 | } | ||
| 2012 | |||
| 2013 | /* Dealing with packet loss */ | ||
| 2014 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n", | ||
| 2015 | dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); | ||
| 2016 | |||
| 2017 | ccid3_hc_rx_detect_loss(sk); | ||
| 2018 | p_prev = hcrx->ccid3hcrx_p; | ||
| 2019 | |||
| 2020 | /* Calculate loss event rate */ | ||
| 2021 | if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) | ||
| 2022 | /* Scaling up by 1000000 as fixed decimal */ | ||
| 2023 | hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); | ||
| 2024 | |||
| 2025 | if (hcrx->ccid3hcrx_p > p_prev) { | ||
| 2026 | ccid3_hc_rx_send_feedback(sk); | ||
| 2027 | return; | ||
| 2028 | } | ||
| 2029 | } | ||
| 2030 | |||
| 2031 | static int ccid3_hc_rx_init(struct sock *sk) | ||
| 2032 | { | ||
| 2033 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 2034 | struct ccid3_hc_rx_sock *hcrx; | ||
| 2035 | |||
| 2036 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
| 2037 | |||
| 2038 | hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); | ||
| 2039 | if (hcrx == NULL) | ||
| 2040 | return -ENOMEM; | ||
| 2041 | |||
| 2042 | memset(hcrx, 0, sizeof(*hcrx)); | ||
| 2043 | |||
| 2044 | if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && | ||
| 2045 | dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) | ||
| 2046 | hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; | ||
| 2047 | else | ||
| 2048 | hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; | ||
| 2049 | |||
| 2050 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; | ||
| 2051 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); | ||
| 2052 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); | ||
| 2053 | |||
| 2054 | return 0; | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | static void ccid3_hc_rx_exit(struct sock *sk) | ||
| 2058 | { | ||
| 2059 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 2060 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
| 2061 | |||
| 2062 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
| 2063 | |||
| 2064 | if (hcrx == NULL) | ||
| 2065 | return; | ||
| 2066 | |||
| 2067 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); | ||
| 2068 | |||
| 2069 | /* Empty packet history */ | ||
| 2070 | ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); | ||
| 2071 | |||
| 2072 | /* Empty loss interval history */ | ||
| 2073 | ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); | ||
| 2074 | |||
| 2075 | kfree(dp->dccps_hc_rx_ccid_private); | ||
| 2076 | dp->dccps_hc_rx_ccid_private = NULL; | ||
| 2077 | } | ||
| 2078 | |||
| 2079 | static struct ccid ccid3 = { | ||
| 2080 | .ccid_id = 3, | ||
| 2081 | .ccid_name = "ccid3", | ||
| 2082 | .ccid_owner = THIS_MODULE, | ||
| 2083 | .ccid_init = ccid3_init, | ||
| 2084 | .ccid_exit = ccid3_exit, | ||
| 2085 | .ccid_hc_tx_init = ccid3_hc_tx_init, | ||
| 2086 | .ccid_hc_tx_exit = ccid3_hc_tx_exit, | ||
| 2087 | .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, | ||
| 2088 | .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, | ||
| 2089 | .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, | ||
| 2090 | .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, | ||
| 2091 | .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, | ||
| 2092 | .ccid_hc_rx_init = ccid3_hc_rx_init, | ||
| 2093 | .ccid_hc_rx_exit = ccid3_hc_rx_exit, | ||
| 2094 | .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, | ||
| 2095 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, | ||
| 2096 | }; | ||
| 2097 | |||
| 2098 | module_param(ccid3_debug, int, 0444); | ||
| 2099 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | ||
| 2100 | |||
| 2101 | static __init int ccid3_module_init(void) | ||
| 2102 | { | ||
| 2103 | int rc = -ENOMEM; | ||
| 2104 | |||
| 2105 | ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", | ||
| 2106 | sizeof(struct ccid3_tx_hist_entry), 0, | ||
| 2107 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
| 2108 | if (ccid3_tx_hist_slab == NULL) | ||
| 2109 | goto out; | ||
| 2110 | |||
| 2111 | ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", | ||
| 2112 | sizeof(struct ccid3_rx_hist_entry), 0, | ||
| 2113 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
| 2114 | if (ccid3_rx_hist_slab == NULL) | ||
| 2115 | goto out_free_tx_history; | ||
| 2116 | |||
| 2117 | ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", | ||
| 2118 | sizeof(struct ccid3_loss_interval_hist_entry), 0, | ||
| 2119 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
| 2120 | if (ccid3_loss_interval_hist_slab == NULL) | ||
| 2121 | goto out_free_rx_history; | ||
| 2122 | |||
| 2123 | rc = ccid_register(&ccid3); | ||
| 2124 | if (rc != 0) | ||
| 2125 | goto out_free_loss_interval_history; | ||
| 2126 | |||
| 2127 | out: | ||
| 2128 | return rc; | ||
| 2129 | out_free_loss_interval_history: | ||
| 2130 | kmem_cache_destroy(ccid3_loss_interval_hist_slab); | ||
| 2131 | ccid3_loss_interval_hist_slab = NULL; | ||
| 2132 | out_free_rx_history: | ||
| 2133 | kmem_cache_destroy(ccid3_rx_hist_slab); | ||
| 2134 | ccid3_rx_hist_slab = NULL; | ||
| 2135 | out_free_tx_history: | ||
| 2136 | kmem_cache_destroy(ccid3_tx_hist_slab); | ||
| 2137 | ccid3_tx_hist_slab = NULL; | ||
| 2138 | goto out; | ||
| 2139 | } | ||
| 2140 | module_init(ccid3_module_init); | ||
| 2141 | |||
| 2142 | static __exit void ccid3_module_exit(void) | ||
| 2143 | { | ||
| 2144 | ccid_unregister(&ccid3); | ||
| 2145 | |||
| 2146 | if (ccid3_tx_hist_slab != NULL) { | ||
| 2147 | kmem_cache_destroy(ccid3_tx_hist_slab); | ||
| 2148 | ccid3_tx_hist_slab = NULL; | ||
| 2149 | } | ||
| 2150 | if (ccid3_rx_hist_slab != NULL) { | ||
| 2151 | kmem_cache_destroy(ccid3_rx_hist_slab); | ||
| 2152 | ccid3_rx_hist_slab = NULL; | ||
| 2153 | } | ||
| 2154 | if (ccid3_loss_interval_hist_slab != NULL) { | ||
| 2155 | kmem_cache_destroy(ccid3_loss_interval_hist_slab); | ||
| 2156 | ccid3_loss_interval_hist_slab = NULL; | ||
| 2157 | } | ||
| 2158 | } | ||
| 2159 | module_exit(ccid3_module_exit); | ||
| 2160 | |||
| 2161 | MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz> & Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); | ||
| 2162 | MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); | ||
| 2163 | MODULE_LICENSE("GPL"); | ||
| 2164 | MODULE_ALIAS("net-dccp-ccid-3"); | ||
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h new file mode 100644 index 000000000000..5d6b623e64da --- /dev/null +++ b/net/dccp/ccids/ccid3.h | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/ccids/ccid3.h | ||
| 3 | * | ||
| 4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
| 5 | * | ||
| 6 | * An implementation of the DCCP protocol | ||
| 7 | * | ||
| 8 | * This code has been developed by the University of Waikato WAND | ||
| 9 | * research group. For further information please see http://www.wand.net.nz/ | ||
| 10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
| 11 | * | ||
| 12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
| 13 | * authors: | ||
| 14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
| 15 | * | ||
| 16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
| 17 | * and to make it work as a loadable module in the DCCP stack written by | ||
| 18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
| 19 | * | ||
| 20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 21 | * | ||
| 22 | * This program is free software; you can redistribute it and/or modify | ||
| 23 | * it under the terms of the GNU General Public License as published by | ||
| 24 | * the Free Software Foundation; either version 2 of the License, or | ||
| 25 | * (at your option) any later version. | ||
| 26 | * | ||
| 27 | * This program is distributed in the hope that it will be useful, | ||
| 28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 30 | * GNU General Public License for more details. | ||
| 31 | * | ||
| 32 | * You should have received a copy of the GNU General Public License | ||
| 33 | * along with this program; if not, write to the Free Software | ||
| 34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 35 | */ | ||
| 36 | #ifndef _DCCP_CCID3_H_ | ||
| 37 | #define _DCCP_CCID3_H_ | ||
| 38 | |||
| 39 | #include <linux/types.h> | ||
| 40 | #include <linux/list.h> | ||
| 41 | #include <linux/timer.h> | ||
| 42 | |||
| 43 | struct ccid3_tx_hist_entry { /* element type of the "dccp_ccid3_tx_history" slab cache; presumably one record per sent packet - TODO confirm in ccid3.c */ | ||
| 44 | struct list_head ccid3htx_node; /* list linkage */ | ||
| 45 | u64 ccid3htx_seqno:48, /* 48-bit sequence number field */ | ||
| 46 | ccid3htx_win_count:8, /* presumably the window counter sent with the packet - verify */ | ||
| 47 | ccid3htx_sent:1; /* one-bit flag; judging by the name, set once the packet was sent - verify */ | ||
| 48 | struct timeval ccid3htx_tstamp; /* timestamp of the entry */ | ||
| 49 | }; | ||
| 50 | |||
| 51 | struct ccid3_options_received { /* embedded in struct ccid3_hc_tx_sock; presumably the CCID3 options parsed from the peer's feedback - TODO confirm */ | ||
| 52 | u64 ccid3or_seqno:48, /* 48-bit sequence number field */ | ||
| 53 | ccid3or_loss_intervals_idx:16; | ||
| 54 | u16 ccid3or_loss_intervals_len; | ||
| 55 | u32 ccid3or_loss_event_rate; /* presumably the value of the loss event rate option - verify */ | ||
| 56 | u32 ccid3or_receive_rate; /* presumably the value of the receive rate option - verify */ | ||
| 57 | }; | ||
| 58 | |||
| 59 | /** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block | ||
| 60 | * | ||
| 61 | * @ccid3hctx_state - Sender state | ||
| 62 | * @ccid3hctx_x - Current sending rate | ||
| 63 | * @ccid3hctx_x_recv - Receive rate | ||
| 64 | * @ccid3hctx_x_calc - Calculated send (?) rate | ||
| 65 | * @ccid3hctx_s - Packet size | ||
| 66 | * @ccid3hctx_rtt - Estimate of current round trip time in usecs | ||
| 67 | * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 | ||
| 68 | * @ccid3hctx_last_win_count - Last window counter sent | ||
| 69 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent | ||
| 70 | * @ccid3hctx_no_feedback_timer - Handle to no feedback timer | ||
| 71 | * @ccid3hctx_idle - FIXME | ||
| 72 | * @ccid3hctx_t_ld - Time last doubled during slow start | ||
| 73 | * @ccid3hctx_t_nom - Nominal send time of next packet | ||
| 74 | * @ccid3hctx_t_ipi - Interpacket (send) interval | ||
| 75 | * @ccid3hctx_delta - Send timer delta | ||
| 76 | * @ccid3hctx_hist - Packet history | ||
| 77 | */ | ||
| 78 | struct ccid3_hc_tx_sock { | ||
| 79 | u32 ccid3hctx_x; | ||
| 80 | u32 ccid3hctx_x_recv; | ||
| 81 | u32 ccid3hctx_x_calc; | ||
| 82 | u16 ccid3hctx_s; | ||
| 83 | u32 ccid3hctx_rtt; | ||
| 84 | u32 ccid3hctx_p; | ||
| 85 | u8 ccid3hctx_state; | ||
| 86 | u8 ccid3hctx_last_win_count; | ||
| 87 | u8 ccid3hctx_idle; | ||
| 88 | struct timeval ccid3hctx_t_last_win_count; | ||
| 89 | struct timer_list ccid3hctx_no_feedback_timer; | ||
| 90 | struct timeval ccid3hctx_t_ld; | ||
| 91 | struct timeval ccid3hctx_t_nom; | ||
| 92 | u32 ccid3hctx_t_ipi; | ||
| 93 | u32 ccid3hctx_delta; | ||
| 94 | struct list_head ccid3hctx_hist; | ||
| 95 | struct ccid3_options_received ccid3hctx_options_received; /* not listed in the kernel-doc above; presumably the options parsed from the last feedback - verify */ | ||
| 96 | }; | ||
| 97 | |||
| 98 | struct ccid3_loss_interval_hist_entry { /* one entry of the receiver's loss interval history list */ | ||
| 99 | struct list_head ccid3lih_node; /* linkage into ccid3hcrx_loss_interval_hist */ | ||
| 100 | u64 ccid3lih_seqno:48, /* sequence number recorded for the loss (set from seq_loss) */ | ||
| 101 | ccid3lih_win_count:4; /* window counter recorded for the loss; only the low 4 bits of the u8 value are kept */ | ||
| 102 | u32 ccid3lih_interval; /* loss interval; weighted and averaged by ccid3_hc_rx_calc_i_mean() */ | ||
| 103 | }; | ||
| 104 | |||
| 105 | struct ccid3_rx_hist_entry { /* one received packet in the receiver's packet history list */ | ||
| 106 | struct list_head ccid3hrx_node; /* linkage into ccid3hcrx_hist */ | ||
| 107 | u64 ccid3hrx_seqno:48, /* sequence number; history entries are ordered by comparing it with after48() */ | ||
| 108 | ccid3hrx_win_count:4, /* window counter of the packet, used for the RTT approximations */ | ||
| 109 | ccid3hrx_type:4; /* packet type; compared against DCCP_PKT_DATA / DCCP_PKT_DATAACK */ | ||
| 110 | u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ | ||
| 111 | struct timeval ccid3hrx_tstamp; /* timestamp of the entry; feedback reports the time elapsed since it */ | ||
| 112 | }; | ||
| 113 | |||
| 114 | struct ccid3_hc_rx_sock { /* CCID3 receiver half connection state, stored in dccps_hc_rx_ccid_private */ | ||
| 115 | u64 ccid3hcrx_seqno_last_counter:48, /* seqno of the data packet the last feedback was based on */ | ||
| 116 | ccid3hcrx_state:8, /* receiver state, one of the TFRC_RSTATE_* values */ | ||
| 117 | ccid3hcrx_last_counter:4; /* window counter of that packet; copied into dccpd_ccval of outgoing packets */ | ||
| 118 | unsigned long ccid3hcrx_rtt; /* RTT estimate; compared against now_delta() (usecs) although a debug message labels it jiffies - TODO confirm unit */ | ||
| 119 | u32 ccid3hcrx_p; /* loss event rate, scaled by 1000000 */ | ||
| 120 | u32 ccid3hcrx_bytes_recv; /* payload bytes received since the last feedback */ | ||
| 121 | struct timeval ccid3hcrx_tstamp_last_feedback; /* time the last feedback was generated */ | ||
| 122 | struct timeval ccid3hcrx_tstamp_last_ack; /* time periodic feedback was last triggered */ | ||
| 123 | struct list_head ccid3hcrx_hist; /* received packet history (struct ccid3_rx_hist_entry) */ | ||
| 124 | struct list_head ccid3hcrx_loss_interval_hist; /* loss interval history (struct ccid3_loss_interval_hist_entry) */ | ||
| 125 | u16 ccid3hcrx_s; /* packet size used in the rate calculations */ | ||
| 126 | u32 ccid3hcrx_pinv; /* 1000000 / p, or ~0 when p is 0; sent as the loss event rate option */ | ||
| 127 | u32 ccid3hcrx_elapsed_time; /* elapsed time in multiples of 10us, sent as the elapsed time option */ | ||
| 128 | u32 ccid3hcrx_x_recv; /* receive rate in bytes per second, sent as the receive rate option */ | ||
| 129 | }; | ||
| 130 | |||
/*
 * Read ccid3hctx_<field> from the sender state attached to the dccp_sock
 * pointer @s, yielding 0 when no sender state is attached.
 *
 * @s is parenthesized so that any pointer expression (e.g. &array[i]) can be
 * passed.  It is evaluated twice, so it must not have side effects.
 */
#define ccid3_hc_tx_field(s, field) \
	((s)->dccps_hc_tx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_tx_sock *)(s)->dccps_hc_tx_ccid_private)->ccid3hctx_##field)

/*
 * Read ccid3hcrx_<field> from the receiver state attached to the dccp_sock
 * pointer @s, yielding 0 when no receiver state is attached.  Same argument
 * rules as ccid3_hc_tx_field().
 */
#define ccid3_hc_rx_field(s, field) \
	((s)->dccps_hc_rx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_rx_sock *)(s)->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
| 136 | |||
| 137 | #endif /* _DCCP_CCID3_H_ */ | ||
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 000000000000..fb83454102c1 --- /dev/null +++ b/net/dccp/dccp.h | |||
| @@ -0,0 +1,422 @@ | |||
| 1 | #ifndef _DCCP_H | ||
| 2 | #define _DCCP_H | ||
| 3 | /* | ||
| 4 | * net/dccp/dccp.h | ||
| 5 | * | ||
| 6 | * An implementation of the DCCP protocol | ||
| 7 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify it | ||
| 10 | * under the terms of the GNU General Public License version 2 as | ||
| 11 | * published by the Free Software Foundation. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <net/snmp.h> | ||
| 16 | #include <net/sock.h> | ||
| 17 | #include <net/tcp.h> | ||
| 18 | |||
| 19 | #define DCCP_DEBUG | ||
| 20 | |||
#ifdef DCCP_DEBUG
/* Run-time switch: the debug macros below print only when this is non-zero. */
extern int dccp_debug;

/*
 * dccp_pr_debug - printk() at KERN_DEBUG level, prefixed with the name of
 * the calling function.  Prints nothing while dccp_debug is zero.
 */
#define dccp_pr_debug(format, a...) \
	do { if (dccp_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
	} while (0)
/*
 * dccp_pr_debug_cat - like dccp_pr_debug() but passes the format to
 * printk() unchanged (no log level, no function-name prefix).
 */
#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0)
#else
/* Debugging compiled out: both macros expand to nothing. */
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
#endif
| 33 | |||
| 34 | extern struct inet_hashinfo dccp_hashinfo; | ||
| 35 | |||
| 36 | extern atomic_t dccp_orphan_count; | ||
| 37 | extern int dccp_tw_count; | ||
| 38 | extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); | ||
| 39 | |||
| 40 | extern void dccp_time_wait(struct sock *sk, int state, int timeo); | ||
| 41 | |||
/* FIXME: Right size this */
#define DCCP_MAX_OPT_LEN 128

#define DCCP_MAX_PACKET_HDR 32

/*
 * Sum of the largest DCCP packet header, the largest option area and
 * MAX_HEADER.
 */
#define MAX_DCCP_HEADER  (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)

#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
				     * state, about 60 seconds */

/* draft-ietf-dccp-spec-11.txt initial RTO value (3 seconds, in jiffies) */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))

/* Maximal interval between probes for local resources (half a second). */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))

/* Upper bound for the retransmission timeout: 120 seconds, in jiffies. */
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
| 59 | |||
| 60 | extern struct proto dccp_v4_prot; | ||
| 61 | |||
| 62 | /* is seq1 < seq2 ? */ | ||
/*
 * before48 - modulo-2^48 "seq1 < seq2"
 *
 * Both operands are moved into the top 48 bits of a 64-bit word, so the sign
 * of their difference gives the circular ordering.  Returns 1 when seq1
 * precedes seq2, 0 otherwise (including when they are equal).
 */
static inline const int before48(const u64 seq1, const u64 seq2)
{
	const s64 delta = (s64)((seq1 << 16) - (seq2 << 16));

	return delta < 0;
}
| 67 | |||
| 68 | /* is seq1 > seq2 ? */ | ||
/*
 * after48 - modulo-2^48 "seq1 > seq2"
 *
 * Mirror image of the "before" test: the operands are shifted into the top
 * 48 bits and seq1 is subtracted from seq2; a negative result means seq1
 * follows seq2.  Returns 1 in that case, 0 otherwise.
 */
static inline const int after48(const u64 seq1, const u64 seq2)
{
	const s64 delta = (s64)((seq2 << 16) - (seq1 << 16));

	return delta < 0;
}
| 73 | |||
| 74 | /* is seq2 <= seq1 <= seq3 ? */ | ||
/*
 * between48 - modulo-2^48 "seq2 <= seq1 <= seq3"
 *
 * Measures, in the shifted 48-bit space, how far seq3 and seq1 each lie
 * ahead of seq2; seq1 is inside the window when it is no further ahead than
 * seq3.  Returns 1 when inside (bounds included), 0 otherwise.
 */
static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3)
{
	const u64 window = (seq3 << 16) - (seq2 << 16);
	const u64 offset = (seq1 << 16) - (seq2 << 16);

	return window >= offset;
}
| 79 | |||
| 80 | static inline u64 max48(const u64 seq1, const u64 seq2) | ||
| 81 | { | ||
| 82 | return after48(seq1, seq2) ? seq1 : seq2; | ||
| 83 | } | ||
| 84 | |||
/*
 * Indices into the mibs[] array of struct dccp_mib.  __DCCP_MIB_MAX is not
 * a counter: it is the number of slots and must stay last.
 */
enum {
	DCCP_MIB_NUM = 0,
	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
	DCCP_MIB_ESTABRESETS,			/* EstabResets */
	DCCP_MIB_CURRESTAB,			/* CurrEstab */
	DCCP_MIB_OUTSEGS,			/* OutSegs */
	DCCP_MIB_OUTRSTS,
	DCCP_MIB_ABORTONTIMEOUT,
	DCCP_MIB_TIMEOUTS,
	DCCP_MIB_ABORTFAILED,
	DCCP_MIB_PASSIVEOPENS,
	DCCP_MIB_ATTEMPTFAILS,
	DCCP_MIB_OUTDATAGRAMS,
	DCCP_MIB_INERRS,
	DCCP_MIB_OPTMANDATORYERROR,
	DCCP_MIB_INVALIDOPT,
	__DCCP_MIB_MAX				/* number of entries, keep last */
};
| 103 | |||
#define DCCP_MIB_MAX	__DCCP_MIB_MAX
/* DCCP statistics block: one unsigned long counter per DCCP_MIB_* index. */
struct dccp_mib {
	unsigned long	mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;

/*
 * dccp_statistics is the protocol-wide instance; the wrappers below forward
 * to the SNMP_* macro of the same suffix with dccp_statistics as the target.
 */
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_INC_STATS(field)		SNMP_INC_STATS(dccp_statistics, field)
#define DCCP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(dccp_statistics, field)
#define DCCP_INC_STATS_USER(field)	SNMP_INC_STATS_USER(dccp_statistics, field)
#define DCCP_DEC_STATS(field)		SNMP_DEC_STATS(dccp_statistics, field)
#define DCCP_ADD_STATS_BH(field, val)	SNMP_ADD_STATS_BH(dccp_statistics, field, val)
#define DCCP_ADD_STATS_USER(field, val)	SNMP_ADD_STATS_USER(dccp_statistics, field, val)
| 116 | |||
| 117 | extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); | ||
| 118 | extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); | ||
| 119 | |||
| 120 | extern int dccp_send_response(struct sock *sk); | ||
| 121 | extern void dccp_send_ack(struct sock *sk); | ||
| 122 | extern void dccp_send_delayed_ack(struct sock *sk); | ||
| 123 | extern void dccp_send_sync(struct sock *sk, u64 seq); | ||
| 124 | |||
| 125 | extern void dccp_init_xmit_timers(struct sock *sk); | ||
/*
 * dccp_clear_xmit_timers - clear the transmit timers of @sk.
 *
 * Thin wrapper: simply forwards to inet_csk_clear_xmit_timers().
 */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
| 130 | |||
| 131 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); | ||
| 132 | |||
| 133 | extern const char *dccp_packet_name(const int type); | ||
| 134 | extern const char *dccp_state_name(const int state); | ||
| 135 | |||
| 136 | static inline void dccp_set_state(struct sock *sk, const int state) | ||
| 137 | { | ||
| 138 | const int oldstate = sk->sk_state; | ||
| 139 | |||
| 140 | dccp_pr_debug("%s(%p) %-10.10s -> %s\n", | ||
| 141 | dccp_role(sk), sk, | ||
| 142 | dccp_state_name(oldstate), dccp_state_name(state)); | ||
| 143 | WARN_ON(state == oldstate); | ||
| 144 | |||
| 145 | switch (state) { | ||
| 146 | case DCCP_OPEN: | ||
| 147 | if (oldstate != DCCP_OPEN) | ||
| 148 | DCCP_INC_STATS(DCCP_MIB_CURRESTAB); | ||
| 149 | break; | ||
| 150 | |||
| 151 | case DCCP_CLOSED: | ||
| 152 | if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) | ||
| 153 | DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); | ||
| 154 | |||
| 155 | sk->sk_prot->unhash(sk); | ||
| 156 | if (inet_csk(sk)->icsk_bind_hash != NULL && | ||
| 157 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | ||
| 158 | inet_put_port(&dccp_hashinfo, sk); | ||
| 159 | /* fall through */ | ||
| 160 | default: | ||
| 161 | if (oldstate == DCCP_OPEN) | ||
| 162 | DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); | ||
| 163 | } | ||
| 164 | |||
| 165 | /* Change state AFTER socket is unhashed to avoid closed | ||
| 166 | * socket sitting in hash tables. | ||
| 167 | */ | ||
| 168 | sk->sk_state = state; | ||
| 169 | } | ||
| 170 | |||
| 171 | static inline void dccp_done(struct sock *sk) | ||
| 172 | { | ||
| 173 | dccp_set_state(sk, DCCP_CLOSED); | ||
| 174 | dccp_clear_xmit_timers(sk); | ||
| 175 | |||
| 176 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
| 177 | |||
| 178 | if (!sock_flag(sk, SOCK_DEAD)) | ||
| 179 | sk->sk_state_change(sk); | ||
| 180 | else | ||
| 181 | inet_csk_destroy_sock(sk); | ||
| 182 | } | ||
| 183 | |||
| 184 | static inline void dccp_openreq_init(struct request_sock *req, | ||
| 185 | struct dccp_sock *dp, | ||
| 186 | struct sk_buff *skb) | ||
| 187 | { | ||
| 188 | /* | ||
| 189 | * FIXME: fill in the other req fields from the DCCP options | ||
| 190 | * received | ||
| 191 | */ | ||
| 192 | inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; | ||
| 193 | inet_rsk(req)->acked = 0; | ||
| 194 | req->rcv_wnd = 0; | ||
| 195 | } | ||
| 196 | |||
| 197 | extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, | ||
| 198 | struct sk_buff *skb); | ||
| 199 | extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); | ||
| 200 | |||
| 201 | extern struct sock *dccp_create_openreq_child(struct sock *sk, | ||
| 202 | const struct request_sock *req, | ||
| 203 | const struct sk_buff *skb); | ||
| 204 | |||
| 205 | extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); | ||
| 206 | |||
| 207 | extern void dccp_v4_err(struct sk_buff *skb, u32); | ||
| 208 | |||
| 209 | extern int dccp_v4_rcv(struct sk_buff *skb); | ||
| 210 | |||
| 211 | extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, | ||
| 212 | struct sk_buff *skb, | ||
| 213 | struct request_sock *req, | ||
| 214 | struct dst_entry *dst); | ||
| 215 | extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
| 216 | struct request_sock *req, | ||
| 217 | struct request_sock **prev); | ||
| 218 | |||
| 219 | extern int dccp_child_process(struct sock *parent, struct sock *child, | ||
| 220 | struct sk_buff *skb); | ||
| 221 | extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
| 222 | struct dccp_hdr *dh, unsigned len); | ||
| 223 | extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
| 224 | const struct dccp_hdr *dh, const unsigned len); | ||
| 225 | |||
| 226 | extern void dccp_close(struct sock *sk, long timeout); | ||
| 227 | extern struct sk_buff *dccp_make_response(struct sock *sk, | ||
| 228 | struct dst_entry *dst, | ||
| 229 | struct request_sock *req); | ||
| 230 | |||
| 231 | extern int dccp_connect(struct sock *sk); | ||
| 232 | extern int dccp_disconnect(struct sock *sk, int flags); | ||
| 233 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
| 234 | char *optval, int *optlen); | ||
| 235 | extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); | ||
| 236 | extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
| 237 | size_t size); | ||
| 238 | extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, | ||
| 239 | struct msghdr *msg, size_t len, int nonblock, | ||
| 240 | int flags, int *addr_len); | ||
| 241 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
| 242 | char *optval, int optlen); | ||
| 243 | extern void dccp_shutdown(struct sock *sk, int how); | ||
| 244 | |||
| 245 | extern int dccp_v4_checksum(struct sk_buff *skb); | ||
| 246 | |||
| 247 | extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); | ||
| 248 | extern void dccp_send_close(struct sock *sk); | ||
| 249 | |||
/*
 * struct dccp_skb_cb - per-packet DCCP data kept in the skb control buffer
 * (see DCCP_SKB_CB() below).
 *
 * dccpd_ack_seq holds DCCP_PKT_WITHOUT_ACK_SEQ for packets that carry no
 * acknowledgement number (that is the value the input path compares against).
 */
struct dccp_skb_cb {
	__u8 dccpd_type;	/* packet type (compared against DCCP_PKT_*) */
	__u8 dccpd_reset_code;
	__u8 dccpd_service;
	__u8 dccpd_ccval;
	__u64 dccpd_seq;	/* sequence number of the packet */
	__u64 dccpd_ack_seq;	/* acknowledgement number, or DCCP_PKT_WITHOUT_ACK_SEQ */
	int  dccpd_opt_len;
};

/* Overlay struct dccp_skb_cb on the start of the skb's cb[] scratch area. */
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
| 261 | |||
| 262 | static inline int dccp_non_data_packet(const struct sk_buff *skb) | ||
| 263 | { | ||
| 264 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
| 265 | |||
| 266 | return type == DCCP_PKT_ACK || | ||
| 267 | type == DCCP_PKT_CLOSE || | ||
| 268 | type == DCCP_PKT_CLOSEREQ || | ||
| 269 | type == DCCP_PKT_RESET || | ||
| 270 | type == DCCP_PKT_SYNC || | ||
| 271 | type == DCCP_PKT_SYNCACK; | ||
| 272 | } | ||
| 273 | |||
| 274 | static inline int dccp_packet_without_ack(const struct sk_buff *skb) | ||
| 275 | { | ||
| 276 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
| 277 | |||
| 278 | return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; | ||
| 279 | } | ||
| 280 | |||
/* Largest 48-bit sequence number; doubles as the 48-bit mask. */
#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
/* Sentinel outside the 48-bit range: "this packet has no ack number". */
#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)

/*
 * dccp_set_seqno - store @value in *@seqno reduced modulo 2^48
 * @seqno: destination
 * @value: result of 64-bit arithmetic on sequence numbers
 *
 * Masking (rather than subtracting 2^48 once) also gives the right answer
 * when the caller's arithmetic wrapped below zero as a u64, e.g.
 * "gsr + 1 - window / 4" with a small gsr, or overshot by more than 2^48.
 */
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
	*seqno = value & DCCP_MAX_SEQNO;
}
| 290 | |||
/*
 * dccp_delta_seqno - distance from @seqno1 forward to @seqno2, modulo 2^48
 *
 * The subtraction is done with both values shifted into the top 48 bits so
 * that it wraps at 2^48; the result is shifted back down before returning.
 */
static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
{
	const u64 from = seqno1 << 16;
	const u64 to = seqno2 << 16;

	return (to - from) >> 16;
}
| 295 | |||
| 296 | static inline void dccp_inc_seqno(u64 *seqno) | ||
| 297 | { | ||
| 298 | if (++*seqno > DCCP_MAX_SEQNO) | ||
| 299 | *seqno = 0; | ||
| 300 | } | ||
| 301 | |||
/*
 * dccp_hdr_set_seq - write sequence number @gss into a DCCP header
 * @dh:  generic header; the extended header is taken to start immediately
 *       after it (at dh + sizeof(*dh))
 * @gss: sequence number to store
 *
 * The bits above the low 32 go to dh->dccph_seq and the low 32 bits to
 * dccph_seq_low of the extended header, each converted with htonl().  On
 * little-endian bitfield layouts the htonl() result for the high part is
 * additionally shifted right by 8 before being stored.
 */
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
	struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh));

#if defined(__LITTLE_ENDIAN_BITFIELD)
	dh->dccph_seq = htonl((gss >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
	dh->dccph_seq = htonl((gss >> 32));
#else
#error  "Adjust your <asm/byteorder.h> defines"
#endif
	dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
| 315 | |||
/*
 * dccp_hdr_set_ack - write acknowledgement number @gsr into an ack subheader
 * @dhack: acknowledgement bits of the header
 * @gsr:   acknowledgement number to store
 *
 * Same split as for the sequence number: the bits above the low 32 go to
 * dccph_ack_nr_high (shifted right by 8 after htonl() on little-endian
 * bitfield layouts) and the low 32 bits go to dccph_ack_nr_low.
 */
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr)
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
	dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
	dhack->dccph_ack_nr_high = htonl((gsr >> 32));
#else
#error  "Adjust your <asm/byteorder.h> defines"
#endif
	dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
}
| 327 | |||
| 328 | static inline void dccp_update_gsr(struct sock *sk, u64 seq) | ||
| 329 | { | ||
| 330 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 331 | u64 tmp_gsr; | ||
| 332 | |||
| 333 | dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); | ||
| 334 | dp->dccps_gsr = seq; | ||
| 335 | dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); | ||
| 336 | dccp_set_seqno(&dp->dccps_swh, | ||
| 337 | dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); | ||
| 338 | } | ||
| 339 | |||
| 340 | static inline void dccp_update_gss(struct sock *sk, u64 seq) | ||
| 341 | { | ||
| 342 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 343 | u64 tmp_gss; | ||
| 344 | |||
| 345 | dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); | ||
| 346 | dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); | ||
| 347 | dp->dccps_awh = dp->dccps_gss = seq; | ||
| 348 | } | ||
| 349 | |||
| 350 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); | ||
| 351 | extern void dccp_insert_option_elapsed_time(struct sock *sk, | ||
| 352 | struct sk_buff *skb, | ||
| 353 | u32 elapsed_time); | ||
| 354 | extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
| 355 | unsigned char option, | ||
| 356 | const void *value, unsigned char len); | ||
| 357 | |||
| 358 | extern struct socket *dccp_ctl_socket; | ||
| 359 | |||
| 360 | #define DCCP_ACKPKTS_STATE_RECEIVED 0 | ||
| 361 | #define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) | ||
| 362 | #define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) | ||
| 363 | |||
| 364 | #define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ | ||
| 365 | #define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ | ||
| 366 | |||
/** struct dccp_ackpkts - acknowledgeable packets
 *
 * This data structure is the one defined in the DCCP draft
 * Appendix A.
 *
 * @dccpap_buf_head - circular buffer head
 * @dccpap_buf_tail - circular buffer tail
 * @dccpap_buf_ackno - ack # of the most recent acknowledgeable packet in the buffer (i.e. %dccpap_buf_head)
 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0
 *
 * Additionally, the HC-Receiver must keep some information about the
 * Ack Vectors it has recently sent. For each packet sent carrying an
 * Ack Vector, it remembers four variables:
 *
 * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno)
 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno)
 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
 *
 * @dccpap_buf_vector_len - presumably the length of the vector currently held in the buffer (TODO confirm)
 * @dccpap_ack_vector_len - presumably the vector length at the time of the last acknowledgement sent (TODO confirm)
 * @dccpap_time - a time value; what it records is not visible from this header
 * @dccpap_buf_len - circular buffer length
 * @dccpap_buf - circular buffer of acknowledgeable packets (trailing array, sized at allocation)
 */
struct dccp_ackpkts {
	unsigned int		dccpap_buf_head;
	unsigned int		dccpap_buf_tail;
	u64			dccpap_buf_ackno;
	u64			dccpap_ack_seqno;
	u64			dccpap_ack_ackno;
	unsigned int		dccpap_ack_ptr;
	unsigned int		dccpap_buf_vector_len;
	unsigned int		dccpap_ack_vector_len;
	unsigned int		dccpap_buf_len;
	unsigned long		dccpap_time;
	u8			dccpap_buf_nonce;
	u8			dccpap_ack_nonce;
	u8			dccpap_buf[0];
};
| 404 | |||
| 405 | extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); | ||
| 406 | extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); | ||
| 407 | extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); | ||
| 408 | extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, | ||
| 409 | struct sock *sk, u64 ackno); | ||
| 410 | |||
/*
 * Ack vector dump helpers.  With DCCP_DEBUG defined they are real functions
 * implemented elsewhere; otherwise they are empty inline stubs so callers
 * need no #ifdef of their own.
 */
#ifdef DCCP_DEBUG
extern void dccp_ackvector_print(const u64 ackno,
				 const unsigned char *vector, int len);
extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
#else
static inline void dccp_ackvector_print(const u64 ackno,
					const unsigned char *vector,
					int len) { }
static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
#endif
| 421 | |||
| 422 | #endif /* _DCCP_H */ | ||
diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 000000000000..622e976a51fe --- /dev/null +++ b/net/dccp/input.c | |||
| @@ -0,0 +1,510 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/input.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/config.h> | ||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <linux/skbuff.h> | ||
| 16 | |||
| 17 | #include <net/sock.h> | ||
| 18 | |||
| 19 | #include "ccid.h" | ||
| 20 | #include "dccp.h" | ||
| 21 | |||
/*
 * dccp_fin - deliver the connection-ending packet to the receive queue
 * @sk:  socket the packet arrived on
 * @skb: the packet; ownership passes to the socket's receive queue
 *
 * Marks the receive side as shut down, flags the socket SOCK_DONE, strips
 * the DCCP header from @skb (dccph_doff * 4 bytes), appends it to
 * sk_receive_queue, charges it to @sk and calls sk_data_ready.
 */
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);
	__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);
	sk->sk_data_ready(sk, 0);
}
| 31 | |||
| 32 | static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) | ||
| 33 | { | ||
| 34 | switch (sk->sk_state) { | ||
| 35 | case DCCP_PARTOPEN: | ||
| 36 | case DCCP_OPEN: | ||
| 37 | dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); | ||
| 38 | dccp_fin(sk, skb); | ||
| 39 | dccp_set_state(sk, DCCP_CLOSED); | ||
| 40 | break; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) | ||
| 45 | { | ||
| 46 | /* | ||
| 47 | * Step 7: Check for unexpected packet types | ||
| 48 | * If (S.is_server and P.type == CloseReq) | ||
| 49 | * Send Sync packet acknowledging P.seqno | ||
| 50 | * Drop packet and return | ||
| 51 | */ | ||
| 52 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { | ||
| 53 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 54 | return; | ||
| 55 | } | ||
| 56 | |||
| 57 | switch (sk->sk_state) { | ||
| 58 | case DCCP_PARTOPEN: | ||
| 59 | case DCCP_OPEN: | ||
| 60 | dccp_set_state(sk, DCCP_CLOSING); | ||
| 61 | dccp_send_close(sk); | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | ||
| 67 | { | ||
| 68 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 69 | |||
| 70 | if (dp->dccps_options.dccpo_send_ack_vector) | ||
| 71 | dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, | ||
| 72 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
| 73 | } | ||
| 74 | |||
/*
 * dccp_check_seqno - sequence/ack number validation (Steps 5 and 6)
 * @sk:  receiving socket
 * @skb: packet under test
 *
 * Returns 0 when the packet is acceptable (GSR/SWL/SWH, and GAR where
 * applicable, have then been updated) and -1 when it must be dropped.
 * On a Step 6 failure a Sync acknowledging the packet is sent before
 * returning; a Step 5 failure is dropped silently.  The skb itself is
 * never freed here.
 */
static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	struct dccp_sock *dp = dccp_sk(sk);
	/* Window lower bounds for Step 6, sampled before Step 5 can move them */
	u64 lswl = dp->dccps_swl;
	u64 lawl = dp->dccps_awl;

	/*
	 *   Step 5: Prepare sequence numbers for Sync
	 *     If P.type == Sync or P.type == SyncAck,
	 *	  If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
	 *	     / * P is valid, so update sequence number variables
	 *		 accordingly.  After this update, P will pass the tests
	 *		 in Step 6.  A SyncAck is generated if necessary in
	 *		 Step 15 * /
	 *	     Update S.GSR, S.SWL, S.SWH
	 *	  Otherwise,
	 *	     Drop packet and return
	 */
	if (dh->dccph_type == DCCP_PKT_SYNC ||
	    dh->dccph_type == DCCP_PKT_SYNCACK) {
		if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) &&
		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
			dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
		else
			return -1;
	/*
	 *   Step 6: Check sequence numbers
	 *      Let LSWL = S.SWL and LAWL = S.AWL
	 *      If P.type == CloseReq or P.type == Close or P.type == Reset,
	 *	  LSWL := S.GSR + 1, LAWL := S.GAR
	 *      If LSWL <= P.seqno <= S.SWH
	 *	     and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
	 *	  Update S.GSR, S.SWL, S.SWH
	 *	  If P.type != Sync,
	 *	     Update S.GAR
	 *      Otherwise,
	 *	  Send Sync packet acknowledging P.seqno
	 *	  Drop packet and return
	 */
	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
		   dh->dccph_type == DCCP_PKT_CLOSE ||
		   dh->dccph_type == DCCP_PKT_RESET) {
		/* Tighter bounds for connection-ending packets */
		lswl = dp->dccps_gsr;
		dccp_inc_seqno(&lswl);
		lawl = dp->dccps_gar;
	}

	/* Step 6 proper; packets without an ack number skip the ack test */
	if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
	    (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
	     between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) {
		dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);

		if (dh->dccph_type != DCCP_PKT_SYNC &&
		    DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
			dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
	} else {
		dccp_pr_debug("Step 6 failed, sending SYNC...\n");
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
		return -1;
	}

	return 0;
}
| 139 | |||
/*
 * dccp_rcv_established - receive path for a connection that is open
 * @sk:  receiving socket
 * @skb: received packet; always consumed (queued or freed) except for a
 *       Close that dccp_rcv_close() ignores
 * @dh:  DCCP header of @skb
 * @len: not used in this function
 *
 * Validates sequence numbers and options, feeds the ack vector and both
 * CCID halves, then dispatches on the packet type.  Always returns 0.
 */
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct dccp_hdr *dh, const unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);

	if (dccp_check_seqno(sk, skb))
		goto discard;

	if (dccp_parse_options(sk, skb))
		goto discard;

	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_event_ack_recv(sk, skb);

	/*
	 * FIXME: check ECN to see if we should use
	 *        DCCP_ACKPKTS_STATE_ECN_MARKED
	 */
	if (dp->dccps_options.dccpo_send_ack_vector) {
		struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;

		/* Record this packet as received; non-zero means no room left */
		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
				     DCCP_SKB_CB(skb)->dccpd_seq,
				     DCCP_ACKPKTS_STATE_RECEIVED)) {
			LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n"));
			/* One past the largest valid seqno, i.e. not a real sequence number */
			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
			goto discard;
		}

		/*
		 * FIXME: this activation is probably wrong, have to study more
		 * TCP delack machinery and how it fits into DCCP draft, but
		 * for now it kinda "works" 8)
		 */
		if (!inet_csk_ack_scheduled(sk)) {
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX);
		}
	}

	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

	switch (dccp_hdr(skb)->dccph_type) {
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_DATA:
		/*
		 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option
		 * if it is.
		 */
		__skb_pull(skb, dh->dccph_doff * 4);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk, 0);
		return 0;
	case DCCP_PKT_ACK:
		goto discard;
	case DCCP_PKT_RESET:
		/*
		 *  Step 9: Process Reset
		 *	If P.type == Reset,
		 *		Tear down connection
		 *		S.state := TIMEWAIT
		 *		Set TIMEWAIT timer
		 *		Drop packet and return
		 */
		/* skb is queued by dccp_fin(), so it must not be freed here */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;
	case DCCP_PKT_CLOSEREQ:
		dccp_rcv_closereq(sk, skb);
		goto discard;
	case DCCP_PKT_CLOSE:
		dccp_rcv_close(sk, skb);
		return 0;
	case DCCP_PKT_REQUEST:
		/* Step 7
		 *   or (S.is_server and P.type == Response)
		 *   or (S.is_client and P.type == Request)
		 *   or (S.state >= OPEN and P.type == Request
		 *	and P.seqno >= S.OSR)
		 *    or (S.state >= OPEN and P.type == Response
		 *	and P.seqno >= S.OSR)
		 *    or (S.state == RESPOND and P.type == Data),
		 *  Send Sync packet acknowledging P.seqno
		 *  Drop packet and return
		 */
		if (dp->dccps_role != DCCP_ROLE_LISTEN)
			goto send_sync;
		goto check_seq;
	case DCCP_PKT_RESPONSE:
		if (dp->dccps_role != DCCP_ROLE_CLIENT)
			goto send_sync;
check_seq:
		if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
send_sync:
			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
		}
		break;
	}

	/* Reached by Request/Response and by any type not listed above */
	DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
discard:
	__kfree_skb(skb);
	return 0;
}
| 248 | |||
| 249 | static int dccp_rcv_request_sent_state_process(struct sock *sk, | ||
| 250 | struct sk_buff *skb, | ||
| 251 | const struct dccp_hdr *dh, | ||
| 252 | const unsigned len) | ||
| 253 | { | ||
| 254 | /* | ||
| 255 | * Step 4: Prepare sequence numbers in REQUEST | ||
| 256 | * If S.state == REQUEST, | ||
| 257 | * If (P.type == Response or P.type == Reset) | ||
| 258 | * and S.AWL <= P.ackno <= S.AWH, | ||
| 259 | * / * Set sequence number variables corresponding to the | ||
| 260 | * other endpoint, so P will pass the tests in Step 6 * / | ||
| 261 | * Set S.GSR, S.ISR, S.SWL, S.SWH | ||
| 262 | * / * Response processing continues in Step 10; Reset | ||
| 263 | * processing continues in Step 9 * / | ||
| 264 | */ | ||
| 265 | if (dh->dccph_type == DCCP_PKT_RESPONSE) { | ||
| 266 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 267 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 268 | |||
| 269 | /* Stop the REQUEST timer */ | ||
| 270 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | ||
| 271 | BUG_TRAP(sk->sk_send_head != NULL); | ||
| 272 | __kfree_skb(sk->sk_send_head); | ||
| 273 | sk->sk_send_head = NULL; | ||
| 274 | |||
| 275 | if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { | ||
| 276 | dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", | ||
| 277 | dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); | ||
| 278 | goto out_invalid_packet; | ||
| 279 | } | ||
| 280 | |||
| 281 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 282 | dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 283 | |||
| 284 | if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || | ||
| 285 | ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { | ||
| 286 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
| 287 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
| 288 | /* FIXME: send appropriate RESET code */ | ||
| 289 | goto out_invalid_packet; | ||
| 290 | } | ||
| 291 | |||
| 292 | dccp_sync_mss(sk, dp->dccps_pmtu_cookie); | ||
| 293 | |||
| 294 | /* | ||
| 295 | * Step 10: Process REQUEST state (second part) | ||
| 296 | * If S.state == REQUEST, | ||
| 297 | * / * If we get here, P is a valid Response from the server (see | ||
| 298 | * Step 4), and we should move to PARTOPEN state. PARTOPEN | ||
| 299 | * means send an Ack, don't send Data packets, retransmit | ||
| 300 | * Acks periodically, and always include any Init Cookie from | ||
| 301 | * the Response * / | ||
| 302 | * S.state := PARTOPEN | ||
| 303 | * Set PARTOPEN timer | ||
| 304 | * Continue with S.state == PARTOPEN | ||
| 305 | * / * Step 12 will send the Ack completing the three-way | ||
| 306 | * handshake * / | ||
| 307 | */ | ||
| 308 | dccp_set_state(sk, DCCP_PARTOPEN); | ||
| 309 | |||
| 310 | /* Make sure socket is routed, for correct metrics. */ | ||
| 311 | inet_sk_rebuild_header(sk); | ||
| 312 | |||
| 313 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
| 314 | sk->sk_state_change(sk); | ||
| 315 | sk_wake_async(sk, 0, POLL_OUT); | ||
| 316 | } | ||
| 317 | |||
| 318 | if (sk->sk_write_pending || icsk->icsk_ack.pingpong || | ||
| 319 | icsk->icsk_accept_queue.rskq_defer_accept) { | ||
| 320 | /* Save one ACK. Data will be ready after | ||
| 321 | * several ticks, if write_pending is set. | ||
| 322 | * | ||
| 323 | * It may be deleted, but with this feature tcpdumps | ||
| 324 | * look so _wonderfully_ clever, that I was not able | ||
| 325 | * to stand against the temptation 8) --ANK | ||
| 326 | */ | ||
| 327 | /* | ||
| 328 | * OK, in DCCP we can as well do a similar trick, its | ||
| 329 | * even in the draft, but there is no need for us to | ||
| 330 | * schedule an ack here, as dccp_sendmsg does this for | ||
| 331 | * us, also stated in the draft. -acme | ||
| 332 | */ | ||
| 333 | __kfree_skb(skb); | ||
| 334 | return 0; | ||
| 335 | } | ||
| 336 | dccp_send_ack(sk); | ||
| 337 | return -1; | ||
| 338 | } | ||
| 339 | |||
| 340 | out_invalid_packet: | ||
| 341 | return 1; /* dccp_v4_do_rcv will send a reset, but... | ||
| 342 | FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ | ||
| 343 | } | ||
| 344 | |||
| 345 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, | ||
| 346 | struct sk_buff *skb, | ||
| 347 | const struct dccp_hdr *dh, | ||
| 348 | const unsigned len) | ||
| 349 | { | ||
| 350 | int queued = 0; | ||
| 351 | |||
| 352 | switch (dh->dccph_type) { | ||
| 353 | case DCCP_PKT_RESET: | ||
| 354 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
| 355 | break; | ||
| 356 | case DCCP_PKT_DATAACK: | ||
| 357 | case DCCP_PKT_ACK: | ||
| 358 | /* | ||
| 359 | * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, | ||
| 360 | * but only if we haven't used the DELACK timer for something else, | ||
| 361 | * like sending a delayed ack for a TIMESTAMP echo, etc, for now | ||
| 362 | * were not clearing it, sending an extra ACK when there is nothing | ||
| 363 | * else to do in DELACK is not a big deal after all. | ||
| 364 | */ | ||
| 365 | |||
| 366 | /* Stop the PARTOPEN timer */ | ||
| 367 | if (sk->sk_state == DCCP_PARTOPEN) | ||
| 368 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
| 369 | |||
| 370 | dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 371 | dccp_set_state(sk, DCCP_OPEN); | ||
| 372 | |||
| 373 | if (dh->dccph_type == DCCP_PKT_DATAACK) { | ||
| 374 | dccp_rcv_established(sk, skb, dh, len); | ||
| 375 | queued = 1; /* packet was queued (by dccp_rcv_established) */ | ||
| 376 | } | ||
| 377 | break; | ||
| 378 | } | ||
| 379 | |||
| 380 | return queued; | ||
| 381 | } | ||
| 382 | |||
| 383 | int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
| 384 | struct dccp_hdr *dh, unsigned len) | ||
| 385 | { | ||
| 386 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 387 | const int old_state = sk->sk_state; | ||
| 388 | int queued = 0; | ||
| 389 | |||
| 390 | if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { | ||
| 391 | if (dccp_check_seqno(sk, skb)) | ||
| 392 | goto discard; | ||
| 393 | |||
| 394 | /* | ||
| 395 | * Step 8: Process options and mark acknowledgeable | ||
| 396 | */ | ||
| 397 | if (dccp_parse_options(sk, skb)) | ||
| 398 | goto discard; | ||
| 399 | |||
| 400 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
| 401 | dccp_event_ack_recv(sk, skb); | ||
| 402 | |||
| 403 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
| 404 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
| 405 | |||
| 406 | /* | ||
| 407 | * FIXME: check ECN to see if we should use | ||
| 408 | * DCCP_ACKPKTS_STATE_ECN_MARKED | ||
| 409 | */ | ||
| 410 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
| 411 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, | ||
| 412 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
| 413 | DCCP_ACKPKTS_STATE_RECEIVED)) | ||
| 414 | goto discard; | ||
| 415 | /* | ||
| 416 | * FIXME: this activation is probably wrong, have to study more | ||
| 417 | * TCP delack machinery and how it fits into DCCP draft, but | ||
| 418 | * for now it kinda "works" 8) | ||
| 419 | */ | ||
| 420 | if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && | ||
| 421 | !inet_csk_ack_scheduled(sk)) { | ||
| 422 | inet_csk_schedule_ack(sk); | ||
| 423 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); | ||
| 424 | } | ||
| 425 | } | ||
| 426 | } | ||
| 427 | |||
| 428 | /* | ||
| 429 | * Step 9: Process Reset | ||
| 430 | * If P.type == Reset, | ||
| 431 | * Tear down connection | ||
| 432 | * S.state := TIMEWAIT | ||
| 433 | * Set TIMEWAIT timer | ||
| 434 | * Drop packet and return | ||
| 435 | */ | ||
| 436 | if (dh->dccph_type == DCCP_PKT_RESET) { | ||
| 437 | /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ | ||
| 438 | dccp_fin(sk, skb); | ||
| 439 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | ||
| 440 | return 0; | ||
| 441 | /* | ||
| 442 | * Step 7: Check for unexpected packet types | ||
| 443 | * If (S.is_server and P.type == CloseReq) | ||
| 444 | * or (S.is_server and P.type == Response) | ||
| 445 | * or (S.is_client and P.type == Request) | ||
| 446 | * or (S.state == RESPOND and P.type == Data), | ||
| 447 | * Send Sync packet acknowledging P.seqno | ||
| 448 | * Drop packet and return | ||
| 449 | */ | ||
| 450 | } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && | ||
| 451 | (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || | ||
| 452 | (dp->dccps_role == DCCP_ROLE_CLIENT && | ||
| 453 | dh->dccph_type == DCCP_PKT_REQUEST) || | ||
| 454 | (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { | ||
| 455 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 456 | goto discard; | ||
| 457 | } | ||
| 458 | |||
| 459 | switch (sk->sk_state) { | ||
| 460 | case DCCP_CLOSED: | ||
| 461 | return 1; | ||
| 462 | |||
| 463 | case DCCP_LISTEN: | ||
| 464 | if (dh->dccph_type == DCCP_PKT_ACK || | ||
| 465 | dh->dccph_type == DCCP_PKT_DATAACK) | ||
| 466 | return 1; | ||
| 467 | |||
| 468 | if (dh->dccph_type == DCCP_PKT_RESET) | ||
| 469 | goto discard; | ||
| 470 | |||
| 471 | if (dh->dccph_type == DCCP_PKT_REQUEST) { | ||
| 472 | if (dccp_v4_conn_request(sk, skb) < 0) | ||
| 473 | return 1; | ||
| 474 | |||
| 475 | /* FIXME: do congestion control initialization */ | ||
| 476 | goto discard; | ||
| 477 | } | ||
| 478 | goto discard; | ||
| 479 | |||
| 480 | case DCCP_REQUESTING: | ||
| 481 | /* FIXME: do congestion control initialization */ | ||
| 482 | |||
| 483 | queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); | ||
| 484 | if (queued >= 0) | ||
| 485 | return queued; | ||
| 486 | |||
| 487 | __kfree_skb(skb); | ||
| 488 | return 0; | ||
| 489 | |||
| 490 | case DCCP_RESPOND: | ||
| 491 | case DCCP_PARTOPEN: | ||
| 492 | queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); | ||
| 493 | break; | ||
| 494 | } | ||
| 495 | |||
| 496 | if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { | ||
| 497 | switch (old_state) { | ||
| 498 | case DCCP_PARTOPEN: | ||
| 499 | sk->sk_state_change(sk); | ||
| 500 | sk_wake_async(sk, 0, POLL_OUT); | ||
| 501 | break; | ||
| 502 | } | ||
| 503 | } | ||
| 504 | |||
| 505 | if (!queued) { | ||
| 506 | discard: | ||
| 507 | __kfree_skb(skb); | ||
| 508 | } | ||
| 509 | return 0; | ||
| 510 | } | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 000000000000..083bacaecb3b --- /dev/null +++ b/net/dccp/ipv4.c | |||
| @@ -0,0 +1,1289 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/ipv4.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/config.h> | ||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <linux/icmp.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/skbuff.h> | ||
| 18 | #include <linux/random.h> | ||
| 19 | |||
| 20 | #include <net/icmp.h> | ||
| 21 | #include <net/inet_hashtables.h> | ||
| 22 | #include <net/sock.h> | ||
| 23 | #include <net/tcp_states.h> | ||
| 24 | #include <net/xfrm.h> | ||
| 25 | |||
| 26 | #include "ccid.h" | ||
| 27 | #include "dccp.h" | ||
| 28 | |||
| 29 | struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { | ||
| 30 | .lhash_lock = RW_LOCK_UNLOCKED, | ||
| 31 | .lhash_users = ATOMIC_INIT(0), | ||
| 32 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), | ||
| 33 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
| 34 | .port_rover = 1024 - 1, | ||
| 35 | }; | ||
| 36 | |||
| 37 | static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) | ||
| 38 | { | ||
| 39 | return inet_csk_get_port(&dccp_hashinfo, sk, snum); | ||
| 40 | } | ||
| 41 | |||
| 42 | static void dccp_v4_hash(struct sock *sk) | ||
| 43 | { | ||
| 44 | inet_hash(&dccp_hashinfo, sk); | ||
| 45 | } | ||
| 46 | |||
| 47 | static void dccp_v4_unhash(struct sock *sk) | ||
| 48 | { | ||
| 49 | inet_unhash(&dccp_hashinfo, sk); | ||
| 50 | } | ||
| 51 | |||
| 52 | /* called with local bh disabled */ | ||
| 53 | static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, | ||
| 54 | struct inet_timewait_sock **twp) | ||
| 55 | { | ||
| 56 | struct inet_sock *inet = inet_sk(sk); | ||
| 57 | const u32 daddr = inet->rcv_saddr; | ||
| 58 | const u32 saddr = inet->daddr; | ||
| 59 | const int dif = sk->sk_bound_dev_if; | ||
| 60 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
| 61 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
| 62 | const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); | ||
| 63 | struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; | ||
| 64 | const struct sock *sk2; | ||
| 65 | const struct hlist_node *node; | ||
| 66 | struct inet_timewait_sock *tw; | ||
| 67 | |||
| 68 | write_lock(&head->lock); | ||
| 69 | |||
| 70 | /* Check TIME-WAIT sockets first. */ | ||
| 71 | sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { | ||
| 72 | tw = inet_twsk(sk2); | ||
| 73 | |||
| 74 | if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | ||
| 75 | goto not_unique; | ||
| 76 | } | ||
| 77 | tw = NULL; | ||
| 78 | |||
| 79 | /* And established part... */ | ||
| 80 | sk_for_each(sk2, node, &head->chain) { | ||
| 81 | if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | ||
| 82 | goto not_unique; | ||
| 83 | } | ||
| 84 | |||
| 85 | /* Must record num and sport now. Otherwise we will see | ||
| 86 | * in hash table socket with a funny identity. */ | ||
| 87 | inet->num = lport; | ||
| 88 | inet->sport = htons(lport); | ||
| 89 | sk->sk_hashent = hash; | ||
| 90 | BUG_TRAP(sk_unhashed(sk)); | ||
| 91 | __sk_add_node(sk, &head->chain); | ||
| 92 | sock_prot_inc_use(sk->sk_prot); | ||
| 93 | write_unlock(&head->lock); | ||
| 94 | |||
| 95 | if (twp != NULL) { | ||
| 96 | *twp = tw; | ||
| 97 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
| 98 | } else if (tw != NULL) { | ||
| 99 | /* Silly. Should hash-dance instead... */ | ||
| 100 | dccp_tw_deschedule(tw); | ||
| 101 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
| 102 | |||
| 103 | inet_twsk_put(tw); | ||
| 104 | } | ||
| 105 | |||
| 106 | return 0; | ||
| 107 | |||
| 108 | not_unique: | ||
| 109 | write_unlock(&head->lock); | ||
| 110 | return -EADDRNOTAVAIL; | ||
| 111 | } | ||
| 112 | |||
| 113 | /* | ||
| 114 | * Bind a port for a connect operation and hash it. | ||
| 115 | */ | ||
| 116 | static int dccp_v4_hash_connect(struct sock *sk) | ||
| 117 | { | ||
| 118 | const unsigned short snum = inet_sk(sk)->num; | ||
| 119 | struct inet_bind_hashbucket *head; | ||
| 120 | struct inet_bind_bucket *tb; | ||
| 121 | int ret; | ||
| 122 | |||
| 123 | if (snum == 0) { | ||
| 124 | int rover; | ||
| 125 | int low = sysctl_local_port_range[0]; | ||
| 126 | int high = sysctl_local_port_range[1]; | ||
| 127 | int remaining = (high - low) + 1; | ||
| 128 | struct hlist_node *node; | ||
| 129 | struct inet_timewait_sock *tw = NULL; | ||
| 130 | |||
| 131 | local_bh_disable(); | ||
| 132 | |||
| 133 | /* TODO. Actually it is not so bad idea to remove | ||
| 134 | * dccp_hashinfo.portalloc_lock before next submission to Linus. | ||
| 135 | * As soon as we touch this place at all it is time to think. | ||
| 136 | * | ||
| 137 | * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, | ||
| 138 | * hence it is mostly useless. | ||
| 139 | * Code will work nicely if we just delete it, but | ||
| 140 | * I am afraid in contented case it will work not better or | ||
| 141 | * even worse: another cpu just will hit the same bucket | ||
| 142 | * and spin there. | ||
| 143 | * So some cpu salt could remove both contention and | ||
| 144 | * memory pingpong. Any ideas how to do this in a nice way? | ||
| 145 | */ | ||
| 146 | spin_lock(&dccp_hashinfo.portalloc_lock); | ||
| 147 | rover = dccp_hashinfo.port_rover; | ||
| 148 | |||
| 149 | do { | ||
| 150 | rover++; | ||
| 151 | if ((rover < low) || (rover > high)) | ||
| 152 | rover = low; | ||
| 153 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; | ||
| 154 | spin_lock(&head->lock); | ||
| 155 | |||
| 156 | /* Does not bother with rcv_saddr checks, | ||
| 157 | * because the established check is already | ||
| 158 | * unique enough. | ||
| 159 | */ | ||
| 160 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
| 161 | if (tb->port == rover) { | ||
| 162 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
| 163 | if (tb->fastreuse >= 0) | ||
| 164 | goto next_port; | ||
| 165 | if (!__dccp_v4_check_established(sk, | ||
| 166 | rover, | ||
| 167 | &tw)) | ||
| 168 | goto ok; | ||
| 169 | goto next_port; | ||
| 170 | } | ||
| 171 | } | ||
| 172 | |||
| 173 | tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); | ||
| 174 | if (tb == NULL) { | ||
| 175 | spin_unlock(&head->lock); | ||
| 176 | break; | ||
| 177 | } | ||
| 178 | tb->fastreuse = -1; | ||
| 179 | goto ok; | ||
| 180 | |||
| 181 | next_port: | ||
| 182 | spin_unlock(&head->lock); | ||
| 183 | } while (--remaining > 0); | ||
| 184 | dccp_hashinfo.port_rover = rover; | ||
| 185 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
| 186 | |||
| 187 | local_bh_enable(); | ||
| 188 | |||
| 189 | return -EADDRNOTAVAIL; | ||
| 190 | |||
| 191 | ok: | ||
| 192 | /* All locks still held and bhs disabled */ | ||
| 193 | dccp_hashinfo.port_rover = rover; | ||
| 194 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
| 195 | |||
| 196 | inet_bind_hash(sk, tb, rover); | ||
| 197 | if (sk_unhashed(sk)) { | ||
| 198 | inet_sk(sk)->sport = htons(rover); | ||
| 199 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
| 200 | } | ||
| 201 | spin_unlock(&head->lock); | ||
| 202 | |||
| 203 | if (tw != NULL) { | ||
| 204 | dccp_tw_deschedule(tw); | ||
| 205 | inet_twsk_put(tw); | ||
| 206 | } | ||
| 207 | |||
| 208 | ret = 0; | ||
| 209 | goto out; | ||
| 210 | } | ||
| 211 | |||
| 212 | head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; | ||
| 213 | tb = inet_csk(sk)->icsk_bind_hash; | ||
| 214 | spin_lock_bh(&head->lock); | ||
| 215 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | ||
| 216 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
| 217 | spin_unlock_bh(&head->lock); | ||
| 218 | return 0; | ||
| 219 | } else { | ||
| 220 | spin_unlock(&head->lock); | ||
| 221 | /* No definite answer... Walk to established hash table */ | ||
| 222 | ret = __dccp_v4_check_established(sk, snum, NULL); | ||
| 223 | out: | ||
| 224 | local_bh_enable(); | ||
| 225 | return ret; | ||
| 226 | } | ||
| 227 | } | ||
| 228 | |||
| 229 | static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | ||
| 230 | int addr_len) | ||
| 231 | { | ||
| 232 | struct inet_sock *inet = inet_sk(sk); | ||
| 233 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 234 | const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
| 235 | struct rtable *rt; | ||
| 236 | u32 daddr, nexthop; | ||
| 237 | int tmp; | ||
| 238 | int err; | ||
| 239 | |||
| 240 | dp->dccps_role = DCCP_ROLE_CLIENT; | ||
| 241 | |||
| 242 | if (addr_len < sizeof(struct sockaddr_in)) | ||
| 243 | return -EINVAL; | ||
| 244 | |||
| 245 | if (usin->sin_family != AF_INET) | ||
| 246 | return -EAFNOSUPPORT; | ||
| 247 | |||
| 248 | nexthop = daddr = usin->sin_addr.s_addr; | ||
| 249 | if (inet->opt != NULL && inet->opt->srr) { | ||
| 250 | if (daddr == 0) | ||
| 251 | return -EINVAL; | ||
| 252 | nexthop = inet->opt->faddr; | ||
| 253 | } | ||
| 254 | |||
| 255 | tmp = ip_route_connect(&rt, nexthop, inet->saddr, | ||
| 256 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | ||
| 257 | IPPROTO_DCCP, | ||
| 258 | inet->sport, usin->sin_port, sk); | ||
| 259 | if (tmp < 0) | ||
| 260 | return tmp; | ||
| 261 | |||
| 262 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | ||
| 263 | ip_rt_put(rt); | ||
| 264 | return -ENETUNREACH; | ||
| 265 | } | ||
| 266 | |||
| 267 | if (inet->opt == NULL || !inet->opt->srr) | ||
| 268 | daddr = rt->rt_dst; | ||
| 269 | |||
| 270 | if (inet->saddr == 0) | ||
| 271 | inet->saddr = rt->rt_src; | ||
| 272 | inet->rcv_saddr = inet->saddr; | ||
| 273 | |||
| 274 | inet->dport = usin->sin_port; | ||
| 275 | inet->daddr = daddr; | ||
| 276 | |||
| 277 | dp->dccps_ext_header_len = 0; | ||
| 278 | if (inet->opt != NULL) | ||
| 279 | dp->dccps_ext_header_len = inet->opt->optlen; | ||
| 280 | /* | ||
| 281 | * Socket identity is still unknown (sport may be zero). | ||
| 282 | * However we set state to DCCP_REQUESTING and not releasing socket | ||
| 283 | * lock select source port, enter ourselves into the hash tables and | ||
| 284 | * complete initialization after this. | ||
| 285 | */ | ||
| 286 | dccp_set_state(sk, DCCP_REQUESTING); | ||
| 287 | err = dccp_v4_hash_connect(sk); | ||
| 288 | if (err != 0) | ||
| 289 | goto failure; | ||
| 290 | |||
| 291 | err = ip_route_newports(&rt, inet->sport, inet->dport, sk); | ||
| 292 | if (err != 0) | ||
| 293 | goto failure; | ||
| 294 | |||
| 295 | /* OK, now commit destination to socket. */ | ||
| 296 | sk_setup_caps(sk, &rt->u.dst); | ||
| 297 | |||
| 298 | dp->dccps_gar = | ||
| 299 | dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, | ||
| 300 | inet->daddr, | ||
| 301 | inet->sport, | ||
| 302 | usin->sin_port); | ||
| 303 | dccp_update_gss(sk, dp->dccps_iss); | ||
| 304 | |||
| 305 | inet->id = dp->dccps_iss ^ jiffies; | ||
| 306 | |||
| 307 | err = dccp_connect(sk); | ||
| 308 | rt = NULL; | ||
| 309 | if (err != 0) | ||
| 310 | goto failure; | ||
| 311 | out: | ||
| 312 | return err; | ||
| 313 | failure: | ||
| 314 | /* This unhashes the socket and releases the local port, if necessary. */ | ||
| 315 | dccp_set_state(sk, DCCP_CLOSED); | ||
| 316 | ip_rt_put(rt); | ||
| 317 | sk->sk_route_caps = 0; | ||
| 318 | inet->dport = 0; | ||
| 319 | goto out; | ||
| 320 | } | ||
| 321 | |||
| 322 | /* | ||
| 323 | * This routine does path mtu discovery as defined in RFC1191. | ||
| 324 | */ | ||
| 325 | static inline void dccp_do_pmtu_discovery(struct sock *sk, | ||
| 326 | const struct iphdr *iph, | ||
| 327 | u32 mtu) | ||
| 328 | { | ||
| 329 | struct dst_entry *dst; | ||
| 330 | const struct inet_sock *inet = inet_sk(sk); | ||
| 331 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 332 | |||
| 333 | /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs | ||
| 334 | * send out by Linux are always < 576bytes so they should go through | ||
| 335 | * unfragmented). | ||
| 336 | */ | ||
| 337 | if (sk->sk_state == DCCP_LISTEN) | ||
| 338 | return; | ||
| 339 | |||
| 340 | /* We don't check in the destentry if pmtu discovery is forbidden | ||
| 341 | * on this route. We just assume that no packet_to_big packets | ||
| 342 | * are send back when pmtu discovery is not active. | ||
| 343 | * There is a small race when the user changes this flag in the | ||
| 344 | * route, but I think that's acceptable. | ||
| 345 | */ | ||
| 346 | if ((dst = __sk_dst_check(sk, 0)) == NULL) | ||
| 347 | return; | ||
| 348 | |||
| 349 | dst->ops->update_pmtu(dst, mtu); | ||
| 350 | |||
| 351 | /* Something is about to be wrong... Remember soft error | ||
| 352 | * for the case, if this connection will not able to recover. | ||
| 353 | */ | ||
| 354 | if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) | ||
| 355 | sk->sk_err_soft = EMSGSIZE; | ||
| 356 | |||
| 357 | mtu = dst_mtu(dst); | ||
| 358 | |||
| 359 | if (inet->pmtudisc != IP_PMTUDISC_DONT && | ||
| 360 | dp->dccps_pmtu_cookie > mtu) { | ||
| 361 | dccp_sync_mss(sk, mtu); | ||
| 362 | |||
| 363 | /* | ||
| 364 | * From: draft-ietf-dccp-spec-11.txt | ||
| 365 | * | ||
| 366 | * DCCP-Sync packets are the best choice for upward probing, | ||
| 367 | * since DCCP-Sync probes do not risk application data loss. | ||
| 368 | */ | ||
| 369 | dccp_send_sync(sk, dp->dccps_gsr); | ||
| 370 | } /* else let the usual retransmit timer handle it */ | ||
| 371 | } | ||
| 372 | |||
| 373 | static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) | ||
| 374 | { | ||
| 375 | int err; | ||
| 376 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
| 377 | const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + | ||
| 378 | sizeof(struct dccp_hdr_ext) + | ||
| 379 | sizeof(struct dccp_hdr_ack_bits); | ||
| 380 | struct sk_buff *skb; | ||
| 381 | |||
| 382 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
| 383 | return; | ||
| 384 | |||
| 385 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
| 386 | if (skb == NULL) | ||
| 387 | return; | ||
| 388 | |||
| 389 | /* Reserve space for headers. */ | ||
| 390 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
| 391 | |||
| 392 | skb->dst = dst_clone(rxskb->dst); | ||
| 393 | |||
| 394 | skb->h.raw = skb_push(skb, dccp_hdr_ack_len); | ||
| 395 | dh = dccp_hdr(skb); | ||
| 396 | memset(dh, 0, dccp_hdr_ack_len); | ||
| 397 | |||
| 398 | /* Build DCCP header and checksum it. */ | ||
| 399 | dh->dccph_type = DCCP_PKT_ACK; | ||
| 400 | dh->dccph_sport = rxdh->dccph_dport; | ||
| 401 | dh->dccph_dport = rxdh->dccph_sport; | ||
| 402 | dh->dccph_doff = dccp_hdr_ack_len / 4; | ||
| 403 | dh->dccph_x = 1; | ||
| 404 | |||
| 405 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
| 406 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
| 407 | |||
| 408 | bh_lock_sock(dccp_ctl_socket->sk); | ||
| 409 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
| 410 | rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); | ||
| 411 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
| 412 | |||
| 413 | if (err == NET_XMIT_CN || err == 0) { | ||
| 414 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
| 415 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
/*
 * Acknowledge @skb on behalf of a pending connection request.  @req is not
 * consulted: the Ack is built from @skb alone by dccp_v4_ctl_send_ack().
 */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	dccp_v4_ctl_send_ack(skb);
}
| 423 | |||
| 424 | static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, | ||
| 425 | struct dst_entry *dst) | ||
| 426 | { | ||
| 427 | int err = -1; | ||
| 428 | struct sk_buff *skb; | ||
| 429 | |||
| 430 | /* First, grab a route. */ | ||
| 431 | |||
| 432 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
| 433 | goto out; | ||
| 434 | |||
| 435 | skb = dccp_make_response(sk, dst, req); | ||
| 436 | if (skb != NULL) { | ||
| 437 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
| 438 | |||
| 439 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, | ||
| 440 | ireq->rmt_addr, | ||
| 441 | ireq->opt); | ||
| 442 | if (err == NET_XMIT_CN) | ||
| 443 | err = 0; | ||
| 444 | } | ||
| 445 | |||
| 446 | out: | ||
| 447 | dst_release(dst); | ||
| 448 | return err; | ||
| 449 | } | ||
| 450 | |||
| 451 | /* | ||
| 452 | * This routine is called by the ICMP module when it gets some sort of error | ||
| 453 | * condition. If err < 0 then the socket should be closed and the error | ||
| 454 | * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. | ||
| 455 | * After adjustment header points to the first 8 bytes of the tcp header. We | ||
| 456 | * need to find the appropriate port. | ||
| 457 | * | ||
| 458 | * The locking strategy used here is very "optimistic". When someone else | ||
| 459 | * accesses the socket the ICMP is just dropped and for some paths there is no | ||
| 460 | * check at all. A more general error queue to queue errors for later handling | ||
| 461 | * is probably better. | ||
| 462 | */ | ||
| 463 | void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
| 464 | { | ||
| 465 | const struct iphdr *iph = (struct iphdr *)skb->data; | ||
| 466 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); | ||
| 467 | struct dccp_sock *dp; | ||
| 468 | struct inet_sock *inet; | ||
| 469 | const int type = skb->h.icmph->type; | ||
| 470 | const int code = skb->h.icmph->code; | ||
| 471 | struct sock *sk; | ||
| 472 | __u64 seq; | ||
| 473 | int err; | ||
| 474 | |||
| 475 | if (skb->len < (iph->ihl << 2) + 8) { | ||
| 476 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
| 477 | return; | ||
| 478 | } | ||
| 479 | |||
| 480 | sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, | ||
| 481 | iph->saddr, dh->dccph_sport, inet_iif(skb)); | ||
| 482 | if (sk == NULL) { | ||
| 483 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
| 484 | return; | ||
| 485 | } | ||
| 486 | |||
| 487 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
| 488 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
| 489 | return; | ||
| 490 | } | ||
| 491 | |||
| 492 | bh_lock_sock(sk); | ||
| 493 | /* If too many ICMPs get dropped on busy | ||
| 494 | * servers this needs to be solved differently. | ||
| 495 | */ | ||
| 496 | if (sock_owned_by_user(sk)) | ||
| 497 | NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); | ||
| 498 | |||
| 499 | if (sk->sk_state == DCCP_CLOSED) | ||
| 500 | goto out; | ||
| 501 | |||
| 502 | dp = dccp_sk(sk); | ||
| 503 | seq = dccp_hdr_seq(skb); | ||
| 504 | if (sk->sk_state != DCCP_LISTEN && | ||
| 505 | !between48(seq, dp->dccps_swl, dp->dccps_swh)) { | ||
| 506 | NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); | ||
| 507 | goto out; | ||
| 508 | } | ||
| 509 | |||
| 510 | switch (type) { | ||
| 511 | case ICMP_SOURCE_QUENCH: | ||
| 512 | /* Just silently ignore these. */ | ||
| 513 | goto out; | ||
| 514 | case ICMP_PARAMETERPROB: | ||
| 515 | err = EPROTO; | ||
| 516 | break; | ||
| 517 | case ICMP_DEST_UNREACH: | ||
| 518 | if (code > NR_ICMP_UNREACH) | ||
| 519 | goto out; | ||
| 520 | |||
| 521 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ | ||
| 522 | if (!sock_owned_by_user(sk)) | ||
| 523 | dccp_do_pmtu_discovery(sk, iph, info); | ||
| 524 | goto out; | ||
| 525 | } | ||
| 526 | |||
| 527 | err = icmp_err_convert[code].errno; | ||
| 528 | break; | ||
| 529 | case ICMP_TIME_EXCEEDED: | ||
| 530 | err = EHOSTUNREACH; | ||
| 531 | break; | ||
| 532 | default: | ||
| 533 | goto out; | ||
| 534 | } | ||
| 535 | |||
| 536 | switch (sk->sk_state) { | ||
| 537 | struct request_sock *req , **prev; | ||
| 538 | case DCCP_LISTEN: | ||
| 539 | if (sock_owned_by_user(sk)) | ||
| 540 | goto out; | ||
| 541 | req = inet_csk_search_req(sk, &prev, dh->dccph_dport, | ||
| 542 | iph->daddr, iph->saddr); | ||
| 543 | if (!req) | ||
| 544 | goto out; | ||
| 545 | |||
| 546 | /* | ||
| 547 | * ICMPs are not backlogged, hence we cannot get an established | ||
| 548 | * socket here. | ||
| 549 | */ | ||
| 550 | BUG_TRAP(!req->sk); | ||
| 551 | |||
| 552 | if (seq != dccp_rsk(req)->dreq_iss) { | ||
| 553 | NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); | ||
| 554 | goto out; | ||
| 555 | } | ||
| 556 | /* | ||
| 557 | * Still in RESPOND, just remove it silently. | ||
| 558 | * There is no good way to pass the error to the newly | ||
| 559 | * created socket, and POSIX does not want network | ||
| 560 | * errors returned from accept(). | ||
| 561 | */ | ||
| 562 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
| 563 | goto out; | ||
| 564 | |||
| 565 | case DCCP_REQUESTING: | ||
| 566 | case DCCP_RESPOND: | ||
| 567 | if (!sock_owned_by_user(sk)) { | ||
| 568 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
| 569 | sk->sk_err = err; | ||
| 570 | |||
| 571 | sk->sk_error_report(sk); | ||
| 572 | |||
| 573 | dccp_done(sk); | ||
| 574 | } else | ||
| 575 | sk->sk_err_soft = err; | ||
| 576 | goto out; | ||
| 577 | } | ||
| 578 | |||
| 579 | /* If we've already connected we will keep trying | ||
| 580 | * until we time out, or the user gives up. | ||
| 581 | * | ||
| 582 | * rfc1122 4.2.3.9 allows to consider as hard errors | ||
| 583 | * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, | ||
| 584 | * but it is obsoleted by pmtu discovery). | ||
| 585 | * | ||
| 586 | * Note, that in modern internet, where routing is unreliable | ||
| 587 | * and in each dark corner broken firewalls sit, sending random | ||
| 588 | * errors ordered by their masters even this two messages finally lose | ||
| 589 | * their original sense (even Linux sends invalid PORT_UNREACHs) | ||
| 590 | * | ||
| 591 | * Now we are in compliance with RFCs. | ||
| 592 | * --ANK (980905) | ||
| 593 | */ | ||
| 594 | |||
| 595 | inet = inet_sk(sk); | ||
| 596 | if (!sock_owned_by_user(sk) && inet->recverr) { | ||
| 597 | sk->sk_err = err; | ||
| 598 | sk->sk_error_report(sk); | ||
| 599 | } else /* Only an error on timeout */ | ||
| 600 | sk->sk_err_soft = err; | ||
| 601 | out: | ||
| 602 | bh_unlock_sock(sk); | ||
| 603 | sock_put(sk); | ||
| 604 | } | ||
| 605 | |||
| 606 | extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); | ||
| 607 | |||
| 608 | int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) | ||
| 609 | { | ||
| 610 | struct sk_buff *skb; | ||
| 611 | /* | ||
| 612 | * FIXME: what if rebuild_header fails? | ||
| 613 | * Should we be doing a rebuild_header here? | ||
| 614 | */ | ||
| 615 | int err = inet_sk_rebuild_header(sk); | ||
| 616 | |||
| 617 | if (err != 0) | ||
| 618 | return err; | ||
| 619 | |||
| 620 | skb = dccp_make_reset(sk, sk->sk_dst_cache, code); | ||
| 621 | if (skb != NULL) { | ||
| 622 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 623 | const struct inet_sock *inet = inet_sk(sk); | ||
| 624 | |||
| 625 | err = ip_build_and_send_pkt(skb, sk, | ||
| 626 | inet->saddr, inet->daddr, NULL); | ||
| 627 | if (err == NET_XMIT_CN) | ||
| 628 | err = 0; | ||
| 629 | |||
| 630 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
| 631 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
| 632 | } | ||
| 633 | |||
| 634 | return err; | ||
| 635 | } | ||
| 636 | |||
| 637 | static inline u64 dccp_v4_init_sequence(const struct sock *sk, | ||
| 638 | const struct sk_buff *skb) | ||
| 639 | { | ||
| 640 | return secure_dccp_sequence_number(skb->nh.iph->daddr, | ||
| 641 | skb->nh.iph->saddr, | ||
| 642 | dccp_hdr(skb)->dccph_dport, | ||
| 643 | dccp_hdr(skb)->dccph_sport); | ||
| 644 | } | ||
| 645 | |||
| 646 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | ||
| 647 | { | ||
| 648 | struct inet_request_sock *ireq; | ||
| 649 | struct dccp_sock dp; | ||
| 650 | struct request_sock *req; | ||
| 651 | struct dccp_request_sock *dreq; | ||
| 652 | const __u32 saddr = skb->nh.iph->saddr; | ||
| 653 | const __u32 daddr = skb->nh.iph->daddr; | ||
| 654 | struct dst_entry *dst = NULL; | ||
| 655 | |||
| 656 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ | ||
| 657 | if (((struct rtable *)skb->dst)->rt_flags & | ||
| 658 | (RTCF_BROADCAST | RTCF_MULTICAST)) | ||
| 659 | goto drop; | ||
| 660 | |||
| 661 | /* | ||
| 662 | * TW buckets are converted to open requests without | ||
| 663 | * limitations, they conserve resources and peer is | ||
| 664 | * evidently real one. | ||
| 665 | */ | ||
| 666 | if (inet_csk_reqsk_queue_is_full(sk)) | ||
| 667 | goto drop; | ||
| 668 | |||
| 669 | /* | ||
| 670 | * Accept backlog is full. If we have already queued enough | ||
| 671 | * of warm entries in syn queue, drop request. It is better than | ||
| 672 | * clogging syn queue with openreqs with exponentially increasing | ||
| 673 | * timeout. | ||
| 674 | */ | ||
| 675 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) | ||
| 676 | goto drop; | ||
| 677 | |||
| 678 | req = reqsk_alloc(sk->sk_prot->rsk_prot); | ||
| 679 | if (req == NULL) | ||
| 680 | goto drop; | ||
| 681 | |||
| 682 | /* FIXME: process options */ | ||
| 683 | |||
| 684 | dccp_openreq_init(req, &dp, skb); | ||
| 685 | |||
| 686 | ireq = inet_rsk(req); | ||
| 687 | ireq->loc_addr = daddr; | ||
| 688 | ireq->rmt_addr = saddr; | ||
| 689 | /* FIXME: Merge Aristeu's option parsing code when ready */ | ||
| 690 | req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ | ||
| 691 | ireq->opt = NULL; | ||
| 692 | |||
| 693 | /* | ||
| 694 | * Step 3: Process LISTEN state | ||
| 695 | * | ||
| 696 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
| 697 | * | ||
| 698 | * In fact we defer setting S.GSR, S.SWL, S.SWH to | ||
| 699 | * dccp_create_openreq_child. | ||
| 700 | */ | ||
| 701 | dreq = dccp_rsk(req); | ||
| 702 | dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 703 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); | ||
| 704 | dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; | ||
| 705 | |||
| 706 | if (dccp_v4_send_response(sk, req, dst)) | ||
| 707 | goto drop_and_free; | ||
| 708 | |||
| 709 | inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); | ||
| 710 | return 0; | ||
| 711 | |||
| 712 | drop_and_free: | ||
| 713 | /* | ||
| 714 | * FIXME: should be reqsk_free after implementing req->rsk_ops | ||
| 715 | */ | ||
| 716 | __reqsk_free(req); | ||
| 717 | drop: | ||
| 718 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
| 719 | return -1; | ||
| 720 | } | ||
| 721 | |||
| 722 | /* | ||
| 723 | * The three way handshake has completed - we got a valid ACK or DATAACK - | ||
| 724 | * now create the new socket. | ||
| 725 | * | ||
| 726 | * This is the equivalent of TCP's tcp_v4_syn_recv_sock | ||
| 727 | */ | ||
| 728 | struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | ||
| 729 | struct request_sock *req, | ||
| 730 | struct dst_entry *dst) | ||
| 731 | { | ||
| 732 | struct inet_request_sock *ireq; | ||
| 733 | struct inet_sock *newinet; | ||
| 734 | struct dccp_sock *newdp; | ||
| 735 | struct sock *newsk; | ||
| 736 | |||
| 737 | if (sk_acceptq_is_full(sk)) | ||
| 738 | goto exit_overflow; | ||
| 739 | |||
| 740 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
| 741 | goto exit; | ||
| 742 | |||
| 743 | newsk = dccp_create_openreq_child(sk, req, skb); | ||
| 744 | if (newsk == NULL) | ||
| 745 | goto exit; | ||
| 746 | |||
| 747 | sk_setup_caps(newsk, dst); | ||
| 748 | |||
| 749 | newdp = dccp_sk(newsk); | ||
| 750 | newinet = inet_sk(newsk); | ||
| 751 | ireq = inet_rsk(req); | ||
| 752 | newinet->daddr = ireq->rmt_addr; | ||
| 753 | newinet->rcv_saddr = ireq->loc_addr; | ||
| 754 | newinet->saddr = ireq->loc_addr; | ||
| 755 | newinet->opt = ireq->opt; | ||
| 756 | ireq->opt = NULL; | ||
| 757 | newinet->mc_index = inet_iif(skb); | ||
| 758 | newinet->mc_ttl = skb->nh.iph->ttl; | ||
| 759 | newinet->id = jiffies; | ||
| 760 | |||
| 761 | dccp_sync_mss(newsk, dst_mtu(dst)); | ||
| 762 | |||
| 763 | __inet_hash(&dccp_hashinfo, newsk, 0); | ||
| 764 | __inet_inherit_port(&dccp_hashinfo, sk, newsk); | ||
| 765 | |||
| 766 | return newsk; | ||
| 767 | |||
| 768 | exit_overflow: | ||
| 769 | NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); | ||
| 770 | exit: | ||
| 771 | NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); | ||
| 772 | dst_release(dst); | ||
| 773 | return NULL; | ||
| 774 | } | ||
| 775 | |||
| 776 | static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | ||
| 777 | { | ||
| 778 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
| 779 | const struct iphdr *iph = skb->nh.iph; | ||
| 780 | struct sock *nsk; | ||
| 781 | struct request_sock **prev; | ||
| 782 | /* Find possible connection requests. */ | ||
| 783 | struct request_sock *req = inet_csk_search_req(sk, &prev, | ||
| 784 | dh->dccph_sport, | ||
| 785 | iph->saddr, iph->daddr); | ||
| 786 | if (req != NULL) | ||
| 787 | return dccp_check_req(sk, skb, req, prev); | ||
| 788 | |||
| 789 | nsk = __inet_lookup_established(&dccp_hashinfo, | ||
| 790 | iph->saddr, dh->dccph_sport, | ||
| 791 | iph->daddr, ntohs(dh->dccph_dport), | ||
| 792 | inet_iif(skb)); | ||
| 793 | if (nsk != NULL) { | ||
| 794 | if (nsk->sk_state != DCCP_TIME_WAIT) { | ||
| 795 | bh_lock_sock(nsk); | ||
| 796 | return nsk; | ||
| 797 | } | ||
| 798 | inet_twsk_put((struct inet_timewait_sock *)nsk); | ||
| 799 | return NULL; | ||
| 800 | } | ||
| 801 | |||
| 802 | return sk; | ||
| 803 | } | ||
| 804 | |||
| 805 | int dccp_v4_checksum(struct sk_buff *skb) | ||
| 806 | { | ||
| 807 | struct dccp_hdr* dh = dccp_hdr(skb); | ||
| 808 | int checksum_len; | ||
| 809 | u32 tmp; | ||
| 810 | |||
| 811 | if (dh->dccph_cscov == 0) | ||
| 812 | checksum_len = skb->len; | ||
| 813 | else { | ||
| 814 | checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); | ||
| 815 | checksum_len = checksum_len < skb->len ? checksum_len : skb->len; | ||
| 816 | } | ||
| 817 | |||
| 818 | tmp = csum_partial((unsigned char *)dh, checksum_len, 0); | ||
| 819 | return csum_fold(tmp); | ||
| 820 | } | ||
| 821 | |||
| 822 | static int dccp_v4_verify_checksum(struct sk_buff *skb) | ||
| 823 | { | ||
| 824 | struct dccp_hdr *th = dccp_hdr(skb); | ||
| 825 | const u16 remote_checksum = th->dccph_checksum; | ||
| 826 | u16 local_checksum; | ||
| 827 | |||
| 828 | /* FIXME: don't mess with skb payload */ | ||
| 829 | th->dccph_checksum = 0; /* zero it for computation */ | ||
| 830 | |||
| 831 | local_checksum = dccp_v4_checksum(skb); | ||
| 832 | |||
| 833 | /* FIXME: don't mess with skb payload */ | ||
| 834 | th->dccph_checksum = remote_checksum; /* put it back */ | ||
| 835 | |||
| 836 | return remote_checksum == local_checksum ? 0 : -1; | ||
| 837 | } | ||
| 838 | |||
| 839 | static struct dst_entry* dccp_v4_route_skb(struct sock *sk, | ||
| 840 | struct sk_buff *skb) | ||
| 841 | { | ||
| 842 | struct rtable *rt; | ||
| 843 | struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, | ||
| 844 | .nl_u = { .ip4_u = | ||
| 845 | { .daddr = skb->nh.iph->saddr, | ||
| 846 | .saddr = skb->nh.iph->daddr, | ||
| 847 | .tos = RT_CONN_FLAGS(sk) } }, | ||
| 848 | .proto = sk->sk_protocol, | ||
| 849 | .uli_u = { .ports = | ||
| 850 | { .sport = dccp_hdr(skb)->dccph_dport, | ||
| 851 | .dport = dccp_hdr(skb)->dccph_sport } } }; | ||
| 852 | |||
| 853 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
| 854 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
| 855 | return NULL; | ||
| 856 | } | ||
| 857 | |||
| 858 | return &rt->u.dst; | ||
| 859 | } | ||
| 860 | |||
| 861 | void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) | ||
| 862 | { | ||
| 863 | int err; | ||
| 864 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
| 865 | const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + | ||
| 866 | sizeof(struct dccp_hdr_ext) + | ||
| 867 | sizeof(struct dccp_hdr_reset); | ||
| 868 | struct sk_buff *skb; | ||
| 869 | struct dst_entry *dst; | ||
| 870 | |||
| 871 | /* Never send a reset in response to a reset. */ | ||
| 872 | if (rxdh->dccph_type == DCCP_PKT_RESET) | ||
| 873 | return; | ||
| 874 | |||
| 875 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
| 876 | return; | ||
| 877 | |||
| 878 | dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); | ||
| 879 | if (dst == NULL) | ||
| 880 | return; | ||
| 881 | |||
| 882 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
| 883 | if (skb == NULL) | ||
| 884 | goto out; | ||
| 885 | |||
| 886 | /* Reserve space for headers. */ | ||
| 887 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
| 888 | skb->dst = dst_clone(dst); | ||
| 889 | |||
| 890 | skb->h.raw = skb_push(skb, dccp_hdr_reset_len); | ||
| 891 | dh = dccp_hdr(skb); | ||
| 892 | memset(dh, 0, dccp_hdr_reset_len); | ||
| 893 | |||
| 894 | /* Build DCCP header and checksum it. */ | ||
| 895 | dh->dccph_type = DCCP_PKT_RESET; | ||
| 896 | dh->dccph_sport = rxdh->dccph_dport; | ||
| 897 | dh->dccph_dport = rxdh->dccph_sport; | ||
| 898 | dh->dccph_doff = dccp_hdr_reset_len / 4; | ||
| 899 | dh->dccph_x = 1; | ||
| 900 | dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; | ||
| 901 | |||
| 902 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
| 903 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
| 904 | |||
| 905 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
| 906 | |||
| 907 | bh_lock_sock(dccp_ctl_socket->sk); | ||
| 908 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
| 909 | rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); | ||
| 910 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
| 911 | |||
| 912 | if (err == NET_XMIT_CN || err == 0) { | ||
| 913 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
| 914 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
| 915 | } | ||
| 916 | out: | ||
| 917 | dst_release(dst); | ||
| 918 | } | ||
| 919 | |||
| 920 | int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | ||
| 921 | { | ||
| 922 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
| 923 | |||
| 924 | if (sk->sk_state == DCCP_OPEN) { /* Fast path */ | ||
| 925 | if (dccp_rcv_established(sk, skb, dh, skb->len)) | ||
| 926 | goto reset; | ||
| 927 | return 0; | ||
| 928 | } | ||
| 929 | |||
| 930 | /* | ||
| 931 | * Step 3: Process LISTEN state | ||
| 932 | * If S.state == LISTEN, | ||
| 933 | * If P.type == Request or P contains a valid Init Cookie option, | ||
| 934 | * * Must scan the packet's options to check for an Init | ||
| 935 | * Cookie. Only the Init Cookie is processed here, | ||
| 936 | * however; other options are processed in Step 8. This | ||
| 937 | * scan need only be performed if the endpoint uses Init | ||
| 938 | * Cookies * | ||
| 939 | * * Generate a new socket and switch to that socket * | ||
| 940 | * Set S := new socket for this port pair | ||
| 941 | * S.state = RESPOND | ||
| 942 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
| 943 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
| 944 | * Continue with S.state == RESPOND | ||
| 945 | * * A Response packet will be generated in Step 11 * | ||
| 946 | * Otherwise, | ||
| 947 | * Generate Reset(No Connection) unless P.type == Reset | ||
| 948 | * Drop packet and return | ||
| 949 | * | ||
| 950 | * NOTE: the check for the packet types is done in dccp_rcv_state_process | ||
| 951 | */ | ||
| 952 | if (sk->sk_state == DCCP_LISTEN) { | ||
| 953 | struct sock *nsk = dccp_v4_hnd_req(sk, skb); | ||
| 954 | |||
| 955 | if (nsk == NULL) | ||
| 956 | goto discard; | ||
| 957 | |||
| 958 | if (nsk != sk) { | ||
| 959 | if (dccp_child_process(sk, nsk, skb)) | ||
| 960 | goto reset; | ||
| 961 | return 0; | ||
| 962 | } | ||
| 963 | } | ||
| 964 | |||
| 965 | if (dccp_rcv_state_process(sk, skb, dh, skb->len)) | ||
| 966 | goto reset; | ||
| 967 | return 0; | ||
| 968 | |||
| 969 | reset: | ||
| 970 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
| 971 | dccp_v4_ctl_send_reset(skb); | ||
| 972 | discard: | ||
| 973 | kfree_skb(skb); | ||
| 974 | return 0; | ||
| 975 | } | ||
| 976 | |||
| 977 | static inline int dccp_invalid_packet(struct sk_buff *skb) | ||
| 978 | { | ||
| 979 | const struct dccp_hdr *dh; | ||
| 980 | |||
| 981 | if (skb->pkt_type != PACKET_HOST) | ||
| 982 | return 1; | ||
| 983 | |||
| 984 | if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { | ||
| 985 | dccp_pr_debug("pskb_may_pull failed\n"); | ||
| 986 | return 1; | ||
| 987 | } | ||
| 988 | |||
| 989 | dh = dccp_hdr(skb); | ||
| 990 | |||
| 991 | /* If the packet type is not understood, drop packet and return */ | ||
| 992 | if (dh->dccph_type >= DCCP_PKT_INVALID) { | ||
| 993 | dccp_pr_debug("invalid packet type\n"); | ||
| 994 | return 1; | ||
| 995 | } | ||
| 996 | |||
| 997 | /* | ||
| 998 | * If P.Data Offset is too small for packet type, or too large for | ||
| 999 | * packet, drop packet and return | ||
| 1000 | */ | ||
| 1001 | if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { | ||
| 1002 | dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); | ||
| 1003 | return 1; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { | ||
| 1007 | dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); | ||
| 1008 | return 1; | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | dh = dccp_hdr(skb); | ||
| 1012 | |||
| 1013 | /* | ||
| 1014 | * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet | ||
| 1015 | * has short sequence numbers), drop packet and return | ||
| 1016 | */ | ||
| 1017 | if (dh->dccph_x == 0 && | ||
| 1018 | dh->dccph_type != DCCP_PKT_DATA && | ||
| 1019 | dh->dccph_type != DCCP_PKT_ACK && | ||
| 1020 | dh->dccph_type != DCCP_PKT_DATAACK) { | ||
| 1021 | dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", | ||
| 1022 | dccp_packet_name(dh->dccph_type)); | ||
| 1023 | return 1; | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | /* If the header checksum is incorrect, drop packet and return */ | ||
| 1027 | if (dccp_v4_verify_checksum(skb) < 0) { | ||
| 1028 | dccp_pr_debug("header checksum is incorrect\n"); | ||
| 1029 | return 1; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | return 0; | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | /* this is called when real data arrives */ | ||
| 1036 | int dccp_v4_rcv(struct sk_buff *skb) | ||
| 1037 | { | ||
| 1038 | const struct dccp_hdr *dh; | ||
| 1039 | struct sock *sk; | ||
| 1040 | int rc; | ||
| 1041 | |||
| 1042 | /* Step 1: Check header basics: */ | ||
| 1043 | |||
| 1044 | if (dccp_invalid_packet(skb)) | ||
| 1045 | goto discard_it; | ||
| 1046 | |||
| 1047 | dh = dccp_hdr(skb); | ||
| 1048 | #if 0 | ||
| 1049 | /* | ||
| 1050 | * Use something like this to simulate some DATA/DATAACK loss to test | ||
| 1051 | * dccp_ackpkts_add, you'll get something like this on a session that | ||
| 1052 | * sends 10 DATA/DATAACK packets: | ||
| 1053 | * | ||
| 1054 | * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| | ||
| 1055 | * | ||
| 1056 | * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet | ||
| 1057 | * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state | ||
| 1058 | * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet | ||
| 1059 | * | ||
| 1060 | * So... | ||
| 1061 | * | ||
| 1062 | * 281473596467422 was received | ||
| 1063 | * 281473596467421 was not received | ||
| 1064 | * 281473596467420 was received | ||
| 1065 | * 281473596467419 was not received | ||
| 1066 | * 281473596467418 was received | ||
| 1067 | * 281473596467417 was not received | ||
| 1068 | * 281473596467416 was received | ||
| 1069 | * 281473596467415 was not received | ||
| 1070 | * 281473596467414 was received | ||
| 1071 | * 281473596467413 was received (this one was the 3way handshake RESPONSE) | ||
| 1072 | * | ||
| 1073 | */ | ||
| 1074 | if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { | ||
| 1075 | static int discard = 0; | ||
| 1076 | |||
| 1077 | if (discard) { | ||
| 1078 | discard = 0; | ||
| 1079 | goto discard_it; | ||
| 1080 | } | ||
| 1081 | discard = 1; | ||
| 1082 | } | ||
| 1083 | #endif | ||
| 1084 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | ||
| 1085 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | ||
| 1086 | |||
| 1087 | dccp_pr_debug("%8.8s " | ||
| 1088 | "src=%u.%u.%u.%u@%-5d " | ||
| 1089 | "dst=%u.%u.%u.%u@%-5d seq=%llu", | ||
| 1090 | dccp_packet_name(dh->dccph_type), | ||
| 1091 | NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), | ||
| 1092 | NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), | ||
| 1093 | DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 1094 | |||
| 1095 | if (dccp_packet_without_ack(skb)) { | ||
| 1096 | DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; | ||
| 1097 | dccp_pr_debug_cat("\n"); | ||
| 1098 | } else { | ||
| 1099 | DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); | ||
| 1100 | dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
| 1101 | } | ||
| 1102 | |||
| 1103 | /* Step 2: | ||
| 1104 | * Look up flow ID in table and get corresponding socket */ | ||
| 1105 | sk = __inet_lookup(&dccp_hashinfo, | ||
| 1106 | skb->nh.iph->saddr, dh->dccph_sport, | ||
| 1107 | skb->nh.iph->daddr, ntohs(dh->dccph_dport), | ||
| 1108 | inet_iif(skb)); | ||
| 1109 | |||
| 1110 | /* | ||
| 1111 | * Step 2: | ||
| 1112 | * If no socket ... | ||
| 1113 | * Generate Reset(No Connection) unless P.type == Reset | ||
| 1114 | * Drop packet and return | ||
| 1115 | */ | ||
| 1116 | if (sk == NULL) { | ||
| 1117 | dccp_pr_debug("failed to look up flow ID in table and " | ||
| 1118 | "get corresponding socket\n"); | ||
| 1119 | goto no_dccp_socket; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | /* | ||
| 1123 | * Step 2: | ||
| 1124 | * ... or S.state == TIMEWAIT, | ||
| 1125 | * Generate Reset(No Connection) unless P.type == Reset | ||
| 1126 | * Drop packet and return | ||
| 1127 | */ | ||
| 1128 | |||
| 1129 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
| 1130 | dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); | ||
| 1131 | goto discard_and_relse; | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | ||
| 1135 | dccp_pr_debug("xfrm4_policy_check failed\n"); | ||
| 1136 | goto discard_and_relse; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | if (sk_filter(sk, skb, 0)) { | ||
| 1140 | dccp_pr_debug("sk_filter failed\n"); | ||
| 1141 | goto discard_and_relse; | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | skb->dev = NULL; | ||
| 1145 | |||
| 1146 | bh_lock_sock(sk); | ||
| 1147 | rc = 0; | ||
| 1148 | if (!sock_owned_by_user(sk)) | ||
| 1149 | rc = dccp_v4_do_rcv(sk, skb); | ||
| 1150 | else | ||
| 1151 | sk_add_backlog(sk, skb); | ||
| 1152 | bh_unlock_sock(sk); | ||
| 1153 | |||
| 1154 | sock_put(sk); | ||
| 1155 | return rc; | ||
| 1156 | |||
| 1157 | no_dccp_socket: | ||
| 1158 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
| 1159 | goto discard_it; | ||
| 1160 | /* | ||
| 1161 | * Step 2: | ||
| 1162 | * Generate Reset(No Connection) unless P.type == Reset | ||
| 1163 | * Drop packet and return | ||
| 1164 | */ | ||
| 1165 | if (dh->dccph_type != DCCP_PKT_RESET) { | ||
| 1166 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
| 1167 | dccp_v4_ctl_send_reset(skb); | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | discard_it: | ||
| 1171 | /* Discard frame. */ | ||
| 1172 | kfree_skb(skb); | ||
| 1173 | return 0; | ||
| 1174 | |||
| 1175 | discard_and_relse: | ||
| 1176 | sock_put(sk); | ||
| 1177 | goto discard_it; | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | static int dccp_v4_init_sock(struct sock *sk) | ||
| 1181 | { | ||
| 1182 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1183 | static int dccp_ctl_socket_init = 1; | ||
| 1184 | |||
| 1185 | dccp_options_init(&dp->dccps_options); | ||
| 1186 | |||
| 1187 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
| 1188 | dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
| 1189 | GFP_KERNEL); | ||
| 1190 | |||
| 1191 | if (dp->dccps_hc_rx_ackpkts == NULL) | ||
| 1192 | return -ENOMEM; | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | /* | ||
| 1196 | * FIXME: We're hardcoding the CCID, and doing this at this point makes | ||
| 1197 | * the listening (master) sock get CCID control blocks, which is not | ||
| 1198 | * necessary, but for now, to not mess with the test userspace apps, | ||
| 1199 | * lets leave it here, later the real solution is to do this in a | ||
| 1200 | * setsockopt(CCIDs-I-want/accept). -acme | ||
| 1201 | */ | ||
| 1202 | if (likely(!dccp_ctl_socket_init)) { | ||
| 1203 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); | ||
| 1204 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); | ||
| 1205 | if (dp->dccps_hc_rx_ccid == NULL || | ||
| 1206 | dp->dccps_hc_tx_ccid == NULL) { | ||
| 1207 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | ||
| 1208 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | ||
| 1209 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | ||
| 1210 | dp->dccps_hc_rx_ackpkts = NULL; | ||
| 1211 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | ||
| 1212 | return -ENOMEM; | ||
| 1213 | } | ||
| 1214 | } else | ||
| 1215 | dccp_ctl_socket_init = 0; | ||
| 1216 | |||
| 1217 | dccp_init_xmit_timers(sk); | ||
| 1218 | sk->sk_state = DCCP_CLOSED; | ||
| 1219 | dp->dccps_mss_cache = 536; | ||
| 1220 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | ||
| 1221 | |||
| 1222 | return 0; | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | int dccp_v4_destroy_sock(struct sock *sk) | ||
| 1226 | { | ||
| 1227 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 1228 | |||
| 1229 | /* | ||
| 1230 | * DCCP doesn't use sk_qrite_queue, just sk_send_head | ||
| 1231 | * for retransmissions | ||
| 1232 | */ | ||
| 1233 | if (sk->sk_send_head != NULL) { | ||
| 1234 | kfree_skb(sk->sk_send_head); | ||
| 1235 | sk->sk_send_head = NULL; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | /* Clean up a referenced DCCP bind bucket. */ | ||
| 1239 | if (inet_csk(sk)->icsk_bind_hash != NULL) | ||
| 1240 | inet_put_port(&dccp_hashinfo, sk); | ||
| 1241 | |||
| 1242 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | ||
| 1243 | dp->dccps_hc_rx_ackpkts = NULL; | ||
| 1244 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | ||
| 1245 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | ||
| 1246 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | ||
| 1247 | |||
| 1248 | return 0; | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | static void dccp_v4_reqsk_destructor(struct request_sock *req) | ||
| 1252 | { | ||
| 1253 | kfree(inet_rsk(req)->opt); | ||
| 1254 | } | ||
| 1255 | |||
| 1256 | static struct request_sock_ops dccp_request_sock_ops = { | ||
| 1257 | .family = PF_INET, | ||
| 1258 | .obj_size = sizeof(struct dccp_request_sock), | ||
| 1259 | .rtx_syn_ack = dccp_v4_send_response, | ||
| 1260 | .send_ack = dccp_v4_reqsk_send_ack, | ||
| 1261 | .destructor = dccp_v4_reqsk_destructor, | ||
| 1262 | .send_reset = dccp_v4_ctl_send_reset, | ||
| 1263 | }; | ||
| 1264 | |||
| 1265 | struct proto dccp_v4_prot = { | ||
| 1266 | .name = "DCCP", | ||
| 1267 | .owner = THIS_MODULE, | ||
| 1268 | .close = dccp_close, | ||
| 1269 | .connect = dccp_v4_connect, | ||
| 1270 | .disconnect = dccp_disconnect, | ||
| 1271 | .ioctl = dccp_ioctl, | ||
| 1272 | .init = dccp_v4_init_sock, | ||
| 1273 | .setsockopt = dccp_setsockopt, | ||
| 1274 | .getsockopt = dccp_getsockopt, | ||
| 1275 | .sendmsg = dccp_sendmsg, | ||
| 1276 | .recvmsg = dccp_recvmsg, | ||
| 1277 | .backlog_rcv = dccp_v4_do_rcv, | ||
| 1278 | .hash = dccp_v4_hash, | ||
| 1279 | .unhash = dccp_v4_unhash, | ||
| 1280 | .accept = inet_csk_accept, | ||
| 1281 | .get_port = dccp_v4_get_port, | ||
| 1282 | .shutdown = dccp_shutdown, | ||
| 1283 | .destroy = dccp_v4_destroy_sock, | ||
| 1284 | .orphan_count = &dccp_orphan_count, | ||
| 1285 | .max_header = MAX_DCCP_HEADER, | ||
| 1286 | .obj_size = sizeof(struct dccp_sock), | ||
| 1287 | .rsk_prot = &dccp_request_sock_ops, | ||
| 1288 | .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ | ||
| 1289 | }; | ||
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 000000000000..810f0c293b85 --- /dev/null +++ b/net/dccp/minisocks.c | |||
| @@ -0,0 +1,199 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/minisocks.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/config.h> | ||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <linux/skbuff.h> | ||
| 16 | #include <linux/timer.h> | ||
| 17 | |||
| 18 | #include <net/sock.h> | ||
| 19 | #include <net/xfrm.h> | ||
| 20 | #include <net/inet_timewait_sock.h> | ||
| 21 | |||
| 22 | #include "ccid.h" | ||
| 23 | #include "dccp.h" | ||
| 24 | |||
/*
 * Placeholder for moving @sk into a time-wait state.
 *
 * For now this only logs a debug message and sets the socket state to
 * @state; @timeo is ignored.
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
	/* FIXME: Implement */
	dccp_pr_debug("Want to help? Start here\n");
	dccp_set_state(sk, state);
}
| 31 | |||
/*
 * This is for handling early-kills of TIME_WAIT sockets.
 *
 * Logs a debug message and hands @tw to __inet_twsk_kill() together with
 * the DCCP hash tables.
 */
void dccp_tw_deschedule(struct inet_timewait_sock *tw)
{
	dccp_pr_debug("Want to help? Start here\n");
	__inet_twsk_kill(tw, &dccp_hashinfo);
}
| 38 | |||
| 39 | struct sock *dccp_create_openreq_child(struct sock *sk, | ||
| 40 | const struct request_sock *req, | ||
| 41 | const struct sk_buff *skb) | ||
| 42 | { | ||
| 43 | /* | ||
| 44 | * Step 3: Process LISTEN state | ||
| 45 | * | ||
| 46 | * // Generate a new socket and switch to that socket | ||
| 47 | * Set S := new socket for this port pair | ||
| 48 | */ | ||
| 49 | struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); | ||
| 50 | |||
| 51 | if (newsk != NULL) { | ||
| 52 | const struct dccp_request_sock *dreq = dccp_rsk(req); | ||
| 53 | struct inet_connection_sock *newicsk = inet_csk(sk); | ||
| 54 | struct dccp_sock *newdp = dccp_sk(newsk); | ||
| 55 | |||
| 56 | newdp->dccps_hc_rx_ackpkts = NULL; | ||
| 57 | newdp->dccps_role = DCCP_ROLE_SERVER; | ||
| 58 | newicsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
| 59 | |||
| 60 | if (newdp->dccps_options.dccpo_send_ack_vector) { | ||
| 61 | newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
| 62 | GFP_ATOMIC); | ||
| 63 | /* | ||
| 64 | * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone | ||
| 65 | * copied the master sock and left the CCID pointers for this child, | ||
| 66 | * that is why we do the __ccid_get calls. | ||
| 67 | */ | ||
| 68 | if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) | ||
| 69 | goto out_free; | ||
| 70 | } | ||
| 71 | |||
| 72 | if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || | ||
| 73 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { | ||
| 74 | dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); | ||
| 75 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); | ||
| 76 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); | ||
| 77 | out_free: | ||
| 78 | /* It is still raw copy of parent, so invalidate | ||
| 79 | * destructor and make plain sk_free() */ | ||
| 80 | newsk->sk_destruct = NULL; | ||
| 81 | sk_free(newsk); | ||
| 82 | return NULL; | ||
| 83 | } | ||
| 84 | |||
| 85 | __ccid_get(newdp->dccps_hc_rx_ccid); | ||
| 86 | __ccid_get(newdp->dccps_hc_tx_ccid); | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Step 3: Process LISTEN state | ||
| 90 | * | ||
| 91 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
| 92 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
| 93 | */ | ||
| 94 | |||
| 95 | /* See dccp_v4_conn_request */ | ||
| 96 | newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; | ||
| 97 | |||
| 98 | newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; | ||
| 99 | dccp_update_gsr(newsk, dreq->dreq_isr); | ||
| 100 | |||
| 101 | newdp->dccps_iss = dreq->dreq_iss; | ||
| 102 | dccp_update_gss(newsk, dreq->dreq_iss); | ||
| 103 | |||
| 104 | dccp_init_xmit_timers(newsk); | ||
| 105 | |||
| 106 | DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); | ||
| 107 | } | ||
| 108 | return newsk; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* | ||
| 112 | * Process an incoming packet for RESPOND sockets represented | ||
| 113 | * as an request_sock. | ||
| 114 | */ | ||
| 115 | struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
| 116 | struct request_sock *req, | ||
| 117 | struct request_sock **prev) | ||
| 118 | { | ||
| 119 | struct sock *child = NULL; | ||
| 120 | |||
| 121 | /* Check for retransmitted REQUEST */ | ||
| 122 | if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { | ||
| 123 | if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { | ||
| 124 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
| 125 | |||
| 126 | dccp_pr_debug("Retransmitted REQUEST\n"); | ||
| 127 | /* Send another RESPONSE packet */ | ||
| 128 | dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); | ||
| 129 | dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 130 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | ||
| 131 | } | ||
| 132 | /* Network Duplicate, discard packet */ | ||
| 133 | return NULL; | ||
| 134 | } | ||
| 135 | |||
| 136 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; | ||
| 137 | |||
| 138 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && | ||
| 139 | dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) | ||
| 140 | goto drop; | ||
| 141 | |||
| 142 | /* Invalid ACK */ | ||
| 143 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { | ||
| 144 | dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", | ||
| 145 | DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); | ||
| 146 | goto drop; | ||
| 147 | } | ||
| 148 | |||
| 149 | child = dccp_v4_request_recv_sock(sk, skb, req, NULL); | ||
| 150 | if (child == NULL) | ||
| 151 | goto listen_overflow; | ||
| 152 | |||
| 153 | /* FIXME: deal with options */ | ||
| 154 | |||
| 155 | inet_csk_reqsk_queue_unlink(sk, req, prev); | ||
| 156 | inet_csk_reqsk_queue_removed(sk, req); | ||
| 157 | inet_csk_reqsk_queue_add(sk, req, child); | ||
| 158 | out: | ||
| 159 | return child; | ||
| 160 | listen_overflow: | ||
| 161 | dccp_pr_debug("listen_overflow!\n"); | ||
| 162 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
| 163 | drop: | ||
| 164 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) | ||
| 165 | req->rsk_ops->send_reset(skb); | ||
| 166 | |||
| 167 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
| 168 | goto out; | ||
| 169 | } | ||
| 170 | |||
| 171 | /* | ||
| 172 | * Queue segment on the new socket if the new socket is active, | ||
| 173 | * otherwise we just shortcircuit this and continue with | ||
| 174 | * the new socket. | ||
| 175 | */ | ||
| 176 | int dccp_child_process(struct sock *parent, struct sock *child, | ||
| 177 | struct sk_buff *skb) | ||
| 178 | { | ||
| 179 | int ret = 0; | ||
| 180 | const int state = child->sk_state; | ||
| 181 | |||
| 182 | if (!sock_owned_by_user(child)) { | ||
| 183 | ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); | ||
| 184 | |||
| 185 | /* Wakeup parent, send SIGIO */ | ||
| 186 | if (state == DCCP_RESPOND && child->sk_state != state) | ||
| 187 | parent->sk_data_ready(parent, 0); | ||
| 188 | } else { | ||
| 189 | /* Alas, it is possible again, because we do lookup | ||
| 190 | * in main socket hash table and lock on listening | ||
| 191 | * socket does not protect us more. | ||
| 192 | */ | ||
| 193 | sk_add_backlog(child, skb); | ||
| 194 | } | ||
| 195 | |||
| 196 | bh_unlock_sock(child); | ||
| 197 | sock_put(child); | ||
| 198 | return ret; | ||
| 199 | } | ||
diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 000000000000..e1867767946c --- /dev/null +++ b/net/dccp/options.c | |||
| @@ -0,0 +1,763 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/options.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> | ||
| 6 | * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public License | ||
| 10 | * as published by the Free Software Foundation; either version | ||
| 11 | * 2 of the License, or (at your option) any later version. | ||
| 12 | */ | ||
| 13 | #include <linux/config.h> | ||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <linux/module.h> | ||
| 16 | #include <linux/types.h> | ||
| 17 | #include <linux/kernel.h> | ||
| 18 | #include <linux/skbuff.h> | ||
| 19 | |||
| 20 | #include "ccid.h" | ||
| 21 | #include "dccp.h" | ||
| 22 | |||
/*
 * Forward declaration.  The function has internal linkage, so its
 * definition is expected later in this file.
 */
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
					     struct sock *sk,
					     const u64 ackno,
					     const unsigned char len,
					     const unsigned char *vector);
| 28 | |||
| 29 | /* stores the default values for new connection. may be changed with sysctl */ | ||
| 30 | static const struct dccp_options dccpo_default_values = { | ||
| 31 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, | ||
| 32 | .dccpo_ccid = DCCPF_INITIAL_CCID, | ||
| 33 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, | ||
| 34 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, | ||
| 35 | }; | ||
| 36 | |||
| 37 | void dccp_options_init(struct dccp_options *dccpo) | ||
| 38 | { | ||
| 39 | memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); | ||
| 40 | } | ||
| 41 | |||
| 42 | static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) | ||
| 43 | { | ||
| 44 | u32 value = 0; | ||
| 45 | |||
| 46 | if (len > 3) | ||
| 47 | value += *bf++ << 24; | ||
| 48 | if (len > 2) | ||
| 49 | value += *bf++ << 16; | ||
| 50 | if (len > 1) | ||
| 51 | value += *bf++ << 8; | ||
| 52 | if (len > 0) | ||
| 53 | value += *bf; | ||
| 54 | |||
| 55 | return value; | ||
| 56 | } | ||
| 57 | |||
| 58 | int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | ||
| 59 | { | ||
| 60 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 61 | #ifdef DCCP_DEBUG | ||
| 62 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : | ||
| 63 | "server rx opt: "; | ||
| 64 | #endif | ||
| 65 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
| 66 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; | ||
| 67 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | ||
| 68 | unsigned char *opt_ptr = options; | ||
| 69 | const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); | ||
| 70 | struct dccp_options_received *opt_recv = &dp->dccps_options_received; | ||
| 71 | unsigned char opt, len; | ||
| 72 | unsigned char *value; | ||
| 73 | |||
| 74 | memset(opt_recv, 0, sizeof(*opt_recv)); | ||
| 75 | |||
| 76 | while (opt_ptr != opt_end) { | ||
| 77 | opt = *opt_ptr++; | ||
| 78 | len = 0; | ||
| 79 | value = NULL; | ||
| 80 | |||
| 81 | /* Check if this isn't a single byte option */ | ||
| 82 | if (opt > DCCPO_MAX_RESERVED) { | ||
| 83 | if (opt_ptr == opt_end) | ||
| 84 | goto out_invalid_option; | ||
| 85 | |||
| 86 | len = *opt_ptr++; | ||
| 87 | if (len < 3) | ||
| 88 | goto out_invalid_option; | ||
| 89 | /* | ||
| 90 | * Remove the type and len fields, leaving | ||
| 91 | * just the value size | ||
| 92 | */ | ||
| 93 | len -= 2; | ||
| 94 | value = opt_ptr; | ||
| 95 | opt_ptr += len; | ||
| 96 | |||
| 97 | if (opt_ptr > opt_end) | ||
| 98 | goto out_invalid_option; | ||
| 99 | } | ||
| 100 | |||
| 101 | switch (opt) { | ||
| 102 | case DCCPO_PADDING: | ||
| 103 | break; | ||
| 104 | case DCCPO_NDP_COUNT: | ||
| 105 | if (len > 3) | ||
| 106 | goto out_invalid_option; | ||
| 107 | |||
| 108 | opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); | ||
| 109 | dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); | ||
| 110 | break; | ||
| 111 | case DCCPO_ACK_VECTOR_0: | ||
| 112 | if (len > DCCP_MAX_ACK_VECTOR_LEN) | ||
| 113 | goto out_invalid_option; | ||
| 114 | |||
| 115 | if (pkt_type == DCCP_PKT_DATA) | ||
| 116 | continue; | ||
| 117 | |||
| 118 | opt_recv->dccpor_ack_vector_len = len; | ||
| 119 | opt_recv->dccpor_ack_vector_idx = value - options; | ||
| 120 | |||
| 121 | dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", | ||
| 122 | debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
| 123 | dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 124 | value, len); | ||
| 125 | dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, | ||
| 126 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 127 | len, value); | ||
| 128 | break; | ||
| 129 | case DCCPO_TIMESTAMP: | ||
| 130 | if (len != 4) | ||
| 131 | goto out_invalid_option; | ||
| 132 | |||
| 133 | opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); | ||
| 134 | |||
| 135 | dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; | ||
| 136 | dp->dccps_timestamp_time = jiffies; | ||
| 137 | |||
| 138 | dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", | ||
| 139 | debug_prefix, opt_recv->dccpor_timestamp, | ||
| 140 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
| 141 | break; | ||
| 142 | case DCCPO_TIMESTAMP_ECHO: | ||
| 143 | if (len < 4 || len > 8) | ||
| 144 | goto out_invalid_option; | ||
| 145 | |||
| 146 | opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); | ||
| 147 | |||
| 148 | dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", | ||
| 149 | debug_prefix, opt_recv->dccpor_timestamp_echo, | ||
| 150 | len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 151 | tcp_time_stamp - opt_recv->dccpor_timestamp_echo); | ||
| 152 | |||
| 153 | opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); | ||
| 154 | dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, | ||
| 155 | opt_recv->dccpor_elapsed_time); | ||
| 156 | break; | ||
| 157 | case DCCPO_ELAPSED_TIME: | ||
| 158 | if (len > 4) | ||
| 159 | goto out_invalid_option; | ||
| 160 | |||
| 161 | if (pkt_type == DCCP_PKT_DATA) | ||
| 162 | continue; | ||
| 163 | opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); | ||
| 164 | dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, | ||
| 165 | opt_recv->dccpor_elapsed_time); | ||
| 166 | break; | ||
| 167 | /* | ||
| 168 | * From draft-ietf-dccp-spec-11.txt: | ||
| 169 | * | ||
| 170 | * Option numbers 128 through 191 are for options sent from the HC- | ||
| 171 | * Sender to the HC-Receiver; option numbers 192 through 255 are for | ||
| 172 | * options sent from the HC-Receiver to the HC-Sender. | ||
| 173 | */ | ||
| 174 | case 128 ... 191: { | ||
| 175 | const u16 idx = value - options; | ||
| 176 | |||
| 177 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) | ||
| 178 | goto out_invalid_option; | ||
| 179 | } | ||
| 180 | break; | ||
| 181 | case 192 ... 255: { | ||
| 182 | const u16 idx = value - options; | ||
| 183 | |||
| 184 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) | ||
| 185 | goto out_invalid_option; | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | default: | ||
| 189 | pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", | ||
| 190 | sk, opt, len); | ||
| 191 | break; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | return 0; | ||
| 196 | |||
| 197 | out_invalid_option: | ||
| 198 | DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); | ||
| 199 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; | ||
| 200 | pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); | ||
| 201 | return -1; | ||
| 202 | } | ||
| 203 | |||
| 204 | static void dccp_encode_value_var(const u32 value, unsigned char *to, | ||
| 205 | const unsigned int len) | ||
| 206 | { | ||
| 207 | if (len > 3) | ||
| 208 | *to++ = (value & 0xFF000000) >> 24; | ||
| 209 | if (len > 2) | ||
| 210 | *to++ = (value & 0xFF0000) >> 16; | ||
| 211 | if (len > 1) | ||
| 212 | *to++ = (value & 0xFF00) >> 8; | ||
| 213 | if (len > 0) | ||
| 214 | *to++ = (value & 0xFF); | ||
| 215 | } | ||
| 216 | |||
/* Number of value bytes (1, 2 or 3) used to encode an NDP count of @ndp. */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;

	return ndp <= 0xFFFF ? 2 : 3;
}
| 221 | |||
| 222 | void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
| 223 | const unsigned char option, | ||
| 224 | const void *value, const unsigned char len) | ||
| 225 | { | ||
| 226 | unsigned char *to; | ||
| 227 | |||
| 228 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { | ||
| 229 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); | ||
| 230 | return; | ||
| 231 | } | ||
| 232 | |||
| 233 | DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; | ||
| 234 | |||
| 235 | to = skb_push(skb, len + 2); | ||
| 236 | *to++ = option; | ||
| 237 | *to++ = len + 2; | ||
| 238 | |||
| 239 | memcpy(to, value, len); | ||
| 240 | } | ||
| 241 | |||
| 242 | EXPORT_SYMBOL_GPL(dccp_insert_option); | ||
| 243 | |||
| 244 | static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) | ||
| 245 | { | ||
| 246 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 247 | int ndp = dp->dccps_ndp_count; | ||
| 248 | |||
| 249 | if (dccp_non_data_packet(skb)) | ||
| 250 | ++dp->dccps_ndp_count; | ||
| 251 | else | ||
| 252 | dp->dccps_ndp_count = 0; | ||
| 253 | |||
| 254 | if (ndp > 0) { | ||
| 255 | unsigned char *ptr; | ||
| 256 | const int ndp_len = dccp_ndp_len(ndp); | ||
| 257 | const int len = ndp_len + 2; | ||
| 258 | |||
| 259 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | ||
| 260 | return; | ||
| 261 | |||
| 262 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
| 263 | |||
| 264 | ptr = skb_push(skb, len); | ||
| 265 | *ptr++ = DCCPO_NDP_COUNT; | ||
| 266 | *ptr++ = len; | ||
| 267 | dccp_encode_value_var(ndp, ptr, ndp_len); | ||
| 268 | } | ||
| 269 | } | ||
| 270 | |||
| 271 | static inline int dccp_elapsed_time_len(const u32 elapsed_time) | ||
| 272 | { | ||
| 273 | return elapsed_time == 0 ? 0 : | ||
| 274 | elapsed_time <= 0xFF ? 1 : | ||
| 275 | elapsed_time <= 0xFFFF ? 2 : | ||
| 276 | elapsed_time <= 0xFFFFFF ? 3 : 4; | ||
| 277 | } | ||
| 278 | |||
| 279 | void dccp_insert_option_elapsed_time(struct sock *sk, | ||
| 280 | struct sk_buff *skb, | ||
| 281 | u32 elapsed_time) | ||
| 282 | { | ||
| 283 | #ifdef DCCP_DEBUG | ||
| 284 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 285 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : | ||
| 286 | "server TX opt: "; | ||
| 287 | #endif | ||
| 288 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | ||
| 289 | const int len = 2 + elapsed_time_len; | ||
| 290 | unsigned char *to; | ||
| 291 | |||
| 292 | /* If elapsed_time == 0... */ | ||
| 293 | if (elapsed_time_len == 2) | ||
| 294 | return; | ||
| 295 | |||
| 296 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
| 297 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); | ||
| 298 | return; | ||
| 299 | } | ||
| 300 | |||
| 301 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
| 302 | |||
| 303 | to = skb_push(skb, len); | ||
| 304 | *to++ = DCCPO_ELAPSED_TIME; | ||
| 305 | *to++ = len; | ||
| 306 | |||
| 307 | dccp_encode_value_var(elapsed_time, to, elapsed_time_len); | ||
| 308 | |||
| 309 | dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", | ||
| 310 | debug_prefix, elapsed_time, | ||
| 311 | len, DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 312 | } | ||
| 313 | |||
| 314 | EXPORT_SYMBOL(dccp_insert_option_elapsed_time); | ||
| 315 | |||
| 316 | static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) | ||
| 317 | { | ||
| 318 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 319 | #ifdef DCCP_DEBUG | ||
| 320 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : | ||
| 321 | "server TX opt: "; | ||
| 322 | #endif | ||
| 323 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
| 324 | int len = ap->dccpap_buf_vector_len + 2; | ||
| 325 | const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; | ||
| 326 | unsigned char *to, *from; | ||
| 327 | |||
| 328 | if (elapsed_time != 0) | ||
| 329 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
| 330 | |||
| 331 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
| 332 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); | ||
| 333 | return; | ||
| 334 | } | ||
| 335 | |||
| 336 | /* | ||
| 337 | * XXX: now we have just one ack vector sent record, so | ||
| 338 | * we have to wait for it to be cleared. | ||
| 339 | * | ||
| 340 | * Of course this is not acceptable, but this is just for | ||
| 341 | * basic testing now. | ||
| 342 | */ | ||
| 343 | if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
| 344 | return; | ||
| 345 | |||
| 346 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
| 347 | |||
| 348 | to = skb_push(skb, len); | ||
| 349 | *to++ = DCCPO_ACK_VECTOR_0; | ||
| 350 | *to++ = len; | ||
| 351 | |||
| 352 | len = ap->dccpap_buf_vector_len; | ||
| 353 | from = ap->dccpap_buf + ap->dccpap_buf_head; | ||
| 354 | |||
| 355 | /* Check if buf_head wraps */ | ||
| 356 | if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { | ||
| 357 | const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; | ||
| 358 | |||
| 359 | memcpy(to, from, tailsize); | ||
| 360 | to += tailsize; | ||
| 361 | len -= tailsize; | ||
| 362 | from = ap->dccpap_buf; | ||
| 363 | } | ||
| 364 | |||
| 365 | memcpy(to, from, len); | ||
| 366 | /* | ||
| 367 | * From draft-ietf-dccp-spec-11.txt: | ||
| 368 | * | ||
| 369 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
| 370 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
| 371 | * sequence number it used for the ack packet; ack_ptr will equal | ||
| 372 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal | ||
| 373 | * buf_nonce. | ||
| 374 | * | ||
| 375 | * This implemention uses just one ack record for now. | ||
| 376 | */ | ||
| 377 | ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 378 | ap->dccpap_ack_ptr = ap->dccpap_buf_head; | ||
| 379 | ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; | ||
| 380 | ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; | ||
| 381 | ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; | ||
| 382 | |||
| 383 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", | ||
| 384 | debug_prefix, ap->dccpap_ack_vector_len, | ||
| 385 | ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); | ||
| 386 | } | ||
| 387 | |||
| 388 | static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) | ||
| 389 | { | ||
| 390 | const u32 now = htonl(tcp_time_stamp); | ||
| 391 | dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); | ||
| 392 | } | ||
| 393 | |||
| 394 | static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) | ||
| 395 | { | ||
| 396 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 397 | #ifdef DCCP_DEBUG | ||
| 398 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : | ||
| 399 | "server TX opt: "; | ||
| 400 | #endif | ||
| 401 | u32 tstamp_echo; | ||
| 402 | const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; | ||
| 403 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | ||
| 404 | const int len = 6 + elapsed_time_len; | ||
| 405 | unsigned char *to; | ||
| 406 | |||
| 407 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
| 408 | LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); | ||
| 409 | return; | ||
| 410 | } | ||
| 411 | |||
| 412 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
| 413 | |||
| 414 | to = skb_push(skb, len); | ||
| 415 | *to++ = DCCPO_TIMESTAMP_ECHO; | ||
| 416 | *to++ = len; | ||
| 417 | |||
| 418 | tstamp_echo = htonl(dp->dccps_timestamp_echo); | ||
| 419 | memcpy(to, &tstamp_echo, 4); | ||
| 420 | to += 4; | ||
| 421 | dccp_encode_value_var(elapsed_time, to, elapsed_time_len); | ||
| 422 | |||
| 423 | dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", | ||
| 424 | debug_prefix, dp->dccps_timestamp_echo, | ||
| 425 | len, DCCP_SKB_CB(skb)->dccpd_seq); | ||
| 426 | |||
| 427 | dp->dccps_timestamp_echo = 0; | ||
| 428 | dp->dccps_timestamp_time = 0; | ||
| 429 | } | ||
| 430 | |||
| 431 | void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | ||
| 432 | { | ||
| 433 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 434 | |||
| 435 | DCCP_SKB_CB(skb)->dccpd_opt_len = 0; | ||
| 436 | |||
| 437 | if (dp->dccps_options.dccpo_send_ndp_count) | ||
| 438 | dccp_insert_option_ndp(sk, skb); | ||
| 439 | |||
| 440 | if (!dccp_packet_without_ack(skb)) { | ||
| 441 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
| 442 | dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) | ||
| 443 | dccp_insert_option_ack_vector(sk, skb); | ||
| 444 | |||
| 445 | dccp_insert_option_timestamp(sk, skb); | ||
| 446 | if (dp->dccps_timestamp_echo != 0) | ||
| 447 | dccp_insert_option_timestamp_echo(sk, skb); | ||
| 448 | } | ||
| 449 | |||
| 450 | ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); | ||
| 451 | ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); | ||
| 452 | |||
| 453 | /* XXX: insert other options when appropriate */ | ||
| 454 | |||
| 455 | if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { | ||
| 456 | /* The length of all options has to be a multiple of 4 */ | ||
| 457 | int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; | ||
| 458 | |||
| 459 | if (padding != 0) { | ||
| 460 | padding = 4 - padding; | ||
| 461 | memset(skb_push(skb, padding), 0, padding); | ||
| 462 | DCCP_SKB_CB(skb)->dccpd_opt_len += padding; | ||
| 463 | } | ||
| 464 | } | ||
| 465 | } | ||
| 466 | |||
| 467 | struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) | ||
| 468 | { | ||
| 469 | struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); | ||
| 470 | |||
| 471 | if (ap != NULL) { | ||
| 472 | #ifdef DCCP_DEBUG | ||
| 473 | memset(ap->dccpap_buf, 0xFF, len); | ||
| 474 | #endif | ||
| 475 | ap->dccpap_buf_len = len; | ||
| 476 | ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; | ||
| 477 | ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 478 | ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; | ||
| 479 | ap->dccpap_ack_ptr = 0; | ||
| 480 | ap->dccpap_time = 0; | ||
| 481 | ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; | ||
| 482 | } | ||
| 483 | |||
| 484 | return ap; | ||
| 485 | } | ||
| 486 | |||
/*
 * Release a tracker obtained from dccp_ackpkts_alloc(); NULL is accepted.
 * Debug builds fill the memory with 0xFF before freeing it.
 */
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
	if (ap == NULL)
		return;

#ifdef DCCP_DEBUG
	memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
	kfree(ap);
}
| 496 | |||
| 497 | static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, | ||
| 498 | const unsigned int index) | ||
| 499 | { | ||
| 500 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; | ||
| 501 | } | ||
| 502 | |||
| 503 | static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, | ||
| 504 | const unsigned int index) | ||
| 505 | { | ||
| 506 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; | ||
| 507 | } | ||
| 508 | |||
| 509 | /* | ||
| 510 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
| 511 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
| 512 | * this simplifies table updates if one of the missing packets arrives. | ||
| 513 | */ | ||
| 514 | static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, | ||
| 515 | const unsigned int packets, | ||
| 516 | const unsigned char state) | ||
| 517 | { | ||
| 518 | unsigned int gap; | ||
| 519 | signed long new_head; | ||
| 520 | |||
| 521 | if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) | ||
| 522 | return -ENOBUFS; | ||
| 523 | |||
| 524 | gap = packets - 1; | ||
| 525 | new_head = ap->dccpap_buf_head - packets; | ||
| 526 | |||
| 527 | if (new_head < 0) { | ||
| 528 | if (gap > 0) { | ||
| 529 | memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, | ||
| 530 | gap + new_head + 1); | ||
| 531 | gap = -new_head; | ||
| 532 | } | ||
| 533 | new_head += ap->dccpap_buf_len; | ||
| 534 | } | ||
| 535 | |||
| 536 | ap->dccpap_buf_head = new_head; | ||
| 537 | |||
| 538 | if (gap > 0) | ||
| 539 | memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, | ||
| 540 | DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); | ||
| 541 | |||
| 542 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
| 543 | ap->dccpap_buf_vector_len += packets; | ||
| 544 | return 0; | ||
| 545 | } | ||
| 546 | |||
| 547 | /* | ||
| 548 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
| 549 | */ | ||
| 550 | int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) | ||
| 551 | { | ||
| 552 | /* | ||
| 553 | * Check at the right places if the buffer is full, if it is, tell the | ||
| 554 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
| 555 | * vectors, when we will free up space in dccpap_buf. | ||
| 556 | * | ||
| 557 | * We may well decide to do buffer compression, etc, but for now lets | ||
| 558 | * just drop. | ||
| 559 | * | ||
| 560 | * From Appendix A: | ||
| 561 | * | ||
| 562 | * Of course, the circular buffer may overflow, either when the HC- | ||
| 563 | * Sender is sending data at a very high rate, when the HC-Receiver's | ||
| 564 | * acknowledgements are not reaching the HC-Sender, or when the HC- | ||
| 565 | * Sender is forgetting to acknowledge those acks (so the HC-Receiver | ||
| 566 | * is unable to clean up old state). In this case, the HC-Receiver | ||
| 567 | * should either compress the buffer (by increasing run lengths when | ||
| 568 | * possible), transfer its state to a larger buffer, or, as a last | ||
| 569 | * resort, drop all received packets, without processing them | ||
| 570 | * whatsoever, until its buffer shrinks again. | ||
| 571 | */ | ||
| 572 | |||
| 573 | /* See if this is the first ackno being inserted */ | ||
| 574 | if (ap->dccpap_buf_vector_len == 0) { | ||
| 575 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
| 576 | ap->dccpap_buf_vector_len = 1; | ||
| 577 | } else if (after48(ackno, ap->dccpap_buf_ackno)) { | ||
| 578 | const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); | ||
| 579 | |||
| 580 | /* | ||
| 581 | * Look if the state of this packet is the same as the previous ackno | ||
| 582 | * and if so if we can bump the head len. | ||
| 583 | */ | ||
| 584 | if (delta == 1 && | ||
| 585 | dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && | ||
| 586 | dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) | ||
| 587 | ap->dccpap_buf[ap->dccpap_buf_head]++; | ||
| 588 | else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) | ||
| 589 | return -ENOBUFS; | ||
| 590 | } else { | ||
| 591 | /* | ||
| 592 | * A.1.2. Old Packets | ||
| 593 | * | ||
| 594 | * When a packet with Sequence Number S arrives, and S <= buf_ackno, | ||
| 595 | * the HC-Receiver will scan the table for the byte corresponding to S. | ||
| 596 | * (Indexing structures could reduce the complexity of this scan.) | ||
| 597 | */ | ||
| 598 | u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); | ||
| 599 | unsigned int index = ap->dccpap_buf_head; | ||
| 600 | |||
| 601 | while (1) { | ||
| 602 | const u8 len = dccp_ackpkts_len(ap, index); | ||
| 603 | const u8 state = dccp_ackpkts_state(ap, index); | ||
| 604 | /* | ||
| 605 | * valid packets not yet in dccpap_buf have a reserved entry, with | ||
| 606 | * a len equal to 0 | ||
| 607 | */ | ||
| 608 | if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && | ||
| 609 | len == 0 && delta == 0) { /* Found our reserved seat! */ | ||
| 610 | dccp_pr_debug("Found %llu reserved seat!\n", ackno); | ||
| 611 | ap->dccpap_buf[index] = state; | ||
| 612 | goto out; | ||
| 613 | } | ||
| 614 | /* len == 0 means one packet */ | ||
| 615 | if (delta < len + 1) | ||
| 616 | goto out_duplicate; | ||
| 617 | |||
| 618 | delta -= len + 1; | ||
| 619 | if (++index == ap->dccpap_buf_len) | ||
| 620 | index = 0; | ||
| 621 | } | ||
| 622 | } | ||
| 623 | |||
| 624 | ap->dccpap_buf_ackno = ackno; | ||
| 625 | ap->dccpap_time = jiffies; | ||
| 626 | out: | ||
| 627 | dccp_pr_debug(""); | ||
| 628 | dccp_ackpkts_print(ap); | ||
| 629 | return 0; | ||
| 630 | |||
| 631 | out_duplicate: | ||
| 632 | /* Duplicate packet */ | ||
| 633 | dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); | ||
| 634 | return -EILSEQ; | ||
| 635 | } | ||
| 636 | |||
#ifdef DCCP_DEBUG
/* Print @len ack vector bytes starting at @vector as "state,run length|". */
static void dccp_ackvector_print_cells(const unsigned char *vector, int len)
{
	while (len--) {
		const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);

		printk("%d,%d|", state, rl);
		++vector;
	}
}

/*
 * Dump a contiguous ack vector of @len bytes, whose first byte describes
 * @ackno, on a single line.  Does nothing unless dccp_debug is set.
 */
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
{
	if (!dccp_debug)
		return;

	printk("ACK vector len=%d, ackno=%llu |", len, ackno);
	dccp_ackvector_print_cells(vector, len);
	printk("\n");
}

/*
 * Dump the ack vector currently held in @ap, in the same format as
 * dccp_ackvector_print().  Does nothing unless dccp_debug is set.
 */
void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
{
	const int len = ap->dccpap_buf_vector_len;
	/* Cells available between the head and the end of the buffer */
	int first = ap->dccpap_buf_len - ap->dccpap_buf_head;

	if (!dccp_debug)
		return;

	if (first > len)
		first = len;

	printk("ACK vector len=%d, ackno=%llu |", len, ap->dccpap_buf_ackno);
	dccp_ackvector_print_cells(ap->dccpap_buf + ap->dccpap_buf_head, first);
	/* The vector lives in a circular buffer: the rest is at its start */
	dccp_ackvector_print_cells(ap->dccpap_buf, len - first);
	printk("\n");
}
#endif
| 663 | |||
| 664 | static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) | ||
| 665 | { | ||
| 666 | /* | ||
| 667 | * As we're keeping track of the ack vector size | ||
| 668 | * (dccpap_buf_vector_len) and the sent ack vector size | ||
| 669 | * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but | ||
| 670 | * keep this code here as in the future we'll implement a vector of ack | ||
| 671 | * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme | ||
| 672 | */ | ||
| 673 | #if 0 | ||
| 674 | ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; | ||
| 675 | if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) | ||
| 676 | ap->dccpap_buf_tail -= ap->dccpap_buf_len; | ||
| 677 | #endif | ||
| 678 | ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; | ||
| 679 | } | ||
| 680 | |||
| 681 | void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, | ||
| 682 | u64 ackno) | ||
| 683 | { | ||
| 684 | /* Check if we actually sent an ACK vector */ | ||
| 685 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
| 686 | return; | ||
| 687 | |||
| 688 | if (ackno == ap->dccpap_ack_seqno) { | ||
| 689 | #ifdef DCCP_DEBUG | ||
| 690 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 691 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : | ||
| 692 | "server rx ack: "; | ||
| 693 | #endif | ||
| 694 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", | ||
| 695 | debug_prefix, 1, | ||
| 696 | ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); | ||
| 697 | dccp_ackpkts_trow_away_ack_record(ap); | ||
| 698 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 699 | } | ||
| 700 | } | ||
| 701 | |||
| 702 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
| 703 | struct sock *sk, u64 ackno, | ||
| 704 | const unsigned char len, | ||
| 705 | const unsigned char *vector) | ||
| 706 | { | ||
| 707 | unsigned char i; | ||
| 708 | |||
| 709 | /* Check if we actually sent an ACK vector */ | ||
| 710 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
| 711 | return; | ||
| 712 | /* | ||
| 713 | * We're in the receiver half connection, so if the received an ACK vector | ||
| 714 | * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. | ||
| 715 | * | ||
| 716 | * Extra explanation with example: | ||
| 717 | * | ||
| 718 | * if we received an ACK vector with ackno 50, it can only be acking | ||
| 719 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
| 720 | */ | ||
| 721 | // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); | ||
| 722 | if (before48(ackno, ap->dccpap_ack_seqno)) { | ||
| 723 | // dccp_pr_debug_cat("yes\n"); | ||
| 724 | return; | ||
| 725 | } | ||
| 726 | // dccp_pr_debug_cat("no\n"); | ||
| 727 | |||
| 728 | i = len; | ||
| 729 | while (i--) { | ||
| 730 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
| 731 | u64 ackno_end_rl; | ||
| 732 | |||
| 733 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
| 734 | |||
| 735 | // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno); | ||
| 736 | if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { | ||
| 737 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
| 738 | // dccp_pr_debug_cat("yes\n"); | ||
| 739 | |||
| 740 | if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { | ||
| 741 | #ifdef DCCP_DEBUG | ||
| 742 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 743 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : | ||
| 744 | "server rx ack: "; | ||
| 745 | #endif | ||
| 746 | dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", | ||
| 747 | debug_prefix, len, | ||
| 748 | ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); | ||
| 749 | dccp_ackpkts_trow_away_ack_record(ap); | ||
| 750 | } | ||
| 751 | /* | ||
| 752 | * If dccpap_ack_seqno was not received, no problem we'll | ||
| 753 | * send another ACK vector. | ||
| 754 | */ | ||
| 755 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 756 | break; | ||
| 757 | } | ||
| 758 | // dccp_pr_debug_cat("no\n"); | ||
| 759 | |||
| 760 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
| 761 | ++vector; | ||
| 762 | } | ||
| 763 | } | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 000000000000..22ca2910d4f2 --- /dev/null +++ b/net/dccp/output.c | |||
| @@ -0,0 +1,406 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/output.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/config.h> | ||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <linux/skbuff.h> | ||
| 16 | |||
| 17 | #include <net/sock.h> | ||
| 18 | |||
| 19 | #include "ccid.h" | ||
| 20 | #include "dccp.h" | ||
| 21 | |||
| 22 | static inline void dccp_event_ack_sent(struct sock *sk) | ||
| 23 | { | ||
| 24 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
| 25 | } | ||
| 26 | |||
| 27 | /* | ||
| 28 | * All SKB's seen here are completely headerless. It is our | ||
| 29 | * job to build the DCCP header, and pass the packet down to | ||
| 30 | * IP so it can do the same plus pass the packet off to the | ||
| 31 | * device. | ||
| 32 | */ | ||
| 33 | int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | ||
| 34 | { | ||
| 35 | if (likely(skb != NULL)) { | ||
| 36 | const struct inet_sock *inet = inet_sk(sk); | ||
| 37 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 38 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
| 39 | struct dccp_hdr *dh; | ||
| 40 | /* XXX For now we're using only 48 bits sequence numbers */ | ||
| 41 | const int dccp_header_size = sizeof(*dh) + | ||
| 42 | sizeof(struct dccp_hdr_ext) + | ||
| 43 | dccp_packet_hdr_len(dcb->dccpd_type); | ||
| 44 | int err, set_ack = 1; | ||
| 45 | u64 ackno = dp->dccps_gsr; | ||
| 46 | |||
| 47 | /* | ||
| 48 | * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing | ||
| 49 | * to do here... | ||
| 50 | */ | ||
| 51 | dccp_inc_seqno(&dp->dccps_gss); | ||
| 52 | |||
| 53 | dcb->dccpd_seq = dp->dccps_gss; | ||
| 54 | dccp_insert_options(sk, skb); | ||
| 55 | |||
| 56 | switch (dcb->dccpd_type) { | ||
| 57 | case DCCP_PKT_DATA: | ||
| 58 | set_ack = 0; | ||
| 59 | break; | ||
| 60 | case DCCP_PKT_SYNC: | ||
| 61 | case DCCP_PKT_SYNCACK: | ||
| 62 | ackno = dcb->dccpd_seq; | ||
| 63 | break; | ||
| 64 | } | ||
| 65 | |||
| 66 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
| 67 | dh = dccp_hdr(skb); | ||
| 68 | /* Data packets are not cloned as they are never retransmitted */ | ||
| 69 | if (skb_cloned(skb)) | ||
| 70 | skb_set_owner_w(skb, sk); | ||
| 71 | |||
| 72 | /* Build DCCP header and checksum it. */ | ||
| 73 | memset(dh, 0, dccp_header_size); | ||
| 74 | dh->dccph_type = dcb->dccpd_type; | ||
| 75 | dh->dccph_sport = inet->sport; | ||
| 76 | dh->dccph_dport = inet->dport; | ||
| 77 | dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; | ||
| 78 | dh->dccph_ccval = dcb->dccpd_ccval; | ||
| 79 | /* XXX For now we're using only 48 bits sequence numbers */ | ||
| 80 | dh->dccph_x = 1; | ||
| 81 | |||
| 82 | dp->dccps_awh = dp->dccps_gss; | ||
| 83 | dccp_hdr_set_seq(dh, dp->dccps_gss); | ||
| 84 | if (set_ack) | ||
| 85 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); | ||
| 86 | |||
| 87 | switch (dcb->dccpd_type) { | ||
| 88 | case DCCP_PKT_REQUEST: | ||
| 89 | dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; | ||
| 90 | break; | ||
| 91 | case DCCP_PKT_RESET: | ||
| 92 | dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; | ||
| 93 | break; | ||
| 94 | } | ||
| 95 | |||
| 96 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
| 97 | |||
| 98 | if (dcb->dccpd_type == DCCP_PKT_ACK || | ||
| 99 | dcb->dccpd_type == DCCP_PKT_DATAACK) | ||
| 100 | dccp_event_ack_sent(sk); | ||
| 101 | |||
| 102 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
| 103 | |||
| 104 | err = ip_queue_xmit(skb, 0); | ||
| 105 | if (err <= 0) | ||
| 106 | return err; | ||
| 107 | |||
| 108 | /* NET_XMIT_CN is special. It does not guarantee, | ||
| 109 | * that this packet is lost. It tells that device | ||
| 110 | * is about to start to drop packets or already | ||
| 111 | * drops some packets of the same priority and | ||
| 112 | * invokes us to send less aggressively. | ||
| 113 | */ | ||
| 114 | return err == NET_XMIT_CN ? 0 : err; | ||
| 115 | } | ||
| 116 | return -ENOBUFS; | ||
| 117 | } | ||
| 118 | |||
| 119 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | ||
| 120 | { | ||
| 121 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 122 | int mss_now; | ||
| 123 | |||
| 124 | /* | ||
| 125 | * FIXME: we really should be using the af_specific thing to support IPv6. | ||
| 126 | * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
| 127 | */ | ||
| 128 | mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
| 129 | |||
| 130 | /* Now subtract optional transport overhead */ | ||
| 131 | mss_now -= dp->dccps_ext_header_len; | ||
| 132 | |||
| 133 | /* | ||
| 134 | * FIXME: this should come from the CCID infrastructure, where, say, | ||
| 135 | * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets | ||
| 136 | * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED | ||
| 137 | * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to | ||
| 138 | * make it a multiple of 4 | ||
| 139 | */ | ||
| 140 | |||
| 141 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; | ||
| 142 | |||
| 143 | /* And store cached results */ | ||
| 144 | dp->dccps_pmtu_cookie = pmtu; | ||
| 145 | dp->dccps_mss_cache = mss_now; | ||
| 146 | |||
| 147 | return mss_now; | ||
| 148 | } | ||
| 149 | |||
| 150 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | ||
| 151 | { | ||
| 152 | if (inet_sk_rebuild_header(sk) != 0) | ||
| 153 | return -EHOSTUNREACH; /* Routing failure or similar. */ | ||
| 154 | |||
| 155 | return dccp_transmit_skb(sk, (skb_cloned(skb) ? | ||
| 156 | pskb_copy(skb, GFP_ATOMIC): | ||
| 157 | skb_clone(skb, GFP_ATOMIC))); | ||
| 158 | } | ||
| 159 | |||
| 160 | struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | ||
| 161 | struct request_sock *req) | ||
| 162 | { | ||
| 163 | struct dccp_hdr *dh; | ||
| 164 | const int dccp_header_size = sizeof(struct dccp_hdr) + | ||
| 165 | sizeof(struct dccp_hdr_ext) + | ||
| 166 | sizeof(struct dccp_hdr_response); | ||
| 167 | struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + | ||
| 168 | dccp_header_size, 1, | ||
| 169 | GFP_ATOMIC); | ||
| 170 | if (skb == NULL) | ||
| 171 | return NULL; | ||
| 172 | |||
| 173 | /* Reserve space for headers. */ | ||
| 174 | skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); | ||
| 175 | |||
| 176 | skb->dst = dst_clone(dst); | ||
| 177 | skb->csum = 0; | ||
| 178 | |||
| 179 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; | ||
| 180 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; | ||
| 181 | dccp_insert_options(sk, skb); | ||
| 182 | |||
| 183 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
| 184 | |||
| 185 | dh = dccp_hdr(skb); | ||
| 186 | memset(dh, 0, dccp_header_size); | ||
| 187 | |||
| 188 | dh->dccph_sport = inet_sk(sk)->sport; | ||
| 189 | dh->dccph_dport = inet_rsk(req)->rmt_port; | ||
| 190 | dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | ||
| 191 | dh->dccph_type = DCCP_PKT_RESPONSE; | ||
| 192 | dh->dccph_x = 1; | ||
| 193 | dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); | ||
| 194 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); | ||
| 195 | |||
| 196 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
| 197 | |||
| 198 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
| 199 | return skb; | ||
| 200 | } | ||
| 201 | |||
| 202 | struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, | ||
| 203 | const enum dccp_reset_codes code) | ||
| 204 | |||
| 205 | { | ||
| 206 | struct dccp_hdr *dh; | ||
| 207 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 208 | const int dccp_header_size = sizeof(struct dccp_hdr) + | ||
| 209 | sizeof(struct dccp_hdr_ext) + | ||
| 210 | sizeof(struct dccp_hdr_reset); | ||
| 211 | struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + | ||
| 212 | dccp_header_size, 1, | ||
| 213 | GFP_ATOMIC); | ||
| 214 | if (skb == NULL) | ||
| 215 | return NULL; | ||
| 216 | |||
| 217 | /* Reserve space for headers. */ | ||
| 218 | skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); | ||
| 219 | |||
| 220 | skb->dst = dst_clone(dst); | ||
| 221 | skb->csum = 0; | ||
| 222 | |||
| 223 | dccp_inc_seqno(&dp->dccps_gss); | ||
| 224 | |||
| 225 | DCCP_SKB_CB(skb)->dccpd_reset_code = code; | ||
| 226 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; | ||
| 227 | DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; | ||
| 228 | dccp_insert_options(sk, skb); | ||
| 229 | |||
| 230 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
| 231 | |||
| 232 | dh = dccp_hdr(skb); | ||
| 233 | memset(dh, 0, dccp_header_size); | ||
| 234 | |||
| 235 | dh->dccph_sport = inet_sk(sk)->sport; | ||
| 236 | dh->dccph_dport = inet_sk(sk)->dport; | ||
| 237 | dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | ||
| 238 | dh->dccph_type = DCCP_PKT_RESET; | ||
| 239 | dh->dccph_x = 1; | ||
| 240 | dccp_hdr_set_seq(dh, dp->dccps_gss); | ||
| 241 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); | ||
| 242 | |||
| 243 | dccp_hdr_reset(skb)->dccph_reset_code = code; | ||
| 244 | |||
| 245 | dh->dccph_checksum = dccp_v4_checksum(skb); | ||
| 246 | |||
| 247 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
| 248 | return skb; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Do all connect socket setups that can be done AF independent. | ||
| 253 | */ | ||
| 254 | static inline void dccp_connect_init(struct sock *sk) | ||
| 255 | { | ||
| 256 | struct dst_entry *dst = __sk_dst_get(sk); | ||
| 257 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 258 | |||
| 259 | sk->sk_err = 0; | ||
| 260 | sock_reset_flag(sk, SOCK_DONE); | ||
| 261 | |||
| 262 | dccp_sync_mss(sk, dst_mtu(dst)); | ||
| 263 | |||
| 264 | /* | ||
| 265 | * FIXME: set dp->{dccps_swh,dccps_swl}, with | ||
| 266 | * something like dccp_inc_seq | ||
| 267 | */ | ||
| 268 | |||
| 269 | icsk->icsk_retransmits = 0; | ||
| 270 | } | ||
| 271 | |||
| 272 | int dccp_connect(struct sock *sk) | ||
| 273 | { | ||
| 274 | struct sk_buff *skb; | ||
| 275 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 276 | |||
| 277 | dccp_connect_init(sk); | ||
| 278 | |||
| 279 | skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation); | ||
| 280 | if (unlikely(skb == NULL)) | ||
| 281 | return -ENOBUFS; | ||
| 282 | |||
| 283 | /* Reserve space for headers. */ | ||
| 284 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
| 285 | |||
| 286 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; | ||
| 287 | /* FIXME: set service to something meaningful, coming | ||
| 288 | * from userspace*/ | ||
| 289 | DCCP_SKB_CB(skb)->dccpd_service = 0; | ||
| 290 | skb->csum = 0; | ||
| 291 | skb_set_owner_w(skb, sk); | ||
| 292 | |||
| 293 | BUG_TRAP(sk->sk_send_head == NULL); | ||
| 294 | sk->sk_send_head = skb; | ||
| 295 | dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); | ||
| 296 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); | ||
| 297 | |||
| 298 | /* Timer for repeating the REQUEST until an answer. */ | ||
| 299 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | ||
| 300 | return 0; | ||
| 301 | } | ||
| 302 | |||
| 303 | void dccp_send_ack(struct sock *sk) | ||
| 304 | { | ||
| 305 | /* If we have been reset, we may not send again. */ | ||
| 306 | if (sk->sk_state != DCCP_CLOSED) { | ||
| 307 | struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); | ||
| 308 | |||
| 309 | if (skb == NULL) { | ||
| 310 | inet_csk_schedule_ack(sk); | ||
| 311 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; | ||
| 312 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); | ||
| 313 | return; | ||
| 314 | } | ||
| 315 | |||
| 316 | /* Reserve space for headers */ | ||
| 317 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
| 318 | skb->csum = 0; | ||
| 319 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; | ||
| 320 | skb_set_owner_w(skb, sk); | ||
| 321 | dccp_transmit_skb(sk, skb); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | EXPORT_SYMBOL_GPL(dccp_send_ack); | ||
| 326 | |||
| 327 | void dccp_send_delayed_ack(struct sock *sk) | ||
| 328 | { | ||
| 329 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 330 | /* | ||
| 331 | * FIXME: tune this timer. elapsed time fixes the skew, so no problem | ||
| 332 | * with using 2s, and active senders also piggyback the ACK into a | ||
| 333 | * DATAACK packet, so this is really for quiescent senders. | ||
| 334 | */ | ||
| 335 | unsigned long timeout = jiffies + 2 * HZ; | ||
| 336 | |||
| 337 | /* Use new timeout only if there wasn't a older one earlier. */ | ||
| 338 | if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { | ||
| 339 | /* If delack timer was blocked or is about to expire, | ||
| 340 | * send ACK now. | ||
| 341 | * | ||
| 342 | * FIXME: check the "about to expire" part | ||
| 343 | */ | ||
| 344 | if (icsk->icsk_ack.blocked) { | ||
| 345 | dccp_send_ack(sk); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | |||
| 349 | if (!time_before(timeout, icsk->icsk_ack.timeout)) | ||
| 350 | timeout = icsk->icsk_ack.timeout; | ||
| 351 | } | ||
| 352 | icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; | ||
| 353 | icsk->icsk_ack.timeout = timeout; | ||
| 354 | sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); | ||
| 355 | } | ||
| 356 | |||
| 357 | void dccp_send_sync(struct sock *sk, u64 seq) | ||
| 358 | { | ||
| 359 | /* | ||
| 360 | * We are not putting this on the write queue, so | ||
| 361 | * dccp_transmit_skb() will set the ownership to this | ||
| 362 | * sock. | ||
| 363 | */ | ||
| 364 | struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); | ||
| 365 | |||
| 366 | if (skb == NULL) | ||
| 367 | /* FIXME: how to make sure the sync is sent? */ | ||
| 368 | return; | ||
| 369 | |||
| 370 | /* Reserve space for headers and prepare control bits. */ | ||
| 371 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
| 372 | skb->csum = 0; | ||
| 373 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; | ||
| 374 | DCCP_SKB_CB(skb)->dccpd_seq = seq; | ||
| 375 | |||
| 376 | skb_set_owner_w(skb, sk); | ||
| 377 | dccp_transmit_skb(sk, skb); | ||
| 378 | } | ||
| 379 | |||
| 380 | /* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be | ||
| 381 | * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. | ||
| 382 | */ | ||
| 383 | void dccp_send_close(struct sock *sk) | ||
| 384 | { | ||
| 385 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 386 | struct sk_buff *skb; | ||
| 387 | |||
| 388 | /* Socket is locked, keep trying until memory is available. */ | ||
| 389 | for (;;) { | ||
| 390 | skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); | ||
| 391 | if (skb != NULL) | ||
| 392 | break; | ||
| 393 | yield(); | ||
| 394 | } | ||
| 395 | |||
| 396 | /* Reserve space for headers and prepare control bits. */ | ||
| 397 | skb_reserve(skb, sk->sk_prot->max_header); | ||
| 398 | skb->csum = 0; | ||
| 399 | DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; | ||
| 400 | |||
| 401 | skb_set_owner_w(skb, sk); | ||
| 402 | dccp_transmit_skb(sk, skb); | ||
| 403 | |||
| 404 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
| 405 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
| 406 | } | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..70284e6afe05 --- /dev/null +++ b/net/dccp/proto.c | |||
| @@ -0,0 +1,818 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/proto.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify it | ||
| 8 | * under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/config.h> | ||
| 13 | #include <linux/dccp.h> | ||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/types.h> | ||
| 16 | #include <linux/sched.h> | ||
| 17 | #include <linux/kernel.h> | ||
| 18 | #include <linux/skbuff.h> | ||
| 19 | #include <linux/netdevice.h> | ||
| 20 | #include <linux/in.h> | ||
| 21 | #include <linux/if_arp.h> | ||
| 22 | #include <linux/init.h> | ||
| 23 | #include <linux/random.h> | ||
| 24 | #include <net/checksum.h> | ||
| 25 | |||
| 26 | #include <net/inet_common.h> | ||
| 27 | #include <net/ip.h> | ||
| 28 | #include <net/protocol.h> | ||
| 29 | #include <net/sock.h> | ||
| 30 | #include <net/xfrm.h> | ||
| 31 | |||
| 32 | #include <asm/semaphore.h> | ||
| 33 | #include <linux/spinlock.h> | ||
| 34 | #include <linux/timer.h> | ||
| 35 | #include <linux/delay.h> | ||
| 36 | #include <linux/poll.h> | ||
| 37 | #include <linux/dccp.h> | ||
| 38 | |||
| 39 | #include "ccid.h" | ||
| 40 | #include "dccp.h" | ||
| 41 | |||
| 42 | DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); | ||
| 43 | |||
| 44 | atomic_t dccp_orphan_count = ATOMIC_INIT(0); | ||
| 45 | |||
| 46 | static struct net_protocol dccp_protocol = { | ||
| 47 | .handler = dccp_v4_rcv, | ||
| 48 | .err_handler = dccp_v4_err, | ||
| 49 | }; | ||
| 50 | |||
| 51 | const char *dccp_packet_name(const int type) | ||
| 52 | { | ||
| 53 | static const char *dccp_packet_names[] = { | ||
| 54 | [DCCP_PKT_REQUEST] = "REQUEST", | ||
| 55 | [DCCP_PKT_RESPONSE] = "RESPONSE", | ||
| 56 | [DCCP_PKT_DATA] = "DATA", | ||
| 57 | [DCCP_PKT_ACK] = "ACK", | ||
| 58 | [DCCP_PKT_DATAACK] = "DATAACK", | ||
| 59 | [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", | ||
| 60 | [DCCP_PKT_CLOSE] = "CLOSE", | ||
| 61 | [DCCP_PKT_RESET] = "RESET", | ||
| 62 | [DCCP_PKT_SYNC] = "SYNC", | ||
| 63 | [DCCP_PKT_SYNCACK] = "SYNCACK", | ||
| 64 | }; | ||
| 65 | |||
| 66 | if (type >= DCCP_NR_PKT_TYPES) | ||
| 67 | return "INVALID"; | ||
| 68 | else | ||
| 69 | return dccp_packet_names[type]; | ||
| 70 | } | ||
| 71 | |||
| 72 | EXPORT_SYMBOL_GPL(dccp_packet_name); | ||
| 73 | |||
| 74 | const char *dccp_state_name(const int state) | ||
| 75 | { | ||
| 76 | static char *dccp_state_names[] = { | ||
| 77 | [DCCP_OPEN] = "OPEN", | ||
| 78 | [DCCP_REQUESTING] = "REQUESTING", | ||
| 79 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
| 80 | [DCCP_LISTEN] = "LISTEN", | ||
| 81 | [DCCP_RESPOND] = "RESPOND", | ||
| 82 | [DCCP_CLOSING] = "CLOSING", | ||
| 83 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
| 84 | [DCCP_CLOSED] = "CLOSED", | ||
| 85 | }; | ||
| 86 | |||
| 87 | if (state >= DCCP_MAX_STATES) | ||
| 88 | return "INVALID STATE!"; | ||
| 89 | else | ||
| 90 | return dccp_state_names[state]; | ||
| 91 | } | ||
| 92 | |||
| 93 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
| 94 | |||
| 95 | static inline int dccp_listen_start(struct sock *sk) | ||
| 96 | { | ||
| 97 | dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; | ||
| 98 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); | ||
| 99 | } | ||
| 100 | |||
| 101 | int dccp_disconnect(struct sock *sk, int flags) | ||
| 102 | { | ||
| 103 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 104 | struct inet_sock *inet = inet_sk(sk); | ||
| 105 | int err = 0; | ||
| 106 | const int old_state = sk->sk_state; | ||
| 107 | |||
| 108 | if (old_state != DCCP_CLOSED) | ||
| 109 | dccp_set_state(sk, DCCP_CLOSED); | ||
| 110 | |||
| 111 | /* ABORT function of RFC793 */ | ||
| 112 | if (old_state == DCCP_LISTEN) { | ||
| 113 | inet_csk_listen_stop(sk); | ||
| 114 | /* FIXME: do the active reset thing */ | ||
| 115 | } else if (old_state == DCCP_REQUESTING) | ||
| 116 | sk->sk_err = ECONNRESET; | ||
| 117 | |||
| 118 | dccp_clear_xmit_timers(sk); | ||
| 119 | __skb_queue_purge(&sk->sk_receive_queue); | ||
| 120 | if (sk->sk_send_head != NULL) { | ||
| 121 | __kfree_skb(sk->sk_send_head); | ||
| 122 | sk->sk_send_head = NULL; | ||
| 123 | } | ||
| 124 | |||
| 125 | inet->dport = 0; | ||
| 126 | |||
| 127 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | ||
| 128 | inet_reset_saddr(sk); | ||
| 129 | |||
| 130 | sk->sk_shutdown = 0; | ||
| 131 | sock_reset_flag(sk, SOCK_DONE); | ||
| 132 | |||
| 133 | icsk->icsk_backoff = 0; | ||
| 134 | inet_csk_delack_init(sk); | ||
| 135 | __sk_dst_reset(sk); | ||
| 136 | |||
| 137 | BUG_TRAP(!inet->num || icsk->icsk_bind_hash); | ||
| 138 | |||
| 139 | sk->sk_error_report(sk); | ||
| 140 | return err; | ||
| 141 | } | ||
| 142 | |||
| 143 | int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) | ||
| 144 | { | ||
| 145 | dccp_pr_debug("entry\n"); | ||
| 146 | return -ENOIOCTLCMD; | ||
| 147 | } | ||
| 148 | |||
| 149 | int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
| 150 | char *optval, int optlen) | ||
| 151 | { | ||
| 152 | dccp_pr_debug("entry\n"); | ||
| 153 | |||
| 154 | if (level != SOL_DCCP) | ||
| 155 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
| 156 | |||
| 157 | return -EOPNOTSUPP; | ||
| 158 | } | ||
| 159 | |||
| 160 | int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
| 161 | char *optval, int *optlen) | ||
| 162 | { | ||
| 163 | dccp_pr_debug("entry\n"); | ||
| 164 | |||
| 165 | if (level != SOL_DCCP) | ||
| 166 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
| 167 | |||
| 168 | return -EOPNOTSUPP; | ||
| 169 | } | ||
| 170 | |||
| 171 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
| 172 | size_t len) | ||
| 173 | { | ||
| 174 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 175 | const int flags = msg->msg_flags; | ||
| 176 | const int noblock = flags & MSG_DONTWAIT; | ||
| 177 | struct sk_buff *skb; | ||
| 178 | int rc, size; | ||
| 179 | long timeo; | ||
| 180 | |||
| 181 | if (len > dp->dccps_mss_cache) | ||
| 182 | return -EMSGSIZE; | ||
| 183 | |||
| 184 | lock_sock(sk); | ||
| 185 | |||
| 186 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | ||
| 187 | |||
| 188 | /* | ||
| 189 | * We have to use sk_stream_wait_connect here to set sk_write_pending, | ||
| 190 | * so that the trick in dccp_rcv_request_sent_state_process. | ||
| 191 | */ | ||
| 192 | /* Wait for a connection to finish. */ | ||
| 193 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) | ||
| 194 | if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) | ||
| 195 | goto out_err; | ||
| 196 | |||
| 197 | size = sk->sk_prot->max_header + len; | ||
| 198 | release_sock(sk); | ||
| 199 | skb = sock_alloc_send_skb(sk, size, noblock, &rc); | ||
| 200 | lock_sock(sk); | ||
| 201 | |||
| 202 | if (skb == NULL) | ||
| 203 | goto out_release; | ||
| 204 | |||
| 205 | skb_reserve(skb, sk->sk_prot->max_header); | ||
| 206 | rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | ||
| 207 | if (rc == 0) { | ||
| 208 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
| 209 | const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
| 210 | long delay; | ||
| 211 | |||
| 212 | /* | ||
| 213 | * XXX: This is just to match the Waikato tree CA interaction | ||
| 214 | * points, after the CCID3 code is stable and I have a better | ||
| 215 | * understanding of behaviour I'll change this to look more like | ||
| 216 | * TCP. | ||
| 217 | */ | ||
| 218 | while (1) { | ||
| 219 | rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, | ||
| 220 | skb, len, &delay); | ||
| 221 | if (rc == 0) | ||
| 222 | break; | ||
| 223 | if (rc != -EAGAIN) | ||
| 224 | goto out_discard; | ||
| 225 | if (delay > timeo) | ||
| 226 | goto out_discard; | ||
| 227 | release_sock(sk); | ||
| 228 | delay = schedule_timeout(delay); | ||
| 229 | lock_sock(sk); | ||
| 230 | timeo -= delay; | ||
| 231 | if (signal_pending(current)) | ||
| 232 | goto out_interrupted; | ||
| 233 | rc = -EPIPE; | ||
| 234 | if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) | ||
| 235 | goto out_discard; | ||
| 236 | } | ||
| 237 | |||
| 238 | if (sk->sk_state == DCCP_PARTOPEN) { | ||
| 239 | /* See 8.1.5. Handshake Completion */ | ||
| 240 | inet_csk_schedule_ack(sk); | ||
| 241 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
| 242 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
| 243 | /* FIXME: we really should have a dccps_ack_pending or use icsk */ | ||
| 244 | } else if (inet_csk_ack_scheduled(sk) || | ||
| 245 | (dp->dccps_options.dccpo_send_ack_vector && | ||
| 246 | ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && | ||
| 247 | ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) | ||
| 248 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
| 249 | else | ||
| 250 | dcb->dccpd_type = DCCP_PKT_DATA; | ||
| 251 | dccp_transmit_skb(sk, skb); | ||
| 252 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); | ||
| 253 | } else { | ||
| 254 | out_discard: | ||
| 255 | kfree_skb(skb); | ||
| 256 | } | ||
| 257 | out_release: | ||
| 258 | release_sock(sk); | ||
| 259 | return rc ? : len; | ||
| 260 | out_err: | ||
| 261 | rc = sk_stream_error(sk, flags, rc); | ||
| 262 | goto out_release; | ||
| 263 | out_interrupted: | ||
| 264 | rc = sock_intr_errno(timeo); | ||
| 265 | goto out_discard; | ||
| 266 | } | ||
| 267 | |||
| 268 | EXPORT_SYMBOL(dccp_sendmsg); | ||
| 269 | |||
| 270 | int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
| 271 | size_t len, int nonblock, int flags, int *addr_len) | ||
| 272 | { | ||
| 273 | const struct dccp_hdr *dh; | ||
| 274 | int copied = 0; | ||
| 275 | unsigned long used; | ||
| 276 | int err; | ||
| 277 | int target; /* Read at least this many bytes */ | ||
| 278 | long timeo; | ||
| 279 | |||
| 280 | lock_sock(sk); | ||
| 281 | |||
| 282 | err = -ENOTCONN; | ||
| 283 | if (sk->sk_state == DCCP_LISTEN) | ||
| 284 | goto out; | ||
| 285 | |||
| 286 | timeo = sock_rcvtimeo(sk, nonblock); | ||
| 287 | |||
| 288 | /* Urgent data needs to be handled specially. */ | ||
| 289 | if (flags & MSG_OOB) | ||
| 290 | goto recv_urg; | ||
| 291 | |||
| 292 | /* FIXME */ | ||
| 293 | #if 0 | ||
| 294 | seq = &tp->copied_seq; | ||
| 295 | if (flags & MSG_PEEK) { | ||
| 296 | peek_seq = tp->copied_seq; | ||
| 297 | seq = &peek_seq; | ||
| 298 | } | ||
| 299 | #endif | ||
| 300 | |||
| 301 | target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); | ||
| 302 | |||
| 303 | do { | ||
| 304 | struct sk_buff *skb; | ||
| 305 | u32 offset; | ||
| 306 | |||
| 307 | /* FIXME */ | ||
| 308 | #if 0 | ||
| 309 | /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ | ||
| 310 | if (tp->urg_data && tp->urg_seq == *seq) { | ||
| 311 | if (copied) | ||
| 312 | break; | ||
| 313 | if (signal_pending(current)) { | ||
| 314 | copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; | ||
| 315 | break; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | #endif | ||
| 319 | |||
| 320 | /* Next get a buffer. */ | ||
| 321 | |||
| 322 | skb = skb_peek(&sk->sk_receive_queue); | ||
| 323 | do { | ||
| 324 | if (!skb) | ||
| 325 | break; | ||
| 326 | |||
| 327 | offset = 0; | ||
| 328 | dh = dccp_hdr(skb); | ||
| 329 | |||
| 330 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
| 331 | dh->dccph_type == DCCP_PKT_DATAACK) | ||
| 332 | goto found_ok_skb; | ||
| 333 | |||
| 334 | if (dh->dccph_type == DCCP_PKT_RESET || | ||
| 335 | dh->dccph_type == DCCP_PKT_CLOSE) { | ||
| 336 | dccp_pr_debug("found fin ok!\n"); | ||
| 337 | goto found_fin_ok; | ||
| 338 | } | ||
| 339 | dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); | ||
| 340 | BUG_TRAP(flags & MSG_PEEK); | ||
| 341 | skb = skb->next; | ||
| 342 | } while (skb != (struct sk_buff *)&sk->sk_receive_queue); | ||
| 343 | |||
| 344 | /* Well, if we have backlog, try to process it now yet. */ | ||
| 345 | if (copied >= target && !sk->sk_backlog.tail) | ||
| 346 | break; | ||
| 347 | |||
| 348 | if (copied) { | ||
| 349 | if (sk->sk_err || | ||
| 350 | sk->sk_state == DCCP_CLOSED || | ||
| 351 | (sk->sk_shutdown & RCV_SHUTDOWN) || | ||
| 352 | !timeo || | ||
| 353 | signal_pending(current) || | ||
| 354 | (flags & MSG_PEEK)) | ||
| 355 | break; | ||
| 356 | } else { | ||
| 357 | if (sock_flag(sk, SOCK_DONE)) | ||
| 358 | break; | ||
| 359 | |||
| 360 | if (sk->sk_err) { | ||
| 361 | copied = sock_error(sk); | ||
| 362 | break; | ||
| 363 | } | ||
| 364 | |||
| 365 | if (sk->sk_shutdown & RCV_SHUTDOWN) | ||
| 366 | break; | ||
| 367 | |||
| 368 | if (sk->sk_state == DCCP_CLOSED) { | ||
| 369 | if (!sock_flag(sk, SOCK_DONE)) { | ||
| 370 | /* This occurs when user tries to read | ||
| 371 | * from never connected socket. | ||
| 372 | */ | ||
| 373 | copied = -ENOTCONN; | ||
| 374 | break; | ||
| 375 | } | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | |||
| 379 | if (!timeo) { | ||
| 380 | copied = -EAGAIN; | ||
| 381 | break; | ||
| 382 | } | ||
| 383 | |||
| 384 | if (signal_pending(current)) { | ||
| 385 | copied = sock_intr_errno(timeo); | ||
| 386 | break; | ||
| 387 | } | ||
| 388 | } | ||
| 389 | |||
| 390 | /* FIXME: cleanup_rbuf(sk, copied); */ | ||
| 391 | |||
| 392 | if (copied >= target) { | ||
| 393 | /* Do not sleep, just process backlog. */ | ||
| 394 | release_sock(sk); | ||
| 395 | lock_sock(sk); | ||
| 396 | } else | ||
| 397 | sk_wait_data(sk, &timeo); | ||
| 398 | |||
| 399 | continue; | ||
| 400 | |||
| 401 | found_ok_skb: | ||
| 402 | /* Ok so how much can we use? */ | ||
| 403 | used = skb->len - offset; | ||
| 404 | if (len < used) | ||
| 405 | used = len; | ||
| 406 | |||
| 407 | if (!(flags & MSG_TRUNC)) { | ||
| 408 | err = skb_copy_datagram_iovec(skb, offset, | ||
| 409 | msg->msg_iov, used); | ||
| 410 | if (err) { | ||
| 411 | /* Exception. Bailout! */ | ||
| 412 | if (!copied) | ||
| 413 | copied = -EFAULT; | ||
| 414 | break; | ||
| 415 | } | ||
| 416 | } | ||
| 417 | |||
| 418 | copied += used; | ||
| 419 | len -= used; | ||
| 420 | |||
| 421 | /* FIXME: tcp_rcv_space_adjust(sk); */ | ||
| 422 | |||
| 423 | //skip_copy: | ||
| 424 | if (used + offset < skb->len) | ||
| 425 | continue; | ||
| 426 | |||
| 427 | if (!(flags & MSG_PEEK)) | ||
| 428 | sk_eat_skb(sk, skb); | ||
| 429 | continue; | ||
| 430 | found_fin_ok: | ||
| 431 | if (!(flags & MSG_PEEK)) | ||
| 432 | sk_eat_skb(sk, skb); | ||
| 433 | break; | ||
| 434 | |||
| 435 | } while (len > 0); | ||
| 436 | |||
| 437 | /* According to UNIX98, msg_name/msg_namelen are ignored | ||
| 438 | * on connected socket. I was just happy when found this 8) --ANK | ||
| 439 | */ | ||
| 440 | |||
| 441 | /* Clean up data we have read: This will do ACK frames. */ | ||
| 442 | /* FIXME: cleanup_rbuf(sk, copied); */ | ||
| 443 | |||
| 444 | release_sock(sk); | ||
| 445 | return copied; | ||
| 446 | |||
| 447 | out: | ||
| 448 | release_sock(sk); | ||
| 449 | return err; | ||
| 450 | |||
| 451 | recv_urg: | ||
| 452 | /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ | ||
| 453 | goto out; | ||
| 454 | } | ||
| 455 | |||
| 456 | static int inet_dccp_listen(struct socket *sock, int backlog) | ||
| 457 | { | ||
| 458 | struct sock *sk = sock->sk; | ||
| 459 | unsigned char old_state; | ||
| 460 | int err; | ||
| 461 | |||
| 462 | lock_sock(sk); | ||
| 463 | |||
| 464 | err = -EINVAL; | ||
| 465 | if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) | ||
| 466 | goto out; | ||
| 467 | |||
| 468 | old_state = sk->sk_state; | ||
| 469 | if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) | ||
| 470 | goto out; | ||
| 471 | |||
| 472 | /* Really, if the socket is already in listen state | ||
| 473 | * we can only allow the backlog to be adjusted. | ||
| 474 | */ | ||
| 475 | if (old_state != DCCP_LISTEN) { | ||
| 476 | /* | ||
| 477 | * FIXME: here it probably should be sk->sk_prot->listen_start | ||
| 478 | * see tcp_listen_start | ||
| 479 | */ | ||
| 480 | err = dccp_listen_start(sk); | ||
| 481 | if (err) | ||
| 482 | goto out; | ||
| 483 | } | ||
| 484 | sk->sk_max_ack_backlog = backlog; | ||
| 485 | err = 0; | ||
| 486 | |||
| 487 | out: | ||
| 488 | release_sock(sk); | ||
| 489 | return err; | ||
| 490 | } | ||
| 491 | |||
| 492 | static const unsigned char dccp_new_state[] = { | ||
| 493 | /* current state: new state: action: */ | ||
| 494 | [0] = DCCP_CLOSED, | ||
| 495 | [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
| 496 | [DCCP_REQUESTING] = DCCP_CLOSED, | ||
| 497 | [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
| 498 | [DCCP_LISTEN] = DCCP_CLOSED, | ||
| 499 | [DCCP_RESPOND] = DCCP_CLOSED, | ||
| 500 | [DCCP_CLOSING] = DCCP_CLOSED, | ||
| 501 | [DCCP_TIME_WAIT] = DCCP_CLOSED, | ||
| 502 | [DCCP_CLOSED] = DCCP_CLOSED, | ||
| 503 | }; | ||
| 504 | |||
| 505 | static int dccp_close_state(struct sock *sk) | ||
| 506 | { | ||
| 507 | const int next = dccp_new_state[sk->sk_state]; | ||
| 508 | const int ns = next & DCCP_STATE_MASK; | ||
| 509 | |||
| 510 | if (ns != sk->sk_state) | ||
| 511 | dccp_set_state(sk, ns); | ||
| 512 | |||
| 513 | return next & DCCP_ACTION_FIN; | ||
| 514 | } | ||
| 515 | |||
| 516 | void dccp_close(struct sock *sk, long timeout) | ||
| 517 | { | ||
| 518 | struct sk_buff *skb; | ||
| 519 | |||
| 520 | lock_sock(sk); | ||
| 521 | |||
| 522 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
| 523 | |||
| 524 | if (sk->sk_state == DCCP_LISTEN) { | ||
| 525 | dccp_set_state(sk, DCCP_CLOSED); | ||
| 526 | |||
| 527 | /* Special case. */ | ||
| 528 | inet_csk_listen_stop(sk); | ||
| 529 | |||
| 530 | goto adjudge_to_death; | ||
| 531 | } | ||
| 532 | |||
| 533 | /* | ||
| 534 | * We need to flush the recv. buffs. We do this only on the | ||
| 535 | * descriptor close, not protocol-sourced closes, because the | ||
| 536 | *reader process may not have drained the data yet! | ||
| 537 | */ | ||
| 538 | /* FIXME: check for unread data */ | ||
| 539 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { | ||
| 540 | __kfree_skb(skb); | ||
| 541 | } | ||
| 542 | |||
| 543 | if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | ||
| 544 | /* Check zero linger _after_ checking for unread data. */ | ||
| 545 | sk->sk_prot->disconnect(sk, 0); | ||
| 546 | } else if (dccp_close_state(sk)) { | ||
| 547 | dccp_send_close(sk); | ||
| 548 | } | ||
| 549 | |||
| 550 | sk_stream_wait_close(sk, timeout); | ||
| 551 | |||
| 552 | adjudge_to_death: | ||
| 553 | release_sock(sk); | ||
| 554 | /* | ||
| 555 | * Now socket is owned by kernel and we acquire BH lock | ||
| 556 | * to finish close. No need to check for user refs. | ||
| 557 | */ | ||
| 558 | local_bh_disable(); | ||
| 559 | bh_lock_sock(sk); | ||
| 560 | BUG_TRAP(!sock_owned_by_user(sk)); | ||
| 561 | |||
| 562 | sock_hold(sk); | ||
| 563 | sock_orphan(sk); | ||
| 564 | |||
| 565 | if (sk->sk_state != DCCP_CLOSED) | ||
| 566 | dccp_set_state(sk, DCCP_CLOSED); | ||
| 567 | |||
| 568 | atomic_inc(&dccp_orphan_count); | ||
| 569 | if (sk->sk_state == DCCP_CLOSED) | ||
| 570 | inet_csk_destroy_sock(sk); | ||
| 571 | |||
| 572 | /* Otherwise, socket is reprieved until protocol close. */ | ||
| 573 | |||
| 574 | bh_unlock_sock(sk); | ||
| 575 | local_bh_enable(); | ||
| 576 | sock_put(sk); | ||
| 577 | } | ||
| 578 | |||
/*
 * Shutdown hook for DCCP sockets.
 *
 * Currently a stub: it only emits a debug message and ignores both @sk
 * and @how.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
| 583 | |||
| 584 | struct proto_ops inet_dccp_ops = { | ||
| 585 | .family = PF_INET, | ||
| 586 | .owner = THIS_MODULE, | ||
| 587 | .release = inet_release, | ||
| 588 | .bind = inet_bind, | ||
| 589 | .connect = inet_stream_connect, | ||
| 590 | .socketpair = sock_no_socketpair, | ||
| 591 | .accept = inet_accept, | ||
| 592 | .getname = inet_getname, | ||
| 593 | .poll = sock_no_poll, | ||
| 594 | .ioctl = inet_ioctl, | ||
| 595 | .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ | ||
| 596 | .shutdown = inet_shutdown, | ||
| 597 | .setsockopt = sock_common_setsockopt, | ||
| 598 | .getsockopt = sock_common_getsockopt, | ||
| 599 | .sendmsg = inet_sendmsg, | ||
| 600 | .recvmsg = sock_common_recvmsg, | ||
| 601 | .mmap = sock_no_mmap, | ||
| 602 | .sendpage = sock_no_sendpage, | ||
| 603 | }; | ||
| 604 | |||
| 605 | extern struct net_proto_family inet_family_ops; | ||
| 606 | |||
| 607 | static struct inet_protosw dccp_v4_protosw = { | ||
| 608 | .type = SOCK_DCCP, | ||
| 609 | .protocol = IPPROTO_DCCP, | ||
| 610 | .prot = &dccp_v4_prot, | ||
| 611 | .ops = &inet_dccp_ops, | ||
| 612 | .capability = -1, | ||
| 613 | .no_check = 0, | ||
| 614 | .flags = 0, | ||
| 615 | }; | ||
| 616 | |||
| 617 | /* | ||
| 618 | * This is the global socket data structure used for responding to | ||
| 619 | * the Out-of-the-blue (OOTB) packets. A control sock will be created | ||
| 620 | * for this socket at the initialization time. | ||
| 621 | */ | ||
| 622 | struct socket *dccp_ctl_socket; | ||
| 623 | |||
| 624 | static char dccp_ctl_socket_err_msg[] __initdata = | ||
| 625 | KERN_ERR "DCCP: Failed to create the control socket.\n"; | ||
| 626 | |||
| 627 | static int __init dccp_ctl_sock_init(void) | ||
| 628 | { | ||
| 629 | int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, | ||
| 630 | &dccp_ctl_socket); | ||
| 631 | if (rc < 0) | ||
| 632 | printk(dccp_ctl_socket_err_msg); | ||
| 633 | else { | ||
| 634 | dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; | ||
| 635 | inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; | ||
| 636 | |||
| 637 | /* Unhash it so that IP input processing does not even | ||
| 638 | * see it, we do not wish this socket to see incoming | ||
| 639 | * packets. | ||
| 640 | */ | ||
| 641 | dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); | ||
| 642 | } | ||
| 643 | |||
| 644 | return rc; | ||
| 645 | } | ||
| 646 | |||
| 647 | static void __exit dccp_ctl_sock_exit(void) | ||
| 648 | { | ||
| 649 | if (dccp_ctl_socket != NULL) | ||
| 650 | sock_release(dccp_ctl_socket); | ||
| 651 | } | ||
| 652 | |||
| 653 | static int __init init_dccp_v4_mibs(void) | ||
| 654 | { | ||
| 655 | int rc = -ENOMEM; | ||
| 656 | |||
| 657 | dccp_statistics[0] = alloc_percpu(struct dccp_mib); | ||
| 658 | if (dccp_statistics[0] == NULL) | ||
| 659 | goto out; | ||
| 660 | |||
| 661 | dccp_statistics[1] = alloc_percpu(struct dccp_mib); | ||
| 662 | if (dccp_statistics[1] == NULL) | ||
| 663 | goto out_free_one; | ||
| 664 | |||
| 665 | rc = 0; | ||
| 666 | out: | ||
| 667 | return rc; | ||
| 668 | out_free_one: | ||
| 669 | free_percpu(dccp_statistics[0]); | ||
| 670 | dccp_statistics[0] = NULL; | ||
| 671 | goto out; | ||
| 672 | |||
| 673 | } | ||
| 674 | |||
| 675 | static int thash_entries; | ||
| 676 | module_param(thash_entries, int, 0444); | ||
| 677 | MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); | ||
| 678 | |||
| 679 | int dccp_debug; | ||
| 680 | module_param(dccp_debug, int, 0444); | ||
| 681 | MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); | ||
| 682 | |||
| 683 | static int __init dccp_init(void) | ||
| 684 | { | ||
| 685 | unsigned long goal; | ||
| 686 | int ehash_order, bhash_order, i; | ||
| 687 | int rc = proto_register(&dccp_v4_prot, 1); | ||
| 688 | |||
| 689 | if (rc) | ||
| 690 | goto out; | ||
| 691 | |||
| 692 | dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", | ||
| 693 | sizeof(struct inet_bind_bucket), | ||
| 694 | 0, SLAB_HWCACHE_ALIGN, | ||
| 695 | NULL, NULL); | ||
| 696 | if (!dccp_hashinfo.bind_bucket_cachep) | ||
| 697 | goto out_proto_unregister; | ||
| 698 | |||
| 699 | /* | ||
| 700 | * Size and allocate the main established and bind bucket | ||
| 701 | * hash tables. | ||
| 702 | * | ||
| 703 | * The methodology is similar to that of the buffer cache. | ||
| 704 | */ | ||
| 705 | if (num_physpages >= (128 * 1024)) | ||
| 706 | goal = num_physpages >> (21 - PAGE_SHIFT); | ||
| 707 | else | ||
| 708 | goal = num_physpages >> (23 - PAGE_SHIFT); | ||
| 709 | |||
| 710 | if (thash_entries) | ||
| 711 | goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; | ||
| 712 | for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) | ||
| 713 | ; | ||
| 714 | do { | ||
| 715 | dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / | ||
| 716 | sizeof(struct inet_ehash_bucket); | ||
| 717 | dccp_hashinfo.ehash_size >>= 1; | ||
| 718 | while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) | ||
| 719 | dccp_hashinfo.ehash_size--; | ||
| 720 | dccp_hashinfo.ehash = (struct inet_ehash_bucket *) | ||
| 721 | __get_free_pages(GFP_ATOMIC, ehash_order); | ||
| 722 | } while (!dccp_hashinfo.ehash && --ehash_order > 0); | ||
| 723 | |||
| 724 | if (!dccp_hashinfo.ehash) { | ||
| 725 | printk(KERN_CRIT "Failed to allocate DCCP " | ||
| 726 | "established hash table\n"); | ||
| 727 | goto out_free_bind_bucket_cachep; | ||
| 728 | } | ||
| 729 | |||
| 730 | for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { | ||
| 731 | rwlock_init(&dccp_hashinfo.ehash[i].lock); | ||
| 732 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); | ||
| 733 | } | ||
| 734 | |||
| 735 | bhash_order = ehash_order; | ||
| 736 | |||
| 737 | do { | ||
| 738 | dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / | ||
| 739 | sizeof(struct inet_bind_hashbucket); | ||
| 740 | if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) | ||
| 741 | continue; | ||
| 742 | dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) | ||
| 743 | __get_free_pages(GFP_ATOMIC, bhash_order); | ||
| 744 | } while (!dccp_hashinfo.bhash && --bhash_order >= 0); | ||
| 745 | |||
| 746 | if (!dccp_hashinfo.bhash) { | ||
| 747 | printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); | ||
| 748 | goto out_free_dccp_ehash; | ||
| 749 | } | ||
| 750 | |||
| 751 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { | ||
| 752 | spin_lock_init(&dccp_hashinfo.bhash[i].lock); | ||
| 753 | INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); | ||
| 754 | } | ||
| 755 | |||
| 756 | if (init_dccp_v4_mibs()) | ||
| 757 | goto out_free_dccp_bhash; | ||
| 758 | |||
| 759 | rc = -EAGAIN; | ||
| 760 | if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) | ||
| 761 | goto out_free_dccp_v4_mibs; | ||
| 762 | |||
| 763 | inet_register_protosw(&dccp_v4_protosw); | ||
| 764 | |||
| 765 | rc = dccp_ctl_sock_init(); | ||
| 766 | if (rc) | ||
| 767 | goto out_unregister_protosw; | ||
| 768 | out: | ||
| 769 | return rc; | ||
| 770 | out_unregister_protosw: | ||
| 771 | inet_unregister_protosw(&dccp_v4_protosw); | ||
| 772 | inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); | ||
| 773 | out_free_dccp_v4_mibs: | ||
| 774 | free_percpu(dccp_statistics[0]); | ||
| 775 | free_percpu(dccp_statistics[1]); | ||
| 776 | dccp_statistics[0] = dccp_statistics[1] = NULL; | ||
| 777 | out_free_dccp_bhash: | ||
| 778 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); | ||
| 779 | dccp_hashinfo.bhash = NULL; | ||
| 780 | out_free_dccp_ehash: | ||
| 781 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); | ||
| 782 | dccp_hashinfo.ehash = NULL; | ||
| 783 | out_free_bind_bucket_cachep: | ||
| 784 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
| 785 | dccp_hashinfo.bind_bucket_cachep = NULL; | ||
| 786 | out_proto_unregister: | ||
| 787 | proto_unregister(&dccp_v4_prot); | ||
| 788 | goto out; | ||
| 789 | } | ||
| 790 | |||
| 791 | static const char dccp_del_proto_err_msg[] __exitdata = | ||
| 792 | KERN_ERR "can't remove dccp net_protocol\n"; | ||
| 793 | |||
| 794 | static void __exit dccp_fini(void) | ||
| 795 | { | ||
| 796 | dccp_ctl_sock_exit(); | ||
| 797 | |||
| 798 | inet_unregister_protosw(&dccp_v4_protosw); | ||
| 799 | |||
| 800 | if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) | ||
| 801 | printk(dccp_del_proto_err_msg); | ||
| 802 | |||
| 803 | /* Free the control endpoint. */ | ||
| 804 | sock_release(dccp_ctl_socket); | ||
| 805 | |||
| 806 | proto_unregister(&dccp_v4_prot); | ||
| 807 | |||
| 808 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
| 809 | } | ||
| 810 | |||
| 811 | module_init(dccp_init); | ||
| 812 | module_exit(dccp_fini); | ||
| 813 | |||
| 814 | /* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ | ||
| 815 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); | ||
| 816 | MODULE_LICENSE("GPL"); | ||
| 817 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); | ||
| 818 | MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); | ||
diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 000000000000..8c396ee01aac --- /dev/null +++ b/net/dccp/timer.c | |||
| @@ -0,0 +1,249 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/timer.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/config.h> | ||
| 14 | #include <linux/dccp.h> | ||
| 15 | #include <linux/skbuff.h> | ||
| 16 | |||
| 17 | #include "dccp.h" | ||
| 18 | |||
| 19 | static void dccp_write_timer(unsigned long data); | ||
| 20 | static void dccp_keepalive_timer(unsigned long data); | ||
| 21 | static void dccp_delack_timer(unsigned long data); | ||
| 22 | |||
| 23 | void dccp_init_xmit_timers(struct sock *sk) | ||
| 24 | { | ||
| 25 | inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, | ||
| 26 | &dccp_keepalive_timer); | ||
| 27 | } | ||
| 28 | |||
| 29 | static void dccp_write_err(struct sock *sk) | ||
| 30 | { | ||
| 31 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; | ||
| 32 | sk->sk_error_report(sk); | ||
| 33 | |||
| 34 | dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); | ||
| 35 | dccp_done(sk); | ||
| 36 | DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); | ||
| 37 | } | ||
| 38 | |||
| 39 | /* A write timeout has occurred. Process the after effects. */ | ||
| 40 | static int dccp_write_timeout(struct sock *sk) | ||
| 41 | { | ||
| 42 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 43 | int retry_until; | ||
| 44 | |||
| 45 | if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { | ||
| 46 | if (icsk->icsk_retransmits != 0) | ||
| 47 | dst_negative_advice(&sk->sk_dst_cache); | ||
| 48 | retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; | ||
| 49 | } else { | ||
| 50 | if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { | ||
| 51 | /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black | ||
| 52 | hole detection. :-( | ||
| 53 | |||
| 54 | It is place to make it. It is not made. I do not want | ||
| 55 | to make it. It is disguisting. It does not work in any | ||
| 56 | case. Let me to cite the same draft, which requires for | ||
| 57 | us to implement this: | ||
| 58 | |||
| 59 | "The one security concern raised by this memo is that ICMP black holes | ||
| 60 | are often caused by over-zealous security administrators who block | ||
| 61 | all ICMP messages. It is vitally important that those who design and | ||
| 62 | deploy security systems understand the impact of strict filtering on | ||
| 63 | upper-layer protocols. The safest web site in the world is worthless | ||
| 64 | if most TCP implementations cannot transfer data from it. It would | ||
| 65 | be far nicer to have all of the black holes fixed rather than fixing | ||
| 66 | all of the TCP implementations." | ||
| 67 | |||
| 68 | Golden words :-). | ||
| 69 | */ | ||
| 70 | |||
| 71 | dst_negative_advice(&sk->sk_dst_cache); | ||
| 72 | } | ||
| 73 | |||
| 74 | retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; | ||
| 75 | /* | ||
| 76 | * FIXME: see tcp_write_timout and tcp_out_of_resources | ||
| 77 | */ | ||
| 78 | } | ||
| 79 | |||
| 80 | if (icsk->icsk_retransmits >= retry_until) { | ||
| 81 | /* Has it gone just too far? */ | ||
| 82 | dccp_write_err(sk); | ||
| 83 | return 1; | ||
| 84 | } | ||
| 85 | return 0; | ||
| 86 | } | ||
| 87 | |||
| 88 | /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ | ||
| 89 | static void dccp_delack_timer(unsigned long data) | ||
| 90 | { | ||
| 91 | struct sock *sk = (struct sock *)data; | ||
| 92 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 93 | |||
| 94 | bh_lock_sock(sk); | ||
| 95 | if (sock_owned_by_user(sk)) { | ||
| 96 | /* Try again later. */ | ||
| 97 | icsk->icsk_ack.blocked = 1; | ||
| 98 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); | ||
| 99 | sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); | ||
| 100 | goto out; | ||
| 101 | } | ||
| 102 | |||
| 103 | if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) | ||
| 104 | goto out; | ||
| 105 | if (time_after(icsk->icsk_ack.timeout, jiffies)) { | ||
| 106 | sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); | ||
| 107 | goto out; | ||
| 108 | } | ||
| 109 | |||
| 110 | icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; | ||
| 111 | |||
| 112 | if (inet_csk_ack_scheduled(sk)) { | ||
| 113 | if (!icsk->icsk_ack.pingpong) { | ||
| 114 | /* Delayed ACK missed: inflate ATO. */ | ||
| 115 | icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); | ||
| 116 | } else { | ||
| 117 | /* Delayed ACK missed: leave pingpong mode and | ||
| 118 | * deflate ATO. | ||
| 119 | */ | ||
| 120 | icsk->icsk_ack.pingpong = 0; | ||
| 121 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
| 122 | } | ||
| 123 | dccp_send_ack(sk); | ||
| 124 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); | ||
| 125 | } | ||
| 126 | out: | ||
| 127 | bh_unlock_sock(sk); | ||
| 128 | sock_put(sk); | ||
| 129 | } | ||
| 130 | |||
| 131 | /* | ||
| 132 | * The DCCP retransmit timer. | ||
| 133 | */ | ||
| 134 | static void dccp_retransmit_timer(struct sock *sk) | ||
| 135 | { | ||
| 136 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 137 | |||
| 138 | /* | ||
| 139 | * sk->sk_send_head has to have one skb with | ||
| 140 | * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP | ||
| 141 | * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake | ||
| 142 | * (PARTOPEN timer), etc). | ||
| 143 | */ | ||
| 144 | BUG_TRAP(sk->sk_send_head != NULL); | ||
| 145 | |||
| 146 | /* | ||
| 147 | * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was | ||
| 148 | * sent, no need to retransmit, this sock is dead. | ||
| 149 | */ | ||
| 150 | if (dccp_write_timeout(sk)) | ||
| 151 | goto out; | ||
| 152 | |||
| 153 | /* | ||
| 154 | * We want to know the number of packets retransmitted, not the | ||
| 155 | * total number of retransmissions of clones of original packets. | ||
| 156 | */ | ||
| 157 | if (icsk->icsk_retransmits == 0) | ||
| 158 | DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); | ||
| 159 | |||
| 160 | if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { | ||
| 161 | /* | ||
| 162 | * Retransmission failed because of local congestion, | ||
| 163 | * do not backoff. | ||
| 164 | */ | ||
| 165 | if (icsk->icsk_retransmits == 0) | ||
| 166 | icsk->icsk_retransmits = 1; | ||
| 167 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
| 168 | min(icsk->icsk_rto, | ||
| 169 | TCP_RESOURCE_PROBE_INTERVAL), | ||
| 170 | TCP_RTO_MAX); | ||
| 171 | goto out; | ||
| 172 | } | ||
| 173 | |||
| 174 | icsk->icsk_backoff++; | ||
| 175 | icsk->icsk_retransmits++; | ||
| 176 | |||
| 177 | icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); | ||
| 178 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | ||
| 179 | if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) | ||
| 180 | __sk_dst_reset(sk); | ||
| 181 | out:; | ||
| 182 | } | ||
| 183 | |||
| 184 | static void dccp_write_timer(unsigned long data) | ||
| 185 | { | ||
| 186 | struct sock *sk = (struct sock *)data; | ||
| 187 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 188 | int event = 0; | ||
| 189 | |||
| 190 | bh_lock_sock(sk); | ||
| 191 | if (sock_owned_by_user(sk)) { | ||
| 192 | /* Try again later */ | ||
| 193 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); | ||
| 194 | goto out; | ||
| 195 | } | ||
| 196 | |||
| 197 | if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) | ||
| 198 | goto out; | ||
| 199 | |||
| 200 | if (time_after(icsk->icsk_timeout, jiffies)) { | ||
| 201 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); | ||
| 202 | goto out; | ||
| 203 | } | ||
| 204 | |||
| 205 | event = icsk->icsk_pending; | ||
| 206 | icsk->icsk_pending = 0; | ||
| 207 | |||
| 208 | switch (event) { | ||
| 209 | case ICSK_TIME_RETRANS: | ||
| 210 | dccp_retransmit_timer(sk); | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | out: | ||
| 214 | bh_unlock_sock(sk); | ||
| 215 | sock_put(sk); | ||
| 216 | } | ||
| 217 | |||
| 218 | /* | ||
| 219 | * Timer for listening sockets | ||
| 220 | */ | ||
| 221 | static void dccp_response_timer(struct sock *sk) | ||
| 222 | { | ||
| 223 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 224 | const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; | ||
| 225 | |||
| 226 | reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, | ||
| 227 | DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); | ||
| 228 | } | ||
| 229 | |||
| 230 | static void dccp_keepalive_timer(unsigned long data) | ||
| 231 | { | ||
| 232 | struct sock *sk = (struct sock *)data; | ||
| 233 | |||
| 234 | /* Only process if socket is not in use. */ | ||
| 235 | bh_lock_sock(sk); | ||
| 236 | if (sock_owned_by_user(sk)) { | ||
| 237 | /* Try again later. */ | ||
| 238 | inet_csk_reset_keepalive_timer(sk, HZ / 20); | ||
| 239 | goto out; | ||
| 240 | } | ||
| 241 | |||
| 242 | if (sk->sk_state == DCCP_LISTEN) { | ||
| 243 | dccp_response_timer(sk); | ||
| 244 | goto out; | ||
| 245 | } | ||
| 246 | out: | ||
| 247 | bh_unlock_sock(sk); | ||
| 248 | sock_put(sk); | ||
| 249 | } | ||
