diff options
Diffstat (limited to 'net')
| -rw-r--r-- | net/Kconfig | 3 | ||||
| -rw-r--r-- | net/dccp/Makefile | 2 | ||||
| -rw-r--r-- | net/dccp/ackvec.c | 419 | ||||
| -rw-r--r-- | net/dccp/ackvec.h | 133 | ||||
| -rw-r--r-- | net/dccp/ccid.h | 31 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid3.c | 56 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid3.h | 23 | ||||
| -rw-r--r-- | net/dccp/dccp.h | 91 | ||||
| -rw-r--r-- | net/dccp/input.c | 89 | ||||
| -rw-r--r-- | net/dccp/ipv4.c | 101 | ||||
| -rw-r--r-- | net/dccp/minisocks.c | 19 | ||||
| -rw-r--r-- | net/dccp/options.c | 443 | ||||
| -rw-r--r-- | net/dccp/output.c | 26 | ||||
| -rw-r--r-- | net/dccp/proto.c | 94 | ||||
| -rw-r--r-- | net/ipv4/netfilter/Kconfig | 14 | ||||
| -rw-r--r-- | net/ipv4/netfilter/ip_conntrack_core.c | 5 | ||||
| -rw-r--r-- | net/ipv4/netfilter/ipt_CLUSTERIP.c | 223 | ||||
| -rw-r--r-- | net/ipv6/udp.c | 10 | ||||
| -rw-r--r-- | net/socket.c | 3 |
19 files changed, 1005 insertions, 780 deletions
diff --git a/net/Kconfig b/net/Kconfig index 2bdd5623fdd5..60f6f321bd76 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
| @@ -140,6 +140,7 @@ config BRIDGE_NETFILTER | |||
| 140 | 140 | ||
| 141 | If unsure, say N. | 141 | If unsure, say N. |
| 142 | 142 | ||
| 143 | source "net/netfilter/Kconfig" | ||
| 143 | source "net/ipv4/netfilter/Kconfig" | 144 | source "net/ipv4/netfilter/Kconfig" |
| 144 | source "net/ipv6/netfilter/Kconfig" | 145 | source "net/ipv6/netfilter/Kconfig" |
| 145 | source "net/decnet/netfilter/Kconfig" | 146 | source "net/decnet/netfilter/Kconfig" |
| @@ -206,8 +207,6 @@ config NET_PKTGEN | |||
| 206 | To compile this code as a module, choose M here: the | 207 | To compile this code as a module, choose M here: the |
| 207 | module will be called pktgen. | 208 | module will be called pktgen. |
| 208 | 209 | ||
| 209 | source "net/netfilter/Kconfig" | ||
| 210 | |||
| 211 | endmenu | 210 | endmenu |
| 212 | 211 | ||
| 213 | endmenu | 212 | endmenu |
diff --git a/net/dccp/Makefile b/net/dccp/Makefile index fb97bb042455..344a8da153fc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile | |||
| @@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o | |||
| 3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ | 3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ |
| 4 | timer.o | 4 | timer.o |
| 5 | 5 | ||
| 6 | dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o | ||
| 7 | |||
| 6 | obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o | 8 | obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o |
| 7 | 9 | ||
| 8 | dccp_diag-y := diag.o | 10 | dccp_diag-y := diag.o |
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c new file mode 100644 index 000000000000..6530283eafca --- /dev/null +++ b/net/dccp/ackvec.c | |||
| @@ -0,0 +1,419 @@ | |||
| 1 | /* | ||
| 2 | * net/dccp/ackvec.c | ||
| 3 | * | ||
| 4 | * An implementation of the DCCP protocol | ||
| 5 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify it | ||
| 8 | * under the terms of the GNU General Public License as published by the | ||
| 9 | * Free Software Foundation; version 2 of the License; | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include "ackvec.h" | ||
| 13 | #include "dccp.h" | ||
| 14 | |||
| 15 | #include <linux/dccp.h> | ||
| 16 | #include <linux/skbuff.h> | ||
| 17 | |||
| 18 | #include <net/sock.h> | ||
| 19 | |||
| 20 | int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | ||
| 21 | { | ||
| 22 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 23 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; | ||
| 24 | int len = av->dccpav_vec_len + 2; | ||
| 25 | struct timeval now; | ||
| 26 | u32 elapsed_time; | ||
| 27 | unsigned char *to, *from; | ||
| 28 | |||
| 29 | dccp_timestamp(sk, &now); | ||
| 30 | elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; | ||
| 31 | |||
| 32 | if (elapsed_time != 0) | ||
| 33 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
| 34 | |||
| 35 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | ||
| 36 | return -1; | ||
| 37 | |||
| 38 | /* | ||
| 39 | * XXX: now we have just one ack vector sent record, so | ||
| 40 | * we have to wait for it to be cleared. | ||
| 41 | * | ||
| 42 | * Of course this is not acceptable, but this is just for | ||
| 43 | * basic testing now. | ||
| 44 | */ | ||
| 45 | if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
| 46 | return -1; | ||
| 47 | |||
| 48 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
| 49 | |||
| 50 | to = skb_push(skb, len); | ||
| 51 | *to++ = DCCPO_ACK_VECTOR_0; | ||
| 52 | *to++ = len; | ||
| 53 | |||
| 54 | len = av->dccpav_vec_len; | ||
| 55 | from = av->dccpav_buf + av->dccpav_buf_head; | ||
| 56 | |||
| 57 | /* Check if buf_head wraps */ | ||
| 58 | if (av->dccpav_buf_head + len > av->dccpav_vec_len) { | ||
| 59 | const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); | ||
| 60 | |||
| 61 | memcpy(to, from, tailsize); | ||
| 62 | to += tailsize; | ||
| 63 | len -= tailsize; | ||
| 64 | from = av->dccpav_buf; | ||
| 65 | } | ||
| 66 | |||
| 67 | memcpy(to, from, len); | ||
| 68 | /* | ||
| 69 | * From draft-ietf-dccp-spec-11.txt: | ||
| 70 | * | ||
| 71 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
| 72 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
| 73 | * sequence number it used for the ack packet; ack_ptr will equal | ||
| 74 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will | ||
| 75 | * equal buf_nonce. | ||
| 76 | * | ||
| 77 | * This implemention uses just one ack record for now. | ||
| 78 | */ | ||
| 79 | av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 80 | av->dccpav_ack_ptr = av->dccpav_buf_head; | ||
| 81 | av->dccpav_ack_ackno = av->dccpav_buf_ackno; | ||
| 82 | av->dccpav_ack_nonce = av->dccpav_buf_nonce; | ||
| 83 | av->dccpav_sent_len = av->dccpav_vec_len; | ||
| 84 | |||
| 85 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " | ||
| 86 | "ack_ackno=%llu\n", | ||
| 87 | debug_prefix, av->dccpav_sent_len, | ||
| 88 | (unsigned long long)av->dccpav_ack_seqno, | ||
| 89 | (unsigned long long)av->dccpav_ack_ackno); | ||
| 90 | return -1; | ||
| 91 | } | ||
| 92 | |||
| 93 | struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, | ||
| 94 | const unsigned int __nocast priority) | ||
| 95 | { | ||
| 96 | struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); | ||
| 97 | |||
| 98 | if (av != NULL) { | ||
| 99 | av->dccpav_buf_len = len; | ||
| 100 | av->dccpav_buf_head = | ||
| 101 | av->dccpav_buf_tail = av->dccpav_buf_len - 1; | ||
| 102 | av->dccpav_buf_ackno = | ||
| 103 | av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; | ||
| 104 | av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; | ||
| 105 | av->dccpav_ack_ptr = 0; | ||
| 106 | av->dccpav_time.tv_sec = 0; | ||
| 107 | av->dccpav_time.tv_usec = 0; | ||
| 108 | av->dccpav_sent_len = av->dccpav_vec_len = 0; | ||
| 109 | } | ||
| 110 | |||
| 111 | return av; | ||
| 112 | } | ||
| 113 | |||
/* Release an ack vector obtained from dccp_ackvec_alloc(). */
void dccp_ackvec_free(struct dccp_ackvec *av)
{
	kfree(av);
}
| 118 | |||
| 119 | static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, | ||
| 120 | const unsigned int index) | ||
| 121 | { | ||
| 122 | return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; | ||
| 123 | } | ||
| 124 | |||
| 125 | static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, | ||
| 126 | const unsigned int index) | ||
| 127 | { | ||
| 128 | return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; | ||
| 129 | } | ||
| 130 | |||
| 131 | /* | ||
| 132 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
| 133 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
| 134 | * this simplifies table updates if one of the missing packets arrives. | ||
| 135 | */ | ||
| 136 | static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, | ||
| 137 | const unsigned int packets, | ||
| 138 | const unsigned char state) | ||
| 139 | { | ||
| 140 | unsigned int gap; | ||
| 141 | signed long new_head; | ||
| 142 | |||
| 143 | if (av->dccpav_vec_len + packets > av->dccpav_buf_len) | ||
| 144 | return -ENOBUFS; | ||
| 145 | |||
| 146 | gap = packets - 1; | ||
| 147 | new_head = av->dccpav_buf_head - packets; | ||
| 148 | |||
| 149 | if (new_head < 0) { | ||
| 150 | if (gap > 0) { | ||
| 151 | memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, | ||
| 152 | gap + new_head + 1); | ||
| 153 | gap = -new_head; | ||
| 154 | } | ||
| 155 | new_head += av->dccpav_buf_len; | ||
| 156 | } | ||
| 157 | |||
| 158 | av->dccpav_buf_head = new_head; | ||
| 159 | |||
| 160 | if (gap > 0) | ||
| 161 | memset(av->dccpav_buf + av->dccpav_buf_head + 1, | ||
| 162 | DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); | ||
| 163 | |||
| 164 | av->dccpav_buf[av->dccpav_buf_head] = state; | ||
| 165 | av->dccpav_vec_len += packets; | ||
| 166 | return 0; | ||
| 167 | } | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
| 171 | */ | ||
| 172 | int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | ||
| 173 | const u64 ackno, const u8 state) | ||
| 174 | { | ||
| 175 | /* | ||
| 176 | * Check at the right places if the buffer is full, if it is, tell the | ||
| 177 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
| 178 | * vectors, when we will free up space in dccpav_buf. | ||
| 179 | * | ||
| 180 | * We may well decide to do buffer compression, etc, but for now lets | ||
| 181 | * just drop. | ||
| 182 | * | ||
| 183 | * From Appendix A: | ||
| 184 | * | ||
| 185 | * Of course, the circular buffer may overflow, either when the | ||
| 186 | * HC-Sender is sending data at a very high rate, when the | ||
| 187 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
| 188 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
| 189 | * (so the HC-Receiver is unable to clean up old state). In this | ||
| 190 | * case, the HC-Receiver should either compress the buffer (by | ||
| 191 | * increasing run lengths when possible), transfer its state to | ||
| 192 | * a larger buffer, or, as a last resort, drop all received | ||
| 193 | * packets, without processing them whatsoever, until its buffer | ||
| 194 | * shrinks again. | ||
| 195 | */ | ||
| 196 | |||
| 197 | /* See if this is the first ackno being inserted */ | ||
| 198 | if (av->dccpav_vec_len == 0) { | ||
| 199 | av->dccpav_buf[av->dccpav_buf_head] = state; | ||
| 200 | av->dccpav_vec_len = 1; | ||
| 201 | } else if (after48(ackno, av->dccpav_buf_ackno)) { | ||
| 202 | const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, | ||
| 203 | ackno); | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Look if the state of this packet is the same as the | ||
| 207 | * previous ackno and if so if we can bump the head len. | ||
| 208 | */ | ||
| 209 | if (delta == 1 && | ||
| 210 | dccp_ackvec_state(av, av->dccpav_buf_head) == state && | ||
| 211 | (dccp_ackvec_len(av, av->dccpav_buf_head) < | ||
| 212 | DCCP_ACKVEC_LEN_MASK)) | ||
| 213 | av->dccpav_buf[av->dccpav_buf_head]++; | ||
| 214 | else if (dccp_ackvec_set_buf_head_state(av, delta, state)) | ||
| 215 | return -ENOBUFS; | ||
| 216 | } else { | ||
| 217 | /* | ||
| 218 | * A.1.2. Old Packets | ||
| 219 | * | ||
| 220 | * When a packet with Sequence Number S arrives, and | ||
| 221 | * S <= buf_ackno, the HC-Receiver will scan the table | ||
| 222 | * for the byte corresponding to S. (Indexing structures | ||
| 223 | * could reduce the complexity of this scan.) | ||
| 224 | */ | ||
| 225 | u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); | ||
| 226 | unsigned int index = av->dccpav_buf_head; | ||
| 227 | |||
| 228 | while (1) { | ||
| 229 | const u8 len = dccp_ackvec_len(av, index); | ||
| 230 | const u8 state = dccp_ackvec_state(av, index); | ||
| 231 | /* | ||
| 232 | * valid packets not yet in dccpav_buf have a reserved | ||
| 233 | * entry, with a len equal to 0. | ||
| 234 | */ | ||
| 235 | if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && | ||
| 236 | len == 0 && delta == 0) { /* Found our | ||
| 237 | reserved seat! */ | ||
| 238 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
| 239 | (unsigned long long)ackno); | ||
| 240 | av->dccpav_buf[index] = state; | ||
| 241 | goto out; | ||
| 242 | } | ||
| 243 | /* len == 0 means one packet */ | ||
| 244 | if (delta < len + 1) | ||
| 245 | goto out_duplicate; | ||
| 246 | |||
| 247 | delta -= len + 1; | ||
| 248 | if (++index == av->dccpav_buf_len) | ||
| 249 | index = 0; | ||
| 250 | } | ||
| 251 | } | ||
| 252 | |||
| 253 | av->dccpav_buf_ackno = ackno; | ||
| 254 | dccp_timestamp(sk, &av->dccpav_time); | ||
| 255 | out: | ||
| 256 | dccp_pr_debug(""); | ||
| 257 | return 0; | ||
| 258 | |||
| 259 | out_duplicate: | ||
| 260 | /* Duplicate packet */ | ||
| 261 | dccp_pr_debug("Received a dup or already considered lost " | ||
| 262 | "packet: %llu\n", (unsigned long long)ackno); | ||
| 263 | return -EILSEQ; | ||
| 264 | } | ||
| 265 | |||
| 266 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 267 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) | ||
| 268 | { | ||
| 269 | if (!dccp_debug) | ||
| 270 | return; | ||
| 271 | |||
| 272 | printk("ACK vector len=%d, ackno=%llu |", len, | ||
| 273 | (unsigned long long)ackno); | ||
| 274 | |||
| 275 | while (len--) { | ||
| 276 | const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; | ||
| 277 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | ||
| 278 | |||
| 279 | printk("%d,%d|", state, rl); | ||
| 280 | ++vector; | ||
| 281 | } | ||
| 282 | |||
| 283 | printk("\n"); | ||
| 284 | } | ||
| 285 | |||
| 286 | void dccp_ackvec_print(const struct dccp_ackvec *av) | ||
| 287 | { | ||
| 288 | dccp_ackvector_print(av->dccpav_buf_ackno, | ||
| 289 | av->dccpav_buf + av->dccpav_buf_head, | ||
| 290 | av->dccpav_vec_len); | ||
| 291 | } | ||
| 292 | #endif | ||
| 293 | |||
/*
 * Drop the bytes covered by the single ack record: the live vector length
 * shrinks by the length that was sent (dccpav_sent_len).
 */
static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
{
	/*
	 * As we're keeping track of the ack vector size (dccpav_vec_len) and
	 * the sent ack vector size (dccpav_sent_len) we don't need
	 * dccpav_buf_tail at all, but keep this code here as in the future
	 * we'll implement a vector of ack records, as suggested in
	 * draft-ietf-dccp-spec-11.txt Appendix A. -acme
	 */
#if 0
	av->dccpav_buf_tail = av->dccpav_ack_ptr + 1;
	if (av->dccpav_buf_tail >= av->dccpav_vec_len)
		av->dccpav_buf_tail -= av->dccpav_vec_len;
#endif
	av->dccpav_vec_len -= av->dccpav_sent_len;
}
| 310 | |||
| 311 | void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, | ||
| 312 | const u64 ackno) | ||
| 313 | { | ||
| 314 | /* Check if we actually sent an ACK vector */ | ||
| 315 | if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
| 316 | return; | ||
| 317 | |||
| 318 | if (ackno == av->dccpav_ack_seqno) { | ||
| 319 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 320 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 321 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
| 322 | "CLIENT rx ack: " : "server rx ack: "; | ||
| 323 | #endif | ||
| 324 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " | ||
| 325 | "ack_ackno=%llu, ACKED!\n", | ||
| 326 | debug_prefix, 1, | ||
| 327 | (unsigned long long)av->dccpav_ack_seqno, | ||
| 328 | (unsigned long long)av->dccpav_ack_ackno); | ||
| 329 | dccp_ackvec_trow_away_ack_record(av); | ||
| 330 | av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | ||
| 335 | struct sock *sk, u64 ackno, | ||
| 336 | const unsigned char len, | ||
| 337 | const unsigned char *vector) | ||
| 338 | { | ||
| 339 | unsigned char i; | ||
| 340 | |||
| 341 | /* Check if we actually sent an ACK vector */ | ||
| 342 | if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
| 343 | return; | ||
| 344 | /* | ||
| 345 | * We're in the receiver half connection, so if the received an ACK | ||
| 346 | * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're | ||
| 347 | * not interested. | ||
| 348 | * | ||
| 349 | * Extra explanation with example: | ||
| 350 | * | ||
| 351 | * if we received an ACK vector with ackno 50, it can only be acking | ||
| 352 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
| 353 | */ | ||
| 354 | /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ | ||
| 355 | if (before48(ackno, av->dccpav_ack_seqno)) { | ||
| 356 | /* dccp_pr_debug_cat("yes\n"); */ | ||
| 357 | return; | ||
| 358 | } | ||
| 359 | /* dccp_pr_debug_cat("no\n"); */ | ||
| 360 | |||
| 361 | i = len; | ||
| 362 | while (i--) { | ||
| 363 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | ||
| 364 | u64 ackno_end_rl; | ||
| 365 | |||
| 366 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
| 367 | |||
| 368 | /* | ||
| 369 | * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, | ||
| 370 | * av->dccpav_ack_seqno, ackno); | ||
| 371 | */ | ||
| 372 | if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { | ||
| 373 | const u8 state = (*vector & | ||
| 374 | DCCP_ACKVEC_STATE_MASK) >> 6; | ||
| 375 | /* dccp_pr_debug_cat("yes\n"); */ | ||
| 376 | |||
| 377 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { | ||
| 378 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 379 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 380 | const char *debug_prefix = | ||
| 381 | dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
| 382 | "CLIENT rx ack: " : "server rx ack: "; | ||
| 383 | #endif | ||
| 384 | dccp_pr_debug("%sACK vector 0, len=%d, " | ||
| 385 | "ack_seqno=%llu, ack_ackno=%llu, " | ||
| 386 | "ACKED!\n", | ||
| 387 | debug_prefix, len, | ||
| 388 | (unsigned long long) | ||
| 389 | av->dccpav_ack_seqno, | ||
| 390 | (unsigned long long) | ||
| 391 | av->dccpav_ack_ackno); | ||
| 392 | dccp_ackvec_trow_away_ack_record(av); | ||
| 393 | } | ||
| 394 | /* | ||
| 395 | * If dccpav_ack_seqno was not received, no problem | ||
| 396 | * we'll send another ACK vector. | ||
| 397 | */ | ||
| 398 | av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 399 | break; | ||
| 400 | } | ||
| 401 | /* dccp_pr_debug_cat("no\n"); */ | ||
| 402 | |||
| 403 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
| 404 | ++vector; | ||
| 405 | } | ||
| 406 | } | ||
| 407 | |||
| 408 | int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | ||
| 409 | const u8 opt, const u8 *value, const u8 len) | ||
| 410 | { | ||
| 411 | if (len > DCCP_MAX_ACKVEC_LEN) | ||
| 412 | return -1; | ||
| 413 | |||
| 414 | /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ | ||
| 415 | dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, | ||
| 416 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 417 | len, value); | ||
| 418 | return 0; | ||
| 419 | } | ||
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h new file mode 100644 index 000000000000..8ca51c9191f7 --- /dev/null +++ b/net/dccp/ackvec.h | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | #ifndef _ACKVEC_H | ||
| 2 | #define _ACKVEC_H | ||
| 3 | /* | ||
| 4 | * net/dccp/ackvec.h | ||
| 5 | * | ||
| 6 | * An implementation of the DCCP protocol | ||
| 7 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify it | ||
| 10 | * under the terms of the GNU General Public License version 2 as | ||
| 11 | * published by the Free Software Foundation. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/config.h> | ||
| 15 | #include <linux/compiler.h> | ||
| 16 | #include <linux/time.h> | ||
| 17 | #include <linux/types.h> | ||
| 18 | |||
| 19 | /* Read about the ECN nonce to see why it is 253 */ | ||
| 20 | #define DCCP_MAX_ACKVEC_LEN 253 | ||
| 21 | |||
| 22 | #define DCCP_ACKVEC_STATE_RECEIVED 0 | ||
| 23 | #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) | ||
| 24 | #define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) | ||
| 25 | |||
| 26 | #define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ | ||
| 27 | #define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ | ||
| 28 | |||
/** struct dccp_ackvec - ack vector
 *
 * This data structure is the one defined in the DCCP draft
 * Appendix A.
 *
 * @dccpav_buf_head - circular buffer head
 * @dccpav_buf_tail - circular buffer tail
 * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
 * 		       buffer (i.e. %dccpav_buf_head)
 * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
 * 		       by the buffer with State 0
 *
 * Additionally, the HC-Receiver must keep some information about the
 * Ack Vectors it has recently sent. For each packet sent carrying an
 * Ack Vector, it remembers four variables:
 *
 * @dccpav_ack_seqno - the Sequence Number used for the packet
 * 		       (HC-Receiver seqno)
 * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
 * @dccpav_ack_ackno - the Acknowledgement Number used for the packet
 * 		       (HC-Sender seqno)
 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
 *
 * @dccpav_sent_len - value of %dccpav_vec_len when the last ack vector
 * 		      was sent
 * @dccpav_vec_len - number of buffer bytes currently in use
 * @dccpav_buf_len - circular buffer length
 * @dccpav_time - timestamp taken when %dccpav_buf_ackno was last updated
 * @dccpav_buf - circular buffer of acknowledgeable packets
 */
struct dccp_ackvec {
	unsigned int	dccpav_buf_head;
	unsigned int	dccpav_buf_tail;
	u64		dccpav_buf_ackno;
	u64		dccpav_ack_seqno;
	u64		dccpav_ack_ackno;
	unsigned int	dccpav_ack_ptr;
	unsigned int	dccpav_sent_len;
	unsigned int	dccpav_vec_len;
	unsigned int	dccpav_buf_len;
	struct timeval	dccpav_time;
	u8		dccpav_buf_nonce;
	u8		dccpav_ack_nonce;
	u8		dccpav_buf[0];
};
| 71 | |||
| 72 | struct sock; | ||
| 73 | struct sk_buff; | ||
| 74 | |||
| 75 | #ifdef CONFIG_IP_DCCP_ACKVEC | ||
| 76 | extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, | ||
| 77 | const unsigned int __nocast priority); | ||
| 78 | extern void dccp_ackvec_free(struct dccp_ackvec *av); | ||
| 79 | |||
| 80 | extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | ||
| 81 | const u64 ackno, const u8 state); | ||
| 82 | |||
| 83 | extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, | ||
| 84 | struct sock *sk, const u64 ackno); | ||
| 85 | extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, | ||
| 86 | const u8 opt, const u8 *value, const u8 len); | ||
| 87 | |||
| 88 | extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); | ||
| 89 | |||
/*
 * Non-zero when the live vector length differs from the length that was
 * last sent, i.e. there is ack vector state that has not gone out yet.
 */
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
	return av->dccpav_sent_len != av->dccpav_vec_len;
}
| 94 | #else /* CONFIG_IP_DCCP_ACKVEC */ | ||
/*
 * Stubs used when CONFIG_IP_DCCP_ACKVEC is not set: allocation yields NULL,
 * the int-returning operations fail with -1, nothing is ever pending and
 * the void operations do nothing.
 */
static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
					    const unsigned int __nocast priority)
{
	return NULL;
}

static inline void dccp_ackvec_free(struct dccp_ackvec *av)
{
}

static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
				  const u64 ackno, const u8 state)
{
	return -1;
}

static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
					       struct sock *sk, const u64 ackno)
{
}

static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
				    const u8 opt, const u8 *value, const u8 len)
{
	return -1;
}

static inline int dccp_insert_option_ackvec(const struct sock *sk,
					    const struct sk_buff *skb)
{
	return -1;
}

/* With ack vectors compiled out there is never anything to send */
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
	return 0;
}
| 132 | #endif /* CONFIG_IP_DCCP_ACKVEC */ | ||
| 133 | #endif /* _ACKVEC_H */ | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 962f1e9e2f7e..21e55142dcd3 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | */ | 14 | */ |
| 15 | 15 | ||
| 16 | #include <net/sock.h> | 16 | #include <net/sock.h> |
| 17 | #include <linux/compiler.h> | ||
| 17 | #include <linux/dccp.h> | 18 | #include <linux/dccp.h> |
| 18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
| 19 | #include <linux/module.h> | 20 | #include <linux/module.h> |
| @@ -54,6 +55,14 @@ struct ccid { | |||
| 54 | struct tcp_info *info); | 55 | struct tcp_info *info); |
| 55 | void (*ccid_hc_tx_get_info)(struct sock *sk, | 56 | void (*ccid_hc_tx_get_info)(struct sock *sk, |
| 56 | struct tcp_info *info); | 57 | struct tcp_info *info); |
| 58 | int (*ccid_hc_rx_getsockopt)(struct sock *sk, | ||
| 59 | const int optname, int len, | ||
| 60 | u32 __user *optval, | ||
| 61 | int __user *optlen); | ||
| 62 | int (*ccid_hc_tx_getsockopt)(struct sock *sk, | ||
| 63 | const int optname, int len, | ||
| 64 | u32 __user *optval, | ||
| 65 | int __user *optlen); | ||
| 57 | }; | 66 | }; |
| 58 | 67 | ||
| 59 | extern int ccid_register(struct ccid *ccid); | 68 | extern int ccid_register(struct ccid *ccid); |
| @@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, | |||
| 177 | if (ccid->ccid_hc_tx_get_info != NULL) | 186 | if (ccid->ccid_hc_tx_get_info != NULL) |
| 178 | ccid->ccid_hc_tx_get_info(sk, info); | 187 | ccid->ccid_hc_tx_get_info(sk, info); |
| 179 | } | 188 | } |
| 189 | |||
| 190 | static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, | ||
| 191 | const int optname, int len, | ||
| 192 | u32 __user *optval, int __user *optlen) | ||
| 193 | { | ||
| 194 | int rc = -ENOPROTOOPT; | ||
| 195 | if (ccid->ccid_hc_rx_getsockopt != NULL) | ||
| 196 | rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, | ||
| 197 | optval, optlen); | ||
| 198 | return rc; | ||
| 199 | } | ||
| 200 | |||
| 201 | static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, | ||
| 202 | const int optname, int len, | ||
| 203 | u32 __user *optval, int __user *optlen) | ||
| 204 | { | ||
| 205 | int rc = -ENOPROTOOPT; | ||
| 206 | if (ccid->ccid_hc_tx_getsockopt != NULL) | ||
| 207 | rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, | ||
| 208 | optval, optlen); | ||
| 209 | return rc; | ||
| 210 | } | ||
| 180 | #endif /* _CCID_H */ | 211 | #endif /* _CCID_H */ |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 38aa84986118..aa68e0ab274d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
| @@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | |||
| 1120 | info->tcpi_rtt = hctx->ccid3hctx_rtt; | 1120 | info->tcpi_rtt = hctx->ccid3hctx_rtt; |
| 1121 | } | 1121 | } |
| 1122 | 1122 | ||
| 1123 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | ||
| 1124 | u32 __user *optval, int __user *optlen) | ||
| 1125 | { | ||
| 1126 | const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | ||
| 1127 | const void *val; | ||
| 1128 | |||
| 1129 | /* Listen socks doesn't have a private CCID block */ | ||
| 1130 | if (sk->sk_state == DCCP_LISTEN) | ||
| 1131 | return -EINVAL; | ||
| 1132 | |||
| 1133 | switch (optname) { | ||
| 1134 | case DCCP_SOCKOPT_CCID_RX_INFO: | ||
| 1135 | if (len < sizeof(hcrx->ccid3hcrx_tfrc)) | ||
| 1136 | return -EINVAL; | ||
| 1137 | len = sizeof(hcrx->ccid3hcrx_tfrc); | ||
| 1138 | val = &hcrx->ccid3hcrx_tfrc; | ||
| 1139 | break; | ||
| 1140 | default: | ||
| 1141 | return -ENOPROTOOPT; | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | if (put_user(len, optlen) || copy_to_user(optval, val, len)) | ||
| 1145 | return -EFAULT; | ||
| 1146 | |||
| 1147 | return 0; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | ||
| 1151 | u32 __user *optval, int __user *optlen) | ||
| 1152 | { | ||
| 1153 | const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); | ||
| 1154 | const void *val; | ||
| 1155 | |||
| 1156 | /* Listen socks doesn't have a private CCID block */ | ||
| 1157 | if (sk->sk_state == DCCP_LISTEN) | ||
| 1158 | return -EINVAL; | ||
| 1159 | |||
| 1160 | switch (optname) { | ||
| 1161 | case DCCP_SOCKOPT_CCID_TX_INFO: | ||
| 1162 | if (len < sizeof(hctx->ccid3hctx_tfrc)) | ||
| 1163 | return -EINVAL; | ||
| 1164 | len = sizeof(hctx->ccid3hctx_tfrc); | ||
| 1165 | val = &hctx->ccid3hctx_tfrc; | ||
| 1166 | break; | ||
| 1167 | default: | ||
| 1168 | return -ENOPROTOOPT; | ||
| 1169 | } | ||
| 1170 | |||
| 1171 | if (put_user(len, optlen) || copy_to_user(optval, val, len)) | ||
| 1172 | return -EFAULT; | ||
| 1173 | |||
| 1174 | return 0; | ||
| 1175 | } | ||
| 1176 | |||
| 1123 | static struct ccid ccid3 = { | 1177 | static struct ccid ccid3 = { |
| 1124 | .ccid_id = 3, | 1178 | .ccid_id = 3, |
| 1125 | .ccid_name = "ccid3", | 1179 | .ccid_name = "ccid3", |
| @@ -1139,6 +1193,8 @@ static struct ccid ccid3 = { | |||
| 1139 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, | 1193 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, |
| 1140 | .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, | 1194 | .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, |
| 1141 | .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, | 1195 | .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, |
| 1196 | .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, | ||
| 1197 | .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, | ||
| 1142 | }; | 1198 | }; |
| 1143 | 1199 | ||
| 1144 | module_param(ccid3_debug, int, 0444); | 1200 | module_param(ccid3_debug, int, 0444); |
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index eb248778eea3..0bde4583d091 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/list.h> | 40 | #include <linux/list.h> |
| 41 | #include <linux/time.h> | 41 | #include <linux/time.h> |
| 42 | #include <linux/types.h> | 42 | #include <linux/types.h> |
| 43 | #include <linux/tfrc.h> | ||
| 43 | 44 | ||
| 44 | #define TFRC_MIN_PACKET_SIZE 16 | 45 | #define TFRC_MIN_PACKET_SIZE 16 |
| 45 | #define TFRC_STD_PACKET_SIZE 256 | 46 | #define TFRC_STD_PACKET_SIZE 256 |
| @@ -93,12 +94,15 @@ struct ccid3_options_received { | |||
| 93 | * @ccid3hctx_hist - Packet history | 94 | * @ccid3hctx_hist - Packet history |
| 94 | */ | 95 | */ |
| 95 | struct ccid3_hc_tx_sock { | 96 | struct ccid3_hc_tx_sock { |
| 96 | u32 ccid3hctx_x; | 97 | struct tfrc_tx_info ccid3hctx_tfrc; |
| 97 | u32 ccid3hctx_x_recv; | 98 | #define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x |
| 98 | u32 ccid3hctx_x_calc; | 99 | #define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv |
| 100 | #define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc | ||
| 101 | #define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt | ||
| 102 | #define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p | ||
| 103 | #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto | ||
| 104 | #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi | ||
| 99 | u16 ccid3hctx_s; | 105 | u16 ccid3hctx_s; |
| 100 | u32 ccid3hctx_rtt; | ||
| 101 | u32 ccid3hctx_p; | ||
| 102 | u8 ccid3hctx_state; | 106 | u8 ccid3hctx_state; |
| 103 | u8 ccid3hctx_last_win_count; | 107 | u8 ccid3hctx_last_win_count; |
| 104 | u8 ccid3hctx_idle; | 108 | u8 ccid3hctx_idle; |
| @@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock { | |||
| 106 | struct timer_list ccid3hctx_no_feedback_timer; | 110 | struct timer_list ccid3hctx_no_feedback_timer; |
| 107 | struct timeval ccid3hctx_t_ld; | 111 | struct timeval ccid3hctx_t_ld; |
| 108 | struct timeval ccid3hctx_t_nom; | 112 | struct timeval ccid3hctx_t_nom; |
| 109 | u32 ccid3hctx_t_rto; | ||
| 110 | u32 ccid3hctx_t_ipi; | ||
| 111 | u32 ccid3hctx_delta; | 113 | u32 ccid3hctx_delta; |
| 112 | struct list_head ccid3hctx_hist; | 114 | struct list_head ccid3hctx_hist; |
| 113 | struct ccid3_options_received ccid3hctx_options_received; | 115 | struct ccid3_options_received ccid3hctx_options_received; |
| 114 | }; | 116 | }; |
| 115 | 117 | ||
| 116 | struct ccid3_hc_rx_sock { | 118 | struct ccid3_hc_rx_sock { |
| 119 | struct tfrc_rx_info ccid3hcrx_tfrc; | ||
| 120 | #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv | ||
| 121 | #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt | ||
| 122 | #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p | ||
| 117 | u64 ccid3hcrx_seqno_last_counter:48, | 123 | u64 ccid3hcrx_seqno_last_counter:48, |
| 118 | ccid3hcrx_state:8, | 124 | ccid3hcrx_state:8, |
| 119 | ccid3hcrx_last_counter:4; | 125 | ccid3hcrx_last_counter:4; |
| 120 | u32 ccid3hcrx_rtt; | ||
| 121 | u32 ccid3hcrx_p; | ||
| 122 | u32 ccid3hcrx_bytes_recv; | 126 | u32 ccid3hcrx_bytes_recv; |
| 123 | struct timeval ccid3hcrx_tstamp_last_feedback; | 127 | struct timeval ccid3hcrx_tstamp_last_feedback; |
| 124 | struct timeval ccid3hcrx_tstamp_last_ack; | 128 | struct timeval ccid3hcrx_tstamp_last_ack; |
| @@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock { | |||
| 127 | u16 ccid3hcrx_s; | 131 | u16 ccid3hcrx_s; |
| 128 | u32 ccid3hcrx_pinv; | 132 | u32 ccid3hcrx_pinv; |
| 129 | u32 ccid3hcrx_elapsed_time; | 133 | u32 ccid3hcrx_elapsed_time; |
| 130 | u32 ccid3hcrx_x_recv; | ||
| 131 | }; | 134 | }; |
| 132 | 135 | ||
| 133 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | 136 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 95c4630b3b18..5871c027f9dc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <net/snmp.h> | 17 | #include <net/snmp.h> |
| 18 | #include <net/sock.h> | 18 | #include <net/sock.h> |
| 19 | #include <net/tcp.h> | 19 | #include <net/tcp.h> |
| 20 | #include "ackvec.h" | ||
| 20 | 21 | ||
| 21 | #ifdef CONFIG_IP_DCCP_DEBUG | 22 | #ifdef CONFIG_IP_DCCP_DEBUG |
| 22 | extern int dccp_debug; | 23 | extern int dccp_debug; |
| @@ -258,13 +259,12 @@ extern int dccp_v4_send_reset(struct sock *sk, | |||
| 258 | extern void dccp_send_close(struct sock *sk, const int active); | 259 | extern void dccp_send_close(struct sock *sk, const int active); |
| 259 | 260 | ||
| 260 | struct dccp_skb_cb { | 261 | struct dccp_skb_cb { |
| 261 | __u8 dccpd_type; | 262 | __u8 dccpd_type:4; |
| 262 | __u8 dccpd_reset_code; | 263 | __u8 dccpd_ccval:4; |
| 263 | __u8 dccpd_service; | 264 | __u8 dccpd_reset_code; |
| 264 | __u8 dccpd_ccval; | 265 | __u16 dccpd_opt_len; |
| 265 | __u64 dccpd_seq; | 266 | __u64 dccpd_seq; |
| 266 | __u64 dccpd_ack_seq; | 267 | __u64 dccpd_ack_seq; |
| 267 | int dccpd_opt_len; | ||
| 268 | }; | 268 | }; |
| 269 | 269 | ||
| 270 | #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) | 270 | #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) |
| @@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) | |||
| 359 | (dp->dccps_gss - | 359 | (dp->dccps_gss - |
| 360 | dp->dccps_options.dccpo_sequence_window + 1)); | 360 | dp->dccps_options.dccpo_sequence_window + 1)); |
| 361 | } | 361 | } |
| 362 | |||
| 363 | static inline int dccp_ack_pending(const struct sock *sk) | ||
| 364 | { | ||
| 365 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 366 | return dp->dccps_timestamp_echo != 0 || | ||
| 367 | #ifdef CONFIG_IP_DCCP_ACKVEC | ||
| 368 | (dp->dccps_options.dccpo_send_ack_vector && | ||
| 369 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || | ||
| 370 | #endif | ||
| 371 | inet_csk_ack_scheduled(sk); | ||
| 372 | } | ||
| 362 | 373 | ||
| 363 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); | 374 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); |
| 364 | extern void dccp_insert_option_elapsed_time(struct sock *sk, | 375 | extern void dccp_insert_option_elapsed_time(struct sock *sk, |
| @@ -372,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | |||
| 372 | 383 | ||
| 373 | extern struct socket *dccp_ctl_socket; | 384 | extern struct socket *dccp_ctl_socket; |
| 374 | 385 | ||
| 375 | #define DCCP_ACKPKTS_STATE_RECEIVED 0 | ||
| 376 | #define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) | ||
| 377 | #define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) | ||
| 378 | |||
| 379 | #define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ | ||
| 380 | #define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ | ||
| 381 | |||
| 382 | /** struct dccp_ackpkts - acknowledgeable packets | ||
| 383 | * | ||
| 384 | * This data structure is the one defined in the DCCP draft | ||
| 385 | * Appendix A. | ||
| 386 | * | ||
| 387 | * @dccpap_buf_head - circular buffer head | ||
| 388 | * @dccpap_buf_tail - circular buffer tail | ||
| 389 | * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the | ||
| 390 | * buffer (i.e. %dccpap_buf_head) | ||
| 391 | * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked | ||
| 392 | * by the buffer with State 0 | ||
| 393 | * | ||
| 394 | * Additionally, the HC-Receiver must keep some information about the | ||
| 395 | * Ack Vectors it has recently sent. For each packet sent carrying an | ||
| 396 | * Ack Vector, it remembers four variables: | ||
| 397 | * | ||
| 398 | * @dccpap_ack_seqno - the Sequence Number used for the packet | ||
| 399 | * (HC-Receiver seqno) | ||
| 400 | * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. | ||
| 401 | * @dccpap_ack_ackno - the Acknowledgement Number used for the packet | ||
| 402 | * (HC-Sender seqno) | ||
| 403 | * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. | ||
| 404 | * | ||
| 405 | * @dccpap_buf_len - circular buffer length | ||
| 406 | * @dccpap_time - the time in usecs | ||
| 407 | * @dccpap_buf - circular buffer of acknowledgeable packets | ||
| 408 | */ | ||
| 409 | struct dccp_ackpkts { | ||
| 410 | unsigned int dccpap_buf_head; | ||
| 411 | unsigned int dccpap_buf_tail; | ||
| 412 | u64 dccpap_buf_ackno; | ||
| 413 | u64 dccpap_ack_seqno; | ||
| 414 | u64 dccpap_ack_ackno; | ||
| 415 | unsigned int dccpap_ack_ptr; | ||
| 416 | unsigned int dccpap_buf_vector_len; | ||
| 417 | unsigned int dccpap_ack_vector_len; | ||
| 418 | unsigned int dccpap_buf_len; | ||
| 419 | struct timeval dccpap_time; | ||
| 420 | u8 dccpap_buf_nonce; | ||
| 421 | u8 dccpap_ack_nonce; | ||
| 422 | u8 dccpap_buf[0]; | ||
| 423 | }; | ||
| 424 | |||
| 425 | extern struct dccp_ackpkts * | ||
| 426 | dccp_ackpkts_alloc(unsigned int len, | ||
| 427 | const unsigned int __nocast priority); | ||
| 428 | extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); | ||
| 429 | extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, | ||
| 430 | u64 ackno, u8 state); | ||
| 431 | extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, | ||
| 432 | struct sock *sk, u64 ackno); | ||
| 433 | |||
| 434 | extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); | 386 | extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); |
| 435 | 387 | ||
| 436 | static inline suseconds_t timeval_usecs(const struct timeval *tv) | 388 | static inline suseconds_t timeval_usecs(const struct timeval *tv) |
| @@ -471,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv, | |||
| 471 | } | 423 | } |
| 472 | } | 424 | } |
| 473 | 425 | ||
| 474 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 475 | extern void dccp_ackvector_print(const u64 ackno, | ||
| 476 | const unsigned char *vector, int len); | ||
| 477 | extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); | ||
| 478 | #else | ||
| 479 | static inline void dccp_ackvector_print(const u64 ackno, | ||
| 480 | const unsigned char *vector, | ||
| 481 | int len) { } | ||
| 482 | static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } | ||
| 483 | #endif | ||
| 484 | |||
| 485 | #endif /* _DCCP_H */ | 426 | #endif /* _DCCP_H */ |
diff --git a/net/dccp/input.c b/net/dccp/input.c index c74034cf7ede..1b6b2cb12376 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <net/sock.h> | 17 | #include <net/sock.h> |
| 18 | 18 | ||
| 19 | #include "ackvec.h" | ||
| 19 | #include "ccid.h" | 20 | #include "ccid.h" |
| 20 | #include "dccp.h" | 21 | #include "dccp.h" |
| 21 | 22 | ||
| @@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | |||
| 60 | struct dccp_sock *dp = dccp_sk(sk); | 61 | struct dccp_sock *dp = dccp_sk(sk); |
| 61 | 62 | ||
| 62 | if (dp->dccps_options.dccpo_send_ack_vector) | 63 | if (dp->dccps_options.dccpo_send_ack_vector) |
| 63 | dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, | 64 | dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, |
| 64 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 65 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
| 65 | } | 66 | } |
| 66 | 67 | ||
| 67 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | 68 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) |
| @@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
| 164 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 165 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
| 165 | dccp_event_ack_recv(sk, skb); | 166 | dccp_event_ack_recv(sk, skb); |
| 166 | 167 | ||
| 167 | /* | 168 | if (dp->dccps_options.dccpo_send_ack_vector && |
| 168 | * FIXME: check ECN to see if we should use | 169 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, |
| 169 | * DCCP_ACKPKTS_STATE_ECN_MARKED | 170 | DCCP_SKB_CB(skb)->dccpd_seq, |
| 170 | */ | 171 | DCCP_ACKVEC_STATE_RECEIVED)) |
| 171 | if (dp->dccps_options.dccpo_send_ack_vector) { | 172 | goto discard; |
| 172 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
| 173 | |||
| 174 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, | ||
| 175 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
| 176 | DCCP_ACKPKTS_STATE_RECEIVED)) { | ||
| 177 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " | ||
| 178 | "packets buffer full!\n"); | ||
| 179 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 180 | inet_csk_schedule_ack(sk); | ||
| 181 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
| 182 | TCP_DELACK_MIN, | ||
| 183 | DCCP_RTO_MAX); | ||
| 184 | goto discard; | ||
| 185 | } | ||
| 186 | |||
| 187 | /* | ||
| 188 | * FIXME: this activation is probably wrong, have to study more | ||
| 189 | * TCP delack machinery and how it fits into DCCP draft, but | ||
| 190 | * for now it kinda "works" 8) | ||
| 191 | */ | ||
| 192 | if (!inet_csk_ack_scheduled(sk)) { | ||
| 193 | inet_csk_schedule_ack(sk); | ||
| 194 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, | ||
| 195 | DCCP_RTO_MAX); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | 173 | ||
| 199 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | 174 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); |
| 200 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | 175 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); |
| @@ -384,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
| 384 | } | 359 | } |
| 385 | 360 | ||
| 386 | out_invalid_packet: | 361 | out_invalid_packet: |
| 387 | return 1; /* dccp_v4_do_rcv will send a reset, but... | 362 | /* dccp_v4_do_rcv will send a reset */ |
| 388 | FIXME: the reset code should be | 363 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; |
| 389 | DCCP_RESET_CODE_PACKET_ERROR */ | 364 | return 1; |
| 390 | } | 365 | } |
| 391 | 366 | ||
| 392 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, | 367 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, |
| @@ -433,6 +408,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 433 | struct dccp_hdr *dh, unsigned len) | 408 | struct dccp_hdr *dh, unsigned len) |
| 434 | { | 409 | { |
| 435 | struct dccp_sock *dp = dccp_sk(sk); | 410 | struct dccp_sock *dp = dccp_sk(sk); |
| 411 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
| 436 | const int old_state = sk->sk_state; | 412 | const int old_state = sk->sk_state; |
| 437 | int queued = 0; | 413 | int queued = 0; |
| 438 | 414 | ||
| @@ -473,7 +449,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 473 | if (dh->dccph_type == DCCP_PKT_RESET) | 449 | if (dh->dccph_type == DCCP_PKT_RESET) |
| 474 | goto discard; | 450 | goto discard; |
| 475 | 451 | ||
| 476 | /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ | 452 | /* Caller (dccp_v4_do_rcv) will send Reset */ |
| 453 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
| 477 | return 1; | 454 | return 1; |
| 478 | } | 455 | } |
| 479 | 456 | ||
| @@ -487,36 +464,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 487 | if (dccp_parse_options(sk, skb)) | 464 | if (dccp_parse_options(sk, skb)) |
| 488 | goto discard; | 465 | goto discard; |
| 489 | 466 | ||
| 490 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != | 467 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
| 491 | DCCP_PKT_WITHOUT_ACK_SEQ) | ||
| 492 | dccp_event_ack_recv(sk, skb); | 468 | dccp_event_ack_recv(sk, skb); |
| 493 | 469 | ||
| 494 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | 470 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); |
| 495 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | 471 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); |
| 496 | 472 | ||
| 497 | /* | 473 | if (dp->dccps_options.dccpo_send_ack_vector && |
| 498 | * FIXME: check ECN to see if we should use | 474 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, |
| 499 | * DCCP_ACKPKTS_STATE_ECN_MARKED | 475 | DCCP_SKB_CB(skb)->dccpd_seq, |
| 500 | */ | 476 | DCCP_ACKVEC_STATE_RECEIVED)) |
| 501 | if (dp->dccps_options.dccpo_send_ack_vector) { | 477 | goto discard; |
| 502 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, | ||
| 503 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
| 504 | DCCP_ACKPKTS_STATE_RECEIVED)) | ||
| 505 | goto discard; | ||
| 506 | /* | ||
| 507 | * FIXME: this activation is probably wrong, have to | ||
| 508 | * study more TCP delack machinery and how it fits into | ||
| 509 | * DCCP draft, but for now it kinda "works" 8) | ||
| 510 | */ | ||
| 511 | if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == | ||
| 512 | DCCP_MAX_SEQNO + 1) && | ||
| 513 | !inet_csk_ack_scheduled(sk)) { | ||
| 514 | inet_csk_schedule_ack(sk); | ||
| 515 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
| 516 | TCP_DELACK_MIN, | ||
| 517 | DCCP_RTO_MAX); | ||
| 518 | } | ||
| 519 | } | ||
| 520 | } | 478 | } |
| 521 | 479 | ||
| 522 | /* | 480 | /* |
| @@ -551,8 +509,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 551 | dh->dccph_type == DCCP_PKT_REQUEST) || | 509 | dh->dccph_type == DCCP_PKT_REQUEST) || |
| 552 | (sk->sk_state == DCCP_RESPOND && | 510 | (sk->sk_state == DCCP_RESPOND && |
| 553 | dh->dccph_type == DCCP_PKT_DATA)) { | 511 | dh->dccph_type == DCCP_PKT_DATA)) { |
| 554 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | 512 | dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); |
| 555 | DCCP_PKT_SYNC); | ||
| 556 | goto discard; | 513 | goto discard; |
| 557 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { | 514 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { |
| 558 | dccp_rcv_closereq(sk, skb); | 515 | dccp_rcv_closereq(sk, skb); |
| @@ -563,13 +520,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
| 563 | } | 520 | } |
| 564 | 521 | ||
| 565 | if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { | 522 | if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { |
| 566 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | 523 | dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); |
| 567 | DCCP_PKT_SYNCACK); | ||
| 568 | goto discard; | 524 | goto discard; |
| 569 | } | 525 | } |
| 570 | 526 | ||
| 571 | switch (sk->sk_state) { | 527 | switch (sk->sk_state) { |
| 572 | case DCCP_CLOSED: | 528 | case DCCP_CLOSED: |
| 529 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
| 573 | return 1; | 530 | return 1; |
| 574 | 531 | ||
| 575 | case DCCP_REQUESTING: | 532 | case DCCP_REQUESTING: |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2afaa464e7f0..40fe6afacde6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <net/tcp_states.h> | 23 | #include <net/tcp_states.h> |
| 24 | #include <net/xfrm.h> | 24 | #include <net/xfrm.h> |
| 25 | 25 | ||
| 26 | #include "ackvec.h" | ||
| 26 | #include "ccid.h" | 27 | #include "ccid.h" |
| 27 | #include "dccp.h" | 28 | #include "dccp.h" |
| 28 | 29 | ||
| @@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | |||
| 246 | 247 | ||
| 247 | dp->dccps_role = DCCP_ROLE_CLIENT; | 248 | dp->dccps_role = DCCP_ROLE_CLIENT; |
| 248 | 249 | ||
| 250 | if (dccp_service_not_initialized(sk)) | ||
| 251 | return -EPROTO; | ||
| 252 | |||
| 249 | if (addr_len < sizeof(struct sockaddr_in)) | 253 | if (addr_len < sizeof(struct sockaddr_in)) |
| 250 | return -EINVAL; | 254 | return -EINVAL; |
| 251 | 255 | ||
| @@ -661,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, | |||
| 661 | dccp_hdr(skb)->dccph_sport); | 665 | dccp_hdr(skb)->dccph_sport); |
| 662 | } | 666 | } |
| 663 | 667 | ||
| 668 | static inline int dccp_bad_service_code(const struct sock *sk, | ||
| 669 | const __u32 service) | ||
| 670 | { | ||
| 671 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 672 | |||
| 673 | if (dp->dccps_service == service) | ||
| 674 | return 0; | ||
| 675 | return !dccp_list_has_service(dp->dccps_service_list, service); | ||
| 676 | } | ||
| 677 | |||
| 664 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 678 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
| 665 | { | 679 | { |
| 666 | struct inet_request_sock *ireq; | 680 | struct inet_request_sock *ireq; |
| @@ -669,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 669 | struct dccp_request_sock *dreq; | 683 | struct dccp_request_sock *dreq; |
| 670 | const __u32 saddr = skb->nh.iph->saddr; | 684 | const __u32 saddr = skb->nh.iph->saddr; |
| 671 | const __u32 daddr = skb->nh.iph->daddr; | 685 | const __u32 daddr = skb->nh.iph->daddr; |
| 686 | const __u32 service = dccp_hdr_request(skb)->dccph_req_service; | ||
| 687 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
| 688 | __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
| 672 | struct dst_entry *dst = NULL; | 689 | struct dst_entry *dst = NULL; |
| 673 | 690 | ||
| 674 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ | 691 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ |
| 675 | if (((struct rtable *)skb->dst)->rt_flags & | 692 | if (((struct rtable *)skb->dst)->rt_flags & |
| 676 | (RTCF_BROADCAST | RTCF_MULTICAST)) | 693 | (RTCF_BROADCAST | RTCF_MULTICAST)) { |
| 694 | reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
| 677 | goto drop; | 695 | goto drop; |
| 696 | } | ||
| 678 | 697 | ||
| 698 | if (dccp_bad_service_code(sk, service)) { | ||
| 699 | reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; | ||
| 700 | goto drop; | ||
| 701 | } | ||
| 679 | /* | 702 | /* |
| 680 | * TW buckets are converted to open requests without | 703 | * TW buckets are converted to open requests without |
| 681 | * limitations, they conserve resources and peer is | 704 | * limitations, they conserve resources and peer is |
| @@ -718,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 718 | * dccp_create_openreq_child. | 741 | * dccp_create_openreq_child. |
| 719 | */ | 742 | */ |
| 720 | dreq = dccp_rsk(req); | 743 | dreq = dccp_rsk(req); |
| 721 | dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; | 744 | dreq->dreq_isr = dcb->dccpd_seq; |
| 722 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); | 745 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); |
| 723 | dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; | 746 | dreq->dreq_service = service; |
| 724 | 747 | ||
| 725 | if (dccp_v4_send_response(sk, req, dst)) | 748 | if (dccp_v4_send_response(sk, req, dst)) |
| 726 | goto drop_and_free; | 749 | goto drop_and_free; |
| @@ -735,6 +758,7 @@ drop_and_free: | |||
| 735 | __reqsk_free(req); | 758 | __reqsk_free(req); |
| 736 | drop: | 759 | drop: |
| 737 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | 760 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); |
| 761 | dcb->dccpd_reset_code = reset_code; | ||
| 738 | return -1; | 762 | return -1; |
| 739 | } | 763 | } |
| 740 | 764 | ||
| @@ -1005,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1005 | return 0; | 1029 | return 0; |
| 1006 | 1030 | ||
| 1007 | reset: | 1031 | reset: |
| 1008 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
| 1009 | dccp_v4_ctl_send_reset(skb); | 1032 | dccp_v4_ctl_send_reset(skb); |
| 1010 | discard: | 1033 | discard: |
| 1011 | kfree_skb(skb); | 1034 | kfree_skb(skb); |
| @@ -1090,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb) | |||
| 1090 | goto discard_it; | 1113 | goto discard_it; |
| 1091 | 1114 | ||
| 1092 | dh = dccp_hdr(skb); | 1115 | dh = dccp_hdr(skb); |
| 1093 | #if 0 | ||
| 1094 | /* | ||
| 1095 | * Use something like this to simulate some DATA/DATAACK loss to test | ||
| 1096 | * dccp_ackpkts_add, you'll get something like this on a session that | ||
| 1097 | * sends 10 DATA/DATAACK packets: | ||
| 1098 | * | ||
| 1099 | * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| | ||
| 1100 | * | ||
| 1101 | * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet | ||
| 1102 | * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets | ||
| 1103 | * with the same state | ||
| 1104 | * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet | ||
| 1105 | * | ||
| 1106 | * So... | ||
| 1107 | * | ||
| 1108 | * 281473596467422 was received | ||
| 1109 | * 281473596467421 was not received | ||
| 1110 | * 281473596467420 was received | ||
| 1111 | * 281473596467419 was not received | ||
| 1112 | * 281473596467418 was received | ||
| 1113 | * 281473596467417 was not received | ||
| 1114 | * 281473596467416 was received | ||
| 1115 | * 281473596467415 was not received | ||
| 1116 | * 281473596467414 was received | ||
| 1117 | * 281473596467413 was received (this one was the 3way handshake | ||
| 1118 | * RESPONSE) | ||
| 1119 | * | ||
| 1120 | */ | ||
| 1121 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
| 1122 | dh->dccph_type == DCCP_PKT_DATAACK) { | ||
| 1123 | static int discard = 0; | ||
| 1124 | 1116 | ||
| 1125 | if (discard) { | ||
| 1126 | discard = 0; | ||
| 1127 | goto discard_it; | ||
| 1128 | } | ||
| 1129 | discard = 1; | ||
| 1130 | } | ||
| 1131 | #endif | ||
| 1132 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | 1117 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); |
| 1133 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | 1118 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; |
| 1134 | 1119 | ||
| @@ -1242,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
| 1242 | do_gettimeofday(&dp->dccps_epoch); | 1227 | do_gettimeofday(&dp->dccps_epoch); |
| 1243 | 1228 | ||
| 1244 | if (dp->dccps_options.dccpo_send_ack_vector) { | 1229 | if (dp->dccps_options.dccpo_send_ack_vector) { |
| 1245 | dp->dccps_hc_rx_ackpkts = | 1230 | dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, |
| 1246 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | 1231 | GFP_KERNEL); |
| 1247 | GFP_KERNEL); | 1232 | if (dp->dccps_hc_rx_ackvec == NULL) |
| 1248 | |||
| 1249 | if (dp->dccps_hc_rx_ackpkts == NULL) | ||
| 1250 | return -ENOMEM; | 1233 | return -ENOMEM; |
| 1251 | } | 1234 | } |
| 1252 | 1235 | ||
| @@ -1258,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
| 1258 | * setsockopt(CCIDs-I-want/accept). -acme | 1241 | * setsockopt(CCIDs-I-want/accept). -acme |
| 1259 | */ | 1242 | */ |
| 1260 | if (likely(!dccp_ctl_socket_init)) { | 1243 | if (likely(!dccp_ctl_socket_init)) { |
| 1261 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, | 1244 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, |
| 1262 | sk); | 1245 | sk); |
| 1263 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, | 1246 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, |
| 1264 | sk); | 1247 | sk); |
| 1265 | if (dp->dccps_hc_rx_ccid == NULL || | 1248 | if (dp->dccps_hc_rx_ccid == NULL || |
| 1266 | dp->dccps_hc_tx_ccid == NULL) { | 1249 | dp->dccps_hc_tx_ccid == NULL) { |
| 1267 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | 1250 | ccid_exit(dp->dccps_hc_rx_ccid, sk); |
| 1268 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | 1251 | ccid_exit(dp->dccps_hc_tx_ccid, sk); |
| 1269 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | 1252 | if (dp->dccps_options.dccpo_send_ack_vector) { |
| 1270 | dp->dccps_hc_rx_ackpkts = NULL; | 1253 | dccp_ackvec_free(dp->dccps_hc_rx_ackvec); |
| 1254 | dp->dccps_hc_rx_ackvec = NULL; | ||
| 1255 | } | ||
| 1271 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | 1256 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; |
| 1272 | return -ENOMEM; | 1257 | return -ENOMEM; |
| 1273 | } | 1258 | } |
| @@ -1280,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
| 1280 | sk->sk_write_space = dccp_write_space; | 1265 | sk->sk_write_space = dccp_write_space; |
| 1281 | dp->dccps_mss_cache = 536; | 1266 | dp->dccps_mss_cache = 536; |
| 1282 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 1267 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
| 1268 | dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; | ||
| 1283 | 1269 | ||
| 1284 | return 0; | 1270 | return 0; |
| 1285 | } | 1271 | } |
| @@ -1301,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk) | |||
| 1301 | if (inet_csk(sk)->icsk_bind_hash != NULL) | 1287 | if (inet_csk(sk)->icsk_bind_hash != NULL) |
| 1302 | inet_put_port(&dccp_hashinfo, sk); | 1288 | inet_put_port(&dccp_hashinfo, sk); |
| 1303 | 1289 | ||
| 1290 | if (dp->dccps_service_list != NULL) { | ||
| 1291 | kfree(dp->dccps_service_list); | ||
| 1292 | dp->dccps_service_list = NULL; | ||
| 1293 | } | ||
| 1294 | |||
| 1304 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | 1295 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); |
| 1305 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | 1296 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); |
| 1306 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | 1297 | if (dp->dccps_options.dccpo_send_ack_vector) { |
| 1307 | dp->dccps_hc_rx_ackpkts = NULL; | 1298 | dccp_ackvec_free(dp->dccps_hc_rx_ackvec); |
| 1299 | dp->dccps_hc_rx_ackvec = NULL; | ||
| 1300 | } | ||
| 1308 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | 1301 | ccid_exit(dp->dccps_hc_rx_ccid, sk); |
| 1309 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | 1302 | ccid_exit(dp->dccps_hc_tx_ccid, sk); |
| 1310 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | 1303 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; |
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 18461bc04cbe..1393461898bb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <net/xfrm.h> | 19 | #include <net/xfrm.h> |
| 20 | #include <net/inet_timewait_sock.h> | 20 | #include <net/inet_timewait_sock.h> |
| 21 | 21 | ||
| 22 | #include "ackvec.h" | ||
| 22 | #include "ccid.h" | 23 | #include "ccid.h" |
| 23 | #include "dccp.h" | 24 | #include "dccp.h" |
| 24 | 25 | ||
| @@ -93,22 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
| 93 | struct inet_connection_sock *newicsk = inet_csk(sk); | 94 | struct inet_connection_sock *newicsk = inet_csk(sk); |
| 94 | struct dccp_sock *newdp = dccp_sk(newsk); | 95 | struct dccp_sock *newdp = dccp_sk(newsk); |
| 95 | 96 | ||
| 96 | newdp->dccps_hc_rx_ackpkts = NULL; | 97 | newdp->dccps_role = DCCP_ROLE_SERVER; |
| 97 | newdp->dccps_role = DCCP_ROLE_SERVER; | 98 | newdp->dccps_hc_rx_ackvec = NULL; |
| 98 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | 99 | newdp->dccps_service_list = NULL; |
| 100 | newdp->dccps_service = dreq->dreq_service; | ||
| 101 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
| 99 | do_gettimeofday(&newdp->dccps_epoch); | 102 | do_gettimeofday(&newdp->dccps_epoch); |
| 100 | 103 | ||
| 101 | if (newdp->dccps_options.dccpo_send_ack_vector) { | 104 | if (newdp->dccps_options.dccpo_send_ack_vector) { |
| 102 | newdp->dccps_hc_rx_ackpkts = | 105 | newdp->dccps_hc_rx_ackvec = |
| 103 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | 106 | dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, |
| 104 | GFP_ATOMIC); | 107 | GFP_ATOMIC); |
| 105 | /* | 108 | /* |
| 106 | * XXX: We're using the same CCIDs set on the parent, | 109 | * XXX: We're using the same CCIDs set on the parent, |
| 107 | * i.e. sk_clone copied the master sock and left the | 110 | * i.e. sk_clone copied the master sock and left the |
| 108 | * CCID pointers for this child, that is why we do the | 111 | * CCID pointers for this child, that is why we do the |
| 109 | * __ccid_get calls. | 112 | * __ccid_get calls. |
| 110 | */ | 113 | */ |
| 111 | if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) | 114 | if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) |
| 112 | goto out_free; | 115 | goto out_free; |
| 113 | } | 116 | } |
| 114 | 117 | ||
| @@ -116,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
| 116 | newsk) != 0 || | 119 | newsk) != 0 || |
| 117 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, | 120 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, |
| 118 | newsk) != 0)) { | 121 | newsk) != 0)) { |
| 119 | dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); | 122 | dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); |
| 120 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); | 123 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); |
| 121 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); | 124 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); |
| 122 | out_free: | 125 | out_free: |
diff --git a/net/dccp/options.c b/net/dccp/options.c index d4c4242d8dd7..0a76426c9aea 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
| @@ -18,19 +18,15 @@ | |||
| 18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
| 19 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
| 20 | 20 | ||
| 21 | #include "ackvec.h" | ||
| 21 | #include "ccid.h" | 22 | #include "ccid.h" |
| 22 | #include "dccp.h" | 23 | #include "dccp.h" |
| 23 | 24 | ||
| 24 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
| 25 | struct sock *sk, | ||
| 26 | const u64 ackno, | ||
| 27 | const unsigned char len, | ||
| 28 | const unsigned char *vector); | ||
| 29 | |||
| 30 | /* stores the default values for new connection. may be changed with sysctl */ | 25 | /* stores the default values for new connection. may be changed with sysctl */ |
| 31 | static const struct dccp_options dccpo_default_values = { | 26 | static const struct dccp_options dccpo_default_values = { |
| 32 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, | 27 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, |
| 33 | .dccpo_ccid = DCCPF_INITIAL_CCID, | 28 | .dccpo_rx_ccid = DCCPF_INITIAL_CCID, |
| 29 | .dccpo_tx_ccid = DCCPF_INITIAL_CCID, | ||
| 34 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, | 30 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, |
| 35 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, | 31 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, |
| 36 | }; | 32 | }; |
| @@ -113,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | |||
| 113 | opt_recv->dccpor_ndp); | 109 | opt_recv->dccpor_ndp); |
| 114 | break; | 110 | break; |
| 115 | case DCCPO_ACK_VECTOR_0: | 111 | case DCCPO_ACK_VECTOR_0: |
| 116 | if (len > DCCP_MAX_ACK_VECTOR_LEN) | 112 | case DCCPO_ACK_VECTOR_1: |
| 117 | goto out_invalid_option; | ||
| 118 | |||
| 119 | if (pkt_type == DCCP_PKT_DATA) | 113 | if (pkt_type == DCCP_PKT_DATA) |
| 120 | continue; | 114 | continue; |
| 121 | 115 | ||
| 122 | opt_recv->dccpor_ack_vector_len = len; | 116 | if (dp->dccps_options.dccpo_send_ack_vector && |
| 123 | opt_recv->dccpor_ack_vector_idx = value - options; | 117 | dccp_ackvec_parse(sk, skb, opt, value, len)) |
| 124 | 118 | goto out_invalid_option; | |
| 125 | dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", | ||
| 126 | debug_prefix, len, | ||
| 127 | (unsigned long long) | ||
| 128 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
| 129 | dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 130 | value, len); | ||
| 131 | dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, | ||
| 132 | sk, | ||
| 133 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
| 134 | len, value); | ||
| 135 | break; | 119 | break; |
| 136 | case DCCPO_TIMESTAMP: | 120 | case DCCPO_TIMESTAMP: |
| 137 | if (len != 4) | 121 | if (len != 4) |
| @@ -352,86 +336,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk, | |||
| 352 | 336 | ||
| 353 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); | 337 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); |
| 354 | 338 | ||
| 355 | static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) | ||
| 356 | { | ||
| 357 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 358 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 359 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
| 360 | "CLIENT TX opt: " : "server TX opt: "; | ||
| 361 | #endif | ||
| 362 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
| 363 | int len = ap->dccpap_buf_vector_len + 2; | ||
| 364 | struct timeval now; | ||
| 365 | u32 elapsed_time; | ||
| 366 | unsigned char *to, *from; | ||
| 367 | |||
| 368 | dccp_timestamp(sk, &now); | ||
| 369 | elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; | ||
| 370 | |||
| 371 | if (elapsed_time != 0) | ||
| 372 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
| 373 | |||
| 374 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
| 375 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " | ||
| 376 | "insert ACK Vector!\n"); | ||
| 377 | return; | ||
| 378 | } | ||
| 379 | |||
| 380 | /* | ||
| 381 | * XXX: now we have just one ack vector sent record, so | ||
| 382 | * we have to wait for it to be cleared. | ||
| 383 | * | ||
| 384 | * Of course this is not acceptable, but this is just for | ||
| 385 | * basic testing now. | ||
| 386 | */ | ||
| 387 | if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
| 388 | return; | ||
| 389 | |||
| 390 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
| 391 | |||
| 392 | to = skb_push(skb, len); | ||
| 393 | *to++ = DCCPO_ACK_VECTOR_0; | ||
| 394 | *to++ = len; | ||
| 395 | |||
| 396 | len = ap->dccpap_buf_vector_len; | ||
| 397 | from = ap->dccpap_buf + ap->dccpap_buf_head; | ||
| 398 | |||
| 399 | /* Check if buf_head wraps */ | ||
| 400 | if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { | ||
| 401 | const unsigned int tailsize = (ap->dccpap_buf_len - | ||
| 402 | ap->dccpap_buf_head); | ||
| 403 | |||
| 404 | memcpy(to, from, tailsize); | ||
| 405 | to += tailsize; | ||
| 406 | len -= tailsize; | ||
| 407 | from = ap->dccpap_buf; | ||
| 408 | } | ||
| 409 | |||
| 410 | memcpy(to, from, len); | ||
| 411 | /* | ||
| 412 | * From draft-ietf-dccp-spec-11.txt: | ||
| 413 | * | ||
| 414 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
| 415 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
| 416 | * sequence number it used for the ack packet; ack_ptr will equal | ||
| 417 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will | ||
| 418 | * equal buf_nonce. | ||
| 419 | * | ||
| 420 | * This implemention uses just one ack record for now. | ||
| 421 | */ | ||
| 422 | ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
| 423 | ap->dccpap_ack_ptr = ap->dccpap_buf_head; | ||
| 424 | ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; | ||
| 425 | ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; | ||
| 426 | ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; | ||
| 427 | |||
| 428 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " | ||
| 429 | "ack_ackno=%llu\n", | ||
| 430 | debug_prefix, ap->dccpap_ack_vector_len, | ||
| 431 | (unsigned long long) ap->dccpap_ack_seqno, | ||
| 432 | (unsigned long long) ap->dccpap_ack_ackno); | ||
| 433 | } | ||
| 434 | |||
| 435 | void dccp_timestamp(const struct sock *sk, struct timeval *tv) | 339 | void dccp_timestamp(const struct sock *sk, struct timeval *tv) |
| 436 | { | 340 | { |
| 437 | const struct dccp_sock *dp = dccp_sk(sk); | 341 | const struct dccp_sock *dp = dccp_sk(sk); |
| @@ -528,9 +432,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
| 528 | 432 | ||
| 529 | if (!dccp_packet_without_ack(skb)) { | 433 | if (!dccp_packet_without_ack(skb)) { |
| 530 | if (dp->dccps_options.dccpo_send_ack_vector && | 434 | if (dp->dccps_options.dccpo_send_ack_vector && |
| 531 | (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != | 435 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) |
| 532 | DCCP_MAX_SEQNO + 1)) | 436 | dccp_insert_option_ackvec(sk, skb); |
| 533 | dccp_insert_option_ack_vector(sk, skb); | ||
| 534 | if (dp->dccps_timestamp_echo != 0) | 437 | if (dp->dccps_timestamp_echo != 0) |
| 535 | dccp_insert_option_timestamp_echo(sk, skb); | 438 | dccp_insert_option_timestamp_echo(sk, skb); |
| 536 | } | 439 | } |
| @@ -557,331 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
| 557 | } | 460 | } |
| 558 | } | 461 | } |
| 559 | } | 462 | } |
| 560 | |||
| 561 | struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, | ||
| 562 | const unsigned int __nocast priority) | ||
| 563 | { | ||
| 564 | struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); | ||
| 565 | |||
| 566 | if (ap != NULL) { | ||
| 567 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 568 | memset(ap->dccpap_buf, 0xFF, len); | ||
| 569 | #endif | ||
| 570 | ap->dccpap_buf_len = len; | ||
| 571 | ap->dccpap_buf_head = | ||
| 572 | ap->dccpap_buf_tail = | ||
| 573 | ap->dccpap_buf_len - 1; | ||
| 574 | ap->dccpap_buf_ackno = | ||
| 575 | ap->dccpap_ack_ackno = | ||
| 576 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 577 | ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; | ||
| 578 | ap->dccpap_ack_ptr = 0; | ||
| 579 | ap->dccpap_time.tv_sec = 0; | ||
| 580 | ap->dccpap_time.tv_usec = 0; | ||
| 581 | ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; | ||
| 582 | } | ||
| 583 | |||
| 584 | return ap; | ||
| 585 | } | ||
| 586 | |||
| 587 | void dccp_ackpkts_free(struct dccp_ackpkts *ap) | ||
| 588 | { | ||
| 589 | if (ap != NULL) { | ||
| 590 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 591 | memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); | ||
| 592 | #endif | ||
| 593 | kfree(ap); | ||
| 594 | } | ||
| 595 | } | ||
| 596 | |||
| 597 | static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, | ||
| 598 | const unsigned int index) | ||
| 599 | { | ||
| 600 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; | ||
| 601 | } | ||
| 602 | |||
| 603 | static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, | ||
| 604 | const unsigned int index) | ||
| 605 | { | ||
| 606 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; | ||
| 607 | } | ||
| 608 | |||
| 609 | /* | ||
| 610 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
| 611 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
| 612 | * this simplifies table updates if one of the missing packets arrives. | ||
| 613 | */ | ||
| 614 | static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, | ||
| 615 | const unsigned int packets, | ||
| 616 | const unsigned char state) | ||
| 617 | { | ||
| 618 | unsigned int gap; | ||
| 619 | signed long new_head; | ||
| 620 | |||
| 621 | if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) | ||
| 622 | return -ENOBUFS; | ||
| 623 | |||
| 624 | gap = packets - 1; | ||
| 625 | new_head = ap->dccpap_buf_head - packets; | ||
| 626 | |||
| 627 | if (new_head < 0) { | ||
| 628 | if (gap > 0) { | ||
| 629 | memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, | ||
| 630 | gap + new_head + 1); | ||
| 631 | gap = -new_head; | ||
| 632 | } | ||
| 633 | new_head += ap->dccpap_buf_len; | ||
| 634 | } | ||
| 635 | |||
| 636 | ap->dccpap_buf_head = new_head; | ||
| 637 | |||
| 638 | if (gap > 0) | ||
| 639 | memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, | ||
| 640 | DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); | ||
| 641 | |||
| 642 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
| 643 | ap->dccpap_buf_vector_len += packets; | ||
| 644 | return 0; | ||
| 645 | } | ||
| 646 | |||
| 647 | /* | ||
| 648 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
| 649 | */ | ||
| 650 | int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, | ||
| 651 | u64 ackno, u8 state) | ||
| 652 | { | ||
| 653 | /* | ||
| 654 | * Check at the right places if the buffer is full, if it is, tell the | ||
| 655 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
| 656 | * vectors, when we will free up space in dccpap_buf. | ||
| 657 | * | ||
| 658 | * We may well decide to do buffer compression, etc, but for now lets | ||
| 659 | * just drop. | ||
| 660 | * | ||
| 661 | * From Appendix A: | ||
| 662 | * | ||
| 663 | * Of course, the circular buffer may overflow, either when the | ||
| 664 | * HC-Sender is sending data at a very high rate, when the | ||
| 665 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
| 666 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
| 667 | * (so the HC-Receiver is unable to clean up old state). In this | ||
| 668 | * case, the HC-Receiver should either compress the buffer (by | ||
| 669 | * increasing run lengths when possible), transfer its state to | ||
| 670 | * a larger buffer, or, as a last resort, drop all received | ||
| 671 | * packets, without processing them whatsoever, until its buffer | ||
| 672 | * shrinks again. | ||
| 673 | */ | ||
| 674 | |||
| 675 | /* See if this is the first ackno being inserted */ | ||
| 676 | if (ap->dccpap_buf_vector_len == 0) { | ||
| 677 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
| 678 | ap->dccpap_buf_vector_len = 1; | ||
| 679 | } else if (after48(ackno, ap->dccpap_buf_ackno)) { | ||
| 680 | const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, | ||
| 681 | ackno); | ||
| 682 | |||
| 683 | /* | ||
| 684 | * Look if the state of this packet is the same as the | ||
| 685 | * previous ackno and if so if we can bump the head len. | ||
| 686 | */ | ||
| 687 | if (delta == 1 && | ||
| 688 | dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && | ||
| 689 | (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < | ||
| 690 | DCCP_ACKPKTS_LEN_MASK)) | ||
| 691 | ap->dccpap_buf[ap->dccpap_buf_head]++; | ||
| 692 | else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) | ||
| 693 | return -ENOBUFS; | ||
| 694 | } else { | ||
| 695 | /* | ||
| 696 | * A.1.2. Old Packets | ||
| 697 | * | ||
| 698 | * When a packet with Sequence Number S arrives, and | ||
| 699 | * S <= buf_ackno, the HC-Receiver will scan the table | ||
| 700 | * for the byte corresponding to S. (Indexing structures | ||
| 701 | * could reduce the complexity of this scan.) | ||
| 702 | */ | ||
| 703 | u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); | ||
| 704 | unsigned int index = ap->dccpap_buf_head; | ||
| 705 | |||
| 706 | while (1) { | ||
| 707 | const u8 len = dccp_ackpkts_len(ap, index); | ||
| 708 | const u8 state = dccp_ackpkts_state(ap, index); | ||
| 709 | /* | ||
| 710 | * valid packets not yet in dccpap_buf have a reserved | ||
| 711 | * entry, with a len equal to 0. | ||
| 712 | */ | ||
| 713 | if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && | ||
| 714 | len == 0 && delta == 0) { /* Found our | ||
| 715 | reserved seat! */ | ||
| 716 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
| 717 | (unsigned long long) ackno); | ||
| 718 | ap->dccpap_buf[index] = state; | ||
| 719 | goto out; | ||
| 720 | } | ||
| 721 | /* len == 0 means one packet */ | ||
| 722 | if (delta < len + 1) | ||
| 723 | goto out_duplicate; | ||
| 724 | |||
| 725 | delta -= len + 1; | ||
| 726 | if (++index == ap->dccpap_buf_len) | ||
| 727 | index = 0; | ||
| 728 | } | ||
| 729 | } | ||
| 730 | |||
| 731 | ap->dccpap_buf_ackno = ackno; | ||
| 732 | dccp_timestamp(sk, &ap->dccpap_time); | ||
| 733 | out: | ||
| 734 | dccp_pr_debug(""); | ||
| 735 | dccp_ackpkts_print(ap); | ||
| 736 | return 0; | ||
| 737 | |||
| 738 | out_duplicate: | ||
| 739 | /* Duplicate packet */ | ||
| 740 | dccp_pr_debug("Received a dup or already considered lost " | ||
| 741 | "packet: %llu\n", (unsigned long long) ackno); | ||
| 742 | return -EILSEQ; | ||
| 743 | } | ||
| 744 | |||
| 745 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 746 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, | ||
| 747 | int len) | ||
| 748 | { | ||
| 749 | if (!dccp_debug) | ||
| 750 | return; | ||
| 751 | |||
| 752 | printk("ACK vector len=%d, ackno=%llu |", len, | ||
| 753 | (unsigned long long) ackno); | ||
| 754 | |||
| 755 | while (len--) { | ||
| 756 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
| 757 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
| 758 | |||
| 759 | printk("%d,%d|", state, rl); | ||
| 760 | ++vector; | ||
| 761 | } | ||
| 762 | |||
| 763 | printk("\n"); | ||
| 764 | } | ||
| 765 | |||
| 766 | void dccp_ackpkts_print(const struct dccp_ackpkts *ap) | ||
| 767 | { | ||
| 768 | dccp_ackvector_print(ap->dccpap_buf_ackno, | ||
| 769 | ap->dccpap_buf + ap->dccpap_buf_head, | ||
| 770 | ap->dccpap_buf_vector_len); | ||
| 771 | } | ||
| 772 | #endif | ||
| 773 | |||
| 774 | static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) | ||
| 775 | { | ||
| 776 | /* | ||
| 777 | * As we're keeping track of the ack vector size | ||
| 778 | * (dccpap_buf_vector_len) and the sent ack vector size | ||
| 779 | * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but | ||
| 780 | * keep this code here as in the future we'll implement a vector of | ||
| 781 | * ack records, as suggested in draft-ietf-dccp-spec-11.txt | ||
| 782 | * Appendix A. -acme | ||
| 783 | */ | ||
| 784 | #if 0 | ||
| 785 | ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; | ||
| 786 | if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) | ||
| 787 | ap->dccpap_buf_tail -= ap->dccpap_buf_len; | ||
| 788 | #endif | ||
| 789 | ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; | ||
| 790 | } | ||
| 791 | |||
| 792 | void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, | ||
| 793 | u64 ackno) | ||
| 794 | { | ||
| 795 | /* Check if we actually sent an ACK vector */ | ||
| 796 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
| 797 | return; | ||
| 798 | |||
| 799 | if (ackno == ap->dccpap_ack_seqno) { | ||
| 800 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 801 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 802 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
| 803 | "CLIENT rx ack: " : "server rx ack: "; | ||
| 804 | #endif | ||
| 805 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " | ||
| 806 | "ack_ackno=%llu, ACKED!\n", | ||
| 807 | debug_prefix, 1, | ||
| 808 | (unsigned long long) ap->dccpap_ack_seqno, | ||
| 809 | (unsigned long long) ap->dccpap_ack_ackno); | ||
| 810 | dccp_ackpkts_trow_away_ack_record(ap); | ||
| 811 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 812 | } | ||
| 813 | } | ||
| 814 | |||
| 815 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
| 816 | struct sock *sk, u64 ackno, | ||
| 817 | const unsigned char len, | ||
| 818 | const unsigned char *vector) | ||
| 819 | { | ||
| 820 | unsigned char i; | ||
| 821 | |||
| 822 | /* Check if we actually sent an ACK vector */ | ||
| 823 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
| 824 | return; | ||
| 825 | /* | ||
| 826 | * We're in the receiver half connection, so if the received an ACK | ||
| 827 | * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're | ||
| 828 | * not interested. | ||
| 829 | * | ||
| 830 | * Extra explanation with example: | ||
| 831 | * | ||
| 832 | * if we received an ACK vector with ackno 50, it can only be acking | ||
| 833 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
| 834 | */ | ||
| 835 | /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ | ||
| 836 | if (before48(ackno, ap->dccpap_ack_seqno)) { | ||
| 837 | /* dccp_pr_debug_cat("yes\n"); */ | ||
| 838 | return; | ||
| 839 | } | ||
| 840 | /* dccp_pr_debug_cat("no\n"); */ | ||
| 841 | |||
| 842 | i = len; | ||
| 843 | while (i--) { | ||
| 844 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
| 845 | u64 ackno_end_rl; | ||
| 846 | |||
| 847 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
| 848 | |||
| 849 | /* | ||
| 850 | * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, | ||
| 851 | * ap->dccpap_ack_seqno, ackno); | ||
| 852 | */ | ||
| 853 | if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { | ||
| 854 | const u8 state = (*vector & | ||
| 855 | DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
| 856 | /* dccp_pr_debug_cat("yes\n"); */ | ||
| 857 | |||
| 858 | if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { | ||
| 859 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
| 860 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 861 | const char *debug_prefix = | ||
| 862 | dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
| 863 | "CLIENT rx ack: " : "server rx ack: "; | ||
| 864 | #endif | ||
| 865 | dccp_pr_debug("%sACK vector 0, len=%d, " | ||
| 866 | "ack_seqno=%llu, ack_ackno=%llu, " | ||
| 867 | "ACKED!\n", | ||
| 868 | debug_prefix, len, | ||
| 869 | (unsigned long long) | ||
| 870 | ap->dccpap_ack_seqno, | ||
| 871 | (unsigned long long) | ||
| 872 | ap->dccpap_ack_ackno); | ||
| 873 | dccp_ackpkts_trow_away_ack_record(ap); | ||
| 874 | } | ||
| 875 | /* | ||
| 876 | * If dccpap_ack_seqno was not received, no problem | ||
| 877 | * we'll send another ACK vector. | ||
| 878 | */ | ||
| 879 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
| 880 | break; | ||
| 881 | } | ||
| 882 | /* dccp_pr_debug_cat("no\n"); */ | ||
| 883 | |||
| 884 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
| 885 | ++vector; | ||
| 886 | } | ||
| 887 | } | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c index ea6d0e91e511..4786bdcddcc9 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <net/sock.h> | 17 | #include <net/sock.h> |
| 18 | 18 | ||
| 19 | #include "ackvec.h" | ||
| 19 | #include "ccid.h" | 20 | #include "ccid.h" |
| 20 | #include "dccp.h" | 21 | #include "dccp.h" |
| 21 | 22 | ||
| @@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 85 | switch (dcb->dccpd_type) { | 86 | switch (dcb->dccpd_type) { |
| 86 | case DCCP_PKT_REQUEST: | 87 | case DCCP_PKT_REQUEST: |
| 87 | dccp_hdr_request(skb)->dccph_req_service = | 88 | dccp_hdr_request(skb)->dccph_req_service = |
| 88 | dcb->dccpd_service; | 89 | dp->dccps_service; |
| 89 | break; | 90 | break; |
| 90 | case DCCP_PKT_RESET: | 91 | case DCCP_PKT_RESET: |
| 91 | dccp_hdr_reset(skb)->dccph_reset_code = | 92 | dccp_hdr_reset(skb)->dccph_reset_code = |
| @@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | |||
| 225 | err = dccp_wait_for_ccid(sk, skb, timeo); | 226 | err = dccp_wait_for_ccid(sk, skb, timeo); |
| 226 | 227 | ||
| 227 | if (err == 0) { | 228 | if (err == 0) { |
| 228 | const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
| 229 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 229 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
| 230 | const int len = skb->len; | 230 | const int len = skb->len; |
| 231 | 231 | ||
| @@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | |||
| 236 | inet_csk(sk)->icsk_rto, | 236 | inet_csk(sk)->icsk_rto, |
| 237 | DCCP_RTO_MAX); | 237 | DCCP_RTO_MAX); |
| 238 | dcb->dccpd_type = DCCP_PKT_DATAACK; | 238 | dcb->dccpd_type = DCCP_PKT_DATAACK; |
| 239 | /* | 239 | } else if (dccp_ack_pending(sk)) |
| 240 | * FIXME: we really should have a | ||
| 241 | * dccps_ack_pending or use icsk. | ||
| 242 | */ | ||
| 243 | } else if (inet_csk_ack_scheduled(sk) || | ||
| 244 | dp->dccps_timestamp_echo != 0 || | ||
| 245 | (dp->dccps_options.dccpo_send_ack_vector && | ||
| 246 | ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && | ||
| 247 | ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) | ||
| 248 | dcb->dccpd_type = DCCP_PKT_DATAACK; | 240 | dcb->dccpd_type = DCCP_PKT_DATAACK; |
| 249 | else | 241 | else |
| 250 | dcb->dccpd_type = DCCP_PKT_DATA; | 242 | dcb->dccpd_type = DCCP_PKT_DATA; |
| @@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
| 270 | struct request_sock *req) | 262 | struct request_sock *req) |
| 271 | { | 263 | { |
| 272 | struct dccp_hdr *dh; | 264 | struct dccp_hdr *dh; |
| 265 | struct dccp_request_sock *dreq; | ||
| 273 | const int dccp_header_size = sizeof(struct dccp_hdr) + | 266 | const int dccp_header_size = sizeof(struct dccp_hdr) + |
| 274 | sizeof(struct dccp_hdr_ext) + | 267 | sizeof(struct dccp_hdr_ext) + |
| 275 | sizeof(struct dccp_hdr_response); | 268 | sizeof(struct dccp_hdr_response); |
| @@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
| 285 | skb->dst = dst_clone(dst); | 278 | skb->dst = dst_clone(dst); |
| 286 | skb->csum = 0; | 279 | skb->csum = 0; |
| 287 | 280 | ||
| 281 | dreq = dccp_rsk(req); | ||
| 288 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; | 282 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; |
| 289 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; | 283 | DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; |
| 290 | dccp_insert_options(sk, skb); | 284 | dccp_insert_options(sk, skb); |
| 291 | 285 | ||
| 292 | skb->h.raw = skb_push(skb, dccp_header_size); | 286 | skb->h.raw = skb_push(skb, dccp_header_size); |
| @@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
| 300 | DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | 294 | DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; |
| 301 | dh->dccph_type = DCCP_PKT_RESPONSE; | 295 | dh->dccph_type = DCCP_PKT_RESPONSE; |
| 302 | dh->dccph_x = 1; | 296 | dh->dccph_x = 1; |
| 303 | dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); | 297 | dccp_hdr_set_seq(dh, dreq->dreq_iss); |
| 304 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); | 298 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); |
| 299 | dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; | ||
| 305 | 300 | ||
| 306 | dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, | 301 | dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, |
| 307 | inet_rsk(req)->rmt_addr); | 302 | inet_rsk(req)->rmt_addr); |
| @@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk) | |||
| 397 | skb_reserve(skb, MAX_DCCP_HEADER); | 392 | skb_reserve(skb, MAX_DCCP_HEADER); |
| 398 | 393 | ||
| 399 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; | 394 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; |
| 400 | /* FIXME: set service to something meaningful, coming | ||
| 401 | * from userspace*/ | ||
| 402 | DCCP_SKB_CB(skb)->dccpd_service = 0; | ||
| 403 | skb->csum = 0; | 395 | skb->csum = 0; |
| 404 | skb_set_owner_w(skb, sk); | 396 | skb_set_owner_w(skb, sk); |
| 405 | 397 | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69c9dc7..a1cfd0e9e3bc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
| @@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); | |||
| 94 | 94 | ||
| 95 | static inline int dccp_listen_start(struct sock *sk) | 95 | static inline int dccp_listen_start(struct sock *sk) |
| 96 | { | 96 | { |
| 97 | dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; | 97 | struct dccp_sock *dp = dccp_sk(sk); |
| 98 | |||
| 99 | dp->dccps_role = DCCP_ROLE_LISTEN; | ||
| 100 | /* | ||
| 101 | * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) | ||
| 102 | * before calling listen() | ||
| 103 | */ | ||
| 104 | if (dccp_service_not_initialized(sk)) | ||
| 105 | return -EPROTO; | ||
| 98 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); | 106 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); |
| 99 | } | 107 | } |
| 100 | 108 | ||
| @@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
| 202 | return -ENOIOCTLCMD; | 210 | return -ENOIOCTLCMD; |
| 203 | } | 211 | } |
| 204 | 212 | ||
| 213 | static int dccp_setsockopt_service(struct sock *sk, const u32 service, | ||
| 214 | char __user *optval, int optlen) | ||
| 215 | { | ||
| 216 | struct dccp_sock *dp = dccp_sk(sk); | ||
| 217 | struct dccp_service_list *sl = NULL; | ||
| 218 | |||
| 219 | if (service == DCCP_SERVICE_INVALID_VALUE || | ||
| 220 | optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) | ||
| 221 | return -EINVAL; | ||
| 222 | |||
| 223 | if (optlen > sizeof(service)) { | ||
| 224 | sl = kmalloc(optlen, GFP_KERNEL); | ||
| 225 | if (sl == NULL) | ||
| 226 | return -ENOMEM; | ||
| 227 | |||
| 228 | sl->dccpsl_nr = optlen / sizeof(u32) - 1; | ||
| 229 | if (copy_from_user(sl->dccpsl_list, | ||
| 230 | optval + sizeof(service), | ||
| 231 | optlen - sizeof(service)) || | ||
| 232 | dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { | ||
| 233 | kfree(sl); | ||
| 234 | return -EFAULT; | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | lock_sock(sk); | ||
| 239 | dp->dccps_service = service; | ||
| 240 | |||
| 241 | if (dp->dccps_service_list != NULL) | ||
| 242 | kfree(dp->dccps_service_list); | ||
| 243 | |||
| 244 | dp->dccps_service_list = sl; | ||
| 245 | release_sock(sk); | ||
| 246 | return 0; | ||
| 247 | } | ||
| 248 | |||
| 205 | int dccp_setsockopt(struct sock *sk, int level, int optname, | 249 | int dccp_setsockopt(struct sock *sk, int level, int optname, |
| 206 | char __user *optval, int optlen) | 250 | char __user *optval, int optlen) |
| 207 | { | 251 | { |
| @@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, | |||
| 218 | if (get_user(val, (int __user *)optval)) | 262 | if (get_user(val, (int __user *)optval)) |
| 219 | return -EFAULT; | 263 | return -EFAULT; |
| 220 | 264 | ||
| 221 | lock_sock(sk); | 265 | if (optname == DCCP_SOCKOPT_SERVICE) |
| 266 | return dccp_setsockopt_service(sk, val, optval, optlen); | ||
| 222 | 267 | ||
| 268 | lock_sock(sk); | ||
| 223 | dp = dccp_sk(sk); | 269 | dp = dccp_sk(sk); |
| 224 | err = 0; | 270 | err = 0; |
| 225 | 271 | ||
| @@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, | |||
| 236 | return err; | 282 | return err; |
| 237 | } | 283 | } |
| 238 | 284 | ||
| 285 | static int dccp_getsockopt_service(struct sock *sk, int len, | ||
| 286 | u32 __user *optval, | ||
| 287 | int __user *optlen) | ||
| 288 | { | ||
| 289 | const struct dccp_sock *dp = dccp_sk(sk); | ||
| 290 | const struct dccp_service_list *sl; | ||
| 291 | int err = -ENOENT, slen = 0, total_len = sizeof(u32); | ||
| 292 | |||
| 293 | lock_sock(sk); | ||
| 294 | if (dccp_service_not_initialized(sk)) | ||
| 295 | goto out; | ||
| 296 | |||
| 297 | if ((sl = dp->dccps_service_list) != NULL) { | ||
| 298 | slen = sl->dccpsl_nr * sizeof(u32); | ||
| 299 | total_len += slen; | ||
| 300 | } | ||
| 301 | |||
| 302 | err = -EINVAL; | ||
| 303 | if (total_len > len) | ||
| 304 | goto out; | ||
| 305 | |||
| 306 | err = 0; | ||
| 307 | if (put_user(total_len, optlen) || | ||
| 308 | put_user(dp->dccps_service, optval) || | ||
| 309 | (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) | ||
| 310 | err = -EFAULT; | ||
| 311 | out: | ||
| 312 | release_sock(sk); | ||
| 313 | return err; | ||
| 314 | } | ||
| 315 | |||
| 239 | int dccp_getsockopt(struct sock *sk, int level, int optname, | 316 | int dccp_getsockopt(struct sock *sk, int level, int optname, |
| 240 | char __user *optval, int __user *optlen) | 317 | char __user *optval, int __user *optlen) |
| 241 | { | 318 | { |
| @@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, | |||
| 248 | if (get_user(len, optlen)) | 325 | if (get_user(len, optlen)) |
| 249 | return -EFAULT; | 326 | return -EFAULT; |
| 250 | 327 | ||
| 251 | len = min_t(unsigned int, len, sizeof(int)); | 328 | if (len < sizeof(int)) |
| 252 | if (len < 0) | ||
| 253 | return -EINVAL; | 329 | return -EINVAL; |
| 254 | 330 | ||
| 255 | dp = dccp_sk(sk); | 331 | dp = dccp_sk(sk); |
| @@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, | |||
| 257 | switch (optname) { | 333 | switch (optname) { |
| 258 | case DCCP_SOCKOPT_PACKET_SIZE: | 334 | case DCCP_SOCKOPT_PACKET_SIZE: |
| 259 | val = dp->dccps_packet_size; | 335 | val = dp->dccps_packet_size; |
| 336 | len = sizeof(dp->dccps_packet_size); | ||
| 260 | break; | 337 | break; |
| 338 | case DCCP_SOCKOPT_SERVICE: | ||
| 339 | return dccp_getsockopt_service(sk, len, | ||
| 340 | (u32 __user *)optval, optlen); | ||
| 341 | case 128 ... 191: | ||
| 342 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | ||
| 343 | len, (u32 __user *)optval, optlen); | ||
| 344 | case 192 ... 255: | ||
| 345 | return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, | ||
| 346 | len, (u32 __user *)optval, optlen); | ||
| 261 | default: | 347 | default: |
| 262 | return -ENOPROTOOPT; | 348 | return -ENOPROTOOPT; |
| 263 | } | 349 | } |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 30aa8e2ee214..e2162d270073 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
| @@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS | |||
| 51 | 51 | ||
| 52 | If unsure, say `N'. | 52 | If unsure, say `N'. |
| 53 | 53 | ||
| 54 | config IP_NF_CONNTRACK_NETLINK | ||
| 55 | tristate 'Connection tracking netlink interface' | ||
| 56 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | ||
| 57 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m | ||
| 58 | help | ||
| 59 | This option enables support for a netlink-based userspace interface | ||
| 60 | |||
| 61 | |||
| 54 | config IP_NF_CT_PROTO_SCTP | 62 | config IP_NF_CT_PROTO_SCTP |
| 55 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' | 63 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' |
| 56 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | 64 | depends on IP_NF_CONNTRACK && EXPERIMENTAL |
| @@ -774,11 +782,5 @@ config IP_NF_ARP_MANGLE | |||
| 774 | Allows altering the ARP packet payload: source and destination | 782 | Allows altering the ARP packet payload: source and destination |
| 775 | hardware and network addresses. | 783 | hardware and network addresses. |
| 776 | 784 | ||
| 777 | config IP_NF_CONNTRACK_NETLINK | ||
| 778 | tristate 'Connection tracking netlink interface' | ||
| 779 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | ||
| 780 | help | ||
| 781 | This option enables support for a netlink-based userspace interface | ||
| 782 | |||
| 783 | endmenu | 785 | endmenu |
| 784 | 786 | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 19cba16e6e1e..f8cd8e42961e 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
| @@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, | |||
| 1143 | if (del_timer(&ct->timeout)) { | 1143 | if (del_timer(&ct->timeout)) { |
| 1144 | ct->timeout.expires = jiffies + extra_jiffies; | 1144 | ct->timeout.expires = jiffies + extra_jiffies; |
| 1145 | add_timer(&ct->timeout); | 1145 | add_timer(&ct->timeout); |
| 1146 | ip_conntrack_event_cache(IPCT_REFRESH, skb); | 1146 | /* FIXME: We lose some REFRESH events if this function |
| 1147 | * is called without an skb. I'll fix this later -HW */ | ||
| 1148 | if (skb) | ||
| 1149 | ip_conntrack_event_cache(IPCT_REFRESH, skb); | ||
| 1147 | } | 1150 | } |
| 1148 | ct_add_counters(ct, ctinfo, skb); | 1151 | ct_add_counters(ct, ctinfo, skb); |
| 1149 | write_unlock_bh(&ip_conntrack_lock); | 1152 | write_unlock_bh(&ip_conntrack_lock); |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d38913754b1..9bcb398fbc1f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
| 14 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |
| 15 | #include <linux/jhash.h> | 15 | #include <linux/jhash.h> |
| 16 | #include <linux/bitops.h> | ||
| 16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
| 17 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
| 18 | #include <linux/tcp.h> | 19 | #include <linux/tcp.h> |
| @@ -30,7 +31,7 @@ | |||
| 30 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> |
| 31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
| 32 | 33 | ||
| 33 | #define CLUSTERIP_VERSION "0.7" | 34 | #define CLUSTERIP_VERSION "0.8" |
| 34 | 35 | ||
| 35 | #define DEBUG_CLUSTERIP | 36 | #define DEBUG_CLUSTERIP |
| 36 | 37 | ||
| @@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); | |||
| 49 | struct clusterip_config { | 50 | struct clusterip_config { |
| 50 | struct list_head list; /* list of all configs */ | 51 | struct list_head list; /* list of all configs */ |
| 51 | atomic_t refcount; /* reference count */ | 52 | atomic_t refcount; /* reference count */ |
| 53 | atomic_t entries; /* number of entries/rules | ||
| 54 | * referencing us */ | ||
| 52 | 55 | ||
| 53 | u_int32_t clusterip; /* the IP address */ | 56 | u_int32_t clusterip; /* the IP address */ |
| 54 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ | 57 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ |
| 55 | struct net_device *dev; /* device */ | 58 | struct net_device *dev; /* device */ |
| 56 | u_int16_t num_total_nodes; /* total number of nodes */ | 59 | u_int16_t num_total_nodes; /* total number of nodes */ |
| 57 | u_int16_t num_local_nodes; /* number of local nodes */ | 60 | unsigned long local_nodes; /* node number array */ |
| 58 | u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ | ||
| 59 | 61 | ||
| 60 | #ifdef CONFIG_PROC_FS | 62 | #ifdef CONFIG_PROC_FS |
| 61 | struct proc_dir_entry *pde; /* proc dir entry */ | 63 | struct proc_dir_entry *pde; /* proc dir entry */ |
| @@ -66,8 +68,7 @@ struct clusterip_config { | |||
| 66 | 68 | ||
| 67 | static LIST_HEAD(clusterip_configs); | 69 | static LIST_HEAD(clusterip_configs); |
| 68 | 70 | ||
| 69 | /* clusterip_lock protects the clusterip_configs list _AND_ the configurable | 71 | /* clusterip_lock protects the clusterip_configs list */ |
| 70 | * data within all structurses (num_local_nodes, local_nodes[]) */ | ||
| 71 | static DEFINE_RWLOCK(clusterip_lock); | 72 | static DEFINE_RWLOCK(clusterip_lock); |
| 72 | 73 | ||
| 73 | #ifdef CONFIG_PROC_FS | 74 | #ifdef CONFIG_PROC_FS |
| @@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir; | |||
| 76 | #endif | 77 | #endif |
| 77 | 78 | ||
| 78 | static inline void | 79 | static inline void |
| 79 | clusterip_config_get(struct clusterip_config *c) { | 80 | clusterip_config_get(struct clusterip_config *c) |
| 81 | { | ||
| 80 | atomic_inc(&c->refcount); | 82 | atomic_inc(&c->refcount); |
| 81 | } | 83 | } |
| 82 | 84 | ||
| 83 | static inline void | 85 | static inline void |
| 84 | clusterip_config_put(struct clusterip_config *c) { | 86 | clusterip_config_put(struct clusterip_config *c) |
| 85 | if (atomic_dec_and_test(&c->refcount)) { | 87 | { |
| 88 | if (atomic_dec_and_test(&c->refcount)) | ||
| 89 | kfree(c); | ||
| 90 | } | ||
| 91 | |||
| 92 | /* increase the count of entries(rules) using/referencing this config */ | ||
| 93 | static inline void | ||
| 94 | clusterip_config_entry_get(struct clusterip_config *c) | ||
| 95 | { | ||
| 96 | atomic_inc(&c->entries); | ||
| 97 | } | ||
| 98 | |||
| 99 | /* decrease the count of entries using/referencing this config. If last | ||
| 100 | * entry(rule) is removed, remove the config from lists, but don't free it | ||
| 101 | * yet, since proc-files could still be holding references */ | ||
| 102 | static inline void | ||
| 103 | clusterip_config_entry_put(struct clusterip_config *c) | ||
| 104 | { | ||
| 105 | if (atomic_dec_and_test(&c->entries)) { | ||
| 86 | write_lock_bh(&clusterip_lock); | 106 | write_lock_bh(&clusterip_lock); |
| 87 | list_del(&c->list); | 107 | list_del(&c->list); |
| 88 | write_unlock_bh(&clusterip_lock); | 108 | write_unlock_bh(&clusterip_lock); |
| 109 | |||
| 89 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); | 110 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); |
| 90 | dev_put(c->dev); | 111 | dev_put(c->dev); |
| 91 | kfree(c); | 112 | |
| 113 | /* In case anyone still accesses the file, the open/close | ||
| 114 | * functions are also incrementing the refcount on their own, | ||
| 115 | * so it's safe to remove the entry even if it's in use. */ | ||
| 116 | #ifdef CONFIG_PROC_FS | ||
| 117 | remove_proc_entry(c->pde->name, c->pde->parent); | ||
| 118 | #endif | ||
| 92 | } | 119 | } |
| 93 | } | 120 | } |
| 94 | 121 | ||
| 95 | |||
| 96 | static struct clusterip_config * | 122 | static struct clusterip_config * |
| 97 | __clusterip_config_find(u_int32_t clusterip) | 123 | __clusterip_config_find(u_int32_t clusterip) |
| 98 | { | 124 | { |
| @@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip) | |||
| 111 | } | 137 | } |
| 112 | 138 | ||
| 113 | static inline struct clusterip_config * | 139 | static inline struct clusterip_config * |
| 114 | clusterip_config_find_get(u_int32_t clusterip) | 140 | clusterip_config_find_get(u_int32_t clusterip, int entry) |
| 115 | { | 141 | { |
| 116 | struct clusterip_config *c; | 142 | struct clusterip_config *c; |
| 117 | 143 | ||
| @@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip) | |||
| 122 | return NULL; | 148 | return NULL; |
| 123 | } | 149 | } |
| 124 | atomic_inc(&c->refcount); | 150 | atomic_inc(&c->refcount); |
| 151 | if (entry) | ||
| 152 | atomic_inc(&c->entries); | ||
| 125 | read_unlock_bh(&clusterip_lock); | 153 | read_unlock_bh(&clusterip_lock); |
| 126 | 154 | ||
| 127 | return c; | 155 | return c; |
| 128 | } | 156 | } |
| 129 | 157 | ||
| 158 | static void | ||
| 159 | clusterip_config_init_nodelist(struct clusterip_config *c, | ||
| 160 | const struct ipt_clusterip_tgt_info *i) | ||
| 161 | { | ||
| 162 | int n; | ||
| 163 | |||
| 164 | for (n = 0; n < i->num_local_nodes; n++) { | ||
| 165 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 130 | static struct clusterip_config * | 169 | static struct clusterip_config * |
| 131 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | 170 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, |
| 132 | struct net_device *dev) | 171 | struct net_device *dev) |
| @@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |||
| 143 | c->clusterip = ip; | 182 | c->clusterip = ip; |
| 144 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | 183 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); |
| 145 | c->num_total_nodes = i->num_total_nodes; | 184 | c->num_total_nodes = i->num_total_nodes; |
| 146 | c->num_local_nodes = i->num_local_nodes; | 185 | clusterip_config_init_nodelist(c, i); |
| 147 | memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); | ||
| 148 | c->hash_mode = i->hash_mode; | 186 | c->hash_mode = i->hash_mode; |
| 149 | c->hash_initval = i->hash_initval; | 187 | c->hash_initval = i->hash_initval; |
| 150 | atomic_set(&c->refcount, 1); | 188 | atomic_set(&c->refcount, 1); |
| 189 | atomic_set(&c->entries, 1); | ||
| 151 | 190 | ||
| 152 | #ifdef CONFIG_PROC_FS | 191 | #ifdef CONFIG_PROC_FS |
| 153 | /* create proc dir entry */ | 192 | /* create proc dir entry */ |
| @@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |||
| 171 | static int | 210 | static int |
| 172 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | 211 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) |
| 173 | { | 212 | { |
| 174 | int i; | ||
| 175 | |||
| 176 | write_lock_bh(&clusterip_lock); | ||
| 177 | 213 | ||
| 178 | if (c->num_local_nodes >= CLUSTERIP_MAX_NODES | 214 | if (nodenum == 0 || |
| 179 | || nodenum > CLUSTERIP_MAX_NODES) { | 215 | nodenum > c->num_total_nodes) |
| 180 | write_unlock_bh(&clusterip_lock); | ||
| 181 | return 1; | 216 | return 1; |
| 182 | } | ||
| 183 | |||
| 184 | /* check if we alrady have this number in our array */ | ||
| 185 | for (i = 0; i < c->num_local_nodes; i++) { | ||
| 186 | if (c->local_nodes[i] == nodenum) { | ||
| 187 | write_unlock_bh(&clusterip_lock); | ||
| 188 | return 1; | ||
| 189 | } | ||
| 190 | } | ||
| 191 | 217 | ||
| 192 | c->local_nodes[c->num_local_nodes++] = nodenum; | 218 | /* check if we already have this number in our bitfield */ |
| 219 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | ||
| 220 | return 1; | ||
| 193 | 221 | ||
| 194 | write_unlock_bh(&clusterip_lock); | ||
| 195 | return 0; | 222 | return 0; |
| 196 | } | 223 | } |
| 197 | 224 | ||
| 198 | static int | 225 | static int |
| 199 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) | 226 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
| 200 | { | 227 | { |
| 201 | int i; | 228 | if (nodenum == 0 || |
| 202 | 229 | nodenum > c->num_total_nodes) | |
| 203 | write_lock_bh(&clusterip_lock); | ||
| 204 | |||
| 205 | if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { | ||
| 206 | write_unlock_bh(&clusterip_lock); | ||
| 207 | return 1; | 230 | return 1; |
| 208 | } | ||
| 209 | 231 | ||
| 210 | for (i = 0; i < c->num_local_nodes; i++) { | 232 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
| 211 | if (c->local_nodes[i] == nodenum) { | 233 | return 0; |
| 212 | int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); | ||
| 213 | memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); | ||
| 214 | c->num_local_nodes--; | ||
| 215 | write_unlock_bh(&clusterip_lock); | ||
| 216 | return 0; | ||
| 217 | } | ||
| 218 | } | ||
| 219 | 234 | ||
| 220 | write_unlock_bh(&clusterip_lock); | ||
| 221 | return 1; | 235 | return 1; |
| 222 | } | 236 | } |
| 223 | 237 | ||
| @@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) | |||
| 285 | static inline int | 299 | static inline int |
| 286 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) | 300 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) |
| 287 | { | 301 | { |
| 288 | int i; | 302 | return test_bit(hash - 1, &config->local_nodes); |
| 289 | |||
| 290 | read_lock_bh(&clusterip_lock); | ||
| 291 | |||
| 292 | if (config->num_local_nodes == 0) { | ||
| 293 | read_unlock_bh(&clusterip_lock); | ||
| 294 | return 0; | ||
| 295 | } | ||
| 296 | |||
| 297 | for (i = 0; i < config->num_local_nodes; i++) { | ||
| 298 | if (config->local_nodes[i] == hash) { | ||
| 299 | read_unlock_bh(&clusterip_lock); | ||
| 300 | return 1; | ||
| 301 | } | ||
| 302 | } | ||
| 303 | |||
| 304 | read_unlock_bh(&clusterip_lock); | ||
| 305 | |||
| 306 | return 0; | ||
| 307 | } | 303 | } |
| 308 | 304 | ||
| 309 | /*********************************************************************** | 305 | /*********************************************************************** |
| @@ -415,8 +411,26 @@ checkentry(const char *tablename, | |||
| 415 | 411 | ||
| 416 | /* FIXME: further sanity checks */ | 412 | /* FIXME: further sanity checks */ |
| 417 | 413 | ||
| 418 | config = clusterip_config_find_get(e->ip.dst.s_addr); | 414 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
| 419 | if (!config) { | 415 | if (config) { |
| 416 | if (cipinfo->config != NULL) { | ||
| 417 | /* Case A: This is an entry that gets reloaded, since | ||
| 418 | * it still has a cipinfo->config pointer. Simply | ||
| 419 | * increase the entry refcount and return */ | ||
| 420 | if (cipinfo->config != config) { | ||
| 421 | printk(KERN_ERR "CLUSTERIP: Reloaded entry " | ||
| 422 | "has invalid config pointer!\n"); | ||
| 423 | return 0; | ||
| 424 | } | ||
| 425 | clusterip_config_entry_get(cipinfo->config); | ||
| 426 | } else { | ||
| 427 | /* Case B: This is a new rule referring to an existing | ||
| 428 | * clusterip config. */ | ||
| 429 | cipinfo->config = config; | ||
| 430 | clusterip_config_entry_get(cipinfo->config); | ||
| 431 | } | ||
| 432 | } else { | ||
| 433 | /* Case C: This is a completely new clusterip config */ | ||
| 420 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { | 434 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
| 421 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); | 435 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); |
| 422 | return 0; | 436 | return 0; |
| @@ -443,10 +457,9 @@ checkentry(const char *tablename, | |||
| 443 | } | 457 | } |
| 444 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | 458 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); |
| 445 | } | 459 | } |
| 460 | cipinfo->config = config; | ||
| 446 | } | 461 | } |
| 447 | 462 | ||
| 448 | cipinfo->config = config; | ||
| 449 | |||
| 450 | return 1; | 463 | return 1; |
| 451 | } | 464 | } |
| 452 | 465 | ||
| @@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) | |||
| 455 | { | 468 | { |
| 456 | struct ipt_clusterip_tgt_info *cipinfo = matchinfo; | 469 | struct ipt_clusterip_tgt_info *cipinfo = matchinfo; |
| 457 | 470 | ||
| 458 | /* we first remove the proc entry and then drop the reference | 471 | /* if no more entries are referencing the config, remove it |
| 459 | * count. In case anyone still accesses the file, the open/close | 472 | * from the list and destroy the proc entry */ |
| 460 | * functions are also incrementing the refcount on their own */ | 473 | clusterip_config_entry_put(cipinfo->config); |
| 461 | #ifdef CONFIG_PROC_FS | 474 | |
| 462 | remove_proc_entry(cipinfo->config->pde->name, | ||
| 463 | cipinfo->config->pde->parent); | ||
| 464 | #endif | ||
| 465 | clusterip_config_put(cipinfo->config); | 475 | clusterip_config_put(cipinfo->config); |
| 466 | } | 476 | } |
| 467 | 477 | ||
| @@ -533,7 +543,7 @@ arp_mangle(unsigned int hook, | |||
| 533 | 543 | ||
| 534 | /* if there is no clusterip configuration for the arp reply's | 544 | /* if there is no clusterip configuration for the arp reply's |
| 535 | * source ip, we don't want to mangle it */ | 545 | * source ip, we don't want to mangle it */ |
| 536 | c = clusterip_config_find_get(payload->src_ip); | 546 | c = clusterip_config_find_get(payload->src_ip, 0); |
| 537 | if (!c) | 547 | if (!c) |
| 538 | return NF_ACCEPT; | 548 | return NF_ACCEPT; |
| 539 | 549 | ||
| @@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { | |||
| 574 | 584 | ||
| 575 | #ifdef CONFIG_PROC_FS | 585 | #ifdef CONFIG_PROC_FS |
| 576 | 586 | ||
| 587 | struct clusterip_seq_position { | ||
| 588 | unsigned int pos; /* position */ | ||
| 589 | unsigned int weight; /* number of bits set == size */ | ||
| 590 | unsigned int bit; /* current bit */ | ||
| 591 | unsigned long val; /* current value */ | ||
| 592 | }; | ||
| 593 | |||
| 577 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | 594 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
| 578 | { | 595 | { |
| 579 | struct proc_dir_entry *pde = s->private; | 596 | struct proc_dir_entry *pde = s->private; |
| 580 | struct clusterip_config *c = pde->data; | 597 | struct clusterip_config *c = pde->data; |
| 581 | unsigned int *nodeidx; | 598 | unsigned int weight; |
| 582 | 599 | u_int32_t local_nodes; | |
| 583 | read_lock_bh(&clusterip_lock); | 600 | struct clusterip_seq_position *idx; |
| 584 | if (*pos >= c->num_local_nodes) | 601 | |
| 602 | /* FIXME: possible race */ | ||
| 603 | local_nodes = c->local_nodes; | ||
| 604 | weight = hweight32(local_nodes); | ||
| 605 | if (*pos >= weight) | ||
| 585 | return NULL; | 606 | return NULL; |
| 586 | 607 | ||
| 587 | nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); | 608 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
| 588 | if (!nodeidx) | 609 | if (!idx) |
| 589 | return ERR_PTR(-ENOMEM); | 610 | return ERR_PTR(-ENOMEM); |
| 590 | 611 | ||
| 591 | *nodeidx = *pos; | 612 | idx->pos = *pos; |
| 592 | return nodeidx; | 613 | idx->weight = weight; |
| 614 | idx->bit = ffs(local_nodes); | ||
| 615 | idx->val = local_nodes; | ||
| 616 | clear_bit(idx->bit - 1, &idx->val); | ||
| 617 | |||
| 618 | return idx; | ||
| 593 | } | 619 | } |
| 594 | 620 | ||
| 595 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | 621 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) |
| 596 | { | 622 | { |
| 597 | struct proc_dir_entry *pde = s->private; | 623 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
| 598 | struct clusterip_config *c = pde->data; | ||
| 599 | unsigned int *nodeidx = (unsigned int *)v; | ||
| 600 | 624 | ||
| 601 | *pos = ++(*nodeidx); | 625 | *pos = ++idx->pos; |
| 602 | if (*pos >= c->num_local_nodes) { | 626 | if (*pos >= idx->weight) { |
| 603 | kfree(v); | 627 | kfree(v); |
| 604 | return NULL; | 628 | return NULL; |
| 605 | } | 629 | } |
| 606 | return nodeidx; | 630 | idx->bit = ffs(idx->val); |
| 631 | clear_bit(idx->bit - 1, &idx->val); | ||
| 632 | return idx; | ||
| 607 | } | 633 | } |
| 608 | 634 | ||
| 609 | static void clusterip_seq_stop(struct seq_file *s, void *v) | 635 | static void clusterip_seq_stop(struct seq_file *s, void *v) |
| 610 | { | 636 | { |
| 611 | kfree(v); | 637 | kfree(v); |
| 612 | |||
| 613 | read_unlock_bh(&clusterip_lock); | ||
| 614 | } | 638 | } |
| 615 | 639 | ||
| 616 | static int clusterip_seq_show(struct seq_file *s, void *v) | 640 | static int clusterip_seq_show(struct seq_file *s, void *v) |
| 617 | { | 641 | { |
| 618 | struct proc_dir_entry *pde = s->private; | 642 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
| 619 | struct clusterip_config *c = pde->data; | ||
| 620 | unsigned int *nodeidx = (unsigned int *)v; | ||
| 621 | 643 | ||
| 622 | if (*nodeidx != 0) | 644 | if (idx->pos != 0) |
| 623 | seq_putc(s, ','); | 645 | seq_putc(s, ','); |
| 624 | seq_printf(s, "%u", c->local_nodes[*nodeidx]); | ||
| 625 | 646 | ||
| 626 | if (*nodeidx == c->num_local_nodes-1) | 647 | seq_printf(s, "%u", idx->bit); |
| 648 | |||
| 649 | if (idx->pos == idx->weight - 1) | ||
| 627 | seq_putc(s, '\n'); | 650 | seq_putc(s, '\n'); |
| 628 | 651 | ||
| 629 | return 0; | 652 | return 0; |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2b9bf9bd177f..6001948600f3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
| @@ -639,6 +639,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, | |||
| 639 | int tclass = -1; | 639 | int tclass = -1; |
| 640 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 640 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
| 641 | int err; | 641 | int err; |
| 642 | int connected = 0; | ||
| 642 | 643 | ||
| 643 | /* destination address check */ | 644 | /* destination address check */ |
| 644 | if (sin6) { | 645 | if (sin6) { |
| @@ -748,6 +749,7 @@ do_udp_sendmsg: | |||
| 748 | fl->fl_ip_dport = inet->dport; | 749 | fl->fl_ip_dport = inet->dport; |
| 749 | daddr = &np->daddr; | 750 | daddr = &np->daddr; |
| 750 | fl->fl6_flowlabel = np->flow_label; | 751 | fl->fl6_flowlabel = np->flow_label; |
| 752 | connected = 1; | ||
| 751 | } | 753 | } |
| 752 | 754 | ||
| 753 | if (!fl->oif) | 755 | if (!fl->oif) |
| @@ -770,6 +772,7 @@ do_udp_sendmsg: | |||
| 770 | } | 772 | } |
| 771 | if (!(opt->opt_nflen|opt->opt_flen)) | 773 | if (!(opt->opt_nflen|opt->opt_flen)) |
| 772 | opt = NULL; | 774 | opt = NULL; |
| 775 | connected = 0; | ||
| 773 | } | 776 | } |
| 774 | if (opt == NULL) | 777 | if (opt == NULL) |
| 775 | opt = np->opt; | 778 | opt = np->opt; |
| @@ -787,10 +790,13 @@ do_udp_sendmsg: | |||
| 787 | ipv6_addr_copy(&final, &fl->fl6_dst); | 790 | ipv6_addr_copy(&final, &fl->fl6_dst); |
| 788 | ipv6_addr_copy(&fl->fl6_dst, rt0->addr); | 791 | ipv6_addr_copy(&fl->fl6_dst, rt0->addr); |
| 789 | final_p = &final; | 792 | final_p = &final; |
| 793 | connected = 0; | ||
| 790 | } | 794 | } |
| 791 | 795 | ||
| 792 | if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) | 796 | if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { |
| 793 | fl->oif = np->mcast_oif; | 797 | fl->oif = np->mcast_oif; |
| 798 | connected = 0; | ||
| 799 | } | ||
| 794 | 800 | ||
| 795 | err = ip6_dst_lookup(sk, &dst, fl); | 801 | err = ip6_dst_lookup(sk, &dst, fl); |
| 796 | if (err) | 802 | if (err) |
| @@ -846,7 +852,7 @@ do_append_data: | |||
| 846 | else if (!corkreq) | 852 | else if (!corkreq) |
| 847 | err = udp_v6_push_pending_frames(sk, up); | 853 | err = udp_v6_push_pending_frames(sk, up); |
| 848 | 854 | ||
| 849 | if (dst) | 855 | if (dst && connected) |
| 850 | ip6_dst_store(sk, dst, | 856 | ip6_dst_store(sk, dst, |
| 851 | ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? | 857 | ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? |
| 852 | &np->daddr : NULL); | 858 | &np->daddr : NULL); |
diff --git a/net/socket.c b/net/socket.c index c699e93c33d7..f9264472377f 100644 --- a/net/socket.c +++ b/net/socket.c | |||
| @@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
| 1862 | if (err < 0) | 1862 | if (err < 0) |
| 1863 | goto out_freeiov; | 1863 | goto out_freeiov; |
| 1864 | } | 1864 | } |
| 1865 | err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); | 1865 | err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), |
| 1866 | COMPAT_FLAGS(msg)); | ||
| 1866 | if (err) | 1867 | if (err) |
| 1867 | goto out_freeiov; | 1868 | goto out_freeiov; |
| 1868 | if (MSG_CMSG_COMPAT & flags) | 1869 | if (MSG_CMSG_COMPAT & flags) |
