author:    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer: Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit:    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e
tree:      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8
parent:    ea53c912f8a86a8567697115b6a0d8152beee5c8
parent:    6a00f206debf8a5c8899055726ad127dbeeed098
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'net/dccp'):

 net/dccp/Kconfig                    |    4
 net/dccp/Makefile                   |    4
 net/dccp/ackvec.c                   |  616
 net/dccp/ackvec.h                   |  151
 net/dccp/ccid.h                     |   86
 net/dccp/ccids/Kconfig              |   31
 net/dccp/ccids/ccid2.c              |  444
 net/dccp/ccids/ccid2.h              |   42
 net/dccp/ccids/ccid3.c              |  268
 net/dccp/ccids/ccid3.h              |   51
 net/dccp/ccids/lib/loss_interval.c  |    2
 net/dccp/ccids/lib/packet_history.c |   39
 net/dccp/ccids/lib/packet_history.h |   22
 net/dccp/ccids/lib/tfrc.h           |    1
 net/dccp/ccids/lib/tfrc_equation.c  |   14
 net/dccp/dccp.h                     |   84
 net/dccp/feat.c                     |   10
 net/dccp/feat.h                     |    1
 net/dccp/input.c                    |   65
 net/dccp/ipv4.c                     |  102
 net/dccp/ipv6.c                     |  208
 net/dccp/minisocks.c                |   30
 net/dccp/options.c                  |  133
 net/dccp/output.c                   |  251
 net/dccp/probe.c                    |    1
 net/dccp/proto.c                    |  142
 net/dccp/qpolicy.c                  |  137
 net/dccp/sysctl.c                   |    4
 net/dccp/timer.c                    |   27
 29 files changed, 1531 insertions(+), 1439 deletions(-)
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index ad6dffd9070e..b75968a04017 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -49,7 +49,9 @@ config NET_DCCPPROBE
 	  what was just said, you don't need it: say N.

 	  Documentation on how to use DCCP connection probing can be found
-	  at http://linux-net.osdl.org/index.php/DccpProbe
+	  at:
+
+	  http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe

 	  To compile this code as a module, choose M here: the
 	  module will be called dccp_probe.
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o

-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
-
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
+	  qpolicy.o
 #
 # CCID algorithms to be used by dccp.ko
 #
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 92a6fcb40d7d..25b7a8d1ad58 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,444 +1,375 @@
 /*
  * net/dccp/ackvec.c
  *
- * An implementation of the DCCP protocol
+ * An implementation of Ack Vectors for the DCCP protocol
+ * Copyright (c) 2007 University of Aberdeen, Scotland, UK
  * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; version 2 of the License;
  */
-
-#include "ackvec.h"
 #include "dccp.h"
-
-#include <linux/init.h>
-#include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>

-#include <net/sock.h>
-
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;

-static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 {
-	struct dccp_ackvec_record *avr =
-			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);

-	if (avr != NULL)
-		INIT_LIST_HEAD(&avr->avr_node);
-
-	return avr;
+	if (av != NULL) {
+		av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
+		INIT_LIST_HEAD(&av->av_records);
+	}
+	return av;
 }

-static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
 {
-	if (unlikely(avr == NULL))
-		return;
-	/* Check if deleting a linked record */
-	WARN_ON(!list_empty(&avr->avr_node));
-	kmem_cache_free(dccp_ackvec_record_slab, avr);
+	struct dccp_ackvec_record *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
+		kmem_cache_free(dccp_ackvec_record_slab, cur);
+	INIT_LIST_HEAD(&av->av_records);
 }

-static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
-				   struct dccp_ackvec_record *avr)
+void dccp_ackvec_free(struct dccp_ackvec *av)
 {
-	/*
-	 * AVRs are sorted by seqno. Since we are sending them in order, we
-	 * just add the AVR at the head of the list.
-	 * -sorbo.
-	 */
-	if (!list_empty(&av->av_records)) {
-		const struct dccp_ackvec_record *head =
-					list_entry(av->av_records.next,
-						   struct dccp_ackvec_record,
-						   avr_node);
-		BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
+	if (likely(av != NULL)) {
+		dccp_ackvec_purge_records(av);
+		kmem_cache_free(dccp_ackvec_slab, av);
 	}
-
-	list_add(&avr->avr_node, &av->av_records);
 }

-int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_ackvec_update_records  -  Record information about sent Ack Vectors
+ * @av:        Ack Vector records to update
+ * @seqno:     Sequence number of the packet carrying the Ack Vector just sent
+ * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
+ */
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
-	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts, i;
-	u32 elapsed_time;
-	const unsigned char *tail, *from;
-	unsigned char *to;
 	struct dccp_ackvec_record *avr;
-	suseconds_t delta;
-
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
-		return -1;
-
-	delta = ktime_us_delta(ktime_get_real(), av->av_time);
-	elapsed_time = delta / 10;

-	if (elapsed_time != 0 &&
-	    dccp_insert_option_elapsed_time(skb, elapsed_time))
-		return -1;
-
-	avr = dccp_ackvec_record_new();
+	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
-		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
-	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
-	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
-
-	for (i = 0; i < nr_opts; ++i) {
-		int copylen = len;
-
-		if (len > DCCP_SINGLE_OPT_MAXLEN)
-			copylen = DCCP_SINGLE_OPT_MAXLEN;
-
-		*to++ = DCCPO_ACK_VECTOR_0;
-		*to++ = copylen + 2;
-
-		/* Check if buf_head wraps */
-		if (from + copylen > tail) {
-			const u16 tailsize = tail - from;
-
-			memcpy(to, from, tailsize);
-			to      += tailsize;
-			len     -= tailsize;
-			copylen -= tailsize;
-			from     = av->av_buf;
-		}
-
-		memcpy(to, from, copylen);
-		from += copylen;
-		to   += copylen;
-		len  -= copylen;
-	}
+		return -ENOBUFS;

+	avr->avr_ack_seqno  = seqno;
+	avr->avr_ack_ptr    = av->av_buf_head;
+	avr->avr_ack_ackno  = av->av_buf_ackno;
+	avr->avr_ack_nonce  = nonce_sum;
+	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
 	/*
-	 * From RFC 4340, A.2:
-	 *
-	 *	For each acknowledgement it sends, the HC-Receiver will add an
-	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
-	 *	sequence number it used for the ack packet; ack_ptr will equal
-	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 *	equal buf_nonce.
+	 * When the buffer overflows, we keep no more than one record. This is
+	 * the simplest way of disambiguating sender-Acks dating from before the
+	 * overflow from sender-Acks which refer to after the overflow; a simple
+	 * solution is preferable here since we are handling an exception.
 	 */
-	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	avr->avr_ack_ptr   = av->av_buf_head;
-	avr->avr_ack_ackno = av->av_buf_ackno;
-	avr->avr_ack_nonce = av->av_buf_nonce;
-	avr->avr_sent_len  = av->av_vec_len;
-
-	dccp_ackvec_insert_avr(av, avr);
+	if (av->av_overflow)
+		dccp_ackvec_purge_records(av);
+	/*
+	 * Since GSS is incremented for each packet, the list is automatically
+	 * arranged in descending order of @ack_seqno.
+	 */
+	list_add(&avr->avr_node, &av->av_records);

-	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
-		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_sent_len,
+	dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
 		      (unsigned long long)avr->avr_ack_seqno,
-		      (unsigned long long)avr->avr_ack_ackno);
+		      (unsigned long long)avr->avr_ack_ackno,
+		      avr->avr_ack_runlen);
 	return 0;
 }

-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
+static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
+						     const u64 ackno)
 {
-	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
-
-	if (av != NULL) {
-		av->av_buf_head  = DCCP_MAX_ACKVEC_LEN - 1;
-		av->av_buf_ackno = UINT48_MAX + 1;
-		av->av_buf_nonce = 0;
-		av->av_time      = ktime_set(0, 0);
-		av->av_vec_len   = 0;
-		INIT_LIST_HEAD(&av->av_records);
+	struct dccp_ackvec_record *avr;
+	/*
+	 * Exploit that records are inserted in descending order of sequence
+	 * number, start with the oldest record first. If @ackno is `before'
+	 * the earliest ack_ackno, the packet is too old to be considered.
+	 */
+	list_for_each_entry_reverse(avr, av_list, avr_node) {
+		if (avr->avr_ack_seqno == ackno)
+			return avr;
+		if (before48(ackno, avr->avr_ack_seqno))
+			break;
 	}
-
-	return av;
+	return NULL;
 }

-void dccp_ackvec_free(struct dccp_ackvec *av)
+/*
+ * Buffer index and length computation using modulo-buffersize arithmetic.
+ * Note that, as pointers move from right to left, head is `before' tail.
+ */
+static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
 {
-	if (unlikely(av == NULL))
-		return;
-
-	if (!list_empty(&av->av_records)) {
-		struct dccp_ackvec_record *avr, *next;
-
-		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
-			list_del_init(&avr->avr_node);
-			dccp_ackvec_record_delete(avr);
-		}
-	}
-
-	kmem_cache_free(dccp_ackvec_slab, av);
+	return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
 }

-static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const u32 index)
+static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
 {
-	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
+	return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
 }

-static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const u32 index)
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
 {
-	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
+	if (unlikely(av->av_overflow))
+		return DCCPAV_MAX_ACKVEC_LEN;
+	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
 }

-/*
- * If several packets are missing, the HC-Receiver may prefer to enter multiple
- * bytes with run length 0, rather than a single byte with a larger run length;
- * this simplifies table updates if one of the missing packets arrives.
+/**
+ * dccp_ackvec_update_old  -  Update previous state as per RFC 4340, 11.4.1
+ * @av:       non-empty buffer to update
+ * @distance: negative or zero distance of @seqno from buf_ackno downward
+ * @seqno:    the (old) sequence number whose record is to be updated
+ * @state:    state in which packet carrying @seqno was received
  */
-static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
-						 const unsigned int packets,
-						 const unsigned char state)
+static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
+				   u64 seqno, enum dccp_ackvec_states state)
 {
-	long gap;
-	long new_head;
+	u16 ptr = av->av_buf_head;

-	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
-		return -ENOBUFS;
+	BUG_ON(distance > 0);
+	if (unlikely(dccp_ackvec_is_empty(av)))
+		return;

-	gap	 = packets - 1;
-	new_head = av->av_buf_head - packets;
+	do {
+		u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);

-	if (new_head < 0) {
-		if (gap > 0) {
-			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
-			       gap + new_head + 1);
-			gap = -new_head;
+		if (distance + runlen >= 0) {
+			/*
+			 * Only update the state if packet has not been received
+			 * yet. This is OK as per the second table in RFC 4340,
+			 * 11.4.1; i.e. here we are using the following table:
+			 *                     RECEIVED
+			 *                      0   1   3
+			 *              S     +---+---+---+
+			 *              T   0 | 0 | 0 | 0 |
+			 *              O     +---+---+---+
+			 *              R   1 | 1 | 1 | 1 |
+			 *              E     +---+---+---+
+			 *              D   3 | 0 | 1 | 3 |
+			 *                    +---+---+---+
+			 * The "Not Received" state was set by reserve_seats().
+			 */
+			if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
+				av->av_buf[ptr] = state;
+			else
+				dccp_pr_debug("Not changing %llu state to %u\n",
+					      (unsigned long long)seqno, state);
+			break;
 		}
-		new_head += DCCP_MAX_ACKVEC_LEN;
-	}

-	av->av_buf_head = new_head;
+		distance += runlen + 1;
+		ptr	  = __ackvec_idx_add(ptr, 1);

-	if (gap > 0)
-		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+	} while (ptr != av->av_buf_tail);
+}

-	av->av_buf[av->av_buf_head] = state;
-	av->av_vec_len += packets;
-	return 0;
+/* Mark @num entries after buf_head as "Not yet received". */
+static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
+{
+	u16 start = __ackvec_idx_add(av->av_buf_head, 1),
+	    len   = DCCPAV_MAX_ACKVEC_LEN - start;
+
+	/* check for buffer wrap-around */
+	if (num > len) {
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
+		start = 0;
+		num  -= len;
+	}
+	if (num)
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
 }

-/*
- * Implements the RFC 4340, Appendix A
+/**
+ * dccp_ackvec_add_new  -  Record one or more new entries in Ack Vector buffer
+ * @av:          container of buffer to update (can be empty or non-empty)
+ * @num_packets: number of packets to register (must be >= 1)
+ * @seqno:       sequence number of the first packet in @num_packets
+ * @state:       state in which packet carrying @seqno was received
  */
-int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-		    const u64 ackno, const u8 state)
+static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
+				u64 seqno, enum dccp_ackvec_states state)
 {
-	/*
-	 * Check at the right places if the buffer is full, if it is, tell the
-	 * caller to start dropping packets till the HC-Sender acks our ACK
-	 * vectors, when we will free up space in av_buf.
-	 *
-	 * We may well decide to do buffer compression, etc, but for now lets
-	 * just drop.
-	 *
-	 * From Appendix A.1.1 (`New Packets'):
-	 *
-	 *	Of course, the circular buffer may overflow, either when the
-	 *	HC-Sender is sending data at a very high rate, when the
-	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
-	 *	or when the HC-Sender is forgetting to acknowledge those acks
-	 *	(so the HC-Receiver is unable to clean up old state). In this
-	 *	case, the HC-Receiver should either compress the buffer (by
-	 *	increasing run lengths when possible), transfer its state to
-	 *	a larger buffer, or, as a last resort, drop all received
-	 *	packets, without processing them whatsoever, until its buffer
-	 *	shrinks again.
-	 */
+	u32 num_cells = num_packets;

-	/* See if this is the first ackno being inserted */
-	if (av->av_vec_len == 0) {
-		av->av_buf[av->av_buf_head] = state;
-		av->av_vec_len = 1;
-	} else if (after48(ackno, av->av_buf_ackno)) {
-		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
+	if (num_packets > DCCPAV_BURST_THRESH) {
+		u32 lost_packets = num_packets - 1;

+		DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
 		/*
-		 * Look if the state of this packet is the same as the
-		 * previous ackno and if so if we can bump the head len.
+		 * We received 1 packet and have a loss of size "num_packets-1"
+		 * which we squeeze into num_cells-1 rather than reserving an
+		 * entire byte for each lost packet.
+		 * The reason is that the vector grows in O(burst_length); when
+		 * it grows too large there will be no room left for the payload.
+		 * This is a trade-off: if a few packets out of the burst show
+		 * up later, their state will not be changed; it is simply too
+		 * costly to reshuffle/reallocate/copy the buffer each time.
+		 * Should such problems persist, we will need to switch to a
+		 * different underlying data structure.
 		 */
-		if (delta == 1 &&
-		    dccp_ackvec_state(av, av->av_buf_head) == state &&
-		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
-			av->av_buf[av->av_buf_head]++;
-		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
-			return -ENOBUFS;
-	} else {
-		/*
-		 * A.1.2.  Old Packets
-		 *
-		 *	When a packet with Sequence Number S <= buf_ackno
-		 *	arrives, the HC-Receiver will scan the table for
-		 *	the byte corresponding to S. (Indexing structures
-		 *	could reduce the complexity of this scan.)
-		 */
-		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-		u32 index = av->av_buf_head;
+		for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
+			u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);

-		while (1) {
-			const u8 len = dccp_ackvec_len(av, index);
-			const u8 av_state = dccp_ackvec_state(av, index);
-			/*
-			 * valid packets not yet in av_buf have a reserved
-			 * entry, with a len equal to 0.
-			 */
-			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
-			    len == 0 && delta == 0) { /* Found our
-							 reserved seat! */
-				dccp_pr_debug("Found %llu reserved seat!\n",
-					      (unsigned long long)ackno);
-				av->av_buf[index] = state;
-				goto out;
-			}
-			/* len == 0 means one packet */
-			if (delta < len + 1)
-				goto out_duplicate;
-
-			delta -= len + 1;
-			if (++index == DCCP_MAX_ACKVEC_LEN)
-				index = 0;
+			av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
+			av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
+
+			lost_packets -= len;
 		}
 	}

-	av->av_buf_ackno = ackno;
-	av->av_time = ktime_get_real();
-out:
-	return 0;
+	if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
+		DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
+		av->av_overflow = true;
+	}
+
+	av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
+	if (av->av_overflow)
+		av->av_buf_tail = av->av_buf_head;

-out_duplicate:
-	/* Duplicate packet */
-	dccp_pr_debug("Received a dup or already considered lost "
-		      "packet: %llu\n", (unsigned long long)ackno);
-	return -EILSEQ;
+	av->av_buf[av->av_buf_head] = state;
+	av->av_buf_ackno	    = seqno;
+
+	if (num_packets > 1)
+		dccp_ackvec_reserve_seats(av, num_packets - 1);
 }

-static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
-				     struct dccp_ackvec_record *avr)
+/**
+ * dccp_ackvec_input  -  Register incoming packet in the buffer
+ */
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
 {
-	struct dccp_ackvec_record *next;
+	u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	enum dccp_ackvec_states state = DCCPAV_RECEIVED;

-	/* sort out vector length */
-	if (av->av_buf_head <= avr->avr_ack_ptr)
-		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
-	else
-		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
-				 av->av_buf_head + avr->avr_ack_ptr;
+	if (dccp_ackvec_is_empty(av)) {
+		dccp_ackvec_add_new(av, 1, seqno, state);
+		av->av_tail_ackno = seqno;

-	/* free records */
-	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del_init(&avr->avr_node);
-		dccp_ackvec_record_delete(avr);
-	}
-}
+	} else {
+		s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
+		u8 *current_head = av->av_buf + av->av_buf_head;

-void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
-				 const u64 ackno)
-{
-	struct dccp_ackvec_record *avr;
+		if (num_packets == 1 &&
+		    dccp_ackvec_state(current_head) == state &&
+		    dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {

-	/*
-	 * If we traverse backwards, it should be faster when we have large
-	 * windows. We will be receiving ACKs for stuff we sent a while back
-	 * -sorbo.
-	 */
-	list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
-		if (ackno == avr->avr_ack_seqno) {
-			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
-				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), 1,
-				      (unsigned long long)avr->avr_ack_seqno,
-				      (unsigned long long)avr->avr_ack_ackno);
-			dccp_ackvec_throw_record(av, avr);
-			break;
-		} else if (avr->avr_ack_seqno > ackno)
-			break; /* old news */
+			*current_head	 += 1;
+			av->av_buf_ackno  = seqno;
+
+		} else if (num_packets > 0) {
+			dccp_ackvec_add_new(av, num_packets, seqno, state);
+		} else {
+			dccp_ackvec_update_old(av, num_packets, seqno, state);
+		}
 	}
 }

-static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
-					    struct sock *sk, u64 *ackno,
-					    const unsigned char len,
-					    const unsigned char *vector)
+/**
+ * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
+ * This routine is called when the peer acknowledges the receipt of Ack Vectors
+ * up to and including @ackno. While based on section A.3 of RFC 4340, here
+ * are additional precautions to prevent corrupted buffer state. In particular,
+ * we use tail_ackno to identify outdated records; it always marks the earliest
+ * packet of group (2) in 11.4.2.
+ */
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
 {
-	unsigned char i;
-	struct dccp_ackvec_record *avr;
+	struct dccp_ackvec_record *avr, *next;
+	u8 runlen_now, eff_runlen;
+	s64 delta;

-	/* Check if we actually sent an ACK vector */
-	if (list_empty(&av->av_records))
+	avr = dccp_ackvec_lookup(&av->av_records, ackno);
+	if (avr == NULL)
 		return;
+	/*
+	 * Deal with outdated acknowledgments: this arises when e.g. there are
+	 * several old records and the acks from the peer come in slowly. In
+	 * that case we may still have records that pre-date tail_ackno.
+	 */
+	delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
+	if (delta < 0)
+		goto free_records;
+	/*
+	 * Deal with overlapping Ack Vectors: don't subtract more than the
+	 * number of packets between tail_ackno and ack_ackno.
+	 */
+	eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;

-	i = len;
+	runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
 	/*
-	 * XXX
-	 * I think it might be more efficient to work backwards. See comment on
-	 * rcv_ackno. -sorbo.
+	 * The run length of Ack Vector cells does not decrease over time. If
+	 * the run length is the same as at the time the Ack Vector was sent, we
+	 * free the ack_ptr cell. That cell can however not be freed if the run
+	 * length has increased: in this case we need to move the tail pointer
+	 * backwards (towards higher indices), to its next-oldest neighbour.
 	 */
-	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
-	while (i--) {
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-		u64 ackno_end_rl;
+	if (runlen_now > eff_runlen) {

-		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
+		av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
+		av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);

+		/* This move may not have cleared the overflow flag. */
+		if (av->av_overflow)
+			av->av_overflow = (av->av_buf_head == av->av_buf_tail);
+	} else {
+		av->av_buf_tail = avr->avr_ack_ptr;
 		/*
-		 * If our AVR sequence number is greater than the ack, go
-		 * forward in the AVR list until it is not so.
+		 * We have made sure that avr points to a valid cell within the
+		 * buffer. This cell is either older than head, or equals head
+		 * (empty buffer): in both cases we no longer have any overflow.
 		 */
-		list_for_each_entry_from(avr, &av->av_records, avr_node) {
-			if (!after48(avr->avr_ack_seqno, *ackno))
-				goto found;
-		}
-		/* End of the av_records list, not found, exit */
-		break;
-found:
-		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
-			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
-				dccp_pr_debug("%s ACK vector 0, len=%d, "
-					      "ack_seqno=%llu, ack_ackno=%llu, "
-					      "ACKED!\n",
-					      dccp_role(sk), len,
-					      (unsigned long long)
-					      avr->avr_ack_seqno,
-					      (unsigned long long)
-					      avr->avr_ack_ackno);
-				dccp_ackvec_throw_record(av, avr);
-				break;
-			}
-			/*
-			 * If it wasn't received, continue scanning... we might
-			 * find another one.
-			 */
-		}
+		av->av_overflow = 0;
+	}

-		dccp_set_seqno(ackno, ackno_end_rl - 1);
-		++vector;
+	/*
+	 * The peer has acknowledged up to and including ack_ackno. Hence the
+	 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
+	 */
+	av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
+
+free_records:
+	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
 	}
 }

-int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
+/*
+ * Routines to keep track of Ack Vectors received in an skb
+ */
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
 {
-	if (len > DCCP_SINGLE_OPT_MAXLEN)
-		return -1;
+	struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
+
+	if (new == NULL)
+		return -ENOBUFS;
+	new->vec   = vec;
+	new->len   = len;
+	new->nonce = nonce;

-	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
-	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
-					ackno, len, value);
+	list_add_tail(&new->node, head);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
+
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
+{
+	struct dccp_ackvec_parsed *cur, *next;
+
+	list_for_each_entry_safe(cur, next, parsed_chunks, node)
+		kfree(cur);
+	INIT_LIST_HEAD(parsed_chunks);
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);

 int __init dccp_ackvec_init(void)
 {
@@ -448,10 +379,9 @@ int __init dccp_ackvec_init(void)
 	if (dccp_ackvec_slab == NULL)
 		goto out_err;

-	dccp_ackvec_record_slab =
-			kmem_cache_create("dccp_ackvec_record",
-					  sizeof(struct dccp_ackvec_record),
-					  0, SLAB_HWCACHE_ALIGN, NULL);
+	dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
+					     sizeof(struct dccp_ackvec_record),
+					     0, SLAB_HWCACHE_ALIGN, NULL);
 	if (dccp_ackvec_record_slab == NULL)
 		goto out_destroy_slab;

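The rewritten ackvec.c above replaces the old signed head arithmetic with u16 indices that only ever wrap modulo the buffer size (__ackvec_idx_add/__ackvec_idx_sub), with the head moving from right to left. A minimal userspace sketch of that index arithmetic, assuming DCCP_SINGLE_OPT_MAXLEN = 253 as in mainline; AV_LEN, idx_add and idx_sub are illustrative names, not part of the patch:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors DCCPAV_MAX_ACKVEC_LEN = DCCP_SINGLE_OPT_MAXLEN * 2. */
	#define AV_LEN 506

	/* Head and tail move from right to left; "adding" walks toward the tail. */
	static uint16_t idx_add(uint16_t a, uint16_t b)
	{
		return (a + b) % AV_LEN;
	}

	static uint16_t idx_sub(uint16_t a, uint16_t b)
	{
		return idx_add(a, AV_LEN - b);
	}

	int main(void)
	{
		uint16_t head = AV_LEN - 1, tail = AV_LEN - 1;	/* empty buffer */

		/* Live length is tail - head (mod AV_LEN), as in dccp_ackvec_buflen(). */
		assert(idx_sub(tail, head) == 0);

		head = idx_sub(head, 10);	/* record ten new cells */
		printf("head=%u tail=%u buflen=%u\n",
		       (unsigned)head, (unsigned)tail, (unsigned)idx_sub(tail, head));
		return 0;
	}

Because head == tail denotes both "empty" and "completely full", the patch disambiguates the two cases with the separate av_overflow flag, as seen in dccp_ackvec_buflen() above.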
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 7ea557b7c6b1..e2ab0627a5ff 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
 /*
  * net/dccp/ackvec.h
  *
- * An implementation of the DCCP protocol
+ * An implementation of Ack Vectors for the DCCP protocol
+ * Copyright (c) 2007 University of Aberdeen, Scotland, UK
  * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -13,99 +13,124 @@

 #include <linux/dccp.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>

-/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
+/*
+ * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
+ * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
+ * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
+ * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ * The maximum value is bounded by the u16 types for indices and functions.
+ */
+#define DCCPAV_NUM_ACKVECS	2
+#define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)

 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16

-#define DCCP_ACKVEC_STATE_RECEIVED	0
-#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+/* Threshold for coping with large bursts of losses */
+#define DCCPAV_BURST_THRESH	(DCCPAV_MAX_ACKVEC_LEN / 8)

-#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+enum dccp_ackvec_states {
+	DCCPAV_RECEIVED =	0x00,
+	DCCPAV_ECN_MARKED =	0x40,
+	DCCPAV_RESERVED =	0x80,
+	DCCPAV_NOT_RECEIVED =	0xC0
+};
+#define DCCPAV_MAX_RUNLEN	0x3F

-/** struct dccp_ackvec - ack vector
- *
- * This data structure is the one defined in RFC 4340, Appendix A.
- *
- * @av_buf_head - circular buffer head
- * @av_buf_tail - circular buffer tail
- * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
- *		buffer (i.e. %av_buf_head)
- * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- *		by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
+static inline u8 dccp_ackvec_runlen(const u8 *cell)
+{
+	return *cell & DCCPAV_MAX_RUNLEN;
+}
+
+static inline u8 dccp_ackvec_state(const u8 *cell)
+{
+	return *cell & ~DCCPAV_MAX_RUNLEN;
+}
+
+/** struct dccp_ackvec - Ack Vector main data structure
  *
- * @av_records - list of dccp_ackvec_record
- * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * This implements a fixed-size circular buffer within an array and is largely
+ * based on Appendix A of RFC 4340.
  *
- * @av_time - the time in usecs
- * @av_buf - circular buffer of acknowledgeable packets
+ * @av_buf:	   circular buffer storage area
+ * @av_buf_head:   head index; begin of live portion in @av_buf
+ * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
+ * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
+ *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
+ * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
  */
 struct dccp_ackvec {
-	u64		 av_buf_ackno;
-	struct list_head av_records;
-	ktime_t		 av_time;
+	u8		 av_buf[DCCPAV_MAX_ACKVEC_LEN];
 	u16		 av_buf_head;
-	u16		 av_vec_len;
-	u8		 av_buf_nonce;
-	u8		 av_ack_nonce;
-	u8		 av_buf[DCCP_MAX_ACKVEC_LEN];
+	u16		 av_buf_tail;
+	u64		 av_buf_ackno:48;
+	u64		 av_tail_ackno:48;
+	bool		 av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	u8		 av_overflow:1;
+	struct list_head av_records;
 };

-/** struct dccp_ackvec_record - ack vector record
+/** struct dccp_ackvec_record - Records information about sent Ack Vectors
  *
- * ACK vector record as defined in Appendix A of spec.
+ * These list entries define the additional information which the HC-Receiver
+ * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
  *
- * The list is sorted by avr_ack_seqno
+ * @avr_node:	    the list node in @av_records
+ * @avr_ack_seqno:  sequence number of the packet the Ack Vector was sent on
+ * @avr_ack_ackno:  the Ack number that this record/Ack Vector refers to
+ * @avr_ack_ptr:    pointer into @av_buf where this record starts
+ * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
+ * @avr_ack_nonce:  the sum of @av_buf_nonce's at the time this record was sent
  *
- * @avr_node - node in av_records
- * @avr_ack_seqno - sequence number of the packet this record was sent on
- * @avr_ack_ackno - sequence number being acknowledged
- * @avr_ack_ptr - pointer into av_buf where this record starts
- * @avr_ack_nonce - av_ack_nonce at the time this record was sent
- * @avr_sent_len - lenght of the record in av_buf
+ * The list as a whole is sorted in descending order by @avr_ack_seqno.
  */
 struct dccp_ackvec_record {
 	struct list_head avr_node;
-	u64		 avr_ack_seqno;
-	u64		 avr_ack_ackno;
+	u64		 avr_ack_seqno:48;
+	u64		 avr_ack_ackno:48;
 	u16		 avr_ack_ptr;
-	u16		 avr_sent_len;
-	u8		 avr_ack_nonce;
+	u8		 avr_ack_runlen;
+	u8		 avr_ack_nonce:1;
 };

-struct sock;
-struct sk_buff;
-
 extern int dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);

 extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
 extern void dccp_ackvec_free(struct dccp_ackvec *av);

-extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-			   const u64 ackno, const u8 state);
-
-extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
-					struct sock *sk, const u64 ackno);
-extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-			     u64 *ackno, const u8 opt,
-			     const u8 *value, const u8 len);
+extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
+extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
+extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);

-extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
-
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
-	return av->av_vec_len;
+	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
+
+/**
+ * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb
+ * @vec:   start of vector (offset into skb)
+ * @len:   length of @vec
+ * @nonce: whether @vec had an ECN nonce of 0 or 1
+ * @node:  FIFO - arranged in descending order of ack_ackno
+ * This structure is used by CCIDs to access Ack Vectors in a received skb.
+ */
+struct dccp_ackvec_parsed {
+	u8		 *vec,
+			 len,
+			 nonce:1;
+	struct list_head node;
+};
+
+extern int dccp_ackvec_parsed_add(struct list_head *head,
+				  u8 *vec, u8 len, u8 nonce);
+extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
 #endif /* _ACKVEC_H */
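Each Ack Vector cell now packs a state (top two bits, enum dccp_ackvec_states) and a run length (low six bits, DCCPAV_MAX_RUNLEN), split apart by the dccp_ackvec_runlen()/dccp_ackvec_state() helpers above. A hedged userspace sketch of that encoding, reusing the patch's constant values; the surrounding program is illustrative only:

	#include <assert.h>
	#include <stdint.h>

	/* Cell states from enum dccp_ackvec_states (RFC 4340, 11.4). */
	enum {
		RECEIVED     = 0x00,
		ECN_MARKED   = 0x40,
		RESERVED     = 0x80,
		NOT_RECEIVED = 0xC0
	};
	#define MAX_RUNLEN 0x3F	/* the low six bits carry the run length */

	static uint8_t runlen(uint8_t cell) { return cell & MAX_RUNLEN;  }
	static uint8_t state(uint8_t cell)  { return cell & ~MAX_RUNLEN; }

	int main(void)
	{
		/* One cell: "received, and so were the 5 packets just before it". */
		uint8_t cell = RECEIVED | 5;

		assert(state(cell) == RECEIVED && runlen(cell) == 5);

		/* Extending the run, as dccp_ackvec_input() does for num_packets == 1. */
		if (state(cell) == RECEIVED && runlen(cell) < MAX_RUNLEN)
			cell += 1;
		assert(runlen(cell) == 6);
		return 0;
	}

Since a run length saturates at 63, a 64th consecutive packet in the same state opens a new cell; this is why dccp_ackvec_input() checks runlen against DCCPAV_MAX_RUNLEN before bumping the head cell.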
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac9636..75c3582a7678 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,22 +62,18 @@ struct ccid_operations {
 	void		(*ccid_hc_tx_exit)(struct sock *sk);
 	void		(*ccid_hc_rx_packet_recv)(struct sock *sk,
 						  struct sk_buff *skb);
-	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
-						    unsigned char option,
-						    unsigned char len, u16 idx,
-						    unsigned char* value);
+	int		(*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
+						    u8 opt, u8 *val, u8 len);
 	int		(*ccid_hc_rx_insert_options)(struct sock *sk,
 						     struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_recv)(struct sock *sk,
 						  struct sk_buff *skb);
-	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
-						    unsigned char option,
-						    unsigned char len, u16 idx,
-						    unsigned char* value);
+	int		(*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
+						    u8 opt, u8 *val, u8 len);
 	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
 						  struct sk_buff *skb);
 	void		(*ccid_hc_tx_packet_sent)(struct sock *sk,
-						  int more, unsigned int len);
+						  unsigned int len);
 	void		(*ccid_hc_rx_get_info)(struct sock *sk,
 					       struct tcp_info *info);
 	void		(*ccid_hc_tx_get_info)(struct sock *sk,
@@ -138,20 +134,48 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
 extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
 extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);

+/*
+ * Congestion control of queued data packets via CCID decision.
+ *
+ * The TX CCID performs its congestion-control by indicating whether and when a
+ * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
+ * The following modes are supported via the symbolic constants below:
+ *  - timer-based pacing    (CCID returns a delay value in milliseconds);
+ *  - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
+ */
+
+enum ccid_dequeueing_decision {
+	CCID_PACKET_SEND_AT_ONCE =	 0x00000,  /* "green light": no delay */
+	CCID_PACKET_DELAY_MAX =		 0x0FFFF,  /* maximum delay in msecs  */
+	CCID_PACKET_DELAY =		 0x10000,  /* CCID msec-delay mode	   */
+	CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000,  /* CCID autonomous mode    */
+	CCID_PACKET_ERR =		 0xF0000,  /* error condition	   */
+};
+
+static inline int ccid_packet_dequeue_eval(const int return_code)
+{
+	if (return_code < 0)
+		return CCID_PACKET_ERR;
+	if (return_code == 0)
+		return CCID_PACKET_SEND_AT_ONCE;
+	if (return_code <= CCID_PACKET_DELAY_MAX)
+		return CCID_PACKET_DELAY;
+	return return_code;
+}
+
 static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
 					 struct sk_buff *skb)
 {
-	int rc = 0;
 	if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
-		rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
-	return rc;
+		return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
+	return CCID_PACKET_SEND_AT_ONCE;
 }

 static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
-					  int more, unsigned int len)
+					  unsigned int len)
 {
 	if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
-		ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
+		ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
 }

 static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
@@ -168,27 +192,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
 	ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
 }

+/**
+ * ccid_hc_tx_parse_options  -  Parse CCID-specific options sent by the receiver
+ * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
+ * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
+ * @val: value of @opt
+ * @len: length of @val in bytes
+ */
 static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
-					   unsigned char option,
-					   unsigned char len, u16 idx,
-					   unsigned char* value)
+					   u8 pkt, u8 opt, u8 *val, u8 len)
 {
-	int rc = 0;
-	if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
-		rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
-						    value);
-	return rc;
+	if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+		return 0;
+	return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
 }

+/**
+ * ccid_hc_rx_parse_options  -  Parse CCID-specific options sent by the sender
+ * Arguments are analogous to ccid_hc_tx_parse_options()
+ */
 static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
-					   unsigned char option,
-					   unsigned char len, u16 idx,
-					   unsigned char* value)
+					   u8 pkt, u8 opt, u8 *val, u8 len)
 {
-	int rc = 0;
-	if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
-		rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
-	return rc;
+	if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+		return 0;
+	return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
 }

 static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
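The new ccid_dequeueing_decision codes fold a CCID's raw return value into one of three dequeueing modes, replacing the old 0/1 polling convention. A small standalone C sketch of the same folding as ccid_packet_dequeue_eval(); the enum values are copied from the patch, but the driver program around them is invented for illustration:

	#include <stdio.h>

	/* Constants mirror enum ccid_dequeueing_decision in the patched ccid.h. */
	enum {
		SEND_AT_ONCE       = 0x00000,	/* "green light": no delay */
		DELAY_MAX          = 0x0FFFF,	/* maximum delay in msecs  */
		DELAY              = 0x10000,	/* CCID msec-delay mode    */
		WILL_DEQUEUE_LATER = 0x20000,	/* CCID autonomous mode    */
		ERR                = 0xF0000,	/* error condition         */
	};

	/* Same folding as ccid_packet_dequeue_eval(): negative means error,
	 * zero means send now, a small positive value is a delay in msecs,
	 * anything else (e.g. WILL_DEQUEUE_LATER) passes through unchanged. */
	static int dequeue_eval(int rc)
	{
		if (rc < 0)
			return ERR;
		if (rc == 0)
			return SEND_AT_ONCE;
		if (rc <= DELAY_MAX)
			return DELAY;
		return rc;
	}

	int main(void)
	{
		printf("%#x\n", dequeue_eval(-22));			/* 0xf0000: error        */
		printf("%#x\n", dequeue_eval(0));			/* 0x0: send at once     */
		printf("%#x\n", dequeue_eval(250));			/* 0x10000: 250 ms delay */
		printf("%#x\n", dequeue_eval(WILL_DEQUEUE_LATER));	/* CCID dequeues itself  */
		return 0;
	}

This is what lets the ccid2.c change below return CCID_PACKET_WILL_DEQUEUE_LATER when the window is full and then wake the transmitter via the dccps_xmitlet tasklet, instead of having the DCCP output path poll the CCID.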
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG

 	  If in doubt, say N.

-config IP_DCCP_CCID3_RTO
-	  int "Use higher bound for nofeedback timer"
-	  default 100
-	  depends on IP_DCCP_CCID3 && EXPERIMENTAL
-	  ---help---
-	    Use higher lower bound for nofeedback timer expiration.
-
-	    The TFRC nofeedback timer normally expires after the maximum of 4
-	    RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
-	    with a small RTT this can mean a high processing load and reduced
-	    performance, since then the nofeedback timer is triggered very
-	    frequently.
-
-	    This option enables to set a higher lower bound for the nofeedback
-	    value. Values in units of milliseconds can be set here.
-
-	    A value of 0 disables this feature by enforcing the value specified
-	    in RFC 3448. The following values have been suggested as bounds for
-	    experimental use:
-		* 16-20ms to match the typical multimedia inter-frame interval
-		* 100ms as a reasonable compromise [default]
-		* 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
-
-	    The default of 100ms is a compromise between a large value for
-	    efficient DCCP implementations, and a small value to avoid disrupting
-	    the network in times of congestion.
-
-	    The purpose of the nofeedback timer is to slow DCCP down when there
-	    is serious network congestion: experimenting with larger values should
-	    therefore not be performed on WANs.
-
 config IP_DCCP_TFRC_LIB
 	def_bool y if IP_DCCP_CCID3

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 9b3ae9922be1..fadecd20d75b 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -25,59 +25,14 @@ | |||
25 | */ | 25 | */ |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include "../feat.h" | 27 | #include "../feat.h" |
28 | #include "../ccid.h" | ||
29 | #include "../dccp.h" | ||
30 | #include "ccid2.h" | 28 | #include "ccid2.h" |
31 | 29 | ||
32 | 30 | ||
33 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 31 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
34 | static int ccid2_debug; | 32 | static int ccid2_debug; |
35 | #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) | 33 | #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) |
36 | |||
37 | static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc) | ||
38 | { | ||
39 | int len = 0; | ||
40 | int pipe = 0; | ||
41 | struct ccid2_seq *seqp = hc->tx_seqh; | ||
42 | |||
43 | /* there is data in the chain */ | ||
44 | if (seqp != hc->tx_seqt) { | ||
45 | seqp = seqp->ccid2s_prev; | ||
46 | len++; | ||
47 | if (!seqp->ccid2s_acked) | ||
48 | pipe++; | ||
49 | |||
50 | while (seqp != hc->tx_seqt) { | ||
51 | struct ccid2_seq *prev = seqp->ccid2s_prev; | ||
52 | |||
53 | len++; | ||
54 | if (!prev->ccid2s_acked) | ||
55 | pipe++; | ||
56 | |||
57 | /* packets are sent sequentially */ | ||
58 | BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, | ||
59 | prev->ccid2s_seq ) >= 0); | ||
60 | BUG_ON(time_before(seqp->ccid2s_sent, | ||
61 | prev->ccid2s_sent)); | ||
62 | |||
63 | seqp = prev; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | BUG_ON(pipe != hc->tx_pipe); | ||
68 | ccid2_pr_debug("len of chain=%d\n", len); | ||
69 | |||
70 | do { | ||
71 | seqp = seqp->ccid2s_prev; | ||
72 | len++; | ||
73 | } while (seqp != hc->tx_seqh); | ||
74 | |||
75 | ccid2_pr_debug("total len=%d\n", len); | ||
76 | BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN); | ||
77 | } | ||
78 | #else | 34 | #else |
79 | #define ccid2_pr_debug(format, a...) | 35 | #define ccid2_pr_debug(format, a...) |
80 | #define ccid2_hc_tx_check_sanity(hc) | ||
81 | #endif | 36 | #endif |
82 | 37 | ||
83 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) | 38 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) |
@@ -123,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) | |||
123 | 78 | ||
124 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | 79 | static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
125 | { | 80 | { |
126 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 81 | if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) |
127 | 82 | return CCID_PACKET_WILL_DEQUEUE_LATER; | |
128 | if (hc->tx_pipe < hc->tx_cwnd) | 83 | return CCID_PACKET_SEND_AT_ONCE; |
129 | return 0; | ||
130 | |||
131 | return 1; /* XXX CCID should dequeue when ready instead of polling */ | ||
132 | } | 84 | } |
133 | 85 | ||
134 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | 86 | static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) |
@@ -156,19 +108,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) | |||
156 | dp->dccps_l_ack_ratio = val; | 108 | dp->dccps_l_ack_ratio = val; |
157 | } | 109 | } |
158 | 110 | ||
159 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val) | ||
160 | { | ||
161 | ccid2_pr_debug("change SRTT to %ld\n", val); | ||
162 | hc->tx_srtt = val; | ||
163 | } | ||
164 | |||
165 | static void ccid2_start_rto_timer(struct sock *sk); | ||
166 | |||
167 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 111 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
168 | { | 112 | { |
169 | struct sock *sk = (struct sock *)data; | 113 | struct sock *sk = (struct sock *)data; |
170 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 114 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
171 | long s; | 115 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); |
172 | 116 | ||
173 | bh_lock_sock(sk); | 117 | bh_lock_sock(sk); |
174 | if (sock_owned_by_user(sk)) { | 118 | if (sock_owned_by_user(sk)) { |
@@ -178,23 +122,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
178 | 122 | ||
179 | ccid2_pr_debug("RTO_EXPIRE\n"); | 123 | ccid2_pr_debug("RTO_EXPIRE\n"); |
180 | 124 | ||
181 | ccid2_hc_tx_check_sanity(hc); | ||
182 | |||
183 | /* back-off timer */ | 125 | /* back-off timer */ |
184 | hc->tx_rto <<= 1; | 126 | hc->tx_rto <<= 1; |
185 | 127 | if (hc->tx_rto > DCCP_RTO_MAX) | |
186 | s = hc->tx_rto / HZ; | 128 | hc->tx_rto = DCCP_RTO_MAX; |
187 | if (s > 60) | ||
188 | hc->tx_rto = 60 * HZ; | ||
189 | |||
190 | ccid2_start_rto_timer(sk); | ||
191 | 129 | ||
192 | /* adjust pipe, cwnd etc */ | 130 | /* adjust pipe, cwnd etc */ |
193 | hc->tx_ssthresh = hc->tx_cwnd / 2; | 131 | hc->tx_ssthresh = hc->tx_cwnd / 2; |
194 | if (hc->tx_ssthresh < 2) | 132 | if (hc->tx_ssthresh < 2) |
195 | hc->tx_ssthresh = 2; | 133 | hc->tx_ssthresh = 2; |
196 | hc->tx_cwnd = 1; | 134 | hc->tx_cwnd = 1; |
197 | hc->tx_pipe = 0; | 135 | hc->tx_pipe = 0; |
198 | 136 | ||
199 | /* clear state about stuff we sent */ | 137 | /* clear state about stuff we sent */ |
200 | hc->tx_seqt = hc->tx_seqh; | 138 | hc->tx_seqt = hc->tx_seqh; |
@@ -204,23 +142,18 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
204 | hc->tx_rpseq = 0; | 142 | hc->tx_rpseq = 0; |
205 | hc->tx_rpdupack = -1; | 143 | hc->tx_rpdupack = -1; |
206 | ccid2_change_l_ack_ratio(sk, 1); | 144 | ccid2_change_l_ack_ratio(sk, 1); |
207 | ccid2_hc_tx_check_sanity(hc); | 145 | |
146 | /* if we were blocked before, we may now send cwnd=1 packet */ | ||
147 | if (sender_was_blocked) | ||
148 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | ||
149 | /* restart backed-off timer */ | ||
150 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | ||
208 | out: | 151 | out: |
209 | bh_unlock_sock(sk); | 152 | bh_unlock_sock(sk); |
210 | sock_put(sk); | 153 | sock_put(sk); |
211 | } | 154 | } |
212 | 155 | ||
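
On expiry the timer now backs off against the shared DCCP_RTO_MAX bound instead of the hand-rolled 60-second check, then drops back into slow start. A small standalone model of that arithmetic, assuming the conventional 64-second cap:

    #include <stdio.h>

    #define RTO_MAX_MS 64000 /* assumed cap, analogous to DCCP_RTO_MAX */

    struct tx_state { unsigned rto_ms, cwnd, ssthresh, pipe; };

    static void rto_expire(struct tx_state *hc)
    {
            hc->rto_ms <<= 1;            /* exponential back-off */
            if (hc->rto_ms > RTO_MAX_MS)
                    hc->rto_ms = RTO_MAX_MS;

            hc->ssthresh = hc->cwnd / 2; /* remember half the old window */
            if (hc->ssthresh < 2)
                    hc->ssthresh = 2;
            hc->cwnd = 1;                /* restart in slow start */
            hc->pipe = 0;                /* in-flight data written off */
    }

    int main(void)
    {
            struct tx_state hc = { .rto_ms = 3000, .cwnd = 20 };
            for (int i = 0; i < 6; i++) {
                    rto_expire(&hc);
                    printf("backoff %d: rto=%ums cwnd=%u ssthresh=%u\n",
                           i + 1, hc.rto_ms, hc.cwnd, hc.ssthresh);
            }
            return 0;
    }
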
213 | static void ccid2_start_rto_timer(struct sock *sk) | 156 | static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
214 | { | ||
215 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
216 | |||
217 | ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto); | ||
218 | |||
219 | BUG_ON(timer_pending(&hc->tx_rtotimer)); | ||
220 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | ||
221 | } | ||
222 | |||
223 | static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | ||
224 | { | 157 | { |
225 | struct dccp_sock *dp = dccp_sk(sk); | 158 | struct dccp_sock *dp = dccp_sk(sk); |
226 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 159 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
@@ -230,7 +163,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
230 | 163 | ||
231 | hc->tx_seqh->ccid2s_seq = dp->dccps_gss; | 164 | hc->tx_seqh->ccid2s_seq = dp->dccps_gss; |
232 | hc->tx_seqh->ccid2s_acked = 0; | 165 | hc->tx_seqh->ccid2s_acked = 0; |
233 | hc->tx_seqh->ccid2s_sent = jiffies; | 166 | hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; |
234 | 167 | ||
235 | next = hc->tx_seqh->ccid2s_next; | 168 | next = hc->tx_seqh->ccid2s_next; |
236 | /* check if we need to alloc more space */ | 169 | /* check if we need to alloc more space */ |
@@ -296,99 +229,104 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) | |||
296 | } | 229 | } |
297 | #endif | 230 | #endif |
298 | 231 | ||
299 | /* setup RTO timer */ | 232 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); |
300 | if (!timer_pending(&hc->tx_rtotimer)) | ||
301 | ccid2_start_rto_timer(sk); | ||
302 | 233 | ||
303 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 234 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
304 | do { | 235 | do { |
305 | struct ccid2_seq *seqp = hc->tx_seqt; | 236 | struct ccid2_seq *seqp = hc->tx_seqt; |
306 | 237 | ||
307 | while (seqp != hc->tx_seqh) { | 238 | while (seqp != hc->tx_seqh) { |
308 | ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", | 239 | ccid2_pr_debug("out seq=%llu acked=%d time=%u\n", |
309 | (unsigned long long)seqp->ccid2s_seq, | 240 | (unsigned long long)seqp->ccid2s_seq, |
310 | seqp->ccid2s_acked, seqp->ccid2s_sent); | 241 | seqp->ccid2s_acked, seqp->ccid2s_sent); |
311 | seqp = seqp->ccid2s_next; | 242 | seqp = seqp->ccid2s_next; |
312 | } | 243 | } |
313 | } while (0); | 244 | } while (0); |
314 | ccid2_pr_debug("=========\n"); | 245 | ccid2_pr_debug("=========\n"); |
315 | ccid2_hc_tx_check_sanity(hc); | ||
316 | #endif | 246 | #endif |
317 | } | 247 | } |
318 | 248 | ||
319 | /* XXX Lame code duplication! | 249 | /** |
320 | * returns -1 if none was found. | 250 | * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm |
321 | * else returns the next offset to use in the function call. | 251 | * This code is almost identical to TCP's tcp_rtt_estimator(), since |
252 | * - it has a higher sampling frequency (recommended by RFC 1323), | ||
253 | * - the RTO does not collapse into RTT due to RTTVAR going towards zero, | ||
254 | * - it is simple (cf. more complex proposals such as Eifel timer or research | ||
255 | * which suggests that the gain should be set according to window size), | ||
256 | * - in tests it was found to work well with CCID2 [gerrit]. | ||
322 | */ | 257 | */ |
323 | static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, | 258 | static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) |
324 | unsigned char **vec, unsigned char *veclen) | ||
325 | { | 259 | { |
326 | const struct dccp_hdr *dh = dccp_hdr(skb); | 260 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
327 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | 261 | long m = mrtt ? : 1; |
328 | unsigned char *opt_ptr; | 262 | |
329 | const unsigned char *opt_end = (unsigned char *)dh + | 263 | if (hc->tx_srtt == 0) { |
330 | (dh->dccph_doff * 4); | 264 | /* First measurement m */ |
331 | unsigned char opt, len; | 265 | hc->tx_srtt = m << 3; |
332 | unsigned char *value; | 266 | hc->tx_mdev = m << 1; |
333 | 267 | ||
334 | BUG_ON(offset < 0); | 268 | hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk)); |
335 | options += offset; | 269 | hc->tx_rttvar = hc->tx_mdev_max; |
336 | opt_ptr = options; | 270 | |
337 | if (opt_ptr >= opt_end) | 271 | hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; |
338 | return -1; | 272 | } else { |
339 | 273 | /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ | |
340 | while (opt_ptr != opt_end) { | 274 | m -= (hc->tx_srtt >> 3); |
341 | opt = *opt_ptr++; | 275 | hc->tx_srtt += m; |
342 | len = 0; | 276 | |
343 | value = NULL; | 277 | /* Similarly, update scaled mdev with regard to |m| */ |
344 | 278 | if (m < 0) { | |
345 | /* Check if this isn't a single byte option */ | 279 | m = -m; |
346 | if (opt > DCCPO_MAX_RESERVED) { | 280 | m -= (hc->tx_mdev >> 2); |
347 | if (opt_ptr == opt_end) | ||
348 | goto out_invalid_option; | ||
349 | |||
350 | len = *opt_ptr++; | ||
351 | if (len < 3) | ||
352 | goto out_invalid_option; | ||
353 | /* | 281 | /* |
354 | * Remove the type and len fields, leaving | 282 | * This neutralises RTO increase when RTT < SRTT - mdev |
355 | * just the value size | 283 | * (see P. Sarolahti, A. Kuznetsov,"Congestion Control |
284 | * in Linux TCP", USENIX 2002, pp. 49-62). | ||
356 | */ | 285 | */ |
357 | len -= 2; | 286 | if (m > 0) |
358 | value = opt_ptr; | 287 | m >>= 3; |
359 | opt_ptr += len; | 288 | } else { |
289 | m -= (hc->tx_mdev >> 2); | ||
290 | } | ||
291 | hc->tx_mdev += m; | ||
360 | 292 | ||
361 | if (opt_ptr > opt_end) | 293 | if (hc->tx_mdev > hc->tx_mdev_max) { |
362 | goto out_invalid_option; | 294 | hc->tx_mdev_max = hc->tx_mdev; |
295 | if (hc->tx_mdev_max > hc->tx_rttvar) | ||
296 | hc->tx_rttvar = hc->tx_mdev_max; | ||
363 | } | 297 | } |
364 | 298 | ||
365 | switch (opt) { | 299 | /* |
366 | case DCCPO_ACK_VECTOR_0: | 300 | * Decay RTTVAR at most once per flight, exploiting that |
367 | case DCCPO_ACK_VECTOR_1: | 301 | * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) |
368 | *vec = value; | 302 | * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) |
369 | *veclen = len; | 303 | * GAR is a useful bound for FlightSize = pipe. |
370 | return offset + (opt_ptr - options); | 304 | * AWL is probably too low here, as it over-estimates pipe. |
305 | */ | ||
306 | if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) { | ||
307 | if (hc->tx_mdev_max < hc->tx_rttvar) | ||
308 | hc->tx_rttvar -= (hc->tx_rttvar - | ||
309 | hc->tx_mdev_max) >> 2; | ||
310 | hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss; | ||
311 | hc->tx_mdev_max = tcp_rto_min(sk); | ||
371 | } | 312 | } |
372 | } | 313 | } |
373 | 314 | ||
374 | return -1; | 315 | /* |
375 | 316 | * Set RTO from SRTT and RTTVAR | |
376 | out_invalid_option: | 317 | * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms. |
377 | DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); | 318 | * This agrees with RFC 4341, 5: |
378 | return -1; | 319 | * "Because DCCP does not retransmit data, DCCP does not require |
379 | } | 320 | * TCP's recommended minimum timeout of one second". |
380 | 321 | */ | |
381 | static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) | 322 | hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar; |
382 | { | ||
383 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
384 | 323 | ||
385 | sk_stop_timer(sk, &hc->tx_rtotimer); | 324 | if (hc->tx_rto > DCCP_RTO_MAX) |
386 | ccid2_pr_debug("deleted RTO timer\n"); | 325 | hc->tx_rto = DCCP_RTO_MAX; |
387 | } | 326 | } |
388 | 327 | ||
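
The new estimator mirrors tcp_rtt_estimator(): SRTT is kept scaled by 2^3 and mdev by 2^2, so SRTT += (m - SRTT)/8 and mdev += (|m - SRTT| - mdev)/4 reduce to shifts, and RTO = SRTT + 4*mdev. A userspace model of the core update; the per-flight RTTVAR/mdev_max decay and the negative-error damping from the hunk above are omitted for brevity:

    #include <stdio.h>

    struct rtt_est { long srtt8, mdev4; }; /* srtt<<3 and mdev<<2 */

    /* Feed one RTT measurement m (e.g. in jiffies) into the estimator. */
    static void rtt_sample(struct rtt_est *e, long m)
    {
            if (m <= 0)
                    m = 1;
            if (e->srtt8 == 0) {        /* first measurement */
                    e->srtt8 = m << 3;
                    e->mdev4 = m << 1;  /* i.e. mdev = m/2, scaled by 4 */
                    return;
            }
            m -= e->srtt8 >> 3;         /* m is now the error term */
            e->srtt8 += m;              /* SRTT += 1/8 * error */
            if (m < 0)
                    m = -m;
            m -= e->mdev4 >> 2;
            e->mdev4 += m;              /* mdev += 1/4 * (|error| - mdev) */
    }

    static long rto(const struct rtt_est *e)
    {
            return (e->srtt8 >> 3) + e->mdev4; /* SRTT + 4*mdev */
    }

    int main(void)
    {
            struct rtt_est e = { 0, 0 };
            long samples[] = { 100, 120, 80, 110, 300, 100 };
            for (unsigned i = 0; i < sizeof(samples)/sizeof(*samples); i++) {
                    rtt_sample(&e, samples[i]);
                    printf("m=%ld srtt=%ld rto=%ld\n",
                           samples[i], e.srtt8 >> 3, rto(&e));
            }
            return 0;
    }
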
389 | static inline void ccid2_new_ack(struct sock *sk, | 328 | static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, |
390 | struct ccid2_seq *seqp, | 329 | unsigned int *maxincr) |
391 | unsigned int *maxincr) | ||
392 | { | 330 | { |
393 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 331 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
394 | 332 | ||
@@ -402,93 +340,27 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
402 | hc->tx_cwnd += 1; | 340 | hc->tx_cwnd += 1; |
403 | hc->tx_packets_acked = 0; | 341 | hc->tx_packets_acked = 0; |
404 | } | 342 | } |
405 | 343 | /* | |
406 | /* update RTO */ | 344 | * FIXME: RTT is sampled several times per acknowledgment (for each |
407 | if (hc->tx_srtt == -1 || | 345 | * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). |
408 | time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { | 346 | * This causes the RTT to be over-estimated, since the older entries |
409 | unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; | 347 | * in the Ack Vector have earlier sending times. |
410 | int s; | 348 | * The cleanest solution is to not use the ccid2s_sent field at all |
411 | 349 | * and instead use DCCP timestamps: requires changes in other places. | |
412 | /* first measurement */ | 350 | */ |
413 | if (hc->tx_srtt == -1) { | 351 | ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent); |
414 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", | ||
415 | r, jiffies, | ||
416 | (unsigned long long)seqp->ccid2s_seq); | ||
417 | ccid2_change_srtt(hc, r); | ||
418 | hc->tx_rttvar = r >> 1; | ||
419 | } else { | ||
420 | /* RTTVAR */ | ||
421 | long tmp = hc->tx_srtt - r; | ||
422 | long srtt; | ||
423 | |||
424 | if (tmp < 0) | ||
425 | tmp *= -1; | ||
426 | |||
427 | tmp >>= 2; | ||
428 | hc->tx_rttvar *= 3; | ||
429 | hc->tx_rttvar >>= 2; | ||
430 | hc->tx_rttvar += tmp; | ||
431 | |||
432 | /* SRTT */ | ||
433 | srtt = hc->tx_srtt; | ||
434 | srtt *= 7; | ||
435 | srtt >>= 3; | ||
436 | tmp = r >> 3; | ||
437 | srtt += tmp; | ||
438 | ccid2_change_srtt(hc, srtt); | ||
439 | } | ||
440 | s = hc->tx_rttvar << 2; | ||
441 | /* clock granularity is 1 when based on jiffies */ | ||
442 | if (!s) | ||
443 | s = 1; | ||
444 | hc->tx_rto = hc->tx_srtt + s; | ||
445 | |||
446 | /* must be at least a second */ | ||
447 | s = hc->tx_rto / HZ; | ||
448 | /* DCCP doesn't require this [but I like it cuz my code sux] */ | ||
449 | #if 1 | ||
450 | if (s < 1) | ||
451 | hc->tx_rto = HZ; | ||
452 | #endif | ||
453 | /* max 60 seconds */ | ||
454 | if (s > 60) | ||
455 | hc->tx_rto = HZ * 60; | ||
456 | |||
457 | hc->tx_lastrtt = jiffies; | ||
458 | |||
459 | ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", | ||
460 | hc->tx_srtt, hc->tx_rttvar, | ||
461 | hc->tx_rto, HZ, r); | ||
462 | } | ||
463 | |||
464 | /* we got a new ack, so re-start RTO timer */ | ||
465 | ccid2_hc_tx_kill_rto_timer(sk); | ||
466 | ccid2_start_rto_timer(sk); | ||
467 | } | ||
468 | |||
469 | static void ccid2_hc_tx_dec_pipe(struct sock *sk) | ||
470 | { | ||
471 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
472 | |||
473 | if (hc->tx_pipe == 0) | ||
474 | DCCP_BUG("pipe == 0"); | ||
475 | else | ||
476 | hc->tx_pipe--; | ||
477 | |||
478 | if (hc->tx_pipe == 0) | ||
479 | ccid2_hc_tx_kill_rto_timer(sk); | ||
480 | } | 352 | } |
481 | 353 | ||
482 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | 354 | static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) |
483 | { | 355 | { |
484 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 356 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
485 | 357 | ||
486 | if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { | 358 | if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) { |
487 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); | 359 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); |
488 | return; | 360 | return; |
489 | } | 361 | } |
490 | 362 | ||
491 | hc->tx_last_cong = jiffies; | 363 | hc->tx_last_cong = ccid2_time_stamp; |
492 | 364 | ||
493 | hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; | 365 | hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; |
494 | hc->tx_ssthresh = max(hc->tx_cwnd, 2U); | 366 | hc->tx_ssthresh = max(hc->tx_cwnd, 2U); |
@@ -498,19 +370,31 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) | |||
498 | ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); | 370 | ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); |
499 | } | 371 | } |
500 | 372 | ||
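
Note the switch from time_before() on jiffies to a raw (s32) subtraction on 32-bit ccid2_time_stamp values: both are the same wraparound-safe serial-number comparison, which is what keeps "one halving per RTT" correct across counter wrap. A compact illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Wraparound-safe "a happened before b" for free-running stamps. */
    static int stamp_before(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;
    }

    struct tx_state { uint32_t cwnd, ssthresh, last_cong; };

    static void congestion_event(struct tx_state *hc,
                                 uint32_t sent, uint32_t now)
    {
            if (stamp_before(sent, hc->last_cong))
                    return;         /* same loss event: already reacted */
            hc->last_cong = now;
            hc->cwnd = hc->cwnd / 2 ? hc->cwnd / 2 : 1; /* halve, >= 1 */
            hc->ssthresh = hc->cwnd < 2 ? 2 : hc->cwnd;
    }

    int main(void)
    {
            struct tx_state hc = { .cwnd = 16, .last_cong = 0xfffffff0u };
            /* sent-stamp is numerically tiny but "after" last_cong */
            congestion_event(&hc, 0x00000005u, 0x00000010u);
            printf("cwnd=%u ssthresh=%u\n", hc.cwnd, hc.ssthresh); /* 8 8 */
            return 0;
    }
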
373 | static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, | ||
374 | u8 option, u8 *optval, u8 optlen) | ||
375 | { | ||
376 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | ||
377 | |||
378 | switch (option) { | ||
379 | case DCCPO_ACK_VECTOR_0: | ||
380 | case DCCPO_ACK_VECTOR_1: | ||
381 | return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, | ||
382 | option - DCCPO_ACK_VECTOR_0); | ||
383 | } | ||
384 | return 0; | ||
385 | } | ||
386 | |||
501 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 387 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
502 | { | 388 | { |
503 | struct dccp_sock *dp = dccp_sk(sk); | 389 | struct dccp_sock *dp = dccp_sk(sk); |
504 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 390 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
391 | const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); | ||
392 | struct dccp_ackvec_parsed *avp; | ||
505 | u64 ackno, seqno; | 393 | u64 ackno, seqno; |
506 | struct ccid2_seq *seqp; | 394 | struct ccid2_seq *seqp; |
507 | unsigned char *vector; | ||
508 | unsigned char veclen; | ||
509 | int offset = 0; | ||
510 | int done = 0; | 395 | int done = 0; |
511 | unsigned int maxincr = 0; | 396 | unsigned int maxincr = 0; |
512 | 397 | ||
513 | ccid2_hc_tx_check_sanity(hc); | ||
514 | /* check reverse path congestion */ | 398 | /* check reverse path congestion */ |
515 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; | 399 | seqno = DCCP_SKB_CB(skb)->dccpd_seq; |
516 | 400 | ||
@@ -541,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
541 | } | 425 | } |
542 | 426 | ||
543 | /* check forward path congestion */ | 427 | /* check forward path congestion */ |
544 | /* still didn't send out new data packets */ | 428 | if (dccp_packet_without_ack(skb)) |
545 | if (hc->tx_seqh == hc->tx_seqt) | ||
546 | return; | 429 | return; |
547 | 430 | ||
548 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | 431 | /* still didn't send out new data packets */ |
549 | case DCCP_PKT_ACK: | 432 | if (hc->tx_seqh == hc->tx_seqt) |
550 | case DCCP_PKT_DATAACK: | 433 | goto done; |
551 | break; | ||
552 | default: | ||
553 | return; | ||
554 | } | ||
555 | 434 | ||
556 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | 435 | ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; |
557 | if (after48(ackno, hc->tx_high_ack)) | 436 | if (after48(ackno, hc->tx_high_ack)) |
@@ -575,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
575 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); | 454 | maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); |
576 | 455 | ||
577 | /* go through all ack vectors */ | 456 | /* go through all ack vectors */ |
578 | while ((offset = ccid2_ackvector(sk, skb, offset, | 457 | list_for_each_entry(avp, &hc->tx_av_chunks, node) { |
579 | &vector, &veclen)) != -1) { | ||
580 | /* go through this ack vector */ | 458 | /* go through this ack vector */ |
581 | while (veclen--) { | 459 | for (; avp->len--; avp->vec++) { |
582 | const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; | 460 | u64 ackno_end_rl = SUB48(ackno, |
583 | u64 ackno_end_rl = SUB48(ackno, rl); | 461 | dccp_ackvec_runlen(avp->vec)); |
584 | 462 | ||
585 | ccid2_pr_debug("ackvec start:%llu end:%llu\n", | 463 | ccid2_pr_debug("ackvec %llu |%u,%u|\n", |
586 | (unsigned long long)ackno, | 464 | (unsigned long long)ackno, |
587 | (unsigned long long)ackno_end_rl); | 465 | dccp_ackvec_state(avp->vec) >> 6, |
466 | dccp_ackvec_runlen(avp->vec)); | ||
588 | /* if the seqno we are analyzing is larger than the | 467 | /* if the seqno we are analyzing is larger than the |
589 | * current ackno, then move towards the tail of our | 468 | * current ackno, then move towards the tail of our |
590 | * seqnos. | 469 | * seqnos. |
@@ -603,24 +482,22 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
603 | * run length | 482 | * run length |
604 | */ | 483 | */ |
605 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { | 484 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { |
606 | const u8 state = *vector & | 485 | const u8 state = dccp_ackvec_state(avp->vec); |
607 | DCCP_ACKVEC_STATE_MASK; | ||
608 | 486 | ||
609 | /* new packet received or marked */ | 487 | /* new packet received or marked */ |
610 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && | 488 | if (state != DCCPAV_NOT_RECEIVED && |
611 | !seqp->ccid2s_acked) { | 489 | !seqp->ccid2s_acked) { |
612 | if (state == | 490 | if (state == DCCPAV_ECN_MARKED) |
613 | DCCP_ACKVEC_STATE_ECN_MARKED) { | ||
614 | ccid2_congestion_event(sk, | 491 | ccid2_congestion_event(sk, |
615 | seqp); | 492 | seqp); |
616 | } else | 493 | else |
617 | ccid2_new_ack(sk, seqp, | 494 | ccid2_new_ack(sk, seqp, |
618 | &maxincr); | 495 | &maxincr); |
619 | 496 | ||
620 | seqp->ccid2s_acked = 1; | 497 | seqp->ccid2s_acked = 1; |
621 | ccid2_pr_debug("Got ack for %llu\n", | 498 | ccid2_pr_debug("Got ack for %llu\n", |
622 | (unsigned long long)seqp->ccid2s_seq); | 499 | (unsigned long long)seqp->ccid2s_seq); |
623 | ccid2_hc_tx_dec_pipe(sk); | 500 | hc->tx_pipe--; |
624 | } | 501 | } |
625 | if (seqp == hc->tx_seqt) { | 502 | if (seqp == hc->tx_seqt) { |
626 | done = 1; | 503 | done = 1; |
@@ -632,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
632 | break; | 509 | break; |
633 | 510 | ||
634 | ackno = SUB48(ackno_end_rl, 1); | 511 | ackno = SUB48(ackno_end_rl, 1); |
635 | vector++; | ||
636 | } | 512 | } |
637 | if (done) | 513 | if (done) |
638 | break; | 514 | break; |
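
Each Ack Vector byte carries a 2-bit state in its top bits and a 6-bit run length below (RFC 4340, 11.4); a byte covers ackno down to ackno - runlen, and the walk above then continues one below that, all in 48-bit sequence arithmetic. A standalone decoder sketch; the macro names are illustrative, not the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    #define SEQ48_MASK   ((UINT64_C(1) << 48) - 1)
    #define AV_RUNLEN(b) ((b) & 0x3f)  /* low 6 bits */
    #define AV_STATE(b)  ((b) >> 6)    /* top 2 bits */

    static uint64_t sub48(uint64_t seq, uint64_t n) /* mod-2^48 subtract */
    {
            return (seq - n) & SEQ48_MASK;
    }

    int main(void)
    {
            /* 0 = received, 1 = ECN-marked, 3 = not received (RFC 4340) */
            static const char *state_name[] = { "recv", "ecn", "?", "lost" };
            uint8_t vec[] = { 0x02, 0xc1, 0x00 }; /* 3 recv, 2 lost, 1 recv */
            uint64_t ackno = 10;

            for (unsigned i = 0; i < sizeof(vec); i++) {
                    uint64_t end = sub48(ackno, AV_RUNLEN(vec[i]));
                    printf("seq %llu..%llu: %s\n",
                           (unsigned long long)ackno, (unsigned long long)end,
                           state_name[AV_STATE(vec[i])]);
                    ackno = sub48(end, 1); /* next run starts one below */
            }
            return 0;
    }
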
@@ -677,7 +553,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
677 | * one ack vector. | 553 | * one ack vector. |
678 | */ | 554 | */ |
679 | ccid2_congestion_event(sk, seqp); | 555 | ccid2_congestion_event(sk, seqp); |
680 | ccid2_hc_tx_dec_pipe(sk); | 556 | hc->tx_pipe--; |
681 | } | 557 | } |
682 | if (seqp == hc->tx_seqt) | 558 | if (seqp == hc->tx_seqt) |
683 | break; | 559 | break; |
@@ -695,7 +571,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
695 | hc->tx_seqt = hc->tx_seqt->ccid2s_next; | 571 | hc->tx_seqt = hc->tx_seqt->ccid2s_next; |
696 | } | 572 | } |
697 | 573 | ||
698 | ccid2_hc_tx_check_sanity(hc); | 574 | /* restart RTO timer if not all outstanding data has been acked */ |
575 | if (hc->tx_pipe == 0) | ||
576 | sk_stop_timer(sk, &hc->tx_rtotimer); | ||
577 | else | ||
578 | sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); | ||
579 | done: | ||
580 | /* check if incoming Acks allow pending packets to be sent */ | ||
581 | if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) | ||
582 | tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); | ||
583 | dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); | ||
584 | } | ||
585 | |||
586 | /* | ||
587 | * Convert RFC 3390 larger initial window into an equivalent number of packets. | ||
588 | * This is based on the numbers specified in RFC 5681, 3.1. | ||
589 | */ | ||
590 | static inline u32 rfc3390_bytes_to_packets(const u32 smss) | ||
591 | { | ||
592 | return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); | ||
699 | } | 593 | } |
700 | 594 | ||
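
The replaced clamp(4380/MSS, 2, 4) floor-divides the RFC 3390 byte limit, which yields only 2 packets for MSS values between 1461 and 2190; the new ternary instead encodes the per-segment table of RFC 5681, 3.1 directly. A quick comparison of the two:

    #include <stdio.h>

    static unsigned iw_new(unsigned smss) /* RFC 5681, 3.1 segment counts */
    {
            return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
    }

    static unsigned iw_old(unsigned smss) /* former clamp(4380/MSS, 2, 4) */
    {
            unsigned p = 4380 / smss;
            return p < 2 ? 2 : (p > 4 ? 4 : p);
    }

    int main(void)
    {
            unsigned probes[] = { 536, 1095, 1460, 1500, 2190, 4096 };
            for (unsigned i = 0; i < sizeof(probes)/sizeof(*probes); i++)
                    printf("smss=%4u: old=%u new=%u\n", probes[i],
                           iw_old(probes[i]), iw_new(probes[i]));
            return 0;   /* differs at 1500 and 2190: old=2, new=3 */
    }
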
701 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | 595 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
@@ -707,12 +601,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
707 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ | 601 | /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ |
708 | hc->tx_ssthresh = ~0U; | 602 | hc->tx_ssthresh = ~0U; |
709 | 603 | ||
710 | /* | 604 | /* Use larger initial windows (RFC 4341, section 5). */ |
711 | * RFC 4341, 5: "The cwnd parameter is initialized to at most four | 605 | hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); |
712 | * packets for new connections, following the rules from [RFC3390]". | ||
713 | * We need to convert the bytes of RFC3390 into the packets of RFC 4341. | ||
714 | */ | ||
715 | hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); | ||
716 | 606 | ||
717 | /* Make sure that Ack Ratio is enabled and within bounds. */ | 607 | /* Make sure that Ack Ratio is enabled and within bounds. */ |
718 | max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); | 608 | max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); |
@@ -723,15 +613,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
723 | if (ccid2_hc_tx_alloc_seq(hc)) | 613 | if (ccid2_hc_tx_alloc_seq(hc)) |
724 | return -ENOMEM; | 614 | return -ENOMEM; |
725 | 615 | ||
726 | hc->tx_rto = 3 * HZ; | 616 | hc->tx_rto = DCCP_TIMEOUT_INIT; |
727 | ccid2_change_srtt(hc, -1); | ||
728 | hc->tx_rttvar = -1; | ||
729 | hc->tx_rpdupack = -1; | 617 | hc->tx_rpdupack = -1; |
730 | hc->tx_last_cong = jiffies; | 618 | hc->tx_last_cong = ccid2_time_stamp; |
731 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, | 619 | setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, |
732 | (unsigned long)sk); | 620 | (unsigned long)sk); |
733 | 621 | INIT_LIST_HEAD(&hc->tx_av_chunks); | |
734 | ccid2_hc_tx_check_sanity(hc); | ||
735 | return 0; | 622 | return 0; |
736 | } | 623 | } |
737 | 624 | ||
@@ -740,7 +627,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) | |||
740 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); | 627 | struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); |
741 | int i; | 628 | int i; |
742 | 629 | ||
743 | ccid2_hc_tx_kill_rto_timer(sk); | 630 | sk_stop_timer(sk, &hc->tx_rtotimer); |
744 | 631 | ||
745 | for (i = 0; i < hc->tx_seqbufc; i++) | 632 | for (i = 0; i < hc->tx_seqbufc; i++) |
746 | kfree(hc->tx_seqbuf[i]); | 633 | kfree(hc->tx_seqbuf[i]); |
@@ -765,16 +652,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
765 | } | 652 | } |
766 | 653 | ||
767 | struct ccid_operations ccid2_ops = { | 654 | struct ccid_operations ccid2_ops = { |
768 | .ccid_id = DCCPC_CCID2, | 655 | .ccid_id = DCCPC_CCID2, |
769 | .ccid_name = "TCP-like", | 656 | .ccid_name = "TCP-like", |
770 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), | 657 | .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), |
771 | .ccid_hc_tx_init = ccid2_hc_tx_init, | 658 | .ccid_hc_tx_init = ccid2_hc_tx_init, |
772 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, | 659 | .ccid_hc_tx_exit = ccid2_hc_tx_exit, |
773 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, | 660 | .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, |
774 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, | 661 | .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, |
775 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, | 662 | .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, |
776 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), | 663 | .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, |
777 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | 664 | .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), |
665 | .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, | ||
778 | }; | 666 | }; |
779 | 667 | ||
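
The ops table is how a CCID plugs into the core: with the new .ccid_hc_tx_parse_options hook, Ack Vectors arrive through the generic option parser and are only replayed in packet_recv, instead of being re-parsed there. A toy model of the dispatch pattern (all names are made up for the sketch; 38 is the Ack Vector [Nonce 0] option type of RFC 4340):

    #include <stdio.h>

    /* Minimal model of a CCID-style ops table: the core calls through
     * whatever hooks a congestion-control module fills in. */
    struct conn;

    struct cc_ops {
            const char *name;
            int  (*parse_option)(struct conn *c, unsigned char opt,
                                 const unsigned char *val, unsigned char len);
            void (*packet_recv)(struct conn *c);
    };

    struct conn { const struct cc_ops *ops; int acks_seen; };

    static int demo_parse_option(struct conn *c, unsigned char opt,
                                 const unsigned char *val, unsigned char len)
    {
            (void)val;
            printf("option %u (%u bytes) handed to %s\n",
                   opt, len, c->ops->name);
            return 0;
    }

    static void demo_packet_recv(struct conn *c)
    {
            c->acks_seen++;
    }

    static const struct cc_ops demo_ops = {
            .name         = "demo",
            .parse_option = demo_parse_option,
            .packet_recv  = demo_packet_recv,
    };

    int main(void)
    {
            struct conn c = { .ops = &demo_ops };
            unsigned char av[] = { 0x02, 0xc1 };

            if (c.ops->parse_option)    /* options first, as in the patch */
                    c.ops->parse_option(&c, 38, av, sizeof(av));
            c.ops->packet_recv(&c);
            printf("acks seen: %d\n", c.acks_seen);
            return 0;
    }
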
780 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG | 668 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 1ec6a30103bb..e9985dafc2c7 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h | |||
@@ -18,18 +18,23 @@ | |||
18 | #ifndef _DCCP_CCID2_H_ | 18 | #ifndef _DCCP_CCID2_H_ |
19 | #define _DCCP_CCID2_H_ | 19 | #define _DCCP_CCID2_H_ |
20 | 20 | ||
21 | #include <linux/dccp.h> | ||
22 | #include <linux/timer.h> | 21 | #include <linux/timer.h> |
23 | #include <linux/types.h> | 22 | #include <linux/types.h> |
24 | #include "../ccid.h" | 23 | #include "../ccid.h" |
24 | #include "../dccp.h" | ||
25 | |||
26 | /* | ||
27 | * CCID-2 timestamping faces the same issues as TCP timestamping. | ||
28 | * Hence we reuse/share as much of the code as possible. | ||
29 | */ | ||
30 | #define ccid2_time_stamp tcp_time_stamp | ||
31 | |||
25 | /* NUMDUPACK parameter from RFC 4341, p. 6 */ | 32 | /* NUMDUPACK parameter from RFC 4341, p. 6 */ |
26 | #define NUMDUPACK 3 | 33 | #define NUMDUPACK 3 |
27 | 34 | ||
28 | struct sock; | ||
29 | |||
30 | struct ccid2_seq { | 35 | struct ccid2_seq { |
31 | u64 ccid2s_seq; | 36 | u64 ccid2s_seq; |
32 | unsigned long ccid2s_sent; | 37 | u32 ccid2s_sent; |
33 | int ccid2s_acked; | 38 | int ccid2s_acked; |
34 | struct ccid2_seq *ccid2s_prev; | 39 | struct ccid2_seq *ccid2s_prev; |
35 | struct ccid2_seq *ccid2s_next; | 40 | struct ccid2_seq *ccid2s_next; |
@@ -42,9 +47,15 @@ struct ccid2_seq { | |||
42 | * struct ccid2_hc_tx_sock - CCID2 TX half connection | 47 | * struct ccid2_hc_tx_sock - CCID2 TX half connection |
43 | * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 | 48 | * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 |
44 | * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) | 49 | * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) |
45 | * @tx_lastrtt: time RTT was last measured | 50 | * @tx_srtt: smoothed RTT estimate, scaled by 2^3 |
51 | * @tx_mdev: smoothed RTT variation, scaled by 2^2 | ||
52 | * @tx_mdev_max: maximum of @mdev during one flight | ||
53 | * @tx_rttvar: moving average/maximum of @mdev_max | ||
54 | * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) | ||
55 | * @tx_rtt_seq: to decay RTTVAR at most once per flight | ||
46 | * @tx_rpseq: last consecutive seqno | 56 | * @tx_rpseq: last consecutive seqno |
47 | * @tx_rpdupack: dupacks since rpseq | 57 | * @tx_rpdupack: dupacks since rpseq |
58 | * @tx_av_chunks: list of Ack Vectors received on current skb | ||
48 | */ | 59 | */ |
49 | struct ccid2_hc_tx_sock { | 60 | struct ccid2_hc_tx_sock { |
50 | u32 tx_cwnd; | 61 | u32 tx_cwnd; |
@@ -55,17 +66,28 @@ struct ccid2_hc_tx_sock { | |||
55 | int tx_seqbufc; | 66 | int tx_seqbufc; |
56 | struct ccid2_seq *tx_seqh; | 67 | struct ccid2_seq *tx_seqh; |
57 | struct ccid2_seq *tx_seqt; | 68 | struct ccid2_seq *tx_seqt; |
58 | long tx_rto; | 69 | |
59 | long tx_srtt; | 70 | /* RTT measurement: variables/principles are the same as in TCP */ |
60 | long tx_rttvar; | 71 | u32 tx_srtt, |
61 | unsigned long tx_lastrtt; | 72 | tx_mdev, |
73 | tx_mdev_max, | ||
74 | tx_rttvar, | ||
75 | tx_rto; | ||
76 | u64 tx_rtt_seq:48; | ||
62 | struct timer_list tx_rtotimer; | 77 | struct timer_list tx_rtotimer; |
78 | |||
63 | u64 tx_rpseq; | 79 | u64 tx_rpseq; |
64 | int tx_rpdupack; | 80 | int tx_rpdupack; |
65 | unsigned long tx_last_cong; | 81 | u32 tx_last_cong; |
66 | u64 tx_high_ack; | 82 | u64 tx_high_ack; |
83 | struct list_head tx_av_chunks; | ||
67 | }; | 84 | }; |
68 | 85 | ||
86 | static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) | ||
87 | { | ||
88 | return hc->tx_pipe >= hc->tx_cwnd; | ||
89 | } | ||
90 | |||
69 | struct ccid2_hc_rx_sock { | 91 | struct ccid2_hc_rx_sock { |
70 | int rx_data; | 92 | int rx_data; |
71 | }; | 93 | }; |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 95f752986497..3d604e1349c0 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) | |||
54 | [TFRC_SSTATE_NO_SENT] = "NO_SENT", | 54 | [TFRC_SSTATE_NO_SENT] = "NO_SENT", |
55 | [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", | 55 | [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", |
56 | [TFRC_SSTATE_FBACK] = "FBACK", | 56 | [TFRC_SSTATE_FBACK] = "FBACK", |
57 | [TFRC_SSTATE_TERM] = "TERM", | ||
58 | }; | 57 | }; |
59 | 58 | ||
60 | return ccid3_state_names[state]; | 59 | return ccid3_state_names[state]; |
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk) | |||
91 | return scaled_div(w_init << 6, hc->tx_rtt); | 90 | return scaled_div(w_init << 6, hc->tx_rtt); |
92 | } | 91 | } |
93 | 92 | ||
94 | /* | 93 | /** |
95 | * Recalculate t_ipi and delta (should be called whenever X changes) | 94 | * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst |
95 | * This respects the granularity of X_inst (64 * bytes/second). | ||
96 | */ | 96 | */ |
97 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) | 97 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) |
98 | { | 98 | { |
99 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ | ||
100 | hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); | 99 | hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); |
101 | 100 | ||
102 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | 101 | ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi, |
103 | hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); | 102 | hc->tx_s, (unsigned)(hc->tx_x >> 6)); |
104 | |||
105 | ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi, | ||
106 | hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6)); | ||
107 | } | 103 | } |
108 | 104 | ||
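
With X stored in units of 64 bytes/second and t_ipi in microseconds, the update amounts to t_ipi = (s << 6) * 10^6 / X, which is what scaled_div32() computes here. A worked example, assuming s = 1460 bytes at 1 Mbit/s:

    #include <stdint.h>
    #include <stdio.h>

    /* t_ipi = s / X, with X in 64 * bytes/second and t_ipi in usecs. */
    static uint32_t update_send_interval(uint16_t s, uint64_t x_units)
    {
            return (uint32_t)(((uint64_t)s << 6) * 1000000u / x_units);
    }

    int main(void)
    {
            uint64_t x = 125000ull << 6; /* 1 Mbit/s = 125000 B/s, scaled */
            printf("t_ipi = %u usec\n", update_send_interval(1460, x));
            /* 1460 * 64 * 1e6 / (125000 * 64) = 11680 usec, ~86 pkt/s */
            return 0;
    }
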
109 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) | 105 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) |
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
211 | ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, | 207 | ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, |
212 | ccid3_tx_state_name(hc->tx_state)); | 208 | ccid3_tx_state_name(hc->tx_state)); |
213 | 209 | ||
210 | /* Ignore and do not restart after leaving the established state */ | ||
211 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) | ||
212 | goto out; | ||
213 | |||
214 | /* Reset feedback state to "no feedback received" */ | ||
214 | if (hc->tx_state == TFRC_SSTATE_FBACK) | 215 | if (hc->tx_state == TFRC_SSTATE_FBACK) |
215 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | 216 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
216 | else if (hc->tx_state != TFRC_SSTATE_NO_FBACK) | ||
217 | goto out; | ||
218 | 217 | ||
219 | /* | 218 | /* |
220 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 | 219 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 |
220 | * RTO is 0 if and only if no feedback has been received yet. | ||
221 | */ | 221 | */ |
222 | if (hc->tx_t_rto == 0 || /* no feedback received yet */ | 222 | if (hc->tx_t_rto == 0 || hc->tx_p == 0) { |
223 | hc->tx_p == 0) { | ||
224 | 223 | ||
225 | /* halve send rate directly */ | 224 | /* halve send rate directly */ |
226 | hc->tx_x = max(hc->tx_x / 2, | 225 | hc->tx_x = max(hc->tx_x / 2, |
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
256 | * Set new timeout for the nofeedback timer. | 255 | * Set new timeout for the nofeedback timer. |
257 | * See comments in packet_recv() regarding the value of t_RTO. | 256 | * See comments in packet_recv() regarding the value of t_RTO. |
258 | */ | 257 | */ |
259 | if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ | 258 | if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */ |
260 | t_nfb = TFRC_INITIAL_TIMEOUT; | 259 | t_nfb = TFRC_INITIAL_TIMEOUT; |
261 | else | 260 | else |
262 | t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); | 261 | t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); |
@@ -269,11 +268,11 @@ out: | |||
269 | sock_put(sk); | 268 | sock_put(sk); |
270 | } | 269 | } |
271 | 270 | ||
272 | /* | 271 | /** |
273 | * returns | 272 | * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets |
274 | * > 0: delay (in msecs) that should pass before actually sending | 273 | * @skb: next packet candidate to send on @sk |
275 | * = 0: can send immediately | 274 | * This function uses the convention of ccid_packet_dequeue_eval() and |
276 | * < 0: error condition; do not send packet | 275 | * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. |
277 | */ | 276 | */ |
278 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | 277 | static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) |
279 | { | 278 | { |
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
290 | if (unlikely(skb->len == 0)) | 289 | if (unlikely(skb->len == 0)) |
291 | return -EBADMSG; | 290 | return -EBADMSG; |
292 | 291 | ||
293 | switch (hc->tx_state) { | 292 | if (hc->tx_state == TFRC_SSTATE_NO_SENT) { |
294 | case TFRC_SSTATE_NO_SENT: | ||
295 | sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + | 293 | sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + |
296 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); | 294 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); |
297 | hc->tx_last_win_count = 0; | 295 | hc->tx_last_win_count = 0; |
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
326 | ccid3_update_send_interval(hc); | 324 | ccid3_update_send_interval(hc); |
327 | 325 | ||
328 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | 326 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
329 | break; | 327 | |
330 | case TFRC_SSTATE_NO_FBACK: | 328 | } else { |
331 | case TFRC_SSTATE_FBACK: | ||
332 | delay = ktime_us_delta(hc->tx_t_nom, now); | 329 | delay = ktime_us_delta(hc->tx_t_nom, now); |
333 | ccid3_pr_debug("delay=%ld\n", (long)delay); | 330 | ccid3_pr_debug("delay=%ld\n", (long)delay); |
334 | /* | 331 | /* |
335 | * Scheduling of packet transmissions [RFC 3448, 4.6] | 332 | * Scheduling of packet transmissions (RFC 5348, 8.3) |
336 | * | 333 | * |
337 | * if (t_now > t_nom - delta) | 334 | * if (t_now > t_nom - delta) |
338 | * // send the packet now | 335 | * // send the packet now |
339 | * else | 336 | * else |
340 | * // send the packet in (t_nom - t_now) milliseconds. | 337 | * // send the packet in (t_nom - t_now) milliseconds. |
341 | */ | 338 | */ |
342 | if (delay - (s64)hc->tx_delta >= 1000) | 339 | if (delay >= TFRC_T_DELTA) |
343 | return (u32)delay / 1000L; | 340 | return (u32)delay / USEC_PER_MSEC; |
344 | 341 | ||
345 | ccid3_hc_tx_update_win_count(hc, now); | 342 | ccid3_hc_tx_update_win_count(hc, now); |
346 | break; | ||
347 | case TFRC_SSTATE_TERM: | ||
348 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | ||
349 | return -EINVAL; | ||
350 | } | 343 | } |
351 | 344 | ||
352 | /* prepare to send now (add options etc.) */ | 345 | /* prepare to send now (add options etc.) */ |
@@ -355,11 +348,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
355 | 348 | ||
356 | /* set the nominal send time for the next following packet */ | 349 | /* set the nominal send time for the next following packet */ |
357 | hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); | 350 | hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); |
358 | return 0; | 351 | return CCID_PACKET_SEND_AT_ONCE; |
359 | } | 352 | } |
360 | 353 | ||
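
The scheduling rule of RFC 5348, 8.3 thus reduces to: transmit once t_now is within t_delta of the nominal time t_nom, otherwise report the residual delay in whole milliseconds so the caller can arm a timer. A userspace model of that test (helper names are made up for the sketch):

    #include <stdint.h>
    #include <stdio.h>

    #define T_DELTA_US 1000 /* see TFRC_T_DELTA for HZ >= 500 */

    /* Returns 0 to send immediately, else the wait in milliseconds. */
    static uint32_t tx_schedule(int64_t t_nom_us, int64_t now_us,
                                int64_t *next_nom_us, uint32_t t_ipi_us)
    {
            int64_t delay = t_nom_us - now_us;

            if (delay >= T_DELTA_US)
                    return (uint32_t)(delay / 1000); /* re-check later */

            *next_nom_us = t_nom_us + t_ipi_us; /* nominal time of next */
            return 0;
    }

    int main(void)
    {
            int64_t nom = 5000, next;
            uint32_t wait = tx_schedule(nom, 1200, &next, 11680);
            printf("wait %u ms\n", wait);          /* 3 ms */
            wait = tx_schedule(nom, 4500, &next, 11680);
            printf("wait %u ms, next t_nom %lld\n", wait, (long long)next);
            return 0;
    }
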
361 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, | 354 | static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
362 | unsigned int len) | ||
363 | { | 355 | { |
364 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 356 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
365 | 357 | ||
@@ -372,48 +364,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, | |||
372 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 364 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
373 | { | 365 | { |
374 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 366 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
375 | struct ccid3_options_received *opt_recv; | 367 | struct tfrc_tx_hist_entry *acked; |
376 | ktime_t now; | 368 | ktime_t now; |
377 | unsigned long t_nfb; | 369 | unsigned long t_nfb; |
378 | u32 pinv, r_sample; | 370 | u32 r_sample; |
379 | 371 | ||
380 | /* we are only interested in ACKs */ | 372 | /* we are only interested in ACKs */ |
381 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | 373 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || |
382 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | 374 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) |
383 | return; | 375 | return; |
384 | /* ... and only in the established state */ | ||
385 | if (hc->tx_state != TFRC_SSTATE_FBACK && | ||
386 | hc->tx_state != TFRC_SSTATE_NO_FBACK) | ||
387 | return; | ||
388 | |||
389 | opt_recv = &hc->tx_options_received; | ||
390 | now = ktime_get_real(); | ||
391 | |||
392 | /* Estimate RTT from history if ACK number is valid */ | ||
393 | r_sample = tfrc_tx_hist_rtt(hc->tx_hist, | ||
394 | DCCP_SKB_CB(skb)->dccpd_ack_seq, now); | ||
395 | if (r_sample == 0) { | ||
396 | DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, | ||
397 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), | ||
398 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
399 | return; | ||
400 | } | ||
401 | |||
402 | /* Update receive rate in units of 64 * bytes/second */ | ||
403 | hc->tx_x_recv = opt_recv->ccid3or_receive_rate; | ||
404 | hc->tx_x_recv <<= 6; | ||
405 | |||
406 | /* Update loss event rate (which is scaled by 1e6) */ | ||
407 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
408 | if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ | ||
409 | hc->tx_p = 0; | ||
410 | else /* can not exceed 100% */ | ||
411 | hc->tx_p = scaled_div(1, pinv); | ||
412 | /* | 376 | /* |
413 | * Validate new RTT sample and update moving average | 377 | * Locate the acknowledged packet in the TX history. |
378 | * | ||
379 | * Returning "entry not found" here can for instance happen when | ||
380 | * - the host has not sent out anything (e.g. a passive server), | ||
381 | * - the Ack is outdated (packet with higher Ack number was received), | ||
382 | * - it is a bogus Ack (for a packet not sent on this connection). | ||
414 | */ | 383 | */ |
415 | r_sample = dccp_sample_rtt(sk, r_sample); | 384 | acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb)); |
385 | if (acked == NULL) | ||
386 | return; | ||
387 | /* For the sake of RTT sampling, ignore/remove all older entries */ | ||
388 | tfrc_tx_hist_purge(&acked->next); | ||
389 | |||
390 | /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */ | ||
391 | now = ktime_get_real(); | ||
392 | r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp)); | ||
416 | hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); | 393 | hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); |
394 | |||
417 | /* | 395 | /* |
418 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 | 396 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 |
419 | */ | 397 | */ |
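
tfrc_ewma(hc->tx_rtt, r_sample, 9) keeps nine tenths of the old average per sample, matching the q = 0.9 filter of RFC 3448, 4.3. A sketch of that filter, assuming the weight-in-tenths semantics the call site suggests:

    #include <stdint.h>
    #include <stdio.h>

    /* EWMA, weight in tenths: avg' = (w*avg + (10-w)*new) / 10; a zero
     * average means "no estimate yet" and is replaced outright. */
    static uint32_t ewma10(uint32_t avg, uint32_t newval, unsigned w)
    {
            return avg ? (w * avg + (10 - w) * newval) / 10 : newval;
    }

    int main(void)
    {
            uint32_t rtt = 0, samples[] = { 100000, 140000, 90000, 100000 };
            for (unsigned i = 0; i < sizeof(samples)/sizeof(*samples); i++) {
                    rtt = ewma10(rtt, samples[i], 9);
                    printf("sample=%u usec -> rtt=%u usec\n", samples[i], rtt);
            }
            return 0;
    }
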
@@ -461,13 +439,12 @@ done_computing_x: | |||
461 | sk->sk_write_space(sk); | 439 | sk->sk_write_space(sk); |
462 | 440 | ||
463 | /* | 441 | /* |
464 | * Update timeout interval for the nofeedback timer. | 442 | * Update timeout interval for the nofeedback timer. In order to control |
465 | * We use a configuration option to increase the lower bound. | 443 | * rate halving on networks with very low RTTs (<= 1 ms), use per-route |
466 | * This can help avoid triggering the nofeedback timer too | 444 | * tunable RTAX_RTO_MIN value as the lower bound. |
467 | * often ('spinning') on LANs with small RTTs. | ||
468 | */ | 445 | */ |
469 | hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * | 446 | hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, |
470 | (USEC_PER_SEC / 1000))); | 447 | USEC_PER_SEC/HZ * tcp_rto_min(sk)); |
471 | /* | 448 | /* |
472 | * Schedule no feedback timer to expire in | 449 | * Schedule no feedback timer to expire in |
473 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | 450 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) |
@@ -482,66 +459,41 @@ done_computing_x: | |||
482 | jiffies + usecs_to_jiffies(t_nfb)); | 459 | jiffies + usecs_to_jiffies(t_nfb)); |
483 | } | 460 | } |
484 | 461 | ||
485 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, | 462 | static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type, |
486 | unsigned char len, u16 idx, | 463 | u8 option, u8 *optval, u8 optlen) |
487 | unsigned char *value) | ||
488 | { | 464 | { |
489 | int rc = 0; | ||
490 | const struct dccp_sock *dp = dccp_sk(sk); | ||
491 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 465 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
492 | struct ccid3_options_received *opt_recv; | ||
493 | __be32 opt_val; | 466 | __be32 opt_val; |
494 | 467 | ||
495 | opt_recv = &hc->tx_options_received; | ||
496 | |||
497 | if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { | ||
498 | opt_recv->ccid3or_seqno = dp->dccps_gsr; | ||
499 | opt_recv->ccid3or_loss_event_rate = ~0; | ||
500 | opt_recv->ccid3or_loss_intervals_idx = 0; | ||
501 | opt_recv->ccid3or_loss_intervals_len = 0; | ||
502 | opt_recv->ccid3or_receive_rate = 0; | ||
503 | } | ||
504 | |||
505 | switch (option) { | 468 | switch (option) { |
469 | case TFRC_OPT_RECEIVE_RATE: | ||
506 | case TFRC_OPT_LOSS_EVENT_RATE: | 470 | case TFRC_OPT_LOSS_EVENT_RATE: |
507 | if (unlikely(len != 4)) { | 471 | /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */ |
508 | DCCP_WARN("%s(%p), invalid len %d " | 472 | if (packet_type == DCCP_PKT_DATA) |
509 | "for TFRC_OPT_LOSS_EVENT_RATE\n", | 473 | break; |
510 | dccp_role(sk), sk, len); | 474 | if (unlikely(optlen != 4)) { |
511 | rc = -EINVAL; | 475 | DCCP_WARN("%s(%p), invalid len %d for %u\n", |
512 | } else { | 476 | dccp_role(sk), sk, optlen, option); |
513 | opt_val = get_unaligned((__be32 *)value); | 477 | return -EINVAL; |
514 | opt_recv->ccid3or_loss_event_rate = ntohl(opt_val); | ||
515 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", | ||
516 | dccp_role(sk), sk, | ||
517 | opt_recv->ccid3or_loss_event_rate); | ||
518 | } | 478 | } |
519 | break; | 479 | opt_val = ntohl(get_unaligned((__be32 *)optval)); |
520 | case TFRC_OPT_LOSS_INTERVALS: | 480 | |
521 | opt_recv->ccid3or_loss_intervals_idx = idx; | 481 | if (option == TFRC_OPT_RECEIVE_RATE) { |
522 | opt_recv->ccid3or_loss_intervals_len = len; | 482 | /* Receive Rate is kept in units of 64 bytes/second */ |
523 | ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", | 483 | hc->tx_x_recv = opt_val; |
524 | dccp_role(sk), sk, | 484 | hc->tx_x_recv <<= 6; |
525 | opt_recv->ccid3or_loss_intervals_idx, | 485 | |
526 | opt_recv->ccid3or_loss_intervals_len); | ||
527 | break; | ||
528 | case TFRC_OPT_RECEIVE_RATE: | ||
529 | if (unlikely(len != 4)) { | ||
530 | DCCP_WARN("%s(%p), invalid len %d " | ||
531 | "for TFRC_OPT_RECEIVE_RATE\n", | ||
532 | dccp_role(sk), sk, len); | ||
533 | rc = -EINVAL; | ||
534 | } else { | ||
535 | opt_val = get_unaligned((__be32 *)value); | ||
536 | opt_recv->ccid3or_receive_rate = ntohl(opt_val); | ||
537 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", | 486 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", |
538 | dccp_role(sk), sk, | 487 | dccp_role(sk), sk, opt_val); |
539 | opt_recv->ccid3or_receive_rate); | 488 | } else { |
489 | /* Update the fixpoint Loss Event Rate fraction */ | ||
490 | hc->tx_p = tfrc_invert_loss_event_rate(opt_val); | ||
491 | |||
492 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", | ||
493 | dccp_role(sk), sk, opt_val); | ||
540 | } | 494 | } |
541 | break; | ||
542 | } | 495 | } |
543 | 496 | return 0; | |
544 | return rc; | ||
545 | } | 497 | } |
546 | 498 | ||
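
Both TFRC options carry a 4-byte value in network byte order, fetched with get_unaligned() and converted with ntohl() before Receive Rate is shifted into its 64 bytes/second fixpoint. A userspace equivalent of that fetch, with memcpy standing in for get_unaligned():

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    /* Fetch a 32-bit big-endian option value that may be unaligned. */
    static uint32_t opt_be32(const uint8_t *optval)
    {
            uint32_t v;
            memcpy(&v, optval, sizeof(v));
            return ntohl(v);
    }

    int main(void)
    {
            uint8_t buf[] = { 0x00, 0x01, 0xe8, 0x48 };     /* 125000 */
            uint64_t x_recv = (uint64_t)opt_be32(buf) << 6; /* 64*B/s units */
            printf("receive rate = %llu (scaled), %u B/s\n",
                   (unsigned long long)x_recv, opt_be32(buf));
            return 0;
    }
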
547 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) | 499 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) |
@@ -559,42 +511,36 @@ static void ccid3_hc_tx_exit(struct sock *sk) | |||
559 | { | 511 | { |
560 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 512 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
561 | 513 | ||
562 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); | ||
563 | sk_stop_timer(sk, &hc->tx_no_feedback_timer); | 514 | sk_stop_timer(sk, &hc->tx_no_feedback_timer); |
564 | |||
565 | tfrc_tx_hist_purge(&hc->tx_hist); | 515 | tfrc_tx_hist_purge(&hc->tx_hist); |
566 | } | 516 | } |
567 | 517 | ||
568 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | 518 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) |
569 | { | 519 | { |
570 | struct ccid3_hc_tx_sock *hc; | 520 | info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto; |
571 | 521 | info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt; | |
572 | /* Listen socks doesn't have a private CCID block */ | ||
573 | if (sk->sk_state == DCCP_LISTEN) | ||
574 | return; | ||
575 | |||
576 | hc = ccid3_hc_tx_sk(sk); | ||
577 | info->tcpi_rto = hc->tx_t_rto; | ||
578 | info->tcpi_rtt = hc->tx_rtt; | ||
579 | } | 522 | } |
580 | 523 | ||
581 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | 524 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, |
582 | u32 __user *optval, int __user *optlen) | 525 | u32 __user *optval, int __user *optlen) |
583 | { | 526 | { |
584 | const struct ccid3_hc_tx_sock *hc; | 527 | const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
528 | struct tfrc_tx_info tfrc; | ||
585 | const void *val; | 529 | const void *val; |
586 | 530 | ||
587 | /* Listen socks doesn't have a private CCID block */ | ||
588 | if (sk->sk_state == DCCP_LISTEN) | ||
589 | return -EINVAL; | ||
590 | |||
591 | hc = ccid3_hc_tx_sk(sk); | ||
592 | switch (optname) { | 531 | switch (optname) { |
593 | case DCCP_SOCKOPT_CCID_TX_INFO: | 532 | case DCCP_SOCKOPT_CCID_TX_INFO: |
594 | if (len < sizeof(hc->tx_tfrc)) | 533 | if (len < sizeof(tfrc)) |
595 | return -EINVAL; | 534 | return -EINVAL; |
596 | len = sizeof(hc->tx_tfrc); | 535 | tfrc.tfrctx_x = hc->tx_x; |
597 | val = &hc->tx_tfrc; | 536 | tfrc.tfrctx_x_recv = hc->tx_x_recv; |
537 | tfrc.tfrctx_x_calc = hc->tx_x_calc; | ||
538 | tfrc.tfrctx_rtt = hc->tx_rtt; | ||
539 | tfrc.tfrctx_p = hc->tx_p; | ||
540 | tfrc.tfrctx_rto = hc->tx_t_rto; | ||
541 | tfrc.tfrctx_ipi = hc->tx_t_ipi; | ||
542 | len = sizeof(tfrc); | ||
543 | val = &tfrc; | ||
598 | break; | 544 | break; |
599 | default: | 545 | default: |
600 | return -ENOPROTOOPT; | 546 | return -ENOPROTOOPT; |
@@ -624,7 +570,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) | |||
624 | static const char *const ccid3_rx_state_names[] = { | 570 | static const char *const ccid3_rx_state_names[] = { |
625 | [TFRC_RSTATE_NO_DATA] = "NO_DATA", | 571 | [TFRC_RSTATE_NO_DATA] = "NO_DATA", |
626 | [TFRC_RSTATE_DATA] = "DATA", | 572 | [TFRC_RSTATE_DATA] = "DATA", |
627 | [TFRC_RSTATE_TERM] = "TERM", | ||
628 | }; | 573 | }; |
629 | 574 | ||
630 | return ccid3_rx_state_names[state]; | 575 | return ccid3_rx_state_names[state]; |
@@ -650,14 +595,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, | |||
650 | { | 595 | { |
651 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); | 596 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
652 | struct dccp_sock *dp = dccp_sk(sk); | 597 | struct dccp_sock *dp = dccp_sk(sk); |
653 | ktime_t now; | 598 | ktime_t now = ktime_get_real(); |
654 | s64 delta = 0; | 599 | s64 delta = 0; |
655 | 600 | ||
656 | if (unlikely(hc->rx_state == TFRC_RSTATE_TERM)) | ||
657 | return; | ||
658 | |||
659 | now = ktime_get_real(); | ||
660 | |||
661 | switch (fbtype) { | 601 | switch (fbtype) { |
662 | case CCID3_FBACK_INITIAL: | 602 | case CCID3_FBACK_INITIAL: |
663 | hc->rx_x_recv = 0; | 603 | hc->rx_x_recv = 0; |
@@ -701,14 +641,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, | |||
701 | 641 | ||
702 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | 642 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) |
703 | { | 643 | { |
704 | const struct ccid3_hc_rx_sock *hc; | 644 | const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
705 | __be32 x_recv, pinv; | 645 | __be32 x_recv, pinv; |
706 | 646 | ||
707 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | 647 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) |
708 | return 0; | 648 | return 0; |
709 | 649 | ||
710 | hc = ccid3_hc_rx_sk(sk); | ||
711 | |||
712 | if (dccp_packet_without_ack(skb)) | 650 | if (dccp_packet_without_ack(skb)) |
713 | return 0; | 651 | return 0; |
714 | 652 | ||
@@ -749,10 +687,11 @@ static u32 ccid3_first_li(struct sock *sk) | |||
749 | x_recv = scaled_div32(hc->rx_bytes_recv, delta); | 687 | x_recv = scaled_div32(hc->rx_bytes_recv, delta); |
750 | if (x_recv == 0) { /* would also trigger divide-by-zero */ | 688 | if (x_recv == 0) { /* would also trigger divide-by-zero */ |
751 | DCCP_WARN("X_recv==0\n"); | 689 | DCCP_WARN("X_recv==0\n"); |
752 | if ((x_recv = hc->rx_x_recv) == 0) { | 690 | if (hc->rx_x_recv == 0) { |
753 | DCCP_BUG("stored value of X_recv is zero"); | 691 | DCCP_BUG("stored value of X_recv is zero"); |
754 | return ~0U; | 692 | return ~0U; |
755 | } | 693 | } |
694 | x_recv = hc->rx_x_recv; | ||
756 | } | 695 | } |
757 | 696 | ||
758 | fval = scaled_div(hc->rx_s, hc->rx_rtt); | 697 | fval = scaled_div(hc->rx_s, hc->rx_rtt); |
@@ -862,46 +801,31 @@ static void ccid3_hc_rx_exit(struct sock *sk) | |||
862 | { | 801 | { |
863 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); | 802 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
864 | 803 | ||
865 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); | ||
866 | |||
867 | tfrc_rx_hist_purge(&hc->rx_hist); | 804 | tfrc_rx_hist_purge(&hc->rx_hist); |
868 | tfrc_lh_cleanup(&hc->rx_li_hist); | 805 | tfrc_lh_cleanup(&hc->rx_li_hist); |
869 | } | 806 | } |
870 | 807 | ||
871 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) | 808 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) |
872 | { | 809 | { |
873 | const struct ccid3_hc_rx_sock *hc; | 810 | info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state; |
874 | |||
875 | /* Listen socks doesn't have a private CCID block */ | ||
876 | if (sk->sk_state == DCCP_LISTEN) | ||
877 | return; | ||
878 | |||
879 | hc = ccid3_hc_rx_sk(sk); | ||
880 | info->tcpi_ca_state = hc->rx_state; | ||
881 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; | 811 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; |
882 | info->tcpi_rcv_rtt = hc->rx_rtt; | 812 | info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt; |
883 | } | 813 | } |
884 | 814 | ||
885 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | 815 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, |
886 | u32 __user *optval, int __user *optlen) | 816 | u32 __user *optval, int __user *optlen) |
887 | { | 817 | { |
888 | const struct ccid3_hc_rx_sock *hc; | 818 | const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
889 | struct tfrc_rx_info rx_info; | 819 | struct tfrc_rx_info rx_info; |
890 | const void *val; | 820 | const void *val; |
891 | 821 | ||
892 | /* Listen socks doesn't have a private CCID block */ | ||
893 | if (sk->sk_state == DCCP_LISTEN) | ||
894 | return -EINVAL; | ||
895 | |||
896 | hc = ccid3_hc_rx_sk(sk); | ||
897 | switch (optname) { | 822 | switch (optname) { |
898 | case DCCP_SOCKOPT_CCID_RX_INFO: | 823 | case DCCP_SOCKOPT_CCID_RX_INFO: |
899 | if (len < sizeof(rx_info)) | 824 | if (len < sizeof(rx_info)) |
900 | return -EINVAL; | 825 | return -EINVAL; |
901 | rx_info.tfrcrx_x_recv = hc->rx_x_recv; | 826 | rx_info.tfrcrx_x_recv = hc->rx_x_recv; |
902 | rx_info.tfrcrx_rtt = hc->rx_rtt; | 827 | rx_info.tfrcrx_rtt = hc->rx_rtt; |
903 | rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : | 828 | rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv); |
904 | scaled_div(1, hc->rx_pinv); | ||
905 | len = sizeof(rx_info); | 829 | len = sizeof(rx_info); |
906 | val = &rx_info; | 830 | val = &rx_info; |
907 | break; | 831 | break; |
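
tfrc_invert_loss_event_rate() centralizes the conversion that both removed open-coded versions did by hand: the wire value is the inverse 1/p, so p (as a fraction scaled by 10^6) comes from a fixpoint division, with the endpoints of RFC 4342, 8.5 special-cased. A sketch of the presumed semantics, reconstructed from the removed lines; the actual helper may differ in rounding and endpoint handling:

    #include <stdint.h>
    #include <stdio.h>

    /* p travels on the wire as its inverse 1/p; convert back to a loss
     * fraction scaled by 10^6. Endpoints follow the removed code paths
     * above; the real tfrc_invert_loss_event_rate() may differ. */
    static uint32_t invert_loss_event_rate(uint32_t pinv)
    {
            if (pinv == ~0U)        /* no loss observed (RFC 4342, 8.5) */
                    return 0;
            if (pinv == 0)          /* degenerate: treat as 100% loss */
                    return 1000000;
            return 1000000 / pinv;  /* e.g. pinv=100 -> p = 1% = 10000 */
    }

    int main(void)
    {
            uint32_t probes[] = { ~0U, 0, 1, 100, 1000000 };
            for (unsigned i = 0; i < sizeof(probes)/sizeof(*probes); i++)
                    printf("pinv=%u -> p(1e6)=%u\n", probes[i],
                           invert_loss_event_rate(probes[i]));
            return 0;
    }
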
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 032635776653..1a9933c29672 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -42,35 +42,36 @@ | |||
42 | #include "lib/tfrc.h" | 42 | #include "lib/tfrc.h" |
43 | #include "../ccid.h" | 43 | #include "../ccid.h" |
44 | 44 | ||
45 | /* Two seconds as per RFC 3448 4.2 */ | 45 | /* Two seconds as per RFC 5348, 4.2 */ |
46 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) | 46 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) |
47 | 47 | ||
48 | /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ | ||
49 | #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) | ||
50 | |||
51 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ | 48 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ |
52 | #define TFRC_T_MBI 64 | 49 | #define TFRC_T_MBI 64 |
53 | 50 | ||
51 | /* | ||
52 | * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are | ||
53 | * rounded down to 0, since sk_reset_timer() here uses millisecond granularity. | ||
54 | * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse | ||
55 | * resolution of HZ < 500 means that the error is below one timer tick (t_gran) | ||
56 | * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ). | ||
57 | */ | ||
58 | #if (HZ >= 500) | ||
59 | # define TFRC_T_DELTA USEC_PER_MSEC | ||
60 | #else | ||
61 | # define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ)) | ||
62 | #endif | ||
63 | |||
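
Plugging common tick rates into the two branches shows why 500 Hz is the crossover: above it, the half-tick bound falls to the 1 ms granularity that sk_reset_timer() can resolve anyway. A small table generator, assuming the definitions above:

    #include <stdio.h>

    static unsigned t_delta_us(unsigned hz)
    {
            return hz >= 500 ? 1000 : 1000000 / (2 * hz); /* usec */
    }

    int main(void)
    {
            unsigned hzs[] = { 100, 250, 300, 500, 1000 };
            for (unsigned i = 0; i < sizeof(hzs)/sizeof(*hzs); i++)
                    printf("HZ=%4u -> t_delta = %u usec\n",
                           hzs[i], t_delta_us(hzs[i]));
            return 0;   /* 5000, 2000, 1666, 1000, 1000 usec */
    }
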
54 | enum ccid3_options { | 64 | enum ccid3_options { |
55 | TFRC_OPT_LOSS_EVENT_RATE = 192, | 65 | TFRC_OPT_LOSS_EVENT_RATE = 192, |
56 | TFRC_OPT_LOSS_INTERVALS = 193, | 66 | TFRC_OPT_LOSS_INTERVALS = 193, |
57 | TFRC_OPT_RECEIVE_RATE = 194, | 67 | TFRC_OPT_RECEIVE_RATE = 194, |
58 | }; | 68 | }; |
59 | 69 | ||
60 | struct ccid3_options_received { | ||
61 | u64 ccid3or_seqno:48, | ||
62 | ccid3or_loss_intervals_idx:16; | ||
63 | u16 ccid3or_loss_intervals_len; | ||
64 | u32 ccid3or_loss_event_rate; | ||
65 | u32 ccid3or_receive_rate; | ||
66 | }; | ||
67 | |||
68 | /* TFRC sender states */ | 70 | /* TFRC sender states */ |
69 | enum ccid3_hc_tx_states { | 71 | enum ccid3_hc_tx_states { |
70 | TFRC_SSTATE_NO_SENT = 1, | 72 | TFRC_SSTATE_NO_SENT = 1, |
71 | TFRC_SSTATE_NO_FBACK, | 73 | TFRC_SSTATE_NO_FBACK, |
72 | TFRC_SSTATE_FBACK, | 74 | TFRC_SSTATE_FBACK, |
73 | TFRC_SSTATE_TERM, | ||
74 | }; | 75 | }; |
75 | 76 | ||
76 | /** | 77 | /** |
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states { | |||
90 | * @tx_no_feedback_timer: Handle to no feedback timer | 91 | * @tx_no_feedback_timer: Handle to no feedback timer |
91 | * @tx_t_ld: Time last doubled during slow start | 92 | * @tx_t_ld: Time last doubled during slow start |
92 | * @tx_t_nom: Nominal send time of next packet | 93 | * @tx_t_nom: Nominal send time of next packet |
93 | * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs | ||
94 | * @tx_hist: Packet history | 94 | * @tx_hist: Packet history |
95 | * @tx_options_received: Parsed set of retrieved options | ||
96 | */ | 95 | */ |
97 | struct ccid3_hc_tx_sock { | 96 | struct ccid3_hc_tx_sock { |
98 | struct tfrc_tx_info tx_tfrc; | 97 | u64 tx_x; |
99 | #define tx_x tx_tfrc.tfrctx_x | 98 | u64 tx_x_recv; |
100 | #define tx_x_recv tx_tfrc.tfrctx_x_recv | 99 | u32 tx_x_calc; |
101 | #define tx_x_calc tx_tfrc.tfrctx_x_calc | 100 | u32 tx_rtt; |
102 | #define tx_rtt tx_tfrc.tfrctx_rtt | 101 | u32 tx_p; |
103 | #define tx_p tx_tfrc.tfrctx_p | 102 | u32 tx_t_rto; |
104 | #define tx_t_rto tx_tfrc.tfrctx_rto | 103 | u32 tx_t_ipi; |
105 | #define tx_t_ipi tx_tfrc.tfrctx_ipi | ||
106 | u16 tx_s; | 104 | u16 tx_s; |
107 | enum ccid3_hc_tx_states tx_state:8; | 105 | enum ccid3_hc_tx_states tx_state:8; |
108 | u8 tx_last_win_count; | 106 | u8 tx_last_win_count; |
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock { | |||
110 | struct timer_list tx_no_feedback_timer; | 108 | struct timer_list tx_no_feedback_timer; |
111 | ktime_t tx_t_ld; | 109 | ktime_t tx_t_ld; |
112 | ktime_t tx_t_nom; | 110 | ktime_t tx_t_nom; |
113 | u32 tx_delta; | ||
114 | struct tfrc_tx_hist_entry *tx_hist; | 111 | struct tfrc_tx_hist_entry *tx_hist; |
115 | struct ccid3_options_received tx_options_received; | ||
116 | }; | 112 | }; |
117 | 113 | ||
118 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | 114 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) |
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | |||
126 | enum ccid3_hc_rx_states { | 122 | enum ccid3_hc_rx_states { |
127 | TFRC_RSTATE_NO_DATA = 1, | 123 | TFRC_RSTATE_NO_DATA = 1, |
128 | TFRC_RSTATE_DATA, | 124 | TFRC_RSTATE_DATA, |
129 | TFRC_RSTATE_TERM = 127, | ||
130 | }; | 125 | }; |
131 | 126 | ||
132 | /** | 127 | /** |
133 | * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket | 128 | * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket |
134 | * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3) | ||
135 | * @rx_rtt: Receiver estimate of rtt (non-standard) | ||
136 | * @rx_p: Current loss event rate (RFC 3448 5.4) | ||
137 | * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) | 129 | * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) |
138 | * @rx_state: Receiver state, one of %ccid3_hc_rx_states | 130 | * @rx_state: Receiver state, one of %ccid3_hc_rx_states |
139 | * @rx_bytes_recv: Total sum of DCCP payload bytes | 131 | * @rx_bytes_recv: Total sum of DCCP payload bytes |
140 | * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) | 132 | * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) |
141 | * @rx_rtt: Receiver estimate of RTT | 133 | * @rx_rtt: Receiver estimate of RTT |
142 | * @rx_tstamp_last_feedback: Time at which last feedback was sent | 134 | * @rx_tstamp_last_feedback: Time at which last feedback was sent |
143 | * @rx_tstamp_last_ack: Time at which last feedback was sent | ||
144 | * @rx_hist: Packet history (loss detection + RTT sampling) | 135 | * @rx_hist: Packet history (loss detection + RTT sampling) |
145 | * @rx_li_hist: Loss Interval database | 136 | * @rx_li_hist: Loss Interval database |
146 | * @rx_s: Received packet size in bytes | 137 | * @rx_s: Received packet size in bytes |
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 8fc3cbf79071..497723c4d4bb 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c | |||
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) | |||
116 | cur->li_length = len; | 116 | cur->li_length = len; |
117 | tfrc_lh_calc_i_mean(lh); | 117 | tfrc_lh_calc_i_mean(lh); |
118 | 118 | ||
119 | return (lh->i_mean < old_i_mean); | 119 | return lh->i_mean < old_i_mean; |
120 | } | 120 | } |
121 | 121 | ||
122 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ | 122 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ |
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 3a4f414e94a0..de8fe294bf0b 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c | |||
@@ -38,18 +38,6 @@ | |||
38 | #include "packet_history.h" | 38 | #include "packet_history.h" |
39 | #include "../../dccp.h" | 39 | #include "../../dccp.h" |
40 | 40 | ||
41 | /** | ||
42 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
43 | * @next: next oldest entry (LIFO order) | ||
44 | * @seqno: sequence number of this entry | ||
45 | * @stamp: send time of packet with sequence number @seqno | ||
46 | */ | ||
47 | struct tfrc_tx_hist_entry { | ||
48 | struct tfrc_tx_hist_entry *next; | ||
49 | u64 seqno; | ||
50 | ktime_t stamp; | ||
51 | }; | ||
52 | |||
53 | /* | 41 | /* |
54 | * Transmitter History Routines | 42 | * Transmitter History Routines |
55 | */ | 43 | */ |
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void) | |||
71 | } | 59 | } |
72 | } | 60 | } |
73 | 61 | ||
74 | static struct tfrc_tx_hist_entry * | ||
75 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | ||
76 | { | ||
77 | while (head != NULL && head->seqno != seqno) | ||
78 | head = head->next; | ||
79 | |||
80 | return head; | ||
81 | } | ||
82 | |||
83 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) | 62 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) |
84 | { | 63 | { |
85 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); | 64 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); |
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) | |||
107 | *headp = NULL; | 86 | *headp = NULL; |
108 | } | 87 | } |
109 | 88 | ||
110 | u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, | ||
111 | const ktime_t now) | ||
112 | { | ||
113 | u32 rtt = 0; | ||
114 | struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); | ||
115 | |||
116 | if (packet != NULL) { | ||
117 | rtt = ktime_us_delta(now, packet->stamp); | ||
118 | /* | ||
119 | * Garbage-collect older (irrelevant) entries: | ||
120 | */ | ||
121 | tfrc_tx_hist_purge(&packet->next); | ||
122 | } | ||
123 | |||
124 | return rtt; | ||
125 | } | ||
126 | |||
127 | |||
128 | /* | 89 | /* |
129 | * Receiver History Routines | 90 | * Receiver History Routines |
130 | */ | 91 | */ |
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 7df6c5299999..7ee4a9d9d335 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h | |||
@@ -40,12 +40,28 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include "tfrc.h" | 41 | #include "tfrc.h" |
42 | 42 | ||
43 | struct tfrc_tx_hist_entry; | 43 | /** |
44 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
45 | * @next: next oldest entry (LIFO order) | ||
46 | * @seqno: sequence number of this entry | ||
47 | * @stamp: send time of packet with sequence number @seqno | ||
48 | */ | ||
49 | struct tfrc_tx_hist_entry { | ||
50 | struct tfrc_tx_hist_entry *next; | ||
51 | u64 seqno; | ||
52 | ktime_t stamp; | ||
53 | }; | ||
54 | |||
55 | static inline struct tfrc_tx_hist_entry * | ||
56 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | ||
57 | { | ||
58 | while (head != NULL && head->seqno != seqno) | ||
59 | head = head->next; | ||
60 | return head; | ||
61 | } | ||
44 | 62 | ||
45 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); | 63 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); |
46 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); | 64 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); |
47 | extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, | ||
48 | const u64 seqno, const ktime_t now); | ||
49 | 65 | ||
50 | /* Subtraction a-b modulo-16, respects circular wrap-around */ | 66 | /* Subtraction a-b modulo-16, respects circular wrap-around */ |
51 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) | 67 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) |
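With the struct and search helper moved into the header, callers can open-code RTT sampling against the TX history. A self-contained sketch of the same LIFO walk (types trimmed to the essentials):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct hist_entry {
	struct hist_entry *next;	/* next oldest entry (LIFO order) */
	uint64_t seqno;
};

static struct hist_entry *hist_find(struct hist_entry *head, uint64_t seqno)
{
	while (head != NULL && head->seqno != seqno)
		head = head->next;
	return head;
}

int main(void)
{
	/* entries pushed in order 1, 2, 3: newest (3) is the head */
	struct hist_entry e1 = { NULL, 1 }, e2 = { &e1, 2 }, e3 = { &e2, 3 };

	printf("%s\n", hist_find(&e3, 2) ? "found" : "missing");  /* found */
	printf("%s\n", hist_find(&e3, 9) ? "found" : "missing");  /* missing */
	return 0;
}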
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 01bb48e96c2e..f8ee3f549770 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) | |||
57 | 57 | ||
58 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); | 58 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); |
59 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); | 59 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); |
60 | extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); | ||
60 | 61 | ||
61 | extern int tfrc_tx_packet_history_init(void); | 62 | extern int tfrc_tx_packet_history_init(void); |
62 | extern void tfrc_tx_packet_history_exit(void); | 63 | extern void tfrc_tx_packet_history_exit(void); |
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 22ca1cf0eb55..a052a4377e26 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c | |||
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) | |||
687 | index = tfrc_binsearch(fvalue, 0); | 687 | index = tfrc_binsearch(fvalue, 0); |
688 | return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; | 688 | return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; |
689 | } | 689 | } |
690 | |||
691 | /** | ||
692 | * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% | ||
693 | * When @loss_event_rate is large, there is a chance that p is truncated to 0. | ||
694 | * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. | ||
695 | */ | ||
696 | u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) | ||
697 | { | ||
698 | if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ | ||
699 | return 0; | ||
700 | if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ | ||
701 | return 1000000; | ||
702 | return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); | ||
703 | } | ||
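A userspace sketch of the inversion above, assuming scaled_div(1, x) reduces to 1000000 / x and that TFRC_SMALLEST_P is some small non-zero constant (the value 100 below is purely illustrative):

#include <stdio.h>
#include <limits.h>

#define TFRC_SMALLEST_P	100u	/* hypothetical stand-in */

static unsigned int invert_loss_event_rate(unsigned int rate)
{
	if (rate == UINT_MAX)		/* no loss observed, RFC 4342, 8.5 */
		return 0;
	if (rate == 0)			/* map 1/0 into 100% */
		return 1000000;

	unsigned int p = 1000000 / rate;
	return p > TFRC_SMALLEST_P ? p : TFRC_SMALLEST_P;	/* clamp */
}

int main(void)
{
	printf("%u\n", invert_loss_event_rate(UINT_MAX));	/* 0 */
	printf("%u\n", invert_loss_event_rate(10));		/* 100000 */
	printf("%u\n", invert_loss_event_rate(50000000));	/* 100, clamped */
	return 0;
}

The clamp is the point: a very long loss-free stretch would otherwise truncate p to 0 and throw the sender back into slow start.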
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 3ccef1b70fee..5fdb07229017 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -93,9 +93,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); | |||
93 | #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) | 93 | #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) |
94 | #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) | 94 | #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) |
95 | 95 | ||
96 | /* Maximal interval between probes for local resources. */ | ||
97 | #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) | ||
98 | |||
99 | /* sysctl variables for DCCP */ | 96 | /* sysctl variables for DCCP */ |
100 | extern int sysctl_dccp_request_retries; | 97 | extern int sysctl_dccp_request_retries; |
101 | extern int sysctl_dccp_retries1; | 98 | extern int sysctl_dccp_retries1; |
@@ -153,18 +150,27 @@ static inline u64 max48(const u64 seq1, const u64 seq2) | |||
153 | } | 150 | } |
154 | 151 | ||
155 | /** | 152 | /** |
156 | * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1 | 153 | * dccp_loss_count - Approximate the number of lost data packets in a burst loss |
157 | * @s1: start sequence number | 154 | * @s1: last known sequence number before the loss ('hole') |
158 | * @s2: end sequence number | 155 | * @s2: first sequence number seen after the 'hole' |
159 | * @ndp: NDP count on packet with sequence number @s2 | 156 | * @ndp: NDP count on packet with sequence number @s2 |
160 | * Returns true if the sequence range s1...s2 has no data loss. | ||
161 | */ | 157 | */ |
162 | static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) | 158 | static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp) |
163 | { | 159 | { |
164 | s64 delta = dccp_delta_seqno(s1, s2); | 160 | s64 delta = dccp_delta_seqno(s1, s2); |
165 | 161 | ||
166 | WARN_ON(delta < 0); | 162 | WARN_ON(delta < 0); |
167 | return (u64)delta <= ndp + 1; | 163 | delta -= ndp + 1; |
164 | |||
165 | return delta > 0 ? delta : 0; | ||
166 | } | ||
167 | |||
168 | /** | ||
169 | * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1 | ||
170 | */ | ||
171 | static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) | ||
172 | { | ||
173 | return dccp_loss_count(s1, s2, ndp) == 0; | ||
168 | } | 174 | } |
169 | 175 | ||
170 | enum { | 176 | enum { |
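A worked example of the accounting above (userspace sketch; plain integers stand in for the mod-2^48 sequence helpers, which is fine for small values):

#include <stdio.h>
#include <stdint.h>

static uint64_t loss_count(uint64_t s1, uint64_t s2, uint64_t ndp)
{
	/* the hole spans delta - 1 seqnos; the NDP count says how many of
	 * the delta slots after s1 carried no data */
	int64_t delta = (int64_t)(s2 - s1) - (int64_t)(ndp + 1);

	return delta > 0 ? (uint64_t)delta : 0;
}

int main(void)
{
	/* seqnos 11..13 missing, NDP says one carried no data: 2 lost */
	printf("%llu\n", (unsigned long long)loss_count(10, 14, 1));
	/* consecutive seqnos: 0 lost, so dccp_loss_free() holds */
	printf("%llu\n", (unsigned long long)loss_count(10, 11, 0));
	return 0;
}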
@@ -194,12 +200,7 @@ struct dccp_mib { | |||
194 | DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); | 200 | DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); |
195 | #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) | 201 | #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) |
196 | #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) | 202 | #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) |
197 | #define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) | ||
198 | #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) | 203 | #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) |
199 | #define DCCP_ADD_STATS_BH(field, val) \ | ||
200 | SNMP_ADD_STATS_BH(dccp_statistics, field, val) | ||
201 | #define DCCP_ADD_STATS_USER(field, val) \ | ||
202 | SNMP_ADD_STATS_USER(dccp_statistics, field, val) | ||
203 | 204 | ||
204 | /* | 205 | /* |
205 | * Checksumming routines | 206 | * Checksumming routines |
@@ -234,8 +235,22 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
234 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | 235 | extern void dccp_send_sync(struct sock *sk, const u64 seq, |
235 | const enum dccp_pkt_type pkt_type); | 236 | const enum dccp_pkt_type pkt_type); |
236 | 237 | ||
237 | extern void dccp_write_xmit(struct sock *sk, int block); | 238 | /* |
238 | extern void dccp_write_space(struct sock *sk); | 239 | * TX Packet Dequeueing Interface |
240 | */ | ||
241 | extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); | ||
242 | extern bool dccp_qpolicy_full(struct sock *sk); | ||
243 | extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); | ||
244 | extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); | ||
245 | extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); | ||
246 | extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); | ||
247 | |||
248 | /* | ||
249 | * TX Packet Output and TX Timers | ||
250 | */ | ||
251 | extern void dccp_write_xmit(struct sock *sk); | ||
252 | extern void dccp_write_space(struct sock *sk); | ||
253 | extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); | ||
239 | 254 | ||
240 | extern void dccp_init_xmit_timers(struct sock *sk); | 255 | extern void dccp_init_xmit_timers(struct sock *sk); |
241 | static inline void dccp_clear_xmit_timers(struct sock *sk) | 256 | static inline void dccp_clear_xmit_timers(struct sock *sk) |
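The qpolicy declarations above replace ad-hoc write-queue handling with a push/full/top/pop contract (implemented in net/dccp/qpolicy.c, per the diffstat). A userspace analog showing just the shape of that contract (bounded FIFO; the names and queue length are illustrative):

#include <stdio.h>
#include <stddef.h>

#define QLEN 4
static int q[QLEN];
static size_t head, tail, count;

static int  qpolicy_full(void)  { return count == QLEN; }
static void qpolicy_push(int v) { q[tail] = v; tail = (tail + 1) % QLEN; count++; }
static int *qpolicy_top(void)   { return count ? &q[head] : NULL; }

static int qpolicy_pop(void)	/* caller must check qpolicy_top() first */
{
	int v = q[head];

	head = (head + 1) % QLEN;
	count--;
	return v;
}

int main(void)
{
	for (int i = 0; !qpolicy_full(); i++)
		qpolicy_push(i);
	while (qpolicy_top() != NULL)
		printf("%d ", qpolicy_pop());	/* 0 1 2 3 */
	printf("\n");
	return 0;
}

The top/pop split mirrors the kernel interface: the CCID may peek at the head packet to decide whether it can be sent before the queue commits to dequeueing it.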
@@ -246,7 +261,6 @@ static inline void dccp_clear_xmit_timers(struct sock *sk) | |||
246 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); | 261 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); |
247 | 262 | ||
248 | extern const char *dccp_packet_name(const int type); | 263 | extern const char *dccp_packet_name(const int type); |
249 | extern const char *dccp_state_name(const int state); | ||
250 | 264 | ||
251 | extern void dccp_set_state(struct sock *sk, const int state); | 265 | extern void dccp_set_state(struct sock *sk, const int state); |
252 | extern void dccp_done(struct sock *sk); | 266 | extern void dccp_done(struct sock *sk); |
@@ -412,9 +426,27 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) | |||
412 | { | 426 | { |
413 | struct dccp_sock *dp = dccp_sk(sk); | 427 | struct dccp_sock *dp = dccp_sk(sk); |
414 | 428 | ||
415 | dp->dccps_gsr = seq; | 429 | if (after48(seq, dp->dccps_gsr)) |
430 | dp->dccps_gsr = seq; | ||
416 | /* Sequence validity window depends on remote Sequence Window (7.5.1) */ | 431 | /* Sequence validity window depends on remote Sequence Window (7.5.1) */ |
417 | dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); | 432 | dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); |
433 | /* | ||
434 | * Adjust SWL so that it is not below ISR. In contrast to RFC 4340, | ||
435 | * 7.5.1 we perform this check beyond the initial handshake: W/W' are | ||
436 | * always > 32, so for the first W/W' packets in the lifetime of a | ||
437 | * connection we always have to adjust SWL. | ||
438 | * A second reason why we are doing this is that the window depends on | ||
439 | * the feature-remote value of Sequence Window: nothing stops the peer | ||
440 | * from updating this value while we are busy adjusting SWL for the | ||
441 | * first W packets (we would have to count from scratch again then). | ||
442 | * Therefore it is safer to always make sure that the Sequence Window | ||
443 | * is not artificially extended by a peer who grows SWL downwards by | ||
444 | * continually updating the feature-remote Sequence-Window. | ||
445 | * If sequence numbers wrap it is bad luck. But that will take a while | ||
446 | * (48 bit), and this measure prevents Sequence-number attacks. | ||
447 | */ | ||
448 | if (before48(dp->dccps_swl, dp->dccps_isr)) | ||
449 | dp->dccps_swl = dp->dccps_isr; | ||
418 | dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); | 450 | dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); |
419 | } | 451 | } |
420 | 452 | ||
@@ -425,16 +457,21 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) | |||
425 | dp->dccps_gss = seq; | 457 | dp->dccps_gss = seq; |
426 | /* Ack validity window depends on local Sequence Window value (7.5.1) */ | 458 | /* Ack validity window depends on local Sequence Window value (7.5.1) */ |
427 | dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); | 459 | dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); |
460 | /* Adjust AWL so that it is not below ISS - see comment above for SWL */ | ||
461 | if (before48(dp->dccps_awl, dp->dccps_iss)) | ||
462 | dp->dccps_awl = dp->dccps_iss; | ||
428 | dp->dccps_awh = dp->dccps_gss; | 463 | dp->dccps_awh = dp->dccps_gss; |
429 | } | 464 | } |
430 | 465 | ||
466 | static inline int dccp_ackvec_pending(const struct sock *sk) | ||
467 | { | ||
468 | return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL && | ||
469 | !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec); | ||
470 | } | ||
471 | |||
431 | static inline int dccp_ack_pending(const struct sock *sk) | 472 | static inline int dccp_ack_pending(const struct sock *sk) |
432 | { | 473 | { |
433 | const struct dccp_sock *dp = dccp_sk(sk); | 474 | return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); |
434 | return dp->dccps_timestamp_echo != 0 || | ||
435 | (dp->dccps_hc_rx_ackvec != NULL && | ||
436 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || | ||
437 | inet_csk_ack_scheduled(sk); | ||
438 | } | 475 | } |
439 | 476 | ||
440 | extern int dccp_feat_finalise_settings(struct dccp_sock *dp); | 477 | extern int dccp_feat_finalise_settings(struct dccp_sock *dp); |
@@ -449,7 +486,6 @@ extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); | |||
449 | extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); | 486 | extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); |
450 | extern u32 dccp_timestamp(void); | 487 | extern u32 dccp_timestamp(void); |
451 | extern void dccp_timestamping_init(void); | 488 | extern void dccp_timestamping_init(void); |
452 | extern int dccp_insert_option_timestamp(struct sk_buff *skb); | ||
453 | extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, | 489 | extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, |
454 | const void *value, unsigned char len); | 490 | const void *value, unsigned char len); |
455 | 491 | ||
diff --git a/net/dccp/feat.c b/net/dccp/feat.c index df7dd26cf07e..568def952722 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c | |||
@@ -730,16 +730,6 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, | |||
730 | 0, list, len); | 730 | 0, list, len); |
731 | } | 731 | } |
732 | 732 | ||
733 | /* Analogous to dccp_feat_register_sp(), but for non-negotiable values */ | ||
734 | int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val) | ||
735 | { | ||
736 | /* any changes must be registered before establishing the connection */ | ||
737 | if (sk->sk_state != DCCP_CLOSED) | ||
738 | return -EISCONN; | ||
739 | if (dccp_feat_type(feat) != FEAT_NN) | ||
740 | return -EINVAL; | ||
741 | return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val); | ||
742 | } | ||
743 | 733 | ||
744 | /* | 734 | /* |
745 | * Tracking features whose values depend on the choice of CCID | 735 | * Tracking features whose values depend on the choice of CCID |
diff --git a/net/dccp/feat.h b/net/dccp/feat.h index f96721619def..e56a4e5e634e 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h | |||
@@ -111,7 +111,6 @@ extern int dccp_feat_init(struct sock *sk); | |||
111 | extern void dccp_feat_initialise_sysctls(void); | 111 | extern void dccp_feat_initialise_sysctls(void); |
112 | extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, | 112 | extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, |
113 | u8 const *list, u8 len); | 113 | u8 const *list, u8 len); |
114 | extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); | ||
115 | extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, | 114 | extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, |
116 | u8 mand, u8 opt, u8 feat, u8 *val, u8 len); | 115 | u8 mand, u8 opt, u8 feat, u8 *val, u8 len); |
117 | extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); | 116 | extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); |
diff --git a/net/dccp/input.c b/net/dccp/input.c index 10c957a88f4f..4222e7a654b0 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) | |||
160 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | 160 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); |
161 | } | 161 | } |
162 | 162 | ||
163 | static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | 163 | static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) |
164 | { | 164 | { |
165 | struct dccp_sock *dp = dccp_sk(sk); | 165 | struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; |
166 | 166 | ||
167 | if (dp->dccps_hc_rx_ackvec != NULL) | 167 | if (av == NULL) |
168 | dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, | 168 | return; |
169 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 169 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
170 | dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
171 | dccp_ackvec_input(av, skb); | ||
170 | } | 172 | } |
171 | 173 | ||
172 | static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) | 174 | static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) |
@@ -239,7 +241,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | |||
239 | dccp_update_gsr(sk, seqno); | 241 | dccp_update_gsr(sk, seqno); |
240 | 242 | ||
241 | if (dh->dccph_type != DCCP_PKT_SYNC && | 243 | if (dh->dccph_type != DCCP_PKT_SYNC && |
242 | (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) | 244 | ackno != DCCP_PKT_WITHOUT_ACK_SEQ && |
245 | after48(ackno, dp->dccps_gar)) | ||
243 | dp->dccps_gar = ackno; | 246 | dp->dccps_gar = ackno; |
244 | } else { | 247 | } else { |
245 | unsigned long now = jiffies; | 248 | unsigned long now = jiffies; |
@@ -257,9 +260,9 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | |||
257 | */ | 260 | */ |
258 | if (time_before(now, (dp->dccps_rate_last + | 261 | if (time_before(now, (dp->dccps_rate_last + |
259 | sysctl_dccp_sync_ratelimit))) | 262 | sysctl_dccp_sync_ratelimit))) |
260 | return 0; | 263 | return -1; |
261 | 264 | ||
262 | DCCP_WARN("DCCP: Step 6 failed for %s packet, " | 265 | DCCP_WARN("Step 6 failed for %s packet, " |
263 | "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " | 266 | "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " |
264 | "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " | 267 | "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " |
265 | "sending SYNC...\n", dccp_packet_name(dh->dccph_type), | 268 | "sending SYNC...\n", dccp_packet_name(dh->dccph_type), |
@@ -365,22 +368,13 @@ discard: | |||
365 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | 368 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, |
366 | const struct dccp_hdr *dh, const unsigned len) | 369 | const struct dccp_hdr *dh, const unsigned len) |
367 | { | 370 | { |
368 | struct dccp_sock *dp = dccp_sk(sk); | ||
369 | |||
370 | if (dccp_check_seqno(sk, skb)) | 371 | if (dccp_check_seqno(sk, skb)) |
371 | goto discard; | 372 | goto discard; |
372 | 373 | ||
373 | if (dccp_parse_options(sk, NULL, skb)) | 374 | if (dccp_parse_options(sk, NULL, skb)) |
374 | return 1; | 375 | return 1; |
375 | 376 | ||
376 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 377 | dccp_handle_ackvec_processing(sk, skb); |
377 | dccp_event_ack_recv(sk, skb); | ||
378 | |||
379 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
380 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
381 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
382 | DCCP_ACKVEC_STATE_RECEIVED)) | ||
383 | goto discard; | ||
384 | dccp_deliver_input_to_ccids(sk, skb); | 378 | dccp_deliver_input_to_ccids(sk, skb); |
385 | 379 | ||
386 | return __dccp_rcv_established(sk, skb, dh, len); | 380 | return __dccp_rcv_established(sk, skb, dh, len); |
@@ -441,20 +435,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
441 | kfree_skb(sk->sk_send_head); | 435 | kfree_skb(sk->sk_send_head); |
442 | sk->sk_send_head = NULL; | 436 | sk->sk_send_head = NULL; |
443 | 437 | ||
444 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
445 | dccp_update_gsr(sk, dp->dccps_isr); | ||
446 | /* | 438 | /* |
447 | * SWL and AWL are initially adjusted so that they are not less than | 439 | * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect |
448 | * the initial Sequence Numbers received and sent, respectively: | 440 | * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH |
449 | * SWL := max(GSR + 1 - floor(W/4), ISR), | 441 | * is done as part of activating the feature values below, since |
450 | * AWL := max(GSS - W' + 1, ISS). | 442 | * these settings depend on the local/remote Sequence Window |
451 | * These adjustments MUST be applied only at the beginning of the | 443 | * features, which were undefined or not confirmed until now. |
452 | * connection. | ||
453 | * | ||
454 | * AWL was adjusted in dccp_v4_connect -acme | ||
455 | */ | 444 | */ |
456 | dccp_set_seqno(&dp->dccps_swl, | 445 | dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; |
457 | max48(dp->dccps_swl, dp->dccps_isr)); | ||
458 | 446 | ||
459 | dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 447 | dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
460 | 448 | ||
@@ -626,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
626 | /* Caller (dccp_v4_do_rcv) will send Reset */ | 614 | /* Caller (dccp_v4_do_rcv) will send Reset */ |
627 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | 615 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; |
628 | return 1; | 616 | return 1; |
617 | } else if (sk->sk_state == DCCP_CLOSED) { | ||
618 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
619 | return 1; | ||
629 | } | 620 | } |
630 | 621 | ||
631 | if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { | 622 | if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { |
@@ -638,15 +629,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
638 | if (dccp_parse_options(sk, NULL, skb)) | 629 | if (dccp_parse_options(sk, NULL, skb)) |
639 | return 1; | 630 | return 1; |
640 | 631 | ||
641 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 632 | dccp_handle_ackvec_processing(sk, skb); |
642 | dccp_event_ack_recv(sk, skb); | ||
643 | |||
644 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
645 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
646 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
647 | DCCP_ACKVEC_STATE_RECEIVED)) | ||
648 | goto discard; | ||
649 | |||
650 | dccp_deliver_input_to_ccids(sk, skb); | 633 | dccp_deliver_input_to_ccids(sk, skb); |
651 | } | 634 | } |
652 | 635 | ||
@@ -688,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
688 | } | 671 | } |
689 | 672 | ||
690 | switch (sk->sk_state) { | 673 | switch (sk->sk_state) { |
691 | case DCCP_CLOSED: | ||
692 | dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
693 | return 1; | ||
694 | |||
695 | case DCCP_REQUESTING: | 674 | case DCCP_REQUESTING: |
696 | queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); | 675 | queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); |
697 | if (queued >= 0) | 676 | if (queued >= 0) |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d4a166f0f391..8c36adfd1919 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -40,13 +40,15 @@ | |||
40 | 40 | ||
41 | int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 41 | int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
42 | { | 42 | { |
43 | const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
43 | struct inet_sock *inet = inet_sk(sk); | 44 | struct inet_sock *inet = inet_sk(sk); |
44 | struct dccp_sock *dp = dccp_sk(sk); | 45 | struct dccp_sock *dp = dccp_sk(sk); |
45 | const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | 46 | __be16 orig_sport, orig_dport; |
46 | struct rtable *rt; | ||
47 | __be32 daddr, nexthop; | 47 | __be32 daddr, nexthop; |
48 | int tmp; | 48 | struct flowi4 *fl4; |
49 | struct rtable *rt; | ||
49 | int err; | 50 | int err; |
51 | struct ip_options_rcu *inet_opt; | ||
50 | 52 | ||
51 | dp->dccps_role = DCCP_ROLE_CLIENT; | 53 | dp->dccps_role = DCCP_ROLE_CLIENT; |
52 | 54 | ||
@@ -57,37 +59,43 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
57 | return -EAFNOSUPPORT; | 59 | return -EAFNOSUPPORT; |
58 | 60 | ||
59 | nexthop = daddr = usin->sin_addr.s_addr; | 61 | nexthop = daddr = usin->sin_addr.s_addr; |
60 | if (inet->opt != NULL && inet->opt->srr) { | 62 | |
63 | inet_opt = rcu_dereference_protected(inet->inet_opt, | ||
64 | sock_owned_by_user(sk)); | ||
65 | if (inet_opt != NULL && inet_opt->opt.srr) { | ||
61 | if (daddr == 0) | 66 | if (daddr == 0) |
62 | return -EINVAL; | 67 | return -EINVAL; |
63 | nexthop = inet->opt->faddr; | 68 | nexthop = inet_opt->opt.faddr; |
64 | } | 69 | } |
65 | 70 | ||
66 | tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, | 71 | orig_sport = inet->inet_sport; |
67 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | 72 | orig_dport = usin->sin_port; |
68 | IPPROTO_DCCP, | 73 | fl4 = &inet->cork.fl.u.ip4; |
69 | inet->inet_sport, usin->sin_port, sk, 1); | 74 | rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, |
70 | if (tmp < 0) | 75 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
71 | return tmp; | 76 | IPPROTO_DCCP, |
77 | orig_sport, orig_dport, sk, true); | ||
78 | if (IS_ERR(rt)) | ||
79 | return PTR_ERR(rt); | ||
72 | 80 | ||
73 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | 81 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { |
74 | ip_rt_put(rt); | 82 | ip_rt_put(rt); |
75 | return -ENETUNREACH; | 83 | return -ENETUNREACH; |
76 | } | 84 | } |
77 | 85 | ||
78 | if (inet->opt == NULL || !inet->opt->srr) | 86 | if (inet_opt == NULL || !inet_opt->opt.srr) |
79 | daddr = rt->rt_dst; | 87 | daddr = fl4->daddr; |
80 | 88 | ||
81 | if (inet->inet_saddr == 0) | 89 | if (inet->inet_saddr == 0) |
82 | inet->inet_saddr = rt->rt_src; | 90 | inet->inet_saddr = fl4->saddr; |
83 | inet->inet_rcv_saddr = inet->inet_saddr; | 91 | inet->inet_rcv_saddr = inet->inet_saddr; |
84 | 92 | ||
85 | inet->inet_dport = usin->sin_port; | 93 | inet->inet_dport = usin->sin_port; |
86 | inet->inet_daddr = daddr; | 94 | inet->inet_daddr = daddr; |
87 | 95 | ||
88 | inet_csk(sk)->icsk_ext_hdr_len = 0; | 96 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
89 | if (inet->opt != NULL) | 97 | if (inet_opt) |
90 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; | 98 | inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; |
91 | /* | 99 | /* |
92 | * Socket identity is still unknown (sport may be zero). | 100 | * Socket identity is still unknown (sport may be zero). |
93 | * However we set state to DCCP_REQUESTING and not releasing socket | 101 | * However we set state to DCCP_REQUESTING and not releasing socket |
@@ -99,11 +107,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
99 | if (err != 0) | 107 | if (err != 0) |
100 | goto failure; | 108 | goto failure; |
101 | 109 | ||
102 | err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, | 110 | rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, |
103 | inet->inet_dport, sk); | 111 | inet->inet_sport, inet->inet_dport, sk); |
104 | if (err != 0) | 112 | if (IS_ERR(rt)) { |
113 | rt = NULL; | ||
105 | goto failure; | 114 | goto failure; |
106 | 115 | } | |
107 | /* OK, now commit destination to socket. */ | 116 | /* OK, now commit destination to socket. */ |
108 | sk_setup_caps(sk, &rt->dst); | 117 | sk_setup_caps(sk, &rt->dst); |
109 | 118 | ||
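Note the error-handling shift in this hunk: the route now comes back as an ERR_PTR-encoded pointer instead of through an out-parameter plus status code. A userspace sketch of the idiom (the helpers mirror include/linux/err.h; route_connect() is a hypothetical stand-in):

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* the top page of the address space encodes -MAX_ERRNO..-1 */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *route_connect(int fail)
{
	static int route = 42;

	return fail ? ERR_PTR(-ENETUNREACH) : &route;
}

int main(void)
{
	void *rt = route_connect(1);

	if (IS_ERR(rt))
		printf("no route: %ld\n", PTR_ERR(rt));	/* -101 */
	return 0;
}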
@@ -387,39 +396,44 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
387 | if (sk_acceptq_is_full(sk)) | 396 | if (sk_acceptq_is_full(sk)) |
388 | goto exit_overflow; | 397 | goto exit_overflow; |
389 | 398 | ||
390 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
391 | goto exit; | ||
392 | |||
393 | newsk = dccp_create_openreq_child(sk, req, skb); | 399 | newsk = dccp_create_openreq_child(sk, req, skb); |
394 | if (newsk == NULL) | 400 | if (newsk == NULL) |
395 | goto exit; | 401 | goto exit_nonewsk; |
396 | |||
397 | sk_setup_caps(newsk, dst); | ||
398 | 402 | ||
399 | newinet = inet_sk(newsk); | 403 | newinet = inet_sk(newsk); |
400 | ireq = inet_rsk(req); | 404 | ireq = inet_rsk(req); |
401 | newinet->inet_daddr = ireq->rmt_addr; | 405 | newinet->inet_daddr = ireq->rmt_addr; |
402 | newinet->inet_rcv_saddr = ireq->loc_addr; | 406 | newinet->inet_rcv_saddr = ireq->loc_addr; |
403 | newinet->inet_saddr = ireq->loc_addr; | 407 | newinet->inet_saddr = ireq->loc_addr; |
404 | newinet->opt = ireq->opt; | 408 | newinet->inet_opt = ireq->opt; |
405 | ireq->opt = NULL; | 409 | ireq->opt = NULL; |
406 | newinet->mc_index = inet_iif(skb); | 410 | newinet->mc_index = inet_iif(skb); |
407 | newinet->mc_ttl = ip_hdr(skb)->ttl; | 411 | newinet->mc_ttl = ip_hdr(skb)->ttl; |
408 | newinet->inet_id = jiffies; | 412 | newinet->inet_id = jiffies; |
409 | 413 | ||
414 | if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) | ||
415 | goto put_and_exit; | ||
416 | |||
417 | sk_setup_caps(newsk, dst); | ||
418 | |||
410 | dccp_sync_mss(newsk, dst_mtu(dst)); | 419 | dccp_sync_mss(newsk, dst_mtu(dst)); |
411 | 420 | ||
421 | if (__inet_inherit_port(sk, newsk) < 0) | ||
422 | goto put_and_exit; | ||
412 | __inet_hash_nolisten(newsk, NULL); | 423 | __inet_hash_nolisten(newsk, NULL); |
413 | __inet_inherit_port(sk, newsk); | ||
414 | 424 | ||
415 | return newsk; | 425 | return newsk; |
416 | 426 | ||
417 | exit_overflow: | 427 | exit_overflow: |
418 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 428 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
429 | exit_nonewsk: | ||
430 | dst_release(dst); | ||
419 | exit: | 431 | exit: |
420 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 432 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
421 | dst_release(dst); | ||
422 | return NULL; | 433 | return NULL; |
434 | put_and_exit: | ||
435 | sock_put(newsk); | ||
436 | goto exit; | ||
423 | } | 437 | } |
424 | 438 | ||
425 | EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); | 439 | EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); |
@@ -457,20 +471,19 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, | |||
457 | struct sk_buff *skb) | 471 | struct sk_buff *skb) |
458 | { | 472 | { |
459 | struct rtable *rt; | 473 | struct rtable *rt; |
460 | struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, | 474 | struct flowi4 fl4 = { |
461 | .nl_u = { .ip4_u = | 475 | .flowi4_oif = skb_rtable(skb)->rt_iif, |
462 | { .daddr = ip_hdr(skb)->saddr, | 476 | .daddr = ip_hdr(skb)->saddr, |
463 | .saddr = ip_hdr(skb)->daddr, | 477 | .saddr = ip_hdr(skb)->daddr, |
464 | .tos = RT_CONN_FLAGS(sk) } }, | 478 | .flowi4_tos = RT_CONN_FLAGS(sk), |
465 | .proto = sk->sk_protocol, | 479 | .flowi4_proto = sk->sk_protocol, |
466 | .uli_u = { .ports = | 480 | .fl4_sport = dccp_hdr(skb)->dccph_dport, |
467 | { .sport = dccp_hdr(skb)->dccph_dport, | 481 | .fl4_dport = dccp_hdr(skb)->dccph_sport, |
468 | .dport = dccp_hdr(skb)->dccph_sport } | 482 | }; |
469 | } | 483 | |
470 | }; | 484 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
471 | 485 | rt = ip_route_output_flow(net, &fl4, sk); | |
472 | security_skb_classify_flow(skb, &fl); | 486 | if (IS_ERR(rt)) { |
473 | if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { | ||
474 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | 487 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
475 | return NULL; | 488 | return NULL; |
476 | } | 489 | } |
@@ -484,8 +497,9 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, | |||
484 | int err = -1; | 497 | int err = -1; |
485 | struct sk_buff *skb; | 498 | struct sk_buff *skb; |
486 | struct dst_entry *dst; | 499 | struct dst_entry *dst; |
500 | struct flowi4 fl4; | ||
487 | 501 | ||
488 | dst = inet_csk_route_req(sk, req); | 502 | dst = inet_csk_route_req(sk, &fl4, req); |
489 | if (dst == NULL) | 503 | if (dst == NULL) |
490 | goto out; | 504 | goto out; |
491 | 505 | ||
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e3f32575df7..8dc4348774a5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -54,8 +54,8 @@ static void dccp_v6_hash(struct sock *sk) | |||
54 | 54 | ||
55 | /* add pseudo-header to DCCP checksum stored in skb->csum */ | 55 | /* add pseudo-header to DCCP checksum stored in skb->csum */ |
56 | static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, | 56 | static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, |
57 | struct in6_addr *saddr, | 57 | const struct in6_addr *saddr, |
58 | struct in6_addr *daddr) | 58 | const struct in6_addr *daddr) |
59 | { | 59 | { |
60 | return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); | 60 | return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); |
61 | } | 61 | } |
@@ -87,7 +87,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) | |||
87 | static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 87 | static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
88 | u8 type, u8 code, int offset, __be32 info) | 88 | u8 type, u8 code, int offset, __be32 info) |
89 | { | 89 | { |
90 | struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; | 90 | const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; |
91 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); | 91 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); |
92 | struct dccp_sock *dp; | 92 | struct dccp_sock *dp; |
93 | struct ipv6_pinfo *np; | 93 | struct ipv6_pinfo *np; |
@@ -147,30 +147,24 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
147 | dst = __sk_dst_check(sk, np->dst_cookie); | 147 | dst = __sk_dst_check(sk, np->dst_cookie); |
148 | if (dst == NULL) { | 148 | if (dst == NULL) { |
149 | struct inet_sock *inet = inet_sk(sk); | 149 | struct inet_sock *inet = inet_sk(sk); |
150 | struct flowi fl; | 150 | struct flowi6 fl6; |
151 | 151 | ||
152 | /* BUGGG_FUTURE: Again, it is not clear how | 152 | /* BUGGG_FUTURE: Again, it is not clear how |
153 | to handle rthdr case. Ignore this complexity | 153 | to handle rthdr case. Ignore this complexity |
154 | for now. | 154 | for now. |
155 | */ | 155 | */ |
156 | memset(&fl, 0, sizeof(fl)); | 156 | memset(&fl6, 0, sizeof(fl6)); |
157 | fl.proto = IPPROTO_DCCP; | 157 | fl6.flowi6_proto = IPPROTO_DCCP; |
158 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | 158 | ipv6_addr_copy(&fl6.daddr, &np->daddr); |
159 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | 159 | ipv6_addr_copy(&fl6.saddr, &np->saddr); |
160 | fl.oif = sk->sk_bound_dev_if; | 160 | fl6.flowi6_oif = sk->sk_bound_dev_if; |
161 | fl.fl_ip_dport = inet->inet_dport; | 161 | fl6.fl6_dport = inet->inet_dport; |
162 | fl.fl_ip_sport = inet->inet_sport; | 162 | fl6.fl6_sport = inet->inet_sport; |
163 | security_sk_classify_flow(sk, &fl); | 163 | security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); |
164 | 164 | ||
165 | err = ip6_dst_lookup(sk, &dst, &fl); | 165 | dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); |
166 | if (err) { | 166 | if (IS_ERR(dst)) { |
167 | sk->sk_err_soft = -err; | 167 | sk->sk_err_soft = -PTR_ERR(dst); |
168 | goto out; | ||
169 | } | ||
170 | |||
171 | err = xfrm_lookup(net, &dst, &fl, sk, 0); | ||
172 | if (err < 0) { | ||
173 | sk->sk_err_soft = -err; | ||
174 | goto out; | 168 | goto out; |
175 | } | 169 | } |
176 | } else | 170 | } else |
@@ -249,34 +243,30 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, | |||
249 | struct sk_buff *skb; | 243 | struct sk_buff *skb; |
250 | struct ipv6_txoptions *opt = NULL; | 244 | struct ipv6_txoptions *opt = NULL; |
251 | struct in6_addr *final_p, final; | 245 | struct in6_addr *final_p, final; |
252 | struct flowi fl; | 246 | struct flowi6 fl6; |
253 | int err = -1; | 247 | int err = -1; |
254 | struct dst_entry *dst; | 248 | struct dst_entry *dst; |
255 | 249 | ||
256 | memset(&fl, 0, sizeof(fl)); | 250 | memset(&fl6, 0, sizeof(fl6)); |
257 | fl.proto = IPPROTO_DCCP; | 251 | fl6.flowi6_proto = IPPROTO_DCCP; |
258 | ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); | 252 | ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); |
259 | ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); | 253 | ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); |
260 | fl.fl6_flowlabel = 0; | 254 | fl6.flowlabel = 0; |
261 | fl.oif = ireq6->iif; | 255 | fl6.flowi6_oif = ireq6->iif; |
262 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | 256 | fl6.fl6_dport = inet_rsk(req)->rmt_port; |
263 | fl.fl_ip_sport = inet_rsk(req)->loc_port; | 257 | fl6.fl6_sport = inet_rsk(req)->loc_port; |
264 | security_req_classify_flow(req, &fl); | 258 | security_req_classify_flow(req, flowi6_to_flowi(&fl6)); |
265 | 259 | ||
266 | opt = np->opt; | 260 | opt = np->opt; |
267 | 261 | ||
268 | final_p = fl6_update_dst(&fl, opt, &final); | 262 | final_p = fl6_update_dst(&fl6, opt, &final); |
269 | |||
270 | err = ip6_dst_lookup(sk, &dst, &fl); | ||
271 | if (err) | ||
272 | goto done; | ||
273 | |||
274 | if (final_p) | ||
275 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
276 | 263 | ||
277 | err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); | 264 | dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); |
278 | if (err < 0) | 265 | if (IS_ERR(dst)) { |
266 | err = PTR_ERR(dst); | ||
267 | dst = NULL; | ||
279 | goto done; | 268 | goto done; |
269 | } | ||
280 | 270 | ||
281 | skb = dccp_make_response(sk, dst, req); | 271 | skb = dccp_make_response(sk, dst, req); |
282 | if (skb != NULL) { | 272 | if (skb != NULL) { |
@@ -285,8 +275,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, | |||
285 | dh->dccph_checksum = dccp_v6_csum_finish(skb, | 275 | dh->dccph_checksum = dccp_v6_csum_finish(skb, |
286 | &ireq6->loc_addr, | 276 | &ireq6->loc_addr, |
287 | &ireq6->rmt_addr); | 277 | &ireq6->rmt_addr); |
288 | ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); | 278 | ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); |
289 | err = ip6_xmit(sk, skb, &fl, opt); | 279 | err = ip6_xmit(sk, skb, &fl6, opt); |
290 | err = net_xmit_eval(err); | 280 | err = net_xmit_eval(err); |
291 | } | 281 | } |
292 | 282 | ||
@@ -306,9 +296,9 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) | |||
306 | 296 | ||
307 | static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) | 297 | static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) |
308 | { | 298 | { |
309 | struct ipv6hdr *rxip6h; | 299 | const struct ipv6hdr *rxip6h; |
310 | struct sk_buff *skb; | 300 | struct sk_buff *skb; |
311 | struct flowi fl; | 301 | struct flowi6 fl6; |
312 | struct net *net = dev_net(skb_dst(rxskb)->dev); | 302 | struct net *net = dev_net(skb_dst(rxskb)->dev); |
313 | struct sock *ctl_sk = net->dccp.v6_ctl_sk; | 303 | struct sock *ctl_sk = net->dccp.v6_ctl_sk; |
314 | struct dst_entry *dst; | 304 | struct dst_entry *dst; |
@@ -327,25 +317,24 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) | |||
327 | dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, | 317 | dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, |
328 | &rxip6h->daddr); | 318 | &rxip6h->daddr); |
329 | 319 | ||
330 | memset(&fl, 0, sizeof(fl)); | 320 | memset(&fl6, 0, sizeof(fl6)); |
331 | ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); | 321 | ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr); |
332 | ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); | 322 | ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr); |
333 | 323 | ||
334 | fl.proto = IPPROTO_DCCP; | 324 | fl6.flowi6_proto = IPPROTO_DCCP; |
335 | fl.oif = inet6_iif(rxskb); | 325 | fl6.flowi6_oif = inet6_iif(rxskb); |
336 | fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; | 326 | fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; |
337 | fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; | 327 | fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; |
338 | security_skb_classify_flow(rxskb, &fl); | 328 | security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); |
339 | 329 | ||
340 | /* sk = NULL, but it is safe for now. RST socket required. */ | 330 | /* sk = NULL, but it is safe for now. RST socket required. */ |
341 | if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { | 331 | dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); |
342 | if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { | 332 | if (!IS_ERR(dst)) { |
343 | skb_dst_set(skb, dst); | 333 | skb_dst_set(skb, dst); |
344 | ip6_xmit(ctl_sk, skb, &fl, NULL); | 334 | ip6_xmit(ctl_sk, skb, &fl6, NULL); |
345 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | 335 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); |
346 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | 336 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); |
347 | return; | 337 | return; |
348 | } | ||
349 | } | 338 | } |
350 | 339 | ||
351 | kfree_skb(skb); | 340 | kfree_skb(skb); |
@@ -484,7 +473,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
484 | struct inet6_request_sock *ireq6 = inet6_rsk(req); | 473 | struct inet6_request_sock *ireq6 = inet6_rsk(req); |
485 | struct ipv6_pinfo *newnp, *np = inet6_sk(sk); | 474 | struct ipv6_pinfo *newnp, *np = inet6_sk(sk); |
486 | struct inet_sock *newinet; | 475 | struct inet_sock *newinet; |
487 | struct dccp_sock *newdp; | ||
488 | struct dccp6_sock *newdp6; | 476 | struct dccp6_sock *newdp6; |
489 | struct sock *newsk; | 477 | struct sock *newsk; |
490 | struct ipv6_txoptions *opt; | 478 | struct ipv6_txoptions *opt; |
@@ -498,7 +486,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
498 | return NULL; | 486 | return NULL; |
499 | 487 | ||
500 | newdp6 = (struct dccp6_sock *)newsk; | 488 | newdp6 = (struct dccp6_sock *)newsk; |
501 | newdp = dccp_sk(newsk); | ||
502 | newinet = inet_sk(newsk); | 489 | newinet = inet_sk(newsk); |
503 | newinet->pinet6 = &newdp6->inet6; | 490 | newinet->pinet6 = &newdp6->inet6; |
504 | newnp = inet6_sk(newsk); | 491 | newnp = inet6_sk(newsk); |
@@ -540,31 +527,26 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
540 | 527 | ||
541 | if (dst == NULL) { | 528 | if (dst == NULL) { |
542 | struct in6_addr *final_p, final; | 529 | struct in6_addr *final_p, final; |
543 | struct flowi fl; | 530 | struct flowi6 fl6; |
544 | 531 | ||
545 | memset(&fl, 0, sizeof(fl)); | 532 | memset(&fl6, 0, sizeof(fl6)); |
546 | fl.proto = IPPROTO_DCCP; | 533 | fl6.flowi6_proto = IPPROTO_DCCP; |
547 | ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); | 534 | ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); |
548 | final_p = fl6_update_dst(&fl, opt, &final); | 535 | final_p = fl6_update_dst(&fl6, opt, &final); |
549 | ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); | 536 | ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); |
550 | fl.oif = sk->sk_bound_dev_if; | 537 | fl6.flowi6_oif = sk->sk_bound_dev_if; |
551 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | 538 | fl6.fl6_dport = inet_rsk(req)->rmt_port; |
552 | fl.fl_ip_sport = inet_rsk(req)->loc_port; | 539 | fl6.fl6_sport = inet_rsk(req)->loc_port; |
553 | security_sk_classify_flow(sk, &fl); | 540 | security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); |
554 | 541 | ||
555 | if (ip6_dst_lookup(sk, &dst, &fl)) | 542 | dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); |
556 | goto out; | 543 | if (IS_ERR(dst)) |
557 | |||
558 | if (final_p) | ||
559 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
560 | |||
561 | if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) | ||
562 | goto out; | 544 | goto out; |
563 | } | 545 | } |
564 | 546 | ||
565 | newsk = dccp_create_openreq_child(sk, req, skb); | 547 | newsk = dccp_create_openreq_child(sk, req, skb); |
566 | if (newsk == NULL) | 548 | if (newsk == NULL) |
567 | goto out; | 549 | goto out_nonewsk; |
568 | 550 | ||
569 | /* | 551 | /* |
570 | * No need to charge this sock to the relevant IPv6 refcnt debug socks | 552 | * No need to charge this sock to the relevant IPv6 refcnt debug socks |
@@ -578,7 +560,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
578 | newdp6 = (struct dccp6_sock *)newsk; | 560 | newdp6 = (struct dccp6_sock *)newsk; |
579 | newinet = inet_sk(newsk); | 561 | newinet = inet_sk(newsk); |
580 | newinet->pinet6 = &newdp6->inet6; | 562 | newinet->pinet6 = &newdp6->inet6; |
581 | newdp = dccp_sk(newsk); | ||
582 | newnp = inet6_sk(newsk); | 563 | newnp = inet6_sk(newsk); |
583 | 564 | ||
584 | memcpy(newnp, np, sizeof(struct ipv6_pinfo)); | 565 | memcpy(newnp, np, sizeof(struct ipv6_pinfo)); |
@@ -592,7 +573,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
592 | 573 | ||
593 | First: no IPv4 options. | 574 | First: no IPv4 options. |
594 | */ | 575 | */ |
595 | newinet->opt = NULL; | 576 | newinet->inet_opt = NULL; |
596 | 577 | ||
597 | /* Clone RX bits */ | 578 | /* Clone RX bits */ |
598 | newnp->rxopt.all = np->rxopt.all; | 579 | newnp->rxopt.all = np->rxopt.all; |
@@ -632,18 +613,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
632 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; | 613 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; |
633 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; | 614 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; |
634 | 615 | ||
616 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
617 | sock_put(newsk); | ||
618 | goto out; | ||
619 | } | ||
635 | __inet6_hash(newsk, NULL); | 620 | __inet6_hash(newsk, NULL); |
636 | __inet_inherit_port(sk, newsk); | ||
637 | 621 | ||
638 | return newsk; | 622 | return newsk; |
639 | 623 | ||
640 | out_overflow: | 624 | out_overflow: |
641 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 625 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
626 | out_nonewsk: | ||
627 | dst_release(dst); | ||
642 | out: | 628 | out: |
643 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 629 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
644 | if (opt != NULL && opt != np->opt) | 630 | if (opt != NULL && opt != np->opt) |
645 | sock_kfree_s(sk, opt, opt->tot_len); | 631 | sock_kfree_s(sk, opt, opt->tot_len); |
646 | dst_release(dst); | ||
647 | return NULL; | 632 | return NULL; |
648 | } | 633 | } |
649 | 634 | ||
@@ -874,7 +859,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
874 | struct ipv6_pinfo *np = inet6_sk(sk); | 859 | struct ipv6_pinfo *np = inet6_sk(sk); |
875 | struct dccp_sock *dp = dccp_sk(sk); | 860 | struct dccp_sock *dp = dccp_sk(sk); |
876 | struct in6_addr *saddr = NULL, *final_p, final; | 861 | struct in6_addr *saddr = NULL, *final_p, final; |
877 | struct flowi fl; | 862 | struct flowi6 fl6; |
878 | struct dst_entry *dst; | 863 | struct dst_entry *dst; |
879 | int addr_type; | 864 | int addr_type; |
880 | int err; | 865 | int err; |
@@ -887,14 +872,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
887 | if (usin->sin6_family != AF_INET6) | 872 | if (usin->sin6_family != AF_INET6) |
888 | return -EAFNOSUPPORT; | 873 | return -EAFNOSUPPORT; |
889 | 874 | ||
890 | memset(&fl, 0, sizeof(fl)); | 875 | memset(&fl6, 0, sizeof(fl6)); |
891 | 876 | ||
892 | if (np->sndflow) { | 877 | if (np->sndflow) { |
893 | fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; | 878 | fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; |
894 | IP6_ECN_flow_init(fl.fl6_flowlabel); | 879 | IP6_ECN_flow_init(fl6.flowlabel); |
895 | if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { | 880 | if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { |
896 | struct ip6_flowlabel *flowlabel; | 881 | struct ip6_flowlabel *flowlabel; |
897 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); | 882 | flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); |
898 | if (flowlabel == NULL) | 883 | if (flowlabel == NULL) |
899 | return -EINVAL; | 884 | return -EINVAL; |
900 | ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); | 885 | ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); |
@@ -931,7 +916,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
931 | } | 916 | } |
932 | 917 | ||
933 | ipv6_addr_copy(&np->daddr, &usin->sin6_addr); | 918 | ipv6_addr_copy(&np->daddr, &usin->sin6_addr); |
934 | np->flow_label = fl.fl6_flowlabel; | 919 | np->flow_label = fl6.flowlabel; |
935 | 920 | ||
936 | /* | 921 | /* |
937 | * DCCP over IPv4 | 922 | * DCCP over IPv4 |
@@ -968,33 +953,24 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
968 | if (!ipv6_addr_any(&np->rcv_saddr)) | 953 | if (!ipv6_addr_any(&np->rcv_saddr)) |
969 | saddr = &np->rcv_saddr; | 954 | saddr = &np->rcv_saddr; |
970 | 955 | ||
971 | fl.proto = IPPROTO_DCCP; | 956 | fl6.flowi6_proto = IPPROTO_DCCP; |
972 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | 957 | ipv6_addr_copy(&fl6.daddr, &np->daddr); |
973 | ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); | 958 | ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr); |
974 | fl.oif = sk->sk_bound_dev_if; | 959 | fl6.flowi6_oif = sk->sk_bound_dev_if; |
975 | fl.fl_ip_dport = usin->sin6_port; | 960 | fl6.fl6_dport = usin->sin6_port; |
976 | fl.fl_ip_sport = inet->inet_sport; | 961 | fl6.fl6_sport = inet->inet_sport; |
977 | security_sk_classify_flow(sk, &fl); | 962 | security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); |
978 | 963 | ||
979 | final_p = fl6_update_dst(&fl, np->opt, &final); | 964 | final_p = fl6_update_dst(&fl6, np->opt, &final); |
980 | 965 | ||
981 | err = ip6_dst_lookup(sk, &dst, &fl); | 966 | dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); |
982 | if (err) | 967 | if (IS_ERR(dst)) { |
968 | err = PTR_ERR(dst); | ||
983 | goto failure; | 969 | goto failure; |
984 | |||
985 | if (final_p) | ||
986 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
987 | |||
988 | err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); | ||
989 | if (err < 0) { | ||
990 | if (err == -EREMOTE) | ||
991 | err = ip6_dst_blackhole(sk, &dst, &fl); | ||
992 | if (err < 0) | ||
993 | goto failure; | ||
994 | } | 970 | } |
995 | 971 | ||
996 | if (saddr == NULL) { | 972 | if (saddr == NULL) { |
997 | saddr = &fl.fl6_src; | 973 | saddr = &fl6.saddr; |
998 | ipv6_addr_copy(&np->rcv_saddr, saddr); | 974 | ipv6_addr_copy(&np->rcv_saddr, saddr); |
999 | } | 975 | } |
1000 | 976 | ||
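The dccp_v6_connect() hunk above is part of the tree-wide flowi conversion: the IPv6 flow key moves from the generic struct flowi into the dedicated struct flowi6, and the old three-step route lookup (ip6_dst_lookup(), manual final_p copy, __xfrm_lookup() with -EREMOTE blackhole fallback) collapses into a single ip6_dst_lookup_flow() call that returns either a valid dst or an ERR_PTR-encoded error. A minimal C sketch of the renames, taken from the hunk itself:

    /* struct flowi field      ->  struct flowi6 equivalent
     * fl.proto                ->  fl6.flowi6_proto
     * fl.oif                  ->  fl6.flowi6_oif
     * fl.fl6_dst / fl.fl6_src ->  fl6.daddr / fl6.saddr
     * fl.fl6_flowlabel        ->  fl6.flowlabel
     * fl.fl_ip_dport / _sport ->  fl6.fl6_dport / fl6.fl6_sport
     */
    dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
    if (IS_ERR(dst))
            return PTR_ERR(dst);  /* no separate __xfrm_lookup() step */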
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 128b089d3aef..d7041a0963af 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -121,30 +121,18 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
121 | * | 121 | * |
122 | * Choose S.ISS (initial seqno) or set from Init Cookies | 122 | * Choose S.ISS (initial seqno) or set from Init Cookies |
123 | * Initialize S.GAR := S.ISS | 123 | * Initialize S.GAR := S.ISS |
124 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies | 124 | * Set S.ISR, S.GSR from packet (or Init Cookies) |
125 | */ | 125 | * |
126 | newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; | 126 | * Setting AWL/AWH and SWL/SWH happens as part of the feature |
127 | dccp_update_gss(newsk, dreq->dreq_iss); | 127 | * activation below, as these windows all depend on the local |
128 | 128 | * and remote Sequence Window feature values (7.5.2). | |
129 | newdp->dccps_isr = dreq->dreq_isr; | ||
130 | dccp_update_gsr(newsk, dreq->dreq_isr); | ||
131 | |||
132 | /* | ||
133 | * SWL and AWL are initially adjusted so that they are not less than | ||
134 | * the initial Sequence Numbers received and sent, respectively: | ||
135 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
136 | * AWL := max(GSS - W' + 1, ISS). | ||
137 | * These adjustments MUST be applied only at the beginning of the | ||
138 | * connection. | ||
139 | */ | 129 | */ |
140 | dccp_set_seqno(&newdp->dccps_swl, | 130 | newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss; |
141 | max48(newdp->dccps_swl, newdp->dccps_isr)); | 131 | newdp->dccps_gar = newdp->dccps_iss; |
142 | dccp_set_seqno(&newdp->dccps_awl, | 132 | newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr; |
143 | max48(newdp->dccps_awl, newdp->dccps_iss)); | ||
144 | 133 | ||
145 | /* | 134 | /* |
146 | * Activate features after initialising the sequence numbers, | 135 | * Activate features: initialise CCIDs, sequence windows etc. |
147 | * since CCID initialisation may depend on GSS, ISR, ISS etc. | ||
148 | */ | 136 | */ |
149 | if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { | 137 | if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { |
150 | /* It is still raw copy of parent, so invalidate | 138 | /* It is still raw copy of parent, so invalidate |
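The SWL/AWL adjustments deleted above do not disappear: they move into the Sequence-Window feature handlers, so the windows are recomputed whenever the negotiated W/W' values change rather than only at connection start. A sketch of the receive side, assuming the 48-bit sequence helpers (ADD48, SUB48, before48) and the dccps_r_seq_win field from net/dccp/dccp.h:

    /* SWL := max(GSR + 1 - floor(W/4), ISR), per RFC 4340, 7.5.1 */
    dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
    if (before48(dp->dccps_swl, dp->dccps_isr))
            dp->dccps_swl = dp->dccps_isr;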
diff --git a/net/dccp/options.c b/net/dccp/options.c index bfda087bd90d..4b2ab657ac8e 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
54 | struct dccp_sock *dp = dccp_sk(sk); | 54 | struct dccp_sock *dp = dccp_sk(sk); |
55 | const struct dccp_hdr *dh = dccp_hdr(skb); | 55 | const struct dccp_hdr *dh = dccp_hdr(skb); |
56 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; | 56 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; |
57 | u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; | ||
58 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | 57 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); |
59 | unsigned char *opt_ptr = options; | 58 | unsigned char *opt_ptr = options; |
60 | const unsigned char *opt_end = (unsigned char *)dh + | 59 | const unsigned char *opt_end = (unsigned char *)dh + |
@@ -96,18 +95,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
96 | } | 95 | } |
97 | 96 | ||
98 | /* | 97 | /* |
99 | * CCID-Specific Options (from RFC 4340, sec. 10.3): | ||
100 | * | ||
101 | * Option numbers 128 through 191 are for options sent from the | ||
102 | * HC-Sender to the HC-Receiver; option numbers 192 through 255 | ||
103 | * are for options sent from the HC-Receiver to the HC-Sender. | ||
104 | * | ||
105 | * CCID-specific options are ignored during connection setup, as | 98 | * CCID-specific options are ignored during connection setup, as |
106 | * negotiation may still be in progress (see RFC 4340, 10.3). | 99 | * negotiation may still be in progress (see RFC 4340, 10.3). |
107 | * The same applies to Ack Vectors, as these depend on the CCID. | 100 | * The same applies to Ack Vectors, as these depend on the CCID. |
108 | * | ||
109 | */ | 101 | */ |
110 | if (dreq != NULL && (opt >= 128 || | 102 | if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC || |
111 | opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) | 103 | opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) |
112 | goto ignore_option; | 104 | goto ignore_option; |
113 | 105 | ||
@@ -131,19 +123,13 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
131 | case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: | 123 | case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: |
132 | if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ | 124 | if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ |
133 | break; | 125 | break; |
126 | if (len == 0) | ||
127 | goto out_invalid_option; | ||
134 | rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, | 128 | rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, |
135 | *value, value + 1, len - 1); | 129 | *value, value + 1, len - 1); |
136 | if (rc) | 130 | if (rc) |
137 | goto out_featneg_failed; | 131 | goto out_featneg_failed; |
138 | break; | 132 | break; |
139 | case DCCPO_ACK_VECTOR_0: | ||
140 | case DCCPO_ACK_VECTOR_1: | ||
141 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ | ||
142 | break; | ||
143 | if (dp->dccps_hc_rx_ackvec != NULL && | ||
144 | dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) | ||
145 | goto out_invalid_option; | ||
146 | break; | ||
147 | case DCCPO_TIMESTAMP: | 133 | case DCCPO_TIMESTAMP: |
148 | if (len != 4) | 134 | if (len != 4) |
149 | goto out_invalid_option; | 135 | goto out_invalid_option; |
@@ -170,6 +156,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
170 | dccp_role(sk), ntohl(opt_val), | 156 | dccp_role(sk), ntohl(opt_val), |
171 | (unsigned long long) | 157 | (unsigned long long) |
172 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 158 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
159 | /* schedule an Ack in case this sender is quiescent */ | ||
160 | inet_csk_schedule_ack(sk); | ||
173 | break; | 161 | break; |
174 | case DCCPO_TIMESTAMP_ECHO: | 162 | case DCCPO_TIMESTAMP_ECHO: |
175 | if (len != 4 && len != 6 && len != 8) | 163 | if (len != 4 && len != 6 && len != 8) |
@@ -226,23 +214,25 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
226 | dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", | 214 | dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", |
227 | dccp_role(sk), elapsed_time); | 215 | dccp_role(sk), elapsed_time); |
228 | break; | 216 | break; |
229 | case 128 ... 191: { | 217 | case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC: |
230 | const u16 idx = value - options; | ||
231 | |||
232 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, | 218 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, |
233 | opt, len, idx, | 219 | pkt_type, opt, value, len)) |
234 | value) != 0) | ||
235 | goto out_invalid_option; | 220 | goto out_invalid_option; |
236 | } | ||
237 | break; | 221 | break; |
238 | case 192 ... 255: { | 222 | case DCCPO_ACK_VECTOR_0: |
239 | const u16 idx = value - options; | 223 | case DCCPO_ACK_VECTOR_1: |
240 | 224 | if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ | |
225 | break; | ||
226 | /* | ||
227 | * Ack vectors are processed by the TX CCID if it is | ||
228 | * interested. The RX CCID need not parse Ack Vectors, | ||
229 | * since it is only interested in clearing old state. | ||
230 | * Fall through. | ||
231 | */ | ||
232 | case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: | ||
241 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, | 233 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, |
242 | opt, len, idx, | 234 | pkt_type, opt, value, len)) |
243 | value) != 0) | ||
244 | goto out_invalid_option; | 235 | goto out_invalid_option; |
245 | } | ||
246 | break; | 236 | break; |
247 | default: | 237 | default: |
248 | DCCP_CRIT("DCCP(%p): option %d(len=%d) not " | 238 | DCCP_CRIT("DCCP(%p): option %d(len=%d) not " |
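The open-coded ranges 128 ... 191 and 192 ... 255 are replaced by symbolic bounds. As the deleted comment explained, RFC 4340, 10.3 reserves options 128-191 for the HC-Sender-to-HC-Receiver direction and 192-255 for the reverse; the constants in net/dccp/dccp.h encode exactly that:

    DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* sender -> receiver, RX CCID parses */
    DCCPO_MAX_RX_CCID_SPECIFIC = 191,
    DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* receiver -> sender, TX CCID parses */
    DCCPO_MAX_TX_CCID_SPECIFIC = 255,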
@@ -353,6 +343,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) | |||
353 | return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; | 343 | return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; |
354 | } | 344 | } |
355 | 345 | ||
346 | /* FIXME: This function is currently not used anywhere */ | ||
356 | int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) | 347 | int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) |
357 | { | 348 | { |
358 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | 349 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); |
@@ -384,7 +375,7 @@ int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) | |||
384 | 375 | ||
385 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); | 376 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); |
386 | 377 | ||
387 | int dccp_insert_option_timestamp(struct sk_buff *skb) | 378 | static int dccp_insert_option_timestamp(struct sk_buff *skb) |
388 | { | 379 | { |
389 | __be32 now = htonl(dccp_timestamp()); | 380 | __be32 now = htonl(dccp_timestamp()); |
390 | /* yes this will overflow but that is the point as we want a | 381 | /* yes this will overflow but that is the point as we want a |
@@ -393,8 +384,6 @@ int dccp_insert_option_timestamp(struct sk_buff *skb) | |||
393 | return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); | 384 | return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); |
394 | } | 385 | } |
395 | 386 | ||
396 | EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); | ||
397 | |||
398 | static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, | 387 | static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, |
399 | struct dccp_request_sock *dreq, | 388 | struct dccp_request_sock *dreq, |
400 | struct sk_buff *skb) | 389 | struct sk_buff *skb) |
@@ -439,6 +428,83 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, | |||
439 | return 0; | 428 | return 0; |
440 | } | 429 | } |
441 | 430 | ||
431 | static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | ||
432 | { | ||
433 | struct dccp_sock *dp = dccp_sk(sk); | ||
434 | struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; | ||
435 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
436 | const u16 buflen = dccp_ackvec_buflen(av); | ||
437 | /* Figure out how many options we need to represent the Ack Vector */ | ||
438 | const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); | ||
439 | u16 len = buflen + 2 * nr_opts; | ||
440 | u8 i, nonce = 0; | ||
441 | const unsigned char *tail, *from; | ||
442 | unsigned char *to; | ||
443 | |||
444 | if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
445 | DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, | ||
446 | dccp_packet_name(dcb->dccpd_type)); | ||
447 | return -1; | ||
448 | } | ||
449 | /* | ||
450 | * Since Ack Vectors are variable-length, we cannot always predict | ||
451 | * their size. To catch cases where the skb is running out of | ||
452 | * space, a separate Sync is scheduled to carry the Ack Vector. | ||
453 | */ | ||
454 | if (len > DCCPAV_MIN_OPTLEN && | ||
455 | len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { | ||
456 | DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " | ||
457 | "MPS=%u ==> reduce payload size?\n", len, skb->len, | ||
458 | dcb->dccpd_opt_len, dp->dccps_mss_cache); | ||
459 | dp->dccps_sync_scheduled = 1; | ||
460 | return 0; | ||
461 | } | ||
462 | dcb->dccpd_opt_len += len; | ||
463 | |||
464 | to = skb_push(skb, len); | ||
465 | len = buflen; | ||
466 | from = av->av_buf + av->av_buf_head; | ||
467 | tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; | ||
468 | |||
469 | for (i = 0; i < nr_opts; ++i) { | ||
470 | int copylen = len; | ||
471 | |||
472 | if (len > DCCP_SINGLE_OPT_MAXLEN) | ||
473 | copylen = DCCP_SINGLE_OPT_MAXLEN; | ||
474 | |||
475 | /* | ||
476 | * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via | ||
477 | * its type; ack_nonce is the sum of all individual buf_nonce's. | ||
478 | */ | ||
479 | nonce ^= av->av_buf_nonce[i]; | ||
480 | |||
481 | *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i]; | ||
482 | *to++ = copylen + 2; | ||
483 | |||
484 | /* Check if buf_head wraps */ | ||
485 | if (from + copylen > tail) { | ||
486 | const u16 tailsize = tail - from; | ||
487 | |||
488 | memcpy(to, from, tailsize); | ||
489 | to += tailsize; | ||
490 | len -= tailsize; | ||
491 | copylen -= tailsize; | ||
492 | from = av->av_buf; | ||
493 | } | ||
494 | |||
495 | memcpy(to, from, copylen); | ||
496 | from += copylen; | ||
497 | to += copylen; | ||
498 | len -= copylen; | ||
499 | } | ||
500 | /* | ||
501 | * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. | ||
502 | */ | ||
503 | if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) | ||
504 | return -ENOBUFS; | ||
505 | return 0; | ||
506 | } | ||
507 | |||
442 | /** | 508 | /** |
443 | * dccp_insert_option_mandatory - Mandatory option (5.8.2) | 509 | * dccp_insert_option_mandatory - Mandatory option (5.8.2) |
444 | * Note that since we are using skb_push, this function needs to be called | 510 | * Note that since we are using skb_push, this function needs to be called |
@@ -534,8 +600,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) | |||
534 | if (dccp_insert_option_timestamp(skb)) | 600 | if (dccp_insert_option_timestamp(skb)) |
535 | return -1; | 601 | return -1; |
536 | 602 | ||
537 | } else if (dp->dccps_hc_rx_ackvec != NULL && | 603 | } else if (dccp_ackvec_pending(sk) && |
538 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && | ||
539 | dccp_insert_option_ackvec(sk, skb)) { | 604 | dccp_insert_option_ackvec(sk, skb)) { |
540 | return -1; | 605 | return -1; |
541 | } | 606 | } |
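A short worked example of the splitting arithmetic in dccp_insert_option_ackvec() above, assuming DCCP_SINGLE_OPT_MAXLEN = 253 (the most data a single option can carry once the 2-byte type/length header is accounted for):

    /* An Ack Vector of buflen = 300 bytes needs two options: */
    nr_opts = DIV_ROUND_UP(300, 253); /* = 2                     */
    len     = 300 + 2 * nr_opts;      /* = 304 bytes on the wire */
    /*
     * The first option carries 253 data bytes, the second the
     * remaining 47; if av_buf_head sits near the end of the ring,
     * the copy loop additionally splits at the buffer wrap point.
     */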
diff --git a/net/dccp/output.c b/net/dccp/output.c index aadbdb58758b..fab108e51e5a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -43,7 +43,7 @@ static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) | |||
43 | static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | 43 | static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) |
44 | { | 44 | { |
45 | if (likely(skb != NULL)) { | 45 | if (likely(skb != NULL)) { |
46 | const struct inet_sock *inet = inet_sk(sk); | 46 | struct inet_sock *inet = inet_sk(sk); |
47 | const struct inet_connection_sock *icsk = inet_csk(sk); | 47 | const struct inet_connection_sock *icsk = inet_csk(sk); |
48 | struct dccp_sock *dp = dccp_sk(sk); | 48 | struct dccp_sock *dp = dccp_sk(sk); |
49 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 49 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
@@ -136,14 +136,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
136 | 136 | ||
137 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | 137 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); |
138 | 138 | ||
139 | err = icsk->icsk_af_ops->queue_xmit(skb); | 139 | err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); |
140 | return net_xmit_eval(err); | 140 | return net_xmit_eval(err); |
141 | } | 141 | } |
142 | return -ENOBUFS; | 142 | return -ENOBUFS; |
143 | } | 143 | } |
144 | 144 | ||
145 | /** | 145 | /** |
146 | * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits | 146 | * dccp_determine_ccmps - Find out about CCID-specific packet-size limits |
147 | * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), | 147 | * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), |
148 | * since the RX CCID is restricted to feedback packets (Acks), which are small | 148 | * since the RX CCID is restricted to feedback packets (Acks), which are small |
149 | * in comparison with the data traffic. A value of 0 means "no current CCMPS". | 149 | * in comparison with the data traffic. A value of 0 means "no current CCMPS". |
@@ -209,108 +209,158 @@ void dccp_write_space(struct sock *sk) | |||
209 | } | 209 | } |
210 | 210 | ||
211 | /** | 211 | /** |
212 | * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet | 212 | * dccp_wait_for_ccid - Await CCID send permission |
213 | * @sk: socket to wait for | 213 | * @sk: socket to wait for |
214 | * @skb: current skb to pass on for waiting | 214 | * @delay: timeout in jiffies |
215 | * @delay: sleep timeout in milliseconds (> 0) | 215 | * This is used by CCIDs which need to delay the send time in process context. |
216 | * This function is called by default when the socket is closed, and | ||
217 | * when a non-zero linger time is set on the socket. For consistency | ||
218 | */ | 216 | */ |
219 | static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) | 217 | static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) |
220 | { | 218 | { |
221 | struct dccp_sock *dp = dccp_sk(sk); | ||
222 | DEFINE_WAIT(wait); | 219 | DEFINE_WAIT(wait); |
223 | unsigned long jiffdelay; | 220 | long remaining; |
224 | int rc; | ||
225 | 221 | ||
226 | do { | 222 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
227 | dccp_pr_debug("delayed send by %d msec\n", delay); | 223 | sk->sk_write_pending++; |
228 | jiffdelay = msecs_to_jiffies(delay); | 224 | release_sock(sk); |
229 | 225 | ||
230 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); | 226 | remaining = schedule_timeout(delay); |
231 | 227 | ||
232 | sk->sk_write_pending++; | 228 | lock_sock(sk); |
233 | release_sock(sk); | 229 | sk->sk_write_pending--; |
234 | schedule_timeout(jiffdelay); | 230 | finish_wait(sk_sleep(sk), &wait); |
235 | lock_sock(sk); | ||
236 | sk->sk_write_pending--; | ||
237 | 231 | ||
238 | if (sk->sk_err) | 232 | if (signal_pending(current) || sk->sk_err) |
239 | goto do_error; | 233 | return -1; |
240 | if (signal_pending(current)) | 234 | return remaining; |
241 | goto do_interrupted; | 235 | } |
242 | 236 | ||
243 | rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 237 | /** |
244 | } while ((delay = rc) > 0); | 238 | * dccp_xmit_packet - Send data packet under control of CCID |
245 | out: | 239 | * Transmits next-queued payload and informs CCID to account for the packet. |
246 | finish_wait(sk_sleep(sk), &wait); | 240 | */ |
247 | return rc; | 241 | static void dccp_xmit_packet(struct sock *sk) |
248 | 242 | { | |
249 | do_error: | 243 | int err, len; |
250 | rc = -EPIPE; | 244 | struct dccp_sock *dp = dccp_sk(sk); |
251 | goto out; | 245 | struct sk_buff *skb = dccp_qpolicy_pop(sk); |
252 | do_interrupted: | 246 | |
253 | rc = -EINTR; | 247 | if (unlikely(skb == NULL)) |
254 | goto out; | 248 | return; |
249 | len = skb->len; | ||
250 | |||
251 | if (sk->sk_state == DCCP_PARTOPEN) { | ||
252 | const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; | ||
253 | /* | ||
254 | * See 8.1.5 - Handshake Completion. | ||
255 | * | ||
256 | * For robustness we resend Confirm options until the client has | ||
257 | * entered OPEN. During the initial feature negotiation, the MPS | ||
258 | * is smaller than usual, reduced by the Change/Confirm options. | ||
259 | */ | ||
260 | if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { | ||
261 | DCCP_WARN("Payload too large (%d) for featneg.\n", len); | ||
262 | dccp_send_ack(sk); | ||
263 | dccp_feat_list_purge(&dp->dccps_featneg); | ||
264 | } | ||
265 | |||
266 | inet_csk_schedule_ack(sk); | ||
267 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
268 | inet_csk(sk)->icsk_rto, | ||
269 | DCCP_RTO_MAX); | ||
270 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; | ||
271 | } else if (dccp_ack_pending(sk)) { | ||
272 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; | ||
273 | } else { | ||
274 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; | ||
275 | } | ||
276 | |||
277 | err = dccp_transmit_skb(sk, skb); | ||
278 | if (err) | ||
279 | dccp_pr_debug("transmit_skb() returned err=%d\n", err); | ||
280 | /* | ||
281 | * Register this one as sent even if an error occurred. To the remote | ||
282 | * end a local packet drop is indistinguishable from network loss, i.e. | ||
283 | * any local drop will eventually be reported via receiver feedback. | ||
284 | */ | ||
285 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); | ||
286 | |||
287 | /* | ||
288 | * If the CCID needs to transfer additional header options out-of-band | ||
289 | * (e.g. Ack Vectors or feature-negotiation options), it activates this | ||
290 | * flag to schedule a Sync. The Sync will automatically incorporate all | ||
291 | * currently pending header options, thus clearing the backlog. | ||
292 | */ | ||
293 | if (dp->dccps_sync_scheduled) | ||
294 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); | ||
255 | } | 295 | } |
256 | 296 | ||
257 | void dccp_write_xmit(struct sock *sk, int block) | 297 | /** |
298 | * dccp_flush_write_queue - Drain queue at end of connection | ||
299 | * Since dccp_sendmsg queues packets without waiting for them to be sent, it may | ||
300 | * happen that the TX queue is not empty at the end of a connection. We give the | ||
301 | * HC-sender CCID a grace period of up to @time_budget jiffies. If this function | ||
302 | * returns with a non-empty write queue, it will be purged later. | ||
303 | */ | ||
304 | void dccp_flush_write_queue(struct sock *sk, long *time_budget) | ||
258 | { | 305 | { |
259 | struct dccp_sock *dp = dccp_sk(sk); | 306 | struct dccp_sock *dp = dccp_sk(sk); |
260 | struct sk_buff *skb; | 307 | struct sk_buff *skb; |
308 | long delay, rc; | ||
261 | 309 | ||
262 | while ((skb = skb_peek(&sk->sk_write_queue))) { | 310 | while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) { |
263 | int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | 311 | rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); |
264 | |||
265 | if (err > 0) { | ||
266 | if (!block) { | ||
267 | sk_reset_timer(sk, &dp->dccps_xmit_timer, | ||
268 | msecs_to_jiffies(err)+jiffies); | ||
269 | break; | ||
270 | } else | ||
271 | err = dccp_wait_for_ccid(sk, skb, err); | ||
272 | if (err && err != -EINTR) | ||
273 | DCCP_BUG("err=%d after dccp_wait_for_ccid", err); | ||
274 | } | ||
275 | 312 | ||
276 | skb_dequeue(&sk->sk_write_queue); | 313 | switch (ccid_packet_dequeue_eval(rc)) { |
277 | if (err == 0) { | 314 | case CCID_PACKET_WILL_DEQUEUE_LATER: |
278 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 315 | /* |
279 | const int len = skb->len; | 316 | * If the CCID determines when to send, the next sending |
280 | 317 | * time is unknown or the CCID may not even send again | |
281 | if (sk->sk_state == DCCP_PARTOPEN) { | 318 | * (e.g. remote host crashes or lost Ack packets). |
282 | const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; | 319 | */ |
283 | /* | 320 | DCCP_WARN("CCID did not manage to send all packets\n"); |
284 | * See 8.1.5 - Handshake Completion. | 321 | return; |
285 | * | 322 | case CCID_PACKET_DELAY: |
286 | * For robustness we resend Confirm options until the client has | 323 | delay = msecs_to_jiffies(rc); |
287 | * entered OPEN. During the initial feature negotiation, the MPS | 324 | if (delay > *time_budget) |
288 | * is smaller than usual, reduced by the Change/Confirm options. | 325 | return; |
289 | */ | 326 | rc = dccp_wait_for_ccid(sk, delay); |
290 | if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { | 327 | if (rc < 0) |
291 | DCCP_WARN("Payload too large (%d) for featneg.\n", len); | 328 | return; |
292 | dccp_send_ack(sk); | 329 | *time_budget -= (delay - rc); |
293 | dccp_feat_list_purge(&dp->dccps_featneg); | 330 | /* check again if we can send now */ |
294 | } | 331 | break; |
295 | 332 | case CCID_PACKET_SEND_AT_ONCE: | |
296 | inet_csk_schedule_ack(sk); | 333 | dccp_xmit_packet(sk); |
297 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 334 | break; |
298 | inet_csk(sk)->icsk_rto, | 335 | case CCID_PACKET_ERR: |
299 | DCCP_RTO_MAX); | 336 | skb_dequeue(&sk->sk_write_queue); |
300 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
301 | } else if (dccp_ack_pending(sk)) | ||
302 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
303 | else | ||
304 | dcb->dccpd_type = DCCP_PKT_DATA; | ||
305 | |||
306 | err = dccp_transmit_skb(sk, skb); | ||
307 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); | ||
308 | if (err) | ||
309 | DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", | ||
310 | err); | ||
311 | } else { | ||
312 | dccp_pr_debug("packet discarded due to err=%d\n", err); | ||
313 | kfree_skb(skb); | 337 | kfree_skb(skb); |
338 | dccp_pr_debug("packet discarded due to err=%ld\n", rc); | ||
339 | } | ||
340 | } | ||
341 | } | ||
342 | |||
343 | void dccp_write_xmit(struct sock *sk) | ||
344 | { | ||
345 | struct dccp_sock *dp = dccp_sk(sk); | ||
346 | struct sk_buff *skb; | ||
347 | |||
348 | while ((skb = dccp_qpolicy_top(sk))) { | ||
349 | int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); | ||
350 | |||
351 | switch (ccid_packet_dequeue_eval(rc)) { | ||
352 | case CCID_PACKET_WILL_DEQUEUE_LATER: | ||
353 | return; | ||
354 | case CCID_PACKET_DELAY: | ||
355 | sk_reset_timer(sk, &dp->dccps_xmit_timer, | ||
356 | jiffies + msecs_to_jiffies(rc)); | ||
357 | return; | ||
358 | case CCID_PACKET_SEND_AT_ONCE: | ||
359 | dccp_xmit_packet(sk); | ||
360 | break; | ||
361 | case CCID_PACKET_ERR: | ||
362 | dccp_qpolicy_drop(sk, skb); | ||
363 | dccp_pr_debug("packet discarded due to err=%d\n", rc); | ||
314 | } | 364 | } |
315 | } | 365 | } |
316 | } | 366 | } |
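Both transmit loops above funnel the CCID return code through ccid_packet_dequeue_eval(). A sketch of the mapping this helper performs (the enum and helper live in net/dccp/ccid.h; the exact constant encoding shown here is an assumption):

    static inline int ccid_packet_dequeue_eval(const int return_code)
    {
            if (return_code < 0)
                    return CCID_PACKET_ERR;          /* drop the packet    */
            if (return_code == 0)
                    return CCID_PACKET_SEND_AT_ONCE; /* green light        */
            if (return_code <= CCID_PACKET_DELAY)
                    return CCID_PACKET_DELAY;        /* rc = delay in msec */
            return return_code;                      /* dequeues later     */
    }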
@@ -474,8 +524,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) | |||
474 | /* | 524 | /* |
475 | * Do all connect socket setups that can be done AF independent. | 525 | * Do all connect socket setups that can be done AF independent. |
476 | */ | 526 | */ |
477 | static inline void dccp_connect_init(struct sock *sk) | 527 | int dccp_connect(struct sock *sk) |
478 | { | 528 | { |
529 | struct sk_buff *skb; | ||
479 | struct dccp_sock *dp = dccp_sk(sk); | 530 | struct dccp_sock *dp = dccp_sk(sk); |
480 | struct dst_entry *dst = __sk_dst_get(sk); | 531 | struct dst_entry *dst = __sk_dst_get(sk); |
481 | struct inet_connection_sock *icsk = inet_csk(sk); | 532 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -485,22 +536,12 @@ static inline void dccp_connect_init(struct sock *sk) | |||
485 | 536 | ||
486 | dccp_sync_mss(sk, dst_mtu(dst)); | 537 | dccp_sync_mss(sk, dst_mtu(dst)); |
487 | 538 | ||
488 | /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ | ||
489 | dp->dccps_gar = dp->dccps_iss; | ||
490 | |||
491 | icsk->icsk_retransmits = 0; | ||
492 | } | ||
493 | |||
494 | int dccp_connect(struct sock *sk) | ||
495 | { | ||
496 | struct sk_buff *skb; | ||
497 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
498 | |||
499 | /* do not connect if feature negotiation setup fails */ | 539 | /* do not connect if feature negotiation setup fails */ |
500 | if (dccp_feat_finalise_settings(dccp_sk(sk))) | 540 | if (dccp_feat_finalise_settings(dccp_sk(sk))) |
501 | return -EPROTO; | 541 | return -EPROTO; |
502 | 542 | ||
503 | dccp_connect_init(sk); | 543 | /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ |
544 | dp->dccps_gar = dp->dccps_iss; | ||
504 | 545 | ||
505 | skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); | 546 | skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); |
506 | if (unlikely(skb == NULL)) | 547 | if (unlikely(skb == NULL)) |
@@ -516,6 +557,7 @@ int dccp_connect(struct sock *sk) | |||
516 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); | 557 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); |
517 | 558 | ||
518 | /* Timer for repeating the REQUEST until an answer. */ | 559 | /* Timer for repeating the REQUEST until an answer. */ |
560 | icsk->icsk_retransmits = 0; | ||
519 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 561 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
520 | icsk->icsk_rto, DCCP_RTO_MAX); | 562 | icsk->icsk_rto, DCCP_RTO_MAX); |
521 | return 0; | 563 | return 0; |
@@ -602,6 +644,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, | |||
602 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; | 644 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; |
603 | DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; | 645 | DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; |
604 | 646 | ||
647 | /* | ||
648 | * Clear the flag in case the Sync was scheduled for out-of-band data, | ||
649 | * such as carrying a long Ack Vector. | ||
650 | */ | ||
651 | dccp_sk(sk)->dccps_sync_scheduled = 0; | ||
652 | |||
605 | dccp_transmit_skb(sk, skb); | 653 | dccp_transmit_skb(sk, skb); |
606 | } | 654 | } |
607 | 655 | ||
@@ -630,7 +678,6 @@ void dccp_send_close(struct sock *sk, const int active) | |||
630 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; | 678 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; |
631 | 679 | ||
632 | if (active) { | 680 | if (active) { |
633 | dccp_write_xmit(sk, 1); | ||
634 | dccp_skb_entail(sk, skb); | 681 | dccp_skb_entail(sk, skb); |
635 | dccp_transmit_skb(sk, skb_clone(skb, prio)); | 682 | dccp_transmit_skb(sk, skb_clone(skb, prio)); |
636 | /* | 683 | /* |
diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 078e48d442fd..33d0e6297c21 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c | |||
@@ -149,6 +149,7 @@ static const struct file_operations dccpprobe_fops = { | |||
149 | .owner = THIS_MODULE, | 149 | .owner = THIS_MODULE, |
150 | .open = dccpprobe_open, | 150 | .open = dccpprobe_open, |
151 | .read = dccpprobe_read, | 151 | .read = dccpprobe_read, |
152 | .llseek = noop_llseek, | ||
152 | }; | 153 | }; |
153 | 154 | ||
154 | static __init int dccpprobe_init(void) | 155 | static __init int dccpprobe_init(void) |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 096250d1323b..152975d942d9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -50,6 +50,30 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo); | |||
50 | /* the maximum queue length for tx in packets. 0 is no limit */ | 50 | /* the maximum queue length for tx in packets. 0 is no limit */ |
51 | int sysctl_dccp_tx_qlen __read_mostly = 5; | 51 | int sysctl_dccp_tx_qlen __read_mostly = 5; |
52 | 52 | ||
53 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
54 | static const char *dccp_state_name(const int state) | ||
55 | { | ||
56 | static const char *const dccp_state_names[] = { | ||
57 | [DCCP_OPEN] = "OPEN", | ||
58 | [DCCP_REQUESTING] = "REQUESTING", | ||
59 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
60 | [DCCP_LISTEN] = "LISTEN", | ||
61 | [DCCP_RESPOND] = "RESPOND", | ||
62 | [DCCP_CLOSING] = "CLOSING", | ||
63 | [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", | ||
64 | [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", | ||
65 | [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", | ||
66 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
67 | [DCCP_CLOSED] = "CLOSED", | ||
68 | }; | ||
69 | |||
70 | if (state >= DCCP_MAX_STATES) | ||
71 | return "INVALID STATE!"; | ||
72 | else | ||
73 | return dccp_state_names[state]; | ||
74 | } | ||
75 | #endif | ||
76 | |||
53 | void dccp_set_state(struct sock *sk, const int state) | 77 | void dccp_set_state(struct sock *sk, const int state) |
54 | { | 78 | { |
55 | const int oldstate = sk->sk_state; | 79 | const int oldstate = sk->sk_state; |
@@ -146,30 +170,6 @@ const char *dccp_packet_name(const int type) | |||
146 | 170 | ||
147 | EXPORT_SYMBOL_GPL(dccp_packet_name); | 171 | EXPORT_SYMBOL_GPL(dccp_packet_name); |
148 | 172 | ||
149 | const char *dccp_state_name(const int state) | ||
150 | { | ||
151 | static const char *const dccp_state_names[] = { | ||
152 | [DCCP_OPEN] = "OPEN", | ||
153 | [DCCP_REQUESTING] = "REQUESTING", | ||
154 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
155 | [DCCP_LISTEN] = "LISTEN", | ||
156 | [DCCP_RESPOND] = "RESPOND", | ||
157 | [DCCP_CLOSING] = "CLOSING", | ||
158 | [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", | ||
159 | [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", | ||
160 | [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", | ||
161 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
162 | [DCCP_CLOSED] = "CLOSED", | ||
163 | }; | ||
164 | |||
165 | if (state >= DCCP_MAX_STATES) | ||
166 | return "INVALID STATE!"; | ||
167 | else | ||
168 | return dccp_state_names[state]; | ||
169 | } | ||
170 | |||
171 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
172 | |||
173 | int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | 173 | int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) |
174 | { | 174 | { |
175 | struct dccp_sock *dp = dccp_sk(sk); | 175 | struct dccp_sock *dp = dccp_sk(sk); |
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | |||
185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 185 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; | 186 | dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; |
187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; | 187 | dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; |
188 | dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; | ||
188 | 189 | ||
189 | dccp_init_xmit_timers(sk); | 190 | dccp_init_xmit_timers(sk); |
190 | 191 | ||
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, | |||
532 | case DCCP_SOCKOPT_RECV_CSCOV: | 533 | case DCCP_SOCKOPT_RECV_CSCOV: |
533 | err = dccp_setsockopt_cscov(sk, val, true); | 534 | err = dccp_setsockopt_cscov(sk, val, true); |
534 | break; | 535 | break; |
536 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
537 | if (sk->sk_state != DCCP_CLOSED) | ||
538 | err = -EISCONN; | ||
539 | else if (val < 0 || val >= DCCPQ_POLICY_MAX) | ||
540 | err = -EINVAL; | ||
541 | else | ||
542 | dp->dccps_qpolicy = val; | ||
543 | break; | ||
544 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
545 | if (val < 0) | ||
546 | err = -EINVAL; | ||
547 | else | ||
548 | dp->dccps_tx_qlen = val; | ||
549 | break; | ||
535 | default: | 550 | default: |
536 | err = -ENOPROTOOPT; | 551 | err = -ENOPROTOOPT; |
537 | break; | 552 | break; |
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
639 | case DCCP_SOCKOPT_RECV_CSCOV: | 654 | case DCCP_SOCKOPT_RECV_CSCOV: |
640 | val = dp->dccps_pcrlen; | 655 | val = dp->dccps_pcrlen; |
641 | break; | 656 | break; |
657 | case DCCP_SOCKOPT_QPOLICY_ID: | ||
658 | val = dp->dccps_qpolicy; | ||
659 | break; | ||
660 | case DCCP_SOCKOPT_QPOLICY_TXQLEN: | ||
661 | val = dp->dccps_tx_qlen; | ||
662 | break; | ||
642 | case 128 ... 191: | 663 | case 128 ... 191: |
643 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, | 664 | return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, |
644 | len, (u32 __user *)optval, optlen); | 665 | len, (u32 __user *)optval, optlen); |
@@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, | |||
681 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); | 702 | EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); |
682 | #endif | 703 | #endif |
683 | 704 | ||
705 | static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) | ||
706 | { | ||
707 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); | ||
708 | |||
709 | /* | ||
710 | * Assign an (opaque) qpolicy priority value to skb->priority. | ||
711 | * | ||
712 | * We are overloading this skb field for use with the qpolicy subsystem. | ||
713 | * The skb->priority is normally used for the SO_PRIORITY option, which | ||
714 | * is initialised from sk_priority. Since the assignment of sk_priority | ||
715 | * to skb->priority happens later (on layer 3), we overload this field | ||
716 | * for use with queueing priorities as long as the skb is on layer 4. | ||
717 | * The default priority value (if nothing is set) is 0. | ||
718 | */ | ||
719 | skb->priority = 0; | ||
720 | |||
721 | for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
722 | |||
723 | if (!CMSG_OK(msg, cmsg)) | ||
724 | return -EINVAL; | ||
725 | |||
726 | if (cmsg->cmsg_level != SOL_DCCP) | ||
727 | continue; | ||
728 | |||
729 | if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && | ||
730 | !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) | ||
731 | return -EINVAL; | ||
732 | |||
733 | switch (cmsg->cmsg_type) { | ||
734 | case DCCP_SCM_PRIORITY: | ||
735 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) | ||
736 | return -EINVAL; | ||
737 | skb->priority = *(__u32 *)CMSG_DATA(cmsg); | ||
738 | break; | ||
739 | default: | ||
740 | return -EINVAL; | ||
741 | } | ||
742 | } | ||
743 | return 0; | ||
744 | } | ||
745 | |||
684 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 746 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
685 | size_t len) | 747 | size_t len) |
686 | { | 748 | { |
@@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
696 | 758 | ||
697 | lock_sock(sk); | 759 | lock_sock(sk); |
698 | 760 | ||
699 | if (sysctl_dccp_tx_qlen && | 761 | if (dccp_qpolicy_full(sk)) { |
700 | (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { | ||
701 | rc = -EAGAIN; | 762 | rc = -EAGAIN; |
702 | goto out_release; | 763 | goto out_release; |
703 | } | 764 | } |
@@ -725,8 +786,18 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
725 | if (rc != 0) | 786 | if (rc != 0) |
726 | goto out_discard; | 787 | goto out_discard; |
727 | 788 | ||
728 | skb_queue_tail(&sk->sk_write_queue, skb); | 789 | rc = dccp_msghdr_parse(msg, skb); |
729 | dccp_write_xmit(sk,0); | 790 | if (rc != 0) |
791 | goto out_discard; | ||
792 | |||
793 | dccp_qpolicy_push(sk, skb); | ||
794 | /* | ||
795 | * The xmit_timer is set if the TX CCID is rate-based and will expire | ||
796 | * when congestion control permits to release further packets into the | ||
797 | * network. Window-based CCIDs do not use this timer. | ||
798 | */ | ||
799 | if (!timer_pending(&dp->dccps_xmit_timer)) | ||
800 | dccp_write_xmit(sk); | ||
730 | out_release: | 801 | out_release: |
731 | release_sock(sk); | 802 | release_sock(sk); |
732 | return rc ? : len; | 803 | return rc ? : len; |
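A hypothetical userspace sketch of the ancillary-data path parsed by dccp_msghdr_parse() above: attaching a per-packet priority via a DCCP_SCM_PRIORITY cmsg (constants from <linux/dccp.h>; fd and iov are assumed to be set up already):

    char cbuf[CMSG_SPACE(sizeof(__u32))];
    struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                          .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    __u32 prio = 42;                  /* opaque queueing priority */

    cmsg->cmsg_level = SOL_DCCP;
    cmsg->cmsg_type  = DCCP_SCM_PRIORITY;
    cmsg->cmsg_len   = CMSG_LEN(sizeof(prio));
    memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));
    sendmsg(fd, &msg, 0);             /* value lands in skb->priority */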
@@ -944,16 +1015,29 @@ void dccp_close(struct sock *sk, long timeout) | |||
944 | 1015 | ||
945 | if (data_was_unread) { | 1016 | if (data_was_unread) { |
946 | /* Unread data was tossed, send an appropriate Reset Code */ | 1017 | /* Unread data was tossed, send an appropriate Reset Code */ |
947 | DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread); | 1018 | DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); |
948 | dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); | 1019 | dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); |
949 | dccp_set_state(sk, DCCP_CLOSED); | 1020 | dccp_set_state(sk, DCCP_CLOSED); |
950 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | 1021 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { |
951 | /* Check zero linger _after_ checking for unread data. */ | 1022 | /* Check zero linger _after_ checking for unread data. */ |
952 | sk->sk_prot->disconnect(sk, 0); | 1023 | sk->sk_prot->disconnect(sk, 0); |
953 | } else if (sk->sk_state != DCCP_CLOSED) { | 1024 | } else if (sk->sk_state != DCCP_CLOSED) { |
1025 | /* | ||
1026 | * Normal connection termination. May need to wait if there are | ||
1027 | * still packets in the TX queue that are delayed by the CCID. | ||
1028 | */ | ||
1029 | dccp_flush_write_queue(sk, &timeout); | ||
954 | dccp_terminate_connection(sk); | 1030 | dccp_terminate_connection(sk); |
955 | } | 1031 | } |
956 | 1032 | ||
1033 | /* | ||
1034 | * Flush write queue. This may be necessary in several cases: | ||
1035 | * - we have been closed by the peer but still have application data; | ||
1036 | * - abortive termination (unread data or zero linger time); | ||
1037 | * - normal termination, but the queue could not be flushed in time. | ||
1038 | */ | ||
1039 | __skb_queue_purge(&sk->sk_write_queue); | ||
1040 | |||
957 | sk_stream_wait_close(sk, timeout); | 1041 | sk_stream_wait_close(sk, timeout); |
958 | 1042 | ||
959 | adjudge_to_death: | 1043 | adjudge_to_death: |
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..63c30bfa4703 --- /dev/null +++ b/net/dccp/qpolicy.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * net/dccp/qpolicy.c | ||
3 | * | ||
4 | * Policy-based packet dequeueing interface for DCCP. | ||
5 | * | ||
6 | * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License v2 | ||
10 | * as published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include "dccp.h" | ||
13 | |||
14 | /* | ||
15 | * Simple Dequeueing Policy: | ||
16 | * If tx_qlen is non-zero, enqueue at most tx_qlen elements. | ||
17 | */ | ||
18 | static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) | ||
19 | { | ||
20 | skb_queue_tail(&sk->sk_write_queue, skb); | ||
21 | } | ||
22 | |||
23 | static bool qpolicy_simple_full(struct sock *sk) | ||
24 | { | ||
25 | return dccp_sk(sk)->dccps_tx_qlen && | ||
26 | sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; | ||
27 | } | ||
28 | |||
29 | static struct sk_buff *qpolicy_simple_top(struct sock *sk) | ||
30 | { | ||
31 | return skb_peek(&sk->sk_write_queue); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * Priority-based Dequeueing Policy: | ||
36 | * If tx_qlen is non-zero and the queue has reached its upper bound | ||
37 | * of tx_qlen elements, drop the lowest-priority packet to admit a new one. | ||
38 | */ | ||
39 | static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) | ||
40 | { | ||
41 | struct sk_buff *skb, *best = NULL; | ||
42 | |||
43 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
44 | if (best == NULL || skb->priority > best->priority) | ||
45 | best = skb; | ||
46 | return best; | ||
47 | } | ||
48 | |||
49 | static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) | ||
50 | { | ||
51 | struct sk_buff *skb, *worst = NULL; | ||
52 | |||
53 | skb_queue_walk(&sk->sk_write_queue, skb) | ||
54 | if (worst == NULL || skb->priority < worst->priority) | ||
55 | worst = skb; | ||
56 | return worst; | ||
57 | } | ||
58 | |||
59 | static bool qpolicy_prio_full(struct sock *sk) | ||
60 | { | ||
61 | if (qpolicy_simple_full(sk)) | ||
62 | dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); | ||
63 | return false; | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface | ||
68 | * @push: add a new @skb to the write queue | ||
69 | * @full: indicates that no more packets will be admitted | ||
70 | * @top: peeks at whatever the queueing policy defines as its `top' | ||
71 | */ | ||
72 | static struct dccp_qpolicy_operations { | ||
73 | void (*push) (struct sock *sk, struct sk_buff *skb); | ||
74 | bool (*full) (struct sock *sk); | ||
75 | struct sk_buff* (*top) (struct sock *sk); | ||
76 | __be32 params; | ||
77 | |||
78 | } qpol_table[DCCPQ_POLICY_MAX] = { | ||
79 | [DCCPQ_POLICY_SIMPLE] = { | ||
80 | .push = qpolicy_simple_push, | ||
81 | .full = qpolicy_simple_full, | ||
82 | .top = qpolicy_simple_top, | ||
83 | .params = 0, | ||
84 | }, | ||
85 | [DCCPQ_POLICY_PRIO] = { | ||
86 | .push = qpolicy_simple_push, | ||
87 | .full = qpolicy_prio_full, | ||
88 | .top = qpolicy_prio_best_skb, | ||
89 | .params = DCCP_SCM_PRIORITY, | ||
90 | }, | ||
91 | }; | ||
92 | |||
93 | /* | ||
94 | * Externally visible interface | ||
95 | */ | ||
96 | void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) | ||
97 | { | ||
98 | qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); | ||
99 | } | ||
100 | |||
101 | bool dccp_qpolicy_full(struct sock *sk) | ||
102 | { | ||
103 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); | ||
104 | } | ||
105 | |||
106 | void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) | ||
107 | { | ||
108 | if (skb != NULL) { | ||
109 | skb_unlink(skb, &sk->sk_write_queue); | ||
110 | kfree_skb(skb); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | struct sk_buff *dccp_qpolicy_top(struct sock *sk) | ||
115 | { | ||
116 | return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); | ||
117 | } | ||
118 | |||
119 | struct sk_buff *dccp_qpolicy_pop(struct sock *sk) | ||
120 | { | ||
121 | struct sk_buff *skb = dccp_qpolicy_top(sk); | ||
122 | |||
123 | if (skb != NULL) { | ||
124 | /* Clear any skb fields that we used internally */ | ||
125 | skb->priority = 0; | ||
126 | skb_unlink(skb, &sk->sk_write_queue); | ||
127 | } | ||
128 | return skb; | ||
129 | } | ||
130 | |||
131 | bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) | ||
132 | { | ||
133 | /* check if exactly one bit is set */ | ||
134 | if (!param || (param & (param - 1))) | ||
135 | return false; | ||
136 | return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; | ||
137 | } | ||
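Usage sketch for the qpolicy interface (hypothetical userspace code; the two socket options are the ones added in the proto.c hunk above, and DCCP_SOCKOPT_QPOLICY_ID is only accepted while the socket is still closed):

    int policy = DCCPQ_POLICY_PRIO; /* priority-based dequeueing */
    int qlen   = 16;                /* per-socket TX queue bound */

    setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_ID, &policy, sizeof(policy));
    setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_TXQLEN, &qlen, sizeof(qlen));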
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 563943822e58..42348824ee31 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c | |||
@@ -21,7 +21,8 @@ | |||
21 | /* Boundary values */ | 21 | /* Boundary values */ |
22 | static int zero = 0, | 22 | static int zero = 0, |
23 | u8_max = 0xFF; | 23 | u8_max = 0xFF; |
24 | static unsigned long seqw_min = 32; | 24 | static unsigned long seqw_min = DCCPF_SEQ_WMIN, |
25 | seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ | ||
25 | 26 | ||
26 | static struct ctl_table dccp_default_table[] = { | 27 | static struct ctl_table dccp_default_table[] = { |
27 | { | 28 | { |
@@ -31,6 +32,7 @@ static struct ctl_table dccp_default_table[] = { | |||
31 | .mode = 0644, | 32 | .mode = 0644, |
32 | .proc_handler = proc_doulongvec_minmax, | 33 | .proc_handler = proc_doulongvec_minmax, |
33 | .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ | 34 | .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ |
35 | .extra2 = &seqw_max, | ||
34 | }, | 36 | }, |
35 | { | 37 | { |
36 | .procname = "rx_ccid", | 38 | .procname = "rx_ccid", |
diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1a9aa05d4dc4..7587870b7040 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c | |||
@@ -237,32 +237,35 @@ out: | |||
237 | sock_put(sk); | 237 | sock_put(sk); |
238 | } | 238 | } |
239 | 239 | ||
240 | /* Transmit-delay timer: used by the CCIDs to delay actual send time */ | 240 | /** |
241 | static void dccp_write_xmit_timer(unsigned long data) | 241 | * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface |
242 | * See the comments above %ccid_dequeueing_decision for supported modes. | ||
243 | */ | ||
244 | static void dccp_write_xmitlet(unsigned long data) | ||
242 | { | 245 | { |
243 | struct sock *sk = (struct sock *)data; | 246 | struct sock *sk = (struct sock *)data; |
244 | struct dccp_sock *dp = dccp_sk(sk); | ||
245 | 247 | ||
246 | bh_lock_sock(sk); | 248 | bh_lock_sock(sk); |
247 | if (sock_owned_by_user(sk)) | 249 | if (sock_owned_by_user(sk)) |
248 | sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); | 250 | sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); |
249 | else | 251 | else |
250 | dccp_write_xmit(sk, 0); | 252 | dccp_write_xmit(sk); |
251 | bh_unlock_sock(sk); | 253 | bh_unlock_sock(sk); |
252 | sock_put(sk); | ||
253 | } | 254 | } |
254 | 255 | ||
255 | static void dccp_init_write_xmit_timer(struct sock *sk) | 256 | static void dccp_write_xmit_timer(unsigned long data) |
256 | { | 257 | { |
257 | struct dccp_sock *dp = dccp_sk(sk); | 258 | dccp_write_xmitlet(data); |
258 | 259 | sock_put((struct sock *)data); | |
259 | setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, | ||
260 | (unsigned long)sk); | ||
261 | } | 260 | } |
262 | 261 | ||
263 | void dccp_init_xmit_timers(struct sock *sk) | 262 | void dccp_init_xmit_timers(struct sock *sk) |
264 | { | 263 | { |
265 | dccp_init_write_xmit_timer(sk); | 264 | struct dccp_sock *dp = dccp_sk(sk); |
265 | |||
266 | tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); | ||
267 | setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, | ||
268 | (unsigned long)sk); | ||
266 | inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, | 269 | inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, |
267 | &dccp_keepalive_timer); | 270 | &dccp_keepalive_timer); |
268 | } | 271 | } |
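With the timer/tasklet split above, a window-based CCID no longer needs to poll: once incoming Acks open the congestion window it can kick the transmit path directly from softirq context via the new xmitlet, while rate-based CCIDs keep using the xmit timer through CCID_PACKET_DELAY. A sketch of such a call site, with helper names assumed from CCID-2:

    /* in the CCID's Ack-processing path */
    if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
            tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);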