author     Linus Torvalds <torvalds@linux-foundation.org>  2010-10-23 14:47:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-10-23 14:47:02 -0400
commit     5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0 (patch)
tree       7851ef1c93aa1aba7ef327ca4b75fd35e6d10f29 /net/dccp
parent     02f36038c568111ad4fc433f6fa760ff5e38fab4 (diff)
parent     ec37a48d1d16c30b655ac5280209edf52a6775d4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1699 commits)
bnx2/bnx2x: Unsupported Ethtool operations should return -EINVAL.
vlan: Calling vlan_hwaccel_do_receive() is always valid.
tproxy: use the interface primary IP address as a default value for --on-ip
tproxy: added IPv6 support to the socket match
cxgb3: function namespace cleanup
tproxy: added IPv6 support to the TPROXY target
tproxy: added IPv6 socket lookup function to nf_tproxy_core
be2net: Changes to use only priority codes allowed by f/w
tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled
tproxy: added tproxy sockopt interface in the IPV6 layer
tproxy: added udp6_lib_lookup function
tproxy: added const specifiers to udp lookup functions
tproxy: split off ipv6 defragmentation to a separate module
l2tp: small cleanup
nf_nat: restrict ICMP translation for embedded header
can: mcp251x: fix generation of error frames
can: mcp251x: fix endless loop in interrupt handler if CANINTF_MERRF is set
can-raw: add msg_flags to distinguish local traffic
9p: client code cleanup
rds: make local functions/variables static
...
Fix up conflicts in net/core/dev.c, drivers/net/pcmcia/smc91c92_cs.c and
drivers/net/wireless/ath/ath9k/debug.c as per David
Diffstat (limited to 'net/dccp')
-rw-r--r--  net/dccp/ccid.h                      |  52
-rw-r--r--  net/dccp/ccids/Kconfig               |  31
-rw-r--r--  net/dccp/ccids/ccid2.c               | 289
-rw-r--r--  net/dccp/ccids/ccid2.h               |  35
-rw-r--r--  net/dccp/ccids/ccid3.c               | 256
-rw-r--r--  net/dccp/ccids/ccid3.h               |  51
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c   |   2
-rw-r--r--  net/dccp/ccids/lib/packet_history.c  |  39
-rw-r--r--  net/dccp/ccids/lib/packet_history.h  |  22
-rw-r--r--  net/dccp/ccids/lib/tfrc.h            |   1
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c   |  14
-rw-r--r--  net/dccp/dccp.h                      |  46
-rw-r--r--  net/dccp/feat.c                      |  10
-rw-r--r--  net/dccp/feat.h                      |   1
-rw-r--r--  net/dccp/input.c                     |  20
-rw-r--r--  net/dccp/ipv4.c                      |  10
-rw-r--r--  net/dccp/ipv6.c                      |  10
-rw-r--r--  net/dccp/minisocks.c                 |  30
-rw-r--r--  net/dccp/options.c                   |  31
-rw-r--r--  net/dccp/output.c                    |  20
-rw-r--r--  net/dccp/proto.c                     |  50

21 files changed, 410 insertions(+), 610 deletions(-)
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac9636..117fb093dcaf 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,22 +62,18 @@ struct ccid_operations {
         void            (*ccid_hc_tx_exit)(struct sock *sk);
         void            (*ccid_hc_rx_packet_recv)(struct sock *sk,
                                                    struct sk_buff *skb);
-        int             (*ccid_hc_rx_parse_options)(struct sock *sk,
-                                                     unsigned char option,
-                                                     unsigned char len, u16 idx,
-                                                     unsigned char* value);
+        int             (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
+                                                     u8 opt, u8 *val, u8 len);
         int             (*ccid_hc_rx_insert_options)(struct sock *sk,
                                                       struct sk_buff *skb);
         void            (*ccid_hc_tx_packet_recv)(struct sock *sk,
                                                    struct sk_buff *skb);
-        int             (*ccid_hc_tx_parse_options)(struct sock *sk,
-                                                     unsigned char option,
-                                                     unsigned char len, u16 idx,
-                                                     unsigned char* value);
+        int             (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
+                                                     u8 opt, u8 *val, u8 len);
         int             (*ccid_hc_tx_send_packet)(struct sock *sk,
                                                    struct sk_buff *skb);
         void            (*ccid_hc_tx_packet_sent)(struct sock *sk,
-                                                   int more, unsigned int len);
+                                                   unsigned int len);
         void            (*ccid_hc_rx_get_info)(struct sock *sk,
                                                 struct tcp_info *info);
         void            (*ccid_hc_tx_get_info)(struct sock *sk,
@@ -148,10 +144,10 @@ static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
 }
 
 static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
-                                          int more, unsigned int len)
+                                          unsigned int len)
 {
         if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
-                ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
+                ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
 }
 
 static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
@@ -168,27 +164,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
         ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
 }
 
+/**
+ * ccid_hc_tx_parse_options  -  Parse CCID-specific options sent by the receiver
+ * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
+ * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
+ * @val: value of @opt
+ * @len: length of @val in bytes
+ */
 static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
-                                           unsigned char option,
-                                           unsigned char len, u16 idx,
-                                           unsigned char* value)
+                                           u8 pkt, u8 opt, u8 *val, u8 len)
 {
-        int rc = 0;
-        if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
-                rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
-                                                              value);
-        return rc;
+        if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+                return 0;
+        return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
 }
 
+/**
+ * ccid_hc_rx_parse_options  -  Parse CCID-specific options sent by the sender
+ * Arguments are analogous to ccid_hc_tx_parse_options()
+ */
 static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
-                                           unsigned char option,
-                                           unsigned char len, u16 idx,
-                                           unsigned char* value)
+                                           u8 pkt, u8 opt, u8 *val, u8 len)
 {
-        int rc = 0;
-        if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
-                rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
-        return rc;
+        if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+                return 0;
+        return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
 }
 
 static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
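The rewritten wrappers above collapse the CCID option-parsing hook into a single guarded delegation: a CCID with no private options simply leaves the callback NULL and the option is accepted (return 0); otherwise the raw option is handed through together with the packet type it appeared on. Below is a minimal standalone sketch of that dispatch pattern; ccid_ops, my_parse_opt and parse_options are hypothetical stand-ins for the kernel's struct ccid_operations and ccid_hc_tx_parse_options(), not the actual API.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for struct ccid_operations. */
struct ccid_ops {
        /* Mirrors the new kernel signature: (sk, pkt, opt, val, len). */
        int (*parse_options)(void *sk, uint8_t pkt, uint8_t opt,
                             uint8_t *val, uint8_t len);
};

static int my_parse_opt(void *sk, uint8_t pkt, uint8_t opt,
                        uint8_t *val, uint8_t len)
{
        (void)sk;
        printf("pkt=%d opt=%d len=%d first byte=%d\n",
               pkt, opt, len, len ? val[0] : 0);
        return 0;
}

/* Same shape as ccid_hc_tx_parse_options(): an absent hook means
 * "nothing to parse", not an error, so return 0. */
static int parse_options(const struct ccid_ops *ops, void *sk, uint8_t pkt,
                         uint8_t opt, uint8_t *val, uint8_t len)
{
        if (ops->parse_options == NULL)
                return 0;
        return ops->parse_options(sk, pkt, opt, val, len);
}

int main(void)
{
        uint8_t rate[4] = { 0, 0, 0x30, 0x39 };         /* example option value */
        struct ccid_ops with_hook = { .parse_options = my_parse_opt };
        struct ccid_ops without_hook = { 0 };

        parse_options(&with_hook, NULL, 1, 194, rate, 4);
        return parse_options(&without_hook, NULL, 1, 194, rate, 4);
}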
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
 
           If in doubt, say N.
 
-config IP_DCCP_CCID3_RTO
-        int "Use higher bound for nofeedback timer"
-        default 100
-        depends on IP_DCCP_CCID3 && EXPERIMENTAL
-        ---help---
-          Use higher lower bound for nofeedback timer expiration.
-
-          The TFRC nofeedback timer normally expires after the maximum of 4
-          RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
-          with a small RTT this can mean a high processing load and reduced
-          performance, since then the nofeedback timer is triggered very
-          frequently.
-
-          This option enables to set a higher lower bound for the nofeedback
-          value. Values in units of milliseconds can be set here.
-
-          A value of 0 disables this feature by enforcing the value specified
-          in RFC 3448. The following values have been suggested as bounds for
-          experimental use:
-                * 16-20ms to match the typical multimedia inter-frame interval
-                * 100ms as a reasonable compromise [default]
-                * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
-
-          The default of 100ms is a compromise between a large value for
-          efficient DCCP implementations, and a small value to avoid disrupting
-          the network in times of congestion.
-
-          The purpose of the nofeedback timer is to slow DCCP down when there
-          is serious network congestion: experimenting with larger values should
-          therefore not be performed on WANs.
-
 config IP_DCCP_TFRC_LIB
         def_bool y if IP_DCCP_CCID3
 
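The help text removed above describes the bound that CONFIG_IP_DCCP_CCID3_RTO used to raise: the TFRC nofeedback timer fires after max(4 * RTT, 2 * t_ipi), which on a low-RTT LAN can amount to only a couple of milliseconds. Purely as an illustration of that arithmetic, here is a sketch; the function and the sample numbers are made up, not kernel code.

#include <stdio.h>

/* Illustration only: the nofeedback timeout from RFC 3448, 4.3 is
 * t_nfb = max(4 * RTT, 2 * s / X) = max(4 * RTT, 2 * t_ipi).
 * The removed Kconfig knob raised the lower bound of this value. */
static unsigned long t_nfb_usec(unsigned long rtt_us, unsigned long t_ipi_us,
                                unsigned long lower_bound_us)
{
        unsigned long t_nfb = 4 * rtt_us > 2 * t_ipi_us ? 4 * rtt_us
                                                        : 2 * t_ipi_us;
        return t_nfb > lower_bound_us ? t_nfb : lower_bound_us;
}

int main(void)
{
        /* With RTT = 200 us and t_ipi = 500 us the raw timeout is 1 ms;
         * a 100 ms lower bound (the removed default) keeps the timer from
         * firing many hundreds of times per second. */
        printf("raw=%lu us  bounded=%lu us\n",
               t_nfb_usec(200, 500, 0), t_nfb_usec(200, 500, 100000));
        return 0;
}

Later in this same diff the fixed Kconfig bound is replaced by the per-route RTAX_RTO_MIN value: see the ccid3.c hunk that computes tx_t_rto from tcp_rto_min().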
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..d850e291f87c 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
  */
 #include <linux/slab.h>
 #include "../feat.h"
-#include "../ccid.h"
-#include "../dccp.h"
 #include "ccid2.h"
 
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static int ccid2_debug;
 #define ccid2_pr_debug(format, a...)    DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-
-static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
-{
-        int len = 0;
-        int pipe = 0;
-        struct ccid2_seq *seqp = hc->tx_seqh;
-
-        /* there is data in the chain */
-        if (seqp != hc->tx_seqt) {
-                seqp = seqp->ccid2s_prev;
-                len++;
-                if (!seqp->ccid2s_acked)
-                        pipe++;
-
-                while (seqp != hc->tx_seqt) {
-                        struct ccid2_seq *prev = seqp->ccid2s_prev;
-
-                        len++;
-                        if (!prev->ccid2s_acked)
-                                pipe++;
-
-                        /* packets are sent sequentially */
-                        BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
-                                                prev->ccid2s_seq ) >= 0);
-                        BUG_ON(time_before(seqp->ccid2s_sent,
-                                           prev->ccid2s_sent));
-
-                        seqp = prev;
-                }
-        }
-
-        BUG_ON(pipe != hc->tx_pipe);
-        ccid2_pr_debug("len of chain=%d\n", len);
-
-        do {
-                seqp = seqp->ccid2s_prev;
-                len++;
-        } while (seqp != hc->tx_seqh);
-
-        ccid2_pr_debug("total len=%d\n", len);
-        BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
-}
 #else
 #define ccid2_pr_debug(format, a...)
-#define ccid2_hc_tx_check_sanity(hc)
 #endif
 
 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
         dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
-{
-        ccid2_pr_debug("change SRTT to %ld\n", val);
-        hc->tx_srtt = val;
-}
-
-static void ccid2_start_rto_timer(struct sock *sk);
-
 static void ccid2_hc_tx_rto_expire(unsigned long data)
 {
         struct sock *sk = (struct sock *)data;
         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-        long s;
 
         bh_lock_sock(sk);
         if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
 
         ccid2_pr_debug("RTO_EXPIRE\n");
 
-        ccid2_hc_tx_check_sanity(hc);
-
         /* back-off timer */
         hc->tx_rto <<= 1;
+        if (hc->tx_rto > DCCP_RTO_MAX)
+                hc->tx_rto = DCCP_RTO_MAX;
 
-        s = hc->tx_rto / HZ;
-        if (s > 60)
-                hc->tx_rto = 60 * HZ;
-
-        ccid2_start_rto_timer(sk);
+        sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
         /* adjust pipe, cwnd etc */
         hc->tx_ssthresh = hc->tx_cwnd / 2;
         if (hc->tx_ssthresh < 2)
                 hc->tx_ssthresh = 2;
         hc->tx_cwnd = 1;
         hc->tx_pipe = 0;
 
         /* clear state about stuff we sent */
         hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +146,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
         hc->tx_rpseq = 0;
         hc->tx_rpdupack = -1;
         ccid2_change_l_ack_ratio(sk, 1);
-        ccid2_hc_tx_check_sanity(hc);
 out:
         bh_unlock_sock(sk);
         sock_put(sk);
 }
 
-static void ccid2_start_rto_timer(struct sock *sk)
-{
-        struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-        ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
-
-        BUG_ON(timer_pending(&hc->tx_rtotimer));
-        sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-}
-
-static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
+static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
         struct dccp_sock *dp = dccp_sk(sk);
         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
 
         hc->tx_seqh->ccid2s_seq   = dp->dccps_gss;
         hc->tx_seqh->ccid2s_acked = 0;
-        hc->tx_seqh->ccid2s_sent  = jiffies;
+        hc->tx_seqh->ccid2s_sent  = ccid2_time_stamp;
 
         next = hc->tx_seqh->ccid2s_next;
         /* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
         }
 #endif
 
-        /* setup RTO timer */
-        if (!timer_pending(&hc->tx_rtotimer))
-                ccid2_start_rto_timer(sk);
+        sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
         do {
                 struct ccid2_seq *seqp = hc->tx_seqt;
 
                 while (seqp != hc->tx_seqh) {
-                        ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
+                        ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
                                        (unsigned long long)seqp->ccid2s_seq,
                                        seqp->ccid2s_acked, seqp->ccid2s_sent);
                         seqp = seqp->ccid2s_next;
                 }
         } while (0);
         ccid2_pr_debug("=========\n");
-        ccid2_hc_tx_check_sanity(hc);
 #endif
 }
 
@@ -378,17 +306,87 @@ out_invalid_option:
         return -1;
 }
 
-static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
+/**
+ * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
+ * This code is almost identical with TCP's tcp_rtt_estimator(), since
+ * - it has a higher sampling frequency (recommended by RFC 1323),
+ * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
+ * - it is simple (cf. more complex proposals such as Eifel timer or research
+ *   which suggests that the gain should be set according to window size),
+ * - in tests it was found to work well with CCID2 [gerrit].
+ */
+static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 {
         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+        long m = mrtt ? : 1;
 
-        sk_stop_timer(sk, &hc->tx_rtotimer);
-        ccid2_pr_debug("deleted RTO timer\n");
+        if (hc->tx_srtt == 0) {
+                /* First measurement m */
+                hc->tx_srtt = m << 3;
+                hc->tx_mdev = m << 1;
+
+                hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
+                hc->tx_rttvar   = hc->tx_mdev_max;
+
+                hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
+        } else {
+                /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
+                m -= (hc->tx_srtt >> 3);
+                hc->tx_srtt += m;
+
+                /* Similarly, update scaled mdev with regard to |m| */
+                if (m < 0) {
+                        m = -m;
+                        m -= (hc->tx_mdev >> 2);
+                        /*
+                         * This neutralises RTO increase when RTT < SRTT - mdev
+                         * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
+                         * in Linux TCP", USENIX 2002, pp. 49-62).
+                         */
+                        if (m > 0)
+                                m >>= 3;
+                } else {
+                        m -= (hc->tx_mdev >> 2);
+                }
+                hc->tx_mdev += m;
+
+                if (hc->tx_mdev > hc->tx_mdev_max) {
+                        hc->tx_mdev_max = hc->tx_mdev;
+                        if (hc->tx_mdev_max > hc->tx_rttvar)
+                                hc->tx_rttvar = hc->tx_mdev_max;
+                }
+
+                /*
+                 * Decay RTTVAR at most once per flight, exploiting that
+                 *  1) pipe <= cwnd <= Sequence_Window = W  (RFC 4340, 7.5.2)
+                 *  2) AWL = GSS-W+1 <= GAR <= GSS          (RFC 4340, 7.5.1)
+                 * GAR is a useful bound for FlightSize = pipe.
+                 * AWL is probably too low here, as it over-estimates pipe.
+                 */
+                if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
+                        if (hc->tx_mdev_max < hc->tx_rttvar)
+                                hc->tx_rttvar -= (hc->tx_rttvar -
+                                                  hc->tx_mdev_max) >> 2;
+                        hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
+                        hc->tx_mdev_max = tcp_rto_min(sk);
+                }
+        }
+
+        /*
+         * Set RTO from SRTT and RTTVAR
+         * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
+         * This agrees with RFC 4341, 5:
+         *      "Because DCCP does not retransmit data, DCCP does not require
+         *       TCP's recommended minimum timeout of one second".
+         */
+        hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
+
+        if (hc->tx_rto > DCCP_RTO_MAX)
+                hc->tx_rto = DCCP_RTO_MAX;
 }
 
-static inline void ccid2_new_ack(struct sock *sk,
-                                 struct ccid2_seq *seqp,
-                                 unsigned int *maxincr)
+static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
+                          unsigned int *maxincr)
 {
         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
                 hc->tx_cwnd += 1;
                 hc->tx_packets_acked = 0;
         }
-
-        /* update RTO */
-        if (hc->tx_srtt == -1 ||
-            time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) {
-                unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
-                int s;
-
-                /* first measurement */
-                if (hc->tx_srtt == -1) {
-                        ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
-                                       r, jiffies,
-                                       (unsigned long long)seqp->ccid2s_seq);
-                        ccid2_change_srtt(hc, r);
-                        hc->tx_rttvar = r >> 1;
-                } else {
-                        /* RTTVAR */
-                        long tmp = hc->tx_srtt - r;
-                        long srtt;
-
-                        if (tmp < 0)
-                                tmp *= -1;
-
-                        tmp >>= 2;
-                        hc->tx_rttvar *= 3;
-                        hc->tx_rttvar >>= 2;
-                        hc->tx_rttvar += tmp;
-
-                        /* SRTT */
-                        srtt = hc->tx_srtt;
-                        srtt *= 7;
-                        srtt >>= 3;
-                        tmp = r >> 3;
-                        srtt += tmp;
-                        ccid2_change_srtt(hc, srtt);
-                }
-                s = hc->tx_rttvar << 2;
-                /* clock granularity is 1 when based on jiffies */
-                if (!s)
-                        s = 1;
-                hc->tx_rto = hc->tx_srtt + s;
-
-                /* must be at least a second */
-                s = hc->tx_rto / HZ;
-                /* DCCP doesn't require this [but I like it cuz my code sux] */
-#if 1
-                if (s < 1)
-                        hc->tx_rto = HZ;
-#endif
-                /* max 60 seconds */
-                if (s > 60)
-                        hc->tx_rto = HZ * 60;
-
-                hc->tx_lastrtt = jiffies;
-
-                ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
-                               hc->tx_srtt, hc->tx_rttvar,
-                               hc->tx_rto, HZ, r);
-        }
-
-        /* we got a new ack, so re-start RTO timer */
-        ccid2_hc_tx_kill_rto_timer(sk);
-        ccid2_start_rto_timer(sk);
-}
-
-static void ccid2_hc_tx_dec_pipe(struct sock *sk)
-{
-        struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
-        if (hc->tx_pipe == 0)
-                DCCP_BUG("pipe == 0");
-        else
-                hc->tx_pipe--;
-
-        if (hc->tx_pipe == 0)
-                ccid2_hc_tx_kill_rto_timer(sk);
+        /*
+         * FIXME: RTT is sampled several times per acknowledgment (for each
+         * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
+         * This causes the RTT to be over-estimated, since the older entries
+         * in the Ack Vector have earlier sending times.
+         * The cleanest solution is to not use the ccid2s_sent field at all
+         * and instead use DCCP timestamps: requires changes in other places.
+         */
+        ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 {
         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 
-        if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) {
+        if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
                 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
                 return;
         }
 
-        hc->tx_last_cong = jiffies;
+        hc->tx_last_cong = ccid2_time_stamp;
 
         hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
         hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
         int done = 0;
         unsigned int maxincr = 0;
 
-        ccid2_hc_tx_check_sanity(hc);
         /* check reverse path congestion */
         seqno = DCCP_SKB_CB(skb)->dccpd_seq;
 
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
                         seqp->ccid2s_acked = 1;
                         ccid2_pr_debug("Got ack for %llu\n",
                                        (unsigned long long)seqp->ccid2s_seq);
-                        ccid2_hc_tx_dec_pipe(sk);
+                        hc->tx_pipe--;
                 }
                 if (seqp == hc->tx_seqt) {
                         done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
                                  * one ack vector.
                                  */
                                 ccid2_congestion_event(sk, seqp);
-                                ccid2_hc_tx_dec_pipe(sk);
+                                hc->tx_pipe--;
                         }
                         if (seqp == hc->tx_seqt)
                                 break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
                 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
         }
 
-        ccid2_hc_tx_check_sanity(hc);
+        /* restart RTO timer if not all outstanding data has been acked */
+        if (hc->tx_pipe == 0)
+                sk_stop_timer(sk, &hc->tx_rtotimer);
+        else
+                sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
         /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
         hc->tx_ssthresh = ~0U;
 
-        /*
-         * RFC 4341, 5: "The cwnd parameter is initialized to at most four
-         * packets for new connections, following the rules from [RFC3390]".
-         * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
-         */
-        hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+        /* Use larger initial windows (RFC 4341, section 5). */
+        hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
 
         /* Make sure that Ack Ratio is enabled and within bounds. */
         max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
         if (ccid2_hc_tx_alloc_seq(hc))
                 return -ENOMEM;
 
-        hc->tx_rto       = 3 * HZ;
-        ccid2_change_srtt(hc, -1);
-        hc->tx_rttvar    = -1;
+        hc->tx_rto       = DCCP_TIMEOUT_INIT;
         hc->tx_rpdupack  = -1;
-        hc->tx_last_cong = jiffies;
+        hc->tx_last_cong = ccid2_time_stamp;
         setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
                         (unsigned long)sk);
-
-        ccid2_hc_tx_check_sanity(hc);
         return 0;
 }
 
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
         int i;
 
-        ccid2_hc_tx_kill_rto_timer(sk);
+        sk_stop_timer(sk, &hc->tx_rtotimer);
 
         for (i = 0; i < hc->tx_seqbufc; i++)
                 kfree(hc->tx_seqbuf[i]);
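The new ccid2_rtt_estimator() above follows the RFC 2988 / tcp_rtt_estimator() scheme: SRTT is stored scaled by 8 and the deviation by 4, so SRTT += (m - SRTT)/8 and MDEV += (|err| - MDEV)/4 reduce to shifts and adds, and RTO = (srtt >> 3) + rttvar corresponds to SRTT + 4 * MDEV, clamped to DCCP_RTO_MAX. The userspace sketch below reproduces only that integer arithmetic; it deliberately omits the once-per-flight RTTVAR decay and the negative-error neutralisation of the real function, and rto_min is an assumed constant standing in for tcp_rto_min(sk).

#include <stdio.h>

/* Sketch of the scaled RTT bookkeeping used by ccid2_rtt_estimator().
 * All values are in the same unit (e.g. jiffies). */
struct rtt_state {
        long srtt;      /* smoothed RTT, scaled by 8  */
        long mdev;      /* mean deviation, scaled by 4 */
        long rttvar;    /* running max of mdev, scaled by 4 */
        long rto;
};

static void rtt_sample(struct rtt_state *s, long m, long rto_min)
{
        if (m <= 0)
                m = 1;
        if (s->srtt == 0) {                     /* first measurement */
                s->srtt   = m << 3;
                s->mdev   = m << 1;
                s->rttvar = s->mdev > rto_min ? s->mdev : rto_min;
        } else {
                m -= (s->srtt >> 3);            /* error against SRTT */
                s->srtt += m;                   /* SRTT += err / 8 */
                if (m < 0)
                        m = -m;
                m -= (s->mdev >> 2);
                s->mdev += m;                   /* MDEV += (|err| - MDEV) / 4 */
                if (s->mdev > s->rttvar)
                        s->rttvar = s->mdev;
        }
        s->rto = (s->srtt >> 3) + s->rttvar;    /* ~ SRTT + 4 * MDEV */
}

int main(void)
{
        struct rtt_state s = { 0 };
        long samples[] = { 100, 110, 90, 300, 100 };

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                rtt_sample(&s, samples[i], 50);
                printf("sample=%ld srtt=%ld rto=%ld\n",
                       samples[i], s.srtt >> 3, s.rto);
        }
        return 0;
}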
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..9731c2dc1487 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
 #ifndef _DCCP_CCID2_H_
 #define _DCCP_CCID2_H_
 
-#include <linux/dccp.h>
 #include <linux/timer.h>
 #include <linux/types.h>
 #include "../ccid.h"
+#include "../dccp.h"
+
+/*
+ * CCID-2 timestamping faces the same issues as TCP timestamping.
+ * Hence we reuse/share as much of the code as possible.
+ */
+#define ccid2_time_stamp        tcp_time_stamp
+
 /* NUMDUPACK parameter from RFC 4341, p. 6 */
 #define NUMDUPACK       3
 
-struct sock;
-
 struct ccid2_seq {
         u64                     ccid2s_seq;
-        unsigned long           ccid2s_sent;
+        u32                     ccid2s_sent;
         int                     ccid2s_acked;
         struct ccid2_seq        *ccid2s_prev;
         struct ccid2_seq        *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
  * struct ccid2_hc_tx_sock - CCID2 TX half connection
  * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
  * @tx_packets_acked:        Ack counter for deriving cwnd growth (RFC 3465)
- * @tx_lastrtt:              time RTT was last measured
+ * @tx_srtt:                 smoothed RTT estimate, scaled by 2^3
+ * @tx_mdev:                 smoothed RTT variation, scaled by 2^2
+ * @tx_mdev_max:             maximum of @mdev during one flight
+ * @tx_rttvar:               moving average/maximum of @mdev_max
+ * @tx_rto:                  RTO value deriving from SRTT and RTTVAR (RFC 2988)
+ * @tx_rtt_seq:              to decay RTTVAR at most once per flight
  * @tx_rpseq:                last consecutive seqno
  * @tx_rpdupack:             dupacks since rpseq
  */
@@ -55,14 +65,19 @@ struct ccid2_hc_tx_sock {
         int                     tx_seqbufc;
         struct ccid2_seq        *tx_seqh;
         struct ccid2_seq        *tx_seqt;
-        long                    tx_rto;
-        long                    tx_srtt;
-        long                    tx_rttvar;
-        unsigned long           tx_lastrtt;
+
+        /* RTT measurement: variables/principles are the same as in TCP */
+        u32                     tx_srtt,
+                                tx_mdev,
+                                tx_mdev_max,
+                                tx_rttvar,
+                                tx_rto;
+        u64                     tx_rtt_seq:48;
         struct timer_list       tx_rtotimer;
+
         u64                     tx_rpseq;
         int                     tx_rpdupack;
-        unsigned long           tx_last_cong;
+        u32                     tx_last_cong;
         u64                     tx_high_ack;
 };
 
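Since ccid2s_sent and tx_last_cong are now free-running 32-bit ccid2_time_stamp values (sharing TCP's jiffies-based timestamp clock), age comparisons in ccid2.c switch from time_before() to the wrap-safe signed-difference idiom (s32)(a - b) < 0, as seen in ccid2_congestion_event() above. A small self-contained illustration of why the cast matters near the wrap point follows; the helper name before() is made up for the example.

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a happened before b" test for free-running 32-bit timestamps,
 * the same idiom as (s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0. */
static int before(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) < 0;
}

int main(void)
{
        uint32_t last_cong = 0xfffffff0u;       /* just before the wrap */
        uint32_t sent_old  = 0xffffffe0u;       /* sent earlier */
        uint32_t sent_new  = 0x00000010u;       /* sent after the wrap */

        /* A naive '<' misclassifies the post-wrap timestamp as "older";
         * the signed difference does not. */
        printf("old before last: naive=%d wrap-safe=%d\n",
               sent_old < last_cong, before(sent_old, last_cong));
        printf("new before last: naive=%d wrap-safe=%d\n",
               sent_new < last_cong, before(sent_new, last_cong));
        return 0;
}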
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 95f752986497..3060a60ed5ab 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) | |||
54 | [TFRC_SSTATE_NO_SENT] = "NO_SENT", | 54 | [TFRC_SSTATE_NO_SENT] = "NO_SENT", |
55 | [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", | 55 | [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", |
56 | [TFRC_SSTATE_FBACK] = "FBACK", | 56 | [TFRC_SSTATE_FBACK] = "FBACK", |
57 | [TFRC_SSTATE_TERM] = "TERM", | ||
58 | }; | 57 | }; |
59 | 58 | ||
60 | return ccid3_state_names[state]; | 59 | return ccid3_state_names[state]; |
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk) | |||
91 | return scaled_div(w_init << 6, hc->tx_rtt); | 90 | return scaled_div(w_init << 6, hc->tx_rtt); |
92 | } | 91 | } |
93 | 92 | ||
94 | /* | 93 | /** |
95 | * Recalculate t_ipi and delta (should be called whenever X changes) | 94 | * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst |
95 | * This respects the granularity of X_inst (64 * bytes/second). | ||
96 | */ | 96 | */ |
97 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) | 97 | static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) |
98 | { | 98 | { |
99 | /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ | ||
100 | hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); | 99 | hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); |
101 | 100 | ||
102 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | 101 | ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi, |
103 | hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); | 102 | hc->tx_s, (unsigned)(hc->tx_x >> 6)); |
104 | |||
105 | ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi, | ||
106 | hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6)); | ||
107 | } | 103 | } |
108 | 104 | ||
109 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) | 105 | static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) |
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
211 | ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, | 207 | ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, |
212 | ccid3_tx_state_name(hc->tx_state)); | 208 | ccid3_tx_state_name(hc->tx_state)); |
213 | 209 | ||
210 | /* Ignore and do not restart after leaving the established state */ | ||
211 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) | ||
212 | goto out; | ||
213 | |||
214 | /* Reset feedback state to "no feedback received" */ | ||
214 | if (hc->tx_state == TFRC_SSTATE_FBACK) | 215 | if (hc->tx_state == TFRC_SSTATE_FBACK) |
215 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | 216 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
216 | else if (hc->tx_state != TFRC_SSTATE_NO_FBACK) | ||
217 | goto out; | ||
218 | 217 | ||
219 | /* | 218 | /* |
220 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 | 219 | * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 |
220 | * RTO is 0 if and only if no feedback has been received yet. | ||
221 | */ | 221 | */ |
222 | if (hc->tx_t_rto == 0 || /* no feedback received yet */ | 222 | if (hc->tx_t_rto == 0 || hc->tx_p == 0) { |
223 | hc->tx_p == 0) { | ||
224 | 223 | ||
225 | /* halve send rate directly */ | 224 | /* halve send rate directly */ |
226 | hc->tx_x = max(hc->tx_x / 2, | 225 | hc->tx_x = max(hc->tx_x / 2, |
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | |||
256 | * Set new timeout for the nofeedback timer. | 255 | * Set new timeout for the nofeedback timer. |
257 | * See comments in packet_recv() regarding the value of t_RTO. | 256 | * See comments in packet_recv() regarding the value of t_RTO. |
258 | */ | 257 | */ |
259 | if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ | 258 | if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */ |
260 | t_nfb = TFRC_INITIAL_TIMEOUT; | 259 | t_nfb = TFRC_INITIAL_TIMEOUT; |
261 | else | 260 | else |
262 | t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); | 261 | t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); |
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
290 | if (unlikely(skb->len == 0)) | 289 | if (unlikely(skb->len == 0)) |
291 | return -EBADMSG; | 290 | return -EBADMSG; |
292 | 291 | ||
293 | switch (hc->tx_state) { | 292 | if (hc->tx_state == TFRC_SSTATE_NO_SENT) { |
294 | case TFRC_SSTATE_NO_SENT: | ||
295 | sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + | 293 | sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + |
296 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); | 294 | usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); |
297 | hc->tx_last_win_count = 0; | 295 | hc->tx_last_win_count = 0; |
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
326 | ccid3_update_send_interval(hc); | 324 | ccid3_update_send_interval(hc); |
327 | 325 | ||
328 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | 326 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); |
329 | break; | 327 | |
330 | case TFRC_SSTATE_NO_FBACK: | 328 | } else { |
331 | case TFRC_SSTATE_FBACK: | ||
332 | delay = ktime_us_delta(hc->tx_t_nom, now); | 329 | delay = ktime_us_delta(hc->tx_t_nom, now); |
333 | ccid3_pr_debug("delay=%ld\n", (long)delay); | 330 | ccid3_pr_debug("delay=%ld\n", (long)delay); |
334 | /* | 331 | /* |
335 | * Scheduling of packet transmissions [RFC 3448, 4.6] | 332 | * Scheduling of packet transmissions (RFC 5348, 8.3) |
336 | * | 333 | * |
337 | * if (t_now > t_nom - delta) | 334 | * if (t_now > t_nom - delta) |
338 | * // send the packet now | 335 | * // send the packet now |
339 | * else | 336 | * else |
340 | * // send the packet in (t_nom - t_now) milliseconds. | 337 | * // send the packet in (t_nom - t_now) milliseconds. |
341 | */ | 338 | */ |
342 | if (delay - (s64)hc->tx_delta >= 1000) | 339 | if (delay >= TFRC_T_DELTA) |
343 | return (u32)delay / 1000L; | 340 | return (u32)delay / USEC_PER_MSEC; |
344 | 341 | ||
345 | ccid3_hc_tx_update_win_count(hc, now); | 342 | ccid3_hc_tx_update_win_count(hc, now); |
346 | break; | ||
347 | case TFRC_SSTATE_TERM: | ||
348 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | ||
349 | return -EINVAL; | ||
350 | } | 343 | } |
351 | 344 | ||
352 | /* prepare to send now (add options etc.) */ | 345 | /* prepare to send now (add options etc.) */ |
@@ -358,8 +351,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) | |||
358 | return 0; | 351 | return 0; |
359 | } | 352 | } |
360 | 353 | ||
361 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, | 354 | static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) |
362 | unsigned int len) | ||
363 | { | 355 | { |
364 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 356 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
365 | 357 | ||
@@ -372,48 +364,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, | |||
372 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 364 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
373 | { | 365 | { |
374 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 366 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
375 | struct ccid3_options_received *opt_recv; | 367 | struct tfrc_tx_hist_entry *acked; |
376 | ktime_t now; | 368 | ktime_t now; |
377 | unsigned long t_nfb; | 369 | unsigned long t_nfb; |
378 | u32 pinv, r_sample; | 370 | u32 r_sample; |
379 | 371 | ||
380 | /* we are only interested in ACKs */ | 372 | /* we are only interested in ACKs */ |
381 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | 373 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || |
382 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | 374 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) |
383 | return; | 375 | return; |
384 | /* ... and only in the established state */ | ||
385 | if (hc->tx_state != TFRC_SSTATE_FBACK && | ||
386 | hc->tx_state != TFRC_SSTATE_NO_FBACK) | ||
387 | return; | ||
388 | |||
389 | opt_recv = &hc->tx_options_received; | ||
390 | now = ktime_get_real(); | ||
391 | |||
392 | /* Estimate RTT from history if ACK number is valid */ | ||
393 | r_sample = tfrc_tx_hist_rtt(hc->tx_hist, | ||
394 | DCCP_SKB_CB(skb)->dccpd_ack_seq, now); | ||
395 | if (r_sample == 0) { | ||
396 | DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, | ||
397 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), | ||
398 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
399 | return; | ||
400 | } | ||
401 | |||
402 | /* Update receive rate in units of 64 * bytes/second */ | ||
403 | hc->tx_x_recv = opt_recv->ccid3or_receive_rate; | ||
404 | hc->tx_x_recv <<= 6; | ||
405 | |||
406 | /* Update loss event rate (which is scaled by 1e6) */ | ||
407 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
408 | if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ | ||
409 | hc->tx_p = 0; | ||
410 | else /* can not exceed 100% */ | ||
411 | hc->tx_p = scaled_div(1, pinv); | ||
412 | /* | 376 | /* |
413 | * Validate new RTT sample and update moving average | 377 | * Locate the acknowledged packet in the TX history. |
378 | * | ||
379 | * Returning "entry not found" here can for instance happen when | ||
380 | * - the host has not sent out anything (e.g. a passive server), | ||
381 | * - the Ack is outdated (packet with higher Ack number was received), | ||
382 | * - it is a bogus Ack (for a packet not sent on this connection). | ||
414 | */ | 383 | */ |
415 | r_sample = dccp_sample_rtt(sk, r_sample); | 384 | acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb)); |
385 | if (acked == NULL) | ||
386 | return; | ||
387 | /* For the sake of RTT sampling, ignore/remove all older entries */ | ||
388 | tfrc_tx_hist_purge(&acked->next); | ||
389 | |||
390 | /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */ | ||
391 | now = ktime_get_real(); | ||
392 | r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp)); | ||
416 | hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); | 393 | hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); |
394 | |||
417 | /* | 395 | /* |
418 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 | 396 | * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 |
419 | */ | 397 | */ |
@@ -461,13 +439,12 @@ done_computing_x: | |||
461 | sk->sk_write_space(sk); | 439 | sk->sk_write_space(sk); |
462 | 440 | ||
463 | /* | 441 | /* |
464 | * Update timeout interval for the nofeedback timer. | 442 | * Update timeout interval for the nofeedback timer. In order to control |
465 | * We use a configuration option to increase the lower bound. | 443 | * rate halving on networks with very low RTTs (<= 1 ms), use per-route |
466 | * This can help avoid triggering the nofeedback timer too | 444 | * tunable RTAX_RTO_MIN value as the lower bound. |
467 | * often ('spinning') on LANs with small RTTs. | ||
468 | */ | 445 | */ |
469 | hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * | 446 | hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, |
470 | (USEC_PER_SEC / 1000))); | 447 | USEC_PER_SEC/HZ * tcp_rto_min(sk)); |
471 | /* | 448 | /* |
472 | * Schedule no feedback timer to expire in | 449 | * Schedule no feedback timer to expire in |
473 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) | 450 | * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) |
@@ -482,66 +459,41 @@ done_computing_x: | |||
482 | jiffies + usecs_to_jiffies(t_nfb)); | 459 | jiffies + usecs_to_jiffies(t_nfb)); |
483 | } | 460 | } |
484 | 461 | ||
485 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, | 462 | static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type, |
486 | unsigned char len, u16 idx, | 463 | u8 option, u8 *optval, u8 optlen) |
487 | unsigned char *value) | ||
488 | { | 464 | { |
489 | int rc = 0; | ||
490 | const struct dccp_sock *dp = dccp_sk(sk); | ||
491 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 465 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
492 | struct ccid3_options_received *opt_recv; | ||
493 | __be32 opt_val; | 466 | __be32 opt_val; |
494 | 467 | ||
495 | opt_recv = &hc->tx_options_received; | ||
496 | |||
497 | if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { | ||
498 | opt_recv->ccid3or_seqno = dp->dccps_gsr; | ||
499 | opt_recv->ccid3or_loss_event_rate = ~0; | ||
500 | opt_recv->ccid3or_loss_intervals_idx = 0; | ||
501 | opt_recv->ccid3or_loss_intervals_len = 0; | ||
502 | opt_recv->ccid3or_receive_rate = 0; | ||
503 | } | ||
504 | |||
505 | switch (option) { | 468 | switch (option) { |
469 | case TFRC_OPT_RECEIVE_RATE: | ||
506 | case TFRC_OPT_LOSS_EVENT_RATE: | 470 | case TFRC_OPT_LOSS_EVENT_RATE: |
507 | if (unlikely(len != 4)) { | 471 | /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */ |
508 | DCCP_WARN("%s(%p), invalid len %d " | 472 | if (packet_type == DCCP_PKT_DATA) |
509 | "for TFRC_OPT_LOSS_EVENT_RATE\n", | 473 | break; |
510 | dccp_role(sk), sk, len); | 474 | if (unlikely(optlen != 4)) { |
511 | rc = -EINVAL; | 475 | DCCP_WARN("%s(%p), invalid len %d for %u\n", |
512 | } else { | 476 | dccp_role(sk), sk, optlen, option); |
513 | opt_val = get_unaligned((__be32 *)value); | 477 | return -EINVAL; |
514 | opt_recv->ccid3or_loss_event_rate = ntohl(opt_val); | ||
515 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", | ||
516 | dccp_role(sk), sk, | ||
517 | opt_recv->ccid3or_loss_event_rate); | ||
518 | } | 478 | } |
519 | break; | 479 | opt_val = ntohl(get_unaligned((__be32 *)optval)); |
520 | case TFRC_OPT_LOSS_INTERVALS: | 480 | |
521 | opt_recv->ccid3or_loss_intervals_idx = idx; | 481 | if (option == TFRC_OPT_RECEIVE_RATE) { |
522 | opt_recv->ccid3or_loss_intervals_len = len; | 482 | /* Receive Rate is kept in units of 64 bytes/second */ |
523 | ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", | 483 | hc->tx_x_recv = opt_val; |
524 | dccp_role(sk), sk, | 484 | hc->tx_x_recv <<= 6; |
525 | opt_recv->ccid3or_loss_intervals_idx, | 485 | |
526 | opt_recv->ccid3or_loss_intervals_len); | ||
527 | break; | ||
528 | case TFRC_OPT_RECEIVE_RATE: | ||
529 | if (unlikely(len != 4)) { | ||
530 | DCCP_WARN("%s(%p), invalid len %d " | ||
531 | "for TFRC_OPT_RECEIVE_RATE\n", | ||
532 | dccp_role(sk), sk, len); | ||
533 | rc = -EINVAL; | ||
534 | } else { | ||
535 | opt_val = get_unaligned((__be32 *)value); | ||
536 | opt_recv->ccid3or_receive_rate = ntohl(opt_val); | ||
537 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", | 486 | ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", |
538 | dccp_role(sk), sk, | 487 | dccp_role(sk), sk, opt_val); |
539 | opt_recv->ccid3or_receive_rate); | 488 | } else { |
489 | /* Update the fixpoint Loss Event Rate fraction */ | ||
490 | hc->tx_p = tfrc_invert_loss_event_rate(opt_val); | ||
491 | |||
492 | ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", | ||
493 | dccp_role(sk), sk, opt_val); | ||
540 | } | 494 | } |
541 | break; | ||
542 | } | 495 | } |
543 | 496 | return 0; | |
544 | return rc; | ||
545 | } | 497 | } |
546 | 498 | ||
547 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) | 499 | static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) |
@@ -559,42 +511,36 @@ static void ccid3_hc_tx_exit(struct sock *sk) | |||
559 | { | 511 | { |
560 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); | 512 | struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
561 | 513 | ||
562 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); | ||
563 | sk_stop_timer(sk, &hc->tx_no_feedback_timer); | 514 | sk_stop_timer(sk, &hc->tx_no_feedback_timer); |
564 | |||
565 | tfrc_tx_hist_purge(&hc->tx_hist); | 515 | tfrc_tx_hist_purge(&hc->tx_hist); |
566 | } | 516 | } |
567 | 517 | ||
568 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | 518 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) |
569 | { | 519 | { |
570 | struct ccid3_hc_tx_sock *hc; | 520 | info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto; |
571 | 521 | info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt; | |
572 | /* Listen socks doesn't have a private CCID block */ | ||
573 | if (sk->sk_state == DCCP_LISTEN) | ||
574 | return; | ||
575 | |||
576 | hc = ccid3_hc_tx_sk(sk); | ||
577 | info->tcpi_rto = hc->tx_t_rto; | ||
578 | info->tcpi_rtt = hc->tx_rtt; | ||
579 | } | 522 | } |
580 | 523 | ||
581 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | 524 | static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, |
582 | u32 __user *optval, int __user *optlen) | 525 | u32 __user *optval, int __user *optlen) |
583 | { | 526 | { |
584 | const struct ccid3_hc_tx_sock *hc; | 527 | const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); |
528 | struct tfrc_tx_info tfrc; | ||
585 | const void *val; | 529 | const void *val; |
586 | 530 | ||
587 | /* Listen socks doesn't have a private CCID block */ | ||
588 | if (sk->sk_state == DCCP_LISTEN) | ||
589 | return -EINVAL; | ||
590 | |||
591 | hc = ccid3_hc_tx_sk(sk); | ||
592 | switch (optname) { | 531 | switch (optname) { |
593 | case DCCP_SOCKOPT_CCID_TX_INFO: | 532 | case DCCP_SOCKOPT_CCID_TX_INFO: |
594 | if (len < sizeof(hc->tx_tfrc)) | 533 | if (len < sizeof(tfrc)) |
595 | return -EINVAL; | 534 | return -EINVAL; |
596 | len = sizeof(hc->tx_tfrc); | 535 | tfrc.tfrctx_x = hc->tx_x; |
597 | val = &hc->tx_tfrc; | 536 | tfrc.tfrctx_x_recv = hc->tx_x_recv; |
537 | tfrc.tfrctx_x_calc = hc->tx_x_calc; | ||
538 | tfrc.tfrctx_rtt = hc->tx_rtt; | ||
539 | tfrc.tfrctx_p = hc->tx_p; | ||
540 | tfrc.tfrctx_rto = hc->tx_t_rto; | ||
541 | tfrc.tfrctx_ipi = hc->tx_t_ipi; | ||
542 | len = sizeof(tfrc); | ||
543 | val = &tfrc; | ||
598 | break; | 544 | break; |
599 | default: | 545 | default: |
600 | return -ENOPROTOOPT; | 546 | return -ENOPROTOOPT; |
@@ -624,7 +570,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) | |||
624 | static const char *const ccid3_rx_state_names[] = { | 570 | static const char *const ccid3_rx_state_names[] = { |
625 | [TFRC_RSTATE_NO_DATA] = "NO_DATA", | 571 | [TFRC_RSTATE_NO_DATA] = "NO_DATA", |
626 | [TFRC_RSTATE_DATA] = "DATA", | 572 | [TFRC_RSTATE_DATA] = "DATA", |
627 | [TFRC_RSTATE_TERM] = "TERM", | ||
628 | }; | 573 | }; |
629 | 574 | ||
630 | return ccid3_rx_state_names[state]; | 575 | return ccid3_rx_state_names[state]; |
@@ -650,14 +595,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, | |||
650 | { | 595 | { |
651 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); | 596 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
652 | struct dccp_sock *dp = dccp_sk(sk); | 597 | struct dccp_sock *dp = dccp_sk(sk); |
653 | ktime_t now; | 598 | ktime_t now = ktime_get_real(); |
654 | s64 delta = 0; | 599 | s64 delta = 0; |
655 | 600 | ||
656 | if (unlikely(hc->rx_state == TFRC_RSTATE_TERM)) | ||
657 | return; | ||
658 | |||
659 | now = ktime_get_real(); | ||
660 | |||
661 | switch (fbtype) { | 601 | switch (fbtype) { |
662 | case CCID3_FBACK_INITIAL: | 602 | case CCID3_FBACK_INITIAL: |
663 | hc->rx_x_recv = 0; | 603 | hc->rx_x_recv = 0; |
@@ -701,14 +641,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, | |||
701 | 641 | ||
702 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | 642 | static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) |
703 | { | 643 | { |
704 | const struct ccid3_hc_rx_sock *hc; | 644 | const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
705 | __be32 x_recv, pinv; | 645 | __be32 x_recv, pinv; |
706 | 646 | ||
707 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) | 647 | if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) |
708 | return 0; | 648 | return 0; |
709 | 649 | ||
710 | hc = ccid3_hc_rx_sk(sk); | ||
711 | |||
712 | if (dccp_packet_without_ack(skb)) | 650 | if (dccp_packet_without_ack(skb)) |
713 | return 0; | 651 | return 0; |
714 | 652 | ||
@@ -749,10 +687,11 @@ static u32 ccid3_first_li(struct sock *sk) | |||
749 | x_recv = scaled_div32(hc->rx_bytes_recv, delta); | 687 | x_recv = scaled_div32(hc->rx_bytes_recv, delta); |
750 | if (x_recv == 0) { /* would also trigger divide-by-zero */ | 688 | if (x_recv == 0) { /* would also trigger divide-by-zero */ |
751 | DCCP_WARN("X_recv==0\n"); | 689 | DCCP_WARN("X_recv==0\n"); |
752 | if ((x_recv = hc->rx_x_recv) == 0) { | 690 | if (hc->rx_x_recv == 0) { |
753 | DCCP_BUG("stored value of X_recv is zero"); | 691 | DCCP_BUG("stored value of X_recv is zero"); |
754 | return ~0U; | 692 | return ~0U; |
755 | } | 693 | } |
694 | x_recv = hc->rx_x_recv; | ||
756 | } | 695 | } |
757 | 696 | ||
758 | fval = scaled_div(hc->rx_s, hc->rx_rtt); | 697 | fval = scaled_div(hc->rx_s, hc->rx_rtt); |
@@ -862,46 +801,31 @@ static void ccid3_hc_rx_exit(struct sock *sk) | |||
862 | { | 801 | { |
863 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); | 802 | struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
864 | 803 | ||
865 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); | ||
866 | |||
867 | tfrc_rx_hist_purge(&hc->rx_hist); | 804 | tfrc_rx_hist_purge(&hc->rx_hist); |
868 | tfrc_lh_cleanup(&hc->rx_li_hist); | 805 | tfrc_lh_cleanup(&hc->rx_li_hist); |
869 | } | 806 | } |
870 | 807 | ||
871 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) | 808 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) |
872 | { | 809 | { |
873 | const struct ccid3_hc_rx_sock *hc; | 810 | info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state; |
874 | |||
875 | /* Listen socks doesn't have a private CCID block */ | ||
876 | if (sk->sk_state == DCCP_LISTEN) | ||
877 | return; | ||
878 | |||
879 | hc = ccid3_hc_rx_sk(sk); | ||
880 | info->tcpi_ca_state = hc->rx_state; | ||
881 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; | 811 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; |
882 | info->tcpi_rcv_rtt = hc->rx_rtt; | 812 | info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt; |
883 | } | 813 | } |
884 | 814 | ||
885 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, | 815 | static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, |
886 | u32 __user *optval, int __user *optlen) | 816 | u32 __user *optval, int __user *optlen) |
887 | { | 817 | { |
888 | const struct ccid3_hc_rx_sock *hc; | 818 | const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); |
889 | struct tfrc_rx_info rx_info; | 819 | struct tfrc_rx_info rx_info; |
890 | const void *val; | 820 | const void *val; |
891 | 821 | ||
892 | /* Listen socks doesn't have a private CCID block */ | ||
893 | if (sk->sk_state == DCCP_LISTEN) | ||
894 | return -EINVAL; | ||
895 | |||
896 | hc = ccid3_hc_rx_sk(sk); | ||
897 | switch (optname) { | 822 | switch (optname) { |
898 | case DCCP_SOCKOPT_CCID_RX_INFO: | 823 | case DCCP_SOCKOPT_CCID_RX_INFO: |
899 | if (len < sizeof(rx_info)) | 824 | if (len < sizeof(rx_info)) |
900 | return -EINVAL; | 825 | return -EINVAL; |
901 | rx_info.tfrcrx_x_recv = hc->rx_x_recv; | 826 | rx_info.tfrcrx_x_recv = hc->rx_x_recv; |
902 | rx_info.tfrcrx_rtt = hc->rx_rtt; | 827 | rx_info.tfrcrx_rtt = hc->rx_rtt; |
903 | rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : | 828 | rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv); |
904 | scaled_div(1, hc->rx_pinv); | ||
905 | len = sizeof(rx_info); | 829 | len = sizeof(rx_info); |
906 | val = &rx_info; | 830 | val = &rx_info; |
907 | break; | 831 | break; |
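Note: with the open-coded inversion gone, DCCP_SOCKOPT_CCID_RX_INFO now reports a loss event rate computed by tfrc_invert_loss_event_rate(). A hedged user-space sketch of reading those statistics (struct and option names as in the hunk above; the SOL_DCCP fallback value and the bare-bones error handling are assumptions of this example, not part of the patch):

/* Illustration only: reads the CCID-3 receiver statistics exported above.
 * Assumes <linux/dccp.h> and <linux/tfrc.h> provide DCCP_SOCKOPT_CCID_RX_INFO
 * and struct tfrc_rx_info on this kernel; error handling is minimal. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/dccp.h>
#include <linux/tfrc.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269	/* fallback in case the libc headers lack it */
#endif

static void dump_rx_info(int fd)
{
	struct tfrc_rx_info info;
	socklen_t len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID_RX_INFO, &info, &len) < 0) {
		perror("getsockopt(DCCP_SOCKOPT_CCID_RX_INFO)");
		return;
	}
	/* tfrcrx_p is scaled so that 1000000 corresponds to p = 100% */
	printf("X_recv=%u bytes/s  RTT=%u us  p=%u/1000000\n",
	       info.tfrcrx_x_recv, info.tfrcrx_rtt, info.tfrcrx_p);
}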
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 032635776653..1a9933c29672 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h | |||
@@ -42,35 +42,36 @@ | |||
42 | #include "lib/tfrc.h" | 42 | #include "lib/tfrc.h" |
43 | #include "../ccid.h" | 43 | #include "../ccid.h" |
44 | 44 | ||
45 | /* Two seconds as per RFC 3448 4.2 */ | 45 | /* Two seconds as per RFC 5348, 4.2 */ |
46 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) | 46 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) |
47 | 47 | ||
48 | /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ | ||
49 | #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) | ||
50 | |||
51 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ | 48 | /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ |
52 | #define TFRC_T_MBI 64 | 49 | #define TFRC_T_MBI 64 |
53 | 50 | ||
51 | /* | ||
52 | * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are | ||
53 | * rounded down to 0, since sk_reset_timer() here uses millisecond granularity. | ||
54 | * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse | ||
55 | * resolution of HZ < 500 means that the error is below one timer tick (t_gran) | ||
56 | * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ). | ||
57 | */ | ||
58 | #if (HZ >= 500) | ||
59 | # define TFRC_T_DELTA USEC_PER_MSEC | ||
60 | #else | ||
61 | # define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ)) | ||
62 | #endif | ||
63 | |||
54 | enum ccid3_options { | 64 | enum ccid3_options { |
55 | TFRC_OPT_LOSS_EVENT_RATE = 192, | 65 | TFRC_OPT_LOSS_EVENT_RATE = 192, |
56 | TFRC_OPT_LOSS_INTERVALS = 193, | 66 | TFRC_OPT_LOSS_INTERVALS = 193, |
57 | TFRC_OPT_RECEIVE_RATE = 194, | 67 | TFRC_OPT_RECEIVE_RATE = 194, |
58 | }; | 68 | }; |
59 | 69 | ||
60 | struct ccid3_options_received { | ||
61 | u64 ccid3or_seqno:48, | ||
62 | ccid3or_loss_intervals_idx:16; | ||
63 | u16 ccid3or_loss_intervals_len; | ||
64 | u32 ccid3or_loss_event_rate; | ||
65 | u32 ccid3or_receive_rate; | ||
66 | }; | ||
67 | |||
68 | /* TFRC sender states */ | 70 | /* TFRC sender states */ |
69 | enum ccid3_hc_tx_states { | 71 | enum ccid3_hc_tx_states { |
70 | TFRC_SSTATE_NO_SENT = 1, | 72 | TFRC_SSTATE_NO_SENT = 1, |
71 | TFRC_SSTATE_NO_FBACK, | 73 | TFRC_SSTATE_NO_FBACK, |
72 | TFRC_SSTATE_FBACK, | 74 | TFRC_SSTATE_FBACK, |
73 | TFRC_SSTATE_TERM, | ||
74 | }; | 75 | }; |
75 | 76 | ||
76 | /** | 77 | /** |
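Note: TFRC_T_DELTA above replaces the per-socket tx_delta field with a compile-time constant; the sender compares the time left until t_nom against it to decide between sending immediately and deferring the packet by the remaining delay (RFC 5348, 8.3). A minimal sketch of that decision, using plain microsecond integers instead of the kernel's ktime/timer API:

/* Sketch only: mirrors the t_delta test described in the comment above.
 * The function name is illustrative; the real check lives in the CCID-3
 * packet-send path and works on ktime_t values. */
#include <stdbool.h>
#include <stdint.h>

#define USEC_PER_SEC	1000000L
#define USEC_PER_MSEC	1000L
#define HZ		250	/* assumption: example config with coarse ticks */

#if (HZ >= 500)
# define TFRC_T_DELTA	USEC_PER_MSEC
#else
# define TFRC_T_DELTA	(USEC_PER_SEC / (2 * HZ))
#endif

/* Return true if the packet may go out now, false if it should be delayed
 * by (t_nom - now) microseconds instead. */
static bool tfrc_may_send_now(int64_t t_nom_us, int64_t now_us)
{
	int64_t delay = t_nom_us - now_us;

	return delay <= TFRC_T_DELTA;
}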
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states { | |||
90 | * @tx_no_feedback_timer: Handle to no feedback timer | 91 | * @tx_no_feedback_timer: Handle to no feedback timer |
91 | * @tx_t_ld: Time last doubled during slow start | 92 | * @tx_t_ld: Time last doubled during slow start |
92 | * @tx_t_nom: Nominal send time of next packet | 93 | * @tx_t_nom: Nominal send time of next packet |
93 | * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs | ||
94 | * @tx_hist: Packet history | 94 | * @tx_hist: Packet history |
95 | * @tx_options_received: Parsed set of retrieved options | ||
96 | */ | 95 | */ |
97 | struct ccid3_hc_tx_sock { | 96 | struct ccid3_hc_tx_sock { |
98 | struct tfrc_tx_info tx_tfrc; | 97 | u64 tx_x; |
99 | #define tx_x tx_tfrc.tfrctx_x | 98 | u64 tx_x_recv; |
100 | #define tx_x_recv tx_tfrc.tfrctx_x_recv | 99 | u32 tx_x_calc; |
101 | #define tx_x_calc tx_tfrc.tfrctx_x_calc | 100 | u32 tx_rtt; |
102 | #define tx_rtt tx_tfrc.tfrctx_rtt | 101 | u32 tx_p; |
103 | #define tx_p tx_tfrc.tfrctx_p | 102 | u32 tx_t_rto; |
104 | #define tx_t_rto tx_tfrc.tfrctx_rto | 103 | u32 tx_t_ipi; |
105 | #define tx_t_ipi tx_tfrc.tfrctx_ipi | ||
106 | u16 tx_s; | 104 | u16 tx_s; |
107 | enum ccid3_hc_tx_states tx_state:8; | 105 | enum ccid3_hc_tx_states tx_state:8; |
108 | u8 tx_last_win_count; | 106 | u8 tx_last_win_count; |
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock { | |||
110 | struct timer_list tx_no_feedback_timer; | 108 | struct timer_list tx_no_feedback_timer; |
111 | ktime_t tx_t_ld; | 109 | ktime_t tx_t_ld; |
112 | ktime_t tx_t_nom; | 110 | ktime_t tx_t_nom; |
113 | u32 tx_delta; | ||
114 | struct tfrc_tx_hist_entry *tx_hist; | 111 | struct tfrc_tx_hist_entry *tx_hist; |
115 | struct ccid3_options_received tx_options_received; | ||
116 | }; | 112 | }; |
117 | 113 | ||
118 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | 114 | static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) |
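Note: with the tfrc_tx_info embedding and its #define aliases removed, exporting sender state now means copying the plain tx_* fields into the UAPI structure explicitly. A sketch of what such a copy could look like (ccid3_tx_info_fill() is an illustrative name; the reduced struct definitions below only restate the fields visible in this hunk and in include/linux/tfrc.h):

/* Standalone sketch, not code from this patch. */
#include <stdint.h>

struct tfrc_tx_info {		/* reduced copy of include/linux/tfrc.h */
	uint64_t tfrctx_x;
	uint64_t tfrctx_x_recv;
	uint32_t tfrctx_x_calc;
	uint32_t tfrctx_rtt;
	uint32_t tfrctx_p;
	uint32_t tfrctx_rto;
	uint32_t tfrctx_ipi;
};

struct ccid3_hc_tx_sock {	/* reduced copy of the struct in the hunk above */
	uint64_t tx_x;
	uint64_t tx_x_recv;
	uint32_t tx_x_calc;
	uint32_t tx_rtt;
	uint32_t tx_p;
	uint32_t tx_t_rto;
	uint32_t tx_t_ipi;
};

static void ccid3_tx_info_fill(struct tfrc_tx_info *info,
			       const struct ccid3_hc_tx_sock *hc)
{
	info->tfrctx_x      = hc->tx_x;
	info->tfrctx_x_recv = hc->tx_x_recv;
	info->tfrctx_x_calc = hc->tx_x_calc;
	info->tfrctx_rtt    = hc->tx_rtt;
	info->tfrctx_p      = hc->tx_p;
	info->tfrctx_rto    = hc->tx_t_rto;
	info->tfrctx_ipi    = hc->tx_t_ipi;
}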
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) | |||
126 | enum ccid3_hc_rx_states { | 122 | enum ccid3_hc_rx_states { |
127 | TFRC_RSTATE_NO_DATA = 1, | 123 | TFRC_RSTATE_NO_DATA = 1, |
128 | TFRC_RSTATE_DATA, | 124 | TFRC_RSTATE_DATA, |
129 | TFRC_RSTATE_TERM = 127, | ||
130 | }; | 125 | }; |
131 | 126 | ||
132 | /** | 127 | /** |
133 | * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket | 128 | * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket |
134 | * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3) | ||
135 | * @rx_rtt: Receiver estimate of rtt (non-standard) | ||
136 | * @rx_p: Current loss event rate (RFC 3448 5.4) | ||
137 | * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) | 129 | * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) |
138 | * @rx_state: Receiver state, one of %ccid3_hc_rx_states | 130 | * @rx_state: Receiver state, one of %ccid3_hc_rx_states |
139 | * @rx_bytes_recv: Total sum of DCCP payload bytes | 131 | * @rx_bytes_recv: Total sum of DCCP payload bytes |
140 | * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) | 132 | * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) |
141 | * @rx_rtt: Receiver estimate of RTT | 133 | * @rx_rtt: Receiver estimate of RTT |
142 | * @rx_tstamp_last_feedback: Time at which last feedback was sent | 134 | * @rx_tstamp_last_feedback: Time at which last feedback was sent |
143 | * @rx_tstamp_last_ack: Time at which last feedback was sent | ||
144 | * @rx_hist: Packet history (loss detection + RTT sampling) | 135 | * @rx_hist: Packet history (loss detection + RTT sampling) |
145 | * @rx_li_hist: Loss Interval database | 136 | * @rx_li_hist: Loss Interval database |
146 | * @rx_s: Received packet size in bytes | 137 | * @rx_s: Received packet size in bytes |
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 8fc3cbf79071..497723c4d4bb 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c | |||
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) | |||
116 | cur->li_length = len; | 116 | cur->li_length = len; |
117 | tfrc_lh_calc_i_mean(lh); | 117 | tfrc_lh_calc_i_mean(lh); |
118 | 118 | ||
119 | return (lh->i_mean < old_i_mean); | 119 | return lh->i_mean < old_i_mean; |
120 | } | 120 | } |
121 | 121 | ||
122 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ | 122 | /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ |
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 3a4f414e94a0..de8fe294bf0b 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c | |||
@@ -38,18 +38,6 @@ | |||
38 | #include "packet_history.h" | 38 | #include "packet_history.h" |
39 | #include "../../dccp.h" | 39 | #include "../../dccp.h" |
40 | 40 | ||
41 | /** | ||
42 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
43 | * @next: next oldest entry (LIFO order) | ||
44 | * @seqno: sequence number of this entry | ||
45 | * @stamp: send time of packet with sequence number @seqno | ||
46 | */ | ||
47 | struct tfrc_tx_hist_entry { | ||
48 | struct tfrc_tx_hist_entry *next; | ||
49 | u64 seqno; | ||
50 | ktime_t stamp; | ||
51 | }; | ||
52 | |||
53 | /* | 41 | /* |
54 | * Transmitter History Routines | 42 | * Transmitter History Routines |
55 | */ | 43 | */ |
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void) | |||
71 | } | 59 | } |
72 | } | 60 | } |
73 | 61 | ||
74 | static struct tfrc_tx_hist_entry * | ||
75 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | ||
76 | { | ||
77 | while (head != NULL && head->seqno != seqno) | ||
78 | head = head->next; | ||
79 | |||
80 | return head; | ||
81 | } | ||
82 | |||
83 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) | 62 | int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) |
84 | { | 63 | { |
85 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); | 64 | struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); |
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) | |||
107 | *headp = NULL; | 86 | *headp = NULL; |
108 | } | 87 | } |
109 | 88 | ||
110 | u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, | ||
111 | const ktime_t now) | ||
112 | { | ||
113 | u32 rtt = 0; | ||
114 | struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); | ||
115 | |||
116 | if (packet != NULL) { | ||
117 | rtt = ktime_us_delta(now, packet->stamp); | ||
118 | /* | ||
119 | * Garbage-collect older (irrelevant) entries: | ||
120 | */ | ||
121 | tfrc_tx_hist_purge(&packet->next); | ||
122 | } | ||
123 | |||
124 | return rtt; | ||
125 | } | ||
126 | |||
127 | |||
128 | /* | 89 | /* |
129 | * Receiver History Routines | 90 | * Receiver History Routines |
130 | */ | 91 | */ |
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 7df6c5299999..7ee4a9d9d335 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h | |||
@@ -40,12 +40,28 @@ | |||
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include "tfrc.h" | 41 | #include "tfrc.h" |
42 | 42 | ||
43 | struct tfrc_tx_hist_entry; | 43 | /** |
44 | * tfrc_tx_hist_entry - Simple singly-linked TX history list | ||
45 | * @next: next oldest entry (LIFO order) | ||
46 | * @seqno: sequence number of this entry | ||
47 | * @stamp: send time of packet with sequence number @seqno | ||
48 | */ | ||
49 | struct tfrc_tx_hist_entry { | ||
50 | struct tfrc_tx_hist_entry *next; | ||
51 | u64 seqno; | ||
52 | ktime_t stamp; | ||
53 | }; | ||
54 | |||
55 | static inline struct tfrc_tx_hist_entry * | ||
56 | tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) | ||
57 | { | ||
58 | while (head != NULL && head->seqno != seqno) | ||
59 | head = head->next; | ||
60 | return head; | ||
61 | } | ||
44 | 62 | ||
45 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); | 63 | extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); |
46 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); | 64 | extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); |
47 | extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, | ||
48 | const u64 seqno, const ktime_t now); | ||
49 | 65 | ||
50 | /* Subtraction a-b modulo-16, respects circular wrap-around */ | 66 | /* Subtraction a-b modulo-16, respects circular wrap-around */ |
51 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) | 67 | #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) |
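Note: because tfrc_tx_hist_rtt() was removed, an RTT sample is now taken at the call site by combining the inlined tfrc_tx_hist_find_entry() with ktime_us_delta() and pruning older entries, just as the deleted helper did. A sketch under that assumption (kernel context, i.e. "packet_history.h" included; the function name is illustrative):

/* Sketch only: RTT sampling at the acknowledgement-processing site, built
 * from the helpers that remain after this patch. */
static u32 sample_rtt_from_hist(struct tfrc_tx_hist_entry **headp,
				u64 acked_seqno, ktime_t now)
{
	struct tfrc_tx_hist_entry *acked =
		tfrc_tx_hist_find_entry(*headp, acked_seqno);
	u32 rtt = 0;

	if (acked != NULL) {
		rtt = ktime_us_delta(now, acked->stamp);
		/* entries older than the acked packet are no longer needed */
		tfrc_tx_hist_purge(&acked->next);
	}
	return rtt;
}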
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 01bb48e96c2e..f8ee3f549770 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) | |||
57 | 57 | ||
58 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); | 58 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); |
59 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); | 59 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); |
60 | extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); | ||
60 | 61 | ||
61 | extern int tfrc_tx_packet_history_init(void); | 62 | extern int tfrc_tx_packet_history_init(void); |
62 | extern void tfrc_tx_packet_history_exit(void); | 63 | extern void tfrc_tx_packet_history_exit(void); |
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 22ca1cf0eb55..a052a4377e26 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c | |||
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) | |||
687 | index = tfrc_binsearch(fvalue, 0); | 687 | index = tfrc_binsearch(fvalue, 0); |
688 | return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; | 688 | return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; |
689 | } | 689 | } |
690 | |||
691 | /** | ||
692 | * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% | ||
693 | * When @loss_event_rate is large, there is a chance that p is truncated to 0. | ||
694 | * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. | ||
695 | */ | ||
696 | u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) | ||
697 | { | ||
698 | if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ | ||
699 | return 0; | ||
700 | if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ | ||
701 | return 1000000; | ||
702 | return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); | ||
703 | } | ||
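Note: tfrc_invert_loss_event_rate() maps the Loss Event Rate option value onto the scaled loss probability p, where 10^6 stands for 100%. A standalone mirror of its three cases (TFRC_SMALLEST_P below is a placeholder value; the kernel derives the real lower bound from its lookup table):

/* Illustration of the mapping implemented above:
 *   UINT32_MAX (no loss yet, RFC 4342, 8.5) -> 0
 *   0                                       -> 1000000 (p = 100%)
 *   otherwise                               -> max(10^6 / rate, TFRC_SMALLEST_P)
 */
#include <assert.h>
#include <stdint.h>

#define TFRC_SMALLEST_P	40	/* placeholder, not the kernel's value */

static uint32_t invert_loss_event_rate(uint32_t loss_event_rate)
{
	if (loss_event_rate == UINT32_MAX)
		return 0;
	if (loss_event_rate == 0)
		return 1000000;
	/* scaled_div() works in units of 10^6, cf. the "10^6 corresponds
	 * to 100%" comment above */
	uint32_t p = 1000000u / loss_event_rate;
	return p > TFRC_SMALLEST_P ? p : TFRC_SMALLEST_P;
}

int main(void)
{
	assert(invert_loss_event_rate(UINT32_MAX) == 0);
	assert(invert_loss_event_rate(0) == 1000000);
	assert(invert_loss_event_rate(100) == 10000);	/* 1% loss */
	assert(invert_loss_event_rate(4000000) == TFRC_SMALLEST_P); /* clamped */
	return 0;
}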
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 3ccef1b70fee..3eb264b60823 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -153,18 +153,27 @@ static inline u64 max48(const u64 seq1, const u64 seq2) | |||
153 | } | 153 | } |
154 | 154 | ||
155 | /** | 155 | /** |
156 | * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1 | 156 | * dccp_loss_count - Approximate the number of lost data packets in a burst loss |
157 | * @s1: start sequence number | 157 | * @s1: last known sequence number before the loss ('hole') |
158 | * @s2: end sequence number | 158 | * @s2: first sequence number seen after the 'hole' |
159 | * @ndp: NDP count on packet with sequence number @s2 | 159 | * @ndp: NDP count on packet with sequence number @s2 |
160 | * Returns true if the sequence range s1...s2 has no data loss. | ||
161 | */ | 160 | */ |
162 | static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) | 161 | static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp) |
163 | { | 162 | { |
164 | s64 delta = dccp_delta_seqno(s1, s2); | 163 | s64 delta = dccp_delta_seqno(s1, s2); |
165 | 164 | ||
166 | WARN_ON(delta < 0); | 165 | WARN_ON(delta < 0); |
167 | return (u64)delta <= ndp + 1; | 166 | delta -= ndp + 1; |
167 | |||
168 | return delta > 0 ? delta : 0; | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1 | ||
173 | */ | ||
174 | static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) | ||
175 | { | ||
176 | return dccp_loss_count(s1, s2, ndp) == 0; | ||
168 | } | 177 | } |
169 | 178 | ||
170 | enum { | 179 | enum { |
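Note: dccp_loss_count() converts a sequence-number gap plus the NDP count into an estimate of lost data packets, and dccp_loss_free() reduces to testing that count for zero. A standalone worked example, ignoring 48-bit wrap-around for brevity:

/* Illustration only: with s1 = 100 seen, s2 = 105 seen next and NDP = 2
 * (two non-data packets in between), delta = 5, so 5 - (2 + 1) = 2 data
 * packets are presumed lost; dccp_loss_free() would return false here. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uint64_t loss_count(uint64_t s1, uint64_t s2, uint64_t ndp)
{
	int64_t delta = (int64_t)(s2 - s1);	/* real code uses dccp_delta_seqno() */

	delta -= (int64_t)(ndp + 1);
	return delta > 0 ? (uint64_t)delta : 0;
}

static bool loss_free(uint64_t s1, uint64_t s2, uint64_t ndp)
{
	return loss_count(s1, s2, ndp) == 0;
}

int main(void)
{
	assert(loss_count(100, 105, 2) == 2);
	assert(loss_free(100, 101, 0));	/* consecutive data packets */
	assert(loss_free(100, 103, 2));	/* gap fully explained by NDP */
	return 0;
}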
@@ -246,7 +255,6 @@ static inline void dccp_clear_xmit_timers(struct sock *sk) | |||
246 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); | 255 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); |
247 | 256 | ||
248 | extern const char *dccp_packet_name(const int type); | 257 | extern const char *dccp_packet_name(const int type); |
249 | extern const char *dccp_state_name(const int state); | ||
250 | 258 | ||
251 | extern void dccp_set_state(struct sock *sk, const int state); | 259 | extern void dccp_set_state(struct sock *sk, const int state); |
252 | extern void dccp_done(struct sock *sk); | 260 | extern void dccp_done(struct sock *sk); |
@@ -415,6 +423,23 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) | |||
415 | dp->dccps_gsr = seq; | 423 | dp->dccps_gsr = seq; |
416 | /* Sequence validity window depends on remote Sequence Window (7.5.1) */ | 424 | /* Sequence validity window depends on remote Sequence Window (7.5.1) */ |
417 | dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); | 425 | dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); |
426 | /* | ||
427 | * Adjust SWL so that it is not below ISR. In contrast to RFC 4340, | ||
428 | * 7.5.1 we perform this check beyond the initial handshake: W/W' are | ||
429 | * always > 32, so for the first W/W' packets in the lifetime of a | ||
430 | * connection we always have to adjust SWL. | ||
431 | * A second reason why we are doing this is that the window depends on | ||
432 | * the feature-remote value of Sequence Window: nothing stops the peer | ||
433 | * from updating this value while we are busy adjusting SWL for the | ||
434 | * first W packets (we would have to count from scratch again then). | ||
435 | * Therefore it is safer to always make sure that the Sequence Window | ||
436 | * is not artificially extended by a peer who grows SWL downwards by | ||
437 | * continually updating the feature-remote Sequence-Window. | ||
438 | * If sequence numbers wrap it is bad luck. But that will take a while | ||
439 | * (48 bit), and this measure prevents Sequence-number attacks. | ||
440 | */ | ||
441 | if (before48(dp->dccps_swl, dp->dccps_isr)) | ||
442 | dp->dccps_swl = dp->dccps_isr; | ||
418 | dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); | 443 | dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); |
419 | } | 444 | } |
420 | 445 | ||
@@ -425,14 +450,16 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) | |||
425 | dp->dccps_gss = seq; | 450 | dp->dccps_gss = seq; |
426 | /* Ack validity window depends on local Sequence Window value (7.5.1) */ | 451 | /* Ack validity window depends on local Sequence Window value (7.5.1) */ |
427 | dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); | 452 | dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); |
453 | /* Adjust AWL so that it is not below ISS - see comment above for SWL */ | ||
454 | if (before48(dp->dccps_awl, dp->dccps_iss)) | ||
455 | dp->dccps_awl = dp->dccps_iss; | ||
428 | dp->dccps_awh = dp->dccps_gss; | 456 | dp->dccps_awh = dp->dccps_gss; |
429 | } | 457 | } |
430 | 458 | ||
431 | static inline int dccp_ack_pending(const struct sock *sk) | 459 | static inline int dccp_ack_pending(const struct sock *sk) |
432 | { | 460 | { |
433 | const struct dccp_sock *dp = dccp_sk(sk); | 461 | const struct dccp_sock *dp = dccp_sk(sk); |
434 | return dp->dccps_timestamp_echo != 0 || | 462 | return (dp->dccps_hc_rx_ackvec != NULL && |
435 | (dp->dccps_hc_rx_ackvec != NULL && | ||
436 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || | 463 | dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || |
437 | inet_csk_ack_scheduled(sk); | 464 | inet_csk_ack_scheduled(sk); |
438 | } | 465 | } |
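Note: the two hunks above clamp SWL and AWL so that the validity windows never extend below ISR and ISS, and dccp_ack_pending() drops its timestamp-echo test. The clamping is effectively a max() in sequence space; a plain-integer sketch (the before48()-based wrap handling is omitted):

/* Sketch only: the effective SWL/AWL computation after this patch, written
 * with ordinary integers instead of the kernel's 48-bit helpers. */
#include <assert.h>
#include <stdint.h>

static uint64_t max_seq(uint64_t a, uint64_t b)
{
	return a > b ? a : b;	/* stands in for "if (before48(a, b)) a = b" */
}

/* SWL := max(GSR + 1 - floor(W/4), ISR)   (RFC 4340, 7.5.1) */
static uint64_t seq_win_low(uint64_t gsr, uint64_t isr, uint64_t seq_win)
{
	return max_seq(gsr + 1 - seq_win / 4, isr);
}

/* AWL := max(GSS - W' + 1, ISS) */
static uint64_t ack_win_low(uint64_t gss, uint64_t iss, uint64_t seq_win)
{
	return max_seq(gss - seq_win + 1, iss);
}

int main(void)
{
	/* early in a connection GSR is close to ISR, so the clamp applies */
	assert(seq_win_low(30, 10, 100) == 10);
	/* later on, the unclamped value wins */
	assert(seq_win_low(1000, 10, 100) == 976);
	/* same clamp on the ack side, relative to ISS */
	assert(ack_win_low(120, 100, 100) == 100);
	return 0;
}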
@@ -449,7 +476,6 @@ extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); | |||
449 | extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); | 476 | extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); |
450 | extern u32 dccp_timestamp(void); | 477 | extern u32 dccp_timestamp(void); |
451 | extern void dccp_timestamping_init(void); | 478 | extern void dccp_timestamping_init(void); |
452 | extern int dccp_insert_option_timestamp(struct sk_buff *skb); | ||
453 | extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, | 479 | extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, |
454 | const void *value, unsigned char len); | 480 | const void *value, unsigned char len); |
455 | 481 | ||
diff --git a/net/dccp/feat.c b/net/dccp/feat.c index df7dd26cf07e..568def952722 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c | |||
@@ -730,16 +730,6 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, | |||
730 | 0, list, len); | 730 | 0, list, len); |
731 | } | 731 | } |
732 | 732 | ||
733 | /* Analogous to dccp_feat_register_sp(), but for non-negotiable values */ | ||
734 | int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val) | ||
735 | { | ||
736 | /* any changes must be registered before establishing the connection */ | ||
737 | if (sk->sk_state != DCCP_CLOSED) | ||
738 | return -EISCONN; | ||
739 | if (dccp_feat_type(feat) != FEAT_NN) | ||
740 | return -EINVAL; | ||
741 | return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val); | ||
742 | } | ||
743 | 733 | ||
744 | /* | 734 | /* |
745 | * Tracking features whose value depend on the choice of CCID | 735 | * Tracking features whose value depend on the choice of CCID |
diff --git a/net/dccp/feat.h b/net/dccp/feat.h index f96721619def..e56a4e5e634e 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h | |||
@@ -111,7 +111,6 @@ extern int dccp_feat_init(struct sock *sk); | |||
111 | extern void dccp_feat_initialise_sysctls(void); | 111 | extern void dccp_feat_initialise_sysctls(void); |
112 | extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, | 112 | extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, |
113 | u8 const *list, u8 len); | 113 | u8 const *list, u8 len); |
114 | extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); | ||
115 | extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, | 114 | extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, |
116 | u8 mand, u8 opt, u8 feat, u8 *val, u8 len); | 115 | u8 mand, u8 opt, u8 feat, u8 *val, u8 len); |
117 | extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); | 116 | extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); |
diff --git a/net/dccp/input.c b/net/dccp/input.c index 10c957a88f4f..265985370fa1 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
@@ -259,7 +259,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | |||
259 | sysctl_dccp_sync_ratelimit))) | 259 | sysctl_dccp_sync_ratelimit))) |
260 | return 0; | 260 | return 0; |
261 | 261 | ||
262 | DCCP_WARN("DCCP: Step 6 failed for %s packet, " | 262 | DCCP_WARN("Step 6 failed for %s packet, " |
263 | "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " | 263 | "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " |
264 | "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " | 264 | "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " |
265 | "sending SYNC...\n", dccp_packet_name(dh->dccph_type), | 265 | "sending SYNC...\n", dccp_packet_name(dh->dccph_type), |
@@ -441,20 +441,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
441 | kfree_skb(sk->sk_send_head); | 441 | kfree_skb(sk->sk_send_head); |
442 | sk->sk_send_head = NULL; | 442 | sk->sk_send_head = NULL; |
443 | 443 | ||
444 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
445 | dccp_update_gsr(sk, dp->dccps_isr); | ||
446 | /* | 444 | /* |
447 | * SWL and AWL are initially adjusted so that they are not less than | 445 | * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect |
448 | * the initial Sequence Numbers received and sent, respectively: | 446 | * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH |
449 | * SWL := max(GSR + 1 - floor(W/4), ISR), | 447 | * is done as part of activating the feature values below, since |
450 | * AWL := max(GSS - W' + 1, ISS). | 448 | * these settings depend on the local/remote Sequence Window |
451 | * These adjustments MUST be applied only at the beginning of the | 449 | * features, which were undefined or not confirmed until now. |
452 | * connection. | ||
453 | * | ||
454 | * AWL was adjusted in dccp_v4_connect -acme | ||
455 | */ | 450 | */ |
456 | dccp_set_seqno(&dp->dccps_swl, | 451 | dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; |
457 | max48(dp->dccps_swl, dp->dccps_isr)); | ||
458 | 452 | ||
459 | dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); | 453 | dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
460 | 454 | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d4a166f0f391..3f69ea114829 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
392 | 392 | ||
393 | newsk = dccp_create_openreq_child(sk, req, skb); | 393 | newsk = dccp_create_openreq_child(sk, req, skb); |
394 | if (newsk == NULL) | 394 | if (newsk == NULL) |
395 | goto exit; | 395 | goto exit_nonewsk; |
396 | 396 | ||
397 | sk_setup_caps(newsk, dst); | 397 | sk_setup_caps(newsk, dst); |
398 | 398 | ||
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
409 | 409 | ||
410 | dccp_sync_mss(newsk, dst_mtu(dst)); | 410 | dccp_sync_mss(newsk, dst_mtu(dst)); |
411 | 411 | ||
412 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
413 | sock_put(newsk); | ||
414 | goto exit; | ||
415 | } | ||
412 | __inet_hash_nolisten(newsk, NULL); | 416 | __inet_hash_nolisten(newsk, NULL); |
413 | __inet_inherit_port(sk, newsk); | ||
414 | 417 | ||
415 | return newsk; | 418 | return newsk; |
416 | 419 | ||
417 | exit_overflow: | 420 | exit_overflow: |
418 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 421 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
422 | exit_nonewsk: | ||
423 | dst_release(dst); | ||
419 | exit: | 424 | exit: |
420 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 425 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
421 | dst_release(dst); | ||
422 | return NULL; | 426 | return NULL; |
423 | } | 427 | } |
424 | 428 | ||
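Note: the accept path now checks the return value of __inet_inherit_port() before hashing the child and routes the allocation failure through the new exit_nonewsk label so that dst_release() runs exactly once. A stub-based sketch of that control flow (every helper below is a local stand-in, not a kernel function):

/* Sketch only: the reordered cleanup, with stubs so the goto-based control
 * flow can be read in isolation. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct child_sock { int refcount; };

static struct child_sock *create_child(bool ok)
{
	static struct child_sock c = { .refcount = 1 };
	return ok ? &c : NULL;
}
static int  inherit_port(struct child_sock *c) { (void)c; return 0; /* may be < 0 */ }
static void put_child(struct child_sock *c)    { c->refcount--; }
static void hash_child(struct child_sock *c)   { (void)c; }
static void release_route(void)                { puts("route released once"); }

static struct child_sock *accept_child(bool can_alloc)
{
	struct child_sock *newsk = create_child(can_alloc);

	if (newsk == NULL)
		goto exit_nonewsk;	/* no child yet, so drop the route here */

	if (inherit_port(newsk) < 0) {
		put_child(newsk);	/* matches sock_put(newsk); goto exit above */
		goto exit;
	}
	hash_child(newsk);		/* hash only a fully set-up child */
	return newsk;

exit_nonewsk:
	release_route();
exit:
	/* the real code bumps LINUX_MIB_LISTENDROPS here */
	return NULL;
}

int main(void)
{
	(void)accept_child(true);
	(void)accept_child(false);
	return 0;
}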
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e3f32575df7..dca711df9b60 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
564 | 564 | ||
565 | newsk = dccp_create_openreq_child(sk, req, skb); | 565 | newsk = dccp_create_openreq_child(sk, req, skb); |
566 | if (newsk == NULL) | 566 | if (newsk == NULL) |
567 | goto out; | 567 | goto out_nonewsk; |
568 | 568 | ||
569 | /* | 569 | /* |
570 | * No need to charge this sock to the relevant IPv6 refcnt debug socks | 570 | * No need to charge this sock to the relevant IPv6 refcnt debug socks |
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
632 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; | 632 | newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; |
633 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; | 633 | newinet->inet_rcv_saddr = LOOPBACK4_IPV6; |
634 | 634 | ||
635 | if (__inet_inherit_port(sk, newsk) < 0) { | ||
636 | sock_put(newsk); | ||
637 | goto out; | ||
638 | } | ||
635 | __inet6_hash(newsk, NULL); | 639 | __inet6_hash(newsk, NULL); |
636 | __inet_inherit_port(sk, newsk); | ||
637 | 640 | ||
638 | return newsk; | 641 | return newsk; |
639 | 642 | ||
640 | out_overflow: | 643 | out_overflow: |
641 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); | 644 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
645 | out_nonewsk: | ||
646 | dst_release(dst); | ||
642 | out: | 647 | out: |
643 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); | 648 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); |
644 | if (opt != NULL && opt != np->opt) | 649 | if (opt != NULL && opt != np->opt) |
645 | sock_kfree_s(sk, opt, opt->tot_len); | 650 | sock_kfree_s(sk, opt, opt->tot_len); |
646 | dst_release(dst); | ||
647 | return NULL; | 651 | return NULL; |
648 | } | 652 | } |
649 | 653 | ||
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 128b089d3aef..d7041a0963af 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -121,30 +121,18 @@ struct sock *dccp_create_openreq_child(struct sock *sk, | |||
121 | * | 121 | * |
122 | * Choose S.ISS (initial seqno) or set from Init Cookies | 122 | * Choose S.ISS (initial seqno) or set from Init Cookies |
123 | * Initialize S.GAR := S.ISS | 123 | * Initialize S.GAR := S.ISS |
124 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies | 124 | * Set S.ISR, S.GSR from packet (or Init Cookies) |
125 | */ | 125 | * |
126 | newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; | 126 | * Setting AWL/AWH and SWL/SWH happens as part of the feature |
127 | dccp_update_gss(newsk, dreq->dreq_iss); | 127 | * activation below, as these windows all depend on the local |
128 | 128 | * and remote Sequence Window feature values (7.5.2). | |
129 | newdp->dccps_isr = dreq->dreq_isr; | ||
130 | dccp_update_gsr(newsk, dreq->dreq_isr); | ||
131 | |||
132 | /* | ||
133 | * SWL and AWL are initially adjusted so that they are not less than | ||
134 | * the initial Sequence Numbers received and sent, respectively: | ||
135 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
136 | * AWL := max(GSS - W' + 1, ISS). | ||
137 | * These adjustments MUST be applied only at the beginning of the | ||
138 | * connection. | ||
139 | */ | 129 | */ |
140 | dccp_set_seqno(&newdp->dccps_swl, | 130 | newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss; |
141 | max48(newdp->dccps_swl, newdp->dccps_isr)); | 131 | newdp->dccps_gar = newdp->dccps_iss; |
142 | dccp_set_seqno(&newdp->dccps_awl, | 132 | newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr; |
143 | max48(newdp->dccps_awl, newdp->dccps_iss)); | ||
144 | 133 | ||
145 | /* | 134 | /* |
146 | * Activate features after initialising the sequence numbers, | 135 | * Activate features: initialise CCIDs, sequence windows etc. |
147 | * since CCID initialisation may depend on GSS, ISR, ISS etc. | ||
148 | */ | 136 | */ |
149 | if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { | 137 | if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { |
150 | /* It is still raw copy of parent, so invalidate | 138 | /* It is still raw copy of parent, so invalidate |
diff --git a/net/dccp/options.c b/net/dccp/options.c index bfda087bd90d..cd3061813009 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c | |||
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
96 | } | 96 | } |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * CCID-Specific Options (from RFC 4340, sec. 10.3): | ||
100 | * | ||
101 | * Option numbers 128 through 191 are for options sent from the | ||
102 | * HC-Sender to the HC-Receiver; option numbers 192 through 255 | ||
103 | * are for options sent from the HC-Receiver to the HC-Sender. | ||
104 | * | ||
105 | * CCID-specific options are ignored during connection setup, as | 99 | * CCID-specific options are ignored during connection setup, as |
106 | * negotiation may still be in progress (see RFC 4340, 10.3). | 100 | * negotiation may still be in progress (see RFC 4340, 10.3). |
107 | * The same applies to Ack Vectors, as these depend on the CCID. | 101 | * The same applies to Ack Vectors, as these depend on the CCID. |
108 | * | ||
109 | */ | 102 | */ |
110 | if (dreq != NULL && (opt >= 128 || | 103 | if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC || |
111 | opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) | 104 | opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) |
112 | goto ignore_option; | 105 | goto ignore_option; |
113 | 106 | ||
@@ -170,6 +163,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
170 | dccp_role(sk), ntohl(opt_val), | 163 | dccp_role(sk), ntohl(opt_val), |
171 | (unsigned long long) | 164 | (unsigned long long) |
172 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | 165 | DCCP_SKB_CB(skb)->dccpd_ack_seq); |
166 | /* schedule an Ack in case this sender is quiescent */ | ||
167 | inet_csk_schedule_ack(sk); | ||
173 | break; | 168 | break; |
174 | case DCCPO_TIMESTAMP_ECHO: | 169 | case DCCPO_TIMESTAMP_ECHO: |
175 | if (len != 4 && len != 6 && len != 8) | 170 | if (len != 4 && len != 6 && len != 8) |
@@ -226,23 +221,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, | |||
226 | dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", | 221 | dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", |
227 | dccp_role(sk), elapsed_time); | 222 | dccp_role(sk), elapsed_time); |
228 | break; | 223 | break; |
229 | case 128 ... 191: { | 224 | case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC: |
230 | const u16 idx = value - options; | ||
231 | |||
232 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, | 225 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, |
233 | opt, len, idx, | 226 | pkt_type, opt, value, len)) |
234 | value) != 0) | ||
235 | goto out_invalid_option; | 227 | goto out_invalid_option; |
236 | } | ||
237 | break; | 228 | break; |
238 | case 192 ... 255: { | 229 | case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: |
239 | const u16 idx = value - options; | ||
240 | |||
241 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, | 230 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, |
242 | opt, len, idx, | 231 | pkt_type, opt, value, len)) |
243 | value) != 0) | ||
244 | goto out_invalid_option; | 232 | goto out_invalid_option; |
245 | } | ||
246 | break; | 233 | break; |
247 | default: | 234 | default: |
248 | DCCP_CRIT("DCCP(%p): option %d(len=%d) not " | 235 | DCCP_CRIT("DCCP(%p): option %d(len=%d) not " |
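Note: the literal case labels 128 ... 191 and 192 ... 255 are replaced by the DCCPO_*_CCID_SPECIFIC constants; per the removed comment (RFC 4340, 10.3), the lower range is dispatched to the RX CCID and the upper range to the TX CCID. A small standalone classifier restating those bounds:

/* Sketch only: classify a DCCP option number into the ranges the parser
 * above dispatches on; the numeric bounds match the old literal labels. */
#include <assert.h>
#include <stdint.h>

enum opt_class {
	OPT_GENERIC,		/* 0..127: handled by the core parser */
	OPT_RX_CCID_SPECIFIC,	/* 128..191: passed to the RX CCID    */
	OPT_TX_CCID_SPECIFIC,	/* 192..255: passed to the TX CCID    */
};

static enum opt_class classify_option(uint8_t opt)
{
	if (opt >= 192)
		return OPT_TX_CCID_SPECIFIC;
	if (opt >= 128)
		return OPT_RX_CCID_SPECIFIC;
	return OPT_GENERIC;
}

int main(void)
{
	assert(classify_option(41)  == OPT_GENERIC);		/* Timestamp option */
	assert(classify_option(128) == OPT_RX_CCID_SPECIFIC);
	assert(classify_option(192) == OPT_TX_CCID_SPECIFIC);	/* TFRC_OPT_LOSS_EVENT_RATE */
	return 0;
}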
@@ -384,7 +371,7 @@ int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) | |||
384 | 371 | ||
385 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); | 372 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); |
386 | 373 | ||
387 | int dccp_insert_option_timestamp(struct sk_buff *skb) | 374 | static int dccp_insert_option_timestamp(struct sk_buff *skb) |
388 | { | 375 | { |
389 | __be32 now = htonl(dccp_timestamp()); | 376 | __be32 now = htonl(dccp_timestamp()); |
390 | /* yes this will overflow but that is the point as we want a | 377 | /* yes this will overflow but that is the point as we want a |
@@ -393,8 +380,6 @@ int dccp_insert_option_timestamp(struct sk_buff *skb) | |||
393 | return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); | 380 | return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); |
394 | } | 381 | } |
395 | 382 | ||
396 | EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); | ||
397 | |||
398 | static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, | 383 | static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, |
399 | struct dccp_request_sock *dreq, | 384 | struct dccp_request_sock *dreq, |
400 | struct sk_buff *skb) | 385 | struct sk_buff *skb) |
diff --git a/net/dccp/output.c b/net/dccp/output.c index aadbdb58758b..a988fe9ffcba 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -304,7 +304,7 @@ void dccp_write_xmit(struct sock *sk, int block) | |||
304 | dcb->dccpd_type = DCCP_PKT_DATA; | 304 | dcb->dccpd_type = DCCP_PKT_DATA; |
305 | 305 | ||
306 | err = dccp_transmit_skb(sk, skb); | 306 | err = dccp_transmit_skb(sk, skb); |
307 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); | 307 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); |
308 | if (err) | 308 | if (err) |
309 | DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", | 309 | DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", |
310 | err); | 310 | err); |
@@ -474,8 +474,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) | |||
474 | /* | 474 | /* |
475 | * Do all connect socket setups that can be done AF independent. | 475 | * Do all connect socket setups that can be done AF independent. |
476 | */ | 476 | */ |
477 | static inline void dccp_connect_init(struct sock *sk) | 477 | int dccp_connect(struct sock *sk) |
478 | { | 478 | { |
479 | struct sk_buff *skb; | ||
479 | struct dccp_sock *dp = dccp_sk(sk); | 480 | struct dccp_sock *dp = dccp_sk(sk); |
480 | struct dst_entry *dst = __sk_dst_get(sk); | 481 | struct dst_entry *dst = __sk_dst_get(sk); |
481 | struct inet_connection_sock *icsk = inet_csk(sk); | 482 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -485,22 +486,12 @@ static inline void dccp_connect_init(struct sock *sk) | |||
485 | 486 | ||
486 | dccp_sync_mss(sk, dst_mtu(dst)); | 487 | dccp_sync_mss(sk, dst_mtu(dst)); |
487 | 488 | ||
488 | /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ | ||
489 | dp->dccps_gar = dp->dccps_iss; | ||
490 | |||
491 | icsk->icsk_retransmits = 0; | ||
492 | } | ||
493 | |||
494 | int dccp_connect(struct sock *sk) | ||
495 | { | ||
496 | struct sk_buff *skb; | ||
497 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
498 | |||
499 | /* do not connect if feature negotiation setup fails */ | 489 | /* do not connect if feature negotiation setup fails */ |
500 | if (dccp_feat_finalise_settings(dccp_sk(sk))) | 490 | if (dccp_feat_finalise_settings(dccp_sk(sk))) |
501 | return -EPROTO; | 491 | return -EPROTO; |
502 | 492 | ||
503 | dccp_connect_init(sk); | 493 | /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ |
494 | dp->dccps_gar = dp->dccps_iss; | ||
504 | 495 | ||
505 | skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); | 496 | skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); |
506 | if (unlikely(skb == NULL)) | 497 | if (unlikely(skb == NULL)) |
@@ -516,6 +507,7 @@ int dccp_connect(struct sock *sk) | |||
516 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); | 507 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); |
517 | 508 | ||
518 | /* Timer for repeating the REQUEST until an answer. */ | 509 | /* Timer for repeating the REQUEST until an answer. */ |
510 | icsk->icsk_retransmits = 0; | ||
519 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 511 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
520 | icsk->icsk_rto, DCCP_RTO_MAX); | 512 | icsk->icsk_rto, DCCP_RTO_MAX); |
521 | return 0; | 513 | return 0; |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 096250d1323b..7e5fc04eb6d1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -50,6 +50,30 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo); | |||
50 | /* the maximum queue length for tx in packets. 0 is no limit */ | 50 | /* the maximum queue length for tx in packets. 0 is no limit */ |
51 | int sysctl_dccp_tx_qlen __read_mostly = 5; | 51 | int sysctl_dccp_tx_qlen __read_mostly = 5; |
52 | 52 | ||
53 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
54 | static const char *dccp_state_name(const int state) | ||
55 | { | ||
56 | static const char *const dccp_state_names[] = { | ||
57 | [DCCP_OPEN] = "OPEN", | ||
58 | [DCCP_REQUESTING] = "REQUESTING", | ||
59 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
60 | [DCCP_LISTEN] = "LISTEN", | ||
61 | [DCCP_RESPOND] = "RESPOND", | ||
62 | [DCCP_CLOSING] = "CLOSING", | ||
63 | [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", | ||
64 | [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", | ||
65 | [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", | ||
66 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
67 | [DCCP_CLOSED] = "CLOSED", | ||
68 | }; | ||
69 | |||
70 | if (state >= DCCP_MAX_STATES) | ||
71 | return "INVALID STATE!"; | ||
72 | else | ||
73 | return dccp_state_names[state]; | ||
74 | } | ||
75 | #endif | ||
76 | |||
53 | void dccp_set_state(struct sock *sk, const int state) | 77 | void dccp_set_state(struct sock *sk, const int state) |
54 | { | 78 | { |
55 | const int oldstate = sk->sk_state; | 79 | const int oldstate = sk->sk_state; |
@@ -146,30 +170,6 @@ const char *dccp_packet_name(const int type) | |||
146 | 170 | ||
147 | EXPORT_SYMBOL_GPL(dccp_packet_name); | 171 | EXPORT_SYMBOL_GPL(dccp_packet_name); |
148 | 172 | ||
149 | const char *dccp_state_name(const int state) | ||
150 | { | ||
151 | static const char *const dccp_state_names[] = { | ||
152 | [DCCP_OPEN] = "OPEN", | ||
153 | [DCCP_REQUESTING] = "REQUESTING", | ||
154 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
155 | [DCCP_LISTEN] = "LISTEN", | ||
156 | [DCCP_RESPOND] = "RESPOND", | ||
157 | [DCCP_CLOSING] = "CLOSING", | ||
158 | [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", | ||
159 | [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", | ||
160 | [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", | ||
161 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
162 | [DCCP_CLOSED] = "CLOSED", | ||
163 | }; | ||
164 | |||
165 | if (state >= DCCP_MAX_STATES) | ||
166 | return "INVALID STATE!"; | ||
167 | else | ||
168 | return dccp_state_names[state]; | ||
169 | } | ||
170 | |||
171 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
172 | |||
173 | int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | 173 | int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) |
174 | { | 174 | { |
175 | struct dccp_sock *dp = dccp_sk(sk); | 175 | struct dccp_sock *dp = dccp_sk(sk); |
@@ -944,7 +944,7 @@ void dccp_close(struct sock *sk, long timeout) | |||
944 | 944 | ||
945 | if (data_was_unread) { | 945 | if (data_was_unread) { |
946 | /* Unread data was tossed, send an appropriate Reset Code */ | 946 | /* Unread data was tossed, send an appropriate Reset Code */ |
947 | DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread); | 947 | DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); |
948 | dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); | 948 | dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); |
949 | dccp_set_state(sk, DCCP_CLOSED); | 949 | dccp_set_state(sk, DCCP_CLOSED); |
950 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | 950 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { |