diff options
author | Len Brown <len.brown@intel.com> | 2005-09-03 02:44:09 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2005-09-03 02:44:09 -0400 |
commit | 129521dcc94f781890f8f668219ab79f0073ff9f (patch) | |
tree | 9f70707c88da65577f38814fe37b24c4b4957d64 /net/dccp | |
parent | 824b558bbe2c298b165cdb54c33718994dda30bb (diff) | |
parent | f505380ba7b98ec97bf25300c2a58aeae903530b (diff) |
Merge linux-2.6 into linux-acpi-2.6 test
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/Kconfig | 50 | ||||
-rw-r--r-- | net/dccp/Makefile | 10 | ||||
-rw-r--r-- | net/dccp/ccid.c | 139 | ||||
-rw-r--r-- | net/dccp/ccid.h | 180 | ||||
-rw-r--r-- | net/dccp/ccids/Kconfig | 29 | ||||
-rw-r--r-- | net/dccp/ccids/Makefile | 5 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 1221 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 137 | ||||
-rw-r--r-- | net/dccp/ccids/lib/Makefile | 3 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 144 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 61 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.c | 398 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 199 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 22 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc_equation.c | 644 | ||||
-rw-r--r-- | net/dccp/dccp.h | 493 | ||||
-rw-r--r-- | net/dccp/diag.c | 71 | ||||
-rw-r--r-- | net/dccp/input.c | 600 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 1356 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 264 | ||||
-rw-r--r-- | net/dccp/options.c | 855 | ||||
-rw-r--r-- | net/dccp/output.c | 528 | ||||
-rw-r--r-- | net/dccp/proto.c | 826 | ||||
-rw-r--r-- | net/dccp/timer.c | 255 |
24 files changed, 8490 insertions, 0 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 000000000000..187ac182e24b --- /dev/null +++ b/net/dccp/Kconfig | |||
@@ -0,0 +1,50 @@ | |||
1 | menu "DCCP Configuration (EXPERIMENTAL)" | ||
2 | depends on INET && EXPERIMENTAL | ||
3 | |||
4 | config IP_DCCP | ||
5 | tristate "The DCCP Protocol (EXPERIMENTAL)" | ||
6 | ---help--- | ||
7 | Datagram Congestion Control Protocol | ||
8 | |||
9 | From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>. | ||
10 | |||
11 | The Datagram Congestion Control Protocol (DCCP) is a transport | ||
12 | protocol that implements bidirectional, unicast connections of | ||
13 | congestion-controlled, unreliable datagrams. It should be suitable | ||
14 | for use by applications such as streaming media, Internet telephony, | ||
15 | and on-line games. | ||
16 | |||
17 | To compile this protocol support as a module, choose M here: the | ||
18 | module will be called dccp. | ||
19 | |||
20 | If in doubt, say N. | ||
21 | |||
22 | config INET_DCCP_DIAG | ||
23 | depends on IP_DCCP && INET_DIAG | ||
24 | def_tristate y if (IP_DCCP = y && INET_DIAG = y) | ||
25 | def_tristate m | ||
26 | |||
27 | source "net/dccp/ccids/Kconfig" | ||
28 | |||
29 | menu "DCCP Kernel Hacking" | ||
30 | depends on IP_DCCP && DEBUG_KERNEL=y | ||
31 | |||
32 | config IP_DCCP_DEBUG | ||
33 | bool "DCCP debug messages" | ||
34 | ---help--- | ||
35 | Only use this if you're hacking DCCP. | ||
36 | |||
37 | Just say N. | ||
38 | |||
39 | config IP_DCCP_UNLOAD_HACK | ||
40 | depends on IP_DCCP=m && IP_DCCP_CCID3=m | ||
41 | bool "DCCP control sock unload hack" | ||
42 | ---help--- | ||
43 | Enable this to be able to unload the dccp module when it | ||
44 | has only one refcount held, the control sock one. Just execute | ||
45 | "rmmod dccp_ccid3 dccp" | ||
46 | |||
47 | Just say N. | ||
48 | endmenu | ||
49 | |||
50 | endmenu | ||
diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 000000000000..fb97bb042455 --- /dev/null +++ b/net/dccp/Makefile | |||
@@ -0,0 +1,10 @@ | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o | ||
2 | |||
3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ | ||
4 | timer.o | ||
5 | |||
6 | obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o | ||
7 | |||
8 | dccp_diag-y := diag.o | ||
9 | |||
10 | obj-y += ccids/ | ||
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 000000000000..9d8fc0e289ea --- /dev/null +++ b/net/dccp/ccid.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * net/dccp/ccid.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * CCID infrastructure | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include "ccid.h" | ||
15 | |||
16 | static struct ccid *ccids[CCID_MAX]; | ||
17 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
18 | static atomic_t ccids_lockct = ATOMIC_INIT(0); | ||
19 | static DEFINE_SPINLOCK(ccids_lock); | ||
20 | |||
21 | /* | ||
22 | * The strategy is: modifications ccids vector are short, do not sleep and | ||
23 | * veeery rare, but read access should be free of any exclusive locks. | ||
24 | */ | ||
25 | static void ccids_write_lock(void) | ||
26 | { | ||
27 | spin_lock(&ccids_lock); | ||
28 | while (atomic_read(&ccids_lockct) != 0) { | ||
29 | spin_unlock(&ccids_lock); | ||
30 | yield(); | ||
31 | spin_lock(&ccids_lock); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | static inline void ccids_write_unlock(void) | ||
36 | { | ||
37 | spin_unlock(&ccids_lock); | ||
38 | } | ||
39 | |||
40 | static inline void ccids_read_lock(void) | ||
41 | { | ||
42 | atomic_inc(&ccids_lockct); | ||
43 | spin_unlock_wait(&ccids_lock); | ||
44 | } | ||
45 | |||
46 | static inline void ccids_read_unlock(void) | ||
47 | { | ||
48 | atomic_dec(&ccids_lockct); | ||
49 | } | ||
50 | |||
51 | #else | ||
/* Neither CONFIG_SMP nor CONFIG_PREEMPT: the ccids lock helpers are no-ops. */
#define ccids_write_lock()	do { } while (0)
#define ccids_write_unlock()	do { } while (0)
#define ccids_read_lock()	do { } while (0)
#define ccids_read_unlock()	do { } while (0)
56 | #endif | ||
57 | |||
58 | int ccid_register(struct ccid *ccid) | ||
59 | { | ||
60 | int err; | ||
61 | |||
62 | if (ccid->ccid_init == NULL) | ||
63 | return -1; | ||
64 | |||
65 | ccids_write_lock(); | ||
66 | err = -EEXIST; | ||
67 | if (ccids[ccid->ccid_id] == NULL) { | ||
68 | ccids[ccid->ccid_id] = ccid; | ||
69 | err = 0; | ||
70 | } | ||
71 | ccids_write_unlock(); | ||
72 | if (err == 0) | ||
73 | pr_info("CCID: Registered CCID %d (%s)\n", | ||
74 | ccid->ccid_id, ccid->ccid_name); | ||
75 | return err; | ||
76 | } | ||
77 | |||
78 | EXPORT_SYMBOL_GPL(ccid_register); | ||
79 | |||
80 | int ccid_unregister(struct ccid *ccid) | ||
81 | { | ||
82 | ccids_write_lock(); | ||
83 | ccids[ccid->ccid_id] = NULL; | ||
84 | ccids_write_unlock(); | ||
85 | pr_info("CCID: Unregistered CCID %d (%s)\n", | ||
86 | ccid->ccid_id, ccid->ccid_name); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | EXPORT_SYMBOL_GPL(ccid_unregister); | ||
91 | |||
92 | struct ccid *ccid_init(unsigned char id, struct sock *sk) | ||
93 | { | ||
94 | struct ccid *ccid; | ||
95 | |||
96 | #ifdef CONFIG_KMOD | ||
97 | if (ccids[id] == NULL) | ||
98 | request_module("net-dccp-ccid-%d", id); | ||
99 | #endif | ||
100 | ccids_read_lock(); | ||
101 | |||
102 | ccid = ccids[id]; | ||
103 | if (ccid == NULL) | ||
104 | goto out; | ||
105 | |||
106 | if (!try_module_get(ccid->ccid_owner)) | ||
107 | goto out_err; | ||
108 | |||
109 | if (ccid->ccid_init(sk) != 0) | ||
110 | goto out_module_put; | ||
111 | out: | ||
112 | ccids_read_unlock(); | ||
113 | return ccid; | ||
114 | out_module_put: | ||
115 | module_put(ccid->ccid_owner); | ||
116 | out_err: | ||
117 | ccid = NULL; | ||
118 | goto out; | ||
119 | } | ||
120 | |||
121 | EXPORT_SYMBOL_GPL(ccid_init); | ||
122 | |||
123 | void ccid_exit(struct ccid *ccid, struct sock *sk) | ||
124 | { | ||
125 | if (ccid == NULL) | ||
126 | return; | ||
127 | |||
128 | ccids_read_lock(); | ||
129 | |||
130 | if (ccids[ccid->ccid_id] != NULL) { | ||
131 | if (ccid->ccid_exit != NULL) | ||
132 | ccid->ccid_exit(sk); | ||
133 | module_put(ccid->ccid_owner); | ||
134 | } | ||
135 | |||
136 | ccids_read_unlock(); | ||
137 | } | ||
138 | |||
139 | EXPORT_SYMBOL_GPL(ccid_exit); | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 000000000000..962f1e9e2f7e --- /dev/null +++ b/net/dccp/ccid.h | |||
@@ -0,0 +1,180 @@ | |||
1 | #ifndef _CCID_H | ||
2 | #define _CCID_H | ||
3 | /* | ||
4 | * net/dccp/ccid.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
8 | * | ||
9 | * CCID infrastructure | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License version 2 as | ||
13 | * published by the Free Software Foundation. | ||
14 | */ | ||
15 | |||
16 | #include <net/sock.h> | ||
17 | #include <linux/dccp.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/module.h> | ||
20 | |||
21 | #define CCID_MAX 255 | ||
22 | |||
23 | struct ccid { | ||
24 | unsigned char ccid_id; | ||
25 | const char *ccid_name; | ||
26 | struct module *ccid_owner; | ||
27 | int (*ccid_init)(struct sock *sk); | ||
28 | void (*ccid_exit)(struct sock *sk); | ||
29 | int (*ccid_hc_rx_init)(struct sock *sk); | ||
30 | int (*ccid_hc_tx_init)(struct sock *sk); | ||
31 | void (*ccid_hc_rx_exit)(struct sock *sk); | ||
32 | void (*ccid_hc_tx_exit)(struct sock *sk); | ||
33 | void (*ccid_hc_rx_packet_recv)(struct sock *sk, | ||
34 | struct sk_buff *skb); | ||
35 | int (*ccid_hc_rx_parse_options)(struct sock *sk, | ||
36 | unsigned char option, | ||
37 | unsigned char len, u16 idx, | ||
38 | unsigned char* value); | ||
39 | void (*ccid_hc_rx_insert_options)(struct sock *sk, | ||
40 | struct sk_buff *skb); | ||
41 | void (*ccid_hc_tx_insert_options)(struct sock *sk, | ||
42 | struct sk_buff *skb); | ||
43 | void (*ccid_hc_tx_packet_recv)(struct sock *sk, | ||
44 | struct sk_buff *skb); | ||
45 | int (*ccid_hc_tx_parse_options)(struct sock *sk, | ||
46 | unsigned char option, | ||
47 | unsigned char len, u16 idx, | ||
48 | unsigned char* value); | ||
49 | int (*ccid_hc_tx_send_packet)(struct sock *sk, | ||
50 | struct sk_buff *skb, int len); | ||
51 | void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, | ||
52 | int len); | ||
53 | void (*ccid_hc_rx_get_info)(struct sock *sk, | ||
54 | struct tcp_info *info); | ||
55 | void (*ccid_hc_tx_get_info)(struct sock *sk, | ||
56 | struct tcp_info *info); | ||
57 | }; | ||
58 | |||
59 | extern int ccid_register(struct ccid *ccid); | ||
60 | extern int ccid_unregister(struct ccid *ccid); | ||
61 | |||
62 | extern struct ccid *ccid_init(unsigned char id, struct sock *sk); | ||
63 | extern void ccid_exit(struct ccid *ccid, struct sock *sk); | ||
64 | |||
65 | static inline void __ccid_get(struct ccid *ccid) | ||
66 | { | ||
67 | __module_get(ccid->ccid_owner); | ||
68 | } | ||
69 | |||
70 | static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, | ||
71 | struct sk_buff *skb, int len) | ||
72 | { | ||
73 | int rc = 0; | ||
74 | if (ccid->ccid_hc_tx_send_packet != NULL) | ||
75 | rc = ccid->ccid_hc_tx_send_packet(sk, skb, len); | ||
76 | return rc; | ||
77 | } | ||
78 | |||
79 | static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, | ||
80 | int more, int len) | ||
81 | { | ||
82 | if (ccid->ccid_hc_tx_packet_sent != NULL) | ||
83 | ccid->ccid_hc_tx_packet_sent(sk, more, len); | ||
84 | } | ||
85 | |||
86 | static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) | ||
87 | { | ||
88 | int rc = 0; | ||
89 | if (ccid->ccid_hc_rx_init != NULL) | ||
90 | rc = ccid->ccid_hc_rx_init(sk); | ||
91 | return rc; | ||
92 | } | ||
93 | |||
94 | static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) | ||
95 | { | ||
96 | int rc = 0; | ||
97 | if (ccid->ccid_hc_tx_init != NULL) | ||
98 | rc = ccid->ccid_hc_tx_init(sk); | ||
99 | return rc; | ||
100 | } | ||
101 | |||
102 | static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) | ||
103 | { | ||
104 | if (ccid->ccid_hc_rx_exit != NULL && | ||
105 | dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) | ||
106 | ccid->ccid_hc_rx_exit(sk); | ||
107 | } | ||
108 | |||
109 | static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) | ||
110 | { | ||
111 | if (ccid->ccid_hc_tx_exit != NULL && | ||
112 | dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) | ||
113 | ccid->ccid_hc_tx_exit(sk); | ||
114 | } | ||
115 | |||
116 | static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
117 | struct sk_buff *skb) | ||
118 | { | ||
119 | if (ccid->ccid_hc_rx_packet_recv != NULL) | ||
120 | ccid->ccid_hc_rx_packet_recv(sk, skb); | ||
121 | } | ||
122 | |||
123 | static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
124 | struct sk_buff *skb) | ||
125 | { | ||
126 | if (ccid->ccid_hc_tx_packet_recv != NULL) | ||
127 | ccid->ccid_hc_tx_packet_recv(sk, skb); | ||
128 | } | ||
129 | |||
130 | static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, | ||
131 | unsigned char option, | ||
132 | unsigned char len, u16 idx, | ||
133 | unsigned char* value) | ||
134 | { | ||
135 | int rc = 0; | ||
136 | if (ccid->ccid_hc_tx_parse_options != NULL) | ||
137 | rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, | ||
138 | value); | ||
139 | return rc; | ||
140 | } | ||
141 | |||
142 | static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, | ||
143 | unsigned char option, | ||
144 | unsigned char len, u16 idx, | ||
145 | unsigned char* value) | ||
146 | { | ||
147 | int rc = 0; | ||
148 | if (ccid->ccid_hc_rx_parse_options != NULL) | ||
149 | rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); | ||
150 | return rc; | ||
151 | } | ||
152 | |||
153 | static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, | ||
154 | struct sk_buff *skb) | ||
155 | { | ||
156 | if (ccid->ccid_hc_tx_insert_options != NULL) | ||
157 | ccid->ccid_hc_tx_insert_options(sk, skb); | ||
158 | } | ||
159 | |||
160 | static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, | ||
161 | struct sk_buff *skb) | ||
162 | { | ||
163 | if (ccid->ccid_hc_rx_insert_options != NULL) | ||
164 | ccid->ccid_hc_rx_insert_options(sk, skb); | ||
165 | } | ||
166 | |||
167 | static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, | ||
168 | struct tcp_info *info) | ||
169 | { | ||
170 | if (ccid->ccid_hc_rx_get_info != NULL) | ||
171 | ccid->ccid_hc_rx_get_info(sk, info); | ||
172 | } | ||
173 | |||
174 | static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, | ||
175 | struct tcp_info *info) | ||
176 | { | ||
177 | if (ccid->ccid_hc_tx_get_info != NULL) | ||
178 | ccid->ccid_hc_tx_get_info(sk, info); | ||
179 | } | ||
180 | #endif /* _CCID_H */ | ||
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 000000000000..7684d83946a4 --- /dev/null +++ b/net/dccp/ccids/Kconfig | |||
@@ -0,0 +1,29 @@ | |||
1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" | ||
2 | depends on IP_DCCP && EXPERIMENTAL | ||
3 | |||
4 | config IP_DCCP_CCID3 | ||
5 | tristate "CCID3 (TFRC) (EXPERIMENTAL)" | ||
6 | depends on IP_DCCP | ||
7 | ---help--- | ||
8 | CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based | ||
9 | rate-controlled congestion control mechanism. TFRC is designed to | ||
10 | be reasonably fair when competing for bandwidth with TCP-like flows, | ||
11 | where a flow is "reasonably fair" if its sending rate is generally | ||
12 | within a factor of two of the sending rate of a TCP flow under the | ||
13 | same conditions. However, TFRC has a much lower variation of | ||
14 | throughput over time compared with TCP, which makes CCID 3 more | ||
15 | suitable than CCID 2 for applications such as streaming media where a | ||
16 | relatively smooth sending rate is of importance. | ||
17 | |||
18 | CCID 3 is further described in [CCID 3 PROFILE]. The TFRC | ||
19 | congestion control algorithms were initially described in RFC 3448. | ||
20 | |||
21 | This text was extracted from draft-ietf-dccp-spec-11.txt. | ||
22 | |||
23 | If in doubt, say M. | ||
24 | |||
25 | config IP_DCCP_TFRC_LIB | ||
26 | depends on IP_DCCP_CCID3 | ||
27 | def_tristate IP_DCCP_CCID3 | ||
28 | |||
29 | endmenu | ||
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 000000000000..956f79f50743 --- /dev/null +++ b/net/dccp/ccids/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o | ||
2 | |||
3 | dccp_ccid3-y := ccid3.o | ||
4 | |||
5 | obj-y += lib/ | ||
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 000000000000..7bf3b3a91e97 --- /dev/null +++ b/net/dccp/ccids/ccid3.c | |||
@@ -0,0 +1,1221 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/ccid3.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
6 | * | ||
7 | * An implementation of the DCCP protocol | ||
8 | * | ||
9 | * This code has been developed by the University of Waikato WAND | ||
10 | * research group. For further information please see http://www.wand.net.nz/ | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #include <linux/config.h> | ||
38 | #include "../ccid.h" | ||
39 | #include "../dccp.h" | ||
40 | #include "lib/packet_history.h" | ||
41 | #include "lib/loss_interval.h" | ||
42 | #include "lib/tfrc.h" | ||
43 | #include "ccid3.h" | ||
44 | |||
45 | /* | ||
46 | * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. | ||
47 | */ | ||
48 | static inline u32 usecs_div(const u32 a, const u32 b) | ||
49 | { | ||
50 | const u32 tmp = a * (USEC_PER_SEC / 10); | ||
51 | return b > 20 ? tmp / (b / 10) : tmp; | ||
52 | } | ||
53 | |||
/* Run-time switch for ccid3_pr_debug(); messages are printed when non-zero. */
static int ccid3_debug;

/*
 * ccid3_pr_debug - KERN_DEBUG printk prefixed with the calling function name
 *
 * Compiled in only when CCID3_DEBUG is defined, and then printed only while
 * ccid3_debug is non-zero.  Both variants expand to a single do/while (0)
 * statement so that the macro is safe as the sole body of an if/else.
 */
#ifdef CCID3_DEBUG
#define ccid3_pr_debug(format, a...) \
	do { \
		if (ccid3_debug) \
			printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
	} while (0)
#else
#define ccid3_pr_debug(format, a...)	do { } while (0)
#endif
64 | |||
65 | static struct dccp_tx_hist *ccid3_tx_hist; | ||
66 | static struct dccp_rx_hist *ccid3_rx_hist; | ||
67 | static struct dccp_li_hist *ccid3_li_hist; | ||
68 | |||
/*
 * Whole-socket init hook for CCID3: emits a debug message and nothing else.
 * Always returns 0.
 */
static int ccid3_init(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	return 0;
}
74 | |||
/* Whole-socket exit hook for CCID3: emits a debug message and nothing else. */
static void ccid3_exit(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
}
79 | |||
/* TFRC sender states (numbering starts at 1, so 0 is not a valid state) */
enum ccid3_hc_tx_states {
	/* left for NO_FBACK when the first packet is allowed out */
	TFRC_SSTATE_NO_SENT  = 1,
	/* the no feedback timer halves the send rate in this state */
	TFRC_SSTATE_NO_FBACK = 2,
	/* presumably entered once feedback arrived - transition not shown here */
	TFRC_SSTATE_FBACK    = 3,
	/* sending is refused and the no feedback timer is not re-armed */
	TFRC_SSTATE_TERM     = 4,
};
87 | |||
#ifdef CCID3_DEBUG
/*
 * Map a TFRC sender state to a short name for debug messages.
 *
 * Returns "UNKNOWN" for a value that has no entry in the table (including 0
 * and anything past TFRC_SSTATE_TERM) instead of indexing out of bounds or
 * returning NULL.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static const char * const ccid3_state_names[] = {
		[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
		[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
		[TFRC_SSTATE_FBACK]    = "FBACK",
		[TFRC_SSTATE_TERM]     = "TERM",
	};
	const unsigned int nr_names = sizeof(ccid3_state_names) /
				      sizeof(ccid3_state_names[0]);

	if ((unsigned int)state >= nr_names ||
	    ccid3_state_names[state] == NULL)
		return "UNKNOWN";

	return ccid3_state_names[state];
}
#endif
101 | |||
102 | static inline void ccid3_hc_tx_set_state(struct sock *sk, | ||
103 | enum ccid3_hc_tx_states state) | ||
104 | { | ||
105 | struct dccp_sock *dp = dccp_sk(sk); | ||
106 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
107 | enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; | ||
108 | |||
109 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
110 | dccp_role(sk), sk, ccid3_tx_state_name(oldstate), | ||
111 | ccid3_tx_state_name(state)); | ||
112 | WARN_ON(state == oldstate); | ||
113 | hctx->ccid3hctx_state = state; | ||
114 | } | ||
115 | |||
116 | /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ | ||
117 | static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) | ||
118 | { | ||
119 | /* | ||
120 | * If no feedback spec says t_ipi is 1 second (set elsewhere and then | ||
121 | * doubles after every no feedback timer (separate function) | ||
122 | */ | ||
123 | if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) | ||
124 | hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, | ||
125 | hctx->ccid3hctx_x); | ||
126 | } | ||
127 | |||
128 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | ||
129 | static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) | ||
130 | { | ||
131 | hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, | ||
132 | TFRC_OPSYS_HALF_TIME_GRAN); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Update X by | ||
137 | * If (p > 0) | ||
138 | * x_calc = calcX(s, R, p); | ||
139 | * X = max(min(X_calc, 2 * X_recv), s / t_mbi); | ||
140 | * Else | ||
141 | * If (now - tld >= R) | ||
142 | * X = max(min(2 * X, 2 * X_recv), s / R); | ||
143 | * tld = now; | ||
144 | */ | ||
145 | static void ccid3_hc_tx_update_x(struct sock *sk) | ||
146 | { | ||
147 | struct dccp_sock *dp = dccp_sk(sk); | ||
148 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
149 | |||
150 | /* To avoid large error in calcX */ | ||
151 | if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { | ||
152 | hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, | ||
153 | hctx->ccid3hctx_rtt, | ||
154 | hctx->ccid3hctx_p); | ||
155 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, | ||
156 | 2 * hctx->ccid3hctx_x_recv), | ||
157 | (hctx->ccid3hctx_s / | ||
158 | TFRC_MAX_BACK_OFF_TIME)); | ||
159 | } else { | ||
160 | struct timeval now; | ||
161 | |||
162 | do_gettimeofday(&now); | ||
163 | if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= | ||
164 | hctx->ccid3hctx_rtt) { | ||
165 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, | ||
166 | hctx->ccid3hctx_x) * 2, | ||
167 | usecs_div(hctx->ccid3hctx_s, | ||
168 | hctx->ccid3hctx_rtt)); | ||
169 | hctx->ccid3hctx_t_ld = now; | ||
170 | } | ||
171 | } | ||
172 | } | ||
173 | |||
174 | static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | ||
175 | { | ||
176 | struct sock *sk = (struct sock *)data; | ||
177 | struct dccp_sock *dp = dccp_sk(sk); | ||
178 | unsigned long next_tmout = 0; | ||
179 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
180 | |||
181 | bh_lock_sock(sk); | ||
182 | if (sock_owned_by_user(sk)) { | ||
183 | /* Try again later. */ | ||
184 | /* XXX: set some sensible MIB */ | ||
185 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
186 | jiffies + HZ / 5); | ||
187 | goto out; | ||
188 | } | ||
189 | |||
190 | ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, | ||
191 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
192 | |||
193 | switch (hctx->ccid3hctx_state) { | ||
194 | case TFRC_SSTATE_TERM: | ||
195 | goto out; | ||
196 | case TFRC_SSTATE_NO_FBACK: | ||
197 | /* Halve send rate */ | ||
198 | hctx->ccid3hctx_x /= 2; | ||
199 | if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / | ||
200 | TFRC_MAX_BACK_OFF_TIME)) | ||
201 | hctx->ccid3hctx_x = (hctx->ccid3hctx_s / | ||
202 | TFRC_MAX_BACK_OFF_TIME); | ||
203 | |||
204 | ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " | ||
205 | "bytes/s\n", | ||
206 | dccp_role(sk), sk, | ||
207 | ccid3_tx_state_name(hctx->ccid3hctx_state), | ||
208 | hctx->ccid3hctx_x); | ||
209 | next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, | ||
210 | hctx->ccid3hctx_x), | ||
211 | TFRC_INITIAL_TIMEOUT); | ||
212 | /* | ||
213 | * FIXME - not sure above calculation is correct. See section | ||
214 | * 5 of CCID3 11 should adjust tx_t_ipi and double that to | ||
215 | * achieve it really | ||
216 | */ | ||
217 | break; | ||
218 | case TFRC_SSTATE_FBACK: | ||
219 | /* | ||
220 | * Check if IDLE since last timeout and recv rate is less than | ||
221 | * 4 packets per RTT | ||
222 | */ | ||
223 | if (!hctx->ccid3hctx_idle || | ||
224 | (hctx->ccid3hctx_x_recv >= | ||
225 | 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { | ||
226 | ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", | ||
227 | dccp_role(sk), sk, | ||
228 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
229 | /* Halve sending rate */ | ||
230 | |||
231 | /* If (X_calc > 2 * X_recv) | ||
232 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); | ||
233 | * Else | ||
234 | * X_recv = X_calc / 4; | ||
235 | */ | ||
236 | BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && | ||
237 | hctx->ccid3hctx_x_calc == 0); | ||
238 | |||
239 | /* check also if p is zero -> x_calc is infinity? */ | ||
240 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || | ||
241 | hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) | ||
242 | hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, | ||
243 | hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); | ||
244 | else | ||
245 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; | ||
246 | |||
247 | /* Update sending rate */ | ||
248 | ccid3_hc_tx_update_x(sk); | ||
249 | } | ||
250 | /* | ||
251 | * Schedule no feedback timer to expire in | ||
252 | * max(4 * R, 2 * s / X) | ||
253 | */ | ||
254 | next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, | ||
255 | 2 * usecs_div(hctx->ccid3hctx_s, | ||
256 | hctx->ccid3hctx_x)); | ||
257 | break; | ||
258 | default: | ||
259 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
260 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
261 | dump_stack(); | ||
262 | goto out; | ||
263 | } | ||
264 | |||
265 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
266 | jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); | ||
267 | hctx->ccid3hctx_idle = 1; | ||
268 | out: | ||
269 | bh_unlock_sock(sk); | ||
270 | sock_put(sk); | ||
271 | } | ||
272 | |||
273 | static int ccid3_hc_tx_send_packet(struct sock *sk, | ||
274 | struct sk_buff *skb, int len) | ||
275 | { | ||
276 | struct dccp_sock *dp = dccp_sk(sk); | ||
277 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
278 | struct dccp_tx_hist_entry *new_packet; | ||
279 | struct timeval now; | ||
280 | long delay; | ||
281 | int rc = -ENOTCONN; | ||
282 | |||
283 | /* Check if pure ACK or Terminating*/ | ||
284 | |||
285 | /* | ||
286 | * XXX: We only call this function for DATA and DATAACK, on, these | ||
287 | * packets can have zero length, but why the comment about "pure ACK"? | ||
288 | */ | ||
289 | if (hctx == NULL || len == 0 || | ||
290 | hctx->ccid3hctx_state == TFRC_SSTATE_TERM) | ||
291 | goto out; | ||
292 | |||
293 | /* See if last packet allocated was not sent */ | ||
294 | new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); | ||
295 | if (new_packet == NULL || new_packet->dccphtx_sent) { | ||
296 | new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, | ||
297 | SLAB_ATOMIC); | ||
298 | |||
299 | rc = -ENOBUFS; | ||
300 | if (new_packet == NULL) { | ||
301 | ccid3_pr_debug("%s, sk=%p, not enough mem to add " | ||
302 | "to history, send refused\n", | ||
303 | dccp_role(sk), sk); | ||
304 | goto out; | ||
305 | } | ||
306 | |||
307 | dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); | ||
308 | } | ||
309 | |||
310 | do_gettimeofday(&now); | ||
311 | |||
312 | switch (hctx->ccid3hctx_state) { | ||
313 | case TFRC_SSTATE_NO_SENT: | ||
314 | ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", | ||
315 | dccp_role(sk), sk, dp->dccps_gss); | ||
316 | |||
317 | hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; | ||
318 | hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; | ||
319 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
320 | jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); | ||
321 | hctx->ccid3hctx_last_win_count = 0; | ||
322 | hctx->ccid3hctx_t_last_win_count = now; | ||
323 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | ||
324 | hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; | ||
325 | |||
326 | /* Set nominal send time for initial packet */ | ||
327 | hctx->ccid3hctx_t_nom = now; | ||
328 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
329 | hctx->ccid3hctx_t_ipi); | ||
330 | ccid3_calc_new_delta(hctx); | ||
331 | rc = 0; | ||
332 | break; | ||
333 | case TFRC_SSTATE_NO_FBACK: | ||
334 | case TFRC_SSTATE_FBACK: | ||
335 | delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - | ||
336 | hctx->ccid3hctx_delta); | ||
337 | ccid3_pr_debug("send_packet delay=%ld\n", delay); | ||
338 | delay /= -1000; | ||
339 | /* divide by -1000 is to convert to ms and get sign right */ | ||
340 | rc = delay > 0 ? delay : 0; | ||
341 | break; | ||
342 | default: | ||
343 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
344 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
345 | dump_stack(); | ||
346 | rc = -EINVAL; | ||
347 | break; | ||
348 | } | ||
349 | |||
350 | /* Can we send? if so add options and add to packet history */ | ||
351 | if (rc == 0) | ||
352 | new_packet->dccphtx_ccval = | ||
353 | DCCP_SKB_CB(skb)->dccpd_ccval = | ||
354 | hctx->ccid3hctx_last_win_count; | ||
355 | out: | ||
356 | return rc; | ||
357 | } | ||
358 | |||
359 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) | ||
360 | { | ||
361 | struct dccp_sock *dp = dccp_sk(sk); | ||
362 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
363 | struct timeval now; | ||
364 | |||
365 | BUG_ON(hctx == NULL); | ||
366 | |||
367 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
368 | ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", | ||
369 | dccp_role(sk), sk); | ||
370 | return; | ||
371 | } | ||
372 | |||
373 | do_gettimeofday(&now); | ||
374 | |||
375 | /* check if we have sent a data packet */ | ||
376 | if (len > 0) { | ||
377 | unsigned long quarter_rtt; | ||
378 | struct dccp_tx_hist_entry *packet; | ||
379 | |||
380 | packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); | ||
381 | if (packet == NULL) { | ||
382 | printk(KERN_CRIT "%s: packet doesn't exists in " | ||
383 | "history!\n", __FUNCTION__); | ||
384 | return; | ||
385 | } | ||
386 | if (packet->dccphtx_sent) { | ||
387 | printk(KERN_CRIT "%s: no unsent packet in history!\n", | ||
388 | __FUNCTION__); | ||
389 | return; | ||
390 | } | ||
391 | packet->dccphtx_tstamp = now; | ||
392 | packet->dccphtx_seqno = dp->dccps_gss; | ||
393 | /* | ||
394 | * Check if win_count have changed | ||
395 | * Algorithm in "8.1. Window Counter Valuer" in | ||
396 | * draft-ietf-dccp-ccid3-11.txt | ||
397 | */ | ||
398 | quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); | ||
399 | if (likely(hctx->ccid3hctx_rtt > 8)) | ||
400 | quarter_rtt /= hctx->ccid3hctx_rtt / 4; | ||
401 | |||
402 | if (quarter_rtt > 0) { | ||
403 | hctx->ccid3hctx_t_last_win_count = now; | ||
404 | hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + | ||
405 | min_t(unsigned long, quarter_rtt, 5)) % 16; | ||
406 | ccid3_pr_debug("%s, sk=%p, window changed from " | ||
407 | "%u to %u!\n", | ||
408 | dccp_role(sk), sk, | ||
409 | packet->dccphtx_ccval, | ||
410 | hctx->ccid3hctx_last_win_count); | ||
411 | } | ||
412 | |||
413 | hctx->ccid3hctx_idle = 0; | ||
414 | packet->dccphtx_rtt = hctx->ccid3hctx_rtt; | ||
415 | packet->dccphtx_sent = 1; | ||
416 | } else | ||
417 | ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", | ||
418 | dccp_role(sk), sk, dp->dccps_gss); | ||
419 | |||
420 | switch (hctx->ccid3hctx_state) { | ||
421 | case TFRC_SSTATE_NO_SENT: | ||
422 | /* if first wasn't pure ack */ | ||
423 | if (len != 0) | ||
424 | printk(KERN_CRIT "%s: %s, First packet sent is noted " | ||
425 | "as a data packet\n", | ||
426 | __FUNCTION__, dccp_role(sk)); | ||
427 | return; | ||
428 | case TFRC_SSTATE_NO_FBACK: | ||
429 | case TFRC_SSTATE_FBACK: | ||
430 | if (len > 0) { | ||
431 | hctx->ccid3hctx_t_nom = now; | ||
432 | ccid3_calc_new_t_ipi(hctx); | ||
433 | ccid3_calc_new_delta(hctx); | ||
434 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
435 | hctx->ccid3hctx_t_ipi); | ||
436 | } | ||
437 | break; | ||
438 | default: | ||
439 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
440 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
441 | dump_stack(); | ||
442 | break; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
447 | { | ||
448 | struct dccp_sock *dp = dccp_sk(sk); | ||
449 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
450 | struct ccid3_options_received *opt_recv; | ||
451 | struct dccp_tx_hist_entry *packet; | ||
452 | unsigned long next_tmout; | ||
453 | u32 t_elapsed; | ||
454 | u32 pinv; | ||
455 | u32 x_recv; | ||
456 | u32 r_sample; | ||
457 | |||
458 | if (hctx == NULL) | ||
459 | return; | ||
460 | |||
461 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
462 | ccid3_pr_debug("%s, sk=%p, received a packet when " | ||
463 | "terminating!\n", dccp_role(sk), sk); | ||
464 | return; | ||
465 | } | ||
466 | |||
467 | /* we are only interested in ACKs */ | ||
468 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | ||
469 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | ||
470 | return; | ||
471 | |||
472 | opt_recv = &hctx->ccid3hctx_options_received; | ||
473 | |||
474 | t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; | ||
475 | x_recv = opt_recv->ccid3or_receive_rate; | ||
476 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
477 | |||
478 | switch (hctx->ccid3hctx_state) { | ||
479 | case TFRC_SSTATE_NO_SENT: | ||
480 | /* FIXME: what to do here? */ | ||
481 | return; | ||
482 | case TFRC_SSTATE_NO_FBACK: | ||
483 | case TFRC_SSTATE_FBACK: | ||
484 | /* Calculate new round trip sample by | ||
485 | * R_sample = (now - t_recvdata) - t_delay */ | ||
486 | /* get t_recvdata from history */ | ||
487 | packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, | ||
488 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
489 | if (packet == NULL) { | ||
490 | ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " | ||
491 | "exist in history!\n", | ||
492 | dccp_role(sk), sk, | ||
493 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
494 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
495 | return; | ||
496 | } | ||
497 | |||
498 | /* Update RTT */ | ||
499 | r_sample = timeval_now_delta(&packet->dccphtx_tstamp); | ||
500 | /* FIXME: */ | ||
501 | // r_sample -= usecs_to_jiffies(t_elapsed * 10); | ||
502 | |||
503 | /* Update RTT estimate by | ||
504 | * If (No feedback recv) | ||
505 | * R = R_sample; | ||
506 | * Else | ||
507 | * R = q * R + (1 - q) * R_sample; | ||
508 | * | ||
509 | * q is a constant, RFC 3448 recomments 0.9 | ||
510 | */ | ||
511 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | ||
512 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | ||
513 | hctx->ccid3hctx_rtt = r_sample; | ||
514 | } else | ||
515 | hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + | ||
516 | r_sample / 10; | ||
517 | |||
518 | ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " | ||
519 | "r_sample=%us\n", dccp_role(sk), sk, | ||
520 | hctx->ccid3hctx_rtt, r_sample); | ||
521 | |||
522 | /* Update timeout interval */ | ||
523 | hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, | ||
524 | USEC_PER_SEC); | ||
525 | |||
526 | /* Update receive rate */ | ||
527 | hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ | ||
528 | |||
529 | /* Update loss event rate */ | ||
530 | if (pinv == ~0 || pinv == 0) | ||
531 | hctx->ccid3hctx_p = 0; | ||
532 | else { | ||
533 | hctx->ccid3hctx_p = 1000000 / pinv; | ||
534 | |||
535 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { | ||
536 | hctx->ccid3hctx_p = TFRC_SMALLEST_P; | ||
537 | ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", | ||
538 | dccp_role(sk), sk); | ||
539 | } | ||
540 | } | ||
541 | |||
542 | /* unschedule no feedback timer */ | ||
543 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
544 | |||
545 | /* Update sending rate */ | ||
546 | ccid3_hc_tx_update_x(sk); | ||
547 | |||
548 | /* Update next send time */ | ||
549 | timeval_sub_usecs(&hctx->ccid3hctx_t_nom, | ||
550 | hctx->ccid3hctx_t_ipi); | ||
551 | ccid3_calc_new_t_ipi(hctx); | ||
552 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
553 | hctx->ccid3hctx_t_ipi); | ||
554 | ccid3_calc_new_delta(hctx); | ||
555 | |||
556 | /* remove all packets older than the one acked from history */ | ||
557 | dccp_tx_hist_purge_older(ccid3_tx_hist, | ||
558 | &hctx->ccid3hctx_hist, packet); | ||
559 | /* | ||
560 | * As we have calculated new ipi, delta, t_nom it is possible that | ||
561 | * we now can send a packet, so wake up dccp_wait_for_ccids. | ||
562 | */ | ||
563 | sk->sk_write_space(sk); | ||
564 | |||
565 | /* | ||
566 | * Schedule no feedback timer to expire in | ||
567 | * max(4 * R, 2 * s / X) | ||
568 | */ | ||
569 | next_tmout = max(hctx->ccid3hctx_t_rto, | ||
570 | 2 * usecs_div(hctx->ccid3hctx_s, | ||
571 | hctx->ccid3hctx_x)); | ||
572 | |||
573 | ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " | ||
574 | "expire in %lu jiffies (%luus)\n", | ||
575 | dccp_role(sk), sk, | ||
576 | usecs_to_jiffies(next_tmout), next_tmout); | ||
577 | |||
578 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
579 | jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); | ||
580 | |||
581 | /* set idle flag */ | ||
582 | hctx->ccid3hctx_idle = 1; | ||
583 | break; | ||
584 | default: | ||
585 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
586 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
587 | dump_stack(); | ||
588 | break; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
593 | { | ||
594 | const struct dccp_sock *dp = dccp_sk(sk); | ||
595 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
596 | |||
597 | if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || | ||
598 | sk->sk_state == DCCP_PARTOPEN)) | ||
599 | return; | ||
600 | |||
601 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | ||
602 | } | ||
603 | |||
604 | static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, | ||
605 | unsigned char len, u16 idx, | ||
606 | unsigned char *value) | ||
607 | { | ||
608 | int rc = 0; | ||
609 | struct dccp_sock *dp = dccp_sk(sk); | ||
610 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
611 | struct ccid3_options_received *opt_recv; | ||
612 | |||
613 | if (hctx == NULL) | ||
614 | return 0; | ||
615 | |||
616 | opt_recv = &hctx->ccid3hctx_options_received; | ||
617 | |||
618 | if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { | ||
619 | opt_recv->ccid3or_seqno = dp->dccps_gsr; | ||
620 | opt_recv->ccid3or_loss_event_rate = ~0; | ||
621 | opt_recv->ccid3or_loss_intervals_idx = 0; | ||
622 | opt_recv->ccid3or_loss_intervals_len = 0; | ||
623 | opt_recv->ccid3or_receive_rate = 0; | ||
624 | } | ||
625 | |||
626 | switch (option) { | ||
627 | case TFRC_OPT_LOSS_EVENT_RATE: | ||
628 | if (len != 4) { | ||
629 | ccid3_pr_debug("%s, sk=%p, invalid len for " | ||
630 | "TFRC_OPT_LOSS_EVENT_RATE\n", | ||
631 | dccp_role(sk), sk); | ||
632 | rc = -EINVAL; | ||
633 | } else { | ||
634 | opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); | ||
635 | ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", | ||
636 | dccp_role(sk), sk, | ||
637 | opt_recv->ccid3or_loss_event_rate); | ||
638 | } | ||
639 | break; | ||
640 | case TFRC_OPT_LOSS_INTERVALS: | ||
641 | opt_recv->ccid3or_loss_intervals_idx = idx; | ||
642 | opt_recv->ccid3or_loss_intervals_len = len; | ||
643 | ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", | ||
644 | dccp_role(sk), sk, | ||
645 | opt_recv->ccid3or_loss_intervals_idx, | ||
646 | opt_recv->ccid3or_loss_intervals_len); | ||
647 | break; | ||
648 | case TFRC_OPT_RECEIVE_RATE: | ||
649 | if (len != 4) { | ||
650 | ccid3_pr_debug("%s, sk=%p, invalid len for " | ||
651 | "TFRC_OPT_RECEIVE_RATE\n", | ||
652 | dccp_role(sk), sk); | ||
653 | rc = -EINVAL; | ||
654 | } else { | ||
655 | opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); | ||
656 | ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", | ||
657 | dccp_role(sk), sk, | ||
658 | opt_recv->ccid3or_receive_rate); | ||
659 | } | ||
660 | break; | ||
661 | } | ||
662 | |||
663 | return rc; | ||
664 | } | ||
665 | |||
666 | static int ccid3_hc_tx_init(struct sock *sk) | ||
667 | { | ||
668 | struct dccp_sock *dp = dccp_sk(sk); | ||
669 | struct ccid3_hc_tx_sock *hctx; | ||
670 | |||
671 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
672 | |||
673 | hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), | ||
674 | gfp_any()); | ||
675 | if (hctx == NULL) | ||
676 | return -ENOMEM; | ||
677 | |||
678 | memset(hctx, 0, sizeof(*hctx)); | ||
679 | |||
680 | if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && | ||
681 | dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) | ||
682 | hctx->ccid3hctx_s = dp->dccps_packet_size; | ||
683 | else | ||
684 | hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; | ||
685 | |||
686 | /* Set transmission rate to 1 packet per second */ | ||
687 | hctx->ccid3hctx_x = hctx->ccid3hctx_s; | ||
688 | hctx->ccid3hctx_t_rto = USEC_PER_SEC; | ||
689 | hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; | ||
690 | INIT_LIST_HEAD(&hctx->ccid3hctx_hist); | ||
691 | init_timer(&hctx->ccid3hctx_no_feedback_timer); | ||
692 | |||
693 | return 0; | ||
694 | } | ||
695 | |||
696 | static void ccid3_hc_tx_exit(struct sock *sk) | ||
697 | { | ||
698 | struct dccp_sock *dp = dccp_sk(sk); | ||
699 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
700 | |||
701 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
702 | BUG_ON(hctx == NULL); | ||
703 | |||
704 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); | ||
705 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
706 | |||
707 | /* Empty packet history */ | ||
708 | dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); | ||
709 | |||
710 | kfree(dp->dccps_hc_tx_ccid_private); | ||
711 | dp->dccps_hc_tx_ccid_private = NULL; | ||
712 | } | ||
713 | |||
714 | /* | ||
715 | * RX Half Connection methods | ||
716 | */ | ||
717 | |||
/* States of the TFRC receiver (RX half connection) */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA = 1,	/* initial state set by ccid3_hc_rx_init() */
	TFRC_RSTATE_DATA    = 2,	/* entered after the initial feedback */
	TFRC_RSTATE_TERM    = 127,	/* set by ccid3_hc_rx_exit() */
};
724 | |||
#ifdef CCID3_DEBUG
/*
 * Map a receiver state to a printable name for debug output.
 * The table is indexed directly by the enum value.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static char *names[] = {
		[TFRC_RSTATE_NO_DATA]	= "NO_DATA",
		[TFRC_RSTATE_DATA]	= "DATA",
		[TFRC_RSTATE_TERM]	= "TERM",
	};

	return names[state];
}
#endif
737 | |||
738 | static inline void ccid3_hc_rx_set_state(struct sock *sk, | ||
739 | enum ccid3_hc_rx_states state) | ||
740 | { | ||
741 | struct dccp_sock *dp = dccp_sk(sk); | ||
742 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
743 | enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; | ||
744 | |||
745 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
746 | dccp_role(sk), sk, ccid3_rx_state_name(oldstate), | ||
747 | ccid3_rx_state_name(state)); | ||
748 | WARN_ON(state == oldstate); | ||
749 | hcrx->ccid3hcrx_state = state; | ||
750 | } | ||
751 | |||
752 | static void ccid3_hc_rx_send_feedback(struct sock *sk) | ||
753 | { | ||
754 | struct dccp_sock *dp = dccp_sk(sk); | ||
755 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
756 | struct dccp_rx_hist_entry *packet; | ||
757 | struct timeval now; | ||
758 | |||
759 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
760 | |||
761 | do_gettimeofday(&now); | ||
762 | |||
763 | switch (hcrx->ccid3hcrx_state) { | ||
764 | case TFRC_RSTATE_NO_DATA: | ||
765 | hcrx->ccid3hcrx_x_recv = 0; | ||
766 | break; | ||
767 | case TFRC_RSTATE_DATA: { | ||
768 | const u32 delta = timeval_delta(&now, | ||
769 | &hcrx->ccid3hcrx_tstamp_last_feedback); | ||
770 | |||
771 | hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * | ||
772 | USEC_PER_SEC); | ||
773 | if (likely(delta > 1)) | ||
774 | hcrx->ccid3hcrx_x_recv /= delta; | ||
775 | } | ||
776 | break; | ||
777 | default: | ||
778 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
779 | __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); | ||
780 | dump_stack(); | ||
781 | return; | ||
782 | } | ||
783 | |||
784 | packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); | ||
785 | if (packet == NULL) { | ||
786 | printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", | ||
787 | __FUNCTION__, dccp_role(sk), sk); | ||
788 | dump_stack(); | ||
789 | return; | ||
790 | } | ||
791 | |||
792 | hcrx->ccid3hcrx_tstamp_last_feedback = now; | ||
793 | hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; | ||
794 | hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; | ||
795 | hcrx->ccid3hcrx_bytes_recv = 0; | ||
796 | |||
797 | /* Convert to multiples of 10us */ | ||
798 | hcrx->ccid3hcrx_elapsed_time = | ||
799 | timeval_delta(&now, &packet->dccphrx_tstamp) / 10; | ||
800 | if (hcrx->ccid3hcrx_p == 0) | ||
801 | hcrx->ccid3hcrx_pinv = ~0; | ||
802 | else | ||
803 | hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; | ||
804 | dccp_send_ack(sk); | ||
805 | } | ||
806 | |||
807 | static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
808 | { | ||
809 | const struct dccp_sock *dp = dccp_sk(sk); | ||
810 | u32 x_recv, pinv; | ||
811 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
812 | |||
813 | if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || | ||
814 | sk->sk_state == DCCP_PARTOPEN)) | ||
815 | return; | ||
816 | |||
817 | DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; | ||
818 | |||
819 | if (dccp_packet_without_ack(skb)) | ||
820 | return; | ||
821 | |||
822 | if (hcrx->ccid3hcrx_elapsed_time != 0) | ||
823 | dccp_insert_option_elapsed_time(sk, skb, | ||
824 | hcrx->ccid3hcrx_elapsed_time); | ||
825 | dccp_insert_option_timestamp(sk, skb); | ||
826 | x_recv = htonl(hcrx->ccid3hcrx_x_recv); | ||
827 | pinv = htonl(hcrx->ccid3hcrx_pinv); | ||
828 | dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, | ||
829 | &pinv, sizeof(pinv)); | ||
830 | dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, | ||
831 | &x_recv, sizeof(x_recv)); | ||
832 | } | ||
833 | |||
834 | /* calculate first loss interval | ||
835 | * | ||
836 | * returns estimated loss interval in usecs */ | ||
837 | |||
838 | static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) | ||
839 | { | ||
840 | struct dccp_sock *dp = dccp_sk(sk); | ||
841 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
842 | struct dccp_rx_hist_entry *entry, *next, *tail = NULL; | ||
843 | u32 rtt, delta, x_recv, fval, p, tmp2; | ||
844 | struct timeval tstamp = { 0, }; | ||
845 | int interval = 0; | ||
846 | int win_count = 0; | ||
847 | int step = 0; | ||
848 | u64 tmp1; | ||
849 | |||
850 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, | ||
851 | dccphrx_node) { | ||
852 | if (dccp_rx_hist_entry_data_packet(entry)) { | ||
853 | tail = entry; | ||
854 | |||
855 | switch (step) { | ||
856 | case 0: | ||
857 | tstamp = entry->dccphrx_tstamp; | ||
858 | win_count = entry->dccphrx_ccval; | ||
859 | step = 1; | ||
860 | break; | ||
861 | case 1: | ||
862 | interval = win_count - entry->dccphrx_ccval; | ||
863 | if (interval < 0) | ||
864 | interval += TFRC_WIN_COUNT_LIMIT; | ||
865 | if (interval > 4) | ||
866 | goto found; | ||
867 | break; | ||
868 | } | ||
869 | } | ||
870 | } | ||
871 | |||
872 | if (step == 0) { | ||
873 | printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no " | ||
874 | "data packets!\n", | ||
875 | __FUNCTION__, dccp_role(sk), sk); | ||
876 | return ~0; | ||
877 | } | ||
878 | |||
879 | if (interval == 0) { | ||
880 | ccid3_pr_debug("%s, sk=%p, Could not find a win_count " | ||
881 | "interval > 0. Defaulting to 1\n", | ||
882 | dccp_role(sk), sk); | ||
883 | interval = 1; | ||
884 | } | ||
885 | found: | ||
886 | rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; | ||
887 | ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", | ||
888 | dccp_role(sk), sk, rtt); | ||
889 | if (rtt == 0) | ||
890 | rtt = 1; | ||
891 | |||
892 | delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); | ||
893 | x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; | ||
894 | if (likely(delta > 1)) | ||
895 | x_recv /= delta; | ||
896 | |||
897 | tmp1 = (u64)x_recv * (u64)rtt; | ||
898 | do_div(tmp1,10000000); | ||
899 | tmp2 = (u32)tmp1; | ||
900 | fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; | ||
901 | /* do not alter order above or you will get overflow on 32 bit */ | ||
902 | p = tfrc_calc_x_reverse_lookup(fval); | ||
903 | ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " | ||
904 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); | ||
905 | |||
906 | if (p == 0) | ||
907 | return ~0; | ||
908 | else | ||
909 | return 1000000 / p; | ||
910 | } | ||
911 | |||
912 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) | ||
913 | { | ||
914 | struct dccp_sock *dp = dccp_sk(sk); | ||
915 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
916 | |||
917 | if (seq_loss != DCCP_MAX_SEQNO + 1 && | ||
918 | list_empty(&hcrx->ccid3hcrx_li_hist)) { | ||
919 | struct dccp_li_hist_entry *li_tail; | ||
920 | |||
921 | li_tail = dccp_li_hist_interval_new(ccid3_li_hist, | ||
922 | &hcrx->ccid3hcrx_li_hist, | ||
923 | seq_loss, win_loss); | ||
924 | if (li_tail == NULL) | ||
925 | return; | ||
926 | li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); | ||
927 | } | ||
928 | /* FIXME: find end of interval */ | ||
929 | } | ||
930 | |||
931 | static void ccid3_hc_rx_detect_loss(struct sock *sk) | ||
932 | { | ||
933 | struct dccp_sock *dp = dccp_sk(sk); | ||
934 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
935 | u8 win_loss; | ||
936 | const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, | ||
937 | &hcrx->ccid3hcrx_li_hist, | ||
938 | &win_loss); | ||
939 | |||
940 | ccid3_hc_rx_update_li(sk, seq_loss, win_loss); | ||
941 | } | ||
942 | |||
943 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
944 | { | ||
945 | struct dccp_sock *dp = dccp_sk(sk); | ||
946 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
947 | const struct dccp_options_received *opt_recv; | ||
948 | struct dccp_rx_hist_entry *packet; | ||
949 | struct timeval now; | ||
950 | u8 win_count; | ||
951 | u32 p_prev; | ||
952 | int ins; | ||
953 | |||
954 | if (hcrx == NULL) | ||
955 | return; | ||
956 | |||
957 | BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || | ||
958 | hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); | ||
959 | |||
960 | opt_recv = &dp->dccps_options_received; | ||
961 | |||
962 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | ||
963 | case DCCP_PKT_ACK: | ||
964 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | ||
965 | return; | ||
966 | case DCCP_PKT_DATAACK: | ||
967 | if (opt_recv->dccpor_timestamp_echo == 0) | ||
968 | break; | ||
969 | p_prev = hcrx->ccid3hcrx_rtt; | ||
970 | do_gettimeofday(&now); | ||
971 | hcrx->ccid3hcrx_rtt = timeval_usecs(&now) - | ||
972 | (opt_recv->dccpor_timestamp_echo - | ||
973 | opt_recv->dccpor_elapsed_time) * 10; | ||
974 | if (p_prev != hcrx->ccid3hcrx_rtt) | ||
975 | ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", | ||
976 | dccp_role(sk), hcrx->ccid3hcrx_rtt, | ||
977 | opt_recv->dccpor_elapsed_time); | ||
978 | break; | ||
979 | case DCCP_PKT_DATA: | ||
980 | break; | ||
981 | default: | ||
982 | ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", | ||
983 | dccp_role(sk), sk, | ||
984 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
985 | return; | ||
986 | } | ||
987 | |||
988 | packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, | ||
989 | skb, SLAB_ATOMIC); | ||
990 | if (packet == NULL) { | ||
991 | ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " | ||
992 | "to history (consider it lost)!", | ||
993 | dccp_role(sk), sk); | ||
994 | return; | ||
995 | } | ||
996 | |||
997 | win_count = packet->dccphrx_ccval; | ||
998 | |||
999 | ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, | ||
1000 | &hcrx->ccid3hcrx_li_hist, packet); | ||
1001 | |||
1002 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) | ||
1003 | return; | ||
1004 | |||
1005 | switch (hcrx->ccid3hcrx_state) { | ||
1006 | case TFRC_RSTATE_NO_DATA: | ||
1007 | ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " | ||
1008 | "feedback\n", | ||
1009 | dccp_role(sk), sk, | ||
1010 | dccp_state_name(sk->sk_state), skb); | ||
1011 | ccid3_hc_rx_send_feedback(sk); | ||
1012 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); | ||
1013 | return; | ||
1014 | case TFRC_RSTATE_DATA: | ||
1015 | hcrx->ccid3hcrx_bytes_recv += skb->len - | ||
1016 | dccp_hdr(skb)->dccph_doff * 4; | ||
1017 | if (ins != 0) | ||
1018 | break; | ||
1019 | |||
1020 | do_gettimeofday(&now); | ||
1021 | if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= | ||
1022 | hcrx->ccid3hcrx_rtt) { | ||
1023 | hcrx->ccid3hcrx_tstamp_last_ack = now; | ||
1024 | ccid3_hc_rx_send_feedback(sk); | ||
1025 | } | ||
1026 | return; | ||
1027 | default: | ||
1028 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
1029 | __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); | ||
1030 | dump_stack(); | ||
1031 | return; | ||
1032 | } | ||
1033 | |||
1034 | /* Dealing with packet loss */ | ||
1035 | ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", | ||
1036 | dccp_role(sk), sk, dccp_state_name(sk->sk_state)); | ||
1037 | |||
1038 | ccid3_hc_rx_detect_loss(sk); | ||
1039 | p_prev = hcrx->ccid3hcrx_p; | ||
1040 | |||
1041 | /* Calculate loss event rate */ | ||
1042 | if (!list_empty(&hcrx->ccid3hcrx_li_hist)) | ||
1043 | /* Scaling up by 1000000 as fixed decimal */ | ||
1044 | hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist); | ||
1045 | |||
1046 | if (hcrx->ccid3hcrx_p > p_prev) { | ||
1047 | ccid3_hc_rx_send_feedback(sk); | ||
1048 | return; | ||
1049 | } | ||
1050 | } | ||
1051 | |||
1052 | static int ccid3_hc_rx_init(struct sock *sk) | ||
1053 | { | ||
1054 | struct dccp_sock *dp = dccp_sk(sk); | ||
1055 | struct ccid3_hc_rx_sock *hcrx; | ||
1056 | |||
1057 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
1058 | |||
1059 | hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), | ||
1060 | gfp_any()); | ||
1061 | if (hcrx == NULL) | ||
1062 | return -ENOMEM; | ||
1063 | |||
1064 | memset(hcrx, 0, sizeof(*hcrx)); | ||
1065 | |||
1066 | if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && | ||
1067 | dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) | ||
1068 | hcrx->ccid3hcrx_s = dp->dccps_packet_size; | ||
1069 | else | ||
1070 | hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; | ||
1071 | |||
1072 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; | ||
1073 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); | ||
1074 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); | ||
1075 | /* | ||
1076 | * XXX this seems to be paranoid, need to think more about this, for | ||
1077 | * now start with something different than zero. -acme | ||
1078 | */ | ||
1079 | hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5; | ||
1080 | return 0; | ||
1081 | } | ||
1082 | |||
1083 | static void ccid3_hc_rx_exit(struct sock *sk) | ||
1084 | { | ||
1085 | struct dccp_sock *dp = dccp_sk(sk); | ||
1086 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1087 | |||
1088 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
1089 | |||
1090 | if (hcrx == NULL) | ||
1091 | return; | ||
1092 | |||
1093 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); | ||
1094 | |||
1095 | /* Empty packet history */ | ||
1096 | dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); | ||
1097 | |||
1098 | /* Empty loss interval history */ | ||
1099 | dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); | ||
1100 | |||
1101 | kfree(dp->dccps_hc_rx_ccid_private); | ||
1102 | dp->dccps_hc_rx_ccid_private = NULL; | ||
1103 | } | ||
1104 | |||
1105 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) | ||
1106 | { | ||
1107 | const struct dccp_sock *dp = dccp_sk(sk); | ||
1108 | const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1109 | |||
1110 | if (hcrx == NULL) | ||
1111 | return; | ||
1112 | |||
1113 | info->tcpi_ca_state = hcrx->ccid3hcrx_state; | ||
1114 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; | ||
1115 | info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; | ||
1116 | } | ||
1117 | |||
1118 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | ||
1119 | { | ||
1120 | const struct dccp_sock *dp = dccp_sk(sk); | ||
1121 | const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1122 | |||
1123 | if (hctx == NULL) | ||
1124 | return; | ||
1125 | |||
1126 | info->tcpi_rto = hctx->ccid3hctx_t_rto; | ||
1127 | info->tcpi_rtt = hctx->ccid3hctx_rtt; | ||
1128 | } | ||
1129 | |||
1130 | static struct ccid ccid3 = { | ||
1131 | .ccid_id = 3, | ||
1132 | .ccid_name = "ccid3", | ||
1133 | .ccid_owner = THIS_MODULE, | ||
1134 | .ccid_init = ccid3_init, | ||
1135 | .ccid_exit = ccid3_exit, | ||
1136 | .ccid_hc_tx_init = ccid3_hc_tx_init, | ||
1137 | .ccid_hc_tx_exit = ccid3_hc_tx_exit, | ||
1138 | .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, | ||
1139 | .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, | ||
1140 | .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, | ||
1141 | .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, | ||
1142 | .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, | ||
1143 | .ccid_hc_rx_init = ccid3_hc_rx_init, | ||
1144 | .ccid_hc_rx_exit = ccid3_hc_rx_exit, | ||
1145 | .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, | ||
1146 | .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, | ||
1147 | .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, | ||
1148 | .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, | ||
1149 | }; | ||
1150 | |||
1151 | module_param(ccid3_debug, int, 0444); | ||
1152 | MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); | ||
1153 | |||
1154 | static __init int ccid3_module_init(void) | ||
1155 | { | ||
1156 | int rc = -ENOBUFS; | ||
1157 | |||
1158 | ccid3_rx_hist = dccp_rx_hist_new("ccid3"); | ||
1159 | if (ccid3_rx_hist == NULL) | ||
1160 | goto out; | ||
1161 | |||
1162 | ccid3_tx_hist = dccp_tx_hist_new("ccid3"); | ||
1163 | if (ccid3_tx_hist == NULL) | ||
1164 | goto out_free_rx; | ||
1165 | |||
1166 | ccid3_li_hist = dccp_li_hist_new("ccid3"); | ||
1167 | if (ccid3_li_hist == NULL) | ||
1168 | goto out_free_tx; | ||
1169 | |||
1170 | rc = ccid_register(&ccid3); | ||
1171 | if (rc != 0) | ||
1172 | goto out_free_loss_interval_history; | ||
1173 | out: | ||
1174 | return rc; | ||
1175 | |||
1176 | out_free_loss_interval_history: | ||
1177 | dccp_li_hist_delete(ccid3_li_hist); | ||
1178 | ccid3_li_hist = NULL; | ||
1179 | out_free_tx: | ||
1180 | dccp_tx_hist_delete(ccid3_tx_hist); | ||
1181 | ccid3_tx_hist = NULL; | ||
1182 | out_free_rx: | ||
1183 | dccp_rx_hist_delete(ccid3_rx_hist); | ||
1184 | ccid3_rx_hist = NULL; | ||
1185 | goto out; | ||
1186 | } | ||
1187 | module_init(ccid3_module_init); | ||
1188 | |||
1189 | static __exit void ccid3_module_exit(void) | ||
1190 | { | ||
1191 | #ifdef CONFIG_IP_DCCP_UNLOAD_HACK | ||
1192 | /* | ||
1193 | * Hack to use while developing, so that we get rid of the control | ||
1194 | * sock, that is what keeps a refcount on dccp.ko -acme | ||
1195 | */ | ||
1196 | extern void dccp_ctl_sock_exit(void); | ||
1197 | |||
1198 | dccp_ctl_sock_exit(); | ||
1199 | #endif | ||
1200 | ccid_unregister(&ccid3); | ||
1201 | |||
1202 | if (ccid3_tx_hist != NULL) { | ||
1203 | dccp_tx_hist_delete(ccid3_tx_hist); | ||
1204 | ccid3_tx_hist = NULL; | ||
1205 | } | ||
1206 | if (ccid3_rx_hist != NULL) { | ||
1207 | dccp_rx_hist_delete(ccid3_rx_hist); | ||
1208 | ccid3_rx_hist = NULL; | ||
1209 | } | ||
1210 | if (ccid3_li_hist != NULL) { | ||
1211 | dccp_li_hist_delete(ccid3_li_hist); | ||
1212 | ccid3_li_hist = NULL; | ||
1213 | } | ||
1214 | } | ||
1215 | module_exit(ccid3_module_exit); | ||
1216 | |||
1217 | MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " | ||
1218 | "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); | ||
1219 | MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); | ||
1220 | MODULE_LICENSE("GPL"); | ||
1221 | MODULE_ALIAS("net-dccp-ccid-3"); | ||
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h new file mode 100644 index 000000000000..ee8cbace6630 --- /dev/null +++ b/net/dccp/ccids/ccid3.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/ccid3.h | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | #ifndef _DCCP_CCID3_H_ | ||
37 | #define _DCCP_CCID3_H_ | ||
38 | |||
39 | #include <linux/config.h> | ||
40 | #include <linux/list.h> | ||
41 | #include <linux/time.h> | ||
42 | #include <linux/types.h> | ||
43 | |||
/* Packet size bounds and default; presumably in bytes - TODO confirm against the users in ccid3.c */
44 | #define TFRC_MIN_PACKET_SIZE 16 | ||
45 | #define TFRC_STD_PACKET_SIZE 256 | ||
46 | #define TFRC_MAX_PACKET_SIZE 65535 | ||
47 | |||
48 | /* Two seconds as per CCID3 spec */ | ||
/* Expressed in microseconds (2 * USEC_PER_SEC). */
49 | #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) | ||
50 | |||
51 | /* In usecs - half the scheduling granularity as per RFC3448 4.6 */ | ||
/* One scheduler tick is USEC_PER_SEC / HZ usecs, so half a tick is USEC_PER_SEC / (2 * HZ). */
52 | #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) | ||
53 | |||
54 | /* In seconds */ | ||
55 | #define TFRC_MAX_BACK_OFF_TIME 64 | ||
56 | |||
/*
 * NOTE(review): no unit is given here. ccid3hctx_p is documented as a loss
 * event rate scaled by 1000000, so 40 presumably stands for 0.00004 - confirm.
 */
57 | #define TFRC_SMALLEST_P 40 | ||
58 | |||
/* Option type codes for the three CCID3 (TFRC) options, numbered 192..194. */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS,	/* 193 */
	TFRC_OPT_RECEIVE_RATE,		/* 194 */
};
64 | |||
65 | struct ccid3_options_received { | ||
66 | u64 ccid3or_seqno:48, | ||
67 | ccid3or_loss_intervals_idx:16; | ||
68 | u16 ccid3or_loss_intervals_len; | ||
69 | u32 ccid3or_loss_event_rate; | ||
70 | u32 ccid3or_receive_rate; | ||
71 | }; | ||
72 | |||
73 | /** struct ccid3_hc_tx_sock - CCID3 sender half connection sock | ||
74 | * | ||
75 | * @ccid3hctx_state - Sender state | ||
76 | * @ccid3hctx_x - Current sending rate | ||
77 | * @ccid3hctx_x_recv - Receive rate | ||
78 | * @ccid3hctx_x_calc - Calculated send (?) rate | ||
79 | * @ccid3hctx_s - Packet size | ||
80 | * @ccid3hctx_rtt - Estimate of current round trip time in usecs | ||
81 | * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 | ||
82 | * @ccid3hctx_last_win_count - Last window counter sent | ||
83 | * @ccid3hctx_t_last_win_count - Timestamp of earliest packet | ||
84 | * with last_win_count value sent | ||
85 | * @ccid3hctx_no_feedback_timer - Handle to no feedback timer | ||
86 | * @ccid3hctx_idle - FIXME | ||
87 | * @ccid3hctx_t_ld - Time last doubled during slow start | ||
88 | * @ccid3hctx_t_nom - Nominal send time of next packet | ||
89 | * @ccid3hctx_t_ipi - Interpacket (send) interval | ||
90 | * @ccid3hctx_delta - Send timer delta | ||
91 | * @ccid3hctx_hist - Packet history | ||
92 | */ | ||
93 | struct ccid3_hc_tx_sock { | ||
94 | u32 ccid3hctx_x; | ||
95 | u32 ccid3hctx_x_recv; | ||
96 | u32 ccid3hctx_x_calc; | ||
97 | u16 ccid3hctx_s; | ||
98 | u32 ccid3hctx_rtt; | ||
99 | u32 ccid3hctx_p; | ||
100 | u8 ccid3hctx_state; | ||
101 | u8 ccid3hctx_last_win_count; | ||
102 | u8 ccid3hctx_idle; | ||
103 | struct timeval ccid3hctx_t_last_win_count; | ||
104 | struct timer_list ccid3hctx_no_feedback_timer; | ||
105 | struct timeval ccid3hctx_t_ld; | ||
106 | struct timeval ccid3hctx_t_nom; | ||
107 | u32 ccid3hctx_t_rto; | ||
108 | u32 ccid3hctx_t_ipi; | ||
109 | u32 ccid3hctx_delta; | ||
110 | struct list_head ccid3hctx_hist; | ||
111 | struct ccid3_options_received ccid3hctx_options_received; | ||
112 | }; | ||
113 | |||
114 | struct ccid3_hc_rx_sock { | ||
115 | u64 ccid3hcrx_seqno_last_counter:48, | ||
116 | ccid3hcrx_state:8, | ||
117 | ccid3hcrx_last_counter:4; | ||
118 | unsigned long ccid3hcrx_rtt; | ||
119 | u32 ccid3hcrx_p; | ||
120 | u32 ccid3hcrx_bytes_recv; | ||
121 | struct timeval ccid3hcrx_tstamp_last_feedback; | ||
122 | struct timeval ccid3hcrx_tstamp_last_ack; | ||
123 | struct list_head ccid3hcrx_hist; | ||
124 | struct list_head ccid3hcrx_li_hist; | ||
125 | u16 ccid3hcrx_s; | ||
126 | u32 ccid3hcrx_pinv; | ||
127 | u32 ccid3hcrx_elapsed_time; | ||
128 | u32 ccid3hcrx_x_recv; | ||
129 | }; | ||
130 | |||
/*
 * ccid3_hc_tx_field / ccid3_hc_rx_field - read one member of the CCID3 private
 * area hanging off a DCCP socket.
 *
 * @s:     pointer to an object with dccps_hc_tx_ccid_private /
 *         dccps_hc_rx_ccid_private members
 * @field: member name without its ccid3hctx_ / ccid3hcrx_ prefix
 *
 * Evaluates to 0 when the private pointer is NULL, otherwise to the member.
 * The argument is parenthesized so that any pointer expression (e.g. "dp + 1")
 * can be passed. Note that @s is still evaluated twice, so it must not have
 * side effects.
 */
#define ccid3_hc_tx_field(s, field) \
	((s)->dccps_hc_tx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_tx_sock *)(s)->dccps_hc_tx_ccid_private)->ccid3hctx_##field)

#define ccid3_hc_rx_field(s, field) \
	((s)->dccps_hc_rx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_rx_sock *)(s)->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
136 | |||
137 | #endif /* _DCCP_CCID3_H_ */ | ||
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile new file mode 100644 index 000000000000..5f940a6cbaca --- /dev/null +++ b/net/dccp/ccids/lib/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o | ||
2 | |||
3 | dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o | ||
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c new file mode 100644 index 000000000000..4c01a54143ad --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.c | |||
@@ -0,0 +1,144 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/loss_interval.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include "loss_interval.h" | ||
18 | |||
19 | struct dccp_li_hist *dccp_li_hist_new(const char *name) | ||
20 | { | ||
21 | struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | ||
22 | static const char dccp_li_hist_mask[] = "li_hist_%s"; | ||
23 | char *slab_name; | ||
24 | |||
25 | if (hist == NULL) | ||
26 | goto out; | ||
27 | |||
28 | slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, | ||
29 | GFP_ATOMIC); | ||
30 | if (slab_name == NULL) | ||
31 | goto out_free_hist; | ||
32 | |||
33 | sprintf(slab_name, dccp_li_hist_mask, name); | ||
34 | hist->dccplih_slab = kmem_cache_create(slab_name, | ||
35 | sizeof(struct dccp_li_hist_entry), | ||
36 | 0, SLAB_HWCACHE_ALIGN, | ||
37 | NULL, NULL); | ||
38 | if (hist->dccplih_slab == NULL) | ||
39 | goto out_free_slab_name; | ||
40 | out: | ||
41 | return hist; | ||
42 | out_free_slab_name: | ||
43 | kfree(slab_name); | ||
44 | out_free_hist: | ||
45 | kfree(hist); | ||
46 | hist = NULL; | ||
47 | goto out; | ||
48 | } | ||
49 | |||
50 | EXPORT_SYMBOL_GPL(dccp_li_hist_new); | ||
51 | |||
52 | void dccp_li_hist_delete(struct dccp_li_hist *hist) | ||
53 | { | ||
54 | const char* name = kmem_cache_name(hist->dccplih_slab); | ||
55 | |||
56 | kmem_cache_destroy(hist->dccplih_slab); | ||
57 | kfree(name); | ||
58 | kfree(hist); | ||
59 | } | ||
60 | |||
61 | EXPORT_SYMBOL_GPL(dccp_li_hist_delete); | ||
62 | |||
63 | void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) | ||
64 | { | ||
65 | struct dccp_li_hist_entry *entry, *next; | ||
66 | |||
67 | list_for_each_entry_safe(entry, next, list, dccplih_node) { | ||
68 | list_del_init(&entry->dccplih_node); | ||
69 | kmem_cache_free(hist->dccplih_slab, entry); | ||
70 | } | ||
71 | } | ||
72 | |||
73 | EXPORT_SYMBOL_GPL(dccp_li_hist_purge); | ||
74 | |||
75 | /* Weights used to calculate loss event rate */ | ||
76 | /* | ||
77 | * These are integers as per section 8 of RFC3448. We can then divide by 4 * | ||
78 | * when we use it. | ||
79 | */ | ||
80 | static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { | ||
81 | 4, 4, 4, 4, 3, 2, 1, 1, | ||
82 | }; | ||
83 | |||
84 | u32 dccp_li_hist_calc_i_mean(struct list_head *list) | ||
85 | { | ||
86 | struct dccp_li_hist_entry *li_entry, *li_next; | ||
87 | int i = 0; | ||
88 | u32 i_tot; | ||
89 | u32 i_tot0 = 0; | ||
90 | u32 i_tot1 = 0; | ||
91 | u32 w_tot = 0; | ||
92 | |||
93 | list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { | ||
94 | if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { | ||
95 | i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; | ||
96 | w_tot += dccp_li_hist_w[i]; | ||
97 | } | ||
98 | |||
99 | if (i != 0) | ||
100 | i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; | ||
101 | |||
102 | if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) | ||
103 | break; | ||
104 | } | ||
105 | |||
106 | if (i != DCCP_LI_HIST_IVAL_F_LENGTH) | ||
107 | return 0; | ||
108 | |||
109 | i_tot = max(i_tot0, i_tot1); | ||
110 | |||
111 | /* FIXME: Why do we do this? -Ian McDonald */ | ||
112 | if (i_tot * 4 < w_tot) | ||
113 | i_tot = w_tot * 4; | ||
114 | |||
115 | return i_tot * 4 / w_tot; | ||
116 | } | ||
117 | |||
118 | EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); | ||
119 | |||
120 | struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, | ||
121 | struct list_head *list, | ||
122 | const u64 seq_loss, | ||
123 | const u8 win_loss) | ||
124 | { | ||
125 | struct dccp_li_hist_entry *tail = NULL, *entry; | ||
126 | int i; | ||
127 | |||
128 | for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { | ||
129 | entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); | ||
130 | if (entry == NULL) { | ||
131 | dccp_li_hist_purge(hist, list); | ||
132 | return NULL; | ||
133 | } | ||
134 | if (tail == NULL) | ||
135 | tail = entry; | ||
136 | list_add(&entry->dccplih_node, list); | ||
137 | } | ||
138 | |||
139 | entry->dccplih_seqno = seq_loss; | ||
140 | entry->dccplih_win_count = win_loss; | ||
141 | return tail; | ||
142 | } | ||
143 | |||
144 | EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); | ||
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h new file mode 100644 index 000000000000..13ad47ba1420 --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.h | |||
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _DCCP_LI_HIST_ | ||
2 | #define _DCCP_LI_HIST_ | ||
3 | /* | ||
4 | * net/dccp/ccids/lib/loss_interval.h | ||
5 | * | ||
6 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
7 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
8 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/list.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/time.h> | ||
20 | |||
21 | #define DCCP_LI_HIST_IVAL_F_LENGTH 8 | ||
22 | |||
23 | struct dccp_li_hist { | ||
24 | kmem_cache_t *dccplih_slab; | ||
25 | }; | ||
26 | |||
27 | extern struct dccp_li_hist *dccp_li_hist_new(const char *name); | ||
28 | extern void dccp_li_hist_delete(struct dccp_li_hist *hist); | ||
29 | |||
30 | struct dccp_li_hist_entry { | ||
31 | struct list_head dccplih_node; | ||
32 | u64 dccplih_seqno:48, | ||
33 | dccplih_win_count:4; | ||
34 | u32 dccplih_interval; | ||
35 | }; | ||
36 | |||
37 | static inline struct dccp_li_hist_entry * | ||
38 | dccp_li_hist_entry_new(struct dccp_li_hist *hist, | ||
39 | const unsigned int __nocast prio) | ||
40 | { | ||
41 | return kmem_cache_alloc(hist->dccplih_slab, prio); | ||
42 | } | ||
43 | |||
44 | static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, | ||
45 | struct dccp_li_hist_entry *entry) | ||
46 | { | ||
47 | if (entry != NULL) | ||
48 | kmem_cache_free(hist->dccplih_slab, entry); | ||
49 | } | ||
50 | |||
51 | extern void dccp_li_hist_purge(struct dccp_li_hist *hist, | ||
52 | struct list_head *list); | ||
53 | |||
54 | extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); | ||
55 | |||
56 | extern struct dccp_li_hist_entry * | ||
57 | dccp_li_hist_interval_new(struct dccp_li_hist *hist, | ||
58 | struct list_head *list, | ||
59 | const u64 seq_loss, | ||
60 | const u8 win_loss); | ||
61 | #endif /* _DCCP_LI_HIST_ */ | ||
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c new file mode 100644 index 000000000000..d3f9d2053830 --- /dev/null +++ b/net/dccp/ccids/lib/packet_history.c | |||
@@ -0,0 +1,398 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/packet_history.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #include <linux/config.h> | ||
38 | #include <linux/module.h> | ||
39 | #include <linux/string.h> | ||
40 | |||
41 | #include "packet_history.h" | ||
42 | |||
43 | struct dccp_rx_hist *dccp_rx_hist_new(const char *name) | ||
44 | { | ||
45 | struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | ||
46 | static const char dccp_rx_hist_mask[] = "rx_hist_%s"; | ||
47 | char *slab_name; | ||
48 | |||
49 | if (hist == NULL) | ||
50 | goto out; | ||
51 | |||
52 | slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, | ||
53 | GFP_ATOMIC); | ||
54 | if (slab_name == NULL) | ||
55 | goto out_free_hist; | ||
56 | |||
57 | sprintf(slab_name, dccp_rx_hist_mask, name); | ||
58 | hist->dccprxh_slab = kmem_cache_create(slab_name, | ||
59 | sizeof(struct dccp_rx_hist_entry), | ||
60 | 0, SLAB_HWCACHE_ALIGN, | ||
61 | NULL, NULL); | ||
62 | if (hist->dccprxh_slab == NULL) | ||
63 | goto out_free_slab_name; | ||
64 | out: | ||
65 | return hist; | ||
66 | out_free_slab_name: | ||
67 | kfree(slab_name); | ||
68 | out_free_hist: | ||
69 | kfree(hist); | ||
70 | hist = NULL; | ||
71 | goto out; | ||
72 | } | ||
73 | |||
74 | EXPORT_SYMBOL_GPL(dccp_rx_hist_new); | ||
75 | |||
76 | void dccp_rx_hist_delete(struct dccp_rx_hist *hist) | ||
77 | { | ||
78 | const char* name = kmem_cache_name(hist->dccprxh_slab); | ||
79 | |||
80 | kmem_cache_destroy(hist->dccprxh_slab); | ||
81 | kfree(name); | ||
82 | kfree(hist); | ||
83 | } | ||
84 | |||
85 | EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); | ||
86 | |||
87 | void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) | ||
88 | { | ||
89 | struct dccp_rx_hist_entry *entry, *next; | ||
90 | |||
91 | list_for_each_entry_safe(entry, next, list, dccphrx_node) { | ||
92 | list_del_init(&entry->dccphrx_node); | ||
93 | kmem_cache_free(hist->dccprxh_slab, entry); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); | ||
98 | |||
99 | struct dccp_rx_hist_entry * | ||
100 | dccp_rx_hist_find_data_packet(const struct list_head *list) | ||
101 | { | ||
102 | struct dccp_rx_hist_entry *entry, *packet = NULL; | ||
103 | |||
104 | list_for_each_entry(entry, list, dccphrx_node) | ||
105 | if (entry->dccphrx_type == DCCP_PKT_DATA || | ||
106 | entry->dccphrx_type == DCCP_PKT_DATAACK) { | ||
107 | packet = entry; | ||
108 | break; | ||
109 | } | ||
110 | |||
111 | return packet; | ||
112 | } | ||
113 | |||
114 | EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); | ||
115 | |||
/*
 * dccp_rx_hist_add_packet - insert @packet into the receive history @rx_list
 * and then trim the history.
 *
 * @hist:    allocator used to free entries that are dropped
 * @rx_list: receive history; the code compares @packet against the head first,
 *           so the list is presumably kept newest-first - TODO confirm
 * @li_list: loss interval history; only tested for emptiness here, to choose
 *           the trimming strategy
 * @packet:  entry to insert; freed here when the function returns 1
 *
 * Returns 1 when TFRC_RECV_NUM_LATE_LOSS data packets were counted in the list
 * before a position for @packet was found (the packet is then deleted), and 0
 * otherwise.
 *
 * after48() is presumably the 48-bit "is later than" sequence comparison -
 * confirm in dccp.h.
 */
116 | int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | ||
117 | struct list_head *rx_list, | ||
118 | struct list_head *li_list, | ||
119 | struct dccp_rx_hist_entry *packet) | ||
120 | { | ||
121 | struct dccp_rx_hist_entry *entry, *next, *iter; | ||
122 | u8 num_later = 0; | ||
123 | |||
124 | iter = dccp_rx_hist_head(rx_list); | ||
125 | if (iter == NULL) | ||
126 | dccp_rx_hist_add_entry(rx_list, packet); | ||
127 | else { | ||
128 | const u64 seqno = packet->dccphrx_seqno; | ||
129 | |||
130 | if (after48(seqno, iter->dccphrx_seqno)) | ||
131 | dccp_rx_hist_add_entry(rx_list, packet); | ||
132 | else { | ||
/* num_later counts the data packets passed over so far, starting with the head. */
133 | if (dccp_rx_hist_entry_data_packet(iter)) | ||
134 | num_later = 1; | ||
135 | |||
136 | list_for_each_entry_continue(iter, rx_list, | ||
137 | dccphrx_node) { | ||
138 | if (after48(seqno, iter->dccphrx_seqno)) { | ||
/*
 * NOTE(review): dccp_rx_hist_add_entry() is list_add(), which links the new
 * node AFTER the node it is given. Here that places @packet behind iter even
 * though after48() just reported @packet as the later one - confirm whether
 * insertion before iter was intended.
 */
139 | dccp_rx_hist_add_entry(&iter->dccphrx_node, | ||
140 | packet); | ||
141 | goto trim_history; | ||
142 | } | ||
143 | |||
144 | if (dccp_rx_hist_entry_data_packet(iter)) | ||
145 | num_later++; | ||
146 | |||
147 | if (num_later == TFRC_RECV_NUM_LATE_LOSS) { | ||
148 | dccp_rx_hist_entry_delete(hist, packet); | ||
149 | return 1; | ||
150 | } | ||
151 | } | ||
152 | |||
/*
 * num_later starts at 0 or 1, grows by at most one per iteration and is tested
 * against TFRC_RECV_NUM_LATE_LOSS (3) right after each increment, so the loop
 * can only fall through with num_later below the limit: the condition below
 * is always true and the FIXME's "else" case cannot occur.
 *
 * NOTE(review): the packet reached here is not later than any entry, yet
 * list_add() on rx_list puts it at the head - confirm whether the tail was
 * intended.
 */
153 | if (num_later < TFRC_RECV_NUM_LATE_LOSS) | ||
154 | dccp_rx_hist_add_entry(rx_list, packet); | ||
155 | /* | ||
156 | * FIXME: else what? should we destroy the packet | ||
157 | * like above? | ||
158 | */ | ||
159 | } | ||
160 | } | ||
161 | |||
162 | trim_history: | ||
163 | /* | ||
164 | * Trim history (remove all packets after the NUM_LATE_LOSS + 1 | ||
165 | * data packets) | ||
166 | */ | ||
167 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; | ||
168 | |||
169 | if (!list_empty(li_list)) { | ||
/* Count down over data packets; once the counter hits 0 every further entry is freed. */
170 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
171 | if (num_later == 0) { | ||
172 | list_del_init(&entry->dccphrx_node); | ||
173 | dccp_rx_hist_entry_delete(hist, entry); | ||
174 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
175 | --num_later; | ||
176 | } | ||
177 | } else { | ||
178 | int step = 0; | ||
179 | u8 win_count = 0; /* Not needed, but lets shut up gcc */ | ||
180 | int tmp; | ||
181 | /* | ||
182 | * We have no loss interval history so we need at least one | ||
183 | * rtt:s of data packets to approximate rtt. | ||
184 | */ | ||
/*
 * Small state machine driven each time num_later reaches 0:
 *   step 0 -> 1: skip ahead past one more data packet
 *   step 1 -> 2: remember this entry's ccval in win_count, skip one more
 *   step 2:      compare ccval distance (modulo TFRC_WIN_COUNT_LIMIT) with
 *                TFRC_WIN_COUNT_PER_RTT + 1; move to step 3 once exceeded
 *   step 3:      delete every remaining entry
 */
185 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
186 | if (num_later == 0) { | ||
187 | switch (step) { | ||
188 | case 0: | ||
189 | step = 1; | ||
190 | /* OK, find next data packet */ | ||
191 | num_later = 1; | ||
192 | break; | ||
193 | case 1: | ||
194 | step = 2; | ||
195 | /* OK, find next data packet */ | ||
196 | num_later = 1; | ||
197 | win_count = entry->dccphrx_ccval; | ||
198 | break; | ||
199 | case 2: | ||
200 | tmp = win_count - entry->dccphrx_ccval; | ||
201 | if (tmp < 0) | ||
202 | tmp += TFRC_WIN_COUNT_LIMIT; | ||
203 | if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { | ||
204 | /* | ||
205 | * We have found a packet older | ||
206 | * than one rtt remove the rest | ||
207 | */ | ||
208 | step = 3; | ||
209 | } else /* OK, find next data packet */ | ||
210 | num_later = 1; | ||
211 | break; | ||
212 | case 3: | ||
213 | list_del_init(&entry->dccphrx_node); | ||
214 | dccp_rx_hist_entry_delete(hist, entry); | ||
215 | break; | ||
216 | } | ||
217 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
218 | --num_later; | ||
219 | } | ||
220 | } | ||
221 | |||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); | ||
226 | |||
227 | u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | ||
228 | struct list_head *li_list, u8 *win_loss) | ||
229 | { | ||
230 | struct dccp_rx_hist_entry *entry, *next, *packet; | ||
231 | struct dccp_rx_hist_entry *a_loss = NULL; | ||
232 | struct dccp_rx_hist_entry *b_loss = NULL; | ||
233 | u64 seq_loss = DCCP_MAX_SEQNO + 1; | ||
234 | u8 num_later = TFRC_RECV_NUM_LATE_LOSS; | ||
235 | |||
236 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
237 | if (num_later == 0) { | ||
238 | b_loss = entry; | ||
239 | break; | ||
240 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
241 | --num_later; | ||
242 | } | ||
243 | |||
244 | if (b_loss == NULL) | ||
245 | goto out; | ||
246 | |||
247 | num_later = 1; | ||
248 | list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { | ||
249 | if (num_later == 0) { | ||
250 | a_loss = entry; | ||
251 | break; | ||
252 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
253 | --num_later; | ||
254 | } | ||
255 | |||
256 | if (a_loss == NULL) { | ||
257 | if (list_empty(li_list)) { | ||
258 | /* no loss event have occured yet */ | ||
259 | LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " | ||
260 | "comparing to initial seqno\n", | ||
261 | __FUNCTION__); | ||
262 | goto out; | ||
263 | } else { | ||
264 | LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", | ||
265 | __FUNCTION__); | ||
266 | goto out; | ||
267 | } | ||
268 | } | ||
269 | |||
270 | /* Locate a lost data packet */ | ||
271 | entry = packet = b_loss; | ||
272 | list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { | ||
273 | u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, | ||
274 | packet->dccphrx_seqno); | ||
275 | |||
276 | if (delta != 0) { | ||
277 | if (dccp_rx_hist_entry_data_packet(packet)) | ||
278 | --delta; | ||
279 | /* | ||
280 | * FIXME: check this, probably this % usage is because | ||
281 | * in earlier drafts the ndp count was just 8 bits | ||
282 | * long, but now it cam be up to 24 bits long. | ||
283 | */ | ||
284 | #if 0 | ||
285 | if (delta % DCCP_NDP_LIMIT != | ||
286 | (packet->dccphrx_ndp - | ||
287 | entry->dccphrx_ndp) % DCCP_NDP_LIMIT) | ||
288 | #endif | ||
289 | if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { | ||
290 | seq_loss = entry->dccphrx_seqno; | ||
291 | dccp_inc_seqno(&seq_loss); | ||
292 | } | ||
293 | } | ||
294 | packet = entry; | ||
295 | if (packet == a_loss) | ||
296 | break; | ||
297 | } | ||
298 | out: | ||
299 | if (seq_loss != DCCP_MAX_SEQNO + 1) | ||
300 | *win_loss = a_loss->dccphrx_ccval; | ||
301 | else | ||
302 | *win_loss = 0; /* Paranoia */ | ||
303 | |||
304 | return seq_loss; | ||
305 | } | ||
306 | |||
307 | EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); | ||
308 | |||
309 | struct dccp_tx_hist *dccp_tx_hist_new(const char *name) | ||
310 | { | ||
311 | struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | ||
312 | static const char dccp_tx_hist_mask[] = "tx_hist_%s"; | ||
313 | char *slab_name; | ||
314 | |||
315 | if (hist == NULL) | ||
316 | goto out; | ||
317 | |||
318 | slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, | ||
319 | GFP_ATOMIC); | ||
320 | if (slab_name == NULL) | ||
321 | goto out_free_hist; | ||
322 | |||
323 | sprintf(slab_name, dccp_tx_hist_mask, name); | ||
324 | hist->dccptxh_slab = kmem_cache_create(slab_name, | ||
325 | sizeof(struct dccp_tx_hist_entry), | ||
326 | 0, SLAB_HWCACHE_ALIGN, | ||
327 | NULL, NULL); | ||
328 | if (hist->dccptxh_slab == NULL) | ||
329 | goto out_free_slab_name; | ||
330 | out: | ||
331 | return hist; | ||
332 | out_free_slab_name: | ||
333 | kfree(slab_name); | ||
334 | out_free_hist: | ||
335 | kfree(hist); | ||
336 | hist = NULL; | ||
337 | goto out; | ||
338 | } | ||
339 | |||
340 | EXPORT_SYMBOL_GPL(dccp_tx_hist_new); | ||
341 | |||
342 | void dccp_tx_hist_delete(struct dccp_tx_hist *hist) | ||
343 | { | ||
344 | const char* name = kmem_cache_name(hist->dccptxh_slab); | ||
345 | |||
346 | kmem_cache_destroy(hist->dccptxh_slab); | ||
347 | kfree(name); | ||
348 | kfree(hist); | ||
349 | } | ||
350 | |||
351 | EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); | ||
352 | |||
353 | struct dccp_tx_hist_entry * | ||
354 | dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) | ||
355 | { | ||
356 | struct dccp_tx_hist_entry *packet = NULL, *entry; | ||
357 | |||
358 | list_for_each_entry(entry, list, dccphtx_node) | ||
359 | if (entry->dccphtx_seqno == seq) { | ||
360 | packet = entry; | ||
361 | break; | ||
362 | } | ||
363 | |||
364 | return packet; | ||
365 | } | ||
366 | |||
367 | EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); | ||
368 | |||
369 | void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, | ||
370 | struct list_head *list, | ||
371 | struct dccp_tx_hist_entry *packet) | ||
372 | { | ||
373 | struct dccp_tx_hist_entry *next; | ||
374 | |||
375 | list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { | ||
376 | list_del_init(&packet->dccphtx_node); | ||
377 | dccp_tx_hist_entry_delete(hist, packet); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); | ||
382 | |||
383 | void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) | ||
384 | { | ||
385 | struct dccp_tx_hist_entry *entry, *next; | ||
386 | |||
387 | list_for_each_entry_safe(entry, next, list, dccphtx_node) { | ||
388 | list_del_init(&entry->dccphtx_node); | ||
389 | dccp_tx_hist_entry_delete(hist, entry); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); | ||
394 | |||
395 | MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " | ||
396 | "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); | ||
397 | MODULE_DESCRIPTION("DCCP TFRC library"); | ||
398 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h new file mode 100644 index 000000000000..fb90a91aa93d --- /dev/null +++ b/net/dccp/ccids/lib/packet_history.h | |||
@@ -0,0 +1,199 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/packet_history.h | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #ifndef _DCCP_PKT_HIST_ | ||
38 | #define _DCCP_PKT_HIST_ | ||
39 | |||
40 | #include <linux/config.h> | ||
41 | #include <linux/list.h> | ||
42 | #include <linux/slab.h> | ||
43 | #include <linux/time.h> | ||
44 | |||
45 | #include "../../dccp.h" | ||
46 | |||
47 | /* Number of later packets received before one is considered lost */ | ||
48 | #define TFRC_RECV_NUM_LATE_LOSS 3 | ||
49 | |||
/*
 * Window counter (ccval) arithmetic: ccval is stored in 4-bit fields, so
 * counter differences are taken modulo TFRC_WIN_COUNT_LIMIT. A difference
 * above TFRC_WIN_COUNT_PER_RTT + 1 is treated as "older than one rtt" by
 * dccp_rx_hist_add_packet().
 */
50 | #define TFRC_WIN_COUNT_PER_RTT 4 | ||
51 | #define TFRC_WIN_COUNT_LIMIT 16 | ||
52 | |||
53 | struct dccp_tx_hist_entry { | ||
54 | struct list_head dccphtx_node; | ||
55 | u64 dccphtx_seqno:48, | ||
56 | dccphtx_ccval:4, | ||
57 | dccphtx_sent:1; | ||
58 | u32 dccphtx_rtt; | ||
59 | struct timeval dccphtx_tstamp; | ||
60 | }; | ||
61 | |||
62 | struct dccp_rx_hist_entry { | ||
63 | struct list_head dccphrx_node; | ||
64 | u64 dccphrx_seqno:48, | ||
65 | dccphrx_ccval:4, | ||
66 | dccphrx_type:4; | ||
67 | u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ | ||
68 | struct timeval dccphrx_tstamp; | ||
69 | }; | ||
70 | |||
71 | struct dccp_tx_hist { | ||
72 | kmem_cache_t *dccptxh_slab; | ||
73 | }; | ||
74 | |||
75 | extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); | ||
76 | extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); | ||
77 | |||
78 | struct dccp_rx_hist { | ||
79 | kmem_cache_t *dccprxh_slab; | ||
80 | }; | ||
81 | |||
82 | extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); | ||
83 | extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); | ||
84 | extern struct dccp_rx_hist_entry * | ||
85 | dccp_rx_hist_find_data_packet(const struct list_head *list); | ||
86 | |||
87 | static inline struct dccp_tx_hist_entry * | ||
88 | dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, | ||
89 | const unsigned int __nocast prio) | ||
90 | { | ||
91 | struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, | ||
92 | prio); | ||
93 | |||
94 | if (entry != NULL) | ||
95 | entry->dccphtx_sent = 0; | ||
96 | |||
97 | return entry; | ||
98 | } | ||
99 | |||
100 | static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, | ||
101 | struct dccp_tx_hist_entry *entry) | ||
102 | { | ||
103 | if (entry != NULL) | ||
104 | kmem_cache_free(hist->dccptxh_slab, entry); | ||
105 | } | ||
106 | |||
107 | extern struct dccp_tx_hist_entry * | ||
108 | dccp_tx_hist_find_entry(const struct list_head *list, | ||
109 | const u64 seq); | ||
110 | |||
111 | static inline void dccp_tx_hist_add_entry(struct list_head *list, | ||
112 | struct dccp_tx_hist_entry *entry) | ||
113 | { | ||
114 | list_add(&entry->dccphtx_node, list); | ||
115 | } | ||
116 | |||
117 | extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, | ||
118 | struct list_head *list, | ||
119 | struct dccp_tx_hist_entry *next); | ||
120 | |||
121 | extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, | ||
122 | struct list_head *list); | ||
123 | |||
124 | static inline struct dccp_tx_hist_entry * | ||
125 | dccp_tx_hist_head(struct list_head *list) | ||
126 | { | ||
127 | struct dccp_tx_hist_entry *head = NULL; | ||
128 | |||
129 | if (!list_empty(list)) | ||
130 | head = list_entry(list->next, struct dccp_tx_hist_entry, | ||
131 | dccphtx_node); | ||
132 | return head; | ||
133 | } | ||
134 | |||
135 | static inline struct dccp_rx_hist_entry * | ||
136 | dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, | ||
137 | const u32 ndp, | ||
138 | const struct sk_buff *skb, | ||
139 | const unsigned int __nocast prio) | ||
140 | { | ||
141 | struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, | ||
142 | prio); | ||
143 | |||
144 | if (entry != NULL) { | ||
145 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
146 | |||
147 | entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
148 | entry->dccphrx_ccval = dh->dccph_ccval; | ||
149 | entry->dccphrx_type = dh->dccph_type; | ||
150 | entry->dccphrx_ndp = ndp; | ||
151 | do_gettimeofday(&(entry->dccphrx_tstamp)); | ||
152 | } | ||
153 | |||
154 | return entry; | ||
155 | } | ||
156 | |||
157 | static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, | ||
158 | struct dccp_rx_hist_entry *entry) | ||
159 | { | ||
160 | if (entry != NULL) | ||
161 | kmem_cache_free(hist->dccprxh_slab, entry); | ||
162 | } | ||
163 | |||
164 | extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, | ||
165 | struct list_head *list); | ||
166 | |||
167 | static inline void dccp_rx_hist_add_entry(struct list_head *list, | ||
168 | struct dccp_rx_hist_entry *entry) | ||
169 | { | ||
170 | list_add(&entry->dccphrx_node, list); | ||
171 | } | ||
172 | |||
173 | static inline struct dccp_rx_hist_entry * | ||
174 | dccp_rx_hist_head(struct list_head *list) | ||
175 | { | ||
176 | struct dccp_rx_hist_entry *head = NULL; | ||
177 | |||
178 | if (!list_empty(list)) | ||
179 | head = list_entry(list->next, struct dccp_rx_hist_entry, | ||
180 | dccphrx_node); | ||
181 | return head; | ||
182 | } | ||
183 | |||
184 | static inline int | ||
185 | dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) | ||
186 | { | ||
187 | return entry->dccphrx_type == DCCP_PKT_DATA || | ||
188 | entry->dccphrx_type == DCCP_PKT_DATAACK; | ||
189 | } | ||
190 | |||
191 | extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | ||
192 | struct list_head *rx_list, | ||
193 | struct list_head *li_list, | ||
194 | struct dccp_rx_hist_entry *packet); | ||
195 | |||
196 | extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | ||
197 | struct list_head *li_list, u8 *win_loss); | ||
198 | |||
199 | #endif /* _DCCP_PKT_HIST_ */ | ||
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h new file mode 100644 index 000000000000..130c4c40cfe3 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef _TFRC_H_ | ||
2 | #define _TFRC_H_ | ||
3 | /* | ||
4 | * net/dccp/ccids/lib/tfrc.h | ||
5 | * | ||
6 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
7 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
8 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
9 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | */ | ||
16 | |||
17 | #include <linux/types.h> | ||
18 | |||
19 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); | ||
20 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); | ||
21 | |||
22 | #endif /* _TFRC_H_ */ | ||
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c new file mode 100644 index 000000000000..d2b5933b4510 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc_equation.c | |||
@@ -0,0 +1,644 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/tfrc_equation.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
7 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/module.h> | ||
17 | |||
18 | #include <asm/bug.h> | ||
19 | #include <asm/div64.h> | ||
20 | |||
21 | #include "tfrc.h" | ||
22 | |||
/* Number of rows in tfrc_calc_x_lookup[] */
#define TFRC_CALC_X_ARRSIZE 500

/*
 * Loss rates (scaled by 1,000,000) below this value are resolved through
 * column [1] of tfrc_calc_x_lookup[], all other loss rates through
 * column [0].
 */
#define TFRC_CALC_X_SPLIT 50000
/* equivalent to 0.05 */
27 | |||
28 | static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { | ||
29 | { 37172, 8172 }, | ||
30 | { 53499, 11567 }, | ||
31 | { 66664, 14180 }, | ||
32 | { 78298, 16388 }, | ||
33 | { 89021, 18339 }, | ||
34 | { 99147, 20108 }, | ||
35 | { 108858, 21738 }, | ||
36 | { 118273, 23260 }, | ||
37 | { 127474, 24693 }, | ||
38 | { 136520, 26052 }, | ||
39 | { 145456, 27348 }, | ||
40 | { 154316, 28589 }, | ||
41 | { 163130, 29783 }, | ||
42 | { 171919, 30935 }, | ||
43 | { 180704, 32049 }, | ||
44 | { 189502, 33130 }, | ||
45 | { 198328, 34180 }, | ||
46 | { 207194, 35202 }, | ||
47 | { 216114, 36198 }, | ||
48 | { 225097, 37172 }, | ||
49 | { 234153, 38123 }, | ||
50 | { 243294, 39055 }, | ||
51 | { 252527, 39968 }, | ||
52 | { 261861, 40864 }, | ||
53 | { 271305, 41743 }, | ||
54 | { 280866, 42607 }, | ||
55 | { 290553, 43457 }, | ||
56 | { 300372, 44293 }, | ||
57 | { 310333, 45117 }, | ||
58 | { 320441, 45929 }, | ||
59 | { 330705, 46729 }, | ||
60 | { 341131, 47518 }, | ||
61 | { 351728, 48297 }, | ||
62 | { 362501, 49066 }, | ||
63 | { 373460, 49826 }, | ||
64 | { 384609, 50577 }, | ||
65 | { 395958, 51320 }, | ||
66 | { 407513, 52054 }, | ||
67 | { 419281, 52780 }, | ||
68 | { 431270, 53499 }, | ||
69 | { 443487, 54211 }, | ||
70 | { 455940, 54916 }, | ||
71 | { 468635, 55614 }, | ||
72 | { 481581, 56306 }, | ||
73 | { 494785, 56991 }, | ||
74 | { 508254, 57671 }, | ||
75 | { 521996, 58345 }, | ||
76 | { 536019, 59014 }, | ||
77 | { 550331, 59677 }, | ||
78 | { 564939, 60335 }, | ||
79 | { 579851, 60988 }, | ||
80 | { 595075, 61636 }, | ||
81 | { 610619, 62279 }, | ||
82 | { 626491, 62918 }, | ||
83 | { 642700, 63553 }, | ||
84 | { 659253, 64183 }, | ||
85 | { 676158, 64809 }, | ||
86 | { 693424, 65431 }, | ||
87 | { 711060, 66050 }, | ||
88 | { 729073, 66664 }, | ||
89 | { 747472, 67275 }, | ||
90 | { 766266, 67882 }, | ||
91 | { 785464, 68486 }, | ||
92 | { 805073, 69087 }, | ||
93 | { 825103, 69684 }, | ||
94 | { 845562, 70278 }, | ||
95 | { 866460, 70868 }, | ||
96 | { 887805, 71456 }, | ||
97 | { 909606, 72041 }, | ||
98 | { 931873, 72623 }, | ||
99 | { 954614, 73202 }, | ||
100 | { 977839, 73778 }, | ||
101 | { 1001557, 74352 }, | ||
102 | { 1025777, 74923 }, | ||
103 | { 1050508, 75492 }, | ||
104 | { 1075761, 76058 }, | ||
105 | { 1101544, 76621 }, | ||
106 | { 1127867, 77183 }, | ||
107 | { 1154739, 77741 }, | ||
108 | { 1182172, 78298 }, | ||
109 | { 1210173, 78852 }, | ||
110 | { 1238753, 79405 }, | ||
111 | { 1267922, 79955 }, | ||
112 | { 1297689, 80503 }, | ||
113 | { 1328066, 81049 }, | ||
114 | { 1359060, 81593 }, | ||
115 | { 1390684, 82135 }, | ||
116 | { 1422947, 82675 }, | ||
117 | { 1455859, 83213 }, | ||
118 | { 1489430, 83750 }, | ||
119 | { 1523671, 84284 }, | ||
120 | { 1558593, 84817 }, | ||
121 | { 1594205, 85348 }, | ||
122 | { 1630518, 85878 }, | ||
123 | { 1667543, 86406 }, | ||
124 | { 1705290, 86932 }, | ||
125 | { 1743770, 87457 }, | ||
126 | { 1782994, 87980 }, | ||
127 | { 1822973, 88501 }, | ||
128 | { 1863717, 89021 }, | ||
129 | { 1905237, 89540 }, | ||
130 | { 1947545, 90057 }, | ||
131 | { 1990650, 90573 }, | ||
132 | { 2034566, 91087 }, | ||
133 | { 2079301, 91600 }, | ||
134 | { 2124869, 92111 }, | ||
135 | { 2171279, 92622 }, | ||
136 | { 2218543, 93131 }, | ||
137 | { 2266673, 93639 }, | ||
138 | { 2315680, 94145 }, | ||
139 | { 2365575, 94650 }, | ||
140 | { 2416371, 95154 }, | ||
141 | { 2468077, 95657 }, | ||
142 | { 2520707, 96159 }, | ||
143 | { 2574271, 96660 }, | ||
144 | { 2628782, 97159 }, | ||
145 | { 2684250, 97658 }, | ||
146 | { 2740689, 98155 }, | ||
147 | { 2798110, 98651 }, | ||
148 | { 2856524, 99147 }, | ||
149 | { 2915944, 99641 }, | ||
150 | { 2976382, 100134 }, | ||
151 | { 3037850, 100626 }, | ||
152 | { 3100360, 101117 }, | ||
153 | { 3163924, 101608 }, | ||
154 | { 3228554, 102097 }, | ||
155 | { 3294263, 102586 }, | ||
156 | { 3361063, 103073 }, | ||
157 | { 3428966, 103560 }, | ||
158 | { 3497984, 104045 }, | ||
159 | { 3568131, 104530 }, | ||
160 | { 3639419, 105014 }, | ||
161 | { 3711860, 105498 }, | ||
162 | { 3785467, 105980 }, | ||
163 | { 3860253, 106462 }, | ||
164 | { 3936229, 106942 }, | ||
165 | { 4013410, 107422 }, | ||
166 | { 4091808, 107902 }, | ||
167 | { 4171435, 108380 }, | ||
168 | { 4252306, 108858 }, | ||
169 | { 4334431, 109335 }, | ||
170 | { 4417825, 109811 }, | ||
171 | { 4502501, 110287 }, | ||
172 | { 4588472, 110762 }, | ||
173 | { 4675750, 111236 }, | ||
174 | { 4764349, 111709 }, | ||
175 | { 4854283, 112182 }, | ||
176 | { 4945564, 112654 }, | ||
177 | { 5038206, 113126 }, | ||
178 | { 5132223, 113597 }, | ||
179 | { 5227627, 114067 }, | ||
180 | { 5324432, 114537 }, | ||
181 | { 5422652, 115006 }, | ||
182 | { 5522299, 115474 }, | ||
183 | { 5623389, 115942 }, | ||
184 | { 5725934, 116409 }, | ||
185 | { 5829948, 116876 }, | ||
186 | { 5935446, 117342 }, | ||
187 | { 6042439, 117808 }, | ||
188 | { 6150943, 118273 }, | ||
189 | { 6260972, 118738 }, | ||
190 | { 6372538, 119202 }, | ||
191 | { 6485657, 119665 }, | ||
192 | { 6600342, 120128 }, | ||
193 | { 6716607, 120591 }, | ||
194 | { 6834467, 121053 }, | ||
195 | { 6953935, 121514 }, | ||
196 | { 7075025, 121976 }, | ||
197 | { 7197752, 122436 }, | ||
198 | { 7322131, 122896 }, | ||
199 | { 7448175, 123356 }, | ||
200 | { 7575898, 123815 }, | ||
201 | { 7705316, 124274 }, | ||
202 | { 7836442, 124733 }, | ||
203 | { 7969291, 125191 }, | ||
204 | { 8103877, 125648 }, | ||
205 | { 8240216, 126105 }, | ||
206 | { 8378321, 126562 }, | ||
207 | { 8518208, 127018 }, | ||
208 | { 8659890, 127474 }, | ||
209 | { 8803384, 127930 }, | ||
210 | { 8948702, 128385 }, | ||
211 | { 9095861, 128840 }, | ||
212 | { 9244875, 129294 }, | ||
213 | { 9395760, 129748 }, | ||
214 | { 9548529, 130202 }, | ||
215 | { 9703198, 130655 }, | ||
216 | { 9859782, 131108 }, | ||
217 | { 10018296, 131561 }, | ||
218 | { 10178755, 132014 }, | ||
219 | { 10341174, 132466 }, | ||
220 | { 10505569, 132917 }, | ||
221 | { 10671954, 133369 }, | ||
222 | { 10840345, 133820 }, | ||
223 | { 11010757, 134271 }, | ||
224 | { 11183206, 134721 }, | ||
225 | { 11357706, 135171 }, | ||
226 | { 11534274, 135621 }, | ||
227 | { 11712924, 136071 }, | ||
228 | { 11893673, 136520 }, | ||
229 | { 12076536, 136969 }, | ||
230 | { 12261527, 137418 }, | ||
231 | { 12448664, 137867 }, | ||
232 | { 12637961, 138315 }, | ||
233 | { 12829435, 138763 }, | ||
234 | { 13023101, 139211 }, | ||
235 | { 13218974, 139658 }, | ||
236 | { 13417071, 140106 }, | ||
237 | { 13617407, 140553 }, | ||
238 | { 13819999, 140999 }, | ||
239 | { 14024862, 141446 }, | ||
240 | { 14232012, 141892 }, | ||
241 | { 14441465, 142339 }, | ||
242 | { 14653238, 142785 }, | ||
243 | { 14867346, 143230 }, | ||
244 | { 15083805, 143676 }, | ||
245 | { 15302632, 144121 }, | ||
246 | { 15523842, 144566 }, | ||
247 | { 15747453, 145011 }, | ||
248 | { 15973479, 145456 }, | ||
249 | { 16201939, 145900 }, | ||
250 | { 16432847, 146345 }, | ||
251 | { 16666221, 146789 }, | ||
252 | { 16902076, 147233 }, | ||
253 | { 17140429, 147677 }, | ||
254 | { 17381297, 148121 }, | ||
255 | { 17624696, 148564 }, | ||
256 | { 17870643, 149007 }, | ||
257 | { 18119154, 149451 }, | ||
258 | { 18370247, 149894 }, | ||
259 | { 18623936, 150336 }, | ||
260 | { 18880241, 150779 }, | ||
261 | { 19139176, 151222 }, | ||
262 | { 19400759, 151664 }, | ||
263 | { 19665007, 152107 }, | ||
264 | { 19931936, 152549 }, | ||
265 | { 20201564, 152991 }, | ||
266 | { 20473907, 153433 }, | ||
267 | { 20748982, 153875 }, | ||
268 | { 21026807, 154316 }, | ||
269 | { 21307399, 154758 }, | ||
270 | { 21590773, 155199 }, | ||
271 | { 21876949, 155641 }, | ||
272 | { 22165941, 156082 }, | ||
273 | { 22457769, 156523 }, | ||
274 | { 22752449, 156964 }, | ||
275 | { 23049999, 157405 }, | ||
276 | { 23350435, 157846 }, | ||
277 | { 23653774, 158287 }, | ||
278 | { 23960036, 158727 }, | ||
279 | { 24269236, 159168 }, | ||
280 | { 24581392, 159608 }, | ||
281 | { 24896521, 160049 }, | ||
282 | { 25214642, 160489 }, | ||
283 | { 25535772, 160929 }, | ||
284 | { 25859927, 161370 }, | ||
285 | { 26187127, 161810 }, | ||
286 | { 26517388, 162250 }, | ||
287 | { 26850728, 162690 }, | ||
288 | { 27187165, 163130 }, | ||
289 | { 27526716, 163569 }, | ||
290 | { 27869400, 164009 }, | ||
291 | { 28215234, 164449 }, | ||
292 | { 28564236, 164889 }, | ||
293 | { 28916423, 165328 }, | ||
294 | { 29271815, 165768 }, | ||
295 | { 29630428, 166208 }, | ||
296 | { 29992281, 166647 }, | ||
297 | { 30357392, 167087 }, | ||
298 | { 30725779, 167526 }, | ||
299 | { 31097459, 167965 }, | ||
300 | { 31472452, 168405 }, | ||
301 | { 31850774, 168844 }, | ||
302 | { 32232445, 169283 }, | ||
303 | { 32617482, 169723 }, | ||
304 | { 33005904, 170162 }, | ||
305 | { 33397730, 170601 }, | ||
306 | { 33792976, 171041 }, | ||
307 | { 34191663, 171480 }, | ||
308 | { 34593807, 171919 }, | ||
309 | { 34999428, 172358 }, | ||
310 | { 35408544, 172797 }, | ||
311 | { 35821174, 173237 }, | ||
312 | { 36237335, 173676 }, | ||
313 | { 36657047, 174115 }, | ||
314 | { 37080329, 174554 }, | ||
315 | { 37507197, 174993 }, | ||
316 | { 37937673, 175433 }, | ||
317 | { 38371773, 175872 }, | ||
318 | { 38809517, 176311 }, | ||
319 | { 39250924, 176750 }, | ||
320 | { 39696012, 177190 }, | ||
321 | { 40144800, 177629 }, | ||
322 | { 40597308, 178068 }, | ||
323 | { 41053553, 178507 }, | ||
324 | { 41513554, 178947 }, | ||
325 | { 41977332, 179386 }, | ||
326 | { 42444904, 179825 }, | ||
327 | { 42916290, 180265 }, | ||
328 | { 43391509, 180704 }, | ||
329 | { 43870579, 181144 }, | ||
330 | { 44353520, 181583 }, | ||
331 | { 44840352, 182023 }, | ||
332 | { 45331092, 182462 }, | ||
333 | { 45825761, 182902 }, | ||
334 | { 46324378, 183342 }, | ||
335 | { 46826961, 183781 }, | ||
336 | { 47333531, 184221 }, | ||
337 | { 47844106, 184661 }, | ||
338 | { 48358706, 185101 }, | ||
339 | { 48877350, 185541 }, | ||
340 | { 49400058, 185981 }, | ||
341 | { 49926849, 186421 }, | ||
342 | { 50457743, 186861 }, | ||
343 | { 50992759, 187301 }, | ||
344 | { 51531916, 187741 }, | ||
345 | { 52075235, 188181 }, | ||
346 | { 52622735, 188622 }, | ||
347 | { 53174435, 189062 }, | ||
348 | { 53730355, 189502 }, | ||
349 | { 54290515, 189943 }, | ||
350 | { 54854935, 190383 }, | ||
351 | { 55423634, 190824 }, | ||
352 | { 55996633, 191265 }, | ||
353 | { 56573950, 191706 }, | ||
354 | { 57155606, 192146 }, | ||
355 | { 57741621, 192587 }, | ||
356 | { 58332014, 193028 }, | ||
357 | { 58926806, 193470 }, | ||
358 | { 59526017, 193911 }, | ||
359 | { 60129666, 194352 }, | ||
360 | { 60737774, 194793 }, | ||
361 | { 61350361, 195235 }, | ||
362 | { 61967446, 195677 }, | ||
363 | { 62589050, 196118 }, | ||
364 | { 63215194, 196560 }, | ||
365 | { 63845897, 197002 }, | ||
366 | { 64481179, 197444 }, | ||
367 | { 65121061, 197886 }, | ||
368 | { 65765563, 198328 }, | ||
369 | { 66414705, 198770 }, | ||
370 | { 67068508, 199213 }, | ||
371 | { 67726992, 199655 }, | ||
372 | { 68390177, 200098 }, | ||
373 | { 69058085, 200540 }, | ||
374 | { 69730735, 200983 }, | ||
375 | { 70408147, 201426 }, | ||
376 | { 71090343, 201869 }, | ||
377 | { 71777343, 202312 }, | ||
378 | { 72469168, 202755 }, | ||
379 | { 73165837, 203199 }, | ||
380 | { 73867373, 203642 }, | ||
381 | { 74573795, 204086 }, | ||
382 | { 75285124, 204529 }, | ||
383 | { 76001380, 204973 }, | ||
384 | { 76722586, 205417 }, | ||
385 | { 77448761, 205861 }, | ||
386 | { 78179926, 206306 }, | ||
387 | { 78916102, 206750 }, | ||
388 | { 79657310, 207194 }, | ||
389 | { 80403571, 207639 }, | ||
390 | { 81154906, 208084 }, | ||
391 | { 81911335, 208529 }, | ||
392 | { 82672880, 208974 }, | ||
393 | { 83439562, 209419 }, | ||
394 | { 84211402, 209864 }, | ||
395 | { 84988421, 210309 }, | ||
396 | { 85770640, 210755 }, | ||
397 | { 86558080, 211201 }, | ||
398 | { 87350762, 211647 }, | ||
399 | { 88148708, 212093 }, | ||
400 | { 88951938, 212539 }, | ||
401 | { 89760475, 212985 }, | ||
402 | { 90574339, 213432 }, | ||
403 | { 91393551, 213878 }, | ||
404 | { 92218133, 214325 }, | ||
405 | { 93048107, 214772 }, | ||
406 | { 93883493, 215219 }, | ||
407 | { 94724314, 215666 }, | ||
408 | { 95570590, 216114 }, | ||
409 | { 96422343, 216561 }, | ||
410 | { 97279594, 217009 }, | ||
411 | { 98142366, 217457 }, | ||
412 | { 99010679, 217905 }, | ||
413 | { 99884556, 218353 }, | ||
414 | { 100764018, 218801 }, | ||
415 | { 101649086, 219250 }, | ||
416 | { 102539782, 219698 }, | ||
417 | { 103436128, 220147 }, | ||
418 | { 104338146, 220596 }, | ||
419 | { 105245857, 221046 }, | ||
420 | { 106159284, 221495 }, | ||
421 | { 107078448, 221945 }, | ||
422 | { 108003370, 222394 }, | ||
423 | { 108934074, 222844 }, | ||
424 | { 109870580, 223294 }, | ||
425 | { 110812910, 223745 }, | ||
426 | { 111761087, 224195 }, | ||
427 | { 112715133, 224646 }, | ||
428 | { 113675069, 225097 }, | ||
429 | { 114640918, 225548 }, | ||
430 | { 115612702, 225999 }, | ||
431 | { 116590442, 226450 }, | ||
432 | { 117574162, 226902 }, | ||
433 | { 118563882, 227353 }, | ||
434 | { 119559626, 227805 }, | ||
435 | { 120561415, 228258 }, | ||
436 | { 121569272, 228710 }, | ||
437 | { 122583219, 229162 }, | ||
438 | { 123603278, 229615 }, | ||
439 | { 124629471, 230068 }, | ||
440 | { 125661822, 230521 }, | ||
441 | { 126700352, 230974 }, | ||
442 | { 127745083, 231428 }, | ||
443 | { 128796039, 231882 }, | ||
444 | { 129853241, 232336 }, | ||
445 | { 130916713, 232790 }, | ||
446 | { 131986475, 233244 }, | ||
447 | { 133062553, 233699 }, | ||
448 | { 134144966, 234153 }, | ||
449 | { 135233739, 234608 }, | ||
450 | { 136328894, 235064 }, | ||
451 | { 137430453, 235519 }, | ||
452 | { 138538440, 235975 }, | ||
453 | { 139652876, 236430 }, | ||
454 | { 140773786, 236886 }, | ||
455 | { 141901190, 237343 }, | ||
456 | { 143035113, 237799 }, | ||
457 | { 144175576, 238256 }, | ||
458 | { 145322604, 238713 }, | ||
459 | { 146476218, 239170 }, | ||
460 | { 147636442, 239627 }, | ||
461 | { 148803298, 240085 }, | ||
462 | { 149976809, 240542 }, | ||
463 | { 151156999, 241000 }, | ||
464 | { 152343890, 241459 }, | ||
465 | { 153537506, 241917 }, | ||
466 | { 154737869, 242376 }, | ||
467 | { 155945002, 242835 }, | ||
468 | { 157158929, 243294 }, | ||
469 | { 158379673, 243753 }, | ||
470 | { 159607257, 244213 }, | ||
471 | { 160841704, 244673 }, | ||
472 | { 162083037, 245133 }, | ||
473 | { 163331279, 245593 }, | ||
474 | { 164586455, 246054 }, | ||
475 | { 165848586, 246514 }, | ||
476 | { 167117696, 246975 }, | ||
477 | { 168393810, 247437 }, | ||
478 | { 169676949, 247898 }, | ||
479 | { 170967138, 248360 }, | ||
480 | { 172264399, 248822 }, | ||
481 | { 173568757, 249284 }, | ||
482 | { 174880235, 249747 }, | ||
483 | { 176198856, 250209 }, | ||
484 | { 177524643, 250672 }, | ||
485 | { 178857621, 251136 }, | ||
486 | { 180197813, 251599 }, | ||
487 | { 181545242, 252063 }, | ||
488 | { 182899933, 252527 }, | ||
489 | { 184261908, 252991 }, | ||
490 | { 185631191, 253456 }, | ||
491 | { 187007807, 253920 }, | ||
492 | { 188391778, 254385 }, | ||
493 | { 189783129, 254851 }, | ||
494 | { 191181884, 255316 }, | ||
495 | { 192588065, 255782 }, | ||
496 | { 194001698, 256248 }, | ||
497 | { 195422805, 256714 }, | ||
498 | { 196851411, 257181 }, | ||
499 | { 198287540, 257648 }, | ||
500 | { 199731215, 258115 }, | ||
501 | { 201182461, 258582 }, | ||
502 | { 202641302, 259050 }, | ||
503 | { 204107760, 259518 }, | ||
504 | { 205581862, 259986 }, | ||
505 | { 207063630, 260454 }, | ||
506 | { 208553088, 260923 }, | ||
507 | { 210050262, 261392 }, | ||
508 | { 211555174, 261861 }, | ||
509 | { 213067849, 262331 }, | ||
510 | { 214588312, 262800 }, | ||
511 | { 216116586, 263270 }, | ||
512 | { 217652696, 263741 }, | ||
513 | { 219196666, 264211 }, | ||
514 | { 220748520, 264682 }, | ||
515 | { 222308282, 265153 }, | ||
516 | { 223875978, 265625 }, | ||
517 | { 225451630, 266097 }, | ||
518 | { 227035265, 266569 }, | ||
519 | { 228626905, 267041 }, | ||
520 | { 230226576, 267514 }, | ||
521 | { 231834302, 267986 }, | ||
522 | { 233450107, 268460 }, | ||
523 | { 235074016, 268933 }, | ||
524 | { 236706054, 269407 }, | ||
525 | { 238346244, 269881 }, | ||
526 | { 239994613, 270355 }, | ||
527 | { 241651183, 270830 }, | ||
528 | { 243315981, 271305 } | ||
529 | }; | ||
530 | |||
531 | /* Calculate the send rate as per section 3.1 of RFC3448 | ||
532 | |||
533 | Returns send rate in bytes per second | ||
534 | |||
535 | Integer maths and lookups are used as not allowed floating point in kernel | ||
536 | |||
537 | The function for Xcalc as per section 3.1 of RFC3448 is: | ||
538 | |||
539 | X = s | ||
540 | ------------------------------------------------------------- | ||
541 | R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) | ||
542 | |||
543 | where | ||
544 | X is the trasmit rate in bytes/second | ||
545 | s is the packet size in bytes | ||
546 | R is the round trip time in seconds | ||
547 | p is the loss event rate, between 0 and 1.0, of the number of loss events | ||
548 | as a fraction of the number of packets transmitted | ||
549 | t_RTO is the TCP retransmission timeout value in seconds | ||
550 | b is the number of packets acknowledged by a single TCP acknowledgement | ||
551 | |||
552 | we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: | ||
553 | |||
554 | X = s | ||
555 | ----------------------------------------------------------------------- | ||
556 | R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) | ||
557 | |||
558 | |||
559 | which we can break down into: | ||
560 | |||
561 | X = s | ||
562 | -------- | ||
563 | R * f(p) | ||
564 | |||
565 | where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) | ||
566 | |||
567 | Function parameters: | ||
568 | s - bytes | ||
569 | R - RTT in usecs | ||
570 | p - loss rate (decimal fraction multiplied by 1,000,000) | ||
571 | |||
572 | Returns Xcalc in bytes per second | ||
573 | |||
574 | DON'T alter this code unless you run test cases against it as the code | ||
575 | has been manipulated to stop underflow/overlow. | ||
576 | |||
577 | */ | ||
578 | u32 tfrc_calc_x(u16 s, u32 R, u32 p) | ||
579 | { | ||
580 | int index; | ||
581 | u32 f; | ||
582 | u64 tmp1, tmp2; | ||
583 | |||
584 | if (p < TFRC_CALC_X_SPLIT) | ||
585 | index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1; | ||
586 | else | ||
587 | index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1; | ||
588 | |||
589 | if (index < 0) | ||
590 | /* p should be 0 unless there is a bug in my code */ | ||
591 | index = 0; | ||
592 | |||
593 | if (R == 0) | ||
594 | R = 1; /* RTT can't be zero or else divide by zero */ | ||
595 | |||
596 | BUG_ON(index >= TFRC_CALC_X_ARRSIZE); | ||
597 | |||
598 | if (p >= TFRC_CALC_X_SPLIT) | ||
599 | f = tfrc_calc_x_lookup[index][0]; | ||
600 | else | ||
601 | f = tfrc_calc_x_lookup[index][1]; | ||
602 | |||
603 | tmp1 = ((u64)s * 100000000); | ||
604 | tmp2 = ((u64)R * (u64)f); | ||
605 | do_div(tmp2, 10000); | ||
606 | do_div(tmp1, tmp2); | ||
607 | /* Don't alter above math unless you test due to overflow on 32 bit */ | ||
608 | |||
609 | return (u32)tmp1; | ||
610 | } | ||
611 | |||
612 | EXPORT_SYMBOL_GPL(tfrc_calc_x); | ||
613 | |||
614 | /* | ||
615 | * args: fvalue - function value to match | ||
616 | * returns: p closest to that value | ||
617 | * | ||
618 | * both fvalue and p are multiplied by 1,000,000 to use ints | ||
619 | */ | ||
620 | u32 tfrc_calc_x_reverse_lookup(u32 fvalue) | ||
621 | { | ||
622 | int ctr = 0; | ||
623 | int small; | ||
624 | |||
625 | if (fvalue < tfrc_calc_x_lookup[0][1]) | ||
626 | return 0; | ||
627 | |||
628 | if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) | ||
629 | small = 1; | ||
630 | else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) | ||
631 | return 1000000; | ||
632 | else | ||
633 | small = 0; | ||
634 | |||
635 | while (fvalue > tfrc_calc_x_lookup[ctr][small]) | ||
636 | ctr++; | ||
637 | |||
638 | if (small) | ||
639 | return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE; | ||
640 | else | ||
641 | return 1000000 * ctr / TFRC_CALC_X_ARRSIZE; | ||
642 | } | ||
643 | |||
644 | EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); | ||
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 000000000000..33456c0d5937 --- /dev/null +++ b/net/dccp/dccp.h | |||
@@ -0,0 +1,493 @@ | |||
1 | #ifndef _DCCP_H | ||
2 | #define _DCCP_H | ||
3 | /* | ||
4 | * net/dccp/dccp.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
8 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/dccp.h> | ||
17 | #include <net/snmp.h> | ||
18 | #include <net/sock.h> | ||
19 | #include <net/tcp.h> | ||
20 | |||
/*
 * Debug output helpers.  With CONFIG_IP_DCCP_DEBUG set they print only while
 * dccp_debug is non-zero; without it both macros expand to nothing, so their
 * arguments are not evaluated.
 */
#ifdef CONFIG_IP_DCCP_DEBUG
extern int dccp_debug;

/* printk at KERN_DEBUG level, prefixed with the calling function's name */
#define dccp_pr_debug(format, a...) \
	do { if (dccp_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
	} while (0)
/* plain printk without level or prefix */
#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
	printk(format, ##a); } while (0)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
#endif
34 | |||
35 | extern struct inet_hashinfo dccp_hashinfo; | ||
36 | |||
37 | extern atomic_t dccp_orphan_count; | ||
38 | extern int dccp_tw_count; | ||
39 | extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); | ||
40 | |||
41 | extern void dccp_time_wait(struct sock *sk, int state, int timeo); | ||
42 | |||
/* FIXME: Right size this */
#define DCCP_MAX_OPT_LEN 128

#define DCCP_MAX_PACKET_HDR 32

/* Sum of the DCCP packet header, DCCP options and MAX_HEADER sizes */
#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)

/* The values below are multiples of HZ, i.e. expressed in timer ticks */
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
				     * state, about 60 seconds */

/* draft-ietf-dccp-spec-11.txt initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))

/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))

#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
60 | |||
61 | extern struct proto dccp_v4_prot; | ||
62 | |||
63 | /* is seq1 < seq2 ? */ | ||
64 | static inline int before48(const u64 seq1, const u64 seq2) | ||
65 | { | ||
66 | return (s64)((seq1 << 16) - (seq2 << 16)) < 0; | ||
67 | } | ||
68 | |||
69 | /* is seq1 > seq2 ? */ | ||
70 | static inline int after48(const u64 seq1, const u64 seq2) | ||
71 | { | ||
72 | return (s64)((seq2 << 16) - (seq1 << 16)) < 0; | ||
73 | } | ||
74 | |||
75 | /* is seq2 <= seq1 <= seq3 ? */ | ||
76 | static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) | ||
77 | { | ||
78 | return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); | ||
79 | } | ||
80 | |||
81 | static inline u64 max48(const u64 seq1, const u64 seq2) | ||
82 | { | ||
83 | return after48(seq1, seq2) ? seq1 : seq2; | ||
84 | } | ||
85 | |||
/*
 * Indices into dccp_mib.mibs[].  __DCCP_MIB_MAX must stay last: it is used
 * (through DCCP_MIB_MAX) as the size of the counter array.
 */
enum {
	DCCP_MIB_NUM = 0,
	DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
	DCCP_MIB_ESTABRESETS, /* EstabResets */
	DCCP_MIB_CURRESTAB, /* CurrEstab */
	DCCP_MIB_OUTSEGS, /* OutSegs */
	DCCP_MIB_OUTRSTS,
	DCCP_MIB_ABORTONTIMEOUT,
	DCCP_MIB_TIMEOUTS,
	DCCP_MIB_ABORTFAILED,
	DCCP_MIB_PASSIVEOPENS,
	DCCP_MIB_ATTEMPTFAILS,
	DCCP_MIB_OUTDATAGRAMS,
	DCCP_MIB_INERRS,
	DCCP_MIB_OPTMANDATORYERROR,
	DCCP_MIB_INVALIDOPT,
	__DCCP_MIB_MAX
};

#define DCCP_MIB_MAX __DCCP_MIB_MAX
/* Counter storage: one unsigned long per DCCP_MIB_* index */
struct dccp_mib {
	unsigned long mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;
109 | |||
/*
 * Declares dccp_statistics and wraps the generic SNMP_* counter macros so
 * that callers only have to name the DCCP_MIB_* field.
 */
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
#define DCCP_ADD_STATS_BH(field, val) \
	SNMP_ADD_STATS_BH(dccp_statistics, field, val)
#define DCCP_ADD_STATS_USER(field, val) \
	SNMP_ADD_STATS_USER(dccp_statistics, field, val)
119 | |||
120 | extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); | ||
121 | extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); | ||
122 | |||
123 | extern int dccp_send_response(struct sock *sk); | ||
124 | extern void dccp_send_ack(struct sock *sk); | ||
125 | extern void dccp_send_delayed_ack(struct sock *sk); | ||
126 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | ||
127 | const enum dccp_pkt_type pkt_type); | ||
128 | |||
129 | extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); | ||
130 | extern void dccp_write_space(struct sock *sk); | ||
131 | |||
132 | extern void dccp_init_xmit_timers(struct sock *sk); | ||
/* Thin wrapper: forwards @sk to inet_csk_clear_xmit_timers() */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
137 | |||
138 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); | ||
139 | |||
140 | extern const char *dccp_packet_name(const int type); | ||
141 | extern const char *dccp_state_name(const int state); | ||
142 | |||
143 | static inline void dccp_set_state(struct sock *sk, const int state) | ||
144 | { | ||
145 | const int oldstate = sk->sk_state; | ||
146 | |||
147 | dccp_pr_debug("%s(%p) %-10.10s -> %s\n", | ||
148 | dccp_role(sk), sk, | ||
149 | dccp_state_name(oldstate), dccp_state_name(state)); | ||
150 | WARN_ON(state == oldstate); | ||
151 | |||
152 | switch (state) { | ||
153 | case DCCP_OPEN: | ||
154 | if (oldstate != DCCP_OPEN) | ||
155 | DCCP_INC_STATS(DCCP_MIB_CURRESTAB); | ||
156 | break; | ||
157 | |||
158 | case DCCP_CLOSED: | ||
159 | if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) | ||
160 | DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); | ||
161 | |||
162 | sk->sk_prot->unhash(sk); | ||
163 | if (inet_csk(sk)->icsk_bind_hash != NULL && | ||
164 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | ||
165 | inet_put_port(&dccp_hashinfo, sk); | ||
166 | /* fall through */ | ||
167 | default: | ||
168 | if (oldstate == DCCP_OPEN) | ||
169 | DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); | ||
170 | } | ||
171 | |||
172 | /* Change state AFTER socket is unhashed to avoid closed | ||
173 | * socket sitting in hash tables. | ||
174 | */ | ||
175 | sk->sk_state = state; | ||
176 | } | ||
177 | |||
178 | static inline void dccp_done(struct sock *sk) | ||
179 | { | ||
180 | dccp_set_state(sk, DCCP_CLOSED); | ||
181 | dccp_clear_xmit_timers(sk); | ||
182 | |||
183 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
184 | |||
185 | if (!sock_flag(sk, SOCK_DEAD)) | ||
186 | sk->sk_state_change(sk); | ||
187 | else | ||
188 | inet_csk_destroy_sock(sk); | ||
189 | } | ||
190 | |||
191 | static inline void dccp_openreq_init(struct request_sock *req, | ||
192 | struct dccp_sock *dp, | ||
193 | struct sk_buff *skb) | ||
194 | { | ||
195 | /* | ||
196 | * FIXME: fill in the other req fields from the DCCP options | ||
197 | * received | ||
198 | */ | ||
199 | inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; | ||
200 | inet_rsk(req)->acked = 0; | ||
201 | req->rcv_wnd = 0; | ||
202 | } | ||
203 | |||
204 | extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); | ||
205 | |||
206 | extern struct sock *dccp_create_openreq_child(struct sock *sk, | ||
207 | const struct request_sock *req, | ||
208 | const struct sk_buff *skb); | ||
209 | |||
210 | extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); | ||
211 | |||
212 | extern void dccp_v4_err(struct sk_buff *skb, u32); | ||
213 | |||
214 | extern int dccp_v4_rcv(struct sk_buff *skb); | ||
215 | |||
216 | extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, | ||
217 | struct sk_buff *skb, | ||
218 | struct request_sock *req, | ||
219 | struct dst_entry *dst); | ||
220 | extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
221 | struct request_sock *req, | ||
222 | struct request_sock **prev); | ||
223 | |||
224 | extern int dccp_child_process(struct sock *parent, struct sock *child, | ||
225 | struct sk_buff *skb); | ||
226 | extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
227 | struct dccp_hdr *dh, unsigned len); | ||
228 | extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
229 | const struct dccp_hdr *dh, const unsigned len); | ||
230 | |||
231 | extern void dccp_close(struct sock *sk, long timeout); | ||
232 | extern struct sk_buff *dccp_make_response(struct sock *sk, | ||
233 | struct dst_entry *dst, | ||
234 | struct request_sock *req); | ||
235 | extern struct sk_buff *dccp_make_reset(struct sock *sk, | ||
236 | struct dst_entry *dst, | ||
237 | enum dccp_reset_codes code); | ||
238 | |||
239 | extern int dccp_connect(struct sock *sk); | ||
240 | extern int dccp_disconnect(struct sock *sk, int flags); | ||
241 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
242 | char __user *optval, int __user *optlen); | ||
243 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
244 | char __user *optval, int optlen); | ||
245 | extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); | ||
246 | extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, | ||
247 | struct msghdr *msg, size_t size); | ||
248 | extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, | ||
249 | struct msghdr *msg, size_t len, int nonblock, | ||
250 | int flags, int *addr_len); | ||
251 | extern void dccp_shutdown(struct sock *sk, int how); | ||
252 | |||
253 | extern int dccp_v4_checksum(const struct sk_buff *skb, | ||
254 | const u32 saddr, const u32 daddr); | ||
255 | |||
256 | extern int dccp_v4_send_reset(struct sock *sk, | ||
257 | enum dccp_reset_codes code); | ||
258 | extern void dccp_send_close(struct sock *sk, const int active); | ||
259 | |||
/*
 * struct dccp_skb_cb - per-packet DCCP fields kept in the skb control buffer
 *
 * Reached through DCCP_SKB_CB(), which reinterprets the start of skb->cb[].
 *
 * @dccpd_type       - packet type; compared against the DCCP_PKT_* values
 * @dccpd_reset_code - NOTE(review): presumably the code carried by a Reset
 *                     packet; not used in this part of the file
 * @dccpd_service    - NOTE(review): meaning not visible here
 * @dccpd_ccval      - NOTE(review): meaning not visible here
 * @dccpd_seq        - sequence number of the packet
 * @dccpd_ack_seq    - acknowledgement number of the packet; holds
 *                     DCCP_PKT_WITHOUT_ACK_SEQ when the packet carries none
 * @dccpd_opt_len    - NOTE(review): presumably the length of the options
 *                     area; confirm against the option code
 */
struct dccp_skb_cb {
	__u8 dccpd_type;
	__u8 dccpd_reset_code;
	__u8 dccpd_service;
	__u8 dccpd_ccval;
	__u64 dccpd_seq;
	__u64 dccpd_ack_seq;
	int dccpd_opt_len;
};

/* View the control buffer of @__skb as a struct dccp_skb_cb. */
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
271 | |||
272 | static inline int dccp_non_data_packet(const struct sk_buff *skb) | ||
273 | { | ||
274 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
275 | |||
276 | return type == DCCP_PKT_ACK || | ||
277 | type == DCCP_PKT_CLOSE || | ||
278 | type == DCCP_PKT_CLOSEREQ || | ||
279 | type == DCCP_PKT_RESET || | ||
280 | type == DCCP_PKT_SYNC || | ||
281 | type == DCCP_PKT_SYNCACK; | ||
282 | } | ||
283 | |||
284 | static inline int dccp_packet_without_ack(const struct sk_buff *skb) | ||
285 | { | ||
286 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
287 | |||
288 | return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; | ||
289 | } | ||
290 | |||
291 | #define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) | ||
292 | #define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) | ||
293 | |||
294 | static inline void dccp_set_seqno(u64 *seqno, u64 value) | ||
295 | { | ||
296 | if (value > DCCP_MAX_SEQNO) | ||
297 | value -= DCCP_MAX_SEQNO + 1; | ||
298 | *seqno = value; | ||
299 | } | ||
300 | |||
301 | static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) | ||
302 | { | ||
303 | return ((seqno2 << 16) - (seqno1 << 16)) >> 16; | ||
304 | } | ||
305 | |||
306 | static inline void dccp_inc_seqno(u64 *seqno) | ||
307 | { | ||
308 | if (++*seqno > DCCP_MAX_SEQNO) | ||
309 | *seqno = 0; | ||
310 | } | ||
311 | |||
312 | static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) | ||
313 | { | ||
314 | struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + | ||
315 | sizeof(*dh)); | ||
316 | |||
317 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
318 | dh->dccph_seq = htonl((gss >> 32)) >> 8; | ||
319 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
320 | dh->dccph_seq = htonl((gss >> 32)); | ||
321 | #else | ||
322 | #error "Adjust your <asm/byteorder.h> defines" | ||
323 | #endif | ||
324 | dhx->dccph_seq_low = htonl(gss & 0xffffffff); | ||
325 | } | ||
326 | |||
327 | static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, | ||
328 | const u64 gsr) | ||
329 | { | ||
330 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
331 | dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; | ||
332 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
333 | dhack->dccph_ack_nr_high = htonl((gsr >> 32)); | ||
334 | #else | ||
335 | #error "Adjust your <asm/byteorder.h> defines" | ||
336 | #endif | ||
337 | dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); | ||
338 | } | ||
339 | |||
/*
 * dccp_update_gsr - record @seq as GSR and recompute the SWL/SWH window
 *
 * With W = dccps_options.dccpo_sequence_window:
 *	SWL := GSR + 1 - W/4
 *	SWH := GSR + 3*W/4
 * Both edges are stored through dccp_set_seqno().
 */
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_gsr = seq;
	/* lower edge of the sequence window */
	dccp_set_seqno(&dp->dccps_swl,
		       (dp->dccps_gsr + 1 -
			(dp->dccps_options.dccpo_sequence_window / 4)));
	/* upper edge of the sequence window */
	dccp_set_seqno(&dp->dccps_swh,
		       (dp->dccps_gsr +
			(3 * dp->dccps_options.dccpo_sequence_window) / 4));
}
352 | |||
/*
 * dccp_update_gss - record @seq as GSS and recompute the AWL/AWH window
 *
 * AWH is set to GSS itself; with W = dccps_options.dccpo_sequence_window,
 *	AWL := GSS - W + 1
 * stored through dccp_set_seqno().
 */
static inline void dccp_update_gss(struct sock *sk, u64 seq)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_awh = dp->dccps_gss = seq;
	/* lower edge of the acknowledgement window */
	dccp_set_seqno(&dp->dccps_awl,
		       (dp->dccps_gss -
			dp->dccps_options.dccpo_sequence_window + 1));
}
362 | |||
363 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); | ||
364 | extern void dccp_insert_option_elapsed_time(struct sock *sk, | ||
365 | struct sk_buff *skb, | ||
366 | u32 elapsed_time); | ||
367 | extern void dccp_insert_option_timestamp(struct sock *sk, | ||
368 | struct sk_buff *skb); | ||
369 | extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
370 | unsigned char option, | ||
371 | const void *value, unsigned char len); | ||
372 | |||
373 | extern struct socket *dccp_ctl_socket; | ||
374 | |||
375 | #define DCCP_ACKPKTS_STATE_RECEIVED 0 | ||
376 | #define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) | ||
377 | #define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) | ||
378 | |||
379 | #define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ | ||
380 | #define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ | ||
381 | |||
/** struct dccp_ackpkts - acknowledgeable packets
 *
 * This data structure is the one defined in the DCCP draft
 * Appendix A.
 *
 * @dccpap_buf_head - circular buffer head
 * @dccpap_buf_tail - circular buffer tail
 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
 * 		       buffer (i.e. %dccpap_buf_head)
 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
 * 		       by the buffer with State 0
 *
 * Additionally, the HC-Receiver must keep some information about the
 * Ack Vectors it has recently sent. For each packet sent carrying an
 * Ack Vector, it remembers four variables:
 *
 * @dccpap_ack_seqno - the Sequence Number used for the packet
 * 		       (HC-Receiver seqno)
 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
 * 		       (HC-Sender seqno)
 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
 *
 * @dccpap_buf_vector_len - NOTE(review): undocumented in the original;
 * 		       presumably the length of the vector currently held in
 * 		       the buffer - confirm against the ackpkts code
 * @dccpap_ack_vector_len - NOTE(review): undocumented in the original;
 * 		       presumably the vector length at the time of the last
 * 		       sent Ack Vector - confirm against the ackpkts code
 * @dccpap_buf_len - circular buffer length
 * @dccpap_time - a timestamp, stored as a struct timeval
 * @dccpap_buf - circular buffer of acknowledgeable packets (trailing
 * 		 variable-length storage)
 */
struct dccp_ackpkts {
	unsigned int dccpap_buf_head;
	unsigned int dccpap_buf_tail;
	u64 dccpap_buf_ackno;
	u64 dccpap_ack_seqno;
	u64 dccpap_ack_ackno;
	unsigned int dccpap_ack_ptr;
	unsigned int dccpap_buf_vector_len;
	unsigned int dccpap_ack_vector_len;
	unsigned int dccpap_buf_len;
	struct timeval dccpap_time;
	u8 dccpap_buf_nonce;
	u8 dccpap_ack_nonce;
	u8 dccpap_buf[0];
};
424 | |||
425 | extern struct dccp_ackpkts * | ||
426 | dccp_ackpkts_alloc(unsigned int len, | ||
427 | const unsigned int __nocast priority); | ||
428 | extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); | ||
429 | extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); | ||
430 | extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, | ||
431 | struct sock *sk, u64 ackno); | ||
432 | |||
433 | static inline suseconds_t timeval_usecs(const struct timeval *tv) | ||
434 | { | ||
435 | return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; | ||
436 | } | ||
437 | |||
438 | static inline suseconds_t timeval_delta(const struct timeval *large, | ||
439 | const struct timeval *small) | ||
440 | { | ||
441 | time_t secs = large->tv_sec - small->tv_sec; | ||
442 | suseconds_t usecs = large->tv_usec - small->tv_usec; | ||
443 | |||
444 | if (usecs < 0) { | ||
445 | secs--; | ||
446 | usecs += USEC_PER_SEC; | ||
447 | } | ||
448 | return secs * USEC_PER_SEC + usecs; | ||
449 | } | ||
450 | |||
451 | static inline void timeval_add_usecs(struct timeval *tv, | ||
452 | const suseconds_t usecs) | ||
453 | { | ||
454 | tv->tv_usec += usecs; | ||
455 | while (tv->tv_usec >= USEC_PER_SEC) { | ||
456 | tv->tv_sec++; | ||
457 | tv->tv_usec -= USEC_PER_SEC; | ||
458 | } | ||
459 | } | ||
460 | |||
461 | static inline void timeval_sub_usecs(struct timeval *tv, | ||
462 | const suseconds_t usecs) | ||
463 | { | ||
464 | tv->tv_usec -= usecs; | ||
465 | while (tv->tv_usec < 0) { | ||
466 | tv->tv_sec--; | ||
467 | tv->tv_usec += USEC_PER_SEC; | ||
468 | } | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Returns the difference in usecs between timeval | ||
473 | * passed in and current time | ||
474 | */ | ||
475 | static inline suseconds_t timeval_now_delta(const struct timeval *tv) | ||
476 | { | ||
477 | struct timeval now; | ||
478 | do_gettimeofday(&now); | ||
479 | return timeval_delta(&now, tv); | ||
480 | } | ||
481 | |||
482 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
483 | extern void dccp_ackvector_print(const u64 ackno, | ||
484 | const unsigned char *vector, int len); | ||
485 | extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); | ||
486 | #else | ||
487 | static inline void dccp_ackvector_print(const u64 ackno, | ||
488 | const unsigned char *vector, | ||
489 | int len) { } | ||
490 | static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } | ||
491 | #endif | ||
492 | |||
493 | #endif /* _DCCP_H */ | ||
diff --git a/net/dccp/diag.c b/net/dccp/diag.c new file mode 100644 index 000000000000..f675d8e642d3 --- /dev/null +++ b/net/dccp/diag.c | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * net/dccp/diag.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@mandriva.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/inet_diag.h> | ||
16 | |||
17 | #include "ccid.h" | ||
18 | #include "dccp.h" | ||
19 | |||
20 | static void dccp_get_info(struct sock *sk, struct tcp_info *info) | ||
21 | { | ||
22 | struct dccp_sock *dp = dccp_sk(sk); | ||
23 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
24 | |||
25 | memset(info, 0, sizeof(*info)); | ||
26 | |||
27 | info->tcpi_state = sk->sk_state; | ||
28 | info->tcpi_retransmits = icsk->icsk_retransmits; | ||
29 | info->tcpi_probes = icsk->icsk_probes_out; | ||
30 | info->tcpi_backoff = icsk->icsk_backoff; | ||
31 | info->tcpi_pmtu = dp->dccps_pmtu_cookie; | ||
32 | |||
33 | if (dp->dccps_options.dccpo_send_ack_vector) | ||
34 | info->tcpi_options |= TCPI_OPT_SACK; | ||
35 | |||
36 | ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); | ||
37 | ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); | ||
38 | } | ||
39 | |||
40 | static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | ||
41 | void *_info) | ||
42 | { | ||
43 | r->idiag_rqueue = r->idiag_wqueue = 0; | ||
44 | |||
45 | if (_info != NULL) | ||
46 | dccp_get_info(sk, _info); | ||
47 | } | ||
48 | |||
/*
 * inet_diag handler description for DCCP: it points at the DCCP hash tables,
 * uses dccp_diag_get_info() to fill replies, is registered for the
 * DCCPDIAG_GETSOCK type and reports its info as a struct tcp_info.
 */
static struct inet_diag_handler dccp_diag_handler = {
	.idiag_hashinfo = &dccp_hashinfo,
	.idiag_get_info = dccp_diag_get_info,
	.idiag_type = DCCPDIAG_GETSOCK,
	.idiag_info_size = sizeof(struct tcp_info),
};
55 | |||
/* Module init: register the DCCP handler with inet_diag and return its result. */
static int __init dccp_diag_init(void)
{
	return inet_diag_register(&dccp_diag_handler);
}
60 | |||
/* Module exit: unregister the DCCP handler from inet_diag. */
static void __exit dccp_diag_fini(void)
{
	inet_diag_unregister(&dccp_diag_handler);
}
65 | |||
66 | module_init(dccp_diag_init); | ||
67 | module_exit(dccp_diag_fini); | ||
68 | |||
69 | MODULE_LICENSE("GPL"); | ||
70 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); | ||
71 | MODULE_DESCRIPTION("DCCP inet_diag handler"); | ||
diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 000000000000..ef29cef1dafe --- /dev/null +++ b/net/dccp/input.c | |||
@@ -0,0 +1,600 @@ | |||
1 | /* | ||
2 | * net/dccp/input.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "ccid.h" | ||
20 | #include "dccp.h" | ||
21 | |||
/*
 * Queue @skb as the end-of-stream marker for @sk (the equivalent of a TCP
 * FIN): mark the receive side shut down, flag the socket SOCK_DONE, strip
 * the DCCP header and append the skb to the receive queue, then notify the
 * reader through sk_data_ready.
 *
 * The skb is consumed (queued); the caller must not free it afterwards.
 */
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);
	/* dccph_doff * 4 bytes are pulled so the data starts after the header */
	__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);
	sk->sk_data_ready(sk, 0);
}
31 | |||
/*
 * Handle a received Close: answer with a Reset carrying
 * DCCP_RESET_CODE_CLOSED, queue @skb as the end-of-stream marker via
 * dccp_fin() (which consumes it), move the socket to DCCP_CLOSED and wake
 * asynchronous waiters with POLL_HUP.
 */
static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
{
	dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
	dccp_fin(sk, skb);
	dccp_set_state(sk, DCCP_CLOSED);
	sk_wake_async(sk, 1, POLL_HUP);
}
39 | |||
40 | static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) | ||
41 | { | ||
42 | /* | ||
43 | * Step 7: Check for unexpected packet types | ||
44 | * If (S.is_server and P.type == CloseReq) | ||
45 | * Send Sync packet acknowledging P.seqno | ||
46 | * Drop packet and return | ||
47 | */ | ||
48 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { | ||
49 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); | ||
50 | return; | ||
51 | } | ||
52 | |||
53 | dccp_set_state(sk, DCCP_CLOSING); | ||
54 | dccp_send_close(sk, 0); | ||
55 | } | ||
56 | |||
57 | static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | ||
58 | { | ||
59 | struct dccp_sock *dp = dccp_sk(sk); | ||
60 | |||
61 | if (dp->dccps_options.dccpo_send_ack_vector) | ||
62 | dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, | ||
63 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
64 | } | ||
65 | |||
/*
 * dccp_check_seqno - validate the sequence and ack numbers of @skb
 *
 * Implements Steps 5 and 6 of the draft's packet processing (quoted below).
 *
 * Returns 0 when the packet passed and GSR/SWL/SWH (and, where applicable,
 * GAR) were updated, or -1 when the caller should drop the packet.  When
 * Step 6 fails, a Sync acknowledging the packet's seqno is sent before
 * returning.  The skb itself is never freed here.
 */
static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	struct dccp_sock *dp = dccp_sk(sk);
	u64 lswl, lawl;

	/*
	 *   Step 5: Prepare sequence numbers for Sync
	 *     If P.type == Sync or P.type == SyncAck,
	 *	  If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
	 *	     / * P is valid, so update sequence number variables
	 *		 accordingly.  After this update, P will pass the tests
	 *		 in Step 6.  A SyncAck is generated if necessary in
	 *		 Step 15 * /
	 *	     Update S.GSR, S.SWL, S.SWH
	 *	  Otherwise,
	 *	     Drop packet and return
	 */
	if (dh->dccph_type == DCCP_PKT_SYNC ||
	    dh->dccph_type == DCCP_PKT_SYNCACK) {
		if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
			      dp->dccps_awl, dp->dccps_awh) &&
		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
			dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
		else
			return -1;
	}

	/*
	 *   Step 6: Check sequence numbers
	 *      Let LSWL = S.SWL and LAWL = S.AWL
	 *      If P.type == CloseReq or P.type == Close or P.type == Reset,
	 *	  LSWL := S.GSR + 1, LAWL := S.GAR
	 *      If LSWL <= P.seqno <= S.SWH
	 *	     and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
	 *	  Update S.GSR, S.SWL, S.SWH
	 *	  If P.type != Sync,
	 *	     Update S.GAR
	 *      Otherwise,
	 *	  Send Sync packet acknowledging P.seqno
	 *	  Drop packet and return
	 */
	lswl = dp->dccps_swl;
	lawl = dp->dccps_awl;

	/* tighter windows for the connection-teardown packet types */
	if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
	    dh->dccph_type == DCCP_PKT_CLOSE ||
	    dh->dccph_type == DCCP_PKT_RESET) {
		lswl = dp->dccps_gsr;
		dccp_inc_seqno(&lswl);
		lawl = dp->dccps_gar;
	}

	/* DCCP_PKT_WITHOUT_ACK_SEQ marks "P.ackno does not exist" */
	if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
	    (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
	     between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
		       lawl, dp->dccps_awh))) {
		dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);

		if (dh->dccph_type != DCCP_PKT_SYNC &&
		    (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
		     DCCP_PKT_WITHOUT_ACK_SEQ))
			dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
	} else {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
					    "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
					    "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
					    "sending SYNC...\n",
			       dccp_packet_name(dh->dccph_type),
			       (unsigned long long) lswl,
			       (unsigned long long)
			       DCCP_SKB_CB(skb)->dccpd_seq,
			       (unsigned long long) dp->dccps_swh,
			       (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
				DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
			       (unsigned long long) lawl,
			       (unsigned long long)
			       DCCP_SKB_CB(skb)->dccpd_ack_seq,
			       (unsigned long long) dp->dccps_awh);
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
		return -1;
	}

	return 0;
}
151 | |||
/*
 * dccp_rcv_established - process a packet for an established connection
 * @sk:  receiving socket
 * @skb: the packet; always consumed by this function
 * @dh:  DCCP header of @skb
 * @len: not used in this function
 *
 * After sequence checking, option parsing, ack-vector bookkeeping and the
 * CCID receive hooks, the packet is dispatched on its type.  Data and
 * DataAck are queued on the receive queue; Reset and Close are queued as
 * end-of-stream markers; every other type ends at the discard label where
 * the skb is freed.  Request and Response packets additionally bump
 * DCCP_MIB_INERRS, as does any type without a case of its own.
 *
 * Always returns 0.
 */
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct dccp_hdr *dh, const unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);

	if (dccp_check_seqno(sk, skb))
		goto discard;

	if (dccp_parse_options(sk, skb))
		goto discard;

	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_event_ack_recv(sk, skb);

	/*
	 * FIXME: check ECN to see if we should use
	 * DCCP_ACKPKTS_STATE_ECN_MARKED
	 */
	if (dp->dccps_options.dccpo_send_ack_vector) {
		struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;

		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
				     DCCP_SKB_CB(skb)->dccpd_seq,
				     DCCP_ACKPKTS_STATE_RECEIVED)) {
			LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
						    "packets buffer full!\n");
			/*
			 * DCCP_MAX_SEQNO + 1 is outside the valid seqno range;
			 * dccp_rcv_state_process() tests for this same value
			 */
			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MIN,
						  DCCP_RTO_MAX);
			goto discard;
		}

		/*
		 * FIXME: this activation is probably wrong, have to study more
		 * TCP delack machinery and how it fits into DCCP draft, but
		 * for now it kinda "works" 8)
		 */
		if (!inet_csk_ack_scheduled(sk)) {
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
						  DCCP_RTO_MAX);
		}
	}

	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

	switch (dccp_hdr(skb)->dccph_type) {
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_DATA:
		/*
		 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
		 * option if it is.
		 */
		__skb_pull(skb, dh->dccph_doff * 4);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk, 0);
		return 0;
	case DCCP_PKT_ACK:
		goto discard;
	case DCCP_PKT_RESET:
		/*
		 *  Step 9: Process Reset
		 *	If P.type == Reset,
		 *		Tear down connection
		 *		S.state := TIMEWAIT
		 *		Set TIMEWAIT timer
		 *		Drop packet and return
		 */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;
	case DCCP_PKT_CLOSEREQ:
		dccp_rcv_closereq(sk, skb);
		goto discard;
	case DCCP_PKT_CLOSE:
		/* dccp_rcv_close() queues the skb, so it must not be freed */
		dccp_rcv_close(sk, skb);
		return 0;
	case DCCP_PKT_REQUEST:
		/* Step 7
		 *   or (S.is_server and P.type == Response)
		 *   or (S.is_client and P.type == Request)
		 *   or (S.state >= OPEN and P.type == Request
		 *	and P.seqno >= S.OSR)
		 *   or (S.state >= OPEN and P.type == Response
		 *	and P.seqno >= S.OSR)
		 *   or (S.state == RESPOND and P.type == Data),
		 *  Send Sync packet acknowledging P.seqno
		 *  Drop packet and return
		 */
		if (dp->dccps_role != DCCP_ROLE_LISTEN)
			goto send_sync;
		goto check_seq;
	case DCCP_PKT_RESPONSE:
		if (dp->dccps_role != DCCP_ROLE_CLIENT)
			goto send_sync;
check_seq:
		/* a Sync is sent only when P.seqno >= S.OSR */
		if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
send_sync:
			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
				       DCCP_PKT_SYNC);
		}
		break;
	case DCCP_PKT_SYNC:
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
			       DCCP_PKT_SYNCACK);
		/*
		 * From the draft:
		 *
		 * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
		 * MAY have non-zero-length application data areas, whose
		 * contents receivers MUST ignore.
		 */
		goto discard;
	}

	/* reached by the "break" above and by types without a case */
	DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
discard:
	__kfree_skb(skb);
	return 0;
}
276 | |||
277 | static int dccp_rcv_request_sent_state_process(struct sock *sk, | ||
278 | struct sk_buff *skb, | ||
279 | const struct dccp_hdr *dh, | ||
280 | const unsigned len) | ||
281 | { | ||
282 | /* | ||
283 | * Step 4: Prepare sequence numbers in REQUEST | ||
284 | * If S.state == REQUEST, | ||
285 | * If (P.type == Response or P.type == Reset) | ||
286 | * and S.AWL <= P.ackno <= S.AWH, | ||
287 | * / * Set sequence number variables corresponding to the | ||
288 | * other endpoint, so P will pass the tests in Step 6 * / | ||
289 | * Set S.GSR, S.ISR, S.SWL, S.SWH | ||
290 | * / * Response processing continues in Step 10; Reset | ||
291 | * processing continues in Step 9 * / | ||
292 | */ | ||
293 | if (dh->dccph_type == DCCP_PKT_RESPONSE) { | ||
294 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
295 | struct dccp_sock *dp = dccp_sk(sk); | ||
296 | |||
297 | /* Stop the REQUEST timer */ | ||
298 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | ||
299 | BUG_TRAP(sk->sk_send_head != NULL); | ||
300 | __kfree_skb(sk->sk_send_head); | ||
301 | sk->sk_send_head = NULL; | ||
302 | |||
303 | if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
304 | dp->dccps_awl, dp->dccps_awh)) { | ||
305 | dccp_pr_debug("invalid ackno: S.AWL=%llu, " | ||
306 | "P.ackno=%llu, S.AWH=%llu \n", | ||
307 | (unsigned long long)dp->dccps_awl, | ||
308 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
309 | (unsigned long long)dp->dccps_awh); | ||
310 | goto out_invalid_packet; | ||
311 | } | ||
312 | |||
313 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
314 | dccp_update_gsr(sk, dp->dccps_isr); | ||
315 | /* | ||
316 | * SWL and AWL are initially adjusted so that they are not less than | ||
317 | * the initial Sequence Numbers received and sent, respectively: | ||
318 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
319 | * AWL := max(GSS - W' + 1, ISS). | ||
320 | * These adjustments MUST be applied only at the beginning of the | ||
321 | * connection. | ||
322 | * | ||
323 | * AWL was adjusted in dccp_v4_connect -acme | ||
324 | */ | ||
325 | dccp_set_seqno(&dp->dccps_swl, | ||
326 | max48(dp->dccps_swl, dp->dccps_isr)); | ||
327 | |||
328 | if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || | ||
329 | ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { | ||
330 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
331 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
332 | /* FIXME: send appropriate RESET code */ | ||
333 | goto out_invalid_packet; | ||
334 | } | ||
335 | |||
336 | dccp_sync_mss(sk, dp->dccps_pmtu_cookie); | ||
337 | |||
338 | /* | ||
339 | * Step 10: Process REQUEST state (second part) | ||
340 | * If S.state == REQUEST, | ||
341 | * / * If we get here, P is a valid Response from the | ||
342 | * server (see Step 4), and we should move to | ||
343 | * PARTOPEN state. PARTOPEN means send an Ack, | ||
344 | * don't send Data packets, retransmit Acks | ||
345 | * periodically, and always include any Init Cookie | ||
346 | * from the Response * / | ||
347 | * S.state := PARTOPEN | ||
348 | * Set PARTOPEN timer | ||
349 | * Continue with S.state == PARTOPEN | ||
350 | * / * Step 12 will send the Ack completing the | ||
351 | * three-way handshake * / | ||
352 | */ | ||
353 | dccp_set_state(sk, DCCP_PARTOPEN); | ||
354 | |||
355 | /* Make sure socket is routed, for correct metrics. */ | ||
356 | inet_sk_rebuild_header(sk); | ||
357 | |||
358 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
359 | sk->sk_state_change(sk); | ||
360 | sk_wake_async(sk, 0, POLL_OUT); | ||
361 | } | ||
362 | |||
363 | if (sk->sk_write_pending || icsk->icsk_ack.pingpong || | ||
364 | icsk->icsk_accept_queue.rskq_defer_accept) { | ||
365 | /* Save one ACK. Data will be ready after | ||
366 | * several ticks, if write_pending is set. | ||
367 | * | ||
368 | * It may be deleted, but with this feature tcpdumps | ||
369 | * look so _wonderfully_ clever, that I was not able | ||
370 | * to stand against the temptation 8) --ANK | ||
371 | */ | ||
372 | /* | ||
373 | * OK, in DCCP we can as well do a similar trick, its | ||
374 | * even in the draft, but there is no need for us to | ||
375 | * schedule an ack here, as dccp_sendmsg does this for | ||
376 | * us, also stated in the draft. -acme | ||
377 | */ | ||
378 | __kfree_skb(skb); | ||
379 | return 0; | ||
380 | } | ||
381 | dccp_send_ack(sk); | ||
382 | return -1; | ||
383 | } | ||
384 | |||
385 | out_invalid_packet: | ||
386 | return 1; /* dccp_v4_do_rcv will send a reset, but... | ||
387 | FIXME: the reset code should be | ||
388 | DCCP_RESET_CODE_PACKET_ERROR */ | ||
389 | } | ||
390 | |||
/*
 * dccp_rcv_respond_partopen_state_process - packet handling for the RESPOND
 * and PARTOPEN states
 *
 * A Reset clears the delayed-ack timer.  An Ack or DataAck records the
 * packet's seqno as OSR and moves the socket to DCCP_OPEN; a DataAck is
 * then handed on to dccp_rcv_established(), which consumes the skb.  Other
 * packet types are ignored here.
 *
 * Returns 1 when the skb was passed to dccp_rcv_established() (the caller
 * must not free it), 0 otherwise.  @len is only forwarded.
 */
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
						   struct sk_buff *skb,
						   const struct dccp_hdr *dh,
						   const unsigned len)
{
	int queued = 0;

	switch (dh->dccph_type) {
	case DCCP_PKT_RESET:
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		break;
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_ACK:
		/*
		 * FIXME: we should be resetting the PARTOPEN (DELACK) timer
		 * here but only if we haven't used the DELACK timer for
		 * something else, like sending a delayed ack for a TIMESTAMP
		 * echo, etc, for now we're not clearing it, sending an extra
		 * ACK when there is nothing else to do in DELACK is not a big
		 * deal after all.
		 */

		/* Stop the PARTOPEN timer */
		if (sk->sk_state == DCCP_PARTOPEN)
			inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);

		dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
		dccp_set_state(sk, DCCP_OPEN);

		if (dh->dccph_type == DCCP_PKT_DATAACK) {
			dccp_rcv_established(sk, skb, dh, len);
			queued = 1; /* packet was queued
				       (by dccp_rcv_established) */
		}
		break;
	}

	return queued;
}
430 | |||
/*
 * dccp_rcv_state_process - packet processing for sockets outside the
 * established fast path
 * @sk:  receiving socket
 * @skb: the packet
 * @dh:  DCCP header of @skb
 * @len: forwarded to the per-state helpers
 *
 * Returns 1 when the caller should answer with a Reset: in LISTEN when
 * dccp_v4_conn_request() fails or the packet is neither a Request nor a
 * Reset, in CLOSED, and when the REQUEST-state helper rejects the packet.
 * In those cases the skb is not freed here.  Returns 0 otherwise; the skb
 * has then been either queued or freed.
 */
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			   struct dccp_hdr *dh, unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	/* state on entry; the helpers below may change sk->sk_state */
	const int old_state = sk->sk_state;
	int queued = 0;

	/*
	 *  Step 3: Process LISTEN state
	 *  	(Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
	 *
	 *     If S.state == LISTEN,
	 *	  If P.type == Request or P contains a valid Init Cookie
	 *	  	option,
	 *	     * Must scan the packet's options to check for an Init
	 *		Cookie.  Only the Init Cookie is processed here,
	 *		however; other options are processed in Step 8.  This
	 *		scan need only be performed if the endpoint uses Init
	 *		Cookies *
	 *	     * Generate a new socket and switch to that socket *
	 *	     Set S := new socket for this port pair
	 *	     S.state = RESPOND
	 *	     Choose S.ISS (initial seqno) or set from Init Cookie
	 *	     Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *	     Continue with S.state == RESPOND
	 *	     * A Response packet will be generated in Step 11 *
	 *	  Otherwise,
	 *	     Generate Reset(No Connection) unless P.type == Reset
	 *	     Drop packet and return
	 *
	 * NOTE: the check for the packet types is done in
	 *	 dccp_rcv_state_process
	 */
	if (sk->sk_state == DCCP_LISTEN) {
		if (dh->dccph_type == DCCP_PKT_REQUEST) {
			if (dccp_v4_conn_request(sk, skb) < 0)
				return 1;

			/* FIXME: do congestion control initialization */
			goto discard;
		}
		if (dh->dccph_type == DCCP_PKT_RESET)
			goto discard;

		/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
		return 1;
	}

	/* in REQUESTING the sequence numbers are checked by the state helper */
	if (sk->sk_state != DCCP_REQUESTING) {
		if (dccp_check_seqno(sk, skb))
			goto discard;

		/*
		 * Step 8: Process options and mark acknowledgeable
		 */
		if (dccp_parse_options(sk, skb))
			goto discard;

		if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
		    DCCP_PKT_WITHOUT_ACK_SEQ)
			dccp_event_ack_recv(sk, skb);

		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

		/*
		 * FIXME: check ECN to see if we should use
		 * DCCP_ACKPKTS_STATE_ECN_MARKED
		 */
		if (dp->dccps_options.dccpo_send_ack_vector) {
			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
					     DCCP_SKB_CB(skb)->dccpd_seq,
					     DCCP_ACKPKTS_STATE_RECEIVED))
				goto discard;
			/*
			 * FIXME: this activation is probably wrong, have to
			 * study more TCP delack machinery and how it fits into
			 * DCCP draft, but for now it kinda "works" 8)
			 */
			if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
			     DCCP_MAX_SEQNO + 1) &&
			    !inet_csk_ack_scheduled(sk)) {
				inet_csk_schedule_ack(sk);
				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
							  TCP_DELACK_MIN,
							  DCCP_RTO_MAX);
			}
		}
	}

	/*
	 *  Step 9: Process Reset
	 *	If P.type == Reset,
	 *		Tear down connection
	 *		S.state := TIMEWAIT
	 *		Set TIMEWAIT timer
	 *		Drop packet and return
	 */
	if (dh->dccph_type == DCCP_PKT_RESET) {
		/*
		 * Queue the equivalent of TCP fin so that dccp_recvmsg
		 * exits the loop
		 */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;
		/*
		 *   Step 7: Check for unexpected packet types
		 *      If (S.is_server and P.type == CloseReq)
		 *	    or (S.is_server and P.type == Response)
		 *	    or (S.is_client and P.type == Request)
		 *	    or (S.state == RESPOND and P.type == Data),
		 *	  Send Sync packet acknowledging P.seqno
		 *	  Drop packet and return
		 */
	} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
		    (dh->dccph_type == DCCP_PKT_RESPONSE ||
		     dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
		   (dp->dccps_role == DCCP_ROLE_CLIENT &&
		    dh->dccph_type == DCCP_PKT_REQUEST) ||
		   (sk->sk_state == DCCP_RESPOND &&
		    dh->dccph_type == DCCP_PKT_DATA)) {
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
			       DCCP_PKT_SYNC);
		goto discard;
	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
		dccp_rcv_closereq(sk, skb);
		goto discard;
	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
		/* dccp_rcv_close() queues the skb, so it is not freed here */
		dccp_rcv_close(sk, skb);
		return 0;
	}

	switch (sk->sk_state) {
	case DCCP_CLOSED:
		return 1;

	case DCCP_REQUESTING:
		/* FIXME: do congestion control initialization */

		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
		if (queued >= 0)
			return queued;

		/* negative result: the helper left the skb for us to free */
		__kfree_skb(skb);
		return 0;

	case DCCP_RESPOND:
	case DCCP_PARTOPEN:
		queued = dccp_rcv_respond_partopen_state_process(sk, skb,
								 dh, len);
		break;
	}

	/* an Ack/DataAck received in PARTOPEN: notify whoever waits on the socket */
	if (dh->dccph_type == DCCP_PKT_ACK ||
	    dh->dccph_type == DCCP_PKT_DATAACK) {
		switch (old_state) {
		case DCCP_PARTOPEN:
			sk->sk_state_change(sk);
			sk_wake_async(sk, 0, POLL_OUT);
			break;
		}
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 000000000000..3fc75dbee4b8 --- /dev/null +++ b/net/dccp/ipv4.c | |||
@@ -0,0 +1,1356 @@ | |||
1 | /* | ||
2 | * net/dccp/ipv4.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/icmp.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/random.h> | ||
19 | |||
20 | #include <net/icmp.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/sock.h> | ||
23 | #include <net/tcp_states.h> | ||
24 | #include <net/xfrm.h> | ||
25 | |||
26 | #include "ccid.h" | ||
27 | #include "dccp.h" | ||
28 | |||
29 | struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { | ||
30 | .lhash_lock = RW_LOCK_UNLOCKED, | ||
31 | .lhash_users = ATOMIC_INIT(0), | ||
32 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), | ||
33 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
34 | .port_rover = 1024 - 1, | ||
35 | }; | ||
36 | |||
37 | EXPORT_SYMBOL_GPL(dccp_hashinfo); | ||
38 | |||
39 | static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) | ||
40 | { | ||
41 | return inet_csk_get_port(&dccp_hashinfo, sk, snum); | ||
42 | } | ||
43 | |||
44 | static void dccp_v4_hash(struct sock *sk) | ||
45 | { | ||
46 | inet_hash(&dccp_hashinfo, sk); | ||
47 | } | ||
48 | |||
49 | static void dccp_v4_unhash(struct sock *sk) | ||
50 | { | ||
51 | inet_unhash(&dccp_hashinfo, sk); | ||
52 | } | ||
53 | |||
54 | /* called with local bh disabled */ | ||
55 | static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, | ||
56 | struct inet_timewait_sock **twp) | ||
57 | { | ||
58 | struct inet_sock *inet = inet_sk(sk); | ||
59 | const u32 daddr = inet->rcv_saddr; | ||
60 | const u32 saddr = inet->daddr; | ||
61 | const int dif = sk->sk_bound_dev_if; | ||
62 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
63 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
64 | const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, | ||
65 | dccp_hashinfo.ehash_size); | ||
66 | struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; | ||
67 | const struct sock *sk2; | ||
68 | const struct hlist_node *node; | ||
69 | struct inet_timewait_sock *tw; | ||
70 | |||
71 | write_lock(&head->lock); | ||
72 | |||
73 | /* Check TIME-WAIT sockets first. */ | ||
74 | sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { | ||
75 | tw = inet_twsk(sk2); | ||
76 | |||
77 | if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | ||
78 | goto not_unique; | ||
79 | } | ||
80 | tw = NULL; | ||
81 | |||
82 | /* And established part... */ | ||
83 | sk_for_each(sk2, node, &head->chain) { | ||
84 | if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | ||
85 | goto not_unique; | ||
86 | } | ||
87 | |||
88 | /* Must record num and sport now. Otherwise we will see | ||
89 | * in hash table socket with a funny identity. */ | ||
90 | inet->num = lport; | ||
91 | inet->sport = htons(lport); | ||
92 | sk->sk_hashent = hash; | ||
93 | BUG_TRAP(sk_unhashed(sk)); | ||
94 | __sk_add_node(sk, &head->chain); | ||
95 | sock_prot_inc_use(sk->sk_prot); | ||
96 | write_unlock(&head->lock); | ||
97 | |||
98 | if (twp != NULL) { | ||
99 | *twp = tw; | ||
100 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
101 | } else if (tw != NULL) { | ||
102 | /* Silly. Should hash-dance instead... */ | ||
103 | inet_twsk_deschedule(tw, &dccp_death_row); | ||
104 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
105 | |||
106 | inet_twsk_put(tw); | ||
107 | } | ||
108 | |||
109 | return 0; | ||
110 | |||
111 | not_unique: | ||
112 | write_unlock(&head->lock); | ||
113 | return -EADDRNOTAVAIL; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Bind a port for a connect operation and hash it. | ||
 *
 * Two cases, selected by inet_sk(sk)->num:
 *
 *  - num == 0 (no local port yet): try up to (high - low + 1) candidate
 *    ports, starting just after dccp_hashinfo.port_rover and wrapping
 *    inside sysctl_local_port_range.  A port that already has a bind
 *    bucket is only usable if that bucket has fastreuse < 0 and
 *    __dccp_v4_check_established() accepts the resulting 4-tuple; a port
 *    without a bucket gets a fresh one marked fastreuse = -1.
 *
 *  - num != 0 (already bound): if sk is the only owner of its bind
 *    bucket it is hashed directly, otherwise the established table is
 *    consulted via __dccp_v4_check_established().
 *
 * Returns 0 on success, otherwise -EADDRNOTAVAIL (either from the port
 * search here or from __dccp_v4_check_established()).
118 | */ | ||
119 | static int dccp_v4_hash_connect(struct sock *sk) | ||
120 | { | ||
121 | const unsigned short snum = inet_sk(sk)->num; | ||
122 | struct inet_bind_hashbucket *head; | ||
123 | struct inet_bind_bucket *tb; | ||
124 | int ret; | ||
125 | |||
126 | if (snum == 0) { | ||
127 | int rover; | ||
128 | int low = sysctl_local_port_range[0]; | ||
129 | int high = sysctl_local_port_range[1]; | ||
130 | int remaining = (high - low) + 1; | ||
131 | struct hlist_node *node; | ||
132 | struct inet_timewait_sock *tw = NULL; | ||
133 | |||
/* Bottom halves stay disabled until "out:" (or the failure return). */
134 | local_bh_disable(); | ||
135 | |||
136 | /* TODO. Actually it is not so bad idea to remove | ||
137 | * dccp_hashinfo.portalloc_lock before next submission to | ||
138 | * Linus. | ||
139 | * As soon as we touch this place at all it is time to think. | ||
140 | * | ||
141 | * Now it protects single _advisory_ variable | ||
142 | * dccp_hashinfo.port_rover, hence it is mostly useless. | ||
143 | * Code will work nicely if we just delete it, but | ||
144 | * I am afraid in contended case it will work not better or | ||
145 | * even worse: another cpu just will hit the same bucket | ||
146 | * and spin there. | ||
147 | * So some cpu salt could remove both contention and | ||
148 | * memory pingpong. Any ideas how to do this in a nice way? | ||
149 | */ | ||
150 | spin_lock(&dccp_hashinfo.portalloc_lock); | ||
151 | rover = dccp_hashinfo.port_rover; | ||
152 | |||
153 | do { | ||
/* Advance first, then wrap back into [low, high]. */
154 | rover++; | ||
155 | if ((rover < low) || (rover > high)) | ||
156 | rover = low; | ||
157 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, | ||
158 | dccp_hashinfo.bhash_size)]; | ||
159 | spin_lock(&head->lock); | ||
160 | |||
161 | /* Does not bother with rcv_saddr checks, | ||
162 | * because the established check is already | ||
163 | * unique enough. | ||
164 | */ | ||
165 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
166 | if (tb->port == rover) { | ||
167 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
/* Buckets with fastreuse >= 0 are never shared by connect();
 * only buckets marked -1 (as created below) are candidates. */
168 | if (tb->fastreuse >= 0) | ||
169 | goto next_port; | ||
170 | if (!__dccp_v4_check_established(sk, | ||
171 | rover, | ||
172 | &tw)) | ||
173 | goto ok; | ||
174 | goto next_port; | ||
175 | } | ||
176 | } | ||
177 | |||
/* No bucket for this port yet: create one owned by connect(). */
178 | tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, | ||
179 | head, rover); | ||
180 | if (tb == NULL) { | ||
/* Allocation failed: give up the search; the code after the
 * loop returns -EADDRNOTAVAIL. */
181 | spin_unlock(&head->lock); | ||
182 | break; | ||
183 | } | ||
184 | tb->fastreuse = -1; | ||
185 | goto ok; | ||
186 | |||
187 | next_port: | ||
188 | spin_unlock(&head->lock); | ||
189 | } while (--remaining > 0); | ||
/* Search exhausted (or aborted): remember where we stopped. */
190 | dccp_hashinfo.port_rover = rover; | ||
191 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
192 | |||
193 | local_bh_enable(); | ||
194 | |||
195 | return -EADDRNOTAVAIL; | ||
196 | |||
197 | ok: | ||
198 | /* All locks still held and bhs disabled */ | ||
199 | dccp_hashinfo.port_rover = rover; | ||
200 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
201 | |||
202 | inet_bind_hash(sk, tb, rover); | ||
/* Still unhashed when the port came from a freshly created bucket
 * (the __dccp_v4_check_established() path hashes sk itself). */
203 | if (sk_unhashed(sk)) { | ||
204 | inet_sk(sk)->sport = htons(rover); | ||
205 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
206 | } | ||
207 | spin_unlock(&head->lock); | ||
208 | |||
/* Dispose of a TIME-WAIT socket handed back through &tw, if any. */
209 | if (tw != NULL) { | ||
210 | inet_twsk_deschedule(tw, &dccp_death_row); | ||
211 | inet_twsk_put(tw); | ||
212 | } | ||
213 | |||
214 | ret = 0; | ||
215 | goto out; | ||
216 | } | ||
217 | |||
/* The socket already has a local port (snum != 0). */
218 | head = &dccp_hashinfo.bhash[inet_bhashfn(snum, | ||
219 | dccp_hashinfo.bhash_size)]; | ||
220 | tb = inet_csk(sk)->icsk_bind_hash; | ||
221 | spin_lock_bh(&head->lock); | ||
/* sk is the first and only owner of its bind bucket: no other socket
 * can share the port, so it can be hashed without further checks. */
222 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | ||
223 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
224 | spin_unlock_bh(&head->lock); | ||
225 | return 0; | ||
226 | } else { | ||
/* Plain unlock on purpose: bottom halves remain disabled (from
 * spin_lock_bh above) for __dccp_v4_check_established() and are
 * re-enabled at "out:". */
227 | spin_unlock(&head->lock); | ||
228 | /* No definite answer... Walk to established hash table */ | ||
229 | ret = __dccp_v4_check_established(sk, snum, NULL); | ||
/* Also the exit of the snum == 0 success path above. */
230 | out: | ||
231 | local_bh_enable(); | ||
232 | return ret; | ||
233 | } | ||
234 | } | ||
235 | |||
/*
 * Start an active open on @sk towards the IPv4 address in @uaddr.
 *
 * Steps: validate the address, obtain a route (honouring a source-route
 * option if one is set on the socket), fill in the socket addresses, move
 * to DCCP_REQUESTING, bind and hash a local port, re-route with the final
 * ports, choose the initial sequence number and call dccp_connect().
 *
 * Returns 0 on success; -EINVAL for a short address or a zero destination
 * with source routing; -EAFNOSUPPORT for a non-AF_INET address;
 * -ENETUNREACH for multicast/broadcast routes; otherwise the error
 * reported by ip_route_connect(), dccp_v4_hash_connect(),
 * ip_route_newports() or dccp_connect().  On failure after the state
 * change the socket is put back to DCCP_CLOSED.
 */
236 | static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | ||
237 | int addr_len) | ||
238 | { | ||
239 | struct inet_sock *inet = inet_sk(sk); | ||
240 | struct dccp_sock *dp = dccp_sk(sk); | ||
241 | const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | ||
242 | struct rtable *rt; | ||
243 | u32 daddr, nexthop; | ||
244 | int tmp; | ||
245 | int err; | ||
246 | |||
/* NOTE(review): the role is set before the arguments are validated, so
 * it changes even when one of the early returns below is taken. */
247 | dp->dccps_role = DCCP_ROLE_CLIENT; | ||
248 | |||
249 | if (addr_len < sizeof(struct sockaddr_in)) | ||
250 | return -EINVAL; | ||
251 | |||
252 | if (usin->sin_family != AF_INET) | ||
253 | return -EAFNOSUPPORT; | ||
254 | |||
/* With a source-route option the route is looked up towards the first
 * hop (opt->faddr) instead of the final destination. */
255 | nexthop = daddr = usin->sin_addr.s_addr; | ||
256 | if (inet->opt != NULL && inet->opt->srr) { | ||
257 | if (daddr == 0) | ||
258 | return -EINVAL; | ||
259 | nexthop = inet->opt->faddr; | ||
260 | } | ||
261 | |||
262 | tmp = ip_route_connect(&rt, nexthop, inet->saddr, | ||
263 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | ||
264 | IPPROTO_DCCP, | ||
265 | inet->sport, usin->sin_port, sk); | ||
266 | if (tmp < 0) | ||
267 | return tmp; | ||
268 | |||
/* Connections to multicast/broadcast destinations are refused. */
269 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | ||
270 | ip_rt_put(rt); | ||
271 | return -ENETUNREACH; | ||
272 | } | ||
273 | |||
/* Without source routing, use the destination the route resolved to. */
274 | if (inet->opt == NULL || !inet->opt->srr) | ||
275 | daddr = rt->rt_dst; | ||
276 | |||
/* Unbound socket: adopt the source address chosen by routing. */
277 | if (inet->saddr == 0) | ||
278 | inet->saddr = rt->rt_src; | ||
279 | inet->rcv_saddr = inet->saddr; | ||
280 | |||
281 | inet->dport = usin->sin_port; | ||
282 | inet->daddr = daddr; | ||
283 | |||
284 | dp->dccps_ext_header_len = 0; | ||
285 | if (inet->opt != NULL) | ||
286 | dp->dccps_ext_header_len = inet->opt->optlen; | ||
287 | /* | ||
288 | * Socket identity is still unknown (sport may be zero). | ||
289 | * However we set state to DCCP_REQUESTING and not releasing socket | ||
290 | * lock select source port, enter ourselves into the hash tables and | ||
291 | * complete initialization after this. | ||
292 | */ | ||
293 | dccp_set_state(sk, DCCP_REQUESTING); | ||
294 | err = dccp_v4_hash_connect(sk); | ||
295 | if (err != 0) | ||
296 | goto failure; | ||
297 | |||
/* The local port is known now; refresh the route with the final ports. */
298 | err = ip_route_newports(&rt, inet->sport, inet->dport, sk); | ||
299 | if (err != 0) | ||
300 | goto failure; | ||
301 | |||
302 | /* OK, now commit destination to socket. */ | ||
303 | sk_setup_caps(sk, &rt->u.dst); | ||
304 | |||
/* GAR and ISS both start at the freshly generated sequence number. */
305 | dp->dccps_gar = | ||
306 | dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, | ||
307 | inet->daddr, | ||
308 | inet->sport, | ||
309 | usin->sin_port); | ||
310 | dccp_update_gss(sk, dp->dccps_iss); | ||
311 | |||
312 | /* | ||
313 | * SWL and AWL are initially adjusted so that they are not less than | ||
314 | * the initial Sequence Numbers received and sent, respectively: | ||
315 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
316 | * AWL := max(GSS - W' + 1, ISS). | ||
317 | * These adjustments MUST be applied only at the beginning of the | ||
318 | * connection. | ||
319 | */ | ||
320 | dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); | ||
321 | |||
322 | inet->id = dp->dccps_iss ^ jiffies; | ||
323 | |||
324 | err = dccp_connect(sk); | ||
/* rt is cleared so that the failure path below calls ip_rt_put(NULL);
 * presumably the route now belongs to the socket via sk_setup_caps()
 * above - TODO confirm. */
325 | rt = NULL; | ||
326 | if (err != 0) | ||
327 | goto failure; | ||
328 | out: | ||
329 | return err; | ||
330 | failure: | ||
331 | /* | ||
332 | * This unhashes the socket and releases the local port, if necessary. | ||
333 | */ | ||
334 | dccp_set_state(sk, DCCP_CLOSED); | ||
335 | ip_rt_put(rt); | ||
336 | sk->sk_route_caps = 0; | ||
337 | inet->dport = 0; | ||
338 | goto out; | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * This routine does path mtu discovery as defined in RFC1191. | ||
343 | */ | ||
344 | static inline void dccp_do_pmtu_discovery(struct sock *sk, | ||
345 | const struct iphdr *iph, | ||
346 | u32 mtu) | ||
347 | { | ||
348 | struct dst_entry *dst; | ||
349 | const struct inet_sock *inet = inet_sk(sk); | ||
350 | const struct dccp_sock *dp = dccp_sk(sk); | ||
351 | |||
352 | /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs | ||
353 | * send out by Linux are always < 576bytes so they should go through | ||
354 | * unfragmented). | ||
355 | */ | ||
356 | if (sk->sk_state == DCCP_LISTEN) | ||
357 | return; | ||
358 | |||
359 | /* We don't check in the destentry if pmtu discovery is forbidden | ||
360 | * on this route. We just assume that no packet_to_big packets | ||
361 | * are send back when pmtu discovery is not active. | ||
362 | * There is a small race when the user changes this flag in the | ||
363 | * route, but I think that's acceptable. | ||
364 | */ | ||
365 | if ((dst = __sk_dst_check(sk, 0)) == NULL) | ||
366 | return; | ||
367 | |||
368 | dst->ops->update_pmtu(dst, mtu); | ||
369 | |||
370 | /* Something is about to be wrong... Remember soft error | ||
371 | * for the case, if this connection will not able to recover. | ||
372 | */ | ||
373 | if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) | ||
374 | sk->sk_err_soft = EMSGSIZE; | ||
375 | |||
376 | mtu = dst_mtu(dst); | ||
377 | |||
378 | if (inet->pmtudisc != IP_PMTUDISC_DONT && | ||
379 | dp->dccps_pmtu_cookie > mtu) { | ||
380 | dccp_sync_mss(sk, mtu); | ||
381 | |||
382 | /* | ||
383 | * From: draft-ietf-dccp-spec-11.txt | ||
384 | * | ||
385 | * DCCP-Sync packets are the best choice for upward | ||
386 | * probing, since DCCP-Sync probes do not risk application | ||
387 | * data loss. | ||
388 | */ | ||
389 | dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); | ||
390 | } /* else let the usual retransmit timer handle it */ | ||
391 | } | ||
392 | |||
393 | static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) | ||
394 | { | ||
395 | int err; | ||
396 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
397 | const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + | ||
398 | sizeof(struct dccp_hdr_ext) + | ||
399 | sizeof(struct dccp_hdr_ack_bits); | ||
400 | struct sk_buff *skb; | ||
401 | |||
402 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
403 | return; | ||
404 | |||
405 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
406 | if (skb == NULL) | ||
407 | return; | ||
408 | |||
409 | /* Reserve space for headers. */ | ||
410 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
411 | |||
412 | skb->dst = dst_clone(rxskb->dst); | ||
413 | |||
414 | skb->h.raw = skb_push(skb, dccp_hdr_ack_len); | ||
415 | dh = dccp_hdr(skb); | ||
416 | memset(dh, 0, dccp_hdr_ack_len); | ||
417 | |||
418 | /* Build DCCP header and checksum it. */ | ||
419 | dh->dccph_type = DCCP_PKT_ACK; | ||
420 | dh->dccph_sport = rxdh->dccph_dport; | ||
421 | dh->dccph_dport = rxdh->dccph_sport; | ||
422 | dh->dccph_doff = dccp_hdr_ack_len / 4; | ||
423 | dh->dccph_x = 1; | ||
424 | |||
425 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
426 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), | ||
427 | DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
428 | |||
429 | bh_lock_sock(dccp_ctl_socket->sk); | ||
430 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
431 | rxskb->nh.iph->daddr, | ||
432 | rxskb->nh.iph->saddr, NULL); | ||
433 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
434 | |||
435 | if (err == NET_XMIT_CN || err == 0) { | ||
436 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
437 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
438 | } | ||
439 | } | ||
440 | |||
/*
 * Acknowledge @skb on behalf of a pending connection request.  @req is
 * not consulted; the work is done by dccp_v4_ctl_send_ack().
 */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	struct sk_buff *rxskb = skb;

	dccp_v4_ctl_send_ack(rxskb);
}
446 | |||
447 | static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, | ||
448 | struct dst_entry *dst) | ||
449 | { | ||
450 | int err = -1; | ||
451 | struct sk_buff *skb; | ||
452 | |||
453 | /* First, grab a route. */ | ||
454 | |||
455 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
456 | goto out; | ||
457 | |||
458 | skb = dccp_make_response(sk, dst, req); | ||
459 | if (skb != NULL) { | ||
460 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
461 | |||
462 | err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, | ||
463 | ireq->rmt_addr, | ||
464 | ireq->opt); | ||
465 | if (err == NET_XMIT_CN) | ||
466 | err = 0; | ||
467 | } | ||
468 | |||
469 | out: | ||
470 | dst_release(dst); | ||
471 | return err; | ||
472 | } | ||
473 | |||
474 | /* | ||
475 | * This routine is called by the ICMP module when it gets some sort of error | ||
476 | * condition. If err < 0 then the socket should be closed and the error | ||
477 | * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. | ||
478 | * After adjustment header points to the first 8 bytes of the DCCP header. We | ||
479 | * need to find the appropriate port. | ||
480 | * | ||
481 | * The locking strategy used here is very "optimistic". When someone else | ||
482 | * accesses the socket the ICMP is just dropped and for some paths there is no | ||
483 | * check at all. A more general error queue to queue errors for later handling | ||
484 | * is probably better. | ||
 *
 * @skb:  the ICMP message; skb->data is read as the quoted IP header of
 *        the offending packet, followed by its DCCP header
 * @info: extra ICMP information; only used as the MTU for
 *        ICMP_FRAG_NEEDED
485 | */ | ||
486 | void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
487 | { | ||
488 | const struct iphdr *iph = (struct iphdr *)skb->data; | ||
489 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + | ||
490 | (iph->ihl << 2)); | ||
491 | struct dccp_sock *dp; | ||
492 | struct inet_sock *inet; | ||
493 | const int type = skb->h.icmph->type; | ||
494 | const int code = skb->h.icmph->code; | ||
495 | struct sock *sk; | ||
496 | __u64 seq; | ||
497 | int err; | ||
498 | |||
/* The quoted IP header plus 8 bytes of transport header must be present
 * before dh is dereferenced for the socket lookup below. */
499 | if (skb->len < (iph->ihl << 2) + 8) { | ||
500 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
501 | return; | ||
502 | } | ||
503 | |||
/* The quoted packet was sent by us, so its destination is the peer. */
504 | sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, | ||
505 | iph->saddr, dh->dccph_sport, inet_iif(skb)); | ||
506 | if (sk == NULL) { | ||
507 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
508 | return; | ||
509 | } | ||
510 | |||
/* TIME-WAIT entries are a different structure: just drop the reference. */
511 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
512 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
513 | return; | ||
514 | } | ||
515 | |||
516 | bh_lock_sock(sk); | ||
517 | /* If too many ICMPs get dropped on busy | ||
518 | * servers this needs to be solved differently. | ||
519 | */ | ||
/* Only counted here; each path below re-checks sock_owned_by_user()
 * before it modifies the socket. */
520 | if (sock_owned_by_user(sk)) | ||
521 | NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); | ||
522 | |||
523 | if (sk->sk_state == DCCP_CLOSED) | ||
524 | goto out; | ||
525 | |||
526 | dp = dccp_sk(sk); | ||
/* NOTE(review): dccp_hdr_seq() is handed the ICMP skb rather than dh.
 * skb->h is used as the ICMP header above (skb->h.icmph) - confirm that
 * the sequence number really comes from the quoted DCCP header.  Also,
 * the length check above only guarantees 8 bytes of that header. */
527 | seq = dccp_hdr_seq(skb); | ||
528 | if (sk->sk_state != DCCP_LISTEN && | ||
529 | !between48(seq, dp->dccps_swl, dp->dccps_swh)) { | ||
/* NOTE(review): the other counters in this function use the _BH
 * variant of the stats macros. */
530 | NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); | ||
531 | goto out; | ||
532 | } | ||
533 | |||
/* Translate the ICMP type/code into an errno, or bail out. */
534 | switch (type) { | ||
535 | case ICMP_SOURCE_QUENCH: | ||
536 | /* Just silently ignore these. */ | ||
537 | goto out; | ||
538 | case ICMP_PARAMETERPROB: | ||
539 | err = EPROTO; | ||
540 | break; | ||
541 | case ICMP_DEST_UNREACH: | ||
542 | if (code > NR_ICMP_UNREACH) | ||
543 | goto out; | ||
544 | |||
545 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ | ||
546 | if (!sock_owned_by_user(sk)) | ||
547 | dccp_do_pmtu_discovery(sk, iph, info); | ||
548 | goto out; | ||
549 | } | ||
550 | |||
551 | err = icmp_err_convert[code].errno; | ||
552 | break; | ||
553 | case ICMP_TIME_EXCEEDED: | ||
554 | err = EHOSTUNREACH; | ||
555 | break; | ||
556 | default: | ||
557 | goto out; | ||
558 | } | ||
559 | |||
/* Deliver the error according to the connection state. */
560 | switch (sk->sk_state) { | ||
561 | struct request_sock *req , **prev; | ||
562 | case DCCP_LISTEN: | ||
563 | if (sock_owned_by_user(sk)) | ||
564 | goto out; | ||
565 | req = inet_csk_search_req(sk, &prev, dh->dccph_dport, | ||
566 | iph->daddr, iph->saddr); | ||
567 | if (!req) | ||
568 | goto out; | ||
569 | |||
570 | /* | ||
571 | * ICMPs are not backlogged, hence we cannot get an established | ||
572 | * socket here. | ||
573 | */ | ||
574 | BUG_TRAP(!req->sk); | ||
575 | |||
/* The error must quote exactly the sequence number we sent for
 * this request. */
576 | if (seq != dccp_rsk(req)->dreq_iss) { | ||
577 | NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); | ||
578 | goto out; | ||
579 | } | ||
580 | /* | ||
581 | * Still in RESPOND, just remove it silently. | ||
582 | * There is no good way to pass the error to the newly | ||
583 | * created socket, and POSIX does not want network | ||
584 | * errors returned from accept(). | ||
585 | */ | ||
586 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
587 | goto out; | ||
588 | |||
589 | case DCCP_REQUESTING: | ||
590 | case DCCP_RESPOND: | ||
/* Handshake still in progress: a hard error ends the attempt, unless a
 * user context owns the socket, in which case it is only recorded as a
 * soft error. */
591 | if (!sock_owned_by_user(sk)) { | ||
592 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
593 | sk->sk_err = err; | ||
594 | |||
595 | sk->sk_error_report(sk); | ||
596 | |||
597 | dccp_done(sk); | ||
598 | } else | ||
599 | sk->sk_err_soft = err; | ||
600 | goto out; | ||
601 | } | ||
602 | |||
603 | /* If we've already connected we will keep trying | ||
604 | * until we time out, or the user gives up. | ||
605 | * | ||
606 | * rfc1122 4.2.3.9 allows to consider as hard errors | ||
607 | * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, | ||
608 | * but it is obsoleted by pmtu discovery). | ||
609 | * | ||
610 | * Note, that in modern internet, where routing is unreliable | ||
611 | * and in each dark corner broken firewalls sit, sending random | ||
612 | * errors ordered by their masters even this two messages finally lose | ||
613 | * their original sense (even Linux sends invalid PORT_UNREACHs) | ||
614 | * | ||
615 | * Now we are in compliance with RFCs. | ||
616 | * --ANK (980905) | ||
617 | */ | ||
618 | |||
619 | inet = inet_sk(sk); | ||
620 | if (!sock_owned_by_user(sk) && inet->recverr) { | ||
621 | sk->sk_err = err; | ||
622 | sk->sk_error_report(sk); | ||
623 | } else /* Only an error on timeout */ | ||
624 | sk->sk_err_soft = err; | ||
625 | out: | ||
/* Common exit for every path that reached bh_lock_sock() above. */
626 | bh_unlock_sock(sk); | ||
627 | sock_put(sk); | ||
628 | } | ||
629 | |||
630 | int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) | ||
631 | { | ||
632 | struct sk_buff *skb; | ||
633 | /* | ||
634 | * FIXME: what if rebuild_header fails? | ||
635 | * Should we be doing a rebuild_header here? | ||
636 | */ | ||
637 | int err = inet_sk_rebuild_header(sk); | ||
638 | |||
639 | if (err != 0) | ||
640 | return err; | ||
641 | |||
642 | skb = dccp_make_reset(sk, sk->sk_dst_cache, code); | ||
643 | if (skb != NULL) { | ||
644 | const struct dccp_sock *dp = dccp_sk(sk); | ||
645 | const struct inet_sock *inet = inet_sk(sk); | ||
646 | |||
647 | err = ip_build_and_send_pkt(skb, sk, | ||
648 | inet->saddr, inet->daddr, NULL); | ||
649 | if (err == NET_XMIT_CN) | ||
650 | err = 0; | ||
651 | |||
652 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
653 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
654 | } | ||
655 | |||
656 | return err; | ||
657 | } | ||
658 | |||
659 | static inline u64 dccp_v4_init_sequence(const struct sock *sk, | ||
660 | const struct sk_buff *skb) | ||
661 | { | ||
662 | return secure_dccp_sequence_number(skb->nh.iph->daddr, | ||
663 | skb->nh.iph->saddr, | ||
664 | dccp_hdr(skb)->dccph_dport, | ||
665 | dccp_hdr(skb)->dccph_sport); | ||
666 | } | ||
667 | |||
668 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | ||
669 | { | ||
670 | struct inet_request_sock *ireq; | ||
671 | struct dccp_sock dp; | ||
672 | struct request_sock *req; | ||
673 | struct dccp_request_sock *dreq; | ||
674 | const __u32 saddr = skb->nh.iph->saddr; | ||
675 | const __u32 daddr = skb->nh.iph->daddr; | ||
676 | struct dst_entry *dst = NULL; | ||
677 | |||
678 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ | ||
679 | if (((struct rtable *)skb->dst)->rt_flags & | ||
680 | (RTCF_BROADCAST | RTCF_MULTICAST)) | ||
681 | goto drop; | ||
682 | |||
683 | /* | ||
684 | * TW buckets are converted to open requests without | ||
685 | * limitations, they conserve resources and peer is | ||
686 | * evidently real one. | ||
687 | */ | ||
688 | if (inet_csk_reqsk_queue_is_full(sk)) | ||
689 | goto drop; | ||
690 | |||
691 | /* | ||
692 | * Accept backlog is full. If we have already queued enough | ||
693 | * of warm entries in syn queue, drop request. It is better than | ||
694 | * clogging syn queue with openreqs with exponentially increasing | ||
695 | * timeout. | ||
696 | */ | ||
697 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) | ||
698 | goto drop; | ||
699 | |||
700 | req = reqsk_alloc(sk->sk_prot->rsk_prot); | ||
701 | if (req == NULL) | ||
702 | goto drop; | ||
703 | |||
704 | /* FIXME: process options */ | ||
705 | |||
706 | dccp_openreq_init(req, &dp, skb); | ||
707 | |||
708 | ireq = inet_rsk(req); | ||
709 | ireq->loc_addr = daddr; | ||
710 | ireq->rmt_addr = saddr; | ||
711 | /* FIXME: Merge Aristeu's option parsing code when ready */ | ||
712 | req->rcv_wnd = 100; /* Fake, option parsing will get the | ||
713 | right value */ | ||
714 | ireq->opt = NULL; | ||
715 | |||
716 | /* | ||
717 | * Step 3: Process LISTEN state | ||
718 | * | ||
719 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
720 | * | ||
721 | * In fact we defer setting S.GSR, S.SWL, S.SWH to | ||
722 | * dccp_create_openreq_child. | ||
723 | */ | ||
724 | dreq = dccp_rsk(req); | ||
725 | dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
726 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); | ||
727 | dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; | ||
728 | |||
729 | if (dccp_v4_send_response(sk, req, dst)) | ||
730 | goto drop_and_free; | ||
731 | |||
732 | inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); | ||
733 | return 0; | ||
734 | |||
735 | drop_and_free: | ||
736 | /* | ||
737 | * FIXME: should be reqsk_free after implementing req->rsk_ops | ||
738 | */ | ||
739 | __reqsk_free(req); | ||
740 | drop: | ||
741 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
742 | return -1; | ||
743 | } | ||
744 | |||
745 | /* | ||
746 | * The three way handshake has completed - we got a valid ACK or DATAACK - | ||
747 | * now create the new socket. | ||
748 | * | ||
749 | * This is the equivalent of TCP's tcp_v4_syn_recv_sock | ||
750 | */ | ||
751 | struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | ||
752 | struct request_sock *req, | ||
753 | struct dst_entry *dst) | ||
754 | { | ||
755 | struct inet_request_sock *ireq; | ||
756 | struct inet_sock *newinet; | ||
757 | struct dccp_sock *newdp; | ||
758 | struct sock *newsk; | ||
759 | |||
760 | if (sk_acceptq_is_full(sk)) | ||
761 | goto exit_overflow; | ||
762 | |||
763 | if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) | ||
764 | goto exit; | ||
765 | |||
766 | newsk = dccp_create_openreq_child(sk, req, skb); | ||
767 | if (newsk == NULL) | ||
768 | goto exit; | ||
769 | |||
770 | sk_setup_caps(newsk, dst); | ||
771 | |||
772 | newdp = dccp_sk(newsk); | ||
773 | newinet = inet_sk(newsk); | ||
774 | ireq = inet_rsk(req); | ||
775 | newinet->daddr = ireq->rmt_addr; | ||
776 | newinet->rcv_saddr = ireq->loc_addr; | ||
777 | newinet->saddr = ireq->loc_addr; | ||
778 | newinet->opt = ireq->opt; | ||
779 | ireq->opt = NULL; | ||
780 | newinet->mc_index = inet_iif(skb); | ||
781 | newinet->mc_ttl = skb->nh.iph->ttl; | ||
782 | newinet->id = jiffies; | ||
783 | |||
784 | dccp_sync_mss(newsk, dst_mtu(dst)); | ||
785 | |||
786 | __inet_hash(&dccp_hashinfo, newsk, 0); | ||
787 | __inet_inherit_port(&dccp_hashinfo, sk, newsk); | ||
788 | |||
789 | return newsk; | ||
790 | |||
791 | exit_overflow: | ||
792 | NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); | ||
793 | exit: | ||
794 | NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); | ||
795 | dst_release(dst); | ||
796 | return NULL; | ||
797 | } | ||
798 | |||
799 | static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | ||
800 | { | ||
801 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
802 | const struct iphdr *iph = skb->nh.iph; | ||
803 | struct sock *nsk; | ||
804 | struct request_sock **prev; | ||
805 | /* Find possible connection requests. */ | ||
806 | struct request_sock *req = inet_csk_search_req(sk, &prev, | ||
807 | dh->dccph_sport, | ||
808 | iph->saddr, iph->daddr); | ||
809 | if (req != NULL) | ||
810 | return dccp_check_req(sk, skb, req, prev); | ||
811 | |||
812 | nsk = __inet_lookup_established(&dccp_hashinfo, | ||
813 | iph->saddr, dh->dccph_sport, | ||
814 | iph->daddr, ntohs(dh->dccph_dport), | ||
815 | inet_iif(skb)); | ||
816 | if (nsk != NULL) { | ||
817 | if (nsk->sk_state != DCCP_TIME_WAIT) { | ||
818 | bh_lock_sock(nsk); | ||
819 | return nsk; | ||
820 | } | ||
821 | inet_twsk_put((struct inet_timewait_sock *)nsk); | ||
822 | return NULL; | ||
823 | } | ||
824 | |||
825 | return sk; | ||
826 | } | ||
827 | |||
828 | int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, | ||
829 | const u32 daddr) | ||
830 | { | ||
831 | const struct dccp_hdr* dh = dccp_hdr(skb); | ||
832 | int checksum_len; | ||
833 | u32 tmp; | ||
834 | |||
835 | if (dh->dccph_cscov == 0) | ||
836 | checksum_len = skb->len; | ||
837 | else { | ||
838 | checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); | ||
839 | checksum_len = checksum_len < skb->len ? checksum_len : | ||
840 | skb->len; | ||
841 | } | ||
842 | |||
843 | tmp = csum_partial((unsigned char *)dh, checksum_len, 0); | ||
844 | return csum_tcpudp_magic(saddr, daddr, checksum_len, | ||
845 | IPPROTO_DCCP, tmp); | ||
846 | } | ||
847 | |||
848 | static int dccp_v4_verify_checksum(struct sk_buff *skb, | ||
849 | const u32 saddr, const u32 daddr) | ||
850 | { | ||
851 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
852 | int checksum_len; | ||
853 | u32 tmp; | ||
854 | |||
855 | if (dh->dccph_cscov == 0) | ||
856 | checksum_len = skb->len; | ||
857 | else { | ||
858 | checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); | ||
859 | checksum_len = checksum_len < skb->len ? checksum_len : | ||
860 | skb->len; | ||
861 | } | ||
862 | tmp = csum_partial((unsigned char *)dh, checksum_len, 0); | ||
863 | return csum_tcpudp_magic(saddr, daddr, checksum_len, | ||
864 | IPPROTO_DCCP, tmp) == 0 ? 0 : -1; | ||
865 | } | ||
866 | |||
867 | static struct dst_entry* dccp_v4_route_skb(struct sock *sk, | ||
868 | struct sk_buff *skb) | ||
869 | { | ||
870 | struct rtable *rt; | ||
871 | struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, | ||
872 | .nl_u = { .ip4_u = | ||
873 | { .daddr = skb->nh.iph->saddr, | ||
874 | .saddr = skb->nh.iph->daddr, | ||
875 | .tos = RT_CONN_FLAGS(sk) } }, | ||
876 | .proto = sk->sk_protocol, | ||
877 | .uli_u = { .ports = | ||
878 | { .sport = dccp_hdr(skb)->dccph_dport, | ||
879 | .dport = dccp_hdr(skb)->dccph_sport } | ||
880 | } | ||
881 | }; | ||
882 | |||
883 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
884 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
885 | return NULL; | ||
886 | } | ||
887 | |||
888 | return &rt->u.dst; | ||
889 | } | ||
890 | |||
/*
 * Send a DCCP-Reset in reply to @rxskb through the control socket.
 *
 * Nothing is sent when @rxskb is itself a Reset, when the route attached
 * to @rxskb is not RTN_LOCAL, or when no reply route or skb can be
 * obtained.  The reset code is taken from
 * DCCP_SKB_CB(rxskb)->dccpd_reset_code.  The reply acknowledges the
 * received sequence number; its own sequence number is the received
 * acknowledgement number plus one, or 0 if @rxskb carried none.
 */
891 | static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) | ||
892 | { | ||
893 | int err; | ||
894 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
895 | const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + | ||
896 | sizeof(struct dccp_hdr_ext) + | ||
897 | sizeof(struct dccp_hdr_reset); | ||
898 | struct sk_buff *skb; | ||
899 | struct dst_entry *dst; | ||
900 | u64 seqno; | ||
901 | |||
902 | /* Never send a reset in response to a reset. */ | ||
903 | if (rxdh->dccph_type == DCCP_PKT_RESET) | ||
904 | return; | ||
905 | |||
906 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
907 | return; | ||
908 | |||
909 | dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); | ||
910 | if (dst == NULL) | ||
911 | return; | ||
912 | |||
913 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
914 | if (skb == NULL) | ||
915 | goto out; | ||
916 | |||
917 | /* Reserve space for headers. */ | ||
918 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
/* The skb gets its own reference via dst_clone(); the one returned by
 * dccp_v4_route_skb() is released at "out:". */
919 | skb->dst = dst_clone(dst); | ||
920 | |||
921 | skb->h.raw = skb_push(skb, dccp_hdr_reset_len); | ||
922 | dh = dccp_hdr(skb); | ||
923 | memset(dh, 0, dccp_hdr_reset_len); | ||
924 | |||
925 | /* Build DCCP header and checksum it. */ | ||
926 | dh->dccph_type = DCCP_PKT_RESET; | ||
927 | dh->dccph_sport = rxdh->dccph_dport; | ||
928 | dh->dccph_dport = rxdh->dccph_sport; | ||
/* dccph_doff is expressed in 32-bit words, hence the division by 4. */
929 | dh->dccph_doff = dccp_hdr_reset_len / 4; | ||
930 | dh->dccph_x = 1; | ||
931 | dccp_hdr_reset(skb)->dccph_reset_code = | ||
932 | DCCP_SKB_CB(rxskb)->dccpd_reset_code; | ||
933 | |||
934 | /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ | ||
935 | seqno = 0; | ||
936 | if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
937 | dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); | ||
938 | |||
939 | dccp_hdr_set_seq(dh, seqno); | ||
940 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), | ||
941 | DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
942 | |||
/* Done last so that every header field set above is covered.  The
 * addresses are passed in the received packet's order, while the packet
 * below is sent with them swapped. */
943 | dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, | ||
944 | rxskb->nh.iph->daddr); | ||
945 | |||
946 | bh_lock_sock(dccp_ctl_socket->sk); | ||
947 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
948 | rxskb->nh.iph->daddr, | ||
949 | rxskb->nh.iph->saddr, NULL); | ||
950 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
951 | |||
/* NET_XMIT_CN is treated as a successful transmission. */
952 | if (err == NET_XMIT_CN || err == 0) { | ||
953 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
954 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
955 | } | ||
956 | out: | ||
957 | dst_release(dst); | ||
958 | } | ||
959 | |||
960 | int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | ||
961 | { | ||
962 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
963 | |||
964 | if (sk->sk_state == DCCP_OPEN) { /* Fast path */ | ||
965 | if (dccp_rcv_established(sk, skb, dh, skb->len)) | ||
966 | goto reset; | ||
967 | return 0; | ||
968 | } | ||
969 | |||
970 | /* | ||
971 | * Step 3: Process LISTEN state | ||
972 | * If S.state == LISTEN, | ||
973 | * If P.type == Request or P contains a valid Init Cookie | ||
974 | * option, | ||
975 | * * Must scan the packet's options to check for an Init | ||
976 | * Cookie. Only the Init Cookie is processed here, | ||
977 | * however; other options are processed in Step 8. This | ||
978 | * scan need only be performed if the endpoint uses Init | ||
979 | * Cookies * | ||
980 | * * Generate a new socket and switch to that socket * | ||
981 | * Set S := new socket for this port pair | ||
982 | * S.state = RESPOND | ||
983 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
984 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
985 | * Continue with S.state == RESPOND | ||
986 | * * A Response packet will be generated in Step 11 * | ||
987 | * Otherwise, | ||
988 | * Generate Reset(No Connection) unless P.type == Reset | ||
989 | * Drop packet and return | ||
990 | * | ||
991 | * NOTE: the check for the packet types is done in | ||
992 | * dccp_rcv_state_process | ||
993 | */ | ||
994 | if (sk->sk_state == DCCP_LISTEN) { | ||
995 | struct sock *nsk = dccp_v4_hnd_req(sk, skb); | ||
996 | |||
997 | if (nsk == NULL) | ||
998 | goto discard; | ||
999 | |||
1000 | if (nsk != sk) { | ||
1001 | if (dccp_child_process(sk, nsk, skb)) | ||
1002 | goto reset; | ||
1003 | return 0; | ||
1004 | } | ||
1005 | } | ||
1006 | |||
1007 | if (dccp_rcv_state_process(sk, skb, dh, skb->len)) | ||
1008 | goto reset; | ||
1009 | return 0; | ||
1010 | |||
1011 | reset: | ||
1012 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; | ||
1013 | dccp_v4_ctl_send_reset(skb); | ||
1014 | discard: | ||
1015 | kfree_skb(skb); | ||
1016 | return 0; | ||
1017 | } | ||
1018 | |||
1019 | static inline int dccp_invalid_packet(struct sk_buff *skb) | ||
1020 | { | ||
1021 | const struct dccp_hdr *dh; | ||
1022 | |||
1023 | if (skb->pkt_type != PACKET_HOST) | ||
1024 | return 1; | ||
1025 | |||
1026 | if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { | ||
1027 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); | ||
1028 | return 1; | ||
1029 | } | ||
1030 | |||
1031 | dh = dccp_hdr(skb); | ||
1032 | |||
1033 | /* If the packet type is not understood, drop packet and return */ | ||
1034 | if (dh->dccph_type >= DCCP_PKT_INVALID) { | ||
1035 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); | ||
1036 | return 1; | ||
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * If P.Data Offset is too small for packet type, or too large for | ||
1041 | * packet, drop packet and return | ||
1042 | */ | ||
1043 | if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { | ||
1044 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " | ||
1045 | "too small 1\n", | ||
1046 | dh->dccph_doff); | ||
1047 | return 1; | ||
1048 | } | ||
1049 | |||
1050 | if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { | ||
1051 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " | ||
1052 | "too small 2\n", | ||
1053 | dh->dccph_doff); | ||
1054 | return 1; | ||
1055 | } | ||
1056 | |||
1057 | dh = dccp_hdr(skb); | ||
1058 | |||
1059 | /* | ||
1060 | * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet | ||
1061 | * has short sequence numbers), drop packet and return | ||
1062 | */ | ||
1063 | if (dh->dccph_x == 0 && | ||
1064 | dh->dccph_type != DCCP_PKT_DATA && | ||
1065 | dh->dccph_type != DCCP_PKT_ACK && | ||
1066 | dh->dccph_type != DCCP_PKT_DATAACK) { | ||
1067 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " | ||
1068 | "nor DataAck and P.X == 0\n", | ||
1069 | dccp_packet_name(dh->dccph_type)); | ||
1070 | return 1; | ||
1071 | } | ||
1072 | |||
1073 | /* If the header checksum is incorrect, drop packet and return */ | ||
1074 | if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, | ||
1075 | skb->nh.iph->daddr) < 0) { | ||
1076 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is " | ||
1077 | "incorrect\n"); | ||
1078 | return 1; | ||
1079 | } | ||
1080 | |||
1081 | return 0; | ||
1082 | } | ||
1083 | |||
1084 | /* this is called when real data arrives */ | ||
1085 | int dccp_v4_rcv(struct sk_buff *skb) | ||
1086 | { | ||
1087 | const struct dccp_hdr *dh; | ||
1088 | struct sock *sk; | ||
1089 | int rc; | ||
1090 | |||
1091 | /* Step 1: Check header basics: */ | ||
1092 | |||
1093 | if (dccp_invalid_packet(skb)) | ||
1094 | goto discard_it; | ||
1095 | |||
1096 | dh = dccp_hdr(skb); | ||
1097 | #if 0 | ||
1098 | /* | ||
1099 | * Use something like this to simulate some DATA/DATAACK loss to test | ||
1100 | * dccp_ackpkts_add, you'll get something like this on a session that | ||
1101 | * sends 10 DATA/DATAACK packets: | ||
1102 | * | ||
1103 | * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| | ||
1104 | * | ||
1105 | * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet | ||
1106 | * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets | ||
1107 | * with the same state | ||
1108 | * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet | ||
1109 | * | ||
1110 | * So... | ||
1111 | * | ||
1112 | * 281473596467422 was received | ||
1113 | * 281473596467421 was not received | ||
1114 | * 281473596467420 was received | ||
1115 | * 281473596467419 was not received | ||
1116 | * 281473596467418 was received | ||
1117 | * 281473596467417 was not received | ||
1118 | * 281473596467416 was received | ||
1119 | * 281473596467415 was not received | ||
1120 | * 281473596467414 was received | ||
1121 | * 281473596467413 was received (this one was the 3way handshake | ||
1122 | * RESPONSE) | ||
1123 | * | ||
1124 | */ | ||
1125 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
1126 | dh->dccph_type == DCCP_PKT_DATAACK) { | ||
1127 | static int discard = 0; | ||
1128 | |||
1129 | if (discard) { | ||
1130 | discard = 0; | ||
1131 | goto discard_it; | ||
1132 | } | ||
1133 | discard = 1; | ||
1134 | } | ||
1135 | #endif | ||
1136 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | ||
1137 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | ||
1138 | |||
1139 | dccp_pr_debug("%8.8s " | ||
1140 | "src=%u.%u.%u.%u@%-5d " | ||
1141 | "dst=%u.%u.%u.%u@%-5d seq=%llu", | ||
1142 | dccp_packet_name(dh->dccph_type), | ||
1143 | NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), | ||
1144 | NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), | ||
1145 | (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); | ||
1146 | |||
1147 | if (dccp_packet_without_ack(skb)) { | ||
1148 | DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; | ||
1149 | dccp_pr_debug_cat("\n"); | ||
1150 | } else { | ||
1151 | DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); | ||
1152 | dccp_pr_debug_cat(", ack=%llu\n", | ||
1153 | (unsigned long long) | ||
1154 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
1155 | } | ||
1156 | |||
1157 | /* Step 2: | ||
1158 | * Look up flow ID in table and get corresponding socket */ | ||
1159 | sk = __inet_lookup(&dccp_hashinfo, | ||
1160 | skb->nh.iph->saddr, dh->dccph_sport, | ||
1161 | skb->nh.iph->daddr, ntohs(dh->dccph_dport), | ||
1162 | inet_iif(skb)); | ||
1163 | |||
1164 | /* | ||
1165 | * Step 2: | ||
1166 | * If no socket ... | ||
1167 | * Generate Reset(No Connection) unless P.type == Reset | ||
1168 | * Drop packet and return | ||
1169 | */ | ||
1170 | if (sk == NULL) { | ||
1171 | dccp_pr_debug("failed to look up flow ID in table and " | ||
1172 | "get corresponding socket\n"); | ||
1173 | goto no_dccp_socket; | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * Step 2: | ||
1178 | * ... or S.state == TIMEWAIT, | ||
1179 | * Generate Reset(No Connection) unless P.type == Reset | ||
1180 | * Drop packet and return | ||
1181 | */ | ||
1182 | |||
1183 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
1184 | dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " | ||
1185 | "do_time_wait\n"); | ||
1186 | goto do_time_wait; | ||
1187 | } | ||
1188 | |||
1189 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | ||
1190 | dccp_pr_debug("xfrm4_policy_check failed\n"); | ||
1191 | goto discard_and_relse; | ||
1192 | } | ||
1193 | |||
1194 | if (sk_filter(sk, skb, 0)) { | ||
1195 | dccp_pr_debug("sk_filter failed\n"); | ||
1196 | goto discard_and_relse; | ||
1197 | } | ||
1198 | |||
1199 | skb->dev = NULL; | ||
1200 | |||
1201 | bh_lock_sock(sk); | ||
1202 | rc = 0; | ||
1203 | if (!sock_owned_by_user(sk)) | ||
1204 | rc = dccp_v4_do_rcv(sk, skb); | ||
1205 | else | ||
1206 | sk_add_backlog(sk, skb); | ||
1207 | bh_unlock_sock(sk); | ||
1208 | |||
1209 | sock_put(sk); | ||
1210 | return rc; | ||
1211 | |||
1212 | no_dccp_socket: | ||
1213 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
1214 | goto discard_it; | ||
1215 | /* | ||
1216 | * Step 2: | ||
1217 | * Generate Reset(No Connection) unless P.type == Reset | ||
1218 | * Drop packet and return | ||
1219 | */ | ||
1220 | if (dh->dccph_type != DCCP_PKT_RESET) { | ||
1221 | DCCP_SKB_CB(skb)->dccpd_reset_code = | ||
1222 | DCCP_RESET_CODE_NO_CONNECTION; | ||
1223 | dccp_v4_ctl_send_reset(skb); | ||
1224 | } | ||
1225 | |||
1226 | discard_it: | ||
1227 | /* Discard frame. */ | ||
1228 | kfree_skb(skb); | ||
1229 | return 0; | ||
1230 | |||
1231 | discard_and_relse: | ||
1232 | sock_put(sk); | ||
1233 | goto discard_it; | ||
1234 | |||
1235 | do_time_wait: | ||
1236 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
1237 | goto no_dccp_socket; | ||
1238 | } | ||
1239 | |||
1240 | static int dccp_v4_init_sock(struct sock *sk) | ||
1241 | { | ||
1242 | struct dccp_sock *dp = dccp_sk(sk); | ||
1243 | static int dccp_ctl_socket_init = 1; | ||
1244 | |||
1245 | dccp_options_init(&dp->dccps_options); | ||
1246 | |||
1247 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
1248 | dp->dccps_hc_rx_ackpkts = | ||
1249 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
1250 | GFP_KERNEL); | ||
1251 | |||
1252 | if (dp->dccps_hc_rx_ackpkts == NULL) | ||
1253 | return -ENOMEM; | ||
1254 | } | ||
1255 | |||
1256 | /* | ||
1257 | * FIXME: We're hardcoding the CCID, and doing this at this point makes | ||
1258 | * the listening (master) sock get CCID control blocks, which is not | ||
1259 | * necessary, but for now, to not mess with the test userspace apps, | ||
1260 | * lets leave it here, later the real solution is to do this in a | ||
1261 | * setsockopt(CCIDs-I-want/accept). -acme | ||
1262 | */ | ||
1263 | if (likely(!dccp_ctl_socket_init)) { | ||
1264 | dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, | ||
1265 | sk); | ||
1266 | dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, | ||
1267 | sk); | ||
1268 | if (dp->dccps_hc_rx_ccid == NULL || | ||
1269 | dp->dccps_hc_tx_ccid == NULL) { | ||
1270 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | ||
1271 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | ||
1272 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | ||
1273 | dp->dccps_hc_rx_ackpkts = NULL; | ||
1274 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | ||
1275 | return -ENOMEM; | ||
1276 | } | ||
1277 | } else | ||
1278 | dccp_ctl_socket_init = 0; | ||
1279 | |||
1280 | dccp_init_xmit_timers(sk); | ||
1281 | inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; | ||
1282 | sk->sk_state = DCCP_CLOSED; | ||
1283 | sk->sk_write_space = dccp_write_space; | ||
1284 | dp->dccps_mss_cache = 536; | ||
1285 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | ||
1286 | |||
1287 | return 0; | ||
1288 | } | ||
1289 | |||
1290 | static int dccp_v4_destroy_sock(struct sock *sk) | ||
1291 | { | ||
1292 | struct dccp_sock *dp = dccp_sk(sk); | ||
1293 | |||
1294 | /* | ||
1295 | * DCCP doesn't use sk_qrite_queue, just sk_send_head | ||
1296 | * for retransmissions | ||
1297 | */ | ||
1298 | if (sk->sk_send_head != NULL) { | ||
1299 | kfree_skb(sk->sk_send_head); | ||
1300 | sk->sk_send_head = NULL; | ||
1301 | } | ||
1302 | |||
1303 | /* Clean up a referenced DCCP bind bucket. */ | ||
1304 | if (inet_csk(sk)->icsk_bind_hash != NULL) | ||
1305 | inet_put_port(&dccp_hashinfo, sk); | ||
1306 | |||
1307 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
1308 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
1309 | dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); | ||
1310 | dp->dccps_hc_rx_ackpkts = NULL; | ||
1311 | ccid_exit(dp->dccps_hc_rx_ccid, sk); | ||
1312 | ccid_exit(dp->dccps_hc_tx_ccid, sk); | ||
1313 | dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; | ||
1314 | |||
1315 | return 0; | ||
1316 | } | ||
1317 | |||
1318 | static void dccp_v4_reqsk_destructor(struct request_sock *req) | ||
1319 | { | ||
1320 | kfree(inet_rsk(req)->opt); | ||
1321 | } | ||
1322 | |||
1323 | static struct request_sock_ops dccp_request_sock_ops = { | ||
1324 | .family = PF_INET, | ||
1325 | .obj_size = sizeof(struct dccp_request_sock), | ||
1326 | .rtx_syn_ack = dccp_v4_send_response, | ||
1327 | .send_ack = dccp_v4_reqsk_send_ack, | ||
1328 | .destructor = dccp_v4_reqsk_destructor, | ||
1329 | .send_reset = dccp_v4_ctl_send_reset, | ||
1330 | }; | ||
1331 | |||
1332 | struct proto dccp_v4_prot = { | ||
1333 | .name = "DCCP", | ||
1334 | .owner = THIS_MODULE, | ||
1335 | .close = dccp_close, | ||
1336 | .connect = dccp_v4_connect, | ||
1337 | .disconnect = dccp_disconnect, | ||
1338 | .ioctl = dccp_ioctl, | ||
1339 | .init = dccp_v4_init_sock, | ||
1340 | .setsockopt = dccp_setsockopt, | ||
1341 | .getsockopt = dccp_getsockopt, | ||
1342 | .sendmsg = dccp_sendmsg, | ||
1343 | .recvmsg = dccp_recvmsg, | ||
1344 | .backlog_rcv = dccp_v4_do_rcv, | ||
1345 | .hash = dccp_v4_hash, | ||
1346 | .unhash = dccp_v4_unhash, | ||
1347 | .accept = inet_csk_accept, | ||
1348 | .get_port = dccp_v4_get_port, | ||
1349 | .shutdown = dccp_shutdown, | ||
1350 | .destroy = dccp_v4_destroy_sock, | ||
1351 | .orphan_count = &dccp_orphan_count, | ||
1352 | .max_header = MAX_DCCP_HEADER, | ||
1353 | .obj_size = sizeof(struct dccp_sock), | ||
1354 | .rsk_prot = &dccp_request_sock_ops, | ||
1355 | .twsk_obj_size = sizeof(struct inet_timewait_sock), | ||
1356 | }; | ||
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 000000000000..ce5dff4ac22e --- /dev/null +++ b/net/dccp/minisocks.c | |||
@@ -0,0 +1,264 @@ | |||
1 | /* | ||
2 | * net/dccp/minisocks.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <linux/timer.h> | ||
17 | |||
18 | #include <net/sock.h> | ||
19 | #include <net/xfrm.h> | ||
20 | #include <net/inet_timewait_sock.h> | ||
21 | |||
22 | #include "ccid.h" | ||
23 | #include "dccp.h" | ||
24 | |||
25 | struct inet_timewait_death_row dccp_death_row = { | ||
26 | .sysctl_max_tw_buckets = NR_FILE * 2, | ||
27 | .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, | ||
28 | .death_lock = SPIN_LOCK_UNLOCKED, | ||
29 | .hashinfo = &dccp_hashinfo, | ||
30 | .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, | ||
31 | (unsigned long)&dccp_death_row), | ||
32 | .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, | ||
33 | inet_twdr_twkill_work, | ||
34 | &dccp_death_row), | ||
35 | /* Short-time timewait calendar */ | ||
36 | |||
37 | .twcal_hand = -1, | ||
38 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, | ||
39 | (unsigned long)&dccp_death_row), | ||
40 | }; | ||
41 | |||
42 | void dccp_time_wait(struct sock *sk, int state, int timeo) | ||
43 | { | ||
44 | struct inet_timewait_sock *tw = NULL; | ||
45 | |||
46 | if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) | ||
47 | tw = inet_twsk_alloc(sk, state); | ||
48 | |||
49 | if (tw != NULL) { | ||
50 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
51 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); | ||
52 | |||
53 | /* Linkage updates. */ | ||
54 | __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); | ||
55 | |||
56 | /* Get the TIME_WAIT timeout firing. */ | ||
57 | if (timeo < rto) | ||
58 | timeo = rto; | ||
59 | |||
60 | tw->tw_timeout = DCCP_TIMEWAIT_LEN; | ||
61 | if (state == DCCP_TIME_WAIT) | ||
62 | timeo = DCCP_TIMEWAIT_LEN; | ||
63 | |||
64 | inet_twsk_schedule(tw, &dccp_death_row, timeo, | ||
65 | DCCP_TIMEWAIT_LEN); | ||
66 | inet_twsk_put(tw); | ||
67 | } else { | ||
68 | /* Sorry, if we're out of memory, just CLOSE this | ||
69 | * socket up. We've got bigger problems than | ||
70 | * non-graceful socket closings. | ||
71 | */ | ||
72 | LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket " | ||
73 | "table overflow\n"); | ||
74 | } | ||
75 | |||
76 | dccp_done(sk); | ||
77 | } | ||
78 | |||
79 | struct sock *dccp_create_openreq_child(struct sock *sk, | ||
80 | const struct request_sock *req, | ||
81 | const struct sk_buff *skb) | ||
82 | { | ||
83 | /* | ||
84 | * Step 3: Process LISTEN state | ||
85 | * | ||
86 | * // Generate a new socket and switch to that socket | ||
87 | * Set S := new socket for this port pair | ||
88 | */ | ||
89 | struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); | ||
90 | |||
91 | if (newsk != NULL) { | ||
92 | const struct dccp_request_sock *dreq = dccp_rsk(req); | ||
93 | struct inet_connection_sock *newicsk = inet_csk(sk); | ||
94 | struct dccp_sock *newdp = dccp_sk(newsk); | ||
95 | |||
96 | newdp->dccps_hc_rx_ackpkts = NULL; | ||
97 | newdp->dccps_role = DCCP_ROLE_SERVER; | ||
98 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
99 | |||
100 | if (newdp->dccps_options.dccpo_send_ack_vector) { | ||
101 | newdp->dccps_hc_rx_ackpkts = | ||
102 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
103 | GFP_ATOMIC); | ||
104 | /* | ||
105 | * XXX: We're using the same CCIDs set on the parent, | ||
106 | * i.e. sk_clone copied the master sock and left the | ||
107 | * CCID pointers for this child, that is why we do the | ||
108 | * __ccid_get calls. | ||
109 | */ | ||
110 | if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) | ||
111 | goto out_free; | ||
112 | } | ||
113 | |||
114 | if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, | ||
115 | newsk) != 0 || | ||
116 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, | ||
117 | newsk) != 0)) { | ||
118 | dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); | ||
119 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); | ||
120 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); | ||
121 | out_free: | ||
122 | /* It is still raw copy of parent, so invalidate | ||
123 | * destructor and make plain sk_free() */ | ||
124 | newsk->sk_destruct = NULL; | ||
125 | sk_free(newsk); | ||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | __ccid_get(newdp->dccps_hc_rx_ccid); | ||
130 | __ccid_get(newdp->dccps_hc_tx_ccid); | ||
131 | |||
132 | /* | ||
133 | * Step 3: Process LISTEN state | ||
134 | * | ||
135 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
136 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init | ||
137 | * Cookie | ||
138 | */ | ||
139 | |||
140 | /* See dccp_v4_conn_request */ | ||
141 | newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; | ||
142 | |||
143 | newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; | ||
144 | dccp_update_gsr(newsk, dreq->dreq_isr); | ||
145 | |||
146 | newdp->dccps_iss = dreq->dreq_iss; | ||
147 | dccp_update_gss(newsk, dreq->dreq_iss); | ||
148 | |||
149 | /* | ||
150 | * SWL and AWL are initially adjusted so that they are not less than | ||
151 | * the initial Sequence Numbers received and sent, respectively: | ||
152 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
153 | * AWL := max(GSS - W' + 1, ISS). | ||
154 | * These adjustments MUST be applied only at the beginning of the | ||
155 | * connection. | ||
156 | */ | ||
157 | dccp_set_seqno(&newdp->dccps_swl, | ||
158 | max48(newdp->dccps_swl, newdp->dccps_isr)); | ||
159 | dccp_set_seqno(&newdp->dccps_awl, | ||
160 | max48(newdp->dccps_awl, newdp->dccps_iss)); | ||
161 | |||
162 | dccp_init_xmit_timers(newsk); | ||
163 | |||
164 | DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); | ||
165 | } | ||
166 | return newsk; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Process an incoming packet for RESPOND sockets represented | ||
171 | * as an request_sock. | ||
172 | */ | ||
173 | struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
174 | struct request_sock *req, | ||
175 | struct request_sock **prev) | ||
176 | { | ||
177 | struct sock *child = NULL; | ||
178 | |||
179 | /* Check for retransmitted REQUEST */ | ||
180 | if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { | ||
181 | if (after48(DCCP_SKB_CB(skb)->dccpd_seq, | ||
182 | dccp_rsk(req)->dreq_isr)) { | ||
183 | struct dccp_request_sock *dreq = dccp_rsk(req); | ||
184 | |||
185 | dccp_pr_debug("Retransmitted REQUEST\n"); | ||
186 | /* Send another RESPONSE packet */ | ||
187 | dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); | ||
188 | dccp_set_seqno(&dreq->dreq_isr, | ||
189 | DCCP_SKB_CB(skb)->dccpd_seq); | ||
190 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | ||
191 | } | ||
192 | /* Network Duplicate, discard packet */ | ||
193 | return NULL; | ||
194 | } | ||
195 | |||
196 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; | ||
197 | |||
198 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && | ||
199 | dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) | ||
200 | goto drop; | ||
201 | |||
202 | /* Invalid ACK */ | ||
203 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { | ||
204 | dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " | ||
205 | "dreq_iss=%llu\n", | ||
206 | (unsigned long long) | ||
207 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
208 | (unsigned long long) | ||
209 | dccp_rsk(req)->dreq_iss); | ||
210 | goto drop; | ||
211 | } | ||
212 | |||
213 | child = dccp_v4_request_recv_sock(sk, skb, req, NULL); | ||
214 | if (child == NULL) | ||
215 | goto listen_overflow; | ||
216 | |||
217 | /* FIXME: deal with options */ | ||
218 | |||
219 | inet_csk_reqsk_queue_unlink(sk, req, prev); | ||
220 | inet_csk_reqsk_queue_removed(sk, req); | ||
221 | inet_csk_reqsk_queue_add(sk, req, child); | ||
222 | out: | ||
223 | return child; | ||
224 | listen_overflow: | ||
225 | dccp_pr_debug("listen_overflow!\n"); | ||
226 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
227 | drop: | ||
228 | if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) | ||
229 | req->rsk_ops->send_reset(skb); | ||
230 | |||
231 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
232 | goto out; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Queue segment on the new socket if the new socket is active, | ||
237 | * otherwise we just shortcircuit this and continue with | ||
238 | * the new socket. | ||
239 | */ | ||
240 | int dccp_child_process(struct sock *parent, struct sock *child, | ||
241 | struct sk_buff *skb) | ||
242 | { | ||
243 | int ret = 0; | ||
244 | const int state = child->sk_state; | ||
245 | |||
246 | if (!sock_owned_by_user(child)) { | ||
247 | ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), | ||
248 | skb->len); | ||
249 | |||
250 | /* Wakeup parent, send SIGIO */ | ||
251 | if (state == DCCP_RESPOND && child->sk_state != state) | ||
252 | parent->sk_data_ready(parent, 0); | ||
253 | } else { | ||
254 | /* Alas, it is possible again, because we do lookup | ||
255 | * in main socket hash table and lock on listening | ||
256 | * socket does not protect us more. | ||
257 | */ | ||
258 | sk_add_backlog(child, skb); | ||
259 | } | ||
260 | |||
261 | bh_unlock_sock(child); | ||
262 | sock_put(child); | ||
263 | return ret; | ||
264 | } | ||
diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 000000000000..382c5894acb2 --- /dev/null +++ b/net/dccp/options.c | |||
@@ -0,0 +1,855 @@ | |||
1 | /* | ||
2 | * net/dccp/options.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> | ||
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
7 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | #include <linux/config.h> | ||
15 | #include <linux/dccp.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/skbuff.h> | ||
20 | |||
21 | #include "ccid.h" | ||
22 | #include "dccp.h" | ||
23 | |||
24 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
25 | struct sock *sk, | ||
26 | const u64 ackno, | ||
27 | const unsigned char len, | ||
28 | const unsigned char *vector); | ||
29 | |||
30 | /* stores the default values for new connection. may be changed with sysctl */ | ||
31 | static const struct dccp_options dccpo_default_values = { | ||
32 | .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, | ||
33 | .dccpo_ccid = DCCPF_INITIAL_CCID, | ||
34 | .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, | ||
35 | .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, | ||
36 | }; | ||
37 | |||
38 | void dccp_options_init(struct dccp_options *dccpo) | ||
39 | { | ||
40 | memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); | ||
41 | } | ||
42 | |||
/*
 * Decode a big-endian unsigned value of variable length.
 *
 * @bf:  first byte of the encoded value
 * @len: number of bytes available at @bf
 *
 * At most the first four bytes are used; a @len of 0 decodes to 0.
 * The value is accumulated in a u32 so that a leading byte >= 0x80 is
 * never shifted into the sign bit of a promoted int.
 */
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
{
	unsigned int remaining = len > 4 ? 4 : len;
	u32 value = 0;

	while (remaining-- > 0)
		value = (value << 8) | *bf++;

	return value;
}
58 | |||
59 | int dccp_parse_options(struct sock *sk, struct sk_buff *skb) | ||
60 | { | ||
61 | struct dccp_sock *dp = dccp_sk(sk); | ||
62 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
63 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
64 | "CLIENT rx opt: " : "server rx opt: "; | ||
65 | #endif | ||
66 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
67 | const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; | ||
68 | unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); | ||
69 | unsigned char *opt_ptr = options; | ||
70 | const unsigned char *opt_end = (unsigned char *)dh + | ||
71 | (dh->dccph_doff * 4); | ||
72 | struct dccp_options_received *opt_recv = &dp->dccps_options_received; | ||
73 | unsigned char opt, len; | ||
74 | unsigned char *value; | ||
75 | |||
76 | memset(opt_recv, 0, sizeof(*opt_recv)); | ||
77 | |||
78 | while (opt_ptr != opt_end) { | ||
79 | opt = *opt_ptr++; | ||
80 | len = 0; | ||
81 | value = NULL; | ||
82 | |||
83 | /* Check if this isn't a single byte option */ | ||
84 | if (opt > DCCPO_MAX_RESERVED) { | ||
85 | if (opt_ptr == opt_end) | ||
86 | goto out_invalid_option; | ||
87 | |||
88 | len = *opt_ptr++; | ||
89 | if (len < 3) | ||
90 | goto out_invalid_option; | ||
91 | /* | ||
92 | * Remove the type and len fields, leaving | ||
93 | * just the value size | ||
94 | */ | ||
95 | len -= 2; | ||
96 | value = opt_ptr; | ||
97 | opt_ptr += len; | ||
98 | |||
99 | if (opt_ptr > opt_end) | ||
100 | goto out_invalid_option; | ||
101 | } | ||
102 | |||
103 | switch (opt) { | ||
104 | case DCCPO_PADDING: | ||
105 | break; | ||
106 | case DCCPO_NDP_COUNT: | ||
107 | if (len > 3) | ||
108 | goto out_invalid_option; | ||
109 | |||
110 | opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); | ||
111 | dccp_pr_debug("%sNDP count=%d\n", debug_prefix, | ||
112 | opt_recv->dccpor_ndp); | ||
113 | break; | ||
114 | case DCCPO_ACK_VECTOR_0: | ||
115 | if (len > DCCP_MAX_ACK_VECTOR_LEN) | ||
116 | goto out_invalid_option; | ||
117 | |||
118 | if (pkt_type == DCCP_PKT_DATA) | ||
119 | continue; | ||
120 | |||
121 | opt_recv->dccpor_ack_vector_len = len; | ||
122 | opt_recv->dccpor_ack_vector_idx = value - options; | ||
123 | |||
124 | dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", | ||
125 | debug_prefix, len, | ||
126 | (unsigned long long) | ||
127 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
128 | dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
129 | value, len); | ||
130 | dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, | ||
131 | sk, | ||
132 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
133 | len, value); | ||
134 | break; | ||
135 | case DCCPO_TIMESTAMP: | ||
136 | if (len != 4) | ||
137 | goto out_invalid_option; | ||
138 | |||
139 | opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); | ||
140 | |||
141 | dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; | ||
142 | do_gettimeofday(&dp->dccps_timestamp_time); | ||
143 | |||
144 | dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", | ||
145 | debug_prefix, opt_recv->dccpor_timestamp, | ||
146 | (unsigned long long) | ||
147 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
148 | break; | ||
149 | case DCCPO_TIMESTAMP_ECHO: | ||
150 | if (len != 4 && len != 6 && len != 8) | ||
151 | goto out_invalid_option; | ||
152 | |||
153 | opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); | ||
154 | |||
155 | dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ", | ||
156 | debug_prefix, | ||
157 | opt_recv->dccpor_timestamp_echo, | ||
158 | len + 2, | ||
159 | (unsigned long long) | ||
160 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
161 | |||
162 | if (len > 4) { | ||
163 | if (len == 6) | ||
164 | opt_recv->dccpor_elapsed_time = | ||
165 | ntohs(*(u16 *)(value + 4)); | ||
166 | else | ||
167 | opt_recv->dccpor_elapsed_time = | ||
168 | ntohl(*(u32 *)(value + 4)); | ||
169 | |||
170 | dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", | ||
171 | debug_prefix, | ||
172 | opt_recv->dccpor_elapsed_time); | ||
173 | } | ||
174 | break; | ||
175 | case DCCPO_ELAPSED_TIME: | ||
176 | if (len != 2 && len != 4) | ||
177 | goto out_invalid_option; | ||
178 | |||
179 | if (pkt_type == DCCP_PKT_DATA) | ||
180 | continue; | ||
181 | |||
182 | if (len == 2) | ||
183 | opt_recv->dccpor_elapsed_time = | ||
184 | ntohs(*(u16 *)value); | ||
185 | else | ||
186 | opt_recv->dccpor_elapsed_time = | ||
187 | ntohl(*(u32 *)value); | ||
188 | |||
189 | dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, | ||
190 | opt_recv->dccpor_elapsed_time); | ||
191 | break; | ||
192 | /* | ||
193 | * From draft-ietf-dccp-spec-11.txt: | ||
194 | * | ||
195 | * Option numbers 128 through 191 are for | ||
196 | * options sent from the HC-Sender to the | ||
197 | * HC-Receiver; option numbers 192 through 255 | ||
198 | * are for options sent from the HC-Receiver to | ||
199 | * the HC-Sender. | ||
200 | */ | ||
201 | case 128 ... 191: { | ||
202 | const u16 idx = value - options; | ||
203 | |||
204 | if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, | ||
205 | opt, len, idx, | ||
206 | value) != 0) | ||
207 | goto out_invalid_option; | ||
208 | } | ||
209 | break; | ||
210 | case 192 ... 255: { | ||
211 | const u16 idx = value - options; | ||
212 | |||
213 | if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, | ||
214 | opt, len, idx, | ||
215 | value) != 0) | ||
216 | goto out_invalid_option; | ||
217 | } | ||
218 | break; | ||
219 | default: | ||
220 | pr_info("DCCP(%p): option %d(len=%d) not " | ||
221 | "implemented, ignoring\n", | ||
222 | sk, opt, len); | ||
223 | break; | ||
224 | } | ||
225 | } | ||
226 | |||
227 | return 0; | ||
228 | |||
229 | out_invalid_option: | ||
230 | DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); | ||
231 | DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; | ||
232 | pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); | ||
233 | return -1; | ||
234 | } | ||
235 | |||
236 | static void dccp_encode_value_var(const u32 value, unsigned char *to, | ||
237 | const unsigned int len) | ||
238 | { | ||
239 | if (len > 3) | ||
240 | *to++ = (value & 0xFF000000) >> 24; | ||
241 | if (len > 2) | ||
242 | *to++ = (value & 0xFF0000) >> 16; | ||
243 | if (len > 1) | ||
244 | *to++ = (value & 0xFF00) >> 8; | ||
245 | if (len > 0) | ||
246 | *to++ = (value & 0xFF); | ||
247 | } | ||
248 | |||
/*
 * Number of bytes (1, 2 or 3) used to encode an NDP count of @ndp.
 * Counts that fit in a single byte are flagged as the expected case.
 */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}
253 | |||
254 | void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
255 | const unsigned char option, | ||
256 | const void *value, const unsigned char len) | ||
257 | { | ||
258 | unsigned char *to; | ||
259 | |||
260 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { | ||
261 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " | ||
262 | "%d option!\n", option); | ||
263 | return; | ||
264 | } | ||
265 | |||
266 | DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; | ||
267 | |||
268 | to = skb_push(skb, len + 2); | ||
269 | *to++ = option; | ||
270 | *to++ = len + 2; | ||
271 | |||
272 | memcpy(to, value, len); | ||
273 | } | ||
274 | |||
275 | EXPORT_SYMBOL_GPL(dccp_insert_option); | ||
276 | |||
277 | static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) | ||
278 | { | ||
279 | struct dccp_sock *dp = dccp_sk(sk); | ||
280 | int ndp = dp->dccps_ndp_count; | ||
281 | |||
282 | if (dccp_non_data_packet(skb)) | ||
283 | ++dp->dccps_ndp_count; | ||
284 | else | ||
285 | dp->dccps_ndp_count = 0; | ||
286 | |||
287 | if (ndp > 0) { | ||
288 | unsigned char *ptr; | ||
289 | const int ndp_len = dccp_ndp_len(ndp); | ||
290 | const int len = ndp_len + 2; | ||
291 | |||
292 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) | ||
293 | return; | ||
294 | |||
295 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
296 | |||
297 | ptr = skb_push(skb, len); | ||
298 | *ptr++ = DCCPO_NDP_COUNT; | ||
299 | *ptr++ = len; | ||
300 | dccp_encode_value_var(ndp, ptr, ndp_len); | ||
301 | } | ||
302 | } | ||
303 | |||
304 | static inline int dccp_elapsed_time_len(const u32 elapsed_time) | ||
305 | { | ||
306 | return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; | ||
307 | } | ||
308 | |||
309 | void dccp_insert_option_elapsed_time(struct sock *sk, | ||
310 | struct sk_buff *skb, | ||
311 | u32 elapsed_time) | ||
312 | { | ||
313 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
314 | struct dccp_sock *dp = dccp_sk(sk); | ||
315 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
316 | "CLIENT TX opt: " : "server TX opt: "; | ||
317 | #endif | ||
318 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | ||
319 | const int len = 2 + elapsed_time_len; | ||
320 | unsigned char *to; | ||
321 | |||
322 | if (elapsed_time_len == 0) | ||
323 | return; | ||
324 | |||
325 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
326 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " | ||
327 | "insert elapsed time!\n"); | ||
328 | return; | ||
329 | } | ||
330 | |||
331 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
332 | |||
333 | to = skb_push(skb, len); | ||
334 | *to++ = DCCPO_ELAPSED_TIME; | ||
335 | *to++ = len; | ||
336 | |||
337 | if (elapsed_time_len == 2) { | ||
338 | const u16 var16 = htons((u16)elapsed_time); | ||
339 | memcpy(to, &var16, 2); | ||
340 | } else { | ||
341 | const u32 var32 = htonl(elapsed_time); | ||
342 | memcpy(to, &var32, 4); | ||
343 | } | ||
344 | |||
345 | dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", | ||
346 | debug_prefix, elapsed_time, | ||
347 | len, | ||
348 | (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); | ||
349 | } | ||
350 | |||
351 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); | ||
352 | |||
353 | static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) | ||
354 | { | ||
355 | struct dccp_sock *dp = dccp_sk(sk); | ||
356 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
357 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
358 | "CLIENT TX opt: " : "server TX opt: "; | ||
359 | #endif | ||
360 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
361 | int len = ap->dccpap_buf_vector_len + 2; | ||
362 | const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; | ||
363 | unsigned char *to, *from; | ||
364 | |||
365 | if (elapsed_time != 0) | ||
366 | dccp_insert_option_elapsed_time(sk, skb, elapsed_time); | ||
367 | |||
368 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
369 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " | ||
370 | "insert ACK Vector!\n"); | ||
371 | return; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * XXX: now we have just one ack vector sent record, so | ||
376 | * we have to wait for it to be cleared. | ||
377 | * | ||
378 | * Of course this is not acceptable, but this is just for | ||
379 | * basic testing now. | ||
380 | */ | ||
381 | if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) | ||
382 | return; | ||
383 | |||
384 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
385 | |||
386 | to = skb_push(skb, len); | ||
387 | *to++ = DCCPO_ACK_VECTOR_0; | ||
388 | *to++ = len; | ||
389 | |||
390 | len = ap->dccpap_buf_vector_len; | ||
391 | from = ap->dccpap_buf + ap->dccpap_buf_head; | ||
392 | |||
393 | /* Check if buf_head wraps */ | ||
394 | if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { | ||
395 | const unsigned int tailsize = (ap->dccpap_buf_len - | ||
396 | ap->dccpap_buf_head); | ||
397 | |||
398 | memcpy(to, from, tailsize); | ||
399 | to += tailsize; | ||
400 | len -= tailsize; | ||
401 | from = ap->dccpap_buf; | ||
402 | } | ||
403 | |||
404 | memcpy(to, from, len); | ||
405 | /* | ||
406 | * From draft-ietf-dccp-spec-11.txt: | ||
407 | * | ||
408 | * For each acknowledgement it sends, the HC-Receiver will add an | ||
409 | * acknowledgement record. ack_seqno will equal the HC-Receiver | ||
410 | * sequence number it used for the ack packet; ack_ptr will equal | ||
411 | * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will | ||
412 | * equal buf_nonce. | ||
413 | * | ||
414 | * This implemention uses just one ack record for now. | ||
415 | */ | ||
416 | ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
417 | ap->dccpap_ack_ptr = ap->dccpap_buf_head; | ||
418 | ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; | ||
419 | ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; | ||
420 | ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; | ||
421 | |||
422 | dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " | ||
423 | "ack_ackno=%llu\n", | ||
424 | debug_prefix, ap->dccpap_ack_vector_len, | ||
425 | (unsigned long long) ap->dccpap_ack_seqno, | ||
426 | (unsigned long long) ap->dccpap_ack_ackno); | ||
427 | } | ||
428 | |||
429 | void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) | ||
430 | { | ||
431 | struct timeval tv; | ||
432 | u32 now; | ||
433 | |||
434 | do_gettimeofday(&tv); | ||
435 | now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; | ||
436 | /* yes this will overflow but that is the point as we want a | ||
437 | * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ | ||
438 | |||
439 | now = htonl(now); | ||
440 | dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); | ||
441 | } | ||
442 | |||
443 | EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); | ||
444 | |||
445 | static void dccp_insert_option_timestamp_echo(struct sock *sk, | ||
446 | struct sk_buff *skb) | ||
447 | { | ||
448 | struct dccp_sock *dp = dccp_sk(sk); | ||
449 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
450 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
451 | "CLIENT TX opt: " : "server TX opt: "; | ||
452 | #endif | ||
453 | u32 tstamp_echo; | ||
454 | const u32 elapsed_time = | ||
455 | timeval_now_delta(&dp->dccps_timestamp_time) / 10; | ||
456 | const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); | ||
457 | const int len = 6 + elapsed_time_len; | ||
458 | unsigned char *to; | ||
459 | |||
460 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { | ||
461 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " | ||
462 | "timestamp echo!\n"); | ||
463 | return; | ||
464 | } | ||
465 | |||
466 | DCCP_SKB_CB(skb)->dccpd_opt_len += len; | ||
467 | |||
468 | to = skb_push(skb, len); | ||
469 | *to++ = DCCPO_TIMESTAMP_ECHO; | ||
470 | *to++ = len; | ||
471 | |||
472 | tstamp_echo = htonl(dp->dccps_timestamp_echo); | ||
473 | memcpy(to, &tstamp_echo, 4); | ||
474 | to += 4; | ||
475 | |||
476 | if (elapsed_time_len == 2) { | ||
477 | const u16 var16 = htons((u16)elapsed_time); | ||
478 | memcpy(to, &var16, 2); | ||
479 | } else if (elapsed_time_len == 4) { | ||
480 | const u32 var32 = htonl(elapsed_time); | ||
481 | memcpy(to, &var32, 4); | ||
482 | } | ||
483 | |||
484 | dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", | ||
485 | debug_prefix, dp->dccps_timestamp_echo, | ||
486 | len, | ||
487 | (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); | ||
488 | |||
489 | dp->dccps_timestamp_echo = 0; | ||
490 | dp->dccps_timestamp_time.tv_sec = 0; | ||
491 | dp->dccps_timestamp_time.tv_usec = 0; | ||
492 | } | ||
493 | |||
494 | void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | ||
495 | { | ||
496 | struct dccp_sock *dp = dccp_sk(sk); | ||
497 | |||
498 | DCCP_SKB_CB(skb)->dccpd_opt_len = 0; | ||
499 | |||
500 | if (dp->dccps_options.dccpo_send_ndp_count) | ||
501 | dccp_insert_option_ndp(sk, skb); | ||
502 | |||
503 | if (!dccp_packet_without_ack(skb)) { | ||
504 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
505 | (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != | ||
506 | DCCP_MAX_SEQNO + 1)) | ||
507 | dccp_insert_option_ack_vector(sk, skb); | ||
508 | |||
509 | if (dp->dccps_timestamp_echo != 0) | ||
510 | dccp_insert_option_timestamp_echo(sk, skb); | ||
511 | } | ||
512 | |||
513 | ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); | ||
514 | ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); | ||
515 | |||
516 | /* XXX: insert other options when appropriate */ | ||
517 | |||
518 | if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { | ||
519 | /* The length of all options has to be a multiple of 4 */ | ||
520 | int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; | ||
521 | |||
522 | if (padding != 0) { | ||
523 | padding = 4 - padding; | ||
524 | memset(skb_push(skb, padding), 0, padding); | ||
525 | DCCP_SKB_CB(skb)->dccpd_opt_len += padding; | ||
526 | } | ||
527 | } | ||
528 | } | ||
529 | |||
530 | struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, | ||
531 | const unsigned int __nocast priority) | ||
532 | { | ||
533 | struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); | ||
534 | |||
535 | if (ap != NULL) { | ||
536 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
537 | memset(ap->dccpap_buf, 0xFF, len); | ||
538 | #endif | ||
539 | ap->dccpap_buf_len = len; | ||
540 | ap->dccpap_buf_head = | ||
541 | ap->dccpap_buf_tail = | ||
542 | ap->dccpap_buf_len - 1; | ||
543 | ap->dccpap_buf_ackno = | ||
544 | ap->dccpap_ack_ackno = | ||
545 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
546 | ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; | ||
547 | ap->dccpap_ack_ptr = 0; | ||
548 | ap->dccpap_time.tv_sec = 0; | ||
549 | ap->dccpap_time.tv_usec = 0; | ||
550 | ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; | ||
551 | } | ||
552 | |||
553 | return ap; | ||
554 | } | ||
555 | |||
/*
 * Release an ack packets record obtained from dccp_ackpkts_alloc().
 * A NULL @ap is accepted and ignored.  Debug builds overwrite the record
 * and its buffer with 0xFF before freeing it.
 */
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
	if (ap == NULL)
		return;

#ifdef CONFIG_IP_DCCP_DEBUG
	memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
	kfree(ap);
}
565 | |||
566 | static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, | ||
567 | const unsigned int index) | ||
568 | { | ||
569 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; | ||
570 | } | ||
571 | |||
572 | static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, | ||
573 | const unsigned int index) | ||
574 | { | ||
575 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; | ||
576 | } | ||
577 | |||
578 | /* | ||
579 | * If several packets are missing, the HC-Receiver may prefer to enter multiple | ||
580 | * bytes with run length 0, rather than a single byte with a larger run length; | ||
581 | * this simplifies table updates if one of the missing packets arrives. | ||
582 | */ | ||
583 | static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, | ||
584 | const unsigned int packets, | ||
585 | const unsigned char state) | ||
586 | { | ||
587 | unsigned int gap; | ||
588 | signed long new_head; | ||
589 | |||
590 | if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) | ||
591 | return -ENOBUFS; | ||
592 | |||
593 | gap = packets - 1; | ||
594 | new_head = ap->dccpap_buf_head - packets; | ||
595 | |||
596 | if (new_head < 0) { | ||
597 | if (gap > 0) { | ||
598 | memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, | ||
599 | gap + new_head + 1); | ||
600 | gap = -new_head; | ||
601 | } | ||
602 | new_head += ap->dccpap_buf_len; | ||
603 | } | ||
604 | |||
605 | ap->dccpap_buf_head = new_head; | ||
606 | |||
607 | if (gap > 0) | ||
608 | memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, | ||
609 | DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); | ||
610 | |||
611 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
612 | ap->dccpap_buf_vector_len += packets; | ||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
618 | */ | ||
619 | int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) | ||
620 | { | ||
621 | /* | ||
622 | * Check at the right places if the buffer is full, if it is, tell the | ||
623 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
624 | * vectors, when we will free up space in dccpap_buf. | ||
625 | * | ||
626 | * We may well decide to do buffer compression, etc, but for now lets | ||
627 | * just drop. | ||
628 | * | ||
629 | * From Appendix A: | ||
630 | * | ||
631 | * Of course, the circular buffer may overflow, either when the | ||
632 | * HC-Sender is sending data at a very high rate, when the | ||
633 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
634 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
635 | * (so the HC-Receiver is unable to clean up old state). In this | ||
636 | * case, the HC-Receiver should either compress the buffer (by | ||
637 | * increasing run lengths when possible), transfer its state to | ||
638 | * a larger buffer, or, as a last resort, drop all received | ||
639 | * packets, without processing them whatsoever, until its buffer | ||
640 | * shrinks again. | ||
641 | */ | ||
642 | |||
643 | /* See if this is the first ackno being inserted */ | ||
644 | if (ap->dccpap_buf_vector_len == 0) { | ||
645 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
646 | ap->dccpap_buf_vector_len = 1; | ||
647 | } else if (after48(ackno, ap->dccpap_buf_ackno)) { | ||
648 | const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, | ||
649 | ackno); | ||
650 | |||
651 | /* | ||
652 | * Look if the state of this packet is the same as the | ||
653 | * previous ackno and if so if we can bump the head len. | ||
654 | */ | ||
655 | if (delta == 1 && | ||
656 | dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && | ||
657 | (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < | ||
658 | DCCP_ACKPKTS_LEN_MASK)) | ||
659 | ap->dccpap_buf[ap->dccpap_buf_head]++; | ||
660 | else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) | ||
661 | return -ENOBUFS; | ||
662 | } else { | ||
663 | /* | ||
664 | * A.1.2. Old Packets | ||
665 | * | ||
666 | * When a packet with Sequence Number S arrives, and | ||
667 | * S <= buf_ackno, the HC-Receiver will scan the table | ||
668 | * for the byte corresponding to S. (Indexing structures | ||
669 | * could reduce the complexity of this scan.) | ||
670 | */ | ||
671 | u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); | ||
672 | unsigned int index = ap->dccpap_buf_head; | ||
673 | |||
674 | while (1) { | ||
675 | const u8 len = dccp_ackpkts_len(ap, index); | ||
676 | const u8 state = dccp_ackpkts_state(ap, index); | ||
677 | /* | ||
678 | * valid packets not yet in dccpap_buf have a reserved | ||
679 | * entry, with a len equal to 0. | ||
680 | */ | ||
681 | if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && | ||
682 | len == 0 && delta == 0) { /* Found our | ||
683 | reserved seat! */ | ||
684 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
685 | (unsigned long long) ackno); | ||
686 | ap->dccpap_buf[index] = state; | ||
687 | goto out; | ||
688 | } | ||
689 | /* len == 0 means one packet */ | ||
690 | if (delta < len + 1) | ||
691 | goto out_duplicate; | ||
692 | |||
693 | delta -= len + 1; | ||
694 | if (++index == ap->dccpap_buf_len) | ||
695 | index = 0; | ||
696 | } | ||
697 | } | ||
698 | |||
699 | ap->dccpap_buf_ackno = ackno; | ||
700 | do_gettimeofday(&ap->dccpap_time); | ||
701 | out: | ||
702 | dccp_pr_debug(""); | ||
703 | dccp_ackpkts_print(ap); | ||
704 | return 0; | ||
705 | |||
706 | out_duplicate: | ||
707 | /* Duplicate packet */ | ||
708 | dccp_pr_debug("Received a dup or already considered lost " | ||
709 | "packet: %llu\n", (unsigned long long) ackno); | ||
710 | return -EILSEQ; | ||
711 | } | ||
712 | |||
713 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
714 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, | ||
715 | int len) | ||
716 | { | ||
717 | if (!dccp_debug) | ||
718 | return; | ||
719 | |||
720 | printk("ACK vector len=%d, ackno=%llu |", len, | ||
721 | (unsigned long long) ackno); | ||
722 | |||
723 | while (len--) { | ||
724 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
725 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
726 | |||
727 | printk("%d,%d|", state, rl); | ||
728 | ++vector; | ||
729 | } | ||
730 | |||
731 | printk("\n"); | ||
732 | } | ||
733 | |||
734 | void dccp_ackpkts_print(const struct dccp_ackpkts *ap) | ||
735 | { | ||
736 | dccp_ackvector_print(ap->dccpap_buf_ackno, | ||
737 | ap->dccpap_buf + ap->dccpap_buf_head, | ||
738 | ap->dccpap_buf_vector_len); | ||
739 | } | ||
740 | #endif | ||
741 | |||
742 | static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) | ||
743 | { | ||
744 | /* | ||
745 | * As we're keeping track of the ack vector size | ||
746 | * (dccpap_buf_vector_len) and the sent ack vector size | ||
747 | * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but | ||
748 | * keep this code here as in the future we'll implement a vector of | ||
749 | * ack records, as suggested in draft-ietf-dccp-spec-11.txt | ||
750 | * Appendix A. -acme | ||
751 | */ | ||
752 | #if 0 | ||
753 | ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; | ||
754 | if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) | ||
755 | ap->dccpap_buf_tail -= ap->dccpap_buf_len; | ||
756 | #endif | ||
757 | ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; | ||
758 | } | ||
759 | |||
760 | void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, | ||
761 | u64 ackno) | ||
762 | { | ||
763 | /* Check if we actually sent an ACK vector */ | ||
764 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
765 | return; | ||
766 | |||
767 | if (ackno == ap->dccpap_ack_seqno) { | ||
768 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
769 | struct dccp_sock *dp = dccp_sk(sk); | ||
770 | const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
771 | "CLIENT rx ack: " : "server rx ack: "; | ||
772 | #endif | ||
773 | dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " | ||
774 | "ack_ackno=%llu, ACKED!\n", | ||
775 | debug_prefix, 1, | ||
776 | (unsigned long long) ap->dccpap_ack_seqno, | ||
777 | (unsigned long long) ap->dccpap_ack_ackno); | ||
778 | dccp_ackpkts_trow_away_ack_record(ap); | ||
779 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
784 | struct sock *sk, u64 ackno, | ||
785 | const unsigned char len, | ||
786 | const unsigned char *vector) | ||
787 | { | ||
788 | unsigned char i; | ||
789 | |||
790 | /* Check if we actually sent an ACK vector */ | ||
791 | if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) | ||
792 | return; | ||
793 | /* | ||
794 | * We're in the receiver half connection, so if the received an ACK | ||
795 | * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're | ||
796 | * not interested. | ||
797 | * | ||
798 | * Extra explanation with example: | ||
799 | * | ||
800 | * if we received an ACK vector with ackno 50, it can only be acking | ||
801 | * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). | ||
802 | */ | ||
803 | /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ | ||
804 | if (before48(ackno, ap->dccpap_ack_seqno)) { | ||
805 | /* dccp_pr_debug_cat("yes\n"); */ | ||
806 | return; | ||
807 | } | ||
808 | /* dccp_pr_debug_cat("no\n"); */ | ||
809 | |||
810 | i = len; | ||
811 | while (i--) { | ||
812 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
813 | u64 ackno_end_rl; | ||
814 | |||
815 | dccp_set_seqno(&ackno_end_rl, ackno - rl); | ||
816 | |||
817 | /* | ||
818 | * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, | ||
819 | * ap->dccpap_ack_seqno, ackno); | ||
820 | */ | ||
821 | if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { | ||
822 | const u8 state = (*vector & | ||
823 | DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
824 | /* dccp_pr_debug_cat("yes\n"); */ | ||
825 | |||
826 | if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { | ||
827 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
828 | struct dccp_sock *dp = dccp_sk(sk); | ||
829 | const char *debug_prefix = | ||
830 | dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
831 | "CLIENT rx ack: " : "server rx ack: "; | ||
832 | #endif | ||
833 | dccp_pr_debug("%sACK vector 0, len=%d, " | ||
834 | "ack_seqno=%llu, ack_ackno=%llu, " | ||
835 | "ACKED!\n", | ||
836 | debug_prefix, len, | ||
837 | (unsigned long long) | ||
838 | ap->dccpap_ack_seqno, | ||
839 | (unsigned long long) | ||
840 | ap->dccpap_ack_ackno); | ||
841 | dccp_ackpkts_trow_away_ack_record(ap); | ||
842 | } | ||
843 | /* | ||
844 | * If dccpap_ack_seqno was not received, no problem | ||
845 | * we'll send another ACK vector. | ||
846 | */ | ||
847 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
848 | break; | ||
849 | } | ||
850 | /* dccp_pr_debug_cat("no\n"); */ | ||
851 | |||
852 | dccp_set_seqno(&ackno, ackno_end_rl - 1); | ||
853 | ++vector; | ||
854 | } | ||
855 | } | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 000000000000..28de157a4326 --- /dev/null +++ b/net/dccp/output.c | |||
@@ -0,0 +1,528 @@ | |||
1 | /* | ||
2 | * net/dccp/output.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "ccid.h" | ||
20 | #include "dccp.h" | ||
21 | |||
/*
 * Called once a packet carrying an acknowledgement has been built for
 * sending: clears the socket's ICSK_TIME_DACK transmit timer.
 */
static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
26 | |||
27 | /* | ||
28 | * All SKB's seen here are completely headerless. It is our | ||
29 | * job to build the DCCP header, and pass the packet down to | ||
30 | * IP so it can do the same plus pass the packet off to the | ||
31 | * device. | ||
32 | */ | ||
33 | int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | ||
34 | { | ||
35 | if (likely(skb != NULL)) { | ||
36 | const struct inet_sock *inet = inet_sk(sk); | ||
37 | struct dccp_sock *dp = dccp_sk(sk); | ||
38 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
39 | struct dccp_hdr *dh; | ||
40 | /* XXX For now we're using only 48 bits sequence numbers */ | ||
41 | const int dccp_header_size = sizeof(*dh) + | ||
42 | sizeof(struct dccp_hdr_ext) + | ||
43 | dccp_packet_hdr_len(dcb->dccpd_type); | ||
44 | int err, set_ack = 1; | ||
45 | u64 ackno = dp->dccps_gsr; | ||
46 | |||
47 | dccp_inc_seqno(&dp->dccps_gss); | ||
48 | |||
49 | switch (dcb->dccpd_type) { | ||
50 | case DCCP_PKT_DATA: | ||
51 | set_ack = 0; | ||
52 | break; | ||
53 | case DCCP_PKT_SYNC: | ||
54 | case DCCP_PKT_SYNCACK: | ||
55 | ackno = dcb->dccpd_seq; | ||
56 | break; | ||
57 | } | ||
58 | |||
59 | dcb->dccpd_seq = dp->dccps_gss; | ||
60 | dccp_insert_options(sk, skb); | ||
61 | |||
62 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
63 | dh = dccp_hdr(skb); | ||
64 | /* | ||
65 | * Data packets are not cloned as they are never retransmitted | ||
66 | */ | ||
67 | if (skb_cloned(skb)) | ||
68 | skb_set_owner_w(skb, sk); | ||
69 | |||
70 | /* Build DCCP header and checksum it. */ | ||
71 | memset(dh, 0, dccp_header_size); | ||
72 | dh->dccph_type = dcb->dccpd_type; | ||
73 | dh->dccph_sport = inet->sport; | ||
74 | dh->dccph_dport = inet->dport; | ||
75 | dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; | ||
76 | dh->dccph_ccval = dcb->dccpd_ccval; | ||
77 | /* XXX For now we're using only 48 bits sequence numbers */ | ||
78 | dh->dccph_x = 1; | ||
79 | |||
80 | dp->dccps_awh = dp->dccps_gss; | ||
81 | dccp_hdr_set_seq(dh, dp->dccps_gss); | ||
82 | if (set_ack) | ||
83 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); | ||
84 | |||
85 | switch (dcb->dccpd_type) { | ||
86 | case DCCP_PKT_REQUEST: | ||
87 | dccp_hdr_request(skb)->dccph_req_service = | ||
88 | dcb->dccpd_service; | ||
89 | break; | ||
90 | case DCCP_PKT_RESET: | ||
91 | dccp_hdr_reset(skb)->dccph_reset_code = | ||
92 | dcb->dccpd_reset_code; | ||
93 | break; | ||
94 | } | ||
95 | |||
96 | dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, | ||
97 | inet->daddr); | ||
98 | |||
99 | if (set_ack) | ||
100 | dccp_event_ack_sent(sk); | ||
101 | |||
102 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
103 | |||
104 | err = ip_queue_xmit(skb, 0); | ||
105 | if (err <= 0) | ||
106 | return err; | ||
107 | |||
108 | /* NET_XMIT_CN is special. It does not guarantee, | ||
109 | * that this packet is lost. It tells that device | ||
110 | * is about to start to drop packets or already | ||
111 | * drops some packets of the same priority and | ||
112 | * invokes us to send less aggressively. | ||
113 | */ | ||
114 | return err == NET_XMIT_CN ? 0 : err; | ||
115 | } | ||
116 | return -ENOBUFS; | ||
117 | } | ||
118 | |||
119 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | ||
120 | { | ||
121 | struct dccp_sock *dp = dccp_sk(sk); | ||
122 | int mss_now; | ||
123 | |||
124 | /* | ||
125 | * FIXME: we really should be using the af_specific thing to support | ||
126 | * IPv6. | ||
127 | * mss_now = pmtu - tp->af_specific->net_header_len - | ||
128 | * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
129 | */ | ||
130 | mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - | ||
131 | sizeof(struct dccp_hdr_ext); | ||
132 | |||
133 | /* Now subtract optional transport overhead */ | ||
134 | mss_now -= dp->dccps_ext_header_len; | ||
135 | |||
136 | /* | ||
137 | * FIXME: this should come from the CCID infrastructure, where, say, | ||
138 | * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets | ||
139 | * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED | ||
140 | * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to | ||
141 | * make it a multiple of 4 | ||
142 | */ | ||
143 | |||
144 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; | ||
145 | |||
146 | /* And store cached results */ | ||
147 | dp->dccps_pmtu_cookie = pmtu; | ||
148 | dp->dccps_mss_cache = mss_now; | ||
149 | |||
150 | return mss_now; | ||
151 | } | ||
152 | |||
153 | void dccp_write_space(struct sock *sk) | ||
154 | { | ||
155 | read_lock(&sk->sk_callback_lock); | ||
156 | |||
157 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
158 | wake_up_interruptible(sk->sk_sleep); | ||
159 | /* Should agree with poll, otherwise some programs break */ | ||
160 | if (sock_writeable(sk)) | ||
161 | sk_wake_async(sk, 2, POLL_OUT); | ||
162 | |||
163 | read_unlock(&sk->sk_callback_lock); | ||
164 | } | ||
165 | |||
166 | /** | ||
167 | * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet | ||
168 | * @sk: socket to wait for | ||
169 | * @timeo: for how long | ||
170 | */ | ||
171 | static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, | ||
172 | long *timeo) | ||
173 | { | ||
174 | struct dccp_sock *dp = dccp_sk(sk); | ||
175 | DEFINE_WAIT(wait); | ||
176 | long delay; | ||
177 | int rc; | ||
178 | |||
179 | while (1) { | ||
180 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | ||
181 | |||
182 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | ||
183 | goto do_error; | ||
184 | if (!*timeo) | ||
185 | goto do_nonblock; | ||
186 | if (signal_pending(current)) | ||
187 | goto do_interrupted; | ||
188 | |||
189 | rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, | ||
190 | skb->len); | ||
191 | if (rc <= 0) | ||
192 | break; | ||
193 | delay = msecs_to_jiffies(rc); | ||
194 | if (delay > *timeo || delay < 0) | ||
195 | goto do_nonblock; | ||
196 | |||
197 | sk->sk_write_pending++; | ||
198 | release_sock(sk); | ||
199 | *timeo -= schedule_timeout(delay); | ||
200 | lock_sock(sk); | ||
201 | sk->sk_write_pending--; | ||
202 | } | ||
203 | out: | ||
204 | finish_wait(sk->sk_sleep, &wait); | ||
205 | return rc; | ||
206 | |||
207 | do_error: | ||
208 | rc = -EPIPE; | ||
209 | goto out; | ||
210 | do_nonblock: | ||
211 | rc = -EAGAIN; | ||
212 | goto out; | ||
213 | do_interrupted: | ||
214 | rc = sock_intr_errno(*timeo); | ||
215 | goto out; | ||
216 | } | ||
217 | |||
218 | int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | ||
219 | { | ||
220 | const struct dccp_sock *dp = dccp_sk(sk); | ||
221 | int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, | ||
222 | skb->len); | ||
223 | |||
224 | if (err > 0) | ||
225 | err = dccp_wait_for_ccid(sk, skb, timeo); | ||
226 | |||
227 | if (err == 0) { | ||
228 | const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
229 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
230 | const int len = skb->len; | ||
231 | |||
232 | if (sk->sk_state == DCCP_PARTOPEN) { | ||
233 | /* See 8.1.5. Handshake Completion */ | ||
234 | inet_csk_schedule_ack(sk); | ||
235 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
236 | inet_csk(sk)->icsk_rto, | ||
237 | DCCP_RTO_MAX); | ||
238 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
239 | /* | ||
240 | * FIXME: we really should have a | ||
241 | * dccps_ack_pending or use icsk. | ||
242 | */ | ||
243 | } else if (inet_csk_ack_scheduled(sk) || | ||
244 | dp->dccps_timestamp_echo != 0 || | ||
245 | (dp->dccps_options.dccpo_send_ack_vector && | ||
246 | ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && | ||
247 | ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) | ||
248 | dcb->dccpd_type = DCCP_PKT_DATAACK; | ||
249 | else | ||
250 | dcb->dccpd_type = DCCP_PKT_DATA; | ||
251 | |||
252 | err = dccp_transmit_skb(sk, skb); | ||
253 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); | ||
254 | } | ||
255 | |||
256 | return err; | ||
257 | } | ||
258 | |||
259 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | ||
260 | { | ||
261 | if (inet_sk_rebuild_header(sk) != 0) | ||
262 | return -EHOSTUNREACH; /* Routing failure or similar. */ | ||
263 | |||
264 | return dccp_transmit_skb(sk, (skb_cloned(skb) ? | ||
265 | pskb_copy(skb, GFP_ATOMIC): | ||
266 | skb_clone(skb, GFP_ATOMIC))); | ||
267 | } | ||
268 | |||
269 | struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | ||
270 | struct request_sock *req) | ||
271 | { | ||
272 | struct dccp_hdr *dh; | ||
273 | const int dccp_header_size = sizeof(struct dccp_hdr) + | ||
274 | sizeof(struct dccp_hdr_ext) + | ||
275 | sizeof(struct dccp_hdr_response); | ||
276 | struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + | ||
277 | dccp_header_size, 1, | ||
278 | GFP_ATOMIC); | ||
279 | if (skb == NULL) | ||
280 | return NULL; | ||
281 | |||
282 | /* Reserve space for headers. */ | ||
283 | skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); | ||
284 | |||
285 | skb->dst = dst_clone(dst); | ||
286 | skb->csum = 0; | ||
287 | |||
288 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; | ||
289 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; | ||
290 | dccp_insert_options(sk, skb); | ||
291 | |||
292 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
293 | |||
294 | dh = dccp_hdr(skb); | ||
295 | memset(dh, 0, dccp_header_size); | ||
296 | |||
297 | dh->dccph_sport = inet_sk(sk)->sport; | ||
298 | dh->dccph_dport = inet_rsk(req)->rmt_port; | ||
299 | dh->dccph_doff = (dccp_header_size + | ||
300 | DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | ||
301 | dh->dccph_type = DCCP_PKT_RESPONSE; | ||
302 | dh->dccph_x = 1; | ||
303 | dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); | ||
304 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); | ||
305 | |||
306 | dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, | ||
307 | inet_rsk(req)->rmt_addr); | ||
308 | |||
309 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
310 | return skb; | ||
311 | } | ||
312 | |||
313 | struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, | ||
314 | const enum dccp_reset_codes code) | ||
315 | |||
316 | { | ||
317 | struct dccp_hdr *dh; | ||
318 | struct dccp_sock *dp = dccp_sk(sk); | ||
319 | const int dccp_header_size = sizeof(struct dccp_hdr) + | ||
320 | sizeof(struct dccp_hdr_ext) + | ||
321 | sizeof(struct dccp_hdr_reset); | ||
322 | struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + | ||
323 | dccp_header_size, 1, | ||
324 | GFP_ATOMIC); | ||
325 | if (skb == NULL) | ||
326 | return NULL; | ||
327 | |||
328 | /* Reserve space for headers. */ | ||
329 | skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); | ||
330 | |||
331 | skb->dst = dst_clone(dst); | ||
332 | skb->csum = 0; | ||
333 | |||
334 | dccp_inc_seqno(&dp->dccps_gss); | ||
335 | |||
336 | DCCP_SKB_CB(skb)->dccpd_reset_code = code; | ||
337 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; | ||
338 | DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; | ||
339 | dccp_insert_options(sk, skb); | ||
340 | |||
341 | skb->h.raw = skb_push(skb, dccp_header_size); | ||
342 | |||
343 | dh = dccp_hdr(skb); | ||
344 | memset(dh, 0, dccp_header_size); | ||
345 | |||
346 | dh->dccph_sport = inet_sk(sk)->sport; | ||
347 | dh->dccph_dport = inet_sk(sk)->dport; | ||
348 | dh->dccph_doff = (dccp_header_size + | ||
349 | DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; | ||
350 | dh->dccph_type = DCCP_PKT_RESET; | ||
351 | dh->dccph_x = 1; | ||
352 | dccp_hdr_set_seq(dh, dp->dccps_gss); | ||
353 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); | ||
354 | |||
355 | dccp_hdr_reset(skb)->dccph_reset_code = code; | ||
356 | |||
357 | dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr, | ||
358 | inet_sk(sk)->daddr); | ||
359 | |||
360 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | ||
361 | return skb; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Do all connect socket setups that can be done AF independent. | ||
366 | */ | ||
367 | static inline void dccp_connect_init(struct sock *sk) | ||
368 | { | ||
369 | struct dst_entry *dst = __sk_dst_get(sk); | ||
370 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
371 | |||
372 | sk->sk_err = 0; | ||
373 | sock_reset_flag(sk, SOCK_DONE); | ||
374 | |||
375 | dccp_sync_mss(sk, dst_mtu(dst)); | ||
376 | |||
377 | /* | ||
378 | * FIXME: set dp->{dccps_swh,dccps_swl}, with | ||
379 | * something like dccp_inc_seq | ||
380 | */ | ||
381 | |||
382 | icsk->icsk_retransmits = 0; | ||
383 | } | ||
384 | |||
385 | int dccp_connect(struct sock *sk) | ||
386 | { | ||
387 | struct sk_buff *skb; | ||
388 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
389 | |||
390 | dccp_connect_init(sk); | ||
391 | |||
392 | skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation); | ||
393 | if (unlikely(skb == NULL)) | ||
394 | return -ENOBUFS; | ||
395 | |||
396 | /* Reserve space for headers. */ | ||
397 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
398 | |||
399 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; | ||
400 | /* FIXME: set service to something meaningful, coming | ||
401 | * from userspace*/ | ||
402 | DCCP_SKB_CB(skb)->dccpd_service = 0; | ||
403 | skb->csum = 0; | ||
404 | skb_set_owner_w(skb, sk); | ||
405 | |||
406 | BUG_TRAP(sk->sk_send_head == NULL); | ||
407 | sk->sk_send_head = skb; | ||
408 | dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); | ||
409 | DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); | ||
410 | |||
411 | /* Timer for repeating the REQUEST until an answer. */ | ||
412 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
413 | icsk->icsk_rto, DCCP_RTO_MAX); | ||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | void dccp_send_ack(struct sock *sk) | ||
418 | { | ||
419 | /* If we have been reset, we may not send again. */ | ||
420 | if (sk->sk_state != DCCP_CLOSED) { | ||
421 | struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); | ||
422 | |||
423 | if (skb == NULL) { | ||
424 | inet_csk_schedule_ack(sk); | ||
425 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; | ||
426 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
427 | TCP_DELACK_MAX, | ||
428 | DCCP_RTO_MAX); | ||
429 | return; | ||
430 | } | ||
431 | |||
432 | /* Reserve space for headers */ | ||
433 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
434 | skb->csum = 0; | ||
435 | DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; | ||
436 | skb_set_owner_w(skb, sk); | ||
437 | dccp_transmit_skb(sk, skb); | ||
438 | } | ||
439 | } | ||
440 | |||
441 | EXPORT_SYMBOL_GPL(dccp_send_ack); | ||
442 | |||
443 | void dccp_send_delayed_ack(struct sock *sk) | ||
444 | { | ||
445 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
446 | /* | ||
447 | * FIXME: tune this timer. elapsed time fixes the skew, so no problem | ||
448 | * with using 2s, and active senders also piggyback the ACK into a | ||
449 | * DATAACK packet, so this is really for quiescent senders. | ||
450 | */ | ||
451 | unsigned long timeout = jiffies + 2 * HZ; | ||
452 | |||
453 | /* Use new timeout only if there wasn't a older one earlier. */ | ||
454 | if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { | ||
455 | /* If delack timer was blocked or is about to expire, | ||
456 | * send ACK now. | ||
457 | * | ||
458 | * FIXME: check the "about to expire" part | ||
459 | */ | ||
460 | if (icsk->icsk_ack.blocked) { | ||
461 | dccp_send_ack(sk); | ||
462 | return; | ||
463 | } | ||
464 | |||
465 | if (!time_before(timeout, icsk->icsk_ack.timeout)) | ||
466 | timeout = icsk->icsk_ack.timeout; | ||
467 | } | ||
468 | icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; | ||
469 | icsk->icsk_ack.timeout = timeout; | ||
470 | sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); | ||
471 | } | ||
472 | |||
473 | void dccp_send_sync(struct sock *sk, const u64 seq, | ||
474 | const enum dccp_pkt_type pkt_type) | ||
475 | { | ||
476 | /* | ||
477 | * We are not putting this on the write queue, so | ||
478 | * dccp_transmit_skb() will set the ownership to this | ||
479 | * sock. | ||
480 | */ | ||
481 | struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); | ||
482 | |||
483 | if (skb == NULL) | ||
484 | /* FIXME: how to make sure the sync is sent? */ | ||
485 | return; | ||
486 | |||
487 | /* Reserve space for headers and prepare control bits. */ | ||
488 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
489 | skb->csum = 0; | ||
490 | DCCP_SKB_CB(skb)->dccpd_type = pkt_type; | ||
491 | DCCP_SKB_CB(skb)->dccpd_seq = seq; | ||
492 | |||
493 | skb_set_owner_w(skb, sk); | ||
494 | dccp_transmit_skb(sk, skb); | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This | ||
499 | * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under | ||
500 | * any circumstances. | ||
501 | */ | ||
502 | void dccp_send_close(struct sock *sk, const int active) | ||
503 | { | ||
504 | struct dccp_sock *dp = dccp_sk(sk); | ||
505 | struct sk_buff *skb; | ||
506 | const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; | ||
507 | |||
508 | skb = alloc_skb(sk->sk_prot->max_header, prio); | ||
509 | if (skb == NULL) | ||
510 | return; | ||
511 | |||
512 | /* Reserve space for headers and prepare control bits. */ | ||
513 | skb_reserve(skb, sk->sk_prot->max_header); | ||
514 | skb->csum = 0; | ||
515 | DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? | ||
516 | DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; | ||
517 | |||
518 | skb_set_owner_w(skb, sk); | ||
519 | if (active) { | ||
520 | BUG_TRAP(sk->sk_send_head == NULL); | ||
521 | sk->sk_send_head = skb; | ||
522 | dccp_transmit_skb(sk, skb_clone(skb, prio)); | ||
523 | } else | ||
524 | dccp_transmit_skb(sk, skb); | ||
525 | |||
526 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
527 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
528 | } | ||
diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..18a0e69c9dc7 --- /dev/null +++ b/net/dccp/proto.c | |||
@@ -0,0 +1,826 @@ | |||
1 | /* | ||
2 | * net/dccp/proto.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | #include <linux/dccp.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/netdevice.h> | ||
20 | #include <linux/in.h> | ||
21 | #include <linux/if_arp.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/random.h> | ||
24 | #include <net/checksum.h> | ||
25 | |||
26 | #include <net/inet_common.h> | ||
27 | #include <net/ip.h> | ||
28 | #include <net/protocol.h> | ||
29 | #include <net/sock.h> | ||
30 | #include <net/xfrm.h> | ||
31 | |||
32 | #include <asm/semaphore.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/timer.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <linux/poll.h> | ||
37 | #include <linux/dccp.h> | ||
38 | |||
39 | #include "ccid.h" | ||
40 | #include "dccp.h" | ||
41 | |||
42 | DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; | ||
43 | |||
44 | atomic_t dccp_orphan_count = ATOMIC_INIT(0); | ||
45 | |||
46 | static struct net_protocol dccp_protocol = { | ||
47 | .handler = dccp_v4_rcv, | ||
48 | .err_handler = dccp_v4_err, | ||
49 | }; | ||
50 | |||
51 | const char *dccp_packet_name(const int type) | ||
52 | { | ||
53 | static const char *dccp_packet_names[] = { | ||
54 | [DCCP_PKT_REQUEST] = "REQUEST", | ||
55 | [DCCP_PKT_RESPONSE] = "RESPONSE", | ||
56 | [DCCP_PKT_DATA] = "DATA", | ||
57 | [DCCP_PKT_ACK] = "ACK", | ||
58 | [DCCP_PKT_DATAACK] = "DATAACK", | ||
59 | [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", | ||
60 | [DCCP_PKT_CLOSE] = "CLOSE", | ||
61 | [DCCP_PKT_RESET] = "RESET", | ||
62 | [DCCP_PKT_SYNC] = "SYNC", | ||
63 | [DCCP_PKT_SYNCACK] = "SYNCACK", | ||
64 | }; | ||
65 | |||
66 | if (type >= DCCP_NR_PKT_TYPES) | ||
67 | return "INVALID"; | ||
68 | else | ||
69 | return dccp_packet_names[type]; | ||
70 | } | ||
71 | |||
72 | EXPORT_SYMBOL_GPL(dccp_packet_name); | ||
73 | |||
74 | const char *dccp_state_name(const int state) | ||
75 | { | ||
76 | static char *dccp_state_names[] = { | ||
77 | [DCCP_OPEN] = "OPEN", | ||
78 | [DCCP_REQUESTING] = "REQUESTING", | ||
79 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
80 | [DCCP_LISTEN] = "LISTEN", | ||
81 | [DCCP_RESPOND] = "RESPOND", | ||
82 | [DCCP_CLOSING] = "CLOSING", | ||
83 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
84 | [DCCP_CLOSED] = "CLOSED", | ||
85 | }; | ||
86 | |||
87 | if (state >= DCCP_MAX_STATES) | ||
88 | return "INVALID STATE!"; | ||
89 | else | ||
90 | return dccp_state_names[state]; | ||
91 | } | ||
92 | |||
93 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
94 | |||
95 | static inline int dccp_listen_start(struct sock *sk) | ||
96 | { | ||
97 | dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; | ||
98 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); | ||
99 | } | ||
100 | |||
101 | int dccp_disconnect(struct sock *sk, int flags) | ||
102 | { | ||
103 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
104 | struct inet_sock *inet = inet_sk(sk); | ||
105 | int err = 0; | ||
106 | const int old_state = sk->sk_state; | ||
107 | |||
108 | if (old_state != DCCP_CLOSED) | ||
109 | dccp_set_state(sk, DCCP_CLOSED); | ||
110 | |||
111 | /* ABORT function of RFC793 */ | ||
112 | if (old_state == DCCP_LISTEN) { | ||
113 | inet_csk_listen_stop(sk); | ||
114 | /* FIXME: do the active reset thing */ | ||
115 | } else if (old_state == DCCP_REQUESTING) | ||
116 | sk->sk_err = ECONNRESET; | ||
117 | |||
118 | dccp_clear_xmit_timers(sk); | ||
119 | __skb_queue_purge(&sk->sk_receive_queue); | ||
120 | if (sk->sk_send_head != NULL) { | ||
121 | __kfree_skb(sk->sk_send_head); | ||
122 | sk->sk_send_head = NULL; | ||
123 | } | ||
124 | |||
125 | inet->dport = 0; | ||
126 | |||
127 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | ||
128 | inet_reset_saddr(sk); | ||
129 | |||
130 | sk->sk_shutdown = 0; | ||
131 | sock_reset_flag(sk, SOCK_DONE); | ||
132 | |||
133 | icsk->icsk_backoff = 0; | ||
134 | inet_csk_delack_init(sk); | ||
135 | __sk_dst_reset(sk); | ||
136 | |||
137 | BUG_TRAP(!inet->num || icsk->icsk_bind_hash); | ||
138 | |||
139 | sk->sk_error_report(sk); | ||
140 | return err; | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Wait for a DCCP event. | ||
145 | * | ||
146 | * Note that we don't need to lock the socket, as the upper poll layers | ||
147 | * take care of normal races (between the test and the event) and we don't | ||
148 | * go look at any of the socket buffers directly. | ||
149 | */ | ||
150 | static unsigned int dccp_poll(struct file *file, struct socket *sock, | ||
151 | poll_table *wait) | ||
152 | { | ||
153 | unsigned int mask; | ||
154 | struct sock *sk = sock->sk; | ||
155 | |||
156 | poll_wait(file, sk->sk_sleep, wait); | ||
157 | if (sk->sk_state == DCCP_LISTEN) | ||
158 | return inet_csk_listen_poll(sk); | ||
159 | |||
160 | /* Socket is not locked. We are protected from async events | ||
161 | by poll logic and correct handling of state changes | ||
162 | made by another threads is impossible in any case. | ||
163 | */ | ||
164 | |||
165 | mask = 0; | ||
166 | if (sk->sk_err) | ||
167 | mask = POLLERR; | ||
168 | |||
169 | if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) | ||
170 | mask |= POLLHUP; | ||
171 | if (sk->sk_shutdown & RCV_SHUTDOWN) | ||
172 | mask |= POLLIN | POLLRDNORM; | ||
173 | |||
174 | /* Connected? */ | ||
175 | if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { | ||
176 | if (atomic_read(&sk->sk_rmem_alloc) > 0) | ||
177 | mask |= POLLIN | POLLRDNORM; | ||
178 | |||
179 | if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { | ||
180 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { | ||
181 | mask |= POLLOUT | POLLWRNORM; | ||
182 | } else { /* send SIGIO later */ | ||
183 | set_bit(SOCK_ASYNC_NOSPACE, | ||
184 | &sk->sk_socket->flags); | ||
185 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
186 | |||
187 | /* Race breaker. If space is freed after | ||
188 | * wspace test but before the flags are set, | ||
189 | * IO signal will be lost. | ||
190 | */ | ||
191 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) | ||
192 | mask |= POLLOUT | POLLWRNORM; | ||
193 | } | ||
194 | } | ||
195 | } | ||
196 | return mask; | ||
197 | } | ||
198 | |||
199 | int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) | ||
200 | { | ||
201 | dccp_pr_debug("entry\n"); | ||
202 | return -ENOIOCTLCMD; | ||
203 | } | ||
204 | |||
205 | int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
206 | char __user *optval, int optlen) | ||
207 | { | ||
208 | struct dccp_sock *dp; | ||
209 | int err; | ||
210 | int val; | ||
211 | |||
212 | if (level != SOL_DCCP) | ||
213 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
214 | |||
215 | if (optlen < sizeof(int)) | ||
216 | return -EINVAL; | ||
217 | |||
218 | if (get_user(val, (int __user *)optval)) | ||
219 | return -EFAULT; | ||
220 | |||
221 | lock_sock(sk); | ||
222 | |||
223 | dp = dccp_sk(sk); | ||
224 | err = 0; | ||
225 | |||
226 | switch (optname) { | ||
227 | case DCCP_SOCKOPT_PACKET_SIZE: | ||
228 | dp->dccps_packet_size = val; | ||
229 | break; | ||
230 | default: | ||
231 | err = -ENOPROTOOPT; | ||
232 | break; | ||
233 | } | ||
234 | |||
235 | release_sock(sk); | ||
236 | return err; | ||
237 | } | ||
238 | |||
239 | int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
240 | char __user *optval, int __user *optlen) | ||
241 | { | ||
242 | struct dccp_sock *dp; | ||
243 | int val, len; | ||
244 | |||
245 | if (level != SOL_DCCP) | ||
246 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
247 | |||
248 | if (get_user(len, optlen)) | ||
249 | return -EFAULT; | ||
250 | |||
251 | len = min_t(unsigned int, len, sizeof(int)); | ||
252 | if (len < 0) | ||
253 | return -EINVAL; | ||
254 | |||
255 | dp = dccp_sk(sk); | ||
256 | |||
257 | switch (optname) { | ||
258 | case DCCP_SOCKOPT_PACKET_SIZE: | ||
259 | val = dp->dccps_packet_size; | ||
260 | break; | ||
261 | default: | ||
262 | return -ENOPROTOOPT; | ||
263 | } | ||
264 | |||
265 | if (put_user(len, optlen) || copy_to_user(optval, &val, len)) | ||
266 | return -EFAULT; | ||
267 | |||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
272 | size_t len) | ||
273 | { | ||
274 | const struct dccp_sock *dp = dccp_sk(sk); | ||
275 | const int flags = msg->msg_flags; | ||
276 | const int noblock = flags & MSG_DONTWAIT; | ||
277 | struct sk_buff *skb; | ||
278 | int rc, size; | ||
279 | long timeo; | ||
280 | |||
281 | if (len > dp->dccps_mss_cache) | ||
282 | return -EMSGSIZE; | ||
283 | |||
284 | lock_sock(sk); | ||
285 | timeo = sock_sndtimeo(sk, noblock); | ||
286 | |||
287 | /* | ||
288 | * We have to use sk_stream_wait_connect here to set sk_write_pending, | ||
289 | * so that the trick in dccp_rcv_request_sent_state_process. | ||
290 | */ | ||
291 | /* Wait for a connection to finish. */ | ||
292 | if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) | ||
293 | if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) | ||
294 | goto out_release; | ||
295 | |||
296 | size = sk->sk_prot->max_header + len; | ||
297 | release_sock(sk); | ||
298 | skb = sock_alloc_send_skb(sk, size, noblock, &rc); | ||
299 | lock_sock(sk); | ||
300 | if (skb == NULL) | ||
301 | goto out_release; | ||
302 | |||
303 | skb_reserve(skb, sk->sk_prot->max_header); | ||
304 | rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | ||
305 | if (rc != 0) | ||
306 | goto out_discard; | ||
307 | |||
308 | rc = dccp_write_xmit(sk, skb, &timeo); | ||
309 | /* | ||
310 | * XXX we don't use sk_write_queue, so just discard the packet. | ||
311 | * Current plan however is to _use_ sk_write_queue with | ||
312 | * an algorith similar to tcp_sendmsg, where the main difference | ||
313 | * is that in DCCP we have to respect packet boundaries, so | ||
314 | * no coalescing of skbs. | ||
315 | * | ||
316 | * This bug was _quickly_ found & fixed by just looking at an OSTRA | ||
317 | * generated callgraph 8) -acme | ||
318 | */ | ||
319 | if (rc != 0) | ||
320 | goto out_discard; | ||
321 | out_release: | ||
322 | release_sock(sk); | ||
323 | return rc ? : len; | ||
324 | out_discard: | ||
325 | kfree_skb(skb); | ||
326 | goto out_release; | ||
327 | } | ||
328 | |||
329 | int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||
330 | size_t len, int nonblock, int flags, int *addr_len) | ||
331 | { | ||
332 | const struct dccp_hdr *dh; | ||
333 | long timeo; | ||
334 | |||
335 | lock_sock(sk); | ||
336 | |||
337 | if (sk->sk_state == DCCP_LISTEN) { | ||
338 | len = -ENOTCONN; | ||
339 | goto out; | ||
340 | } | ||
341 | |||
342 | timeo = sock_rcvtimeo(sk, nonblock); | ||
343 | |||
344 | do { | ||
345 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); | ||
346 | |||
347 | if (skb == NULL) | ||
348 | goto verify_sock_status; | ||
349 | |||
350 | dh = dccp_hdr(skb); | ||
351 | |||
352 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
353 | dh->dccph_type == DCCP_PKT_DATAACK) | ||
354 | goto found_ok_skb; | ||
355 | |||
356 | if (dh->dccph_type == DCCP_PKT_RESET || | ||
357 | dh->dccph_type == DCCP_PKT_CLOSE) { | ||
358 | dccp_pr_debug("found fin ok!\n"); | ||
359 | len = 0; | ||
360 | goto found_fin_ok; | ||
361 | } | ||
362 | dccp_pr_debug("packet_type=%s\n", | ||
363 | dccp_packet_name(dh->dccph_type)); | ||
364 | sk_eat_skb(sk, skb); | ||
365 | verify_sock_status: | ||
366 | if (sock_flag(sk, SOCK_DONE)) { | ||
367 | len = 0; | ||
368 | break; | ||
369 | } | ||
370 | |||
371 | if (sk->sk_err) { | ||
372 | len = sock_error(sk); | ||
373 | break; | ||
374 | } | ||
375 | |||
376 | if (sk->sk_shutdown & RCV_SHUTDOWN) { | ||
377 | len = 0; | ||
378 | break; | ||
379 | } | ||
380 | |||
381 | if (sk->sk_state == DCCP_CLOSED) { | ||
382 | if (!sock_flag(sk, SOCK_DONE)) { | ||
383 | /* This occurs when user tries to read | ||
384 | * from never connected socket. | ||
385 | */ | ||
386 | len = -ENOTCONN; | ||
387 | break; | ||
388 | } | ||
389 | len = 0; | ||
390 | break; | ||
391 | } | ||
392 | |||
393 | if (!timeo) { | ||
394 | len = -EAGAIN; | ||
395 | break; | ||
396 | } | ||
397 | |||
398 | if (signal_pending(current)) { | ||
399 | len = sock_intr_errno(timeo); | ||
400 | break; | ||
401 | } | ||
402 | |||
403 | sk_wait_data(sk, &timeo); | ||
404 | continue; | ||
405 | found_ok_skb: | ||
406 | if (len > skb->len) | ||
407 | len = skb->len; | ||
408 | else if (len < skb->len) | ||
409 | msg->msg_flags |= MSG_TRUNC; | ||
410 | |||
411 | if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) { | ||
412 | /* Exception. Bailout! */ | ||
413 | len = -EFAULT; | ||
414 | break; | ||
415 | } | ||
416 | found_fin_ok: | ||
417 | if (!(flags & MSG_PEEK)) | ||
418 | sk_eat_skb(sk, skb); | ||
419 | break; | ||
420 | } while (1); | ||
421 | out: | ||
422 | release_sock(sk); | ||
423 | return len; | ||
424 | } | ||
425 | |||
426 | static int inet_dccp_listen(struct socket *sock, int backlog) | ||
427 | { | ||
428 | struct sock *sk = sock->sk; | ||
429 | unsigned char old_state; | ||
430 | int err; | ||
431 | |||
432 | lock_sock(sk); | ||
433 | |||
434 | err = -EINVAL; | ||
435 | if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) | ||
436 | goto out; | ||
437 | |||
438 | old_state = sk->sk_state; | ||
439 | if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) | ||
440 | goto out; | ||
441 | |||
442 | /* Really, if the socket is already in listen state | ||
443 | * we can only allow the backlog to be adjusted. | ||
444 | */ | ||
445 | if (old_state != DCCP_LISTEN) { | ||
446 | /* | ||
447 | * FIXME: here it probably should be sk->sk_prot->listen_start | ||
448 | * see tcp_listen_start | ||
449 | */ | ||
450 | err = dccp_listen_start(sk); | ||
451 | if (err) | ||
452 | goto out; | ||
453 | } | ||
454 | sk->sk_max_ack_backlog = backlog; | ||
455 | err = 0; | ||
456 | |||
457 | out: | ||
458 | release_sock(sk); | ||
459 | return err; | ||
460 | } | ||
461 | |||
462 | static const unsigned char dccp_new_state[] = { | ||
463 | /* current state: new state: action: */ | ||
464 | [0] = DCCP_CLOSED, | ||
465 | [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
466 | [DCCP_REQUESTING] = DCCP_CLOSED, | ||
467 | [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, | ||
468 | [DCCP_LISTEN] = DCCP_CLOSED, | ||
469 | [DCCP_RESPOND] = DCCP_CLOSED, | ||
470 | [DCCP_CLOSING] = DCCP_CLOSED, | ||
471 | [DCCP_TIME_WAIT] = DCCP_CLOSED, | ||
472 | [DCCP_CLOSED] = DCCP_CLOSED, | ||
473 | }; | ||
474 | |||
475 | static int dccp_close_state(struct sock *sk) | ||
476 | { | ||
477 | const int next = dccp_new_state[sk->sk_state]; | ||
478 | const int ns = next & DCCP_STATE_MASK; | ||
479 | |||
480 | if (ns != sk->sk_state) | ||
481 | dccp_set_state(sk, ns); | ||
482 | |||
483 | return next & DCCP_ACTION_FIN; | ||
484 | } | ||
485 | |||
486 | void dccp_close(struct sock *sk, long timeout) | ||
487 | { | ||
488 | struct sk_buff *skb; | ||
489 | |||
490 | lock_sock(sk); | ||
491 | |||
492 | sk->sk_shutdown = SHUTDOWN_MASK; | ||
493 | |||
494 | if (sk->sk_state == DCCP_LISTEN) { | ||
495 | dccp_set_state(sk, DCCP_CLOSED); | ||
496 | |||
497 | /* Special case. */ | ||
498 | inet_csk_listen_stop(sk); | ||
499 | |||
500 | goto adjudge_to_death; | ||
501 | } | ||
502 | |||
503 | /* | ||
504 | * We need to flush the recv. buffs. We do this only on the | ||
505 | * descriptor close, not protocol-sourced closes, because the | ||
506 | *reader process may not have drained the data yet! | ||
507 | */ | ||
508 | /* FIXME: check for unread data */ | ||
509 | while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { | ||
510 | __kfree_skb(skb); | ||
511 | } | ||
512 | |||
513 | if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | ||
514 | /* Check zero linger _after_ checking for unread data. */ | ||
515 | sk->sk_prot->disconnect(sk, 0); | ||
516 | } else if (dccp_close_state(sk)) { | ||
517 | dccp_send_close(sk, 1); | ||
518 | } | ||
519 | |||
520 | sk_stream_wait_close(sk, timeout); | ||
521 | |||
522 | adjudge_to_death: | ||
523 | /* | ||
524 | * It is the last release_sock in its life. It will remove backlog. | ||
525 | */ | ||
526 | release_sock(sk); | ||
527 | /* | ||
528 | * Now socket is owned by kernel and we acquire BH lock | ||
529 | * to finish close. No need to check for user refs. | ||
530 | */ | ||
531 | local_bh_disable(); | ||
532 | bh_lock_sock(sk); | ||
533 | BUG_TRAP(!sock_owned_by_user(sk)); | ||
534 | |||
535 | sock_hold(sk); | ||
536 | sock_orphan(sk); | ||
537 | |||
538 | /* | ||
539 | * The last release_sock may have processed the CLOSE or RESET | ||
540 | * packet moving sock to CLOSED state, if not we have to fire | ||
541 | * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination" | ||
542 | * in draft-ietf-dccp-spec-11. -acme | ||
543 | */ | ||
544 | if (sk->sk_state == DCCP_CLOSING) { | ||
545 | /* FIXME: should start at 2 * RTT */ | ||
546 | /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */ | ||
547 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
548 | inet_csk(sk)->icsk_rto, | ||
549 | DCCP_RTO_MAX); | ||
550 | #if 0 | ||
551 | /* Yeah, we should use sk->sk_prot->orphan_count, etc */ | ||
552 | dccp_set_state(sk, DCCP_CLOSED); | ||
553 | #endif | ||
554 | } | ||
555 | |||
556 | atomic_inc(sk->sk_prot->orphan_count); | ||
557 | if (sk->sk_state == DCCP_CLOSED) | ||
558 | inet_csk_destroy_sock(sk); | ||
559 | |||
560 | /* Otherwise, socket is reprieved until protocol close. */ | ||
561 | |||
562 | bh_unlock_sock(sk); | ||
563 | local_bh_enable(); | ||
564 | sock_put(sk); | ||
565 | } | ||
566 | |||
/*
 * dccp_shutdown - protocol shutdown hook
 *
 * Currently a stub: it only logs that it was entered; @how is ignored.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
571 | |||
572 | static struct proto_ops inet_dccp_ops = { | ||
573 | .family = PF_INET, | ||
574 | .owner = THIS_MODULE, | ||
575 | .release = inet_release, | ||
576 | .bind = inet_bind, | ||
577 | .connect = inet_stream_connect, | ||
578 | .socketpair = sock_no_socketpair, | ||
579 | .accept = inet_accept, | ||
580 | .getname = inet_getname, | ||
581 | /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ | ||
582 | .poll = dccp_poll, | ||
583 | .ioctl = inet_ioctl, | ||
584 | /* FIXME: work on inet_listen to rename it to sock_common_listen */ | ||
585 | .listen = inet_dccp_listen, | ||
586 | .shutdown = inet_shutdown, | ||
587 | .setsockopt = sock_common_setsockopt, | ||
588 | .getsockopt = sock_common_getsockopt, | ||
589 | .sendmsg = inet_sendmsg, | ||
590 | .recvmsg = sock_common_recvmsg, | ||
591 | .mmap = sock_no_mmap, | ||
592 | .sendpage = sock_no_sendpage, | ||
593 | }; | ||
594 | |||
595 | extern struct net_proto_family inet_family_ops; | ||
596 | |||
597 | static struct inet_protosw dccp_v4_protosw = { | ||
598 | .type = SOCK_DCCP, | ||
599 | .protocol = IPPROTO_DCCP, | ||
600 | .prot = &dccp_v4_prot, | ||
601 | .ops = &inet_dccp_ops, | ||
602 | .capability = -1, | ||
603 | .no_check = 0, | ||
604 | .flags = 0, | ||
605 | }; | ||
606 | |||
607 | /* | ||
608 | * This is the global socket data structure used for responding to | ||
609 | * the Out-of-the-blue (OOTB) packets. A control sock will be created | ||
610 | * for this socket at the initialization time. | ||
611 | */ | ||
612 | struct socket *dccp_ctl_socket; | ||
613 | |||
614 | static char dccp_ctl_socket_err_msg[] __initdata = | ||
615 | KERN_ERR "DCCP: Failed to create the control socket.\n"; | ||
616 | |||
617 | static int __init dccp_ctl_sock_init(void) | ||
618 | { | ||
619 | int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, | ||
620 | &dccp_ctl_socket); | ||
621 | if (rc < 0) | ||
622 | printk(dccp_ctl_socket_err_msg); | ||
623 | else { | ||
624 | dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; | ||
625 | inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; | ||
626 | |||
627 | /* Unhash it so that IP input processing does not even | ||
628 | * see it, we do not wish this socket to see incoming | ||
629 | * packets. | ||
630 | */ | ||
631 | dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); | ||
632 | } | ||
633 | |||
634 | return rc; | ||
635 | } | ||
636 | |||
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/*
 * Release the control socket, if one exists, and clear the global pointer
 * so that a repeated call is a no-op.
 */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket == NULL)
		return;

	sock_release(dccp_ctl_socket);
	dccp_ctl_socket = NULL;
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
648 | |||
649 | static int __init init_dccp_v4_mibs(void) | ||
650 | { | ||
651 | int rc = -ENOMEM; | ||
652 | |||
653 | dccp_statistics[0] = alloc_percpu(struct dccp_mib); | ||
654 | if (dccp_statistics[0] == NULL) | ||
655 | goto out; | ||
656 | |||
657 | dccp_statistics[1] = alloc_percpu(struct dccp_mib); | ||
658 | if (dccp_statistics[1] == NULL) | ||
659 | goto out_free_one; | ||
660 | |||
661 | rc = 0; | ||
662 | out: | ||
663 | return rc; | ||
664 | out_free_one: | ||
665 | free_percpu(dccp_statistics[0]); | ||
666 | dccp_statistics[0] = NULL; | ||
667 | goto out; | ||
668 | |||
669 | } | ||
670 | |||
/*
 * Optional sizing override for the established hash table: when non-zero,
 * dccp_init() computes its page goal from this bucket count instead of from
 * num_physpages.  Registered with mode 0444.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug switch, only built with CONFIG_IP_DCCP_DEBUG.  Not static, so it is
 * visible to other translation units.
 * NOTE(review): presumably tested by the DCCP debug macros - confirm.
 */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
680 | |||
681 | static int __init dccp_init(void) | ||
682 | { | ||
683 | unsigned long goal; | ||
684 | int ehash_order, bhash_order, i; | ||
685 | int rc = proto_register(&dccp_v4_prot, 1); | ||
686 | |||
687 | if (rc) | ||
688 | goto out; | ||
689 | |||
690 | dccp_hashinfo.bind_bucket_cachep = | ||
691 | kmem_cache_create("dccp_bind_bucket", | ||
692 | sizeof(struct inet_bind_bucket), 0, | ||
693 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
694 | if (!dccp_hashinfo.bind_bucket_cachep) | ||
695 | goto out_proto_unregister; | ||
696 | |||
697 | /* | ||
698 | * Size and allocate the main established and bind bucket | ||
699 | * hash tables. | ||
700 | * | ||
701 | * The methodology is similar to that of the buffer cache. | ||
702 | */ | ||
703 | if (num_physpages >= (128 * 1024)) | ||
704 | goal = num_physpages >> (21 - PAGE_SHIFT); | ||
705 | else | ||
706 | goal = num_physpages >> (23 - PAGE_SHIFT); | ||
707 | |||
708 | if (thash_entries) | ||
709 | goal = (thash_entries * | ||
710 | sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; | ||
711 | for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) | ||
712 | ; | ||
713 | do { | ||
714 | dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / | ||
715 | sizeof(struct inet_ehash_bucket); | ||
716 | dccp_hashinfo.ehash_size >>= 1; | ||
717 | while (dccp_hashinfo.ehash_size & | ||
718 | (dccp_hashinfo.ehash_size - 1)) | ||
719 | dccp_hashinfo.ehash_size--; | ||
720 | dccp_hashinfo.ehash = (struct inet_ehash_bucket *) | ||
721 | __get_free_pages(GFP_ATOMIC, ehash_order); | ||
722 | } while (!dccp_hashinfo.ehash && --ehash_order > 0); | ||
723 | |||
724 | if (!dccp_hashinfo.ehash) { | ||
725 | printk(KERN_CRIT "Failed to allocate DCCP " | ||
726 | "established hash table\n"); | ||
727 | goto out_free_bind_bucket_cachep; | ||
728 | } | ||
729 | |||
730 | for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { | ||
731 | rwlock_init(&dccp_hashinfo.ehash[i].lock); | ||
732 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); | ||
733 | } | ||
734 | |||
735 | bhash_order = ehash_order; | ||
736 | |||
737 | do { | ||
738 | dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / | ||
739 | sizeof(struct inet_bind_hashbucket); | ||
740 | if ((dccp_hashinfo.bhash_size > (64 * 1024)) && | ||
741 | bhash_order > 0) | ||
742 | continue; | ||
743 | dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) | ||
744 | __get_free_pages(GFP_ATOMIC, bhash_order); | ||
745 | } while (!dccp_hashinfo.bhash && --bhash_order >= 0); | ||
746 | |||
747 | if (!dccp_hashinfo.bhash) { | ||
748 | printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); | ||
749 | goto out_free_dccp_ehash; | ||
750 | } | ||
751 | |||
752 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { | ||
753 | spin_lock_init(&dccp_hashinfo.bhash[i].lock); | ||
754 | INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); | ||
755 | } | ||
756 | |||
757 | if (init_dccp_v4_mibs()) | ||
758 | goto out_free_dccp_bhash; | ||
759 | |||
760 | rc = -EAGAIN; | ||
761 | if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) | ||
762 | goto out_free_dccp_v4_mibs; | ||
763 | |||
764 | inet_register_protosw(&dccp_v4_protosw); | ||
765 | |||
766 | rc = dccp_ctl_sock_init(); | ||
767 | if (rc) | ||
768 | goto out_unregister_protosw; | ||
769 | out: | ||
770 | return rc; | ||
771 | out_unregister_protosw: | ||
772 | inet_unregister_protosw(&dccp_v4_protosw); | ||
773 | inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); | ||
774 | out_free_dccp_v4_mibs: | ||
775 | free_percpu(dccp_statistics[0]); | ||
776 | free_percpu(dccp_statistics[1]); | ||
777 | dccp_statistics[0] = dccp_statistics[1] = NULL; | ||
778 | out_free_dccp_bhash: | ||
779 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); | ||
780 | dccp_hashinfo.bhash = NULL; | ||
781 | out_free_dccp_ehash: | ||
782 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); | ||
783 | dccp_hashinfo.ehash = NULL; | ||
784 | out_free_bind_bucket_cachep: | ||
785 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
786 | dccp_hashinfo.bind_bucket_cachep = NULL; | ||
787 | out_proto_unregister: | ||
788 | proto_unregister(&dccp_v4_prot); | ||
789 | goto out; | ||
790 | } | ||
791 | |||
792 | static const char dccp_del_proto_err_msg[] __exitdata = | ||
793 | KERN_ERR "can't remove dccp net_protocol\n"; | ||
794 | |||
795 | static void __exit dccp_fini(void) | ||
796 | { | ||
797 | inet_unregister_protosw(&dccp_v4_protosw); | ||
798 | |||
799 | if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) | ||
800 | printk(dccp_del_proto_err_msg); | ||
801 | |||
802 | free_percpu(dccp_statistics[0]); | ||
803 | free_percpu(dccp_statistics[1]); | ||
804 | free_pages((unsigned long)dccp_hashinfo.bhash, | ||
805 | get_order(dccp_hashinfo.bhash_size * | ||
806 | sizeof(struct inet_bind_hashbucket))); | ||
807 | free_pages((unsigned long)dccp_hashinfo.ehash, | ||
808 | get_order(dccp_hashinfo.ehash_size * | ||
809 | sizeof(struct inet_ehash_bucket))); | ||
810 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
811 | proto_unregister(&dccp_v4_prot); | ||
812 | } | ||
813 | |||
814 | module_init(dccp_init); | ||
815 | module_exit(dccp_fini); | ||
816 | |||
817 | /* | ||
818 | * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) | ||
819 | * values directly. Also cover the case where the protocol is not specified, | ||
820 | * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP | ||
821 | */ | ||
822 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); | ||
823 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); | ||
824 | MODULE_LICENSE("GPL"); | ||
825 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); | ||
826 | MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); | ||
diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 000000000000..aa34b576e228 --- /dev/null +++ b/net/dccp/timer.c | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | * net/dccp/timer.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include "dccp.h" | ||
18 | |||
19 | static void dccp_write_timer(unsigned long data); | ||
20 | static void dccp_keepalive_timer(unsigned long data); | ||
21 | static void dccp_delack_timer(unsigned long data); | ||
22 | |||
23 | void dccp_init_xmit_timers(struct sock *sk) | ||
24 | { | ||
25 | inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, | ||
26 | &dccp_keepalive_timer); | ||
27 | } | ||
28 | |||
29 | static void dccp_write_err(struct sock *sk) | ||
30 | { | ||
31 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; | ||
32 | sk->sk_error_report(sk); | ||
33 | |||
34 | dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); | ||
35 | dccp_done(sk); | ||
36 | DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); | ||
37 | } | ||
38 | |||
39 | /* A write timeout has occurred. Process the after effects. */ | ||
40 | static int dccp_write_timeout(struct sock *sk) | ||
41 | { | ||
42 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
43 | int retry_until; | ||
44 | |||
45 | if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { | ||
46 | if (icsk->icsk_retransmits != 0) | ||
47 | dst_negative_advice(&sk->sk_dst_cache); | ||
48 | retry_until = icsk->icsk_syn_retries ? : | ||
49 | /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; | ||
50 | } else { | ||
51 | if (icsk->icsk_retransmits >= | ||
52 | /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { | ||
53 | /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu | ||
54 | black hole detection. :-( | ||
55 | |||
56 | It is place to make it. It is not made. I do not want | ||
57 | to make it. It is disguisting. It does not work in any | ||
58 | case. Let me to cite the same draft, which requires for | ||
59 | us to implement this: | ||
60 | |||
61 | "The one security concern raised by this memo is that ICMP black holes | ||
62 | are often caused by over-zealous security administrators who block | ||
63 | all ICMP messages. It is vitally important that those who design and | ||
64 | deploy security systems understand the impact of strict filtering on | ||
65 | upper-layer protocols. The safest web site in the world is worthless | ||
66 | if most TCP implementations cannot transfer data from it. It would | ||
67 | be far nicer to have all of the black holes fixed rather than fixing | ||
68 | all of the TCP implementations." | ||
69 | |||
70 | Golden words :-). | ||
71 | */ | ||
72 | |||
73 | dst_negative_advice(&sk->sk_dst_cache); | ||
74 | } | ||
75 | |||
76 | retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; | ||
77 | /* | ||
78 | * FIXME: see tcp_write_timout and tcp_out_of_resources | ||
79 | */ | ||
80 | } | ||
81 | |||
82 | if (icsk->icsk_retransmits >= retry_until) { | ||
83 | /* Has it gone just too far? */ | ||
84 | dccp_write_err(sk); | ||
85 | return 1; | ||
86 | } | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ | ||
91 | static void dccp_delack_timer(unsigned long data) | ||
92 | { | ||
93 | struct sock *sk = (struct sock *)data; | ||
94 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
95 | |||
96 | bh_lock_sock(sk); | ||
97 | if (sock_owned_by_user(sk)) { | ||
98 | /* Try again later. */ | ||
99 | icsk->icsk_ack.blocked = 1; | ||
100 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); | ||
101 | sk_reset_timer(sk, &icsk->icsk_delack_timer, | ||
102 | jiffies + TCP_DELACK_MIN); | ||
103 | goto out; | ||
104 | } | ||
105 | |||
106 | if (sk->sk_state == DCCP_CLOSED || | ||
107 | !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) | ||
108 | goto out; | ||
109 | if (time_after(icsk->icsk_ack.timeout, jiffies)) { | ||
110 | sk_reset_timer(sk, &icsk->icsk_delack_timer, | ||
111 | icsk->icsk_ack.timeout); | ||
112 | goto out; | ||
113 | } | ||
114 | |||
115 | icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; | ||
116 | |||
117 | if (inet_csk_ack_scheduled(sk)) { | ||
118 | if (!icsk->icsk_ack.pingpong) { | ||
119 | /* Delayed ACK missed: inflate ATO. */ | ||
120 | icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, | ||
121 | icsk->icsk_rto); | ||
122 | } else { | ||
123 | /* Delayed ACK missed: leave pingpong mode and | ||
124 | * deflate ATO. | ||
125 | */ | ||
126 | icsk->icsk_ack.pingpong = 0; | ||
127 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
128 | } | ||
129 | dccp_send_ack(sk); | ||
130 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); | ||
131 | } | ||
132 | out: | ||
133 | bh_unlock_sock(sk); | ||
134 | sock_put(sk); | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * The DCCP retransmit timer. | ||
139 | */ | ||
140 | static void dccp_retransmit_timer(struct sock *sk) | ||
141 | { | ||
142 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
143 | |||
144 | /* | ||
145 | * sk->sk_send_head has to have one skb with | ||
146 | * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP | ||
147 | * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake | ||
148 | * (PARTOPEN timer), etc). | ||
149 | */ | ||
150 | BUG_TRAP(sk->sk_send_head != NULL); | ||
151 | |||
152 | /* | ||
153 | * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was | ||
154 | * sent, no need to retransmit, this sock is dead. | ||
155 | */ | ||
156 | if (dccp_write_timeout(sk)) | ||
157 | goto out; | ||
158 | |||
159 | /* | ||
160 | * We want to know the number of packets retransmitted, not the | ||
161 | * total number of retransmissions of clones of original packets. | ||
162 | */ | ||
163 | if (icsk->icsk_retransmits == 0) | ||
164 | DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); | ||
165 | |||
166 | if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { | ||
167 | /* | ||
168 | * Retransmission failed because of local congestion, | ||
169 | * do not backoff. | ||
170 | */ | ||
171 | if (icsk->icsk_retransmits == 0) | ||
172 | icsk->icsk_retransmits = 1; | ||
173 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
174 | min(icsk->icsk_rto, | ||
175 | TCP_RESOURCE_PROBE_INTERVAL), | ||
176 | DCCP_RTO_MAX); | ||
177 | goto out; | ||
178 | } | ||
179 | |||
180 | icsk->icsk_backoff++; | ||
181 | icsk->icsk_retransmits++; | ||
182 | |||
183 | icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); | ||
184 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, | ||
185 | DCCP_RTO_MAX); | ||
186 | if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) | ||
187 | __sk_dst_reset(sk); | ||
188 | out:; | ||
189 | } | ||
190 | |||
191 | static void dccp_write_timer(unsigned long data) | ||
192 | { | ||
193 | struct sock *sk = (struct sock *)data; | ||
194 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
195 | int event = 0; | ||
196 | |||
197 | bh_lock_sock(sk); | ||
198 | if (sock_owned_by_user(sk)) { | ||
199 | /* Try again later */ | ||
200 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, | ||
201 | jiffies + (HZ / 20)); | ||
202 | goto out; | ||
203 | } | ||
204 | |||
205 | if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) | ||
206 | goto out; | ||
207 | |||
208 | if (time_after(icsk->icsk_timeout, jiffies)) { | ||
209 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, | ||
210 | icsk->icsk_timeout); | ||
211 | goto out; | ||
212 | } | ||
213 | |||
214 | event = icsk->icsk_pending; | ||
215 | icsk->icsk_pending = 0; | ||
216 | |||
217 | switch (event) { | ||
218 | case ICSK_TIME_RETRANS: | ||
219 | dccp_retransmit_timer(sk); | ||
220 | break; | ||
221 | } | ||
222 | out: | ||
223 | bh_unlock_sock(sk); | ||
224 | sock_put(sk); | ||
225 | } | ||
226 | |||
227 | /* | ||
228 | * Timer for listening sockets | ||
229 | */ | ||
230 | static void dccp_response_timer(struct sock *sk) | ||
231 | { | ||
232 | inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, | ||
233 | DCCP_RTO_MAX); | ||
234 | } | ||
235 | |||
236 | static void dccp_keepalive_timer(unsigned long data) | ||
237 | { | ||
238 | struct sock *sk = (struct sock *)data; | ||
239 | |||
240 | /* Only process if socket is not in use. */ | ||
241 | bh_lock_sock(sk); | ||
242 | if (sock_owned_by_user(sk)) { | ||
243 | /* Try again later. */ | ||
244 | inet_csk_reset_keepalive_timer(sk, HZ / 20); | ||
245 | goto out; | ||
246 | } | ||
247 | |||
248 | if (sk->sk_state == DCCP_LISTEN) { | ||
249 | dccp_response_timer(sk); | ||
250 | goto out; | ||
251 | } | ||
252 | out: | ||
253 | bh_unlock_sock(sk); | ||
254 | sock_put(sk); | ||
255 | } | ||