aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@ghostprotocols.net>2005-08-09 23:14:34 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2005-08-29 18:49:46 -0400
commit7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c (patch)
tree3cb2732870c9cf8f976cb6fa57e0223f1c648e2a /net/dccp
parentc4365c9235f80128c3c3d5993074173941b1c1f0 (diff)
[DCCP]: Initial implementation
Development to this point was done on a subversion repository at: http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/ This repository will be kept at this site for the foreseeable future, so that interested parties can see the history of this code, attributions, etc. If I ever decide to take this offline I'll provide the full history at some other suitable place. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig24
-rw-r--r--net/dccp/Makefile5
-rw-r--r--net/dccp/ccid.c139
-rw-r--r--net/dccp/ccid.h156
-rw-r--r--net/dccp/ccids/Kconfig25
-rw-r--r--net/dccp/ccids/Makefile3
-rw-r--r--net/dccp/ccids/ccid3.c2164
-rw-r--r--net/dccp/ccids/ccid3.h137
-rw-r--r--net/dccp/dccp.h422
-rw-r--r--net/dccp/input.c510
-rw-r--r--net/dccp/ipv4.c1289
-rw-r--r--net/dccp/minisocks.c199
-rw-r--r--net/dccp/options.c763
-rw-r--r--net/dccp/output.c406
-rw-r--r--net/dccp/proto.c818
-rw-r--r--net/dccp/timer.c249
16 files changed, 7309 insertions, 0 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
new file mode 100644
index 000000000000..90460bc629b3
--- /dev/null
+++ b/net/dccp/Kconfig
@@ -0,0 +1,24 @@
1menu "DCCP Configuration (EXPERIMENTAL)"
2 depends on INET && EXPERIMENTAL
3
4config IP_DCCP
5 tristate "The DCCP Protocol (EXPERIMENTAL)"
6 ---help---
7 Datagram Congestion Control Protocol
8
9 From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
10
11 The Datagram Congestion Control Protocol (DCCP) is a transport
12 protocol that implements bidirectional, unicast connections of
13 congestion-controlled, unreliable datagrams. It should be suitable
14 for use by applications such as streaming media, Internet telephony,
15 and on-line games.
16
17 To compile this protocol support as a module, choose M here: the
18 module will be called dccp.
19
20 If in doubt, say N.
21
22source "net/dccp/ccids/Kconfig"
23
24endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
new file mode 100644
index 000000000000..c6e6ba55c36b
--- /dev/null
+++ b/net/dccp/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o
2
3dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o
4
5obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
new file mode 100644
index 000000000000..9d8fc0e289ea
--- /dev/null
+++ b/net/dccp/ccid.c
@@ -0,0 +1,139 @@
1/*
2 * net/dccp/ccid.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * CCID infrastructure
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "ccid.h"
15
/* Registry of congestion control descriptors, indexed by their ccid_id. */
static struct ccid *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
/* Count of readers currently between ccids_read_lock() and ccids_read_unlock(). */
static atomic_t ccids_lockct = ATOMIC_INIT(0);
/* Taken by writers; readers never acquire it, they only wait for its release. */
static DEFINE_SPINLOCK(ccids_lock);
20
21/*
22 * The strategy is: modifications ccids vector are short, do not sleep and
23 * veeery rare, but read access should be free of any exclusive locks.
24 */
25static void ccids_write_lock(void)
26{
27 spin_lock(&ccids_lock);
28 while (atomic_read(&ccids_lockct) != 0) {
29 spin_unlock(&ccids_lock);
30 yield();
31 spin_lock(&ccids_lock);
32 }
33}
34
35static inline void ccids_write_unlock(void)
36{
37 spin_unlock(&ccids_lock);
38}
39
40static inline void ccids_read_lock(void)
41{
42 atomic_inc(&ccids_lockct);
43 spin_unlock_wait(&ccids_lock);
44}
45
46static inline void ccids_read_unlock(void)
47{
48 atomic_dec(&ccids_lockct);
49}
50
#else
/*
 * Neither CONFIG_SMP nor CONFIG_PREEMPT is set: the four ccids locking
 * helpers compile away to empty statements.
 */
#define ccids_write_lock() do { } while(0)
#define ccids_write_unlock() do { } while(0)
#define ccids_read_lock() do { } while(0)
#define ccids_read_unlock() do { } while(0)
#endif
57
58int ccid_register(struct ccid *ccid)
59{
60 int err;
61
62 if (ccid->ccid_init == NULL)
63 return -1;
64
65 ccids_write_lock();
66 err = -EEXIST;
67 if (ccids[ccid->ccid_id] == NULL) {
68 ccids[ccid->ccid_id] = ccid;
69 err = 0;
70 }
71 ccids_write_unlock();
72 if (err == 0)
73 pr_info("CCID: Registered CCID %d (%s)\n",
74 ccid->ccid_id, ccid->ccid_name);
75 return err;
76}
77
78EXPORT_SYMBOL_GPL(ccid_register);
79
80int ccid_unregister(struct ccid *ccid)
81{
82 ccids_write_lock();
83 ccids[ccid->ccid_id] = NULL;
84 ccids_write_unlock();
85 pr_info("CCID: Unregistered CCID %d (%s)\n",
86 ccid->ccid_id, ccid->ccid_name);
87 return 0;
88}
89
90EXPORT_SYMBOL_GPL(ccid_unregister);
91
92struct ccid *ccid_init(unsigned char id, struct sock *sk)
93{
94 struct ccid *ccid;
95
96#ifdef CONFIG_KMOD
97 if (ccids[id] == NULL)
98 request_module("net-dccp-ccid-%d", id);
99#endif
100 ccids_read_lock();
101
102 ccid = ccids[id];
103 if (ccid == NULL)
104 goto out;
105
106 if (!try_module_get(ccid->ccid_owner))
107 goto out_err;
108
109 if (ccid->ccid_init(sk) != 0)
110 goto out_module_put;
111out:
112 ccids_read_unlock();
113 return ccid;
114out_module_put:
115 module_put(ccid->ccid_owner);
116out_err:
117 ccid = NULL;
118 goto out;
119}
120
121EXPORT_SYMBOL_GPL(ccid_init);
122
123void ccid_exit(struct ccid *ccid, struct sock *sk)
124{
125 if (ccid == NULL)
126 return;
127
128 ccids_read_lock();
129
130 if (ccids[ccid->ccid_id] != NULL) {
131 if (ccid->ccid_exit != NULL)
132 ccid->ccid_exit(sk);
133 module_put(ccid->ccid_owner);
134 }
135
136 ccids_read_unlock();
137}
138
139EXPORT_SYMBOL_GPL(ccid_exit);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
new file mode 100644
index 000000000000..06105b2a613c
--- /dev/null
+++ b/net/dccp/ccid.h
@@ -0,0 +1,156 @@
1#ifndef _CCID_H
2#define _CCID_H
3/*
4 * net/dccp/ccid.h
5 *
6 * An implementation of the DCCP protocol
7 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 *
9 * CCID infrastructure
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <net/sock.h>
17#include <linux/dccp.h>
18#include <linux/list.h>
19#include <linux/module.h>
20
/*
 * Number of slots in the CCID registry (the ccids[] array in ccid.c).
 * NOTE(review): ccid_id below is an unsigned char, so the value 255 is
 * representable but is not a valid index into an array of CCID_MAX entries.
 */
#define CCID_MAX 255

/**
 * struct ccid - descriptor of one congestion control implementation
 * @ccid_id: registry slot; used as the index into ccids[] in ccid.c
 * @ccid_name: name printed when the descriptor is (un)registered
 * @ccid_owner: module whose reference count is taken in ccid_init() and
 *		dropped in ccid_exit()
 * @ccid_init: mandatory (ccid_register() refuses a NULL handler); called by
 *	       ccid_init() for a socket, non-zero return means failure
 * @ccid_exit: optional; called by ccid_exit() for a socket
 *
 * All ccid_hc_* members are optional callbacks.  Each one is reached through
 * the inline wrapper of the same name in this header, which treats a NULL
 * pointer as "do nothing" and, for the int-returning ones, as "return 0".
 * NOTE(review): hc_rx / hc_tx presumably stand for the receiver / sender
 * half connection -- confirm against the DCCP core that calls the wrappers.
 */
struct ccid {
	unsigned char	ccid_id;
	const char	*ccid_name;
	struct module	*ccid_owner;
	int		(*ccid_init)(struct sock *sk);
	void		(*ccid_exit)(struct sock *sk);
	int		(*ccid_hc_rx_init)(struct sock *sk);
	int		(*ccid_hc_tx_init)(struct sock *sk);
	void		(*ccid_hc_rx_exit)(struct sock *sk);
	void		(*ccid_hc_tx_exit)(struct sock *sk);
	void		(*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb);
	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
						    unsigned char option,
						    unsigned char len, u16 idx,
						    unsigned char* value);
	void		(*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb);
	void		(*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb);
	void		(*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb);
	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
						    unsigned char option,
						    unsigned char len, u16 idx,
						    unsigned char* value);
	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
						  struct sk_buff *skb, int len,
						  long *delay);
	void		(*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len);
};
50
/* Add a descriptor to / remove a descriptor from the CCID registry (ccid.c). */
extern int ccid_register(struct ccid *ccid);
extern int ccid_unregister(struct ccid *ccid);

/* Look up a registered CCID by id for a socket / release it again (ccid.c). */
extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
extern void ccid_exit(struct ccid *ccid, struct sock *sk);
56
57static inline void __ccid_get(struct ccid *ccid)
58{
59 __module_get(ccid->ccid_owner);
60}
61
62static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
63 struct sk_buff *skb, int len,
64 long *delay)
65{
66 int rc = 0;
67 if (ccid->ccid_hc_tx_send_packet != NULL)
68 rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay);
69 return rc;
70}
71
72static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
73 int more, int len)
74{
75 if (ccid->ccid_hc_tx_packet_sent != NULL)
76 ccid->ccid_hc_tx_packet_sent(sk, more, len);
77}
78
79static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
80{
81 int rc = 0;
82 if (ccid->ccid_hc_rx_init != NULL)
83 rc = ccid->ccid_hc_rx_init(sk);
84 return rc;
85}
86
87static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
88{
89 int rc = 0;
90 if (ccid->ccid_hc_tx_init != NULL)
91 rc = ccid->ccid_hc_tx_init(sk);
92 return rc;
93}
94
95static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
96{
97 if (ccid->ccid_hc_rx_exit != NULL)
98 ccid->ccid_hc_rx_exit(sk);
99}
100
101static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
102{
103 if (ccid->ccid_hc_tx_exit != NULL)
104 ccid->ccid_hc_tx_exit(sk);
105}
106
107static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
108 struct sk_buff *skb)
109{
110 if (ccid->ccid_hc_rx_packet_recv != NULL)
111 ccid->ccid_hc_rx_packet_recv(sk, skb);
112}
113
114static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
115 struct sk_buff *skb)
116{
117 if (ccid->ccid_hc_tx_packet_recv != NULL)
118 ccid->ccid_hc_tx_packet_recv(sk, skb);
119}
120
121static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
122 unsigned char option,
123 unsigned char len, u16 idx,
124 unsigned char* value)
125{
126 int rc = 0;
127 if (ccid->ccid_hc_tx_parse_options != NULL)
128 rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
129 return rc;
130}
131
132static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
133 unsigned char option,
134 unsigned char len, u16 idx,
135 unsigned char* value)
136{
137 int rc = 0;
138 if (ccid->ccid_hc_rx_parse_options != NULL)
139 rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
140 return rc;
141}
142
143static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
144 struct sk_buff *skb)
145{
146 if (ccid->ccid_hc_tx_insert_options != NULL)
147 ccid->ccid_hc_tx_insert_options(sk, skb);
148}
149
150static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
151 struct sk_buff *skb)
152{
153 if (ccid->ccid_hc_rx_insert_options != NULL)
154 ccid->ccid_hc_rx_insert_options(sk, skb);
155}
156#endif /* _CCID_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
new file mode 100644
index 000000000000..67f9c06bd179
--- /dev/null
+++ b/net/dccp/ccids/Kconfig
@@ -0,0 +1,25 @@
1menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
2 depends on IP_DCCP && EXPERIMENTAL
3
4config IP_DCCP_CCID3
5 tristate "CCID3 (TFRC) (EXPERIMENTAL)"
6 depends on IP_DCCP
7 ---help---
8 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
9 rate-controlled congestion control mechanism. TFRC is designed to
10 be reasonably fair when competing for bandwidth with TCP-like flows,
11 where a flow is "reasonably fair" if its sending rate is generally
12 within a factor of two of the sending rate of a TCP flow under the
13 same conditions. However, TFRC has a much lower variation of
14 throughput over time compared with TCP, which makes CCID 3 more
15 suitable than CCID 2 for applications such as streaming media where a
16 relatively smooth sending rate is of importance.
17
18 CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
19 congestion control algorithms were initially described in RFC 3448.
20
21 This text was extracted from draft-ietf-dccp-spec-11.txt.
22
23 If in doubt, say M.
24
25endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
new file mode 100644
index 000000000000..1c720131c5db
--- /dev/null
+++ b/net/dccp/ccids/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
2
3dccp_ccid3-y := ccid3.o
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
new file mode 100644
index 000000000000..4f45902cb55e
--- /dev/null
+++ b/net/dccp/ccids/ccid3.c
@@ -0,0 +1,2164 @@
1/*
2 * net/dccp/ccids/ccid3.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#include "../ccid.h"
38#include "../dccp.h"
39#include "ccid3.h"
40
#ifdef CCID3_DEBUG
/*
 * ccid3_debug is defined with internal linkage ("static") further down in
 * this file.  Declaring it "extern" here would give the same identifier both
 * external and internal linkage in one translation unit, which is undefined
 * behaviour in C and a hard error with GCC as soon as CCID3_DEBUG is
 * defined.  A matching static declaration is a harmless tentative
 * definition.
 */
static int ccid3_debug;

/* Print a KERN_DEBUG line prefixed with the calling function's name. */
#define ccid3_pr_debug(format, a...) \
	do { if (ccid3_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
	} while (0)
#else
/*
 * Debugging compiled out: expands to an empty statement and never evaluates
 * its arguments.
 */
#define ccid3_pr_debug(format, a...) do { } while (0)
#endif
51
/* Smallest, default and largest packet size (presumably bytes -- TODO confirm) */
#define TFRC_MIN_PACKET_SIZE	   16
#define TFRC_STD_PACKET_SIZE	  256
#define TFRC_MAX_PACKET_SIZE	65535

/* Microseconds per second */
#define USEC_IN_SEC 1000000

#define TFRC_INITIAL_TIMEOUT	   (2 * USEC_IN_SEC)
/* two seconds (expressed in usecs) as per CCID3 spec 11 */

#define TFRC_OPSYS_HALF_TIME_GRAN	(USEC_IN_SEC / (2 * HZ))
/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */

/* NOTE(review): window counter parameters; exact use is not visible in this chunk */
#define TFRC_WIN_COUNT_PER_RTT	    4
#define TFRC_WIN_COUNT_LIMIT	   16

#define TFRC_MAX_BACK_OFF_TIME	   64
/* above is in seconds */

/* NOTE(review): unit/scale of this loss event rate bound is not visible here */
#define TFRC_SMALLEST_P		   40

#define TFRC_RECV_IVAL_F_LENGTH	    8	/* length(w[]) */

/* Number of later packets received before one is considered lost */
#define TFRC_RECV_NUM_LATE_LOSS	 3
76
/*
 * Option codes specific to CCID3.
 * NOTE(review): presumably the values seen in the 'option' argument of the
 * ccid_hc_*_parse_options callbacks -- confirm against the option parser.
 */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS	 = 193,
	TFRC_OPT_RECEIVE_RATE	 = 194,
};
82
/* Runtime switch tested by ccid3_pr_debug() before it prints anything. */
static int ccid3_debug;

/* Slab caches backing the TX, RX and loss interval history entry helpers. */
static kmem_cache_t *ccid3_tx_hist_slab;
static kmem_cache_t *ccid3_rx_hist_slab;
static kmem_cache_t *ccid3_loss_interval_hist_slab;
88
89static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio)
90{
91 struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio);
92
93 if (entry != NULL)
94 entry->ccid3htx_sent = 0;
95
96 return entry;
97}
98
99static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry)
100{
101 if (entry != NULL)
102 kmem_cache_free(ccid3_tx_hist_slab, entry);
103}
104
105static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk,
106 struct sk_buff *skb,
107 int prio)
108{
109 struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio);
110
111 if (entry != NULL) {
112 const struct dccp_hdr *dh = dccp_hdr(skb);
113
114 entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
115 entry->ccid3hrx_win_count = dh->dccph_ccval;
116 entry->ccid3hrx_type = dh->dccph_type;
117 entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
118 do_gettimeofday(&(entry->ccid3hrx_tstamp));
119 }
120
121 return entry;
122}
123
124static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry)
125{
126 if (entry != NULL)
127 kmem_cache_free(ccid3_rx_hist_slab, entry);
128}
129
130static void ccid3_rx_history_delete(struct list_head *hist)
131{
132 struct ccid3_rx_hist_entry *entry, *next;
133
134 list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) {
135 list_del_init(&entry->ccid3hrx_node);
136 kmem_cache_free(ccid3_rx_hist_slab, entry);
137 }
138}
139
140static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio)
141{
142 return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio);
143}
144
145static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry)
146{
147 if (entry != NULL)
148 kmem_cache_free(ccid3_loss_interval_hist_slab, entry);
149}
150
151static void ccid3_loss_interval_history_delete(struct list_head *hist)
152{
153 struct ccid3_loss_interval_hist_entry *entry, *next;
154
155 list_for_each_entry_safe(entry, next, hist, ccid3lih_node) {
156 list_del_init(&entry->ccid3lih_node);
157 kmem_cache_free(ccid3_loss_interval_hist_slab, entry);
158 }
159}
160
/*
 * Socket-level init hook for CCID3.  It only emits a debug trace with the
 * socket's role and address, and always reports success (0).
 */
static int ccid3_init(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	return 0;
}
166
/*
 * Socket-level exit hook for CCID3.  It only emits a debug trace with the
 * socket's role and address.
 */
static void ccid3_exit(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
}
171
/*
 * TFRC sender states.  Numbering starts at 1, so the value 0 does not name
 * any state.
 * NOTE(review): going by the names these are "nothing sent yet", "sent but
 * no feedback yet", "feedback received" and "terminated" -- confirm against
 * the code that drives ccid3_hc_tx_set_state().
 */
enum ccid3_hc_tx_states {
	TFRC_SSTATE_NO_SENT = 1,
	TFRC_SSTATE_NO_FBACK,
	TFRC_SSTATE_FBACK,
	TFRC_SSTATE_TERM,
};
179
#ifdef CCID3_DEBUG
/*
 * Map a TFRC sender state to a printable name for debug output.
 *
 * The table is read-only, so both the strings and the pointers are const.
 * A value that has no name -- 0, which precedes the first enumerator, or
 * anything past the end of the table -- yields "UNKNOWN" instead of a NULL
 * pointer or an out-of-bounds read.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static const char * const ccid3_state_names[] = {
		[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
		[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
		[TFRC_SSTATE_FBACK]    = "FBACK",
		[TFRC_SSTATE_TERM]     = "TERM",
	};
	const unsigned int idx = state;

	if (idx >= sizeof(ccid3_state_names) / sizeof(ccid3_state_names[0]) ||
	    ccid3_state_names[idx] == NULL)
		return "UNKNOWN";

	return ccid3_state_names[idx];
}
#endif
193
194static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state)
195{
196 struct dccp_sock *dp = dccp_sk(sk);
197 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
198 enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
199
200 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
201 dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state));
202 WARN_ON(state == oldstate);
203 hctx->ccid3hctx_state = state;
204}
205
206static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) {
207
208 result->tv_sec = large.tv_sec-small.tv_sec;
209 if (large.tv_usec < small.tv_usec) {
210 (result->tv_sec)--;
211 result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec;
212 } else
213 result->tv_usec = large.tv_usec-small.tv_usec;
214}
215
216static inline void timeval_fix(struct timeval *tv) {
217 if (tv->tv_usec >= USEC_IN_SEC) {
218 tv->tv_sec++;
219 tv->tv_usec -= USEC_IN_SEC;
220 }
221}
222
223/* returns the difference in usecs between timeval passed in and current time */
224static inline u32 now_delta(struct timeval tv) {
225 struct timeval now;
226
227 do_gettimeofday(&now);
228 return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec);
229}
230
231#define CALCX_ARRSIZE 500
232
233#define CALCX_SPLIT 50000
234/* equivalent to 0.05 */
235
236static const u32 calcx_lookup[CALCX_ARRSIZE][2] = {
237 { 37172 , 8172 },
238 { 53499 , 11567 },
239 { 66664 , 14180 },
240 { 78298 , 16388 },
241 { 89021 , 18339 },
242 { 99147 , 20108 },
243 { 108858 , 21738 },
244 { 118273 , 23260 },
245 { 127474 , 24693 },
246 { 136520 , 26052 },
247 { 145456 , 27348 },
248 { 154316 , 28589 },
249 { 163130 , 29783 },
250 { 171919 , 30935 },
251 { 180704 , 32049 },
252 { 189502 , 33130 },
253 { 198328 , 34180 },
254 { 207194 , 35202 },
255 { 216114 , 36198 },
256 { 225097 , 37172 },
257 { 234153 , 38123 },
258 { 243294 , 39055 },
259 { 252527 , 39968 },
260 { 261861 , 40864 },
261 { 271305 , 41743 },
262 { 280866 , 42607 },
263 { 290553 , 43457 },
264 { 300372 , 44293 },
265 { 310333 , 45117 },
266 { 320441 , 45929 },
267 { 330705 , 46729 },
268 { 341131 , 47518 },
269 { 351728 , 48297 },
270 { 362501 , 49066 },
271 { 373460 , 49826 },
272 { 384609 , 50577 },
273 { 395958 , 51320 },
274 { 407513 , 52054 },
275 { 419281 , 52780 },
276 { 431270 , 53499 },
277 { 443487 , 54211 },
278 { 455940 , 54916 },
279 { 468635 , 55614 },
280 { 481581 , 56306 },
281 { 494785 , 56991 },
282 { 508254 , 57671 },
283 { 521996 , 58345 },
284 { 536019 , 59014 },
285 { 550331 , 59677 },
286 { 564939 , 60335 },
287 { 579851 , 60988 },
288 { 595075 , 61636 },
289 { 610619 , 62279 },
290 { 626491 , 62918 },
291 { 642700 , 63553 },
292 { 659253 , 64183 },
293 { 676158 , 64809 },
294 { 693424 , 65431 },
295 { 711060 , 66050 },
296 { 729073 , 66664 },
297 { 747472 , 67275 },
298 { 766266 , 67882 },
299 { 785464 , 68486 },
300 { 805073 , 69087 },
301 { 825103 , 69684 },
302 { 845562 , 70278 },
303 { 866460 , 70868 },
304 { 887805 , 71456 },
305 { 909606 , 72041 },
306 { 931873 , 72623 },
307 { 954614 , 73202 },
308 { 977839 , 73778 },
309 { 1001557 , 74352 },
310 { 1025777 , 74923 },
311 { 1050508 , 75492 },
312 { 1075761 , 76058 },
313 { 1101544 , 76621 },
314 { 1127867 , 77183 },
315 { 1154739 , 77741 },
316 { 1182172 , 78298 },
317 { 1210173 , 78852 },
318 { 1238753 , 79405 },
319 { 1267922 , 79955 },
320 { 1297689 , 80503 },
321 { 1328066 , 81049 },
322 { 1359060 , 81593 },
323 { 1390684 , 82135 },
324 { 1422947 , 82675 },
325 { 1455859 , 83213 },
326 { 1489430 , 83750 },
327 { 1523671 , 84284 },
328 { 1558593 , 84817 },
329 { 1594205 , 85348 },
330 { 1630518 , 85878 },
331 { 1667543 , 86406 },
332 { 1705290 , 86932 },
333 { 1743770 , 87457 },
334 { 1782994 , 87980 },
335 { 1822973 , 88501 },
336 { 1863717 , 89021 },
337 { 1905237 , 89540 },
338 { 1947545 , 90057 },
339 { 1990650 , 90573 },
340 { 2034566 , 91087 },
341 { 2079301 , 91600 },
342 { 2124869 , 92111 },
343 { 2171279 , 92622 },
344 { 2218543 , 93131 },
345 { 2266673 , 93639 },
346 { 2315680 , 94145 },
347 { 2365575 , 94650 },
348 { 2416371 , 95154 },
349 { 2468077 , 95657 },
350 { 2520707 , 96159 },
351 { 2574271 , 96660 },
352 { 2628782 , 97159 },
353 { 2684250 , 97658 },
354 { 2740689 , 98155 },
355 { 2798110 , 98651 },
356 { 2856524 , 99147 },
357 { 2915944 , 99641 },
358 { 2976382 , 100134 },
359 { 3037850 , 100626 },
360 { 3100360 , 101117 },
361 { 3163924 , 101608 },
362 { 3228554 , 102097 },
363 { 3294263 , 102586 },
364 { 3361063 , 103073 },
365 { 3428966 , 103560 },
366 { 3497984 , 104045 },
367 { 3568131 , 104530 },
368 { 3639419 , 105014 },
369 { 3711860 , 105498 },
370 { 3785467 , 105980 },
371 { 3860253 , 106462 },
372 { 3936229 , 106942 },
373 { 4013410 , 107422 },
374 { 4091808 , 107902 },
375 { 4171435 , 108380 },
376 { 4252306 , 108858 },
377 { 4334431 , 109335 },
378 { 4417825 , 109811 },
379 { 4502501 , 110287 },
380 { 4588472 , 110762 },
381 { 4675750 , 111236 },
382 { 4764349 , 111709 },
383 { 4854283 , 112182 },
384 { 4945564 , 112654 },
385 { 5038206 , 113126 },
386 { 5132223 , 113597 },
387 { 5227627 , 114067 },
388 { 5324432 , 114537 },
389 { 5422652 , 115006 },
390 { 5522299 , 115474 },
391 { 5623389 , 115942 },
392 { 5725934 , 116409 },
393 { 5829948 , 116876 },
394 { 5935446 , 117342 },
395 { 6042439 , 117808 },
396 { 6150943 , 118273 },
397 { 6260972 , 118738 },
398 { 6372538 , 119202 },
399 { 6485657 , 119665 },
400 { 6600342 , 120128 },
401 { 6716607 , 120591 },
402 { 6834467 , 121053 },
403 { 6953935 , 121514 },
404 { 7075025 , 121976 },
405 { 7197752 , 122436 },
406 { 7322131 , 122896 },
407 { 7448175 , 123356 },
408 { 7575898 , 123815 },
409 { 7705316 , 124274 },
410 { 7836442 , 124733 },
411 { 7969291 , 125191 },
412 { 8103877 , 125648 },
413 { 8240216 , 126105 },
414 { 8378321 , 126562 },
415 { 8518208 , 127018 },
416 { 8659890 , 127474 },
417 { 8803384 , 127930 },
418 { 8948702 , 128385 },
419 { 9095861 , 128840 },
420 { 9244875 , 129294 },
421 { 9395760 , 129748 },
422 { 9548529 , 130202 },
423 { 9703198 , 130655 },
424 { 9859782 , 131108 },
425 { 10018296 , 131561 },
426 { 10178755 , 132014 },
427 { 10341174 , 132466 },
428 { 10505569 , 132917 },
429 { 10671954 , 133369 },
430 { 10840345 , 133820 },
431 { 11010757 , 134271 },
432 { 11183206 , 134721 },
433 { 11357706 , 135171 },
434 { 11534274 , 135621 },
435 { 11712924 , 136071 },
436 { 11893673 , 136520 },
437 { 12076536 , 136969 },
438 { 12261527 , 137418 },
439 { 12448664 , 137867 },
440 { 12637961 , 138315 },
441 { 12829435 , 138763 },
442 { 13023101 , 139211 },
443 { 13218974 , 139658 },
444 { 13417071 , 140106 },
445 { 13617407 , 140553 },
446 { 13819999 , 140999 },
447 { 14024862 , 141446 },
448 { 14232012 , 141892 },
449 { 14441465 , 142339 },
450 { 14653238 , 142785 },
451 { 14867346 , 143230 },
452 { 15083805 , 143676 },
453 { 15302632 , 144121 },
454 { 15523842 , 144566 },
455 { 15747453 , 145011 },
456 { 15973479 , 145456 },
457 { 16201939 , 145900 },
458 { 16432847 , 146345 },
459 { 16666221 , 146789 },
460 { 16902076 , 147233 },
461 { 17140429 , 147677 },
462 { 17381297 , 148121 },
463 { 17624696 , 148564 },
464 { 17870643 , 149007 },
465 { 18119154 , 149451 },
466 { 18370247 , 149894 },
467 { 18623936 , 150336 },
468 { 18880241 , 150779 },
469 { 19139176 , 151222 },
470 { 19400759 , 151664 },
471 { 19665007 , 152107 },
472 { 19931936 , 152549 },
473 { 20201564 , 152991 },
474 { 20473907 , 153433 },
475 { 20748982 , 153875 },
476 { 21026807 , 154316 },
477 { 21307399 , 154758 },
478 { 21590773 , 155199 },
479 { 21876949 , 155641 },
480 { 22165941 , 156082 },
481 { 22457769 , 156523 },
482 { 22752449 , 156964 },
483 { 23049999 , 157405 },
484 { 23350435 , 157846 },
485 { 23653774 , 158287 },
486 { 23960036 , 158727 },
487 { 24269236 , 159168 },
488 { 24581392 , 159608 },
489 { 24896521 , 160049 },
490 { 25214642 , 160489 },
491 { 25535772 , 160929 },
492 { 25859927 , 161370 },
493 { 26187127 , 161810 },
494 { 26517388 , 162250 },
495 { 26850728 , 162690 },
496 { 27187165 , 163130 },
497 { 27526716 , 163569 },
498 { 27869400 , 164009 },
499 { 28215234 , 164449 },
500 { 28564236 , 164889 },
501 { 28916423 , 165328 },
502 { 29271815 , 165768 },
503 { 29630428 , 166208 },
504 { 29992281 , 166647 },
505 { 30357392 , 167087 },
506 { 30725779 , 167526 },
507 { 31097459 , 167965 },
508 { 31472452 , 168405 },
509 { 31850774 , 168844 },
510 { 32232445 , 169283 },
511 { 32617482 , 169723 },
512 { 33005904 , 170162 },
513 { 33397730 , 170601 },
514 { 33792976 , 171041 },
515 { 34191663 , 171480 },
516 { 34593807 , 171919 },
517 { 34999428 , 172358 },
518 { 35408544 , 172797 },
519 { 35821174 , 173237 },
520 { 36237335 , 173676 },
521 { 36657047 , 174115 },
522 { 37080329 , 174554 },
523 { 37507197 , 174993 },
524 { 37937673 , 175433 },
525 { 38371773 , 175872 },
526 { 38809517 , 176311 },
527 { 39250924 , 176750 },
528 { 39696012 , 177190 },
529 { 40144800 , 177629 },
530 { 40597308 , 178068 },
531 { 41053553 , 178507 },
532 { 41513554 , 178947 },
533 { 41977332 , 179386 },
534 { 42444904 , 179825 },
535 { 42916290 , 180265 },
536 { 43391509 , 180704 },
537 { 43870579 , 181144 },
538 { 44353520 , 181583 },
539 { 44840352 , 182023 },
540 { 45331092 , 182462 },
541 { 45825761 , 182902 },
542 { 46324378 , 183342 },
543 { 46826961 , 183781 },
544 { 47333531 , 184221 },
545 { 47844106 , 184661 },
546 { 48358706 , 185101 },
547 { 48877350 , 185541 },
548 { 49400058 , 185981 },
549 { 49926849 , 186421 },
550 { 50457743 , 186861 },
551 { 50992759 , 187301 },
552 { 51531916 , 187741 },
553 { 52075235 , 188181 },
554 { 52622735 , 188622 },
555 { 53174435 , 189062 },
556 { 53730355 , 189502 },
557 { 54290515 , 189943 },
558 { 54854935 , 190383 },
559 { 55423634 , 190824 },
560 { 55996633 , 191265 },
561 { 56573950 , 191706 },
562 { 57155606 , 192146 },
563 { 57741621 , 192587 },
564 { 58332014 , 193028 },
565 { 58926806 , 193470 },
566 { 59526017 , 193911 },
567 { 60129666 , 194352 },
568 { 60737774 , 194793 },
569 { 61350361 , 195235 },
570 { 61967446 , 195677 },
571 { 62589050 , 196118 },
572 { 63215194 , 196560 },
573 { 63845897 , 197002 },
574 { 64481179 , 197444 },
575 { 65121061 , 197886 },
576 { 65765563 , 198328 },
577 { 66414705 , 198770 },
578 { 67068508 , 199213 },
579 { 67726992 , 199655 },
580 { 68390177 , 200098 },
581 { 69058085 , 200540 },
582 { 69730735 , 200983 },
583 { 70408147 , 201426 },
584 { 71090343 , 201869 },
585 { 71777343 , 202312 },
586 { 72469168 , 202755 },
587 { 73165837 , 203199 },
588 { 73867373 , 203642 },
589 { 74573795 , 204086 },
590 { 75285124 , 204529 },
591 { 76001380 , 204973 },
592 { 76722586 , 205417 },
593 { 77448761 , 205861 },
594 { 78179926 , 206306 },
595 { 78916102 , 206750 },
596 { 79657310 , 207194 },
597 { 80403571 , 207639 },
598 { 81154906 , 208084 },
599 { 81911335 , 208529 },
600 { 82672880 , 208974 },
601 { 83439562 , 209419 },
602 { 84211402 , 209864 },
603 { 84988421 , 210309 },
604 { 85770640 , 210755 },
605 { 86558080 , 211201 },
606 { 87350762 , 211647 },
607 { 88148708 , 212093 },
608 { 88951938 , 212539 },
609 { 89760475 , 212985 },
610 { 90574339 , 213432 },
611 { 91393551 , 213878 },
612 { 92218133 , 214325 },
613 { 93048107 , 214772 },
614 { 93883493 , 215219 },
615 { 94724314 , 215666 },
616 { 95570590 , 216114 },
617 { 96422343 , 216561 },
618 { 97279594 , 217009 },
619 { 98142366 , 217457 },
620 { 99010679 , 217905 },
621 { 99884556 , 218353 },
622 { 100764018 , 218801 },
623 { 101649086 , 219250 },
624 { 102539782 , 219698 },
625 { 103436128 , 220147 },
626 { 104338146 , 220596 },
627 { 105245857 , 221046 },
628 { 106159284 , 221495 },
629 { 107078448 , 221945 },
630 { 108003370 , 222394 },
631 { 108934074 , 222844 },
632 { 109870580 , 223294 },
633 { 110812910 , 223745 },
634 { 111761087 , 224195 },
635 { 112715133 , 224646 },
636 { 113675069 , 225097 },
637 { 114640918 , 225548 },
638 { 115612702 , 225999 },
639 { 116590442 , 226450 },
640 { 117574162 , 226902 },
641 { 118563882 , 227353 },
642 { 119559626 , 227805 },
643 { 120561415 , 228258 },
644 { 121569272 , 228710 },
645 { 122583219 , 229162 },
646 { 123603278 , 229615 },
647 { 124629471 , 230068 },
648 { 125661822 , 230521 },
649 { 126700352 , 230974 },
650 { 127745083 , 231428 },
651 { 128796039 , 231882 },
652 { 129853241 , 232336 },
653 { 130916713 , 232790 },
654 { 131986475 , 233244 },
655 { 133062553 , 233699 },
656 { 134144966 , 234153 },
657 { 135233739 , 234608 },
658 { 136328894 , 235064 },
659 { 137430453 , 235519 },
660 { 138538440 , 235975 },
661 { 139652876 , 236430 },
662 { 140773786 , 236886 },
663 { 141901190 , 237343 },
664 { 143035113 , 237799 },
665 { 144175576 , 238256 },
666 { 145322604 , 238713 },
667 { 146476218 , 239170 },
668 { 147636442 , 239627 },
669 { 148803298 , 240085 },
670 { 149976809 , 240542 },
671 { 151156999 , 241000 },
672 { 152343890 , 241459 },
673 { 153537506 , 241917 },
674 { 154737869 , 242376 },
675 { 155945002 , 242835 },
676 { 157158929 , 243294 },
677 { 158379673 , 243753 },
678 { 159607257 , 244213 },
679 { 160841704 , 244673 },
680 { 162083037 , 245133 },
681 { 163331279 , 245593 },
682 { 164586455 , 246054 },
683 { 165848586 , 246514 },
684 { 167117696 , 246975 },
685 { 168393810 , 247437 },
686 { 169676949 , 247898 },
687 { 170967138 , 248360 },
688 { 172264399 , 248822 },
689 { 173568757 , 249284 },
690 { 174880235 , 249747 },
691 { 176198856 , 250209 },
692 { 177524643 , 250672 },
693 { 178857621 , 251136 },
694 { 180197813 , 251599 },
695 { 181545242 , 252063 },
696 { 182899933 , 252527 },
697 { 184261908 , 252991 },
698 { 185631191 , 253456 },
699 { 187007807 , 253920 },
700 { 188391778 , 254385 },
701 { 189783129 , 254851 },
702 { 191181884 , 255316 },
703 { 192588065 , 255782 },
704 { 194001698 , 256248 },
705 { 195422805 , 256714 },
706 { 196851411 , 257181 },
707 { 198287540 , 257648 },
708 { 199731215 , 258115 },
709 { 201182461 , 258582 },
710 { 202641302 , 259050 },
711 { 204107760 , 259518 },
712 { 205581862 , 259986 },
713 { 207063630 , 260454 },
714 { 208553088 , 260923 },
715 { 210050262 , 261392 },
716 { 211555174 , 261861 },
717 { 213067849 , 262331 },
718 { 214588312 , 262800 },
719 { 216116586 , 263270 },
720 { 217652696 , 263741 },
721 { 219196666 , 264211 },
722 { 220748520 , 264682 },
723 { 222308282 , 265153 },
724 { 223875978 , 265625 },
725 { 225451630 , 266097 },
726 { 227035265 , 266569 },
727 { 228626905 , 267041 },
728 { 230226576 , 267514 },
729 { 231834302 , 267986 },
730 { 233450107 , 268460 },
731 { 235074016 , 268933 },
732 { 236706054 , 269407 },
733 { 238346244 , 269881 },
734 { 239994613 , 270355 },
735 { 241651183 , 270830 },
736 { 243315981 , 271305 }
737};
738
739/* Calculate the send rate as per section 3.1 of RFC3448
740
741Returns send rate in bytes per second
742
743Integer maths and lookups are used as not allowed floating point in kernel
744
745The function for Xcalc as per section 3.1 of RFC3448 is:
746
747X = s
748 -------------------------------------------------------------
749 R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
750
751where
752X is the trasmit rate in bytes/second
753s is the packet size in bytes
754R is the round trip time in seconds
755p is the loss event rate, between 0 and 1.0, of the number of loss events
756 as a fraction of the number of packets transmitted
757t_RTO is the TCP retransmission timeout value in seconds
758b is the number of packets acknowledged by a single TCP acknowledgement
759
760we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
761
762X = s
763 -----------------------------------------------------------------------
764 R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
765
766
767which we can break down into:
768
769X = s
770 --------
771 R * f(p)
772
773where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
774
775Function parameters:
776s - bytes
777R - RTT in usecs
778p - loss rate (decimal fraction multiplied by 1,000,000)
779
780Returns Xcalc in bytes per second
781
782DON'T alter this code unless you run test cases against it as the code
783has been manipulated to stop underflow/overlow.
784
785*/
786static u32 ccid3_calc_x(u16 s, u32 R, u32 p)
787{
788 int index;
789 u32 f;
790 u64 tmp1, tmp2;
791
792 if (p < CALCX_SPLIT)
793 index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1;
794 else
795 index = (p / (1000000 / CALCX_ARRSIZE)) - 1;
796
797 if (index < 0)
798 /* p should be 0 unless there is a bug in my code */
799 index = 0;
800
801 if (R == 0)
802 R = 1; /* RTT can't be zero or else divide by zero */
803
804 BUG_ON(index >= CALCX_ARRSIZE);
805
806 if (p >= CALCX_SPLIT)
807 f = calcx_lookup[index][0];
808 else
809 f = calcx_lookup[index][1];
810
811 tmp1 = ((u64)s * 100000000);
812 tmp2 = ((u64)R * (u64)f);
813 do_div(tmp2,10000);
814 do_div(tmp1,tmp2);
815 /* don't alter above math unless you test due to overflow on 32 bit */
816
817 return (u32)tmp1;
818}
819
820/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
821static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
822{
823 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
824 return;
825 /* if no feedback spec says t_ipi is 1 second (set elsewhere and then
826 * doubles after every no feedback timer (separate function) */
827
828 if (hctx->ccid3hctx_x < 10) {
829 ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n");
830 hctx->ccid3hctx_x = 10;
831 }
832 hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000)
833 / (hctx->ccid3hctx_x / 10);
834 /* reason for above maths with 10 in there is to avoid 32 bit
835 * overflow for jumbo packets */
836
837}
838
839/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
840static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
841{
842 hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN);
843
844}
845
846/*
847 * Update X by
848 * If (p > 0)
849 * x_calc = calcX(s, R, p);
850 * X = max(min(X_calc, 2 * X_recv), s / t_mbi);
851 * Else
852 * If (now - tld >= R)
853 * X = max(min(2 * X, 2 * X_recv), s / R);
854 * tld = now;
855 */
856static void ccid3_hc_tx_update_x(struct sock *sk)
857{
858 struct dccp_sock *dp = dccp_sk(sk);
859 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
860
861 if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */
862 hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s,
863 hctx->ccid3hctx_rtt,
864 hctx->ccid3hctx_p);
865 hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv),
866 hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME);
867 } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) {
868 u32 rtt = hctx->ccid3hctx_rtt;
869 if (rtt < 10) {
870 rtt = 10;
871 } /* avoid divide by zero below */
872
873 hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x),
874 (hctx->ccid3hctx_s * 100000) / (rtt / 10));
875 /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */
876 do_gettimeofday(&hctx->ccid3hctx_t_ld);
877 }
878
879 if (hctx->ccid3hctx_x == 0) {
880 ccid3_pr_debug("ccid3hctx_x = 0!\n");
881 hctx->ccid3hctx_x = 1;
882 }
883}
884
885static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
886{
887 struct sock *sk = (struct sock *)data;
888 struct dccp_sock *dp = dccp_sk(sk);
889 unsigned long next_tmout = 0;
890 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
891 u32 rtt;
892
893 bh_lock_sock(sk);
894 if (sock_owned_by_user(sk)) {
895 /* Try again later. */
896 /* XXX: set some sensible MIB */
897 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5);
898 goto out;
899 }
900
901 ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
902 ccid3_tx_state_name(hctx->ccid3hctx_state));
903
904 if (hctx->ccid3hctx_x < 10) {
905 ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n");
906 hctx->ccid3hctx_x = 10;
907 }
908
909 switch (hctx->ccid3hctx_state) {
910 case TFRC_SSTATE_TERM:
911 goto out;
912 case TFRC_SSTATE_NO_FBACK:
913 /* Halve send rate */
914 hctx->ccid3hctx_x /= 2;
915 if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME))
916 hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME;
917
918 ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n",
919 dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state),
920 hctx->ccid3hctx_x);
921 next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000)
922 / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT);
923 /* do above maths with 100000 and 10 to prevent overflow on 32 bit */
924 /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11
925 * should adjust tx_t_ipi and double that to achieve it really */
926 break;
927 case TFRC_SSTATE_FBACK:
928 /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */
929 rtt = hctx->ccid3hctx_rtt;
930 if (rtt < 10)
931 rtt = 10;
932 /* stop divide by zero below */
933 if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >=
934 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) {
935 ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk,
936 ccid3_tx_state_name(hctx->ccid3hctx_state));
937 /* Halve sending rate */
938
939 /* If (X_calc > 2 * X_recv)
940 * X_recv = max(X_recv / 2, s / (2 * t_mbi));
941 * Else
942 * X_recv = X_calc / 4;
943 */
944 BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0);
945
946 /* check also if p is zero -> x_calc is infinity? */
947 if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
948 hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
949 hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
950 hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
951 else
952 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
953
954 /* Update sending rate */
955 ccid3_hc_tx_update_x(sk);
956 }
957 if (hctx->ccid3hctx_x == 0) {
958 ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n");
959 hctx->ccid3hctx_x = 10;
960 }
961 /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
962 next_tmout = max_t(u32, inet_csk(sk)->icsk_rto,
963 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10));
964 break;
965 default:
966 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
967 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
968 dump_stack();
969 goto out;
970 }
971
972 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
973 jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
974 hctx->ccid3hctx_idle = 1;
975out:
976 bh_unlock_sock(sk);
977 sock_put(sk);
978}
979
980static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb,
981 int len, long *delay)
982{
983 struct dccp_sock *dp = dccp_sk(sk);
984 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
985 struct ccid3_tx_hist_entry *new_packet = NULL;
986 struct timeval now;
987 int rc = -ENOTCONN;
988
989// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len);
990 /*
991 * check if pure ACK or Terminating */
992 /* XXX: We only call this function for DATA and DATAACK, on, these packets can have
993 * zero length, but why the comment about "pure ACK"?
994 */
995 if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
996 goto out;
997
998 /* See if last packet allocated was not sent */
999 if (!list_empty(&hctx->ccid3hctx_hist))
1000 new_packet = list_entry(hctx->ccid3hctx_hist.next,
1001 struct ccid3_tx_hist_entry, ccid3htx_node);
1002
1003 if (new_packet == NULL || new_packet->ccid3htx_sent) {
1004 new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC);
1005
1006 rc = -ENOBUFS;
1007 if (new_packet == NULL) {
1008 ccid3_pr_debug("%s, sk=%p, not enough mem to add "
1009 "to history, send refused\n", dccp_role(sk), sk);
1010 goto out;
1011 }
1012
1013 list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist);
1014 }
1015
1016 do_gettimeofday(&now);
1017
1018 switch (hctx->ccid3hctx_state) {
1019 case TFRC_SSTATE_NO_SENT:
1020 ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk,
1021 dp->dccps_gss);
1022
1023 hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
1024 hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
1025 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
1026 hctx->ccid3hctx_last_win_count = 0;
1027 hctx->ccid3hctx_t_last_win_count = now;
1028 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
1029 hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;
1030
1031 /* Set nominal send time for initial packet */
1032 hctx->ccid3hctx_t_nom = now;
1033 (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
1034 timeval_fix(&(hctx->ccid3hctx_t_nom));
1035 ccid3_calc_new_delta(hctx);
1036 rc = 0;
1037 break;
1038 case TFRC_SSTATE_NO_FBACK:
1039 case TFRC_SSTATE_FBACK:
1040 *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta);
1041 ccid3_pr_debug("send_packet delay=%ld\n",*delay);
1042 *delay /= -1000;
1043 /* divide by -1000 is to convert to ms and get sign right */
1044 rc = *delay > 0 ? -EAGAIN : 0;
1045 break;
1046 default:
1047 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1048 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
1049 dump_stack();
1050 rc = -EINVAL;
1051 break;
1052 }
1053
1054 /* Can we send? if so add options and add to packet history */
1055 if (rc == 0)
1056 new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
1057out:
1058 return rc;
1059}
1060
1061static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
1062{
1063 struct dccp_sock *dp = dccp_sk(sk);
1064 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1065 struct ccid3_tx_hist_entry *packet = NULL;
1066 struct timeval now;
1067
1068// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len);
1069 BUG_ON(hctx == NULL);
1070
1071 if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
1072 ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
1073 dccp_role(sk), sk);
1074 return;
1075 }
1076
1077 do_gettimeofday(&now);
1078
1079 /* check if we have sent a data packet */
1080 if (len > 0) {
1081 unsigned long quarter_rtt;
1082
1083 if (list_empty(&hctx->ccid3hctx_hist)) {
1084 printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__);
1085 return;
1086 }
1087 packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node);
1088 if (packet->ccid3htx_sent) {
1089 printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__);
1090 return;
1091 }
1092 packet->ccid3htx_tstamp = now;
1093 packet->ccid3htx_seqno = dp->dccps_gss;
1094 // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno);
1095
1096 /*
1097 * Check if win_count have changed */
1098 /* COMPLIANCE_BEGIN
1099 * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt
1100 */
1101 quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4);
1102 if (quarter_rtt > 0) {
1103 hctx->ccid3hctx_t_last_win_count = now;
1104 hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
1105 min_t(unsigned long, quarter_rtt, 5)) % 16;
1106 ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n",
1107 dccp_role(sk), sk,
1108 packet->ccid3htx_win_count,
1109 hctx->ccid3hctx_last_win_count);
1110 }
1111 /* COMPLIANCE_END */
1112#if 0
1113 ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n",
1114 dccp_role(sk), sk,
1115 packet->ccid3htx_seqno,
1116 packet->ccid3htx_win_count);
1117#endif
1118 hctx->ccid3hctx_idle = 0;
1119 packet->ccid3htx_sent = 1;
1120 } else
1121 ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
1122 dccp_role(sk), sk, dp->dccps_gss);
1123
1124 switch (hctx->ccid3hctx_state) {
1125 case TFRC_SSTATE_NO_SENT:
1126 /* if first wasn't pure ack */
1127 if (len != 0)
1128 printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n",
1129 __FUNCTION__, dccp_role(sk));
1130 return;
1131 case TFRC_SSTATE_NO_FBACK:
1132 case TFRC_SSTATE_FBACK:
1133 if (len > 0) {
1134 hctx->ccid3hctx_t_nom = now;
1135 ccid3_calc_new_t_ipi(hctx);
1136 ccid3_calc_new_delta(hctx);
1137 (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
1138 timeval_fix(&(hctx->ccid3hctx_t_nom));
1139 }
1140 break;
1141 default:
1142 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1143 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
1144 dump_stack();
1145 break;
1146 }
1147}
1148
1149static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
1150{
1151 struct dccp_sock *dp = dccp_sk(sk);
1152 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1153 struct ccid3_options_received *opt_recv;
1154 struct ccid3_tx_hist_entry *entry, *next, *packet;
1155 unsigned long next_tmout;
1156 u16 t_elapsed;
1157 u32 pinv;
1158 u32 x_recv;
1159 u32 r_sample;
1160#if 0
1161 ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
1162 dccp_role(sk), sk, dccp_state_name(sk->sk_state),
1163 skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
1164#endif
1165 if (hctx == NULL)
1166 return;
1167
1168 if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
1169 ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk);
1170 return;
1171 }
1172
1173 /* we are only interested in ACKs */
1174 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
1175 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
1176 return;
1177
1178 opt_recv = &hctx->ccid3hctx_options_received;
1179
1180 t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
1181 x_recv = opt_recv->ccid3or_receive_rate;
1182 pinv = opt_recv->ccid3or_loss_event_rate;
1183
1184 switch (hctx->ccid3hctx_state) {
1185 case TFRC_SSTATE_NO_SENT:
1186 /* FIXME: what to do here? */
1187 return;
1188 case TFRC_SSTATE_NO_FBACK:
1189 case TFRC_SSTATE_FBACK:
1190 /* Calculate new round trip sample by
1191 * R_sample = (now - t_recvdata) - t_delay */
1192 /* get t_recvdata from history */
1193 packet = NULL;
1194 list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node)
1195 if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) {
1196 packet = entry;
1197 break;
1198 }
1199
1200 if (packet == NULL) {
1201 ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n",
1202 dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq,
1203 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
1204 return;
1205 }
1206
1207 /* Update RTT */
1208 r_sample = now_delta(packet->ccid3htx_tstamp);
1209 /* FIXME: */
1210 // r_sample -= usecs_to_jiffies(t_elapsed * 10);
1211
1212 /* Update RTT estimate by
1213 * If (No feedback recv)
1214 * R = R_sample;
1215 * Else
1216 * R = q * R + (1 - q) * R_sample;
1217 *
1218 * q is a constant, RFC 3448 recomments 0.9
1219 */
1220 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
1221 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
1222 hctx->ccid3hctx_rtt = r_sample;
1223 } else
1224 hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10;
1225
1226 /*
1227 * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent
1228 * implemention of the new window count.
1229 */
1230 if (hctx->ccid3hctx_rtt < 4)
1231 hctx->ccid3hctx_rtt = 4;
1232
1233 ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n",
1234 dccp_role(sk), sk,
1235 hctx->ccid3hctx_rtt,
1236 r_sample);
1237
1238 /* Update timeout interval */
1239 inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC);
1240
1241 /* Update receive rate */
1242 hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */
1243
1244 /* Update loss event rate */
1245 if (pinv == ~0 || pinv == 0)
1246 hctx->ccid3hctx_p = 0;
1247 else {
1248 hctx->ccid3hctx_p = 1000000 / pinv;
1249
1250 if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
1251 hctx->ccid3hctx_p = TFRC_SMALLEST_P;
1252 ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk);
1253 }
1254 }
1255
1256 /* unschedule no feedback timer */
1257 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
1258
1259 /* Update sending rate */
1260 ccid3_hc_tx_update_x(sk);
1261
1262 /* Update next send time */
1263 if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) {
1264 (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC;
1265 (hctx->ccid3hctx_t_nom).tv_sec--;
1266 }
1267 /* FIXME - if no feedback then t_ipi can go > 1 second */
1268 (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi;
1269 ccid3_calc_new_t_ipi(hctx);
1270 (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
1271 timeval_fix(&(hctx->ccid3hctx_t_nom));
1272 ccid3_calc_new_delta(hctx);
1273
1274 /* remove all packets older than the one acked from history */
1275#if 0
1276 FIXME!
1277 list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) {
1278 list_del_init(&entry->ccid3htx_node);
1279 ccid3_tx_hist_entry_delete(entry);
1280 }
1281#endif
1282 if (hctx->ccid3hctx_x < 10) {
1283 ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n");
1284 hctx->ccid3hctx_x = 10;
1285 }
1286 /* to prevent divide by zero below */
1287
1288 /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
1289 next_tmout = max(inet_csk(sk)->icsk_rto,
1290 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10));
1291 /* maths with 100000 and 10 is to prevent overflow with 32 bit */
1292
1293 ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n",
1294 dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout);
1295
1296 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
1297 jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout)));
1298
1299 /* set idle flag */
1300 hctx->ccid3hctx_idle = 1;
1301 break;
1302 default:
1303 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1304 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
1305 dump_stack();
1306 break;
1307 }
1308}
1309
1310static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
1311{
1312 const struct dccp_sock *dp = dccp_sk(sk);
1313 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1314
1315 if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
1316 return;
1317
1318 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
1319}
1320
1321static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
1322 unsigned char len, u16 idx, unsigned char *value)
1323{
1324 int rc = 0;
1325 struct dccp_sock *dp = dccp_sk(sk);
1326 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1327 struct ccid3_options_received *opt_recv;
1328
1329 if (hctx == NULL)
1330 return 0;
1331
1332 opt_recv = &hctx->ccid3hctx_options_received;
1333
1334 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
1335 opt_recv->ccid3or_seqno = dp->dccps_gsr;
1336 opt_recv->ccid3or_loss_event_rate = ~0;
1337 opt_recv->ccid3or_loss_intervals_idx = 0;
1338 opt_recv->ccid3or_loss_intervals_len = 0;
1339 opt_recv->ccid3or_receive_rate = 0;
1340 }
1341
1342 switch (option) {
1343 case TFRC_OPT_LOSS_EVENT_RATE:
1344 if (len != 4) {
1345 ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n",
1346 dccp_role(sk), sk);
1347 rc = -EINVAL;
1348 } else {
1349 opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
1350 ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
1351 dccp_role(sk), sk,
1352 opt_recv->ccid3or_loss_event_rate);
1353 }
1354 break;
1355 case TFRC_OPT_LOSS_INTERVALS:
1356 opt_recv->ccid3or_loss_intervals_idx = idx;
1357 opt_recv->ccid3or_loss_intervals_len = len;
1358 ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
1359 dccp_role(sk), sk,
1360 opt_recv->ccid3or_loss_intervals_idx,
1361 opt_recv->ccid3or_loss_intervals_len);
1362 break;
1363 case TFRC_OPT_RECEIVE_RATE:
1364 if (len != 4) {
1365 ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n",
1366 dccp_role(sk), sk);
1367 rc = -EINVAL;
1368 } else {
1369 opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
1370 ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
1371 dccp_role(sk), sk,
1372 opt_recv->ccid3or_receive_rate);
1373 }
1374 break;
1375 }
1376
1377 return rc;
1378}
1379
1380static int ccid3_hc_tx_init(struct sock *sk)
1381{
1382 struct dccp_sock *dp = dccp_sk(sk);
1383 struct ccid3_hc_tx_sock *hctx;
1384
1385 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1386
1387 hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any());
1388 if (hctx == NULL)
1389 return -ENOMEM;
1390
1391 memset(hctx, 0, sizeof(*hctx));
1392
1393 if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE &&
1394 dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE)
1395 hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size;
1396 else
1397 hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
1398
1399 hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */
1400 hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */
1401 inet_csk(sk)->icsk_rto = USEC_IN_SEC;
1402 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
1403 INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
1404 init_timer(&hctx->ccid3hctx_no_feedback_timer);
1405
1406 return 0;
1407}
1408
1409static void ccid3_hc_tx_exit(struct sock *sk)
1410{
1411 struct dccp_sock *dp = dccp_sk(sk);
1412 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1413 struct ccid3_tx_hist_entry *entry, *next;
1414
1415 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1416 BUG_ON(hctx == NULL);
1417
1418 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
1419 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
1420
1421 /* Empty packet history */
1422 list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) {
1423 list_del_init(&entry->ccid3htx_node);
1424 ccid3_tx_hist_entry_delete(entry);
1425 }
1426
1427 kfree(dp->dccps_hc_tx_ccid_private);
1428 dp->dccps_hc_tx_ccid_private = NULL;
1429}
1430
1431/*
1432 * RX Half Connection methods
1433 */
1434
/* States of the TFRC receiver half connection */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA	= 1,
	TFRC_RSTATE_DATA	= 2,
	TFRC_RSTATE_TERM	= 127,
};
1441
#ifdef CCID3_DEBUG
/* Printable name of a TFRC receiver state, only built for debug output */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static char *names[] = {
		[TFRC_RSTATE_TERM]	= "TERM",
		[TFRC_RSTATE_DATA]	= "DATA",
		[TFRC_RSTATE_NO_DATA]	= "NO_DATA",
	};

	return names[state];
}
#endif
1454
1455static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state)
1456{
1457 struct dccp_sock *dp = dccp_sk(sk);
1458 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1459 enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
1460
1461 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
1462 dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state));
1463 WARN_ON(state == oldstate);
1464 hcrx->ccid3hcrx_state = state;
1465}
1466
1467static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet)
1468{
1469 struct dccp_sock *dp = dccp_sk(sk);
1470 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1471 struct ccid3_rx_hist_entry *entry, *next;
1472 u8 num_later = 0;
1473
1474 if (list_empty(&hcrx->ccid3hcrx_hist))
1475 list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
1476 else {
1477 u64 seqno = packet->ccid3hrx_seqno;
1478 struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next,
1479 struct ccid3_rx_hist_entry,
1480 ccid3hrx_node);
1481 if (after48(seqno, iter->ccid3hrx_seqno))
1482 list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
1483 else {
1484 if (iter->ccid3hrx_type == DCCP_PKT_DATA ||
1485 iter->ccid3hrx_type == DCCP_PKT_DATAACK)
1486 num_later = 1;
1487
1488 list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1489 if (after48(seqno, iter->ccid3hrx_seqno)) {
1490 list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node);
1491 goto trim_history;
1492 }
1493
1494 if (iter->ccid3hrx_type == DCCP_PKT_DATA ||
1495 iter->ccid3hrx_type == DCCP_PKT_DATAACK)
1496 num_later++;
1497
1498 if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
1499 ccid3_rx_hist_entry_delete(packet);
1500 ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n",
1501 dccp_role(sk), sk, seqno);
1502 return 1;
1503 }
1504 }
1505
1506 if (num_later < TFRC_RECV_NUM_LATE_LOSS)
1507 list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
1508 /* FIXME: else what? should we destroy the packet like above? */
1509 }
1510 }
1511
1512trim_history:
1513 /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */
1514 num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
1515
1516 if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
1517 list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1518 if (num_later == 0) {
1519 list_del_init(&entry->ccid3hrx_node);
1520 ccid3_rx_hist_entry_delete(entry);
1521 } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
1522 entry->ccid3hrx_type == DCCP_PKT_DATAACK)
1523 --num_later;
1524 }
1525 } else {
1526 int step = 0;
1527 u8 win_count = 0; /* Not needed, but lets shut up gcc */
1528 int tmp;
1529 /*
1530 * We have no loss interval history so we need at least one
1531 * rtt:s of data packets to approximate rtt.
1532 */
1533 list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1534 if (num_later == 0) {
1535 switch (step) {
1536 case 0:
1537 step = 1;
1538 /* OK, find next data packet */
1539 num_later = 1;
1540 break;
1541 case 1:
1542 step = 2;
1543 /* OK, find next data packet */
1544 num_later = 1;
1545 win_count = entry->ccid3hrx_win_count;
1546 break;
1547 case 2:
1548 tmp = win_count - entry->ccid3hrx_win_count;
1549 if (tmp < 0)
1550 tmp += TFRC_WIN_COUNT_LIMIT;
1551 if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
1552 /* we have found a packet older than one rtt
1553 * remove the rest */
1554 step = 3;
1555 } else /* OK, find next data packet */
1556 num_later = 1;
1557 break;
1558 case 3:
1559 list_del_init(&entry->ccid3hrx_node);
1560 ccid3_rx_hist_entry_delete(entry);
1561 break;
1562 }
1563 } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
1564 entry->ccid3hrx_type == DCCP_PKT_DATAACK)
1565 --num_later;
1566 }
1567 }
1568
1569 return 0;
1570}
1571
1572static void ccid3_hc_rx_send_feedback(struct sock *sk)
1573{
1574 struct dccp_sock *dp = dccp_sk(sk);
1575 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1576 struct ccid3_rx_hist_entry *entry, *packet;
1577
1578 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1579
1580 switch (hcrx->ccid3hcrx_state) {
1581 case TFRC_RSTATE_NO_DATA:
1582 hcrx->ccid3hcrx_x_recv = 0;
1583 break;
1584 case TFRC_RSTATE_DATA: {
1585 u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
1586
1587 if (delta == 0)
1588 delta = 1; /* to prevent divide by zero */
1589 hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta;
1590 }
1591 break;
1592 default:
1593 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1594 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
1595 dump_stack();
1596 return;
1597 }
1598
1599 packet = NULL;
1600 list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node)
1601 if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
1602 entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
1603 packet = entry;
1604 break;
1605 }
1606
1607 if (packet == NULL) {
1608 printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
1609 __FUNCTION__, dccp_role(sk), sk);
1610 dump_stack();
1611 return;
1612 }
1613
1614 do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback));
1615 hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count;
1616 hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno;
1617 hcrx->ccid3hcrx_bytes_recv = 0;
1618
1619 /* Convert to multiples of 10us */
1620 hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10;
1621 if (hcrx->ccid3hcrx_p == 0)
1622 hcrx->ccid3hcrx_pinv = ~0;
1623 else
1624 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
1625 dccp_send_ack(sk);
1626}
1627
1628static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
1629{
1630 const struct dccp_sock *dp = dccp_sk(sk);
1631 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1632
1633 if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
1634 return;
1635
1636 if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb))
1637 dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time);
1638
1639 if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
1640 const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv);
1641 const u32 pinv = htonl(hcrx->ccid3hcrx_pinv);
1642
1643 dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv));
1644 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv));
1645 }
1646
1647 DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
1648}
1649
1650/* Weights used to calculate loss event rate */
1651/*
1652 * These are integers as per section 8 of RFC3448. We can then divide by 4 *
1653 * when we use it.
1654 */
1655const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, };
1656
1657/*
1658 * args: fvalue - function value to match
1659 * returns: p closest to that value
1660 *
1661 * both fvalue and p are multiplied by 1,000,000 to use ints
1662 */
1663u32 calcx_reverse_lookup(u32 fvalue) {
1664 int ctr = 0;
1665 int small;
1666
1667 if (fvalue < calcx_lookup[0][1])
1668 return 0;
1669 if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1])
1670 small = 1;
1671 else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0])
1672 return 1000000;
1673 else
1674 small = 0;
1675 while (fvalue > calcx_lookup[ctr][small])
1676 ctr++;
1677 if (small)
1678 return (CALCX_SPLIT * ctr / CALCX_ARRSIZE);
1679 else
1680 return (1000000 * ctr / CALCX_ARRSIZE) ;
1681}
1682
1683/* calculate first loss interval
1684 *
1685 * returns estimated loss interval in usecs */
1686
1687static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
1688{
1689 struct dccp_sock *dp = dccp_sk(sk);
1690 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1691 struct ccid3_rx_hist_entry *entry, *next, *tail = NULL;
1692 u32 rtt, delta, x_recv, fval, p, tmp2;
1693 struct timeval tstamp, tmp_tv;
1694 int interval = 0;
1695 int win_count = 0;
1696 int step = 0;
1697 u64 tmp1;
1698
1699 list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1700 if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
1701 entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
1702 tail = entry;
1703
1704 switch (step) {
1705 case 0:
1706 tstamp = entry->ccid3hrx_tstamp;
1707 win_count = entry->ccid3hrx_win_count;
1708 step = 1;
1709 break;
1710 case 1:
1711 interval = win_count - entry->ccid3hrx_win_count;
1712 if (interval < 0)
1713 interval += TFRC_WIN_COUNT_LIMIT;
1714 if (interval > 4)
1715 goto found;
1716 break;
1717 }
1718 }
1719 }
1720
1721 if (step == 0) {
1722 printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n",
1723 __FUNCTION__, dccp_role(sk), sk);
1724 return ~0;
1725 }
1726
1727 if (interval == 0) {
1728 ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n",
1729 dccp_role(sk), sk);
1730 interval = 1;
1731 }
1732found:
1733 timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv);
1734 rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval;
1735 ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
1736 dccp_role(sk), sk, rtt);
1737 if (rtt == 0)
1738 rtt = 1;
1739
1740 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
1741 if (delta == 0)
1742 delta = 1;
1743
1744 x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta;
1745
1746 tmp1 = (u64)x_recv * (u64)rtt;
1747 do_div(tmp1,10000000);
1748 tmp2 = (u32)tmp1;
1749 fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
1750 /* do not alter order above or you will get overflow on 32 bit */
1751 p = calcx_reverse_lookup(fval);
1752 ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\
1753 dccp_role(sk), sk, x_recv, p);
1754
1755 if (p == 0)
1756 return ~0;
1757 else
1758 return 1000000 / p;
1759}
1760
1761static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
1762{
1763 struct dccp_sock *dp = dccp_sk(sk);
1764 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1765 struct ccid3_loss_interval_hist_entry *li_entry;
1766
1767 if (seq_loss != DCCP_MAX_SEQNO + 1) {
1768 ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n",
1769 dccp_role(sk), sk, seq_loss, win_loss);
1770
1771 if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
1772 struct ccid3_loss_interval_hist_entry *li_tail = NULL;
1773 int i;
1774
1775 ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk);
1776 for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) {
1777 li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC);
1778 if (li_entry == NULL) {
1779 ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
1780 ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n",
1781 dccp_role(sk), sk);
1782 return;
1783 }
1784 if (li_tail == NULL)
1785 li_tail = li_entry;
1786 list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist);
1787 }
1788
1789 li_entry->ccid3lih_seqno = seq_loss;
1790 li_entry->ccid3lih_win_count = win_loss;
1791
1792 li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk);
1793 }
1794 }
1795 /* FIXME: find end of interval */
1796}
1797
1798static void ccid3_hc_rx_detect_loss(struct sock *sk)
1799{
1800 struct dccp_sock *dp = dccp_sk(sk);
1801 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1802 struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet;
1803 struct ccid3_rx_hist_entry *a_loss = NULL;
1804 struct ccid3_rx_hist_entry *b_loss = NULL;
1805 u64 seq_loss = DCCP_MAX_SEQNO + 1;
1806 u8 win_loss = 0;
1807 u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
1808
1809 list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1810 if (num_later == 0) {
1811 b_loss = entry;
1812 break;
1813 } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
1814 entry->ccid3hrx_type == DCCP_PKT_DATAACK)
1815 --num_later;
1816 }
1817
1818 if (b_loss == NULL)
1819 goto out_update_li;
1820
1821 a_next = b_next;
1822 num_later = 1;
1823#if 0
1824 FIXME MERGE GIT!
1825 list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1826 if (num_later == 0) {
1827 a_loss = entry;
1828 break;
1829 } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
1830 entry->ccid3hrx_type == DCCP_PKT_DATAACK)
1831 --num_later;
1832 }
1833#endif
1834
1835 if (a_loss == NULL) {
1836 if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
1837 /* no loss event have occured yet */
1838 ccid3_pr_debug("%s, sk=%p, TODO: find a lost data "
1839 "packet by comparing to initial seqno\n",
1840 dccp_role(sk), sk);
1841 goto out_update_li;
1842 } else {
1843 pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history",
1844 __FUNCTION__, dccp_role(sk), sk);
1845 return;
1846 }
1847 }
1848
1849 /* Locate a lost data packet */
1850 entry = packet = b_loss;
1851#if 0
1852 FIXME MERGE GIT!
1853 list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
1854 u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno);
1855
1856 if (delta != 0) {
1857 if (packet->ccid3hrx_type == DCCP_PKT_DATA ||
1858 packet->ccid3hrx_type == DCCP_PKT_DATAACK)
1859 --delta;
1860 /*
1861 * FIXME: check this, probably this % usage is because
1862 * in earlier drafts the ndp count was just 8 bits
1863 * long, but now it cam be up to 24 bits long.
1864 */
1865#if 0
1866 if (delta % DCCP_NDP_LIMIT !=
1867 (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT)
1868#endif
1869 if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) {
1870 seq_loss = entry->ccid3hrx_seqno;
1871 dccp_inc_seqno(&seq_loss);
1872 }
1873 }
1874 packet = entry;
1875 if (packet == a_loss)
1876 break;
1877 }
1878#endif
1879
1880 if (seq_loss != DCCP_MAX_SEQNO + 1)
1881 win_loss = a_loss->ccid3hrx_win_count;
1882
1883out_update_li:
1884 ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
1885}
1886
1887static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk)
1888{
1889 struct dccp_sock *dp = dccp_sk(sk);
1890 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1891 struct ccid3_loss_interval_hist_entry *li_entry, *li_next;
1892 int i = 0;
1893 u32 i_tot;
1894 u32 i_tot0 = 0;
1895 u32 i_tot1 = 0;
1896 u32 w_tot = 0;
1897
1898 list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) {
1899 if (i < TFRC_RECV_IVAL_F_LENGTH) {
1900 i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i];
1901 w_tot += ccid3_hc_rx_w[i];
1902 }
1903
1904 if (i != 0)
1905 i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1];
1906
1907 if (++i > TFRC_RECV_IVAL_F_LENGTH)
1908 break;
1909 }
1910
1911 if (i != TFRC_RECV_IVAL_F_LENGTH) {
1912 pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n",
1913 __FUNCTION__, dccp_role(sk), sk);
1914 return 0;
1915 }
1916
1917 i_tot = max(i_tot0, i_tot1);
1918
1919 /* FIXME: Why do we do this? -Ian McDonald */
1920 if (i_tot * 4 < w_tot)
1921 i_tot = w_tot * 4;
1922
1923 return i_tot * 4 / w_tot;
1924}
1925
1926static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
1927{
1928 struct dccp_sock *dp = dccp_sk(sk);
1929 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1930 struct ccid3_rx_hist_entry *packet;
1931 struct timeval now;
1932 u8 win_count;
1933 u32 p_prev;
1934 int ins;
1935#if 0
1936 ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
1937 dccp_role(sk), sk, dccp_state_name(sk->sk_state),
1938 skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
1939#endif
1940 if (hcrx == NULL)
1941 return;
1942
1943 BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
1944 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
1945
1946 switch (DCCP_SKB_CB(skb)->dccpd_type) {
1947 case DCCP_PKT_ACK:
1948 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
1949 return;
1950 case DCCP_PKT_DATAACK:
1951 if (dp->dccps_options_received.dccpor_timestamp_echo == 0)
1952 break;
1953 p_prev = hcrx->ccid3hcrx_rtt;
1954 do_gettimeofday(&now);
1955 /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo -
1956 usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10);
1957 FIXME - I think above code is broken - have to look at options more, will also need
1958 to fix pr_debug below */
1959 if (p_prev != hcrx->ccid3hcrx_rtt)
1960 ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n",
1961 dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
1962 dp->dccps_options_received.dccpor_timestamp_echo,
1963 dp->dccps_options_received.dccpor_elapsed_time);
1964 break;
1965 case DCCP_PKT_DATA:
1966 break;
1967 default:
1968 ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
1969 dccp_role(sk), sk,
1970 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
1971 return;
1972 }
1973
1974 packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC);
1975 if (packet == NULL) {
1976 ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!",
1977 dccp_role(sk), sk);
1978 return;
1979 }
1980
1981 win_count = packet->ccid3hrx_win_count;
1982
1983 ins = ccid3_hc_rx_add_hist(sk, packet);
1984
1985 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
1986 return;
1987
1988 switch (hcrx->ccid3hcrx_state) {
1989 case TFRC_RSTATE_NO_DATA:
1990 ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n",
1991 dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
1992 ccid3_hc_rx_send_feedback(sk);
1993 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
1994 return;
1995 case TFRC_RSTATE_DATA:
1996 hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4;
1997 if (ins == 0) {
1998 do_gettimeofday(&now);
1999 if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) {
2000 hcrx->ccid3hcrx_tstamp_last_ack = now;
2001 ccid3_hc_rx_send_feedback(sk);
2002 }
2003 return;
2004 }
2005 break;
2006 default:
2007 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
2008 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
2009 dump_stack();
2010 return;
2011 }
2012
2013 /* Dealing with packet loss */
2014 ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n",
2015 dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
2016
2017 ccid3_hc_rx_detect_loss(sk);
2018 p_prev = hcrx->ccid3hcrx_p;
2019
2020 /* Calculate loss event rate */
2021 if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist))
2022 /* Scaling up by 1000000 as fixed decimal */
2023 hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk);
2024
2025 if (hcrx->ccid3hcrx_p > p_prev) {
2026 ccid3_hc_rx_send_feedback(sk);
2027 return;
2028 }
2029}
2030
2031static int ccid3_hc_rx_init(struct sock *sk)
2032{
2033 struct dccp_sock *dp = dccp_sk(sk);
2034 struct ccid3_hc_rx_sock *hcrx;
2035
2036 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
2037
2038 hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
2039 if (hcrx == NULL)
2040 return -ENOMEM;
2041
2042 memset(hcrx, 0, sizeof(*hcrx));
2043
2044 if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE &&
2045 dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE)
2046 hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size;
2047 else
2048 hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
2049
2050 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
2051 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
2052 INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist);
2053
2054 return 0;
2055}
2056
2057static void ccid3_hc_rx_exit(struct sock *sk)
2058{
2059 struct dccp_sock *dp = dccp_sk(sk);
2060 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
2061
2062 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
2063
2064 if (hcrx == NULL)
2065 return;
2066
2067 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
2068
2069 /* Empty packet history */
2070 ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist);
2071
2072 /* Empty loss interval history */
2073 ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
2074
2075 kfree(dp->dccps_hc_rx_ccid_private);
2076 dp->dccps_hc_rx_ccid_private = NULL;
2077}
2078
2079static struct ccid ccid3 = {
2080 .ccid_id = 3,
2081 .ccid_name = "ccid3",
2082 .ccid_owner = THIS_MODULE,
2083 .ccid_init = ccid3_init,
2084 .ccid_exit = ccid3_exit,
2085 .ccid_hc_tx_init = ccid3_hc_tx_init,
2086 .ccid_hc_tx_exit = ccid3_hc_tx_exit,
2087 .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
2088 .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
2089 .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
2090 .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
2091 .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
2092 .ccid_hc_rx_init = ccid3_hc_rx_init,
2093 .ccid_hc_rx_exit = ccid3_hc_rx_exit,
2094 .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
2095 .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
2096};
2097
2098module_param(ccid3_debug, int, 0444);
2099MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
2100
2101static __init int ccid3_module_init(void)
2102{
2103 int rc = -ENOMEM;
2104
2105 ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history",
2106 sizeof(struct ccid3_tx_hist_entry), 0,
2107 SLAB_HWCACHE_ALIGN, NULL, NULL);
2108 if (ccid3_tx_hist_slab == NULL)
2109 goto out;
2110
2111 ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history",
2112 sizeof(struct ccid3_rx_hist_entry), 0,
2113 SLAB_HWCACHE_ALIGN, NULL, NULL);
2114 if (ccid3_rx_hist_slab == NULL)
2115 goto out_free_tx_history;
2116
2117 ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history",
2118 sizeof(struct ccid3_loss_interval_hist_entry), 0,
2119 SLAB_HWCACHE_ALIGN, NULL, NULL);
2120 if (ccid3_loss_interval_hist_slab == NULL)
2121 goto out_free_rx_history;
2122
2123 rc = ccid_register(&ccid3);
2124 if (rc != 0)
2125 goto out_free_loss_interval_history;
2126
2127out:
2128 return rc;
2129out_free_loss_interval_history:
2130 kmem_cache_destroy(ccid3_loss_interval_hist_slab);
2131 ccid3_loss_interval_hist_slab = NULL;
2132out_free_rx_history:
2133 kmem_cache_destroy(ccid3_rx_hist_slab);
2134 ccid3_rx_hist_slab = NULL;
2135out_free_tx_history:
2136 kmem_cache_destroy(ccid3_tx_hist_slab);
2137 ccid3_tx_hist_slab = NULL;
2138 goto out;
2139}
2140module_init(ccid3_module_init);
2141
2142static __exit void ccid3_module_exit(void)
2143{
2144 ccid_unregister(&ccid3);
2145
2146 if (ccid3_tx_hist_slab != NULL) {
2147 kmem_cache_destroy(ccid3_tx_hist_slab);
2148 ccid3_tx_hist_slab = NULL;
2149 }
2150 if (ccid3_rx_hist_slab != NULL) {
2151 kmem_cache_destroy(ccid3_rx_hist_slab);
2152 ccid3_rx_hist_slab = NULL;
2153 }
2154 if (ccid3_loss_interval_hist_slab != NULL) {
2155 kmem_cache_destroy(ccid3_loss_interval_hist_slab);
2156 ccid3_loss_interval_hist_slab = NULL;
2157 }
2158}
2159module_exit(ccid3_module_exit);
2160
2161MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz> & Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
2162MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
2163MODULE_LICENSE("GPL");
2164MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644
index 000000000000..5d6b623e64da
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,137 @@
1/*
2 * net/dccp/ccids/ccid3.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36#ifndef _DCCP_CCID3_H_
37#define _DCCP_CCID3_H_
38
39#include <linux/types.h>
40#include <linux/list.h>
41#include <linux/timer.h>
42
43struct ccid3_tx_hist_entry {
44 struct list_head ccid3htx_node;
45 u64 ccid3htx_seqno:48,
46 ccid3htx_win_count:8,
47 ccid3htx_sent:1;
48 struct timeval ccid3htx_tstamp;
49};
50
51struct ccid3_options_received {
52 u64 ccid3or_seqno:48,
53 ccid3or_loss_intervals_idx:16;
54 u16 ccid3or_loss_intervals_len;
55 u32 ccid3or_loss_event_rate;
56 u32 ccid3or_receive_rate;
57};
58
59/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block
60 *
61 * @ccid3hctx_state - Sender state
62 * @ccid3hctx_x - Current sending rate
63 * @ccid3hctx_x_recv - Receive rate
64 * @ccid3hctx_x_calc - Calculated send (?) rate
65 * @ccid3hctx_s - Packet size
66 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
67 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
68 * @ccid3hctx_last_win_count - Last window counter sent
69 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent
70 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
71 * @ccid3hctx_idle - FIXME
72 * @ccid3hctx_t_ld - Time last doubled during slow start
73 * @ccid3hctx_t_nom - Nominal send time of next packet
74 * @ccid3hctx_t_ipi - Interpacket (send) interval
75 * @ccid3hctx_delta - Send timer delta
76 * @ccid3hctx_hist - Packet history
77 */
78struct ccid3_hc_tx_sock {
79 u32 ccid3hctx_x;
80 u32 ccid3hctx_x_recv;
81 u32 ccid3hctx_x_calc;
82 u16 ccid3hctx_s;
83 u32 ccid3hctx_rtt;
84 u32 ccid3hctx_p;
85 u8 ccid3hctx_state;
86 u8 ccid3hctx_last_win_count;
87 u8 ccid3hctx_idle;
88 struct timeval ccid3hctx_t_last_win_count;
89 struct timer_list ccid3hctx_no_feedback_timer;
90 struct timeval ccid3hctx_t_ld;
91 struct timeval ccid3hctx_t_nom;
92 u32 ccid3hctx_t_ipi;
93 u32 ccid3hctx_delta;
94 struct list_head ccid3hctx_hist;
95 struct ccid3_options_received ccid3hctx_options_received;
96};
97
98struct ccid3_loss_interval_hist_entry {
99 struct list_head ccid3lih_node;
100 u64 ccid3lih_seqno:48,
101 ccid3lih_win_count:4;
102 u32 ccid3lih_interval;
103};
104
105struct ccid3_rx_hist_entry {
106 struct list_head ccid3hrx_node;
107 u64 ccid3hrx_seqno:48,
108 ccid3hrx_win_count:4,
109 ccid3hrx_type:4;
110 u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */
111 struct timeval ccid3hrx_tstamp;
112};
113
114struct ccid3_hc_rx_sock {
115 u64 ccid3hcrx_seqno_last_counter:48,
116 ccid3hcrx_state:8,
117 ccid3hcrx_last_counter:4;
118 unsigned long ccid3hcrx_rtt;
119 u32 ccid3hcrx_p;
120 u32 ccid3hcrx_bytes_recv;
121 struct timeval ccid3hcrx_tstamp_last_feedback;
122 struct timeval ccid3hcrx_tstamp_last_ack;
123 struct list_head ccid3hcrx_hist;
124 struct list_head ccid3hcrx_loss_interval_hist;
125 u16 ccid3hcrx_s;
126 u32 ccid3hcrx_pinv;
127 u32 ccid3hcrx_elapsed_time;
128 u32 ccid3hcrx_x_recv;
129};
130
131#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
132 ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
133
134#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
135 ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
136
137#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
new file mode 100644
index 000000000000..fb83454102c1
--- /dev/null
+++ b/net/dccp/dccp.h
@@ -0,0 +1,422 @@
1#ifndef _DCCP_H
2#define _DCCP_H
3/*
4 * net/dccp/dccp.h
5 *
6 * An implementation of the DCCP protocol
7 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/dccp.h>
15#include <net/snmp.h>
16#include <net/sock.h>
17#include <net/tcp.h>
18
19#define DCCP_DEBUG
20
21#ifdef DCCP_DEBUG
22extern int dccp_debug;
23
24#define dccp_pr_debug(format, a...) \
25 do { if (dccp_debug) \
26 printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
27 } while (0)
28#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0)
29#else
30#define dccp_pr_debug(format, a...)
31#define dccp_pr_debug_cat(format, a...)
32#endif
33
34extern struct inet_hashinfo dccp_hashinfo;
35
36extern atomic_t dccp_orphan_count;
37extern int dccp_tw_count;
38extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
39
40extern void dccp_time_wait(struct sock *sk, int state, int timeo);
41
42/* FIXME: Right size this */
43#define DCCP_MAX_OPT_LEN 128
44
45#define DCCP_MAX_PACKET_HDR 32
46
47#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
48
49#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
50 * state, about 60 seconds */
51
52/* draft-ietf-dccp-spec-11.txt initial RTO value */
53#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
54
55/* Maximal interval between probes for local resources. */
56#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
57
58#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
59
60extern struct proto dccp_v4_prot;
61
62/* is seq1 < seq2 ? */
63static inline const int before48(const u64 seq1, const u64 seq2)
64{
65 return (const s64)((seq1 << 16) - (seq2 << 16)) < 0;
66}
67
68/* is seq1 > seq2 ? */
69static inline const int after48(const u64 seq1, const u64 seq2)
70{
71 return (const s64)((seq2 << 16) - (seq1 << 16)) < 0;
72}
73
74/* is seq2 <= seq1 <= seq3 ? */
75static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3)
76{
77 return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
78}
79
80static inline u64 max48(const u64 seq1, const u64 seq2)
81{
82 return after48(seq1, seq2) ? seq1 : seq2;
83}
84
85enum {
86 DCCP_MIB_NUM = 0,
87 DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
88 DCCP_MIB_ESTABRESETS, /* EstabResets */
89 DCCP_MIB_CURRESTAB, /* CurrEstab */
90 DCCP_MIB_OUTSEGS, /* OutSegs */
91 DCCP_MIB_OUTRSTS,
92 DCCP_MIB_ABORTONTIMEOUT,
93 DCCP_MIB_TIMEOUTS,
94 DCCP_MIB_ABORTFAILED,
95 DCCP_MIB_PASSIVEOPENS,
96 DCCP_MIB_ATTEMPTFAILS,
97 DCCP_MIB_OUTDATAGRAMS,
98 DCCP_MIB_INERRS,
99 DCCP_MIB_OPTMANDATORYERROR,
100 DCCP_MIB_INVALIDOPT,
101 __DCCP_MIB_MAX
102};
103
104#define DCCP_MIB_MAX __DCCP_MIB_MAX
105struct dccp_mib {
106 unsigned long mibs[DCCP_MIB_MAX];
107} __SNMP_MIB_ALIGN__;
108
109DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
110#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
111#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
112#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
113#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
114#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val)
115#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val)
116
117extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
118extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
119
120extern int dccp_send_response(struct sock *sk);
121extern void dccp_send_ack(struct sock *sk);
122extern void dccp_send_delayed_ack(struct sock *sk);
123extern void dccp_send_sync(struct sock *sk, u64 seq);
124
125extern void dccp_init_xmit_timers(struct sock *sk);
126static inline void dccp_clear_xmit_timers(struct sock *sk)
127{
128 inet_csk_clear_xmit_timers(sk);
129}
130
131extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
132
133extern const char *dccp_packet_name(const int type);
134extern const char *dccp_state_name(const int state);
135
136static inline void dccp_set_state(struct sock *sk, const int state)
137{
138 const int oldstate = sk->sk_state;
139
140 dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
141 dccp_role(sk), sk,
142 dccp_state_name(oldstate), dccp_state_name(state));
143 WARN_ON(state == oldstate);
144
145 switch (state) {
146 case DCCP_OPEN:
147 if (oldstate != DCCP_OPEN)
148 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
149 break;
150
151 case DCCP_CLOSED:
152 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
153 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
154
155 sk->sk_prot->unhash(sk);
156 if (inet_csk(sk)->icsk_bind_hash != NULL &&
157 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
158 inet_put_port(&dccp_hashinfo, sk);
159 /* fall through */
160 default:
161 if (oldstate == DCCP_OPEN)
162 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
163 }
164
165 /* Change state AFTER socket is unhashed to avoid closed
166 * socket sitting in hash tables.
167 */
168 sk->sk_state = state;
169}
170
171static inline void dccp_done(struct sock *sk)
172{
173 dccp_set_state(sk, DCCP_CLOSED);
174 dccp_clear_xmit_timers(sk);
175
176 sk->sk_shutdown = SHUTDOWN_MASK;
177
178 if (!sock_flag(sk, SOCK_DEAD))
179 sk->sk_state_change(sk);
180 else
181 inet_csk_destroy_sock(sk);
182}
183
184static inline void dccp_openreq_init(struct request_sock *req,
185 struct dccp_sock *dp,
186 struct sk_buff *skb)
187{
188 /*
189 * FIXME: fill in the other req fields from the DCCP options
190 * received
191 */
192 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
193 inet_rsk(req)->acked = 0;
194 req->rcv_wnd = 0;
195}
196
197extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len,
198 struct sk_buff *skb);
199extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
200
201extern struct sock *dccp_create_openreq_child(struct sock *sk,
202 const struct request_sock *req,
203 const struct sk_buff *skb);
204
205extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
206
207extern void dccp_v4_err(struct sk_buff *skb, u32);
208
209extern int dccp_v4_rcv(struct sk_buff *skb);
210
211extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
212 struct sk_buff *skb,
213 struct request_sock *req,
214 struct dst_entry *dst);
215extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
216 struct request_sock *req,
217 struct request_sock **prev);
218
219extern int dccp_child_process(struct sock *parent, struct sock *child,
220 struct sk_buff *skb);
221extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
222 struct dccp_hdr *dh, unsigned len);
223extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
224 const struct dccp_hdr *dh, const unsigned len);
225
226extern void dccp_close(struct sock *sk, long timeout);
227extern struct sk_buff *dccp_make_response(struct sock *sk,
228 struct dst_entry *dst,
229 struct request_sock *req);
230
231extern int dccp_connect(struct sock *sk);
232extern int dccp_disconnect(struct sock *sk, int flags);
233extern int dccp_getsockopt(struct sock *sk, int level, int optname,
234 char *optval, int *optlen);
235extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
236extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
237 size_t size);
238extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
239 struct msghdr *msg, size_t len, int nonblock,
240 int flags, int *addr_len);
241extern int dccp_setsockopt(struct sock *sk, int level, int optname,
242 char *optval, int optlen);
243extern void dccp_shutdown(struct sock *sk, int how);
244
245extern int dccp_v4_checksum(struct sk_buff *skb);
246
247extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code);
248extern void dccp_send_close(struct sock *sk);
249
250struct dccp_skb_cb {
251 __u8 dccpd_type;
252 __u8 dccpd_reset_code;
253 __u8 dccpd_service;
254 __u8 dccpd_ccval;
255 __u64 dccpd_seq;
256 __u64 dccpd_ack_seq;
257 int dccpd_opt_len;
258};
259
260#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
261
262static inline int dccp_non_data_packet(const struct sk_buff *skb)
263{
264 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
265
266 return type == DCCP_PKT_ACK ||
267 type == DCCP_PKT_CLOSE ||
268 type == DCCP_PKT_CLOSEREQ ||
269 type == DCCP_PKT_RESET ||
270 type == DCCP_PKT_SYNC ||
271 type == DCCP_PKT_SYNCACK;
272}
273
274static inline int dccp_packet_without_ack(const struct sk_buff *skb)
275{
276 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
277
278 return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
279}
280
281#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
282#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
283
284static inline void dccp_set_seqno(u64 *seqno, u64 value)
285{
286 if (value > DCCP_MAX_SEQNO)
287 value -= DCCP_MAX_SEQNO + 1;
288 *seqno = value;
289}
290
291static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
292{
293 return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
294}
295
296static inline void dccp_inc_seqno(u64 *seqno)
297{
298 if (++*seqno > DCCP_MAX_SEQNO)
299 *seqno = 0;
300}
301
302static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
303{
304 struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh));
305
306#if defined(__LITTLE_ENDIAN_BITFIELD)
307 dh->dccph_seq = htonl((gss >> 32)) >> 8;
308#elif defined(__BIG_ENDIAN_BITFIELD)
309 dh->dccph_seq = htonl((gss >> 32));
310#else
311#error "Adjust your <asm/byteorder.h> defines"
312#endif
313 dhx->dccph_seq_low = htonl(gss & 0xffffffff);
314}
315
316static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr)
317{
318#if defined(__LITTLE_ENDIAN_BITFIELD)
319 dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
320#elif defined(__BIG_ENDIAN_BITFIELD)
321 dhack->dccph_ack_nr_high = htonl((gsr >> 32));
322#else
323#error "Adjust your <asm/byteorder.h> defines"
324#endif
325 dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
326}
327
328static inline void dccp_update_gsr(struct sock *sk, u64 seq)
329{
330 struct dccp_sock *dp = dccp_sk(sk);
331 u64 tmp_gsr;
332
333 dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4));
334 dp->dccps_gsr = seq;
335 dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr));
336 dccp_set_seqno(&dp->dccps_swh,
337 dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4);
338}
339
340static inline void dccp_update_gss(struct sock *sk, u64 seq)
341{
342 struct dccp_sock *dp = dccp_sk(sk);
343 u64 tmp_gss;
344
345 dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1);
346 dp->dccps_awl = max48(tmp_gss, dp->dccps_iss);
347 dp->dccps_awh = dp->dccps_gss = seq;
348}
349
350extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
351extern void dccp_insert_option_elapsed_time(struct sock *sk,
352 struct sk_buff *skb,
353 u32 elapsed_time);
354extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
355 unsigned char option,
356 const void *value, unsigned char len);
357
358extern struct socket *dccp_ctl_socket;
359
360#define DCCP_ACKPKTS_STATE_RECEIVED 0
361#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
362#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
363
364#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
365#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
366
367/** struct dccp_ackpkts - acknowledgeable packets
368 *
369 * This data structure is the one defined in the DCCP draft
370 * Appendix A.
371 *
372 * @dccpap_buf_head - circular buffer head
373 * @dccpap_buf_tail - circular buffer tail
374 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the buffer (i.e. %dccpap_buf_head)
375 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0
376 *
377 * Additionally, the HC-Receiver must keep some information about the
378 * Ack Vectors it has recently sent. For each packet sent carrying an
379 * Ack Vector, it remembers four variables:
380 *
381 * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno)
382 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
383 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno)
384 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
385 *
386 * @dccpap_buf_len - circular buffer length
387 * @dccpap_buf - circular buffer of acknowledgeable packets
388 */
389struct dccp_ackpkts {
390 unsigned int dccpap_buf_head;
391 unsigned int dccpap_buf_tail;
392 u64 dccpap_buf_ackno;
393 u64 dccpap_ack_seqno;
394 u64 dccpap_ack_ackno;
395 unsigned int dccpap_ack_ptr;
396 unsigned int dccpap_buf_vector_len;
397 unsigned int dccpap_ack_vector_len;
398 unsigned int dccpap_buf_len;
399 unsigned long dccpap_time;
400 u8 dccpap_buf_nonce;
401 u8 dccpap_ack_nonce;
402 u8 dccpap_buf[0];
403};
404
405extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority);
406extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
407extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
408extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
409 struct sock *sk, u64 ackno);
410
411#ifdef DCCP_DEBUG
412extern void dccp_ackvector_print(const u64 ackno,
413 const unsigned char *vector, int len);
414extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
415#else
416static inline void dccp_ackvector_print(const u64 ackno,
417 const unsigned char *vector,
418 int len) { }
419static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
420#endif
421
422#endif /* _DCCP_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
new file mode 100644
index 000000000000..622e976a51fe
--- /dev/null
+++ b/net/dccp/input.c
@@ -0,0 +1,510 @@
1/*
2 * net/dccp/input.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
22static void dccp_fin(struct sock *sk, struct sk_buff *skb)
23{
24 sk->sk_shutdown |= RCV_SHUTDOWN;
25 sock_set_flag(sk, SOCK_DONE);
26 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
27 __skb_queue_tail(&sk->sk_receive_queue, skb);
28 skb_set_owner_r(skb, sk);
29 sk->sk_data_ready(sk, 0);
30}
31
32static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
33{
34 switch (sk->sk_state) {
35 case DCCP_PARTOPEN:
36 case DCCP_OPEN:
37 dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
38 dccp_fin(sk, skb);
39 dccp_set_state(sk, DCCP_CLOSED);
40 break;
41 }
42}
43
44static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
45{
46 /*
47 * Step 7: Check for unexpected packet types
48 * If (S.is_server and P.type == CloseReq)
49 * Send Sync packet acknowledging P.seqno
50 * Drop packet and return
51 */
52 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
53 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
54 return;
55 }
56
57 switch (sk->sk_state) {
58 case DCCP_PARTOPEN:
59 case DCCP_OPEN:
60 dccp_set_state(sk, DCCP_CLOSING);
61 dccp_send_close(sk);
62 break;
63 }
64}
65
66static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
67{
68 struct dccp_sock *dp = dccp_sk(sk);
69
70 if (dp->dccps_options.dccpo_send_ack_vector)
71 dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
72 DCCP_SKB_CB(skb)->dccpd_ack_seq);
73}
74
75static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
76{
77 const struct dccp_hdr *dh = dccp_hdr(skb);
78 struct dccp_sock *dp = dccp_sk(sk);
79 u64 lswl = dp->dccps_swl;
80 u64 lawl = dp->dccps_awl;
81
82 /*
83 * Step 5: Prepare sequence numbers for Sync
84 * If P.type == Sync or P.type == SyncAck,
85 * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
86 * / * P is valid, so update sequence number variables
87 * accordingly. After this update, P will pass the tests
88 * in Step 6. A SyncAck is generated if necessary in
89 * Step 15 * /
90 * Update S.GSR, S.SWL, S.SWH
91 * Otherwise,
92 * Drop packet and return
93 */
94 if (dh->dccph_type == DCCP_PKT_SYNC ||
95 dh->dccph_type == DCCP_PKT_SYNCACK) {
96 if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) &&
97 !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
98 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
99 else
100 return -1;
101 /*
102 * Step 6: Check sequence numbers
103 * Let LSWL = S.SWL and LAWL = S.AWL
104 * If P.type == CloseReq or P.type == Close or P.type == Reset,
105 * LSWL := S.GSR + 1, LAWL := S.GAR
106 * If LSWL <= P.seqno <= S.SWH
107 * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
108 * Update S.GSR, S.SWL, S.SWH
109 * If P.type != Sync,
110 * Update S.GAR
111 * Otherwise,
112 * Send Sync packet acknowledging P.seqno
113 * Drop packet and return
114 */
115 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
116 dh->dccph_type == DCCP_PKT_CLOSE ||
117 dh->dccph_type == DCCP_PKT_RESET) {
118 lswl = dp->dccps_gsr;
119 dccp_inc_seqno(&lswl);
120 lawl = dp->dccps_gar;
121 }
122
123 if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
124 (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
125 between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) {
126 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
127
128 if (dh->dccph_type != DCCP_PKT_SYNC &&
129 DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
130 dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
131 } else {
132 dccp_pr_debug("Step 6 failed, sending SYNC...\n");
133 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
134 return -1;
135 }
136
137 return 0;
138}
139
140int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
141 const struct dccp_hdr *dh, const unsigned len)
142{
143 struct dccp_sock *dp = dccp_sk(sk);
144
145 if (dccp_check_seqno(sk, skb))
146 goto discard;
147
148 if (dccp_parse_options(sk, skb))
149 goto discard;
150
151 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
152 dccp_event_ack_recv(sk, skb);
153
154 /*
155 * FIXME: check ECN to see if we should use
156 * DCCP_ACKPKTS_STATE_ECN_MARKED
157 */
158 if (dp->dccps_options.dccpo_send_ack_vector) {
159 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
160
161 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
162 DCCP_SKB_CB(skb)->dccpd_seq,
163 DCCP_ACKPKTS_STATE_RECEIVED)) {
164 LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n"));
165 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
166 inet_csk_schedule_ack(sk);
167 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
168 goto discard;
169 }
170
171 /*
172 * FIXME: this activation is probably wrong, have to study more
173 * TCP delack machinery and how it fits into DCCP draft, but
174 * for now it kinda "works" 8)
175 */
176 if (!inet_csk_ack_scheduled(sk)) {
177 inet_csk_schedule_ack(sk);
178 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX);
179 }
180 }
181
182 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
183 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
184
185 switch (dccp_hdr(skb)->dccph_type) {
186 case DCCP_PKT_DATAACK:
187 case DCCP_PKT_DATA:
188 /*
189 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option
190 * if it is.
191 */
192 __skb_pull(skb, dh->dccph_doff * 4);
193 __skb_queue_tail(&sk->sk_receive_queue, skb);
194 skb_set_owner_r(skb, sk);
195 sk->sk_data_ready(sk, 0);
196 return 0;
197 case DCCP_PKT_ACK:
198 goto discard;
199 case DCCP_PKT_RESET:
200 /*
201 * Step 9: Process Reset
202 * If P.type == Reset,
203 * Tear down connection
204 * S.state := TIMEWAIT
205 * Set TIMEWAIT timer
206 * Drop packet and return
207 */
208 dccp_fin(sk, skb);
209 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
210 return 0;
211 case DCCP_PKT_CLOSEREQ:
212 dccp_rcv_closereq(sk, skb);
213 goto discard;
214 case DCCP_PKT_CLOSE:
215 dccp_rcv_close(sk, skb);
216 return 0;
217 case DCCP_PKT_REQUEST:
218 /* Step 7
219 * or (S.is_server and P.type == Response)
220 * or (S.is_client and P.type == Request)
221 * or (S.state >= OPEN and P.type == Request
222 * and P.seqno >= S.OSR)
223 * or (S.state >= OPEN and P.type == Response
224 * and P.seqno >= S.OSR)
225 * or (S.state == RESPOND and P.type == Data),
226 * Send Sync packet acknowledging P.seqno
227 * Drop packet and return
228 */
229 if (dp->dccps_role != DCCP_ROLE_LISTEN)
230 goto send_sync;
231 goto check_seq;
232 case DCCP_PKT_RESPONSE:
233 if (dp->dccps_role != DCCP_ROLE_CLIENT)
234 goto send_sync;
235check_seq:
236 if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
237send_sync:
238 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
239 }
240 break;
241 }
242
243 DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
244discard:
245 __kfree_skb(skb);
246 return 0;
247}
248
249static int dccp_rcv_request_sent_state_process(struct sock *sk,
250 struct sk_buff *skb,
251 const struct dccp_hdr *dh,
252 const unsigned len)
253{
254 /*
255 * Step 4: Prepare sequence numbers in REQUEST
256 * If S.state == REQUEST,
257 * If (P.type == Response or P.type == Reset)
258 * and S.AWL <= P.ackno <= S.AWH,
259 * / * Set sequence number variables corresponding to the
260 * other endpoint, so P will pass the tests in Step 6 * /
261 * Set S.GSR, S.ISR, S.SWL, S.SWH
262 * / * Response processing continues in Step 10; Reset
263 * processing continues in Step 9 * /
264 */
265 if (dh->dccph_type == DCCP_PKT_RESPONSE) {
266 const struct inet_connection_sock *icsk = inet_csk(sk);
267 struct dccp_sock *dp = dccp_sk(sk);
268
269 /* Stop the REQUEST timer */
270 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
271 BUG_TRAP(sk->sk_send_head != NULL);
272 __kfree_skb(sk->sk_send_head);
273 sk->sk_send_head = NULL;
274
275 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) {
276 dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n",
277 dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh);
278 goto out_invalid_packet;
279 }
280
281 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
282 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
283
284 if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
285 ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
286 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
287 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
288 /* FIXME: send appropriate RESET code */
289 goto out_invalid_packet;
290 }
291
292 dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
293
294 /*
295 * Step 10: Process REQUEST state (second part)
296 * If S.state == REQUEST,
297 * / * If we get here, P is a valid Response from the server (see
298 * Step 4), and we should move to PARTOPEN state. PARTOPEN
299 * means send an Ack, don't send Data packets, retransmit
300 * Acks periodically, and always include any Init Cookie from
301 * the Response * /
302 * S.state := PARTOPEN
303 * Set PARTOPEN timer
304 * Continue with S.state == PARTOPEN
305 * / * Step 12 will send the Ack completing the three-way
306 * handshake * /
307 */
308 dccp_set_state(sk, DCCP_PARTOPEN);
309
310 /* Make sure socket is routed, for correct metrics. */
311 inet_sk_rebuild_header(sk);
312
313 if (!sock_flag(sk, SOCK_DEAD)) {
314 sk->sk_state_change(sk);
315 sk_wake_async(sk, 0, POLL_OUT);
316 }
317
318 if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
319 icsk->icsk_accept_queue.rskq_defer_accept) {
320 /* Save one ACK. Data will be ready after
321 * several ticks, if write_pending is set.
322 *
323 * It may be deleted, but with this feature tcpdumps
324 * look so _wonderfully_ clever, that I was not able
325 * to stand against the temptation 8) --ANK
326 */
327 /*
328 * OK, in DCCP we can as well do a similar trick, its
329 * even in the draft, but there is no need for us to
330 * schedule an ack here, as dccp_sendmsg does this for
331 * us, also stated in the draft. -acme
332 */
333 __kfree_skb(skb);
334 return 0;
335 }
336 dccp_send_ack(sk);
337 return -1;
338 }
339
340out_invalid_packet:
341 return 1; /* dccp_v4_do_rcv will send a reset, but...
342 FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */
343}
344
345static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
346 struct sk_buff *skb,
347 const struct dccp_hdr *dh,
348 const unsigned len)
349{
350 int queued = 0;
351
352 switch (dh->dccph_type) {
353 case DCCP_PKT_RESET:
354 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
355 break;
356 case DCCP_PKT_DATAACK:
357 case DCCP_PKT_ACK:
358 /*
359 * FIXME: we should be reseting the PARTOPEN (DELACK) timer here,
360 * but only if we haven't used the DELACK timer for something else,
361 * like sending a delayed ack for a TIMESTAMP echo, etc, for now
362 * were not clearing it, sending an extra ACK when there is nothing
363 * else to do in DELACK is not a big deal after all.
364 */
365
366 /* Stop the PARTOPEN timer */
367 if (sk->sk_state == DCCP_PARTOPEN)
368 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
369
370 dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
371 dccp_set_state(sk, DCCP_OPEN);
372
373 if (dh->dccph_type == DCCP_PKT_DATAACK) {
374 dccp_rcv_established(sk, skb, dh, len);
375 queued = 1; /* packet was queued (by dccp_rcv_established) */
376 }
377 break;
378 }
379
380 return queued;
381}
382
383int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
384 struct dccp_hdr *dh, unsigned len)
385{
386 struct dccp_sock *dp = dccp_sk(sk);
387 const int old_state = sk->sk_state;
388 int queued = 0;
389
390 if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) {
391 if (dccp_check_seqno(sk, skb))
392 goto discard;
393
394 /*
395 * Step 8: Process options and mark acknowledgeable
396 */
397 if (dccp_parse_options(sk, skb))
398 goto discard;
399
400 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
401 dccp_event_ack_recv(sk, skb);
402
403 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
404 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
405
406 /*
407 * FIXME: check ECN to see if we should use
408 * DCCP_ACKPKTS_STATE_ECN_MARKED
409 */
410 if (dp->dccps_options.dccpo_send_ack_vector) {
411 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
412 DCCP_SKB_CB(skb)->dccpd_seq,
413 DCCP_ACKPKTS_STATE_RECEIVED))
414 goto discard;
415 /*
416 * FIXME: this activation is probably wrong, have to study more
417 * TCP delack machinery and how it fits into DCCP draft, but
418 * for now it kinda "works" 8)
419 */
420 if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 &&
421 !inet_csk_ack_scheduled(sk)) {
422 inet_csk_schedule_ack(sk);
423 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
424 }
425 }
426 }
427
428 /*
429 * Step 9: Process Reset
430 * If P.type == Reset,
431 * Tear down connection
432 * S.state := TIMEWAIT
433 * Set TIMEWAIT timer
434 * Drop packet and return
435 */
436 if (dh->dccph_type == DCCP_PKT_RESET) {
437 /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */
438 dccp_fin(sk, skb);
439 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
440 return 0;
441 /*
442 * Step 7: Check for unexpected packet types
443 * If (S.is_server and P.type == CloseReq)
444 * or (S.is_server and P.type == Response)
445 * or (S.is_client and P.type == Request)
446 * or (S.state == RESPOND and P.type == Data),
447 * Send Sync packet acknowledging P.seqno
448 * Drop packet and return
449 */
450 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
451 (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
452 (dp->dccps_role == DCCP_ROLE_CLIENT &&
453 dh->dccph_type == DCCP_PKT_REQUEST) ||
454 (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
455 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
456 goto discard;
457 }
458
459 switch (sk->sk_state) {
460 case DCCP_CLOSED:
461 return 1;
462
463 case DCCP_LISTEN:
464 if (dh->dccph_type == DCCP_PKT_ACK ||
465 dh->dccph_type == DCCP_PKT_DATAACK)
466 return 1;
467
468 if (dh->dccph_type == DCCP_PKT_RESET)
469 goto discard;
470
471 if (dh->dccph_type == DCCP_PKT_REQUEST) {
472 if (dccp_v4_conn_request(sk, skb) < 0)
473 return 1;
474
475 /* FIXME: do congestion control initialization */
476 goto discard;
477 }
478 goto discard;
479
480 case DCCP_REQUESTING:
481 /* FIXME: do congestion control initialization */
482
483 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
484 if (queued >= 0)
485 return queued;
486
487 __kfree_skb(skb);
488 return 0;
489
490 case DCCP_RESPOND:
491 case DCCP_PARTOPEN:
492 queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len);
493 break;
494 }
495
496 if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) {
497 switch (old_state) {
498 case DCCP_PARTOPEN:
499 sk->sk_state_change(sk);
500 sk_wake_async(sk, 0, POLL_OUT);
501 break;
502 }
503 }
504
505 if (!queued) {
506discard:
507 __kfree_skb(skb);
508 }
509 return 0;
510}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
new file mode 100644
index 000000000000..083bacaecb3b
--- /dev/null
+++ b/net/dccp/ipv4.c
@@ -0,0 +1,1289 @@
1/*
2 * net/dccp/ipv4.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/icmp.h>
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/random.h>
19
20#include <net/icmp.h>
21#include <net/inet_hashtables.h>
22#include <net/sock.h>
23#include <net/tcp_states.h>
24#include <net/xfrm.h>
25
26#include "ccid.h"
27#include "dccp.h"
28
29struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
30 .lhash_lock = RW_LOCK_UNLOCKED,
31 .lhash_users = ATOMIC_INIT(0),
32 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
33 .portalloc_lock = SPIN_LOCK_UNLOCKED,
34 .port_rover = 1024 - 1,
35};
36
37static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
38{
39 return inet_csk_get_port(&dccp_hashinfo, sk, snum);
40}
41
42static void dccp_v4_hash(struct sock *sk)
43{
44 inet_hash(&dccp_hashinfo, sk);
45}
46
47static void dccp_v4_unhash(struct sock *sk)
48{
49 inet_unhash(&dccp_hashinfo, sk);
50}
51
52/* called with local bh disabled */
53static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
54 struct inet_timewait_sock **twp)
55{
56 struct inet_sock *inet = inet_sk(sk);
57 const u32 daddr = inet->rcv_saddr;
58 const u32 saddr = inet->daddr;
59 const int dif = sk->sk_bound_dev_if;
60 INET_ADDR_COOKIE(acookie, saddr, daddr)
61 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
62 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size);
63 struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
64 const struct sock *sk2;
65 const struct hlist_node *node;
66 struct inet_timewait_sock *tw;
67
68 write_lock(&head->lock);
69
70 /* Check TIME-WAIT sockets first. */
71 sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
72 tw = inet_twsk(sk2);
73
74 if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
75 goto not_unique;
76 }
77 tw = NULL;
78
79 /* And established part... */
80 sk_for_each(sk2, node, &head->chain) {
81 if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
82 goto not_unique;
83 }
84
85 /* Must record num and sport now. Otherwise we will see
86 * in hash table socket with a funny identity. */
87 inet->num = lport;
88 inet->sport = htons(lport);
89 sk->sk_hashent = hash;
90 BUG_TRAP(sk_unhashed(sk));
91 __sk_add_node(sk, &head->chain);
92 sock_prot_inc_use(sk->sk_prot);
93 write_unlock(&head->lock);
94
95 if (twp != NULL) {
96 *twp = tw;
97 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
98 } else if (tw != NULL) {
99 /* Silly. Should hash-dance instead... */
100 dccp_tw_deschedule(tw);
101 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
102
103 inet_twsk_put(tw);
104 }
105
106 return 0;
107
108not_unique:
109 write_unlock(&head->lock);
110 return -EADDRNOTAVAIL;
111}
112
113/*
114 * Bind a port for a connect operation and hash it.
115 */
116static int dccp_v4_hash_connect(struct sock *sk)
117{
118 const unsigned short snum = inet_sk(sk)->num;
119 struct inet_bind_hashbucket *head;
120 struct inet_bind_bucket *tb;
121 int ret;
122
123 if (snum == 0) {
124 int rover;
125 int low = sysctl_local_port_range[0];
126 int high = sysctl_local_port_range[1];
127 int remaining = (high - low) + 1;
128 struct hlist_node *node;
129 struct inet_timewait_sock *tw = NULL;
130
131 local_bh_disable();
132
133 /* TODO. Actually it is not so bad idea to remove
134 * dccp_hashinfo.portalloc_lock before next submission to Linus.
135 * As soon as we touch this place at all it is time to think.
136 *
137 * Now it protects single _advisory_ variable dccp_hashinfo.port_rover,
138 * hence it is mostly useless.
139 * Code will work nicely if we just delete it, but
140 * I am afraid in contented case it will work not better or
141 * even worse: another cpu just will hit the same bucket
142 * and spin there.
143 * So some cpu salt could remove both contention and
144 * memory pingpong. Any ideas how to do this in a nice way?
145 */
146 spin_lock(&dccp_hashinfo.portalloc_lock);
147 rover = dccp_hashinfo.port_rover;
148
149 do {
150 rover++;
151 if ((rover < low) || (rover > high))
152 rover = low;
153 head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)];
154 spin_lock(&head->lock);
155
156 /* Does not bother with rcv_saddr checks,
157 * because the established check is already
158 * unique enough.
159 */
160 inet_bind_bucket_for_each(tb, node, &head->chain) {
161 if (tb->port == rover) {
162 BUG_TRAP(!hlist_empty(&tb->owners));
163 if (tb->fastreuse >= 0)
164 goto next_port;
165 if (!__dccp_v4_check_established(sk,
166 rover,
167 &tw))
168 goto ok;
169 goto next_port;
170 }
171 }
172
173 tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover);
174 if (tb == NULL) {
175 spin_unlock(&head->lock);
176 break;
177 }
178 tb->fastreuse = -1;
179 goto ok;
180
181 next_port:
182 spin_unlock(&head->lock);
183 } while (--remaining > 0);
184 dccp_hashinfo.port_rover = rover;
185 spin_unlock(&dccp_hashinfo.portalloc_lock);
186
187 local_bh_enable();
188
189 return -EADDRNOTAVAIL;
190
191ok:
192 /* All locks still held and bhs disabled */
193 dccp_hashinfo.port_rover = rover;
194 spin_unlock(&dccp_hashinfo.portalloc_lock);
195
196 inet_bind_hash(sk, tb, rover);
197 if (sk_unhashed(sk)) {
198 inet_sk(sk)->sport = htons(rover);
199 __inet_hash(&dccp_hashinfo, sk, 0);
200 }
201 spin_unlock(&head->lock);
202
203 if (tw != NULL) {
204 dccp_tw_deschedule(tw);
205 inet_twsk_put(tw);
206 }
207
208 ret = 0;
209 goto out;
210 }
211
212 head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)];
213 tb = inet_csk(sk)->icsk_bind_hash;
214 spin_lock_bh(&head->lock);
215 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
216 __inet_hash(&dccp_hashinfo, sk, 0);
217 spin_unlock_bh(&head->lock);
218 return 0;
219 } else {
220 spin_unlock(&head->lock);
221 /* No definite answer... Walk to established hash table */
222 ret = __dccp_v4_check_established(sk, snum, NULL);
223out:
224 local_bh_enable();
225 return ret;
226 }
227}
228
229static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
230 int addr_len)
231{
232 struct inet_sock *inet = inet_sk(sk);
233 struct dccp_sock *dp = dccp_sk(sk);
234 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
235 struct rtable *rt;
236 u32 daddr, nexthop;
237 int tmp;
238 int err;
239
240 dp->dccps_role = DCCP_ROLE_CLIENT;
241
242 if (addr_len < sizeof(struct sockaddr_in))
243 return -EINVAL;
244
245 if (usin->sin_family != AF_INET)
246 return -EAFNOSUPPORT;
247
248 nexthop = daddr = usin->sin_addr.s_addr;
249 if (inet->opt != NULL && inet->opt->srr) {
250 if (daddr == 0)
251 return -EINVAL;
252 nexthop = inet->opt->faddr;
253 }
254
255 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
256 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
257 IPPROTO_DCCP,
258 inet->sport, usin->sin_port, sk);
259 if (tmp < 0)
260 return tmp;
261
262 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
263 ip_rt_put(rt);
264 return -ENETUNREACH;
265 }
266
267 if (inet->opt == NULL || !inet->opt->srr)
268 daddr = rt->rt_dst;
269
270 if (inet->saddr == 0)
271 inet->saddr = rt->rt_src;
272 inet->rcv_saddr = inet->saddr;
273
274 inet->dport = usin->sin_port;
275 inet->daddr = daddr;
276
277 dp->dccps_ext_header_len = 0;
278 if (inet->opt != NULL)
279 dp->dccps_ext_header_len = inet->opt->optlen;
280 /*
281 * Socket identity is still unknown (sport may be zero).
282 * However we set state to DCCP_REQUESTING and not releasing socket
283 * lock select source port, enter ourselves into the hash tables and
284 * complete initialization after this.
285 */
286 dccp_set_state(sk, DCCP_REQUESTING);
287 err = dccp_v4_hash_connect(sk);
288 if (err != 0)
289 goto failure;
290
291 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
292 if (err != 0)
293 goto failure;
294
295 /* OK, now commit destination to socket. */
296 sk_setup_caps(sk, &rt->u.dst);
297
298 dp->dccps_gar =
299 dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
300 inet->daddr,
301 inet->sport,
302 usin->sin_port);
303 dccp_update_gss(sk, dp->dccps_iss);
304
305 inet->id = dp->dccps_iss ^ jiffies;
306
307 err = dccp_connect(sk);
308 rt = NULL;
309 if (err != 0)
310 goto failure;
311out:
312 return err;
313failure:
314 /* This unhashes the socket and releases the local port, if necessary. */
315 dccp_set_state(sk, DCCP_CLOSED);
316 ip_rt_put(rt);
317 sk->sk_route_caps = 0;
318 inet->dport = 0;
319 goto out;
320}
321
322/*
323 * This routine does path mtu discovery as defined in RFC1191.
324 */
325static inline void dccp_do_pmtu_discovery(struct sock *sk,
326 const struct iphdr *iph,
327 u32 mtu)
328{
329 struct dst_entry *dst;
330 const struct inet_sock *inet = inet_sk(sk);
331 const struct dccp_sock *dp = dccp_sk(sk);
332
333 /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
334 * send out by Linux are always < 576bytes so they should go through
335 * unfragmented).
336 */
337 if (sk->sk_state == DCCP_LISTEN)
338 return;
339
340 /* We don't check in the destentry if pmtu discovery is forbidden
341 * on this route. We just assume that no packet_to_big packets
342 * are send back when pmtu discovery is not active.
343 * There is a small race when the user changes this flag in the
344 * route, but I think that's acceptable.
345 */
346 if ((dst = __sk_dst_check(sk, 0)) == NULL)
347 return;
348
349 dst->ops->update_pmtu(dst, mtu);
350
351 /* Something is about to be wrong... Remember soft error
352 * for the case, if this connection will not able to recover.
353 */
354 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
355 sk->sk_err_soft = EMSGSIZE;
356
357 mtu = dst_mtu(dst);
358
359 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
360 dp->dccps_pmtu_cookie > mtu) {
361 dccp_sync_mss(sk, mtu);
362
363 /*
364 * From: draft-ietf-dccp-spec-11.txt
365 *
366 * DCCP-Sync packets are the best choice for upward probing,
367 * since DCCP-Sync probes do not risk application data loss.
368 */
369 dccp_send_sync(sk, dp->dccps_gsr);
370 } /* else let the usual retransmit timer handle it */
371}
372
373static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
374{
375 int err;
376 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
377 const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
378 sizeof(struct dccp_hdr_ext) +
379 sizeof(struct dccp_hdr_ack_bits);
380 struct sk_buff *skb;
381
382 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
383 return;
384
385 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
386 if (skb == NULL)
387 return;
388
389 /* Reserve space for headers. */
390 skb_reserve(skb, MAX_DCCP_HEADER);
391
392 skb->dst = dst_clone(rxskb->dst);
393
394 skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
395 dh = dccp_hdr(skb);
396 memset(dh, 0, dccp_hdr_ack_len);
397
398 /* Build DCCP header and checksum it. */
399 dh->dccph_type = DCCP_PKT_ACK;
400 dh->dccph_sport = rxdh->dccph_dport;
401 dh->dccph_dport = rxdh->dccph_sport;
402 dh->dccph_doff = dccp_hdr_ack_len / 4;
403 dh->dccph_x = 1;
404
405 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
406 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
407
408 bh_lock_sock(dccp_ctl_socket->sk);
409 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
410 rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
411 bh_unlock_sock(dccp_ctl_socket->sk);
412
413 if (err == NET_XMIT_CN || err == 0) {
414 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
415 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
416 }
417}
418
/*
 * Send an Ack in reply to @skb on behalf of a connection request.  @req is
 * not needed: everything is derived from the received packet.
 */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	dccp_v4_ctl_send_ack(skb);
}
423
424static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
425 struct dst_entry *dst)
426{
427 int err = -1;
428 struct sk_buff *skb;
429
430 /* First, grab a route. */
431
432 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
433 goto out;
434
435 skb = dccp_make_response(sk, dst, req);
436 if (skb != NULL) {
437 const struct inet_request_sock *ireq = inet_rsk(req);
438
439 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
440 ireq->rmt_addr,
441 ireq->opt);
442 if (err == NET_XMIT_CN)
443 err = 0;
444 }
445
446out:
447 dst_release(dst);
448 return err;
449}
450
451/*
452 * This routine is called by the ICMP module when it gets some sort of error
453 * condition. If err < 0 then the socket should be closed and the error
454 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
455 * After adjustment header points to the first 8 bytes of the tcp header. We
456 * need to find the appropriate port.
457 *
458 * The locking strategy used here is very "optimistic". When someone else
459 * accesses the socket the ICMP is just dropped and for some paths there is no
460 * check at all. A more general error queue to queue errors for later handling
461 * is probably better.
462 */
463void dccp_v4_err(struct sk_buff *skb, u32 info)
464{
465 const struct iphdr *iph = (struct iphdr *)skb->data;
466 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2));
467 struct dccp_sock *dp;
468 struct inet_sock *inet;
469 const int type = skb->h.icmph->type;
470 const int code = skb->h.icmph->code;
471 struct sock *sk;
472 __u64 seq;
473 int err;
474
475 if (skb->len < (iph->ihl << 2) + 8) {
476 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
477 return;
478 }
479
480 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
481 iph->saddr, dh->dccph_sport, inet_iif(skb));
482 if (sk == NULL) {
483 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
484 return;
485 }
486
487 if (sk->sk_state == DCCP_TIME_WAIT) {
488 inet_twsk_put((struct inet_timewait_sock *)sk);
489 return;
490 }
491
492 bh_lock_sock(sk);
493 /* If too many ICMPs get dropped on busy
494 * servers this needs to be solved differently.
495 */
496 if (sock_owned_by_user(sk))
497 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
498
499 if (sk->sk_state == DCCP_CLOSED)
500 goto out;
501
502 dp = dccp_sk(sk);
503 seq = dccp_hdr_seq(skb);
504 if (sk->sk_state != DCCP_LISTEN &&
505 !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
506 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
507 goto out;
508 }
509
510 switch (type) {
511 case ICMP_SOURCE_QUENCH:
512 /* Just silently ignore these. */
513 goto out;
514 case ICMP_PARAMETERPROB:
515 err = EPROTO;
516 break;
517 case ICMP_DEST_UNREACH:
518 if (code > NR_ICMP_UNREACH)
519 goto out;
520
521 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
522 if (!sock_owned_by_user(sk))
523 dccp_do_pmtu_discovery(sk, iph, info);
524 goto out;
525 }
526
527 err = icmp_err_convert[code].errno;
528 break;
529 case ICMP_TIME_EXCEEDED:
530 err = EHOSTUNREACH;
531 break;
532 default:
533 goto out;
534 }
535
536 switch (sk->sk_state) {
537 struct request_sock *req , **prev;
538 case DCCP_LISTEN:
539 if (sock_owned_by_user(sk))
540 goto out;
541 req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
542 iph->daddr, iph->saddr);
543 if (!req)
544 goto out;
545
546 /*
547 * ICMPs are not backlogged, hence we cannot get an established
548 * socket here.
549 */
550 BUG_TRAP(!req->sk);
551
552 if (seq != dccp_rsk(req)->dreq_iss) {
553 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
554 goto out;
555 }
556 /*
557 * Still in RESPOND, just remove it silently.
558 * There is no good way to pass the error to the newly
559 * created socket, and POSIX does not want network
560 * errors returned from accept().
561 */
562 inet_csk_reqsk_queue_drop(sk, req, prev);
563 goto out;
564
565 case DCCP_REQUESTING:
566 case DCCP_RESPOND:
567 if (!sock_owned_by_user(sk)) {
568 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
569 sk->sk_err = err;
570
571 sk->sk_error_report(sk);
572
573 dccp_done(sk);
574 } else
575 sk->sk_err_soft = err;
576 goto out;
577 }
578
579 /* If we've already connected we will keep trying
580 * until we time out, or the user gives up.
581 *
582 * rfc1122 4.2.3.9 allows to consider as hard errors
583 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
584 * but it is obsoleted by pmtu discovery).
585 *
586 * Note, that in modern internet, where routing is unreliable
587 * and in each dark corner broken firewalls sit, sending random
588 * errors ordered by their masters even this two messages finally lose
589 * their original sense (even Linux sends invalid PORT_UNREACHs)
590 *
591 * Now we are in compliance with RFCs.
592 * --ANK (980905)
593 */
594
595 inet = inet_sk(sk);
596 if (!sock_owned_by_user(sk) && inet->recverr) {
597 sk->sk_err = err;
598 sk->sk_error_report(sk);
599 } else /* Only an error on timeout */
600 sk->sk_err_soft = err;
601out:
602 bh_unlock_sock(sk);
603 sock_put(sk);
604}
605
606extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code);
607
608int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
609{
610 struct sk_buff *skb;
611 /*
612 * FIXME: what if rebuild_header fails?
613 * Should we be doing a rebuild_header here?
614 */
615 int err = inet_sk_rebuild_header(sk);
616
617 if (err != 0)
618 return err;
619
620 skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
621 if (skb != NULL) {
622 const struct dccp_sock *dp = dccp_sk(sk);
623 const struct inet_sock *inet = inet_sk(sk);
624
625 err = ip_build_and_send_pkt(skb, sk,
626 inet->saddr, inet->daddr, NULL);
627 if (err == NET_XMIT_CN)
628 err = 0;
629
630 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
631 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
632 }
633
634 return err;
635}
636
637static inline u64 dccp_v4_init_sequence(const struct sock *sk,
638 const struct sk_buff *skb)
639{
640 return secure_dccp_sequence_number(skb->nh.iph->daddr,
641 skb->nh.iph->saddr,
642 dccp_hdr(skb)->dccph_dport,
643 dccp_hdr(skb)->dccph_sport);
644}
645
646int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
647{
648 struct inet_request_sock *ireq;
649 struct dccp_sock dp;
650 struct request_sock *req;
651 struct dccp_request_sock *dreq;
652 const __u32 saddr = skb->nh.iph->saddr;
653 const __u32 daddr = skb->nh.iph->daddr;
654 struct dst_entry *dst = NULL;
655
656 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
657 if (((struct rtable *)skb->dst)->rt_flags &
658 (RTCF_BROADCAST | RTCF_MULTICAST))
659 goto drop;
660
661 /*
662 * TW buckets are converted to open requests without
663 * limitations, they conserve resources and peer is
664 * evidently real one.
665 */
666 if (inet_csk_reqsk_queue_is_full(sk))
667 goto drop;
668
669 /*
670 * Accept backlog is full. If we have already queued enough
671 * of warm entries in syn queue, drop request. It is better than
672 * clogging syn queue with openreqs with exponentially increasing
673 * timeout.
674 */
675 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
676 goto drop;
677
678 req = reqsk_alloc(sk->sk_prot->rsk_prot);
679 if (req == NULL)
680 goto drop;
681
682 /* FIXME: process options */
683
684 dccp_openreq_init(req, &dp, skb);
685
686 ireq = inet_rsk(req);
687 ireq->loc_addr = daddr;
688 ireq->rmt_addr = saddr;
689 /* FIXME: Merge Aristeu's option parsing code when ready */
690 req->rcv_wnd = 100; /* Fake, option parsing will get the right value */
691 ireq->opt = NULL;
692
693 /*
694 * Step 3: Process LISTEN state
695 *
696 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
697 *
698 * In fact we defer setting S.GSR, S.SWL, S.SWH to
699 * dccp_create_openreq_child.
700 */
701 dreq = dccp_rsk(req);
702 dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
703 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
704 dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
705
706 if (dccp_v4_send_response(sk, req, dst))
707 goto drop_and_free;
708
709 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
710 return 0;
711
712drop_and_free:
713 /*
714 * FIXME: should be reqsk_free after implementing req->rsk_ops
715 */
716 __reqsk_free(req);
717drop:
718 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
719 return -1;
720}
721
722/*
723 * The three way handshake has completed - we got a valid ACK or DATAACK -
724 * now create the new socket.
725 *
726 * This is the equivalent of TCP's tcp_v4_syn_recv_sock
727 */
728struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
729 struct request_sock *req,
730 struct dst_entry *dst)
731{
732 struct inet_request_sock *ireq;
733 struct inet_sock *newinet;
734 struct dccp_sock *newdp;
735 struct sock *newsk;
736
737 if (sk_acceptq_is_full(sk))
738 goto exit_overflow;
739
740 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
741 goto exit;
742
743 newsk = dccp_create_openreq_child(sk, req, skb);
744 if (newsk == NULL)
745 goto exit;
746
747 sk_setup_caps(newsk, dst);
748
749 newdp = dccp_sk(newsk);
750 newinet = inet_sk(newsk);
751 ireq = inet_rsk(req);
752 newinet->daddr = ireq->rmt_addr;
753 newinet->rcv_saddr = ireq->loc_addr;
754 newinet->saddr = ireq->loc_addr;
755 newinet->opt = ireq->opt;
756 ireq->opt = NULL;
757 newinet->mc_index = inet_iif(skb);
758 newinet->mc_ttl = skb->nh.iph->ttl;
759 newinet->id = jiffies;
760
761 dccp_sync_mss(newsk, dst_mtu(dst));
762
763 __inet_hash(&dccp_hashinfo, newsk, 0);
764 __inet_inherit_port(&dccp_hashinfo, sk, newsk);
765
766 return newsk;
767
768exit_overflow:
769 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
770exit:
771 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
772 dst_release(dst);
773 return NULL;
774}
775
776static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
777{
778 const struct dccp_hdr *dh = dccp_hdr(skb);
779 const struct iphdr *iph = skb->nh.iph;
780 struct sock *nsk;
781 struct request_sock **prev;
782 /* Find possible connection requests. */
783 struct request_sock *req = inet_csk_search_req(sk, &prev,
784 dh->dccph_sport,
785 iph->saddr, iph->daddr);
786 if (req != NULL)
787 return dccp_check_req(sk, skb, req, prev);
788
789 nsk = __inet_lookup_established(&dccp_hashinfo,
790 iph->saddr, dh->dccph_sport,
791 iph->daddr, ntohs(dh->dccph_dport),
792 inet_iif(skb));
793 if (nsk != NULL) {
794 if (nsk->sk_state != DCCP_TIME_WAIT) {
795 bh_lock_sock(nsk);
796 return nsk;
797 }
798 inet_twsk_put((struct inet_timewait_sock *)nsk);
799 return NULL;
800 }
801
802 return sk;
803}
804
805int dccp_v4_checksum(struct sk_buff *skb)
806{
807 struct dccp_hdr* dh = dccp_hdr(skb);
808 int checksum_len;
809 u32 tmp;
810
811 if (dh->dccph_cscov == 0)
812 checksum_len = skb->len;
813 else {
814 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
815 checksum_len = checksum_len < skb->len ? checksum_len : skb->len;
816 }
817
818 tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
819 return csum_fold(tmp);
820}
821
822static int dccp_v4_verify_checksum(struct sk_buff *skb)
823{
824 struct dccp_hdr *th = dccp_hdr(skb);
825 const u16 remote_checksum = th->dccph_checksum;
826 u16 local_checksum;
827
828 /* FIXME: don't mess with skb payload */
829 th->dccph_checksum = 0; /* zero it for computation */
830
831 local_checksum = dccp_v4_checksum(skb);
832
833 /* FIXME: don't mess with skb payload */
834 th->dccph_checksum = remote_checksum; /* put it back */
835
836 return remote_checksum == local_checksum ? 0 : -1;
837}
838
839static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
840 struct sk_buff *skb)
841{
842 struct rtable *rt;
843 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
844 .nl_u = { .ip4_u =
845 { .daddr = skb->nh.iph->saddr,
846 .saddr = skb->nh.iph->daddr,
847 .tos = RT_CONN_FLAGS(sk) } },
848 .proto = sk->sk_protocol,
849 .uli_u = { .ports =
850 { .sport = dccp_hdr(skb)->dccph_dport,
851 .dport = dccp_hdr(skb)->dccph_sport } } };
852
853 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
854 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
855 return NULL;
856 }
857
858 return &rt->u.dst;
859}
860
861void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
862{
863 int err;
864 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
865 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
866 sizeof(struct dccp_hdr_ext) +
867 sizeof(struct dccp_hdr_reset);
868 struct sk_buff *skb;
869 struct dst_entry *dst;
870
871 /* Never send a reset in response to a reset. */
872 if (rxdh->dccph_type == DCCP_PKT_RESET)
873 return;
874
875 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
876 return;
877
878 dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
879 if (dst == NULL)
880 return;
881
882 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
883 if (skb == NULL)
884 goto out;
885
886 /* Reserve space for headers. */
887 skb_reserve(skb, MAX_DCCP_HEADER);
888 skb->dst = dst_clone(dst);
889
890 skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
891 dh = dccp_hdr(skb);
892 memset(dh, 0, dccp_hdr_reset_len);
893
894 /* Build DCCP header and checksum it. */
895 dh->dccph_type = DCCP_PKT_RESET;
896 dh->dccph_sport = rxdh->dccph_dport;
897 dh->dccph_dport = rxdh->dccph_sport;
898 dh->dccph_doff = dccp_hdr_reset_len / 4;
899 dh->dccph_x = 1;
900 dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code;
901
902 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
903 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
904
905 dh->dccph_checksum = dccp_v4_checksum(skb);
906
907 bh_lock_sock(dccp_ctl_socket->sk);
908 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
909 rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
910 bh_unlock_sock(dccp_ctl_socket->sk);
911
912 if (err == NET_XMIT_CN || err == 0) {
913 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
914 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
915 }
916out:
917 dst_release(dst);
918}
919
920int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
921{
922 struct dccp_hdr *dh = dccp_hdr(skb);
923
924 if (sk->sk_state == DCCP_OPEN) { /* Fast path */
925 if (dccp_rcv_established(sk, skb, dh, skb->len))
926 goto reset;
927 return 0;
928 }
929
930 /*
931 * Step 3: Process LISTEN state
932 * If S.state == LISTEN,
933 * If P.type == Request or P contains a valid Init Cookie option,
934 * * Must scan the packet's options to check for an Init
935 * Cookie. Only the Init Cookie is processed here,
936 * however; other options are processed in Step 8. This
937 * scan need only be performed if the endpoint uses Init
938 * Cookies *
939 * * Generate a new socket and switch to that socket *
940 * Set S := new socket for this port pair
941 * S.state = RESPOND
942 * Choose S.ISS (initial seqno) or set from Init Cookie
943 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
944 * Continue with S.state == RESPOND
945 * * A Response packet will be generated in Step 11 *
946 * Otherwise,
947 * Generate Reset(No Connection) unless P.type == Reset
948 * Drop packet and return
949 *
950 * NOTE: the check for the packet types is done in dccp_rcv_state_process
951 */
952 if (sk->sk_state == DCCP_LISTEN) {
953 struct sock *nsk = dccp_v4_hnd_req(sk, skb);
954
955 if (nsk == NULL)
956 goto discard;
957
958 if (nsk != sk) {
959 if (dccp_child_process(sk, nsk, skb))
960 goto reset;
961 return 0;
962 }
963 }
964
965 if (dccp_rcv_state_process(sk, skb, dh, skb->len))
966 goto reset;
967 return 0;
968
969reset:
970 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
971 dccp_v4_ctl_send_reset(skb);
972discard:
973 kfree_skb(skb);
974 return 0;
975}
976
977static inline int dccp_invalid_packet(struct sk_buff *skb)
978{
979 const struct dccp_hdr *dh;
980
981 if (skb->pkt_type != PACKET_HOST)
982 return 1;
983
984 if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
985 dccp_pr_debug("pskb_may_pull failed\n");
986 return 1;
987 }
988
989 dh = dccp_hdr(skb);
990
991 /* If the packet type is not understood, drop packet and return */
992 if (dh->dccph_type >= DCCP_PKT_INVALID) {
993 dccp_pr_debug("invalid packet type\n");
994 return 1;
995 }
996
997 /*
998 * If P.Data Offset is too small for packet type, or too large for
999 * packet, drop packet and return
1000 */
1001 if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
1002 dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff);
1003 return 1;
1004 }
1005
1006 if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
1007 dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff);
1008 return 1;
1009 }
1010
1011 dh = dccp_hdr(skb);
1012
1013 /*
1014 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
1015 * has short sequence numbers), drop packet and return
1016 */
1017 if (dh->dccph_x == 0 &&
1018 dh->dccph_type != DCCP_PKT_DATA &&
1019 dh->dccph_type != DCCP_PKT_ACK &&
1020 dh->dccph_type != DCCP_PKT_DATAACK) {
1021 dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n",
1022 dccp_packet_name(dh->dccph_type));
1023 return 1;
1024 }
1025
1026 /* If the header checksum is incorrect, drop packet and return */
1027 if (dccp_v4_verify_checksum(skb) < 0) {
1028 dccp_pr_debug("header checksum is incorrect\n");
1029 return 1;
1030 }
1031
1032 return 0;
1033}
1034
1035/* this is called when real data arrives */
1036int dccp_v4_rcv(struct sk_buff *skb)
1037{
1038 const struct dccp_hdr *dh;
1039 struct sock *sk;
1040 int rc;
1041
1042 /* Step 1: Check header basics: */
1043
1044 if (dccp_invalid_packet(skb))
1045 goto discard_it;
1046
1047 dh = dccp_hdr(skb);
1048#if 0
1049 /*
1050 * Use something like this to simulate some DATA/DATAACK loss to test
1051 * dccp_ackpkts_add, you'll get something like this on a session that
1052 * sends 10 DATA/DATAACK packets:
1053 *
1054 * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
1055 *
1056 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
1057 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state
1058 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
1059 *
1060 * So...
1061 *
1062 * 281473596467422 was received
1063 * 281473596467421 was not received
1064 * 281473596467420 was received
1065 * 281473596467419 was not received
1066 * 281473596467418 was received
1067 * 281473596467417 was not received
1068 * 281473596467416 was received
1069 * 281473596467415 was not received
1070 * 281473596467414 was received
1071 * 281473596467413 was received (this one was the 3way handshake RESPONSE)
1072 *
1073 */
1074 if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) {
1075 static int discard = 0;
1076
1077 if (discard) {
1078 discard = 0;
1079 goto discard_it;
1080 }
1081 discard = 1;
1082 }
1083#endif
1084 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
1085 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
1086
1087 dccp_pr_debug("%8.8s "
1088 "src=%u.%u.%u.%u@%-5d "
1089 "dst=%u.%u.%u.%u@%-5d seq=%llu",
1090 dccp_packet_name(dh->dccph_type),
1091 NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
1092 NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
1093 DCCP_SKB_CB(skb)->dccpd_seq);
1094
1095 if (dccp_packet_without_ack(skb)) {
1096 DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
1097 dccp_pr_debug_cat("\n");
1098 } else {
1099 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
1100 dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq);
1101 }
1102
1103 /* Step 2:
1104 * Look up flow ID in table and get corresponding socket */
1105 sk = __inet_lookup(&dccp_hashinfo,
1106 skb->nh.iph->saddr, dh->dccph_sport,
1107 skb->nh.iph->daddr, ntohs(dh->dccph_dport),
1108 inet_iif(skb));
1109
1110 /*
1111 * Step 2:
1112 * If no socket ...
1113 * Generate Reset(No Connection) unless P.type == Reset
1114 * Drop packet and return
1115 */
1116 if (sk == NULL) {
1117 dccp_pr_debug("failed to look up flow ID in table and "
1118 "get corresponding socket\n");
1119 goto no_dccp_socket;
1120 }
1121
1122 /*
1123 * Step 2:
1124 * ... or S.state == TIMEWAIT,
1125 * Generate Reset(No Connection) unless P.type == Reset
1126 * Drop packet and return
1127 */
1128
1129 if (sk->sk_state == DCCP_TIME_WAIT) {
1130 dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n");
1131 goto discard_and_relse;
1132 }
1133
1134 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
1135 dccp_pr_debug("xfrm4_policy_check failed\n");
1136 goto discard_and_relse;
1137 }
1138
1139 if (sk_filter(sk, skb, 0)) {
1140 dccp_pr_debug("sk_filter failed\n");
1141 goto discard_and_relse;
1142 }
1143
1144 skb->dev = NULL;
1145
1146 bh_lock_sock(sk);
1147 rc = 0;
1148 if (!sock_owned_by_user(sk))
1149 rc = dccp_v4_do_rcv(sk, skb);
1150 else
1151 sk_add_backlog(sk, skb);
1152 bh_unlock_sock(sk);
1153
1154 sock_put(sk);
1155 return rc;
1156
1157no_dccp_socket:
1158 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1159 goto discard_it;
1160 /*
1161 * Step 2:
1162 * Generate Reset(No Connection) unless P.type == Reset
1163 * Drop packet and return
1164 */
1165 if (dh->dccph_type != DCCP_PKT_RESET) {
1166 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
1167 dccp_v4_ctl_send_reset(skb);
1168 }
1169
1170discard_it:
1171 /* Discard frame. */
1172 kfree_skb(skb);
1173 return 0;
1174
1175discard_and_relse:
1176 sock_put(sk);
1177 goto discard_it;
1178}
1179
1180static int dccp_v4_init_sock(struct sock *sk)
1181{
1182 struct dccp_sock *dp = dccp_sk(sk);
1183 static int dccp_ctl_socket_init = 1;
1184
1185 dccp_options_init(&dp->dccps_options);
1186
1187 if (dp->dccps_options.dccpo_send_ack_vector) {
1188 dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
1189 GFP_KERNEL);
1190
1191 if (dp->dccps_hc_rx_ackpkts == NULL)
1192 return -ENOMEM;
1193 }
1194
1195 /*
1196 * FIXME: We're hardcoding the CCID, and doing this at this point makes
1197 * the listening (master) sock get CCID control blocks, which is not
1198 * necessary, but for now, to not mess with the test userspace apps,
1199 * lets leave it here, later the real solution is to do this in a
1200 * setsockopt(CCIDs-I-want/accept). -acme
1201 */
1202 if (likely(!dccp_ctl_socket_init)) {
1203 dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk);
1204 dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk);
1205 if (dp->dccps_hc_rx_ccid == NULL ||
1206 dp->dccps_hc_tx_ccid == NULL) {
1207 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1208 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1209 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
1210 dp->dccps_hc_rx_ackpkts = NULL;
1211 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1212 return -ENOMEM;
1213 }
1214 } else
1215 dccp_ctl_socket_init = 0;
1216
1217 dccp_init_xmit_timers(sk);
1218 sk->sk_state = DCCP_CLOSED;
1219 dp->dccps_mss_cache = 536;
1220 dp->dccps_role = DCCP_ROLE_UNDEFINED;
1221
1222 return 0;
1223}
1224
1225int dccp_v4_destroy_sock(struct sock *sk)
1226{
1227 struct dccp_sock *dp = dccp_sk(sk);
1228
1229 /*
1230 * DCCP doesn't use sk_qrite_queue, just sk_send_head
1231 * for retransmissions
1232 */
1233 if (sk->sk_send_head != NULL) {
1234 kfree_skb(sk->sk_send_head);
1235 sk->sk_send_head = NULL;
1236 }
1237
1238 /* Clean up a referenced DCCP bind bucket. */
1239 if (inet_csk(sk)->icsk_bind_hash != NULL)
1240 inet_put_port(&dccp_hashinfo, sk);
1241
1242 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
1243 dp->dccps_hc_rx_ackpkts = NULL;
1244 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1245 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1246 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1247
1248 return 0;
1249}
1250
1251static void dccp_v4_reqsk_destructor(struct request_sock *req)
1252{
1253 kfree(inet_rsk(req)->opt);
1254}
1255
/*
 * Request sock operations for DCCP over IPv4.  dccp_v4_prot points at this
 * table through .rsk_prot, which is what dccp_v4_conn_request() hands to
 * reqsk_alloc().
 */
1256static struct request_sock_ops dccp_request_sock_ops = {
1257 .family = PF_INET,
1258 .obj_size = sizeof(struct dccp_request_sock),
/* called by dccp_check_req() to answer a retransmitted REQUEST */
1259 .rtx_syn_ack = dccp_v4_send_response,
1260 .send_ack = dccp_v4_reqsk_send_ack,
/* frees the IP options attached to the request sock */
1261 .destructor = dccp_v4_reqsk_destructor,
/* called by dccp_check_req() when it drops a request */
1262 .send_reset = dccp_v4_ctl_send_reset,
1263};
1264
/*
 * Protocol operations table for DCCP over IPv4: it wires the DCCP
 * implementations of the socket operations into struct proto.
 */
1265struct proto dccp_v4_prot = {
1266 .name = "DCCP",
1267 .owner = THIS_MODULE,
1268 .close = dccp_close,
1269 .connect = dccp_v4_connect,
1270 .disconnect = dccp_disconnect,
1271 .ioctl = dccp_ioctl,
1272 .init = dccp_v4_init_sock,
1273 .setsockopt = dccp_setsockopt,
1274 .getsockopt = dccp_getsockopt,
1275 .sendmsg = dccp_sendmsg,
1276 .recvmsg = dccp_recvmsg,
/*
 * Same handler dccp_v4_rcv() calls directly when the socket is not owned
 * by a user context; otherwise dccp_v4_rcv() puts the packet on the
 * backlog.
 */
1277 .backlog_rcv = dccp_v4_do_rcv,
1278 .hash = dccp_v4_hash,
1279 .unhash = dccp_v4_unhash,
1280 .accept = inet_csk_accept,
1281 .get_port = dccp_v4_get_port,
1282 .shutdown = dccp_shutdown,
1283 .destroy = dccp_v4_destroy_sock,
1284 .orphan_count = &dccp_orphan_count,
1285 .max_header = MAX_DCCP_HEADER,
1286 .obj_size = sizeof(struct dccp_sock),
/* request sock operations, used by dccp_v4_conn_request() for reqsk_alloc() */
1287 .rsk_prot = &dccp_request_sock_ops,
1288 .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */
1289};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
new file mode 100644
index 000000000000..810f0c293b85
--- /dev/null
+++ b/net/dccp/minisocks.c
@@ -0,0 +1,199 @@
1/*
2 * net/dccp/minisocks.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16#include <linux/timer.h>
17
18#include <net/sock.h>
19#include <net/xfrm.h>
20#include <net/inet_timewait_sock.h>
21
22#include "ccid.h"
23#include "dccp.h"
24
/*
 * dccp_time_wait - move a socket towards TIME_WAIT (not implemented yet)
 * @sk:    socket
 * @state: state the socket is put into
 * @timeo: unused for now
 *
 * FIXME: Implement.  Currently this only switches @sk to @state.
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
	dccp_pr_debug("Want to help? Start here\n");

	dccp_set_state(sk, state);
}
31
32/* This is for handling early-kills of TIME_WAIT sockets. */
33void dccp_tw_deschedule(struct inet_timewait_sock *tw)
34{
35 dccp_pr_debug("Want to help? Start here\n");
36 __inet_twsk_kill(tw, &dccp_hashinfo);
37}
38
39struct sock *dccp_create_openreq_child(struct sock *sk,
40 const struct request_sock *req,
41 const struct sk_buff *skb)
42{
43 /*
44 * Step 3: Process LISTEN state
45 *
46 * // Generate a new socket and switch to that socket
47 * Set S := new socket for this port pair
48 */
49 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
50
51 if (newsk != NULL) {
52 const struct dccp_request_sock *dreq = dccp_rsk(req);
53 struct inet_connection_sock *newicsk = inet_csk(sk);
54 struct dccp_sock *newdp = dccp_sk(newsk);
55
56 newdp->dccps_hc_rx_ackpkts = NULL;
57 newdp->dccps_role = DCCP_ROLE_SERVER;
58 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
59
60 if (newdp->dccps_options.dccpo_send_ack_vector) {
61 newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
62 GFP_ATOMIC);
63 /*
64 * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone
65 * copied the master sock and left the CCID pointers for this child,
66 * that is why we do the __ccid_get calls.
67 */
68 if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
69 goto out_free;
70 }
71
72 if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 ||
73 ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) {
74 dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
75 ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
76 ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
77out_free:
78 /* It is still raw copy of parent, so invalidate
79 * destructor and make plain sk_free() */
80 newsk->sk_destruct = NULL;
81 sk_free(newsk);
82 return NULL;
83 }
84
85 __ccid_get(newdp->dccps_hc_rx_ccid);
86 __ccid_get(newdp->dccps_hc_tx_ccid);
87
88 /*
89 * Step 3: Process LISTEN state
90 *
91 * Choose S.ISS (initial seqno) or set from Init Cookie
92 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
93 */
94
95 /* See dccp_v4_conn_request */
96 newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
97
98 newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
99 dccp_update_gsr(newsk, dreq->dreq_isr);
100
101 newdp->dccps_iss = dreq->dreq_iss;
102 dccp_update_gss(newsk, dreq->dreq_iss);
103
104 dccp_init_xmit_timers(newsk);
105
106 DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
107 }
108 return newsk;
109}
110
111/*
112 * Process an incoming packet for RESPOND sockets represented
113 * as an request_sock.
114 */
115struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
116 struct request_sock *req,
117 struct request_sock **prev)
118{
119 struct sock *child = NULL;
120
121 /* Check for retransmitted REQUEST */
122 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
123 if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) {
124 struct dccp_request_sock *dreq = dccp_rsk(req);
125
126 dccp_pr_debug("Retransmitted REQUEST\n");
127 /* Send another RESPONSE packet */
128 dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
129 dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq);
130 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
131 }
132 /* Network Duplicate, discard packet */
133 return NULL;
134 }
135
136 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
137
138 if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
139 dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
140 goto drop;
141
142 /* Invalid ACK */
143 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
144 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n",
145 DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss);
146 goto drop;
147 }
148
149 child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
150 if (child == NULL)
151 goto listen_overflow;
152
153 /* FIXME: deal with options */
154
155 inet_csk_reqsk_queue_unlink(sk, req, prev);
156 inet_csk_reqsk_queue_removed(sk, req);
157 inet_csk_reqsk_queue_add(sk, req, child);
158out:
159 return child;
160listen_overflow:
161 dccp_pr_debug("listen_overflow!\n");
162 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
163drop:
164 if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
165 req->rsk_ops->send_reset(skb);
166
167 inet_csk_reqsk_queue_drop(sk, req, prev);
168 goto out;
169}
170
171/*
172 * Queue segment on the new socket if the new socket is active,
173 * otherwise we just shortcircuit this and continue with
174 * the new socket.
175 */
176int dccp_child_process(struct sock *parent, struct sock *child,
177 struct sk_buff *skb)
178{
179 int ret = 0;
180 const int state = child->sk_state;
181
182 if (!sock_owned_by_user(child)) {
183 ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len);
184
185 /* Wakeup parent, send SIGIO */
186 if (state == DCCP_RESPOND && child->sk_state != state)
187 parent->sk_data_ready(parent, 0);
188 } else {
189 /* Alas, it is possible again, because we do lookup
190 * in main socket hash table and lock on listening
191 * socket does not protect us more.
192 */
193 sk_add_backlog(child, skb);
194 }
195
196 bh_unlock_sock(child);
197 sock_put(child);
198 return ret;
199}
diff --git a/net/dccp/options.c b/net/dccp/options.c
new file mode 100644
index 000000000000..e1867767946c
--- /dev/null
+++ b/net/dccp/options.c
@@ -0,0 +1,763 @@
1/*
2 * net/dccp/options.c
3 *
4 * An implementation of the DCCP protocol
5 * Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
6 * Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19
20#include "ccid.h"
21#include "dccp.h"
22
23static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
24 struct sock *sk,
25 const u64 ackno,
26 const unsigned char len,
27 const unsigned char *vector);
28
29/* stores the default values for new connection. may be changed with sysctl */
30static const struct dccp_options dccpo_default_values = {
31 .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
32 .dccpo_ccid = DCCPF_INITIAL_CCID,
33 .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
34 .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
35};
36
37void dccp_options_init(struct dccp_options *dccpo)
38{
39 memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
40}
41
42static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
43{
44 u32 value = 0;
45
46 if (len > 3)
47 value += *bf++ << 24;
48 if (len > 2)
49 value += *bf++ << 16;
50 if (len > 1)
51 value += *bf++ << 8;
52 if (len > 0)
53 value += *bf;
54
55 return value;
56}
57
58int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
59{
60 struct dccp_sock *dp = dccp_sk(sk);
61#ifdef DCCP_DEBUG
62 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " :
63 "server rx opt: ";
64#endif
65 const struct dccp_hdr *dh = dccp_hdr(skb);
66 const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
67 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
68 unsigned char *opt_ptr = options;
69 const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4);
70 struct dccp_options_received *opt_recv = &dp->dccps_options_received;
71 unsigned char opt, len;
72 unsigned char *value;
73
74 memset(opt_recv, 0, sizeof(*opt_recv));
75
76 while (opt_ptr != opt_end) {
77 opt = *opt_ptr++;
78 len = 0;
79 value = NULL;
80
81 /* Check if this isn't a single byte option */
82 if (opt > DCCPO_MAX_RESERVED) {
83 if (opt_ptr == opt_end)
84 goto out_invalid_option;
85
86 len = *opt_ptr++;
87 if (len < 3)
88 goto out_invalid_option;
89 /*
90 * Remove the type and len fields, leaving
91 * just the value size
92 */
93 len -= 2;
94 value = opt_ptr;
95 opt_ptr += len;
96
97 if (opt_ptr > opt_end)
98 goto out_invalid_option;
99 }
100
101 switch (opt) {
102 case DCCPO_PADDING:
103 break;
104 case DCCPO_NDP_COUNT:
105 if (len > 3)
106 goto out_invalid_option;
107
108 opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
109 dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp);
110 break;
111 case DCCPO_ACK_VECTOR_0:
112 if (len > DCCP_MAX_ACK_VECTOR_LEN)
113 goto out_invalid_option;
114
115 if (pkt_type == DCCP_PKT_DATA)
116 continue;
117
118 opt_recv->dccpor_ack_vector_len = len;
119 opt_recv->dccpor_ack_vector_idx = value - options;
120
121 dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
122 debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq);
123 dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
124 value, len);
125 dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk,
126 DCCP_SKB_CB(skb)->dccpd_ack_seq,
127 len, value);
128 break;
129 case DCCPO_TIMESTAMP:
130 if (len != 4)
131 goto out_invalid_option;
132
133 opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
134
135 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
136 dp->dccps_timestamp_time = jiffies;
137
138 dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
139 debug_prefix, opt_recv->dccpor_timestamp,
140 DCCP_SKB_CB(skb)->dccpd_ack_seq);
141 break;
142 case DCCPO_TIMESTAMP_ECHO:
143 if (len < 4 || len > 8)
144 goto out_invalid_option;
145
146 opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
147
148 dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n",
149 debug_prefix, opt_recv->dccpor_timestamp_echo,
150 len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq,
151 tcp_time_stamp - opt_recv->dccpor_timestamp_echo);
152
153 opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4);
154 dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix,
155 opt_recv->dccpor_elapsed_time);
156 break;
157 case DCCPO_ELAPSED_TIME:
158 if (len > 4)
159 goto out_invalid_option;
160
161 if (pkt_type == DCCP_PKT_DATA)
162 continue;
163 opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len);
164 dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
165 opt_recv->dccpor_elapsed_time);
166 break;
167 /*
168 * From draft-ietf-dccp-spec-11.txt:
169 *
170 * Option numbers 128 through 191 are for options sent from the HC-
171 * Sender to the HC-Receiver; option numbers 192 through 255 are for
172 * options sent from the HC-Receiver to the HC-Sender.
173 */
174 case 128 ... 191: {
175 const u16 idx = value - options;
176
177 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0)
178 goto out_invalid_option;
179 }
180 break;
181 case 192 ... 255: {
182 const u16 idx = value - options;
183
184 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0)
185 goto out_invalid_option;
186 }
187 break;
188 default:
189 pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n",
190 sk, opt, len);
191 break;
192 }
193 }
194
195 return 0;
196
197out_invalid_option:
198 DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
199 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
200 pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
201 return -1;
202}
203
204static void dccp_encode_value_var(const u32 value, unsigned char *to,
205 const unsigned int len)
206{
207 if (len > 3)
208 *to++ = (value & 0xFF000000) >> 24;
209 if (len > 2)
210 *to++ = (value & 0xFF0000) >> 16;
211 if (len > 1)
212 *to++ = (value & 0xFF00) >> 8;
213 if (len > 0)
214 *to++ = (value & 0xFF);
215}
216
/* Number of bytes (1, 2 or 3) needed to encode the NDP count @ndp. */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}
221
222void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
223 const unsigned char option,
224 const void *value, const unsigned char len)
225{
226 unsigned char *to;
227
228 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
229 LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option));
230 return;
231 }
232
233 DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
234
235 to = skb_push(skb, len + 2);
236 *to++ = option;
237 *to++ = len + 2;
238
239 memcpy(to, value, len);
240}
241
242EXPORT_SYMBOL_GPL(dccp_insert_option);
243
244static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
245{
246 struct dccp_sock *dp = dccp_sk(sk);
247 int ndp = dp->dccps_ndp_count;
248
249 if (dccp_non_data_packet(skb))
250 ++dp->dccps_ndp_count;
251 else
252 dp->dccps_ndp_count = 0;
253
254 if (ndp > 0) {
255 unsigned char *ptr;
256 const int ndp_len = dccp_ndp_len(ndp);
257 const int len = ndp_len + 2;
258
259 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
260 return;
261
262 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
263
264 ptr = skb_push(skb, len);
265 *ptr++ = DCCPO_NDP_COUNT;
266 *ptr++ = len;
267 dccp_encode_value_var(ndp, ptr, ndp_len);
268 }
269}
270
271static inline int dccp_elapsed_time_len(const u32 elapsed_time)
272{
273 return elapsed_time == 0 ? 0 :
274 elapsed_time <= 0xFF ? 1 :
275 elapsed_time <= 0xFFFF ? 2 :
276 elapsed_time <= 0xFFFFFF ? 3 : 4;
277}
278
279void dccp_insert_option_elapsed_time(struct sock *sk,
280 struct sk_buff *skb,
281 u32 elapsed_time)
282{
283#ifdef DCCP_DEBUG
284 struct dccp_sock *dp = dccp_sk(sk);
285 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
286 "server TX opt: ";
287#endif
288 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
289 const int len = 2 + elapsed_time_len;
290 unsigned char *to;
291
292 /* If elapsed_time == 0... */
293 if (elapsed_time_len == 2)
294 return;
295
296 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
297 LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n"));
298 return;
299 }
300
301 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
302
303 to = skb_push(skb, len);
304 *to++ = DCCPO_ELAPSED_TIME;
305 *to++ = len;
306
307 dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
308
309 dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
310 debug_prefix, elapsed_time,
311 len, DCCP_SKB_CB(skb)->dccpd_seq);
312}
313
314EXPORT_SYMBOL(dccp_insert_option_elapsed_time);
315
/*
 * Prepend the receiver's current ACK vector (option DCCPO_ACK_VECTOR_0).
 *
 * If time has passed since dccpap_time, an Elapsed Time option (in units
 * of 10 microseconds) is inserted first.  The vector itself is skipped
 * when it does not fit within DCCP_MAX_OPT_LEN or while a previously sent
 * ack record is still outstanding (dccpap_ack_seqno is not the
 * DCCP_MAX_SEQNO + 1 "none" marker).
 *
 * dccpap_buf_vector_len bytes are copied starting at dccpap_buf_head,
 * wrapping to the start of the circular buffer when needed, and the single
 * ack record (seqno, ptr, ackno, nonce, vector length) is then filled in.
 */
316static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
317{
318 struct dccp_sock *dp = dccp_sk(sk);
319#ifdef DCCP_DEBUG
320 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
321 "server TX opt: ";
322#endif
323 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
 /* on-wire option length: type + length + vector bytes */
324 int len = ap->dccpap_buf_vector_len + 2;
325 const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10;
326 unsigned char *to, *from;
327
328 if (elapsed_time != 0)
329 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
330
331 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
332 LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n"));
333 return;
334 }
335
336 /*
337 * XXX: now we have just one ack vector sent record, so
338 * we have to wait for it to be cleared.
339 *
340 * Of course this is not acceptable, but this is just for
341 * basic testing now.
342 */
343 if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
344 return;
345
346 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
347
348 to = skb_push(skb, len);
349 *to++ = DCCPO_ACK_VECTOR_0;
350 *to++ = len;
351
 /* from here on len counts the vector bytes only */
352 len = ap->dccpap_buf_vector_len;
353 from = ap->dccpap_buf + ap->dccpap_buf_head;
354
355 /* Check if buf_head wraps */
356 if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
357 const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head;
358
359 memcpy(to, from, tailsize);
360 to += tailsize;
361 len -= tailsize;
362 from = ap->dccpap_buf;
363 }
364
365 memcpy(to, from, len);
366 /*
367 * From draft-ietf-dccp-spec-11.txt:
368 *
369 * For each acknowledgement it sends, the HC-Receiver will add an
370 * acknowledgement record. ack_seqno will equal the HC-Receiver
371 * sequence number it used for the ack packet; ack_ptr will equal
372 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal
373 * buf_nonce.
374 *
375 * This implementation uses just one ack record for now.
376 */
377 ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
378 ap->dccpap_ack_ptr = ap->dccpap_buf_head;
379 ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
380 ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
381 ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
382
383 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n",
384 debug_prefix, ap->dccpap_ack_vector_len,
385 ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
386}
387
388static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
389{
390 const u32 now = htonl(tcp_time_stamp);
391 dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
392}
393
394static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb)
395{
396 struct dccp_sock *dp = dccp_sk(sk);
397#ifdef DCCP_DEBUG
398 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
399 "server TX opt: ";
400#endif
401 u32 tstamp_echo;
402 const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10;
403 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
404 const int len = 6 + elapsed_time_len;
405 unsigned char *to;
406
407 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
408 LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n"));
409 return;
410 }
411
412 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
413
414 to = skb_push(skb, len);
415 *to++ = DCCPO_TIMESTAMP_ECHO;
416 *to++ = len;
417
418 tstamp_echo = htonl(dp->dccps_timestamp_echo);
419 memcpy(to, &tstamp_echo, 4);
420 to += 4;
421 dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
422
423 dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
424 debug_prefix, dp->dccps_timestamp_echo,
425 len, DCCP_SKB_CB(skb)->dccpd_seq);
426
427 dp->dccps_timestamp_echo = 0;
428 dp->dccps_timestamp_time = 0;
429}
430
431void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
432{
433 struct dccp_sock *dp = dccp_sk(sk);
434
435 DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
436
437 if (dp->dccps_options.dccpo_send_ndp_count)
438 dccp_insert_option_ndp(sk, skb);
439
440 if (!dccp_packet_without_ack(skb)) {
441 if (dp->dccps_options.dccpo_send_ack_vector &&
442 dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)
443 dccp_insert_option_ack_vector(sk, skb);
444
445 dccp_insert_option_timestamp(sk, skb);
446 if (dp->dccps_timestamp_echo != 0)
447 dccp_insert_option_timestamp_echo(sk, skb);
448 }
449
450 ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
451 ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
452
453 /* XXX: insert other options when appropriate */
454
455 if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
456 /* The length of all options has to be a multiple of 4 */
457 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
458
459 if (padding != 0) {
460 padding = 4 - padding;
461 memset(skb_push(skb, padding), 0, padding);
462 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
463 }
464 }
465}
466
467struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority)
468{
469 struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
470
471 if (ap != NULL) {
472#ifdef DCCP_DEBUG
473 memset(ap->dccpap_buf, 0xFF, len);
474#endif
475 ap->dccpap_buf_len = len;
476 ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1;
477 ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
478 ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
479 ap->dccpap_ack_ptr = 0;
480 ap->dccpap_time = 0;
481 ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
482 }
483
484 return ap;
485}
486
/*
 * Release a structure obtained from dccp_ackpkts_alloc(); NULL is accepted.
 * Debug builds poison the memory with 0xFF before freeing it.
 */
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
	if (ap == NULL)
		return;

#ifdef DCCP_DEBUG
	memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
	kfree(ap);
}
496
497static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
498 const unsigned int index)
499{
500 return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
501}
502
503static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
504 const unsigned int index)
505{
506 return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
507}
508
509/*
510 * If several packets are missing, the HC-Receiver may prefer to enter multiple
511 * bytes with run length 0, rather than a single byte with a larger run length;
512 * this simplifies table updates if one of the missing packets arrives.
513 */
514static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
515 const unsigned int packets,
516 const unsigned char state)
517{
518 unsigned int gap;
519 signed long new_head;
520
521 if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
522 return -ENOBUFS;
523
524 gap = packets - 1;
525 new_head = ap->dccpap_buf_head - packets;
526
527 if (new_head < 0) {
528 if (gap > 0) {
529 memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
530 gap + new_head + 1);
531 gap = -new_head;
532 }
533 new_head += ap->dccpap_buf_len;
534 }
535
536 ap->dccpap_buf_head = new_head;
537
538 if (gap > 0)
539 memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
540 DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
541
542 ap->dccpap_buf[ap->dccpap_buf_head] = state;
543 ap->dccpap_buf_vector_len += packets;
544 return 0;
545}
546
547/*
548 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
549 */
550int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
551{
552 /*
553 * Check at the right places if the buffer is full, if it is, tell the
554 * caller to start dropping packets till the HC-Sender acks our ACK
555 * vectors, when we will free up space in dccpap_buf.
556 *
557 * We may well decide to do buffer compression, etc, but for now lets
558 * just drop.
559 *
560 * From Appendix A:
561 *
562 * Of course, the circular buffer may overflow, either when the HC-
563 * Sender is sending data at a very high rate, when the HC-Receiver's
564 * acknowledgements are not reaching the HC-Sender, or when the HC-
565 * Sender is forgetting to acknowledge those acks (so the HC-Receiver
566 * is unable to clean up old state). In this case, the HC-Receiver
567 * should either compress the buffer (by increasing run lengths when
568 * possible), transfer its state to a larger buffer, or, as a last
569 * resort, drop all received packets, without processing them
570 * whatsoever, until its buffer shrinks again.
571 */
572
573 /* See if this is the first ackno being inserted */
574 if (ap->dccpap_buf_vector_len == 0) {
575 ap->dccpap_buf[ap->dccpap_buf_head] = state;
576 ap->dccpap_buf_vector_len = 1;
577 } else if (after48(ackno, ap->dccpap_buf_ackno)) {
578 const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno);
579
580 /*
581 * Look if the state of this packet is the same as the previous ackno
582 * and if so if we can bump the head len.
583 */
584 if (delta == 1 &&
585 dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
586 dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK)
587 ap->dccpap_buf[ap->dccpap_buf_head]++;
588 else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
589 return -ENOBUFS;
590 } else {
591 /*
592 * A.1.2. Old Packets
593 *
594 * When a packet with Sequence Number S arrives, and S <= buf_ackno,
595 * the HC-Receiver will scan the table for the byte corresponding to S.
596 * (Indexing structures could reduce the complexity of this scan.)
597 */
598 u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
599 unsigned int index = ap->dccpap_buf_head;
600
601 while (1) {
602 const u8 len = dccp_ackpkts_len(ap, index);
603 const u8 state = dccp_ackpkts_state(ap, index);
604 /*
605 * valid packets not yet in dccpap_buf have a reserved entry, with
606 * a len equal to 0
607 */
608 if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
609 len == 0 && delta == 0) { /* Found our reserved seat! */
610 dccp_pr_debug("Found %llu reserved seat!\n", ackno);
611 ap->dccpap_buf[index] = state;
612 goto out;
613 }
614 /* len == 0 means one packet */
615 if (delta < len + 1)
616 goto out_duplicate;
617
618 delta -= len + 1;
619 if (++index == ap->dccpap_buf_len)
620 index = 0;
621 }
622 }
623
624 ap->dccpap_buf_ackno = ackno;
625 ap->dccpap_time = jiffies;
626out:
627 dccp_pr_debug("");
628 dccp_ackpkts_print(ap);
629 return 0;
630
631out_duplicate:
632 /* Duplicate packet */
633 dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno);
634 return -EILSEQ;
635}
636
637#ifdef DCCP_DEBUG
638void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
639{
640 if (!dccp_debug)
641 return;
642
643 printk("ACK vector len=%d, ackno=%llu |", len, ackno);
644
645 while (len--) {
646 const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
647 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
648
649 printk("%d,%d|", state, rl);
650 ++vector;
651 }
652
653 printk("\n");
654}
655
656void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
657{
658 dccp_ackvector_print(ap->dccpap_buf_ackno,
659 ap->dccpap_buf + ap->dccpap_buf_head,
660 ap->dccpap_buf_vector_len);
661}
662#endif
663
/*
 * Drop the single ack record: the live vector shrinks by the number of
 * cells that were sent with it (dccpap_ack_vector_len).  Both callers
 * invoke this once the peer has acknowledged the packet that carried
 * the record.
 */
664static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
665{
666 /*
667 * As we're keeping track of the ack vector size
668 * (dccpap_buf_vector_len) and the sent ack vector size
669 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
670 * keep this code here as in the future we'll implement a vector of ack
671 * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme
672 */
673#if 0
674 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
675 if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
676 ap->dccpap_buf_tail -= ap->dccpap_buf_len;
677#endif
678 ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
679}
680
681void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
682 u64 ackno)
683{
684 /* Check if we actually sent an ACK vector */
685 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
686 return;
687
688 if (ackno == ap->dccpap_ack_seqno) {
689#ifdef DCCP_DEBUG
690 struct dccp_sock *dp = dccp_sk(sk);
691 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
692 "server rx ack: ";
693#endif
694 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
695 debug_prefix, 1,
696 ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
697 dccp_ackpkts_trow_away_ack_record(ap);
698 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
699 }
700}
701
/*
 * Process an ACK vector received from the peer and find out whether it
 * covers the packet that carried our own ack record.
 *
 * The @len cells of @vector are walked starting at @ackno; each cell
 * covers the run of sequence numbers from (ackno - run length) to ackno,
 * and the next cell continues right below that run.  When the run that
 * contains dccpap_ack_seqno is found, the ack record is thrown away if the
 * cell's state is anything but NOT_RECEIVED, and in either case the record
 * is marked as unused (DCCP_MAX_SEQNO + 1) and the walk stops.
 *
 * Nothing is done when no record is outstanding or when @ackno is before
 * dccpap_ack_seqno.
 */
702static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
703 struct sock *sk, u64 ackno,
704 const unsigned char len,
705 const unsigned char *vector)
706{
707 unsigned char i;
708
709 /* Check if we actually sent an ACK vector */
710 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
711 return;
712 /*
713 * We're in the receiver half connection, so if the received an ACK vector
714 * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested.
715 *
716 * Extra explanation with example:
717 *
718 * if we received an ACK vector with ackno 50, it can only be acking
719 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
720 */
721 // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno);
722 if (before48(ackno, ap->dccpap_ack_seqno)) {
723 // dccp_pr_debug_cat("yes\n");
724 return;
725 }
726 // dccp_pr_debug_cat("no\n");
727
728 i = len;
729 while (i--) {
730 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
 /* lowest sequence number covered by this cell */
731 u64 ackno_end_rl;
732
733 dccp_set_seqno(&ackno_end_rl, ackno - rl);
734
735 // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno);
736 if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
737 const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
738 // dccp_pr_debug_cat("yes\n");
739
740 if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
741#ifdef DCCP_DEBUG
742 struct dccp_sock *dp = dccp_sk(sk);
743 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
744 "server rx ack: ";
745#endif
746 dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
747 debug_prefix, len,
748 ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
749 dccp_ackpkts_trow_away_ack_record(ap);
750 }
751 /*
752 * If dccpap_ack_seqno was not received, no problem we'll
753 * send another ACK vector.
754 */
755 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
756 break;
757 }
758 // dccp_pr_debug_cat("no\n");
759
 /* the next cell starts right below the run just examined */
760 dccp_set_seqno(&ackno, ackno_end_rl - 1);
761 ++vector;
762 }
763}
diff --git a/net/dccp/output.c b/net/dccp/output.c
new file mode 100644
index 000000000000..22ca2910d4f2
--- /dev/null
+++ b/net/dccp/output.c
@@ -0,0 +1,406 @@
1/*
2 * net/dccp/output.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
/*
 * Bookkeeping after an acknowledgement has been sent: clears the
 * ICSK_TIME_DACK transmit timer of @sk.
 */
22static inline void dccp_event_ack_sent(struct sock *sk)
23{
24 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
25}
26
27/*
28 * All SKB's seen here are completely headerless. It is our
29 * job to build the DCCP header, and pass the packet down to
30 * IP so it can do the same plus pass the packet off to the
31 * device.
32 */
33int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
34{
35 if (likely(skb != NULL)) {
36 const struct inet_sock *inet = inet_sk(sk);
37 struct dccp_sock *dp = dccp_sk(sk);
38 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
39 struct dccp_hdr *dh;
40 /* XXX For now we're using only 48 bits sequence numbers */
41 const int dccp_header_size = sizeof(*dh) +
42 sizeof(struct dccp_hdr_ext) +
43 dccp_packet_hdr_len(dcb->dccpd_type);
44 int err, set_ack = 1;
45 u64 ackno = dp->dccps_gsr;
46
47 /*
48 * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing
49 * to do here...
50 */
51 dccp_inc_seqno(&dp->dccps_gss);
52
53 dcb->dccpd_seq = dp->dccps_gss;
54 dccp_insert_options(sk, skb);
55
56 switch (dcb->dccpd_type) {
57 case DCCP_PKT_DATA:
58 set_ack = 0;
59 break;
60 case DCCP_PKT_SYNC:
61 case DCCP_PKT_SYNCACK:
62 ackno = dcb->dccpd_seq;
63 break;
64 }
65
66 skb->h.raw = skb_push(skb, dccp_header_size);
67 dh = dccp_hdr(skb);
68 /* Data packets are not cloned as they are never retransmitted */
69 if (skb_cloned(skb))
70 skb_set_owner_w(skb, sk);
71
72 /* Build DCCP header and checksum it. */
73 memset(dh, 0, dccp_header_size);
74 dh->dccph_type = dcb->dccpd_type;
75 dh->dccph_sport = inet->sport;
76 dh->dccph_dport = inet->dport;
77 dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
78 dh->dccph_ccval = dcb->dccpd_ccval;
79 /* XXX For now we're using only 48 bits sequence numbers */
80 dh->dccph_x = 1;
81
82 dp->dccps_awh = dp->dccps_gss;
83 dccp_hdr_set_seq(dh, dp->dccps_gss);
84 if (set_ack)
85 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
86
87 switch (dcb->dccpd_type) {
88 case DCCP_PKT_REQUEST:
89 dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service;
90 break;
91 case DCCP_PKT_RESET:
92 dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code;
93 break;
94 }
95
96 dh->dccph_checksum = dccp_v4_checksum(skb);
97
98 if (dcb->dccpd_type == DCCP_PKT_ACK ||
99 dcb->dccpd_type == DCCP_PKT_DATAACK)
100 dccp_event_ack_sent(sk);
101
102 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
103
104 err = ip_queue_xmit(skb, 0);
105 if (err <= 0)
106 return err;
107
108 /* NET_XMIT_CN is special. It does not guarantee,
109 * that this packet is lost. It tells that device
110 * is about to start to drop packets or already
111 * drops some packets of the same priority and
112 * invokes us to send less aggressively.
113 */
114 return err == NET_XMIT_CN ? 0 : err;
115 }
116 return -ENOBUFS;
117}
118
119unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
120{
121 struct dccp_sock *dp = dccp_sk(sk);
122 int mss_now;
123
124 /*
125 * FIXME: we really should be using the af_specific thing to support IPv6.
126 * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
127 */
128 mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
129
130 /* Now subtract optional transport overhead */
131 mss_now -= dp->dccps_ext_header_len;
132
133 /*
134 * FIXME: this should come from the CCID infrastructure, where, say,
135 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
136 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
137 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
138 * make it a multiple of 4
139 */
140
141 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
142
143 /* And store cached results */
144 dp->dccps_pmtu_cookie = pmtu;
145 dp->dccps_mss_cache = mss_now;
146
147 return mss_now;
148}
149
150int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
151{
152 if (inet_sk_rebuild_header(sk) != 0)
153 return -EHOSTUNREACH; /* Routing failure or similar. */
154
155 return dccp_transmit_skb(sk, (skb_cloned(skb) ?
156 pskb_copy(skb, GFP_ATOMIC):
157 skb_clone(skb, GFP_ATOMIC)));
158}
159
160struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
161 struct request_sock *req)
162{
163 struct dccp_hdr *dh;
164 const int dccp_header_size = sizeof(struct dccp_hdr) +
165 sizeof(struct dccp_hdr_ext) +
166 sizeof(struct dccp_hdr_response);
167 struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
168 dccp_header_size, 1,
169 GFP_ATOMIC);
170 if (skb == NULL)
171 return NULL;
172
173 /* Reserve space for headers. */
174 skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
175
176 skb->dst = dst_clone(dst);
177 skb->csum = 0;
178
179 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
180 DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
181 dccp_insert_options(sk, skb);
182
183 skb->h.raw = skb_push(skb, dccp_header_size);
184
185 dh = dccp_hdr(skb);
186 memset(dh, 0, dccp_header_size);
187
188 dh->dccph_sport = inet_sk(sk)->sport;
189 dh->dccph_dport = inet_rsk(req)->rmt_port;
190 dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
191 dh->dccph_type = DCCP_PKT_RESPONSE;
192 dh->dccph_x = 1;
193 dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
194 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
195
196 dh->dccph_checksum = dccp_v4_checksum(skb);
197
198 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
199 return skb;
200}
201
202struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
203 const enum dccp_reset_codes code)
204
205{
206 struct dccp_hdr *dh;
207 struct dccp_sock *dp = dccp_sk(sk);
208 const int dccp_header_size = sizeof(struct dccp_hdr) +
209 sizeof(struct dccp_hdr_ext) +
210 sizeof(struct dccp_hdr_reset);
211 struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
212 dccp_header_size, 1,
213 GFP_ATOMIC);
214 if (skb == NULL)
215 return NULL;
216
217 /* Reserve space for headers. */
218 skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
219
220 skb->dst = dst_clone(dst);
221 skb->csum = 0;
222
223 dccp_inc_seqno(&dp->dccps_gss);
224
225 DCCP_SKB_CB(skb)->dccpd_reset_code = code;
226 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
227 DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
228 dccp_insert_options(sk, skb);
229
230 skb->h.raw = skb_push(skb, dccp_header_size);
231
232 dh = dccp_hdr(skb);
233 memset(dh, 0, dccp_header_size);
234
235 dh->dccph_sport = inet_sk(sk)->sport;
236 dh->dccph_dport = inet_sk(sk)->dport;
237 dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
238 dh->dccph_type = DCCP_PKT_RESET;
239 dh->dccph_x = 1;
240 dccp_hdr_set_seq(dh, dp->dccps_gss);
241 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
242
243 dccp_hdr_reset(skb)->dccph_reset_code = code;
244
245 dh->dccph_checksum = dccp_v4_checksum(skb);
246
247 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
248 return skb;
249}
250
251/*
252 * Do all connect socket setups that can be done AF independent.
253 */
254static inline void dccp_connect_init(struct sock *sk)
255{
256 struct dst_entry *dst = __sk_dst_get(sk);
257 struct inet_connection_sock *icsk = inet_csk(sk);
258
259 sk->sk_err = 0;
260 sock_reset_flag(sk, SOCK_DONE);
261
262 dccp_sync_mss(sk, dst_mtu(dst));
263
264 /*
265 * FIXME: set dp->{dccps_swh,dccps_swl}, with
266 * something like dccp_inc_seq
267 */
268
269 icsk->icsk_retransmits = 0;
270}
271
272int dccp_connect(struct sock *sk)
273{
274 struct sk_buff *skb;
275 struct inet_connection_sock *icsk = inet_csk(sk);
276
277 dccp_connect_init(sk);
278
279 skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
280 if (unlikely(skb == NULL))
281 return -ENOBUFS;
282
283 /* Reserve space for headers. */
284 skb_reserve(skb, MAX_DCCP_HEADER);
285
286 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
287 /* FIXME: set service to something meaningful, coming
288 * from userspace*/
289 DCCP_SKB_CB(skb)->dccpd_service = 0;
290 skb->csum = 0;
291 skb_set_owner_w(skb, sk);
292
293 BUG_TRAP(sk->sk_send_head == NULL);
294 sk->sk_send_head = skb;
295 dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
296 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
297
298 /* Timer for repeating the REQUEST until an answer. */
299 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
300 return 0;
301}
302
303void dccp_send_ack(struct sock *sk)
304{
305 /* If we have been reset, we may not send again. */
306 if (sk->sk_state != DCCP_CLOSED) {
307 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
308
309 if (skb == NULL) {
310 inet_csk_schedule_ack(sk);
311 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
312 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX);
313 return;
314 }
315
316 /* Reserve space for headers */
317 skb_reserve(skb, MAX_DCCP_HEADER);
318 skb->csum = 0;
319 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
320 skb_set_owner_w(skb, sk);
321 dccp_transmit_skb(sk, skb);
322 }
323}
324
325EXPORT_SYMBOL_GPL(dccp_send_ack);
326
327void dccp_send_delayed_ack(struct sock *sk)
328{
329 struct inet_connection_sock *icsk = inet_csk(sk);
330 /*
331 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
332 * with using 2s, and active senders also piggyback the ACK into a
333 * DATAACK packet, so this is really for quiescent senders.
334 */
335 unsigned long timeout = jiffies + 2 * HZ;
336
337 /* Use new timeout only if there wasn't a older one earlier. */
338 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
339 /* If delack timer was blocked or is about to expire,
340 * send ACK now.
341 *
342 * FIXME: check the "about to expire" part
343 */
344 if (icsk->icsk_ack.blocked) {
345 dccp_send_ack(sk);
346 return;
347 }
348
349 if (!time_before(timeout, icsk->icsk_ack.timeout))
350 timeout = icsk->icsk_ack.timeout;
351 }
352 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
353 icsk->icsk_ack.timeout = timeout;
354 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
355}
356
357void dccp_send_sync(struct sock *sk, u64 seq)
358{
359 /*
360 * We are not putting this on the write queue, so
361 * dccp_transmit_skb() will set the ownership to this
362 * sock.
363 */
364 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
365
366 if (skb == NULL)
367 /* FIXME: how to make sure the sync is sent? */
368 return;
369
370 /* Reserve space for headers and prepare control bits. */
371 skb_reserve(skb, MAX_DCCP_HEADER);
372 skb->csum = 0;
373 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC;
374 DCCP_SKB_CB(skb)->dccpd_seq = seq;
375
376 skb_set_owner_w(skb, sk);
377 dccp_transmit_skb(sk, skb);
378}
379
380/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be
381 * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances.
382 */
383void dccp_send_close(struct sock *sk)
384{
385 struct dccp_sock *dp = dccp_sk(sk);
386 struct sk_buff *skb;
387
388 /* Socket is locked, keep trying until memory is available. */
389 for (;;) {
390 skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL);
391 if (skb != NULL)
392 break;
393 yield();
394 }
395
396 /* Reserve space for headers and prepare control bits. */
397 skb_reserve(skb, sk->sk_prot->max_header);
398 skb->csum = 0;
399 DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
400
401 skb_set_owner_w(skb, sk);
402 dccp_transmit_skb(sk, skb);
403
404 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
405 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
406}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
new file mode 100644
index 000000000000..70284e6afe05
--- /dev/null
+++ b/net/dccp/proto.c
@@ -0,0 +1,818 @@
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13#include <linux/dccp.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/if_arp.h>
22#include <linux/init.h>
23#include <linux/random.h>
24#include <net/checksum.h>
25
26#include <net/inet_common.h>
27#include <net/ip.h>
28#include <net/protocol.h>
29#include <net/sock.h>
30#include <net/xfrm.h>
31
32#include <asm/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/timer.h>
35#include <linux/delay.h>
36#include <linux/poll.h>
37#include <linux/dccp.h>
38
39#include "ccid.h"
40#include "dccp.h"
41
/*
 * DCCP MIB counters. dccp_statistics[0] and [1] are allocated per-CPU
 * by init_dccp_v4_mibs().
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);

/* Incremented by dccp_close() each time it orphans a socket. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/*
 * IPv4 protocol hooks; registered for IPPROTO_DCCP in dccp_init() and
 * removed again in dccp_fini().
 */
static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};
50
51const char *dccp_packet_name(const int type)
52{
53 static const char *dccp_packet_names[] = {
54 [DCCP_PKT_REQUEST] = "REQUEST",
55 [DCCP_PKT_RESPONSE] = "RESPONSE",
56 [DCCP_PKT_DATA] = "DATA",
57 [DCCP_PKT_ACK] = "ACK",
58 [DCCP_PKT_DATAACK] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE] = "CLOSE",
61 [DCCP_PKT_RESET] = "RESET",
62 [DCCP_PKT_SYNC] = "SYNC",
63 [DCCP_PKT_SYNCACK] = "SYNCACK",
64 };
65
66 if (type >= DCCP_NR_PKT_TYPES)
67 return "INVALID";
68 else
69 return dccp_packet_names[type];
70}
71
72EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74const char *dccp_state_name(const int state)
75{
76 static char *dccp_state_names[] = {
77 [DCCP_OPEN] = "OPEN",
78 [DCCP_REQUESTING] = "REQUESTING",
79 [DCCP_PARTOPEN] = "PARTOPEN",
80 [DCCP_LISTEN] = "LISTEN",
81 [DCCP_RESPOND] = "RESPOND",
82 [DCCP_CLOSING] = "CLOSING",
83 [DCCP_TIME_WAIT] = "TIME_WAIT",
84 [DCCP_CLOSED] = "CLOSED",
85 };
86
87 if (state >= DCCP_MAX_STATES)
88 return "INVALID STATE!";
89 else
90 return dccp_state_names[state];
91}
92
93EXPORT_SYMBOL_GPL(dccp_state_name);
94
/*
 * Put @sk into the listening role: record DCCP_ROLE_LISTEN and start
 * the inet_connection_sock listen machinery with TCP_SYNQ_HSIZE as the
 * queue size argument. Returns whatever inet_csk_listen_start() returns.
 */
static inline int dccp_listen_start(struct sock *sk)
{
	dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
100
101int dccp_disconnect(struct sock *sk, int flags)
102{
103 struct inet_connection_sock *icsk = inet_csk(sk);
104 struct inet_sock *inet = inet_sk(sk);
105 int err = 0;
106 const int old_state = sk->sk_state;
107
108 if (old_state != DCCP_CLOSED)
109 dccp_set_state(sk, DCCP_CLOSED);
110
111 /* ABORT function of RFC793 */
112 if (old_state == DCCP_LISTEN) {
113 inet_csk_listen_stop(sk);
114 /* FIXME: do the active reset thing */
115 } else if (old_state == DCCP_REQUESTING)
116 sk->sk_err = ECONNRESET;
117
118 dccp_clear_xmit_timers(sk);
119 __skb_queue_purge(&sk->sk_receive_queue);
120 if (sk->sk_send_head != NULL) {
121 __kfree_skb(sk->sk_send_head);
122 sk->sk_send_head = NULL;
123 }
124
125 inet->dport = 0;
126
127 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
128 inet_reset_saddr(sk);
129
130 sk->sk_shutdown = 0;
131 sock_reset_flag(sk, SOCK_DONE);
132
133 icsk->icsk_backoff = 0;
134 inet_csk_delack_init(sk);
135 __sk_dst_reset(sk);
136
137 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
138
139 sk->sk_error_report(sk);
140 return err;
141}
142
/*
 * ioctl handler stub: no DCCP ioctl is implemented. Logs entry through
 * dccp_pr_debug() and returns -ENOIOCTLCMD for every @cmd.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
148
149int dccp_setsockopt(struct sock *sk, int level, int optname,
150 char *optval, int optlen)
151{
152 dccp_pr_debug("entry\n");
153
154 if (level != SOL_DCCP)
155 return ip_setsockopt(sk, level, optname, optval, optlen);
156
157 return -EOPNOTSUPP;
158}
159
160int dccp_getsockopt(struct sock *sk, int level, int optname,
161 char *optval, int *optlen)
162{
163 dccp_pr_debug("entry\n");
164
165 if (level != SOL_DCCP)
166 return ip_getsockopt(sk, level, optname, optval, optlen);
167
168 return -EOPNOTSUPP;
169}
170
171int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
172 size_t len)
173{
174 const struct dccp_sock *dp = dccp_sk(sk);
175 const int flags = msg->msg_flags;
176 const int noblock = flags & MSG_DONTWAIT;
177 struct sk_buff *skb;
178 int rc, size;
179 long timeo;
180
181 if (len > dp->dccps_mss_cache)
182 return -EMSGSIZE;
183
184 lock_sock(sk);
185
186 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
187
188 /*
189 * We have to use sk_stream_wait_connect here to set sk_write_pending,
190 * so that the trick in dccp_rcv_request_sent_state_process.
191 */
192 /* Wait for a connection to finish. */
193 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
194 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
195 goto out_err;
196
197 size = sk->sk_prot->max_header + len;
198 release_sock(sk);
199 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
200 lock_sock(sk);
201
202 if (skb == NULL)
203 goto out_release;
204
205 skb_reserve(skb, sk->sk_prot->max_header);
206 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
207 if (rc == 0) {
208 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
209 const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
210 long delay;
211
212 /*
213 * XXX: This is just to match the Waikato tree CA interaction
214 * points, after the CCID3 code is stable and I have a better
215 * understanding of behaviour I'll change this to look more like
216 * TCP.
217 */
218 while (1) {
219 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk,
220 skb, len, &delay);
221 if (rc == 0)
222 break;
223 if (rc != -EAGAIN)
224 goto out_discard;
225 if (delay > timeo)
226 goto out_discard;
227 release_sock(sk);
228 delay = schedule_timeout(delay);
229 lock_sock(sk);
230 timeo -= delay;
231 if (signal_pending(current))
232 goto out_interrupted;
233 rc = -EPIPE;
234 if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN))
235 goto out_discard;
236 }
237
238 if (sk->sk_state == DCCP_PARTOPEN) {
239 /* See 8.1.5. Handshake Completion */
240 inet_csk_schedule_ack(sk);
241 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
242 dcb->dccpd_type = DCCP_PKT_DATAACK;
243 /* FIXME: we really should have a dccps_ack_pending or use icsk */
244 } else if (inet_csk_ack_scheduled(sk) ||
245 (dp->dccps_options.dccpo_send_ack_vector &&
246 ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
247 ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
248 dcb->dccpd_type = DCCP_PKT_DATAACK;
249 else
250 dcb->dccpd_type = DCCP_PKT_DATA;
251 dccp_transmit_skb(sk, skb);
252 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
253 } else {
254out_discard:
255 kfree_skb(skb);
256 }
257out_release:
258 release_sock(sk);
259 return rc ? : len;
260out_err:
261 rc = sk_stream_error(sk, flags, rc);
262 goto out_release;
263out_interrupted:
264 rc = sock_intr_errno(timeo);
265 goto out_discard;
266}
267
268EXPORT_SYMBOL(dccp_sendmsg);
269
/*
 * dccp_recvmsg - receive data from a DCCP socket
 * @iocb:     unused here
 * @sk:       socket to read from
 * @msg:      destination; only msg_iov is used
 * @len:      maximum number of bytes to copy
 * @nonblock: non-zero for a non-blocking read (fed to sock_rcvtimeo())
 * @flags:    MSG_PEEK, MSG_TRUNC, MSG_WAITALL and MSG_OOB are examined
 * @addr_len: unused here
 *
 * Walks sk_receive_queue looking for DATA/DATAACK packets to copy out;
 * a RESET or CLOSE packet ends the read. Returns the number of bytes
 * copied, or a negative errno when nothing was copied. A listening
 * socket gets -ENOTCONN.
 *
 * NOTE(review): the MSG_OOB path is unimplemented; it jumps to recv_urg
 * and from there to out, returning err, which still holds the -ENOTCONN
 * assigned at the top of the function.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	int copied = 0;
	unsigned long used;
	int err;
	int target;		/* Read at least this many bytes */
	long timeo;

	lock_sock(sk);

	err = -ENOTCONN;
	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	timeo = sock_rcvtimeo(sk, nonblock);

	/* Urgent data needs to be handled specially. */
	if (flags & MSG_OOB)
		goto recv_urg;

	/* FIXME */
#if 0
	seq = &tp->copied_seq;
	if (flags & MSG_PEEK) {
		peek_seq = tp->copied_seq;
		seq = &peek_seq;
	}
#endif

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	do {
		struct sk_buff *skb;
		u32 offset;

	/* FIXME */
#if 0
		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
		if (tp->urg_data && tp->urg_seq == *seq) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
				break;
			}
		}
#endif

		/* Next get a buffer. */

		skb = skb_peek(&sk->sk_receive_queue);
		do {
			if (!skb)
				break;

			/* Copying always starts at the beginning of the skb. */
			offset = 0;
			dh = dccp_hdr(skb);

			if (dh->dccph_type == DCCP_PKT_DATA ||
			    dh->dccph_type == DCCP_PKT_DATAACK)
				goto found_ok_skb;

			if (dh->dccph_type == DCCP_PKT_RESET ||
			    dh->dccph_type == DCCP_PKT_CLOSE) {
				dccp_pr_debug("found fin ok!\n");
				goto found_fin_ok;
			}
			/*
			 * Any other packet type is skipped; it is only
			 * expected to be seen here while peeking.
			 */
			dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type));
			BUG_TRAP(flags & MSG_PEEK);
			skb = skb->next;
		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);

		/* Well, if we have backlog, try to process it now yet. */
		if (copied >= target && !sk->sk_backlog.tail)
			break;

		if (copied) {
			/* Some data was delivered already: stop on any reason not to wait. */
			if (sk->sk_err ||
			    sk->sk_state == DCCP_CLOSED ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    !timeo ||
			    signal_pending(current) ||
			    (flags & MSG_PEEK))
				break;
		} else {
			/* Nothing delivered yet: work out what to return, if anything. */
			if (sock_flag(sk, SOCK_DONE))
				break;

			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}

			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;

			if (sk->sk_state == DCCP_CLOSED) {
				if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
					copied = -ENOTCONN;
					break;
				}
				break;
			}

			if (!timeo) {
				copied = -EAGAIN;
				break;
			}

			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		/* FIXME: cleanup_rbuf(sk, copied); */

		if (copied >= target) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else
			sk_wait_data(sk, &timeo);

		continue;

	found_ok_skb:
		/* Ok so how much can we use? */
		used = skb->len - offset;
		if (len < used)
			used = len;

		/* With MSG_TRUNC the bytes are counted but not copied. */
		if (!(flags & MSG_TRUNC)) {
			err = skb_copy_datagram_iovec(skb, offset,
						      msg->msg_iov, used);
			if (err) {
				/* Exception. Bailout! */
				if (!copied)
					copied = -EFAULT;
				break;
			}
		}

		copied += used;
		len -= used;

		/* FIXME: tcp_rcv_space_adjust(sk); */

//skip_copy:
		/*
		 * The skb was only partly consumed: leave it on the queue.
		 * This happens when used was clamped to len, so len is 0
		 * now and the loop condition ends the read.
		 */
		if (used + offset < skb->len)
			continue;

		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		continue;
	found_fin_ok:
		/* Consume the RESET/CLOSE packet (unless peeking) and stop. */
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;

	} while (len > 0);

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	/* FIXME: cleanup_rbuf(sk, copied); */

	release_sock(sk);
	return copied;

out:
	release_sock(sk);
	return err;

recv_urg:
	/* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */
	goto out;
}
455
456static int inet_dccp_listen(struct socket *sock, int backlog)
457{
458 struct sock *sk = sock->sk;
459 unsigned char old_state;
460 int err;
461
462 lock_sock(sk);
463
464 err = -EINVAL;
465 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
466 goto out;
467
468 old_state = sk->sk_state;
469 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
470 goto out;
471
472 /* Really, if the socket is already in listen state
473 * we can only allow the backlog to be adjusted.
474 */
475 if (old_state != DCCP_LISTEN) {
476 /*
477 * FIXME: here it probably should be sk->sk_prot->listen_start
478 * see tcp_listen_start
479 */
480 err = dccp_listen_start(sk);
481 if (err)
482 goto out;
483 }
484 sk->sk_max_ack_backlog = backlog;
485 err = 0;
486
487out:
488 release_sock(sk);
489 return err;
490}
491
/*
 * Close-time state transition table, indexed by the current sk_state
 * (see dccp_close_state()). Each entry holds the state to move to,
 * optionally OR-ed with DCCP_ACTION_FIN; dccp_close_state() returns
 * that flag and dccp_close() sends a close packet when it is set.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN] 	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
504
505static int dccp_close_state(struct sock *sk)
506{
507 const int next = dccp_new_state[sk->sk_state];
508 const int ns = next & DCCP_STATE_MASK;
509
510 if (ns != sk->sk_state)
511 dccp_set_state(sk, ns);
512
513 return next & DCCP_ACTION_FIN;
514}
515
/*
 * dccp_close - close a DCCP socket on behalf of the user
 * @sk:      socket being closed
 * @timeout: passed to sk_stream_wait_close()
 *
 * Under the socket lock: marks both directions shut down, stops a
 * listener, discards unread data, then either disconnects (zero
 * linger) or runs the close state transition and sends a close packet
 * when the transition asks for it. Afterwards the socket is orphaned
 * under the BH lock, forced to DCCP_CLOSED and destroyed.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* The transition table asked for a close packet. */
		dccp_send_close(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Hold a reference across the teardown; dropped by sock_put() below. */
	sock_hold(sk);
	sock_orphan(sk);

	if (sk->sk_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	atomic_inc(&dccp_orphan_count);
	/*
	 * NOTE(review): the state was forced to DCCP_CLOSED just above, so
	 * this test is expected to hold every time (assuming
	 * dccp_set_state() stores the state it is given).
	 */
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
578
/*
 * shutdown() stub: not implemented yet. @how is ignored and the socket
 * is left untouched; only a debug message is emitted.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
583
/*
 * Socket-layer operations for PF_INET DCCP sockets, referenced from
 * dccp_v4_protosw. Everything except .listen is a generic inet_ or
 * sock_ helper; poll, mmap, sendpage and socketpair use the sock_no_*
 * variants.
 */
struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	.poll		= sock_no_poll,
	.ioctl		= inet_ioctl,
	.listen		= inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
604
605extern struct net_proto_family inet_family_ops;
606
/*
 * Ties (SOCK_DCCP, IPPROTO_DCCP) to the DCCP struct proto and to
 * inet_dccp_ops. Registered in dccp_init() and unregistered in
 * dccp_fini().
 */
static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= 0,
};
616
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

/* Printed by dccp_ctl_sock_init() when creating the control socket fails. */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
626
627static int __init dccp_ctl_sock_init(void)
628{
629 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
630 &dccp_ctl_socket);
631 if (rc < 0)
632 printk(dccp_ctl_socket_err_msg);
633 else {
634 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
635 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
636
637 /* Unhash it so that IP input processing does not even
638 * see it, we do not wish this socket to see incoming
639 * packets.
640 */
641 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
642 }
643
644 return rc;
645}
646
/*
 * Release the control socket if one was created.
 *
 * NOTE(review): dccp_ctl_socket is not cleared afterwards, so it keeps
 * pointing at the released socket; callers must not release it again.
 */
static void __exit dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL)
		sock_release(dccp_ctl_socket);
}
652
653static int __init init_dccp_v4_mibs(void)
654{
655 int rc = -ENOMEM;
656
657 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
658 if (dccp_statistics[0] == NULL)
659 goto out;
660
661 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
662 if (dccp_statistics[1] == NULL)
663 goto out_free_one;
664
665 rc = 0;
666out:
667 return rc;
668out_free_one:
669 free_percpu(dccp_statistics[0]);
670 dccp_statistics[0] = NULL;
671 goto out;
672
673}
674
/*
 * Read-only (0444) module parameter. When non-zero, dccp_init() uses it
 * instead of the memory-based heuristic to size the established hash.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

/* Read-only (0444) module parameter; see MODULE_PARM_DESC below. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
682
683static int __init dccp_init(void)
684{
685 unsigned long goal;
686 int ehash_order, bhash_order, i;
687 int rc = proto_register(&dccp_v4_prot, 1);
688
689 if (rc)
690 goto out;
691
692 dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket",
693 sizeof(struct inet_bind_bucket),
694 0, SLAB_HWCACHE_ALIGN,
695 NULL, NULL);
696 if (!dccp_hashinfo.bind_bucket_cachep)
697 goto out_proto_unregister;
698
699 /*
700 * Size and allocate the main established and bind bucket
701 * hash tables.
702 *
703 * The methodology is similar to that of the buffer cache.
704 */
705 if (num_physpages >= (128 * 1024))
706 goal = num_physpages >> (21 - PAGE_SHIFT);
707 else
708 goal = num_physpages >> (23 - PAGE_SHIFT);
709
710 if (thash_entries)
711 goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
712 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
713 ;
714 do {
715 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
716 sizeof(struct inet_ehash_bucket);
717 dccp_hashinfo.ehash_size >>= 1;
718 while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1))
719 dccp_hashinfo.ehash_size--;
720 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
721 __get_free_pages(GFP_ATOMIC, ehash_order);
722 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
723
724 if (!dccp_hashinfo.ehash) {
725 printk(KERN_CRIT "Failed to allocate DCCP "
726 "established hash table\n");
727 goto out_free_bind_bucket_cachep;
728 }
729
730 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
731 rwlock_init(&dccp_hashinfo.ehash[i].lock);
732 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
733 }
734
735 bhash_order = ehash_order;
736
737 do {
738 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
739 sizeof(struct inet_bind_hashbucket);
740 if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0)
741 continue;
742 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
743 __get_free_pages(GFP_ATOMIC, bhash_order);
744 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
745
746 if (!dccp_hashinfo.bhash) {
747 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
748 goto out_free_dccp_ehash;
749 }
750
751 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
752 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
753 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
754 }
755
756 if (init_dccp_v4_mibs())
757 goto out_free_dccp_bhash;
758
759 rc = -EAGAIN;
760 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
761 goto out_free_dccp_v4_mibs;
762
763 inet_register_protosw(&dccp_v4_protosw);
764
765 rc = dccp_ctl_sock_init();
766 if (rc)
767 goto out_unregister_protosw;
768out:
769 return rc;
770out_unregister_protosw:
771 inet_unregister_protosw(&dccp_v4_protosw);
772 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
773out_free_dccp_v4_mibs:
774 free_percpu(dccp_statistics[0]);
775 free_percpu(dccp_statistics[1]);
776 dccp_statistics[0] = dccp_statistics[1] = NULL;
777out_free_dccp_bhash:
778 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
779 dccp_hashinfo.bhash = NULL;
780out_free_dccp_ehash:
781 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
782 dccp_hashinfo.ehash = NULL;
783out_free_bind_bucket_cachep:
784 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
785 dccp_hashinfo.bind_bucket_cachep = NULL;
786out_proto_unregister:
787 proto_unregister(&dccp_v4_prot);
788 goto out;
789}
790
791static const char dccp_del_proto_err_msg[] __exitdata =
792 KERN_ERR "can't remove dccp net_protocol\n";
793
794static void __exit dccp_fini(void)
795{
796 dccp_ctl_sock_exit();
797
798 inet_unregister_protosw(&dccp_v4_protosw);
799
800 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
801 printk(dccp_del_proto_err_msg);
802
803 /* Free the control endpoint. */
804 sock_release(dccp_ctl_socket);
805
806 proto_unregister(&dccp_v4_prot);
807
808 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
809}
810
811module_init(dccp_init);
812module_exit(dccp_fini);
813
814/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */
815MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6");
816MODULE_LICENSE("GPL");
817MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
818MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
new file mode 100644
index 000000000000..8c396ee01aac
--- /dev/null
+++ b/net/dccp/timer.c
@@ -0,0 +1,249 @@
1/*
2 * net/dccp/timer.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include "dccp.h"
18
19static void dccp_write_timer(unsigned long data);
20static void dccp_keepalive_timer(unsigned long data);
21static void dccp_delack_timer(unsigned long data);
22
/*
 * Install the DCCP handlers for the three inet_connection_sock timers
 * of @sk: retransmit (dccp_write_timer), delayed ACK
 * (dccp_delack_timer) and keepalive (dccp_keepalive_timer).
 */
void dccp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
				  &dccp_keepalive_timer);
}
28
/*
 * Give up on the connection after too many retransmissions: report the
 * pending soft error (or ETIMEDOUT if there is none) to the user, send
 * a RESET with code "aborted", finish the socket and count the abort.
 */
static void dccp_write_err(struct sock *sk)
{
	/* GNU "?:" - keep sk_err_soft when it is non-zero. */
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);

	dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
	dccp_done(sk);
	DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
38
/*
 * A write timeout has occurred. Process the after effects.
 *
 * Picks a retry limit depending on the state (connection setup versus
 * established), gives the routing layer negative advice where
 * appropriate, and aborts the connection through dccp_write_err() once
 * icsk_retransmits reaches the limit.
 *
 * Returns 1 if the connection was aborted, 0 if retransmission should
 * go on.
 */
static int dccp_write_timeout(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int retry_until;

	if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
		/* Still setting up the connection. */
		if (icsk->icsk_retransmits != 0)
			dst_negative_advice(&sk->sk_dst_cache);
		/* Per-socket limit when set, otherwise a hard-coded 3. */
		retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
	} else {
		if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
			   hole detection. :-(

			   It is place to make it. It is not made. I do not want
			   to make it. It is disguisting. It does not work in any
			   case. Let me to cite the same draft, which requires for
			   us to implement this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages.  It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols.  The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it.  It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

			   Golden words :-).
			*/

			dst_negative_advice(&sk->sk_dst_cache);
		}

		retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
		/*
		 * FIXME: see tcp_write_timeout and tcp_out_of_resources
		 */
	}

	if (icsk->icsk_retransmits >= retry_until) {
		/* Has it gone just too far? */
		dccp_write_err(sk);
		return 1;
	}
	return 0;
}
87
/*
 * Delayed ACK timer handler; @data is the struct sock pointer.
 *
 * This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff.
 *
 * If the socket is owned by a user context the timer is re-armed
 * TCP_DELACK_MIN later and the ACK is marked as blocked. Otherwise,
 * once the ACK timeout has really expired and an ACK is still
 * scheduled, the ACK timeout (ato) is adjusted and the ACK is sent.
 */
static void dccp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		icsk->icsk_ack.blocked = 1;
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
		goto out;
	}

	/* Nothing to do on a closed socket or when no ACK timer is pending. */
	if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;
	/* Fired early: re-arm for the recorded expiry time. */
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		goto out;
	}

	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		if (!icsk->icsk_ack.pingpong) {
			/* Delayed ACK missed: inflate ATO (doubled, capped at the RTO). */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			icsk->icsk_ack.pingpong = 0;
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		}
		dccp_send_ack(sk);
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
	}
out:
	bh_unlock_sock(sk);
	/* Presumably drops the reference taken when the timer was armed - TODO confirm. */
	sock_put(sk);
}
130
/*
 * The DCCP retransmit timer.
 *
 * Called from dccp_write_timer() when a retransmit event is pending.
 * Retransmits sk->sk_send_head unless dccp_write_timeout() decided to
 * abort the connection, then backs the RTO off exponentially and
 * re-arms the timer.
 */
static void dccp_retransmit_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/*
	 * sk->sk_send_head has to have one skb with
	 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
	 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
	 * (PARTOPEN timer), etc).
	 */
	BUG_TRAP(sk->sk_send_head != NULL);

	/*
	 * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
	 * sent, no need to retransmit, this sock is dead.
	 */
	if (dccp_write_timeout(sk))
		goto out;

	/*
	 * We want to know the number of packets retransmitted, not the
	 * total number of retransmissions of clones of original packets.
	 */
	if (icsk->icsk_retransmits == 0)
		DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);

	if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
		/*
		 * Retransmission failed because of local congestion,
		 * do not backoff.
		 */
		if (icsk->icsk_retransmits == 0)
			icsk->icsk_retransmits = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
		goto out;
	}

	icsk->icsk_backoff++;
	icsk->icsk_retransmits++;

	/*
	 * Exponential backoff, capped at DCCP_RTO_MAX.
	 * NOTE(review): the cap here is DCCP_RTO_MAX while the timer call
	 * below passes TCP_RTO_MAX - confirm the two are meant to agree.
	 */
	icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
	/* After repeated failures drop the cached route. */
	if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
		__sk_dst_reset(sk);
out:;
}
183
184static void dccp_write_timer(unsigned long data)
185{
186 struct sock *sk = (struct sock *)data;
187 struct inet_connection_sock *icsk = inet_csk(sk);
188 int event = 0;
189
190 bh_lock_sock(sk);
191 if (sock_owned_by_user(sk)) {
192 /* Try again later */
193 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
194 goto out;
195 }
196
197 if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
198 goto out;
199
200 if (time_after(icsk->icsk_timeout, jiffies)) {
201 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
202 goto out;
203 }
204
205 event = icsk->icsk_pending;
206 icsk->icsk_pending = 0;
207
208 switch (event) {
209 case ICSK_TIME_RETRANS:
210 dccp_retransmit_timer(sk);
211 break;
212 }
213out:
214 bh_unlock_sock(sk);
215 sock_put(sk);
216}
217
/*
 * Timer for listening sockets
 *
 * Called from dccp_keepalive_timer() for sockets in DCCP_LISTEN. Prunes
 * the accept queue through reqsk_queue_prune(), using the per-socket
 * icsk_syn_retries as the retry limit when it is set and
 * TCP_SYNACK_RETRIES otherwise.
 */
static void dccp_response_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;

	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
			  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
}
229
230static void dccp_keepalive_timer(unsigned long data)
231{
232 struct sock *sk = (struct sock *)data;
233
234 /* Only process if socket is not in use. */
235 bh_lock_sock(sk);
236 if (sock_owned_by_user(sk)) {
237 /* Try again later. */
238 inet_csk_reset_keepalive_timer(sk, HZ / 20);
239 goto out;
240 }
241
242 if (sk->sk_state == DCCP_LISTEN) {
243 dccp_response_timer(sk);
244 goto out;
245 }
246out:
247 bh_unlock_sock(sk);
248 sock_put(sk);
249}