aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig50
-rw-r--r--net/dccp/Makefile10
-rw-r--r--net/dccp/ccid.c139
-rw-r--r--net/dccp/ccid.h180
-rw-r--r--net/dccp/ccids/Kconfig29
-rw-r--r--net/dccp/ccids/Makefile5
-rw-r--r--net/dccp/ccids/ccid3.c1221
-rw-r--r--net/dccp/ccids/ccid3.h137
-rw-r--r--net/dccp/ccids/lib/Makefile3
-rw-r--r--net/dccp/ccids/lib/loss_interval.c144
-rw-r--r--net/dccp/ccids/lib/loss_interval.h61
-rw-r--r--net/dccp/ccids/lib/packet_history.c398
-rw-r--r--net/dccp/ccids/lib/packet_history.h199
-rw-r--r--net/dccp/ccids/lib/tfrc.h22
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c644
-rw-r--r--net/dccp/dccp.h493
-rw-r--r--net/dccp/diag.c71
-rw-r--r--net/dccp/input.c600
-rw-r--r--net/dccp/ipv4.c1356
-rw-r--r--net/dccp/minisocks.c264
-rw-r--r--net/dccp/options.c855
-rw-r--r--net/dccp/output.c528
-rw-r--r--net/dccp/proto.c826
-rw-r--r--net/dccp/timer.c255
24 files changed, 8490 insertions, 0 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
new file mode 100644
index 000000000000..187ac182e24b
--- /dev/null
+++ b/net/dccp/Kconfig
@@ -0,0 +1,50 @@
1menu "DCCP Configuration (EXPERIMENTAL)"
2 depends on INET && EXPERIMENTAL
3
4config IP_DCCP
5 tristate "The DCCP Protocol (EXPERIMENTAL)"
6 ---help---
7 Datagram Congestion Control Protocol
8
9 From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
10
11 The Datagram Congestion Control Protocol (DCCP) is a transport
12 protocol that implements bidirectional, unicast connections of
13 congestion-controlled, unreliable datagrams. It should be suitable
14 for use by applications such as streaming media, Internet telephony,
15 and on-line games
16
17 To compile this protocol support as a module, choose M here: the
18 module will be called dccp.
19
20 If in doubt, say N.
21
22config INET_DCCP_DIAG
23 depends on IP_DCCP && INET_DIAG
24 def_tristate y if (IP_DCCP = y && INET_DIAG = y)
25 def_tristate m
26
27source "net/dccp/ccids/Kconfig"
28
29menu "DCCP Kernel Hacking"
30 depends on IP_DCCP && DEBUG_KERNEL=y
31
32config IP_DCCP_DEBUG
33 bool "DCCP debug messages"
34 ---help---
35 Only use this if you're hacking DCCP.
36
37 Just say N.
38
39config IP_DCCP_UNLOAD_HACK
40 depends on IP_DCCP=m && IP_DCCP_CCID3=m
41 bool "DCCP control sock unload hack"
42 ---help---
	  Enable this to be able to unload the dccp module when it
44 has only one refcount held, the control sock one. Just execute
45 "rmmod dccp_ccid3 dccp"
46
47 Just say N.
48endmenu
49
50endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
new file mode 100644
index 000000000000..fb97bb042455
--- /dev/null
+++ b/net/dccp/Makefile
@@ -0,0 +1,10 @@
# Core DCCP protocol module.
obj-$(CONFIG_IP_DCCP) += dccp.o

dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
	  timer.o

# Socket-monitoring (diag) hooks, built as a separate module.
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o

dccp_diag-y := diag.o

# Always descend into the CCID plugin directory.
obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
new file mode 100644
index 000000000000..9d8fc0e289ea
--- /dev/null
+++ b/net/dccp/ccid.c
@@ -0,0 +1,139 @@
1/*
2 * net/dccp/ccid.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * CCID infrastructure
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "ccid.h"
15
/* Table of registered CCIDs, indexed by ccid_id. */
static struct ccid *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
/* Count of in-flight readers; a writer spins until it drops to zero. */
static atomic_t ccids_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ccids_lock);

/*
 * The strategy is: modifications to the ccids vector are short, do not
 * sleep and are veeery rare, but read access should be free of any
 * exclusive locks.
 */
static void ccids_write_lock(void)
{
	spin_lock(&ccids_lock);
	/* Wait for all current readers to drain before modifying ccids[]. */
	while (atomic_read(&ccids_lockct) != 0) {
		/* Drop the lock so readers can finish while we wait. */
		spin_unlock(&ccids_lock);
		yield();
		spin_lock(&ccids_lock);
	}
}

static inline void ccids_write_unlock(void)
{
	spin_unlock(&ccids_lock);
}

static inline void ccids_read_lock(void)
{
	/* Announce this reader, then wait out any in-progress writer. */
	atomic_inc(&ccids_lockct);
	spin_unlock_wait(&ccids_lock);
}

static inline void ccids_read_unlock(void)
{
	atomic_dec(&ccids_lockct);
}

#else
/* Single CPU, no preemption: no locking needed at all. */
#define ccids_write_lock() do { } while(0)
#define ccids_write_unlock() do { } while(0)
#define ccids_read_lock() do { } while(0)
#define ccids_read_unlock() do { } while(0)
#endif
57
58int ccid_register(struct ccid *ccid)
59{
60 int err;
61
62 if (ccid->ccid_init == NULL)
63 return -1;
64
65 ccids_write_lock();
66 err = -EEXIST;
67 if (ccids[ccid->ccid_id] == NULL) {
68 ccids[ccid->ccid_id] = ccid;
69 err = 0;
70 }
71 ccids_write_unlock();
72 if (err == 0)
73 pr_info("CCID: Registered CCID %d (%s)\n",
74 ccid->ccid_id, ccid->ccid_name);
75 return err;
76}
77
78EXPORT_SYMBOL_GPL(ccid_register);
79
80int ccid_unregister(struct ccid *ccid)
81{
82 ccids_write_lock();
83 ccids[ccid->ccid_id] = NULL;
84 ccids_write_unlock();
85 pr_info("CCID: Unregistered CCID %d (%s)\n",
86 ccid->ccid_id, ccid->ccid_name);
87 return 0;
88}
89
90EXPORT_SYMBOL_GPL(ccid_unregister);
91
/*
 * Look up the CCID with the given id, pin its implementing module and run
 * its per-socket init hook.  Returns the ccid on success, or NULL if the
 * CCID is unknown, the module reference cannot be taken, or init fails.
 */
struct ccid *ccid_init(unsigned char id, struct sock *sk)
{
	struct ccid *ccid;

#ifdef CONFIG_KMOD
	/* Unlocked peek: try to autoload the module providing this CCID. */
	if (ccids[id] == NULL)
		request_module("net-dccp-ccid-%d", id);
#endif
	ccids_read_lock();

	ccid = ccids[id];
	if (ccid == NULL)
		goto out;

	/* Hold a module ref for as long as the socket uses this CCID. */
	if (!try_module_get(ccid->ccid_owner))
		goto out_err;

	if (ccid->ccid_init(sk) != 0)
		goto out_module_put;
out:
	ccids_read_unlock();
	return ccid;
out_module_put:
	module_put(ccid->ccid_owner);
out_err:
	ccid = NULL;
	goto out;
}
120
121EXPORT_SYMBOL_GPL(ccid_init);
122
123void ccid_exit(struct ccid *ccid, struct sock *sk)
124{
125 if (ccid == NULL)
126 return;
127
128 ccids_read_lock();
129
130 if (ccids[ccid->ccid_id] != NULL) {
131 if (ccid->ccid_exit != NULL)
132 ccid->ccid_exit(sk);
133 module_put(ccid->ccid_owner);
134 }
135
136 ccids_read_unlock();
137}
138
139EXPORT_SYMBOL_GPL(ccid_exit);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
new file mode 100644
index 000000000000..962f1e9e2f7e
--- /dev/null
+++ b/net/dccp/ccid.h
@@ -0,0 +1,180 @@
1#ifndef _CCID_H
2#define _CCID_H
3/*
4 * net/dccp/ccid.h
5 *
6 * An implementation of the DCCP protocol
7 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 *
9 * CCID infrastructure
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <net/sock.h>
17#include <linux/dccp.h>
18#include <linux/list.h>
19#include <linux/module.h>
20
21#define CCID_MAX 255
22
/*
 * struct ccid - one DCCP congestion-control plugin ("CCID")
 *
 * The hc_rx_* hooks act on the receiver half-connection, the hc_tx_*
 * hooks on the sender half-connection.  Every hook except ccid_init may
 * be NULL (the inline wrappers below check); ccid_register() rejects a
 * NULL ccid_init.
 */
struct ccid {
	unsigned char	ccid_id;	/* slot in the ccids[] table */
	const char	*ccid_name;	/* human-readable name, for logging */
	struct module	*ccid_owner;	/* module pinned while sockets use us */
	/* per-socket construction / destruction */
	int		(*ccid_init)(struct sock *sk);
	void		(*ccid_exit)(struct sock *sk);
	int		(*ccid_hc_rx_init)(struct sock *sk);
	int		(*ccid_hc_tx_init)(struct sock *sk);
	void		(*ccid_hc_rx_exit)(struct sock *sk);
	void		(*ccid_hc_tx_exit)(struct sock *sk);
	/* packet delivery and CCID-specific option handling */
	void		(*ccid_hc_rx_packet_recv)(struct sock *sk,
						  struct sk_buff *skb);
	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
						    unsigned char option,
						    unsigned char len, u16 idx,
						    unsigned char* value);
	void		(*ccid_hc_rx_insert_options)(struct sock *sk,
						     struct sk_buff *skb);
	void		(*ccid_hc_tx_insert_options)(struct sock *sk,
						     struct sk_buff *skb);
	void		(*ccid_hc_tx_packet_recv)(struct sock *sk,
						  struct sk_buff *skb);
	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
						    unsigned char option,
						    unsigned char len, u16 idx,
						    unsigned char* value);
	/* transmit pacing: may a packet be sent, and note that it was */
	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
						  struct sk_buff *skb, int len);
	void		(*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
						  int len);
	/* export per-half-connection statistics (struct tcp_info) */
	void		(*ccid_hc_rx_get_info)(struct sock *sk,
					       struct tcp_info *info);
	void		(*ccid_hc_tx_get_info)(struct sock *sk,
					       struct tcp_info *info);
};
58
59extern int ccid_register(struct ccid *ccid);
60extern int ccid_unregister(struct ccid *ccid);
61
62extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
63extern void ccid_exit(struct ccid *ccid, struct sock *sk);
64
65static inline void __ccid_get(struct ccid *ccid)
66{
67 __module_get(ccid->ccid_owner);
68}
69
70static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
71 struct sk_buff *skb, int len)
72{
73 int rc = 0;
74 if (ccid->ccid_hc_tx_send_packet != NULL)
75 rc = ccid->ccid_hc_tx_send_packet(sk, skb, len);
76 return rc;
77}
78
79static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
80 int more, int len)
81{
82 if (ccid->ccid_hc_tx_packet_sent != NULL)
83 ccid->ccid_hc_tx_packet_sent(sk, more, len);
84}
85
86static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
87{
88 int rc = 0;
89 if (ccid->ccid_hc_rx_init != NULL)
90 rc = ccid->ccid_hc_rx_init(sk);
91 return rc;
92}
93
94static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
95{
96 int rc = 0;
97 if (ccid->ccid_hc_tx_init != NULL)
98 rc = ccid->ccid_hc_tx_init(sk);
99 return rc;
100}
101
102static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
103{
104 if (ccid->ccid_hc_rx_exit != NULL &&
105 dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL)
106 ccid->ccid_hc_rx_exit(sk);
107}
108
109static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
110{
111 if (ccid->ccid_hc_tx_exit != NULL &&
112 dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL)
113 ccid->ccid_hc_tx_exit(sk);
114}
115
116static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
117 struct sk_buff *skb)
118{
119 if (ccid->ccid_hc_rx_packet_recv != NULL)
120 ccid->ccid_hc_rx_packet_recv(sk, skb);
121}
122
123static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
124 struct sk_buff *skb)
125{
126 if (ccid->ccid_hc_tx_packet_recv != NULL)
127 ccid->ccid_hc_tx_packet_recv(sk, skb);
128}
129
130static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
131 unsigned char option,
132 unsigned char len, u16 idx,
133 unsigned char* value)
134{
135 int rc = 0;
136 if (ccid->ccid_hc_tx_parse_options != NULL)
137 rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx,
138 value);
139 return rc;
140}
141
142static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
143 unsigned char option,
144 unsigned char len, u16 idx,
145 unsigned char* value)
146{
147 int rc = 0;
148 if (ccid->ccid_hc_rx_parse_options != NULL)
149 rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
150 return rc;
151}
152
153static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
154 struct sk_buff *skb)
155{
156 if (ccid->ccid_hc_tx_insert_options != NULL)
157 ccid->ccid_hc_tx_insert_options(sk, skb);
158}
159
160static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
161 struct sk_buff *skb)
162{
163 if (ccid->ccid_hc_rx_insert_options != NULL)
164 ccid->ccid_hc_rx_insert_options(sk, skb);
165}
166
167static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
168 struct tcp_info *info)
169{
170 if (ccid->ccid_hc_rx_get_info != NULL)
171 ccid->ccid_hc_rx_get_info(sk, info);
172}
173
174static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
175 struct tcp_info *info)
176{
177 if (ccid->ccid_hc_tx_get_info != NULL)
178 ccid->ccid_hc_tx_get_info(sk, info);
179}
180#endif /* _CCID_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
new file mode 100644
index 000000000000..7684d83946a4
--- /dev/null
+++ b/net/dccp/ccids/Kconfig
@@ -0,0 +1,29 @@
1menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
2 depends on IP_DCCP && EXPERIMENTAL
3
4config IP_DCCP_CCID3
5 tristate "CCID3 (TFRC) (EXPERIMENTAL)"
6 depends on IP_DCCP
7 ---help---
8 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
9 rate-controlled congestion control mechanism. TFRC is designed to
10 be reasonably fair when competing for bandwidth with TCP-like flows,
11 where a flow is "reasonably fair" if its sending rate is generally
12 within a factor of two of the sending rate of a TCP flow under the
13 same conditions. However, TFRC has a much lower variation of
14 throughput over time compared with TCP, which makes CCID 3 more
	  suitable than CCID 2 for applications such as streaming media where a
16 relatively smooth sending rate is of importance.
17
18 CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
19 congestion control algorithms were initially described in RFC 3448.
20
21 This text was extracted from draft-ietf-dccp-spec-11.txt.
22
23 If in doubt, say M.
24
25config IP_DCCP_TFRC_LIB
26 depends on IP_DCCP_CCID3
27 def_tristate IP_DCCP_CCID3
28
29endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
new file mode 100644
index 000000000000..956f79f50743
--- /dev/null
+++ b/net/dccp/ccids/Makefile
@@ -0,0 +1,5 @@
# CCID3 (TFRC) congestion-control plugin module.
obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o

dccp_ccid3-y := ccid3.o

# Shared TFRC library code.
obj-y += lib/
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
new file mode 100644
index 000000000000..7bf3b3a91e97
--- /dev/null
+++ b/net/dccp/ccids/ccid3.c
@@ -0,0 +1,1221 @@
1/*
2 * net/dccp/ccids/ccid3.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 *
7 * An implementation of the DCCP protocol
8 *
9 * This code has been developed by the University of Waikato WAND
10 * research group. For further information please see http://www.wand.net.nz/
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#include <linux/config.h>
38#include "../ccid.h"
39#include "../dccp.h"
40#include "lib/packet_history.h"
41#include "lib/loss_interval.h"
42#include "lib/tfrc.h"
43#include "ccid3.h"
44
/*
 * Reason for maths with 10 here is to avoid 32 bit overflow when a is big.
 *
 * Approximates (a * USEC_PER_SEC) / b in 32-bit arithmetic by scaling
 * both factors down by 10.  For b <= 20 the division is skipped entirely
 * (which also avoids b / 10 == 0) and a * (USEC_PER_SEC / 10) is returned.
 *
 * NOTE(review): a * (USEC_PER_SEC / 10) still overflows u32 for
 * a > ~42949 — presumably callers only pass small values (packet sizes);
 * confirm.
 */
static inline u32 usecs_div(const u32 a, const u32 b)
{
	const u32 tmp = a * (USEC_PER_SEC / 10);
	return b > 20 ? tmp / (b / 10) : tmp;
}
53
/* Module-wide debug toggle; only honoured when built with CCID3_DEBUG. */
static int ccid3_debug;

#ifdef CCID3_DEBUG
#define ccid3_pr_debug(format, a...) \
	do { if (ccid3_debug) \
		printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
	} while (0)
#else
/* Compiles away to nothing when debugging is disabled. */
#define ccid3_pr_debug(format, a...)
#endif
64
/*
 * Packet-history and loss-interval bookkeeping shared by all CCID3
 * sockets — presumably initialised at module load; not visible in this
 * chunk, confirm against the module init code.
 */
static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
68
/* Per-socket CCID3 constructor; nothing to set up beyond debug logging. */
static int ccid3_init(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
	return 0;
}
74
/* Per-socket CCID3 destructor; nothing to release at this level. */
static void ccid3_exit(struct sock *sk)
{
	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
}
79
/* TFRC sender states */
enum ccid3_hc_tx_states {
	TFRC_SSTATE_NO_SENT = 1,	/* no packet sent yet */
	TFRC_SSTATE_NO_FBACK,		/* sending, no feedback received yet */
	TFRC_SSTATE_FBACK,		/* feedback received, normal operation */
	TFRC_SSTATE_TERM,		/* half-connection being terminated */
};
87
#ifdef CCID3_DEBUG
/*
 * Map a TFRC sender state to its printable name (debug builds only).
 * The table of string literals is const-qualified: writing through a
 * non-const pointer to a string literal is undefined behaviour, and
 * const lets the table live in read-only data.
 */
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	static const char *const ccid3_state_names[] = {
	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
	[TFRC_SSTATE_FBACK]    = "FBACK",
	[TFRC_SSTATE_TERM]     = "TERM",
	};

	return ccid3_state_names[state];
}
#endif
101
/*
 * Transition the sender half-connection to a new TFRC state, logging the
 * change in debug builds.  Re-entering the current state is flagged as a
 * bug via WARN_ON.
 */
static inline void ccid3_hc_tx_set_state(struct sock *sk,
					 enum ccid3_hc_tx_states state)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
		       ccid3_tx_state_name(state));
	WARN_ON(state == oldstate);
	hctx->ccid3hctx_state = state;
}
115
/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
{
	/*
	 * In the NO_FBACK state the spec says t_ipi is 1 second (it is set
	 * elsewhere and then doubles on every no-feedback timer expiry,
	 * handled in a separate function), so leave it untouched here.
	 */
	if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
		hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
						  hctx->ccid3hctx_x);
}
127
/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
{
	/*
	 * delta is the send-time tolerance used by
	 * ccid3_hc_tx_send_packet() when deciding whether a packet may go
	 * out slightly before its nominal send time.
	 */
	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
				      TFRC_OPSYS_HALF_TIME_GRAN);
}
134
/*
 * Update X by
 *	If (p > 0)
 *		x_calc = calcX(s, R, p);
 *		X = max(min(X_calc, 2 * X_recv), s / t_mbi);
 *	Else
 *		If (now - tld >= R)
 *			X = max(min(2 * X, 2 * X_recv), s / R);
 *			tld = now;
 *
 * TFRC sending-rate update (cf. RFC 3448).  Rates are in bytes/second,
 * times in microseconds.
 */
static void ccid3_hc_tx_update_x(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;

	/* To avoid large error in calcX */
	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
		/* Loss seen: cap at the throughput-equation rate, floor at
		 * s / t_mbi. */
		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
						     hctx->ccid3hctx_rtt,
						     hctx->ccid3hctx_p);
		hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc,
						     2 * hctx->ccid3hctx_x_recv),
					       (hctx->ccid3hctx_s /
						TFRC_MAX_BACK_OFF_TIME));
	} else {
		struct timeval now;

		do_gettimeofday(&now);
		/*
		 * No (usable) loss rate yet: at most double the rate, and
		 * only once per RTT (t_ld tracks the last increase).
		 */
		if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
		    hctx->ccid3hctx_rtt) {
			hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
							     hctx->ccid3hctx_x) * 2,
						       usecs_div(hctx->ccid3hctx_s,
								 hctx->ccid3hctx_rtt));
			hctx->ccid3hctx_t_ld = now;
		}
	}
}
173
/*
 * No-feedback timer: fires when no feedback has arrived within the
 * scheduled interval and cuts the sending rate (cf. RFC 3448 section
 * 4.4).  Runs as a timer callback; defers itself by HZ/5 if the socket
 * is currently owned by user context.
 */
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct dccp_sock *dp = dccp_sk(sk);
	unsigned long next_tmout = 0;
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		/* XXX: set some sensible MIB */
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
			       jiffies + HZ / 5);
		goto out;
	}

	ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
		       ccid3_tx_state_name(hctx->ccid3hctx_state));

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_TERM:
		/* Half-connection is going away: do not re-arm. */
		goto out;
	case TFRC_SSTATE_NO_FBACK:
		/* Halve send rate */
		hctx->ccid3hctx_x /= 2;
		/* ...but never drop below the s / t_mbi floor. */
		if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
					 TFRC_MAX_BACK_OFF_TIME))
			hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
					     TFRC_MAX_BACK_OFF_TIME);

		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
			       "bytes/s\n",
			       dccp_role(sk), sk,
			       ccid3_tx_state_name(hctx->ccid3hctx_state),
			       hctx->ccid3hctx_x);
		next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
						      hctx->ccid3hctx_x),
					TFRC_INITIAL_TIMEOUT);
		/*
		 * FIXME - not sure above calculation is correct. See section
		 * 5 of CCID3 11 should adjust tx_t_ipi and double that to
		 * achieve it really
		 */
		break;
	case TFRC_SSTATE_FBACK:
		/*
		 * Check if IDLE since last timeout and recv rate is less than
		 * 4 packets per RTT
		 */
		if (!hctx->ccid3hctx_idle ||
		    (hctx->ccid3hctx_x_recv >=
		     4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
				       dccp_role(sk), sk,
				       ccid3_tx_state_name(hctx->ccid3hctx_state));
			/* Halve sending rate */

			/*  If (X_calc > 2 * X_recv)
			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
			 *  Else
			 *    X_recv = X_calc / 4;
			 */
			BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
			       hctx->ccid3hctx_x_calc == 0);

			/* check also if p is zero -> x_calc is infinity? */
			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
							       hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
			else
				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;

			/* Update sending rate */
			ccid3_hc_tx_update_x(sk);
		}
		/*
		 * Schedule no feedback timer to expire in
		 * max(4 * R, 2 * s / X)
		 */
		next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
				   2 * usecs_div(hctx->ccid3hctx_s,
						 hctx->ccid3hctx_x));
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		goto out;
	}

	/* Re-arm at least one jiffy out; stay idle until the next send. */
	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
		       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
	hctx->ccid3hctx_idle = 1;
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
272
/*
 * Decide whether a DATA/DATAACK packet may be sent now.
 *
 * Return values:
 *   0          packet may be sent immediately (ccval is stamped on it)
 *   > 0        milliseconds to wait before the nominal send time
 *   -ENOTCONN  no tx state, zero-length packet, or terminating
 *   -ENOBUFS   could not allocate a tx history entry
 *   -EINVAL    illegal sender state
 */
static int ccid3_hc_tx_send_packet(struct sock *sk,
				   struct sk_buff *skb, int len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct dccp_tx_hist_entry *new_packet;
	struct timeval now;
	long delay;
	int rc = -ENOTCONN;

	/* Check if pure ACK or Terminating*/

	/*
	 * XXX: We only call this function for DATA and DATAACK, on, these
	 * packets can have zero length, but why the comment about "pure ACK"?
	 */
	if (hctx == NULL || len == 0 ||
	    hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
		goto out;

	/* See if last packet allocated was not sent */
	new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
	if (new_packet == NULL || new_packet->dccphtx_sent) {
		new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
						    SLAB_ATOMIC);

		rc = -ENOBUFS;
		if (new_packet == NULL) {
			ccid3_pr_debug("%s, sk=%p, not enough mem to add "
				       "to history, send refused\n",
				       dccp_role(sk), sk);
			goto out;
		}

		dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
	}

	do_gettimeofday(&now);

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
		/* Very first packet: arm the no-feedback timer, start the
		 * window counter and move to NO_FBACK. */
		ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n",
			       dccp_role(sk), sk, dp->dccps_gss);

		hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
		hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
			       jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
		hctx->ccid3hctx_last_win_count = 0;
		hctx->ccid3hctx_t_last_win_count = now;
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
		hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;

		/* Set nominal send time for initial packet */
		hctx->ccid3hctx_t_nom = now;
		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
				  hctx->ccid3hctx_t_ipi);
		ccid3_calc_new_delta(hctx);
		rc = 0;
		break;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
		/* Send is allowed once now >= t_nom - delta. */
		delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
		         hctx->ccid3hctx_delta);
		ccid3_pr_debug("send_packet delay=%ld\n", delay);
		delay /= -1000;
		/* divide by -1000 is to convert to ms and get sign right */
		rc = delay > 0 ? delay : 0;
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		rc = -EINVAL;
		break;
	}

	/* Can we send? if so add options and add to packet history */
	if (rc == 0)
		new_packet->dccphtx_ccval =
			DCCP_SKB_CB(skb)->dccpd_ccval =
				hctx->ccid3hctx_last_win_count;
out:
	return rc;
}
358
/*
 * Called after a packet was pushed to the network: timestamp and
 * sequence-stamp the matching tx history entry, advance the TFRC window
 * counter, and schedule the nominal send time of the next packet.
 */
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct timeval now;

	BUG_ON(hctx == NULL);

	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
		ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
			       dccp_role(sk), sk);
		return;
	}

	do_gettimeofday(&now);

	/* check if we have sent a data packet */
	if (len > 0) {
		unsigned long quarter_rtt;
		struct dccp_tx_hist_entry *packet;

		/* ccid3_hc_tx_send_packet() queued this entry for us. */
		packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
		if (packet == NULL) {
			printk(KERN_CRIT "%s: packet doesn't exists in "
			       "history!\n", __FUNCTION__);
			return;
		}
		if (packet->dccphtx_sent) {
			printk(KERN_CRIT "%s: no unsent packet in history!\n",
			       __FUNCTION__);
			return;
		}
		packet->dccphtx_tstamp = now;
		packet->dccphtx_seqno = dp->dccps_gss;
		/*
		 * Check if win_count have changed
		 * Algorithm in "8.1. Window Counter Value" in
		 * draft-ietf-dccp-ccid3-11.txt
		 */
		/* number of quarter-RTTs since the counter last advanced */
		quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
		if (likely(hctx->ccid3hctx_rtt > 8))
			quarter_rtt /= hctx->ccid3hctx_rtt / 4;

		if (quarter_rtt > 0) {
			hctx->ccid3hctx_t_last_win_count = now;
			/* advance by at most 5, modulo 16 */
			hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
							  min_t(unsigned long, quarter_rtt, 5)) % 16;
			ccid3_pr_debug("%s, sk=%p, window changed from "
				       "%u to %u!\n",
				       dccp_role(sk), sk,
				       packet->dccphtx_ccval,
				       hctx->ccid3hctx_last_win_count);
		}

		hctx->ccid3hctx_idle = 0;
		packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
		packet->dccphtx_sent = 1;
	} else
		ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
			       dccp_role(sk), sk, dp->dccps_gss);

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
		/* if first wasn't pure ack */
		if (len != 0)
			printk(KERN_CRIT "%s: %s, First packet sent is noted "
			       "as a data packet\n",
			       __FUNCTION__, dccp_role(sk));
		return;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
		if (len > 0) {
			/* t_nom = now + t_ipi: nominal time of next send */
			hctx->ccid3hctx_t_nom = now;
			ccid3_calc_new_t_ipi(hctx);
			ccid3_calc_new_delta(hctx);
			timeval_add_usecs(&hctx->ccid3hctx_t_nom,
					  hctx->ccid3hctx_t_ipi);
		}
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		break;
	}
}
445
/*
 * Process an (DATA)ACK on the sender side: take an RTT sample from the
 * matching tx history entry, fold in the receiver-reported receive rate
 * and loss event rate, recompute X / t_ipi / delta and re-arm the
 * no-feedback timer.
 */
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct ccid3_options_received *opt_recv;
	struct dccp_tx_hist_entry *packet;
	unsigned long next_tmout;
	u32 t_elapsed;
	u32 pinv;
	u32 x_recv;
	u32 r_sample;

	if (hctx == NULL)
		return;

	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
		ccid3_pr_debug("%s, sk=%p, received a packet when "
			       "terminating!\n", dccp_role(sk), sk);
		return;
	}

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;

	opt_recv = &hctx->ccid3hctx_options_received;

	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
	x_recv = opt_recv->ccid3or_receive_rate;
	pinv = opt_recv->ccid3or_loss_event_rate;

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
		/* FIXME: what to do here? */
		return;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
		/* Calculate new round trip sample by
		 * R_sample = (now - t_recvdata) - t_delay */
		/* get t_recvdata from history */
		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
		if (packet == NULL) {
			ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't "
				       "exist in history!\n",
				       dccp_role(sk), sk,
				       DCCP_SKB_CB(skb)->dccpd_ack_seq,
				       dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
			return;
		}

		/* Update RTT */
		r_sample = timeval_now_delta(&packet->dccphtx_tstamp);
		/* FIXME: t_elapsed should be subtracted from the sample: */
		// r_sample -= usecs_to_jiffies(t_elapsed * 10);

		/* Update RTT estimate by
		 * If (No feedback recv)
		 *     R = R_sample;
		 * Else
		 *     R = q * R + (1 - q) * R_sample;
		 *
		 * q is a constant, RFC 3448 recommends 0.9
		 */
		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
			/* First feedback: adopt the sample, enter FBACK. */
			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
			hctx->ccid3hctx_rtt = r_sample;
		} else
			hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
					      r_sample / 10;

		ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
			       "r_sample=%us\n", dccp_role(sk), sk,
			       hctx->ccid3hctx_rtt, r_sample);

		/* Update timeout interval: t_RTO = max(4 * R, 1 second) */
		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
					      USEC_PER_SEC);

		/* Update receive rate */
		hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */

		/* Update loss event rate; pinv is its inverse (~0 = none) */
		if (pinv == ~0 || pinv == 0)
			hctx->ccid3hctx_p = 0;
		else {
			hctx->ccid3hctx_p = 1000000 / pinv;

			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
				hctx->ccid3hctx_p = TFRC_SMALLEST_P;
				ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
					       dccp_role(sk), sk);
			}
		}

		/* unschedule no feedback timer */
		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

		/* Update sending rate */
		ccid3_hc_tx_update_x(sk);

		/* Update next send time: rebase t_nom onto the new t_ipi */
		timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
				  hctx->ccid3hctx_t_ipi);
		ccid3_calc_new_t_ipi(hctx);
		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
				  hctx->ccid3hctx_t_ipi);
		ccid3_calc_new_delta(hctx);

		/* remove all packets older than the one acked from history */
		dccp_tx_hist_purge_older(ccid3_tx_hist,
					 &hctx->ccid3hctx_hist, packet);
		/*
		 * As we have calculated new ipi, delta, t_nom it is possible that
		 * we now can send a packet, so wake up dccp_wait_for_ccids.
		 */
		sk->sk_write_space(sk);

		/*
		 * Schedule no feedback timer to expire in
		 * max(4 * R, 2 * s / X)
		 */
		next_tmout = max(hctx->ccid3hctx_t_rto,
				 2 * usecs_div(hctx->ccid3hctx_s,
					       hctx->ccid3hctx_x));

		ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
			       "expire in %lu jiffies (%luus)\n",
			       dccp_role(sk), sk,
			       usecs_to_jiffies(next_tmout), next_tmout);

		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
			       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));

		/* set idle flag */
		hctx->ccid3hctx_idle = 1;
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		break;
	}
}
591
592static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
593{
594 const struct dccp_sock *dp = dccp_sk(sk);
595 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
596
597 if (hctx == NULL || !(sk->sk_state == DCCP_OPEN ||
598 sk->sk_state == DCCP_PARTOPEN))
599 return;
600
601 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
602}
603
/*
 * Parse one CCID3-specific option on the sender half-connection.  The
 * values are cached in hctx->ccid3hctx_options_received; the cache is
 * reset whenever a new ack sequence number (dccps_gsr) is seen.
 * Returns 0, or -EINVAL for a malformed option length.
 */
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
				     unsigned char len, u16 idx,
				     unsigned char *value)
{
	int rc = 0;
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct ccid3_options_received *opt_recv;

	if (hctx == NULL)
		return 0;

	opt_recv = &hctx->ccid3hctx_options_received;

	/* New ack seqno: reset all cached option values. */
	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
		opt_recv->ccid3or_seqno = dp->dccps_gsr;
		opt_recv->ccid3or_loss_event_rate = ~0;
		opt_recv->ccid3or_loss_intervals_idx = 0;
		opt_recv->ccid3or_loss_intervals_len = 0;
		opt_recv->ccid3or_receive_rate = 0;
	}

	switch (option) {
	case TFRC_OPT_LOSS_EVENT_RATE:
		if (len != 4) {
			ccid3_pr_debug("%s, sk=%p, invalid len for "
				       "TFRC_OPT_LOSS_EVENT_RATE\n",
				       dccp_role(sk), sk);
			rc = -EINVAL;
		} else {
			/* NOTE(review): value may be unaligned within the
			 * option area; *(u32 *) relies on the CPU tolerating
			 * unaligned loads — consider get_unaligned(). */
			opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
			ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_loss_event_rate);
		}
		break;
	case TFRC_OPT_LOSS_INTERVALS:
		/* Only the raw option's position/length are recorded here;
		 * presumably consumed elsewhere — not read in this file. */
		opt_recv->ccid3or_loss_intervals_idx = idx;
		opt_recv->ccid3or_loss_intervals_len = len;
		ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
			       dccp_role(sk), sk,
			       opt_recv->ccid3or_loss_intervals_idx,
			       opt_recv->ccid3or_loss_intervals_len);
		break;
	case TFRC_OPT_RECEIVE_RATE:
		if (len != 4) {
			ccid3_pr_debug("%s, sk=%p, invalid len for "
				       "TFRC_OPT_RECEIVE_RATE\n",
				       dccp_role(sk), sk);
			rc = -EINVAL;
		} else {
			opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
			ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_receive_rate);
		}
		break;
	}

	return rc;
}
665
666static int ccid3_hc_tx_init(struct sock *sk)
667{
668 struct dccp_sock *dp = dccp_sk(sk);
669 struct ccid3_hc_tx_sock *hctx;
670
671 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
672
673 hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx),
674 gfp_any());
675 if (hctx == NULL)
676 return -ENOMEM;
677
678 memset(hctx, 0, sizeof(*hctx));
679
680 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
681 dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
682 hctx->ccid3hctx_s = dp->dccps_packet_size;
683 else
684 hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
685
686 /* Set transmission rate to 1 packet per second */
687 hctx->ccid3hctx_x = hctx->ccid3hctx_s;
688 hctx->ccid3hctx_t_rto = USEC_PER_SEC;
689 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
690 INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
691 init_timer(&hctx->ccid3hctx_no_feedback_timer);
692
693 return 0;
694}
695
696static void ccid3_hc_tx_exit(struct sock *sk)
697{
698 struct dccp_sock *dp = dccp_sk(sk);
699 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
700
701 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
702 BUG_ON(hctx == NULL);
703
704 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
705 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
706
707 /* Empty packet history */
708 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
709
710 kfree(dp->dccps_hc_tx_ccid_private);
711 dp->dccps_hc_tx_ccid_private = NULL;
712}
713
714/*
715 * RX Half Connection methods
716 */
717
/*
 * TFRC receiver states: a half connection starts in NO_DATA until the
 * first data packet arrives, then operates in DATA; TERM is entered only
 * on teardown (see ccid3_hc_rx_exit).
 */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA = 1,	/* no data packet received yet */
	TFRC_RSTATE_DATA,		/* normal operation */
	TFRC_RSTATE_TERM = 127,		/* half connection being torn down */
};

#ifdef CCID3_DEBUG
/* Human-readable receiver state name; debug builds only. */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static char *ccid3_rx_state_names[] = {
	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
	[TFRC_RSTATE_DATA]    = "DATA",
	[TFRC_RSTATE_TERM]    = "TERM",
	};

	return ccid3_rx_state_names[state];
}
#endif
737
738static inline void ccid3_hc_rx_set_state(struct sock *sk,
739 enum ccid3_hc_rx_states state)
740{
741 struct dccp_sock *dp = dccp_sk(sk);
742 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
743 enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
744
745 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
746 dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
747 ccid3_rx_state_name(state));
748 WARN_ON(state == oldstate);
749 hcrx->ccid3hcrx_state = state;
750}
751
/*
 * Build and send a TFRC feedback packet (an ACK carrying the CCID3
 * options): compute X_recv, the receive rate since the last feedback,
 * snapshot the values echoed by ccid3_hc_rx_insert_options(), and hand
 * the ack to the DCCP core.
 */
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
	struct dccp_rx_hist_entry *packet;
	struct timeval now;

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	do_gettimeofday(&now);

	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		/* No data received yet, so no receive rate to report. */
		hcrx->ccid3hcrx_x_recv = 0;
		break;
	case TFRC_RSTATE_DATA: {
		const u32 delta = timeval_delta(&now,
				   &hcrx->ccid3hcrx_tstamp_last_feedback);

		/*
		 * X_recv = bytes received / usecs since last feedback.
		 * NOTE(review): bytes_recv * USEC_PER_SEC can overflow a
		 * u32 at high receive rates -- worth confirming upstream.
		 */
		hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv *
					  USEC_PER_SEC);
		if (likely(delta > 1))
			hcrx->ccid3hcrx_x_recv /= delta;
	}
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
		dump_stack();
		return;
	}

	/* The most recent data packet anchors the echoed seqno/ccval. */
	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
	if (packet == NULL) {
		printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
		       __FUNCTION__, dccp_role(sk), sk);
		dump_stack();
		return;
	}

	/* Record the point this feedback covers and reset the byte count. */
	hcrx->ccid3hcrx_tstamp_last_feedback = now;
	hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval;
	hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno;
	hcrx->ccid3hcrx_bytes_recv = 0;

	/* Convert to multiples of 10us */
	hcrx->ccid3hcrx_elapsed_time =
		timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
	/* pinv is the inverse of p scaled by 1e6; ~0 encodes "no loss". */
	if (hcrx->ccid3hcrx_p == 0)
		hcrx->ccid3hcrx_pinv = ~0;
	else
		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
	dccp_send_ack(sk);
}
806
/*
 * Insert the CCID3 receiver options into an outgoing packet: the window
 * counter echo always, and -- on ack-bearing packets -- elapsed time,
 * timestamp, loss event rate and receive rate.
 */
static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	u32 x_recv, pinv;
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;

	/* Only meaningful once the connection is (nearly) established. */
	if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN ||
			      sk->sk_state == DCCP_PARTOPEN))
		return;

	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;

	/* The remaining options ride only on packets carrying an ack. */
	if (dccp_packet_without_ack(skb))
		return;

	if (hcrx->ccid3hcrx_elapsed_time != 0)
		dccp_insert_option_elapsed_time(sk, skb,
						hcrx->ccid3hcrx_elapsed_time);
	dccp_insert_option_timestamp(sk, skb);
	/* Both values were computed in ccid3_hc_rx_send_feedback(). */
	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
	pinv = htonl(hcrx->ccid3hcrx_pinv);
	dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
			   &pinv, sizeof(pinv));
	dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
			   &x_recv, sizeof(x_recv));
}
833
834/* calculate first loss interval
835 *
836 * returns estimated loss interval in usecs */
837
838static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
839{
840 struct dccp_sock *dp = dccp_sk(sk);
841 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
842 struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
843 u32 rtt, delta, x_recv, fval, p, tmp2;
844 struct timeval tstamp = { 0, };
845 int interval = 0;
846 int win_count = 0;
847 int step = 0;
848 u64 tmp1;
849
850 list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
851 dccphrx_node) {
852 if (dccp_rx_hist_entry_data_packet(entry)) {
853 tail = entry;
854
855 switch (step) {
856 case 0:
857 tstamp = entry->dccphrx_tstamp;
858 win_count = entry->dccphrx_ccval;
859 step = 1;
860 break;
861 case 1:
862 interval = win_count - entry->dccphrx_ccval;
863 if (interval < 0)
864 interval += TFRC_WIN_COUNT_LIMIT;
865 if (interval > 4)
866 goto found;
867 break;
868 }
869 }
870 }
871
872 if (step == 0) {
873 printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no "
874 "data packets!\n",
875 __FUNCTION__, dccp_role(sk), sk);
876 return ~0;
877 }
878
879 if (interval == 0) {
880 ccid3_pr_debug("%s, sk=%p, Could not find a win_count "
881 "interval > 0. Defaulting to 1\n",
882 dccp_role(sk), sk);
883 interval = 1;
884 }
885found:
886 rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
887 ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
888 dccp_role(sk), sk, rtt);
889 if (rtt == 0)
890 rtt = 1;
891
892 delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback);
893 x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
894 if (likely(delta > 1))
895 x_recv /= delta;
896
897 tmp1 = (u64)x_recv * (u64)rtt;
898 do_div(tmp1,10000000);
899 tmp2 = (u32)tmp1;
900 fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
901 /* do not alter order above or you will get overflow on 32 bit */
902 p = tfrc_calc_x_reverse_lookup(fval);
903 ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
904 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
905
906 if (p == 0)
907 return ~0;
908 else
909 return 1000000 / p;
910}
911
912static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
913{
914 struct dccp_sock *dp = dccp_sk(sk);
915 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
916
917 if (seq_loss != DCCP_MAX_SEQNO + 1 &&
918 list_empty(&hcrx->ccid3hcrx_li_hist)) {
919 struct dccp_li_hist_entry *li_tail;
920
921 li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
922 &hcrx->ccid3hcrx_li_hist,
923 seq_loss, win_loss);
924 if (li_tail == NULL)
925 return;
926 li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
927 }
928 /* FIXME: find end of interval */
929}
930
931static void ccid3_hc_rx_detect_loss(struct sock *sk)
932{
933 struct dccp_sock *dp = dccp_sk(sk);
934 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
935 u8 win_loss;
936 const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
937 &hcrx->ccid3hcrx_li_hist,
938 &win_loss);
939
940 ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
941}
942
943static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
944{
945 struct dccp_sock *dp = dccp_sk(sk);
946 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
947 const struct dccp_options_received *opt_recv;
948 struct dccp_rx_hist_entry *packet;
949 struct timeval now;
950 u8 win_count;
951 u32 p_prev;
952 int ins;
953
954 if (hcrx == NULL)
955 return;
956
957 BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
958 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
959
960 opt_recv = &dp->dccps_options_received;
961
962 switch (DCCP_SKB_CB(skb)->dccpd_type) {
963 case DCCP_PKT_ACK:
964 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
965 return;
966 case DCCP_PKT_DATAACK:
967 if (opt_recv->dccpor_timestamp_echo == 0)
968 break;
969 p_prev = hcrx->ccid3hcrx_rtt;
970 do_gettimeofday(&now);
971 hcrx->ccid3hcrx_rtt = timeval_usecs(&now) -
972 (opt_recv->dccpor_timestamp_echo -
973 opt_recv->dccpor_elapsed_time) * 10;
974 if (p_prev != hcrx->ccid3hcrx_rtt)
975 ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
976 dccp_role(sk), hcrx->ccid3hcrx_rtt,
977 opt_recv->dccpor_elapsed_time);
978 break;
979 case DCCP_PKT_DATA:
980 break;
981 default:
982 ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
983 dccp_role(sk), sk,
984 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
985 return;
986 }
987
988 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
989 skb, SLAB_ATOMIC);
990 if (packet == NULL) {
991 ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
992 "to history (consider it lost)!",
993 dccp_role(sk), sk);
994 return;
995 }
996
997 win_count = packet->dccphrx_ccval;
998
999 ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
1000 &hcrx->ccid3hcrx_li_hist, packet);
1001
1002 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
1003 return;
1004
1005 switch (hcrx->ccid3hcrx_state) {
1006 case TFRC_RSTATE_NO_DATA:
1007 ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
1008 "feedback\n",
1009 dccp_role(sk), sk,
1010 dccp_state_name(sk->sk_state), skb);
1011 ccid3_hc_rx_send_feedback(sk);
1012 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
1013 return;
1014 case TFRC_RSTATE_DATA:
1015 hcrx->ccid3hcrx_bytes_recv += skb->len -
1016 dccp_hdr(skb)->dccph_doff * 4;
1017 if (ins != 0)
1018 break;
1019
1020 do_gettimeofday(&now);
1021 if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
1022 hcrx->ccid3hcrx_rtt) {
1023 hcrx->ccid3hcrx_tstamp_last_ack = now;
1024 ccid3_hc_rx_send_feedback(sk);
1025 }
1026 return;
1027 default:
1028 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1029 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
1030 dump_stack();
1031 return;
1032 }
1033
1034 /* Dealing with packet loss */
1035 ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
1036 dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1037
1038 ccid3_hc_rx_detect_loss(sk);
1039 p_prev = hcrx->ccid3hcrx_p;
1040
1041 /* Calculate loss event rate */
1042 if (!list_empty(&hcrx->ccid3hcrx_li_hist))
1043 /* Scaling up by 1000000 as fixed decimal */
1044 hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
1045
1046 if (hcrx->ccid3hcrx_p > p_prev) {
1047 ccid3_hc_rx_send_feedback(sk);
1048 return;
1049 }
1050}
1051
1052static int ccid3_hc_rx_init(struct sock *sk)
1053{
1054 struct dccp_sock *dp = dccp_sk(sk);
1055 struct ccid3_hc_rx_sock *hcrx;
1056
1057 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1058
1059 hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx),
1060 gfp_any());
1061 if (hcrx == NULL)
1062 return -ENOMEM;
1063
1064 memset(hcrx, 0, sizeof(*hcrx));
1065
1066 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
1067 dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
1068 hcrx->ccid3hcrx_s = dp->dccps_packet_size;
1069 else
1070 hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
1071
1072 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
1073 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1074 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1075 /*
1076 * XXX this seems to be paranoid, need to think more about this, for
1077 * now start with something different than zero. -acme
1078 */
1079 hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
1080 return 0;
1081}
1082
1083static void ccid3_hc_rx_exit(struct sock *sk)
1084{
1085 struct dccp_sock *dp = dccp_sk(sk);
1086 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1087
1088 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1089
1090 if (hcrx == NULL)
1091 return;
1092
1093 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
1094
1095 /* Empty packet history */
1096 dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1097
1098 /* Empty loss interval history */
1099 dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1100
1101 kfree(dp->dccps_hc_rx_ccid_private);
1102 dp->dccps_hc_rx_ccid_private = NULL;
1103}
1104
1105static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
1106{
1107 const struct dccp_sock *dp = dccp_sk(sk);
1108 const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1109
1110 if (hcrx == NULL)
1111 return;
1112
1113 info->tcpi_ca_state = hcrx->ccid3hcrx_state;
1114 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1115 info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
1116}
1117
1118static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
1119{
1120 const struct dccp_sock *dp = dccp_sk(sk);
1121 const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1122
1123 if (hctx == NULL)
1124 return;
1125
1126 info->tcpi_rto = hctx->ccid3hctx_t_rto;
1127 info->tcpi_rtt = hctx->ccid3hctx_rtt;
1128}
1129
/*
 * Operations table for this congestion control scheme, registered with
 * the DCCP core as CCID number 3 (TFRC congestion control).
 */
static struct ccid ccid3 = {
	.ccid_id		   = 3,
	.ccid_name		   = "ccid3",
	.ccid_owner		   = THIS_MODULE,
	.ccid_init		   = ccid3_init,
	.ccid_exit		   = ccid3_exit,
	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
	.ccid_hc_tx_send_packet	   = ccid3_hc_tx_send_packet,
	.ccid_hc_tx_packet_sent	   = ccid3_hc_tx_packet_sent,
	.ccid_hc_tx_packet_recv	   = ccid3_hc_tx_packet_recv,
	.ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
	.ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
	.ccid_hc_rx_init	   = ccid3_hc_rx_init,
	.ccid_hc_rx_exit	   = ccid3_hc_rx_exit,
	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
};
1150
1151module_param(ccid3_debug, int, 0444);
1152MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
1153
1154static __init int ccid3_module_init(void)
1155{
1156 int rc = -ENOBUFS;
1157
1158 ccid3_rx_hist = dccp_rx_hist_new("ccid3");
1159 if (ccid3_rx_hist == NULL)
1160 goto out;
1161
1162 ccid3_tx_hist = dccp_tx_hist_new("ccid3");
1163 if (ccid3_tx_hist == NULL)
1164 goto out_free_rx;
1165
1166 ccid3_li_hist = dccp_li_hist_new("ccid3");
1167 if (ccid3_li_hist == NULL)
1168 goto out_free_tx;
1169
1170 rc = ccid_register(&ccid3);
1171 if (rc != 0)
1172 goto out_free_loss_interval_history;
1173out:
1174 return rc;
1175
1176out_free_loss_interval_history:
1177 dccp_li_hist_delete(ccid3_li_hist);
1178 ccid3_li_hist = NULL;
1179out_free_tx:
1180 dccp_tx_hist_delete(ccid3_tx_hist);
1181 ccid3_tx_hist = NULL;
1182out_free_rx:
1183 dccp_rx_hist_delete(ccid3_rx_hist);
1184 ccid3_rx_hist = NULL;
1185 goto out;
1186}
1187module_init(ccid3_module_init);
1188
/*
 * Module exit: unregister the CCID first so no new users appear, then
 * delete the history caches created in ccid3_module_init().  The NULL
 * checks cover a partially-failed init.
 */
static __exit void ccid3_module_exit(void)
{
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
	/*
	 * Hack to use while developing, so that we get rid of the control
	 * sock, that is what keeps a refcount on dccp.ko -acme
	 */
	extern void dccp_ctl_sock_exit(void);

	dccp_ctl_sock_exit();
#endif
	ccid_unregister(&ccid3);

	if (ccid3_tx_hist != NULL) {
		dccp_tx_hist_delete(ccid3_tx_hist);
		ccid3_tx_hist = NULL;
	}
	if (ccid3_rx_hist != NULL) {
		dccp_rx_hist_delete(ccid3_rx_hist);
		ccid3_rx_hist = NULL;
	}
	if (ccid3_li_hist != NULL) {
		dccp_li_hist_delete(ccid3_li_hist);
		ccid3_li_hist = NULL;
	}
}
1216
1217MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
1218 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
1219MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
1220MODULE_LICENSE("GPL");
1221MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644
index 000000000000..ee8cbace6630
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,137 @@
1/*
2 * net/dccp/ccids/ccid3.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36#ifndef _DCCP_CCID3_H_
37#define _DCCP_CCID3_H_
38
39#include <linux/config.h>
40#include <linux/list.h>
41#include <linux/time.h>
42#include <linux/types.h>
43
/* Packet-size bounds accepted from the socket; outside this range the
 * standard TFRC packet size is used instead (see ccid3_hc_*_init()). */
#define TFRC_MIN_PACKET_SIZE 16
#define TFRC_STD_PACKET_SIZE 256
#define TFRC_MAX_PACKET_SIZE 65535

/* Two seconds as per CCID3 spec */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)

/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))

/* In seconds */
#define TFRC_MAX_BACK_OFF_TIME 64

#define TFRC_SMALLEST_P 40

/* TFRC feedback options carried in DCCP packets (CCID3-specific). */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS = 193,
	TFRC_OPT_RECEIVE_RATE = 194,
};

/* Option values parsed from the acknowledgement currently being
 * processed; ccid3or_seqno records which ack (dccps_gsr) they belong to. */
struct ccid3_options_received {
	u64 ccid3or_seqno:48,
	    ccid3or_loss_intervals_idx:16;
	u16 ccid3or_loss_intervals_len;
	u32 ccid3or_loss_event_rate;
	u32 ccid3or_receive_rate;
};

/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
 *
 * @ccid3hctx_state - Sender state
 * @ccid3hctx_x - Current sending rate
 * @ccid3hctx_x_recv - Receive rate
 * @ccid3hctx_x_calc - Calculated send (?) rate
 * @ccid3hctx_s - Packet size
 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
 * @ccid3hctx_last_win_count - Last window counter sent
 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
 *				 with last_win_count value sent
 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
 * @ccid3hctx_idle - FIXME
 * @ccid3hctx_t_ld - Time last doubled during slow start
 * @ccid3hctx_t_nom - Nominal send time of next packet
 * @ccid3hctx_t_rto - Timeout used for the no feedback timer, in usecs
 * @ccid3hctx_t_ipi - Interpacket (send) interval
 * @ccid3hctx_delta - Send timer delta
 * @ccid3hctx_hist - Packet history
 * @ccid3hctx_options_received - Options parsed from the last ack received
 */
struct ccid3_hc_tx_sock {
	u32 ccid3hctx_x;
	u32 ccid3hctx_x_recv;
	u32 ccid3hctx_x_calc;
	u16 ccid3hctx_s;
	u32 ccid3hctx_rtt;
	u32 ccid3hctx_p;
	u8 ccid3hctx_state;
	u8 ccid3hctx_last_win_count;
	u8 ccid3hctx_idle;
	struct timeval ccid3hctx_t_last_win_count;
	struct timer_list ccid3hctx_no_feedback_timer;
	struct timeval ccid3hctx_t_ld;
	struct timeval ccid3hctx_t_nom;
	u32 ccid3hctx_t_rto;
	u32 ccid3hctx_t_ipi;
	u32 ccid3hctx_delta;
	struct list_head ccid3hctx_hist;
	struct ccid3_options_received ccid3hctx_options_received;
};

/** struct ccid3_hc_rx_sock - CCID3 receiver half connection sock
 *
 * @ccid3hcrx_seqno_last_counter - Seqno of the packet echoed in feedback
 * @ccid3hcrx_state - Receiver state (enum ccid3_hc_rx_states)
 * @ccid3hcrx_last_counter - Window counter echoed in outgoing headers
 * @ccid3hcrx_rtt - RTT estimate in usecs, from timestamp echoes
 * @ccid3hcrx_p - Current loss event rate, scaled by 1000000
 * @ccid3hcrx_bytes_recv - Data bytes received since the last feedback
 * @ccid3hcrx_tstamp_last_feedback - Time the last feedback was sent
 * @ccid3hcrx_tstamp_last_ack - Time the last per-RTT ack was sent
 * @ccid3hcrx_hist - RX packet history
 * @ccid3hcrx_li_hist - Loss interval history
 * @ccid3hcrx_s - Packet size
 * @ccid3hcrx_pinv - Inverse of p (~0 == no loss), sent as an option
 * @ccid3hcrx_elapsed_time - Hold time of the echoed packet, in 10us units
 * @ccid3hcrx_x_recv - Receive rate reported back to the sender
 */
struct ccid3_hc_rx_sock {
	u64 ccid3hcrx_seqno_last_counter:48,
	    ccid3hcrx_state:8,
	    ccid3hcrx_last_counter:4;
	unsigned long ccid3hcrx_rtt;
	u32 ccid3hcrx_p;
	u32 ccid3hcrx_bytes_recv;
	struct timeval ccid3hcrx_tstamp_last_feedback;
	struct timeval ccid3hcrx_tstamp_last_ack;
	struct list_head ccid3hcrx_hist;
	struct list_head ccid3hcrx_li_hist;
	u16 ccid3hcrx_s;
	u32 ccid3hcrx_pinv;
	u32 ccid3hcrx_elapsed_time;
	u32 ccid3hcrx_x_recv;
};

/* Field accessors that tolerate missing CCID private state: they return
 * 0 when the half connection has no CCID3 data attached. */
#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
			((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)

#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
			((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)

#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
new file mode 100644
index 000000000000..5f940a6cbaca
--- /dev/null
+++ b/net/dccp/ccids/lib/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
2
3dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
new file mode 100644
index 000000000000..4c01a54143ad
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -0,0 +1,144 @@
1/*
2 * net/dccp/ccids/lib/loss_interval.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/config.h>
15#include <linux/module.h>
16
17#include "loss_interval.h"
18
/*
 * Create a loss interval history backed by its own slab cache, named
 * "li_hist_<name>".  Returns NULL on any allocation failure.
 *
 * The slab name string deliberately outlives this function: it is
 * retrieved via kmem_cache_name() and freed in dccp_li_hist_delete().
 */
struct dccp_li_hist *dccp_li_hist_new(const char *name)
{
	struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
	static const char dccp_li_hist_mask[] = "li_hist_%s";
	char *slab_name;

	if (hist == NULL)
		goto out;

	/* sizeof(mask) - 1 also covers the "%s" chars replaced by name */
	slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
			    GFP_ATOMIC);
	if (slab_name == NULL)
		goto out_free_hist;

	sprintf(slab_name, dccp_li_hist_mask, name);
	hist->dccplih_slab = kmem_cache_create(slab_name,
					     sizeof(struct dccp_li_hist_entry),
					       0, SLAB_HWCACHE_ALIGN,
					       NULL, NULL);
	if (hist->dccplih_slab == NULL)
		goto out_free_slab_name;
out:
	return hist;
out_free_slab_name:
	kfree(slab_name);
out_free_hist:
	kfree(hist);
	hist = NULL;
	goto out;
}
49
50EXPORT_SYMBOL_GPL(dccp_li_hist_new);
51
/*
 * Destroy a loss interval history.  The slab name was allocated
 * separately in dccp_li_hist_new(), so it must be fetched before the
 * cache is destroyed and freed afterwards.
 */
void dccp_li_hist_delete(struct dccp_li_hist *hist)
{
	const char* name = kmem_cache_name(hist->dccplih_slab);

	kmem_cache_destroy(hist->dccplih_slab);
	kfree(name);
	kfree(hist);
}
60
61EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
62
63void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
64{
65 struct dccp_li_hist_entry *entry, *next;
66
67 list_for_each_entry_safe(entry, next, list, dccplih_node) {
68 list_del_init(&entry->dccplih_node);
69 kmem_cache_free(hist->dccplih_slab, entry);
70 }
71}
72
73EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
74
/*
 * Weights used to calculate the loss event rate.  These are the integer
 * weights from RFC 3448 scaled up by 4 (so that they stay integral); the
 * consumer divides by 4 accordingly -- see dccp_li_hist_calc_i_mean().
 */
static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
	4, 4, 4, 4, 3, 2, 1, 1,
};
83
/*
 * Calculate the weighted mean loss interval I_mean as per RFC 3448:
 * i_tot0 weights the newest DCCP_LI_HIST_IVAL_F_LENGTH intervals, i_tot1
 * the same weights shifted by one entry (skipping the open interval), and
 * the larger of the two is used.
 *
 * Returns 0 when the history does not contain the expected number of
 * entries -- callers MUST check for 0 before dividing by the result.
 *
 * NOTE(review): dccp_li_hist_interval_new() populates the list with
 * DCCP_LI_HIST_IVAL_F_LENGTH + 1 entries, in which case the loop below
 * leaves i one past DCCP_LI_HIST_IVAL_F_LENGTH and this function returns
 * 0 -- verify the terminating check against the intended history length.
 */
u32 dccp_li_hist_calc_i_mean(struct list_head *list)
{
	struct dccp_li_hist_entry *li_entry, *li_next;
	int i = 0;
	u32 i_tot;
	u32 i_tot0 = 0;
	u32 i_tot1 = 0;
	u32 w_tot = 0;

	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
		if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
			w_tot += dccp_li_hist_w[i];
		}

		if (i != 0)
			i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];

		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
			break;
	}

	if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
		return 0;

	i_tot = max(i_tot0, i_tot1);

	/* FIXME: Why do we do this? -Ian McDonald */
	if (i_tot * 4 < w_tot)
		i_tot = w_tot * 4;

	/* Undo the x4 scaling of the weight table. */
	return i_tot * 4 / w_tot;
}
117
118EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
119
120struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
121 struct list_head *list,
122 const u64 seq_loss,
123 const u8 win_loss)
124{
125 struct dccp_li_hist_entry *tail = NULL, *entry;
126 int i;
127
128 for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
129 entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
130 if (entry == NULL) {
131 dccp_li_hist_purge(hist, list);
132 return NULL;
133 }
134 if (tail == NULL)
135 tail = entry;
136 list_add(&entry->dccplih_node, list);
137 }
138
139 entry->dccplih_seqno = seq_loss;
140 entry->dccplih_win_count = win_loss;
141 return tail;
142}
143
144EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
new file mode 100644
index 000000000000..13ad47ba1420
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -0,0 +1,61 @@
1#ifndef _DCCP_LI_HIST_
2#define _DCCP_LI_HIST_
3/*
4 * net/dccp/ccids/lib/loss_interval.h
5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/list.h>
18#include <linux/slab.h>
19#include <linux/time.h>
20
/* Number of weighted loss intervals used in the I_mean calculation. */
#define DCCP_LI_HIST_IVAL_F_LENGTH 8

/* A loss interval history: just a private slab for its entries. */
struct dccp_li_hist {
	kmem_cache_t *dccplih_slab;
};

extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
extern void dccp_li_hist_delete(struct dccp_li_hist *hist);

/*
 * One loss interval:
 * dccplih_seqno     - sequence number of the loss that opened the interval
 * dccplih_win_count - window counter value at that loss
 * dccplih_interval  - interval length (set by the caller of
 *                     dccp_li_hist_interval_new() for the tail entry)
 */
struct dccp_li_hist_entry {
	struct list_head dccplih_node;
	u64		 dccplih_seqno:48,
			 dccplih_win_count:4;
	u32		 dccplih_interval;
};

/* Allocate one interval entry from the history's slab; fields are NOT
 * initialised by the allocator. */
static inline struct dccp_li_hist_entry *
	dccp_li_hist_entry_new(struct dccp_li_hist *hist,
			       const unsigned int __nocast prio)
{
	return kmem_cache_alloc(hist->dccplih_slab, prio);
}

/* Return an entry to the slab; NULL entries are silently ignored. */
static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
					     struct dccp_li_hist_entry *entry)
{
	if (entry != NULL)
		kmem_cache_free(hist->dccplih_slab, entry);
}

extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
			       struct list_head *list);

extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);

extern struct dccp_li_hist_entry *
	dccp_li_hist_interval_new(struct dccp_li_hist *hist,
				  struct list_head *list,
				  const u64 seq_loss,
				  const u8 win_loss);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
new file mode 100644
index 000000000000..d3f9d2053830
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -0,0 +1,398 @@
1/*
2 * net/dccp/packet_history.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#include <linux/config.h>
38#include <linux/module.h>
39#include <linux/string.h>
40
41#include "packet_history.h"
42
43struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
44{
45 struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
46 static const char dccp_rx_hist_mask[] = "rx_hist_%s";
47 char *slab_name;
48
49 if (hist == NULL)
50 goto out;
51
52 slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
53 GFP_ATOMIC);
54 if (slab_name == NULL)
55 goto out_free_hist;
56
57 sprintf(slab_name, dccp_rx_hist_mask, name);
58 hist->dccprxh_slab = kmem_cache_create(slab_name,
59 sizeof(struct dccp_rx_hist_entry),
60 0, SLAB_HWCACHE_ALIGN,
61 NULL, NULL);
62 if (hist->dccprxh_slab == NULL)
63 goto out_free_slab_name;
64out:
65 return hist;
66out_free_slab_name:
67 kfree(slab_name);
68out_free_hist:
69 kfree(hist);
70 hist = NULL;
71 goto out;
72}
73
74EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
75
/*
 * Destroy a receive-packet history.  The slab cache's name string was
 * kmalloc'ed by dccp_rx_hist_new(), so it must be fetched BEFORE
 * kmem_cache_destroy() and kfree'd afterwards, or it would leak.
 */
void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
{
	const char* name = kmem_cache_name(hist->dccprxh_slab);

	kmem_cache_destroy(hist->dccprxh_slab);
	kfree(name);
	kfree(hist);
}

EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
86
87void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
88{
89 struct dccp_rx_hist_entry *entry, *next;
90
91 list_for_each_entry_safe(entry, next, list, dccphrx_node) {
92 list_del_init(&entry->dccphrx_node);
93 kmem_cache_free(hist->dccprxh_slab, entry);
94 }
95}
96
97EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
98
99struct dccp_rx_hist_entry *
100 dccp_rx_hist_find_data_packet(const struct list_head *list)
101{
102 struct dccp_rx_hist_entry *entry, *packet = NULL;
103
104 list_for_each_entry(entry, list, dccphrx_node)
105 if (entry->dccphrx_type == DCCP_PKT_DATA ||
106 entry->dccphrx_type == DCCP_PKT_DATAACK) {
107 packet = entry;
108 break;
109 }
110
111 return packet;
112}
113
114EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
115
/*
 * Insert @packet into the sequence-number-ordered (newest first) receive
 * history @rx_list and trim entries no longer needed for loss detection.
 *
 * Returns 1 if @packet is so old that TFRC_RECV_NUM_LATE_LOSS newer data
 * packets were already received (the entry is freed and NOT linked),
 * 0 otherwise.  @li_list is only tested for emptiness: with no loss
 * interval history yet, trimming keeps roughly one RTT worth of packets
 * (judged by the 4-bit ccval window counter) instead of a fixed count.
 */
int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
			    struct list_head *rx_list,
			    struct list_head *li_list,
			    struct dccp_rx_hist_entry *packet)
{
	struct dccp_rx_hist_entry *entry, *next, *iter;
	u8 num_later = 0;

	iter = dccp_rx_hist_head(rx_list);
	if (iter == NULL)
		/* Empty history: trivially insert at the head. */
		dccp_rx_hist_add_entry(rx_list, packet);
	else {
		const u64 seqno = packet->dccphrx_seqno;

		if (after48(seqno, iter->dccphrx_seqno))
			/* In-order arrival: new head of the list. */
			dccp_rx_hist_add_entry(rx_list, packet);
		else {
			/* Out-of-order: walk until the insertion point,
			 * counting how many newer data packets we pass. */
			if (dccp_rx_hist_entry_data_packet(iter))
				num_later = 1;

			list_for_each_entry_continue(iter, rx_list,
						     dccphrx_node) {
				if (after48(seqno, iter->dccphrx_seqno)) {
					dccp_rx_hist_add_entry(&iter->dccphrx_node,
							       packet);
					goto trim_history;
				}

				if (dccp_rx_hist_entry_data_packet(iter))
					num_later++;

				if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
					/* Too late to matter for loss
					 * detection: drop it. */
					dccp_rx_hist_entry_delete(hist, packet);
					return 1;
				}
			}

			if (num_later < TFRC_RECV_NUM_LATE_LOSS)
				dccp_rx_hist_add_entry(rx_list, packet);
			/*
			 * FIXME: else what? should we destroy the packet
			 * like above?
			 */
		}
	}

trim_history:
	/*
	 * Trim history (remove all packets after the NUM_LATE_LOSS + 1
	 * data packets)
	 */
	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;

	if (!list_empty(li_list)) {
		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
			if (num_later == 0) {
				list_del_init(&entry->dccphrx_node);
				dccp_rx_hist_entry_delete(hist, entry);
			} else if (dccp_rx_hist_entry_data_packet(entry))
				--num_later;
		}
	} else {
		int step = 0;
		u8 win_count = 0; /* Not needed, but lets shut up gcc */
		int tmp;
		/*
		 * We have no loss interval history so we need at least one
		 * rtt:s of data packets to approximate rtt.
		 */
		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
			if (num_later == 0) {
				switch (step) {
				case 0:
					step = 1;
					/* OK, find next data packet */
					num_later = 1;
					break;
				case 1:
					step = 2;
					/* OK, find next data packet */
					num_later = 1;
					win_count = entry->dccphrx_ccval;
					break;
				case 2:
					/* ccval wraps at
					 * TFRC_WIN_COUNT_LIMIT */
					tmp = win_count - entry->dccphrx_ccval;
					if (tmp < 0)
						tmp += TFRC_WIN_COUNT_LIMIT;
					if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
						/*
						 * We have found a packet older
						 * than one rtt remove the rest
						 */
						step = 3;
					} else /* OK, find next data packet */
						num_later = 1;
					break;
				case 3:
					list_del_init(&entry->dccphrx_node);
					dccp_rx_hist_entry_delete(hist, entry);
					break;
				}
			} else if (dccp_rx_hist_entry_data_packet(entry))
				--num_later;
		}
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
226
/*
 * Scan the receive history for a loss event: a hole counts as a loss
 * once TFRC_RECV_NUM_LATE_LOSS newer data packets have been received.
 *
 * Returns the sequence number just after the newest packet preceding
 * the hole, or DCCP_MAX_SEQNO + 1 when no loss was found.  *win_loss is
 * set to the window counter (ccval) of the packet bounding the hole
 * from below, or 0 when there is no loss.
 */
u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
			     struct list_head *li_list, u8 *win_loss)
{
	struct dccp_rx_hist_entry *entry, *next, *packet;
	struct dccp_rx_hist_entry *a_loss = NULL;
	struct dccp_rx_hist_entry *b_loss = NULL;
	u64 seq_loss = DCCP_MAX_SEQNO + 1;
	u8 num_later = TFRC_RECV_NUM_LATE_LOSS;

	/* b_loss: newest entry followed by NUM_LATE_LOSS data packets */
	list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
		if (num_later == 0) {
			b_loss = entry;
			break;
		} else if (dccp_rx_hist_entry_data_packet(entry))
			--num_later;
	}

	if (b_loss == NULL)
		goto out;

	/* a_loss: the next data packet older than b_loss */
	num_later = 1;
	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
		if (num_later == 0) {
			a_loss = entry;
			break;
		} else if (dccp_rx_hist_entry_data_packet(entry))
			--num_later;
	}

	if (a_loss == NULL) {
		if (list_empty(li_list)) {
			/* no loss event has occurred yet */
			LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
				       "comparing to initial seqno\n",
				       __FUNCTION__);
			goto out;
		} else {
			LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!",
				       __FUNCTION__);
			goto out;
		}
	}

	/* Locate a lost data packet: walk from b_loss towards a_loss and
	 * compare each seqno gap against the NDP (non-data packet) count;
	 * a mismatch means a DATA packet went missing in that gap. */
	entry = packet = b_loss;
	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
		u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
					     packet->dccphrx_seqno);

		if (delta != 0) {
			if (dccp_rx_hist_entry_data_packet(packet))
				--delta;
			/*
			 * FIXME: check this, probably this % usage is because
			 * in earlier drafts the ndp count was just 8 bits
			 * long, but now it can be up to 24 bits long.
			 */
#if 0
			if (delta % DCCP_NDP_LIMIT !=
			    (packet->dccphrx_ndp -
			     entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
#endif
			if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
				seq_loss = entry->dccphrx_seqno;
				dccp_inc_seqno(&seq_loss);
			}
		}
		packet = entry;
		if (packet == a_loss)
			break;
	}
out:
	/* a_loss is always non-NULL here when seq_loss was set above */
	if (seq_loss != DCCP_MAX_SEQNO + 1)
		*win_loss = a_loss->dccphrx_ccval;
	else
		*win_loss = 0; /* Paranoia */

	return seq_loss;
}

EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
308
309struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
310{
311 struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
312 static const char dccp_tx_hist_mask[] = "tx_hist_%s";
313 char *slab_name;
314
315 if (hist == NULL)
316 goto out;
317
318 slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
319 GFP_ATOMIC);
320 if (slab_name == NULL)
321 goto out_free_hist;
322
323 sprintf(slab_name, dccp_tx_hist_mask, name);
324 hist->dccptxh_slab = kmem_cache_create(slab_name,
325 sizeof(struct dccp_tx_hist_entry),
326 0, SLAB_HWCACHE_ALIGN,
327 NULL, NULL);
328 if (hist->dccptxh_slab == NULL)
329 goto out_free_slab_name;
330out:
331 return hist;
332out_free_slab_name:
333 kfree(slab_name);
334out_free_hist:
335 kfree(hist);
336 hist = NULL;
337 goto out;
338}
339
340EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
341
/*
 * Destroy a transmit-packet history.  The slab cache's name string was
 * kmalloc'ed by dccp_tx_hist_new(), so it must be fetched BEFORE
 * kmem_cache_destroy() and kfree'd afterwards, or it would leak.
 */
void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
{
	const char* name = kmem_cache_name(hist->dccptxh_slab);

	kmem_cache_destroy(hist->dccptxh_slab);
	kfree(name);
	kfree(hist);
}

EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
352
353struct dccp_tx_hist_entry *
354 dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
355{
356 struct dccp_tx_hist_entry *packet = NULL, *entry;
357
358 list_for_each_entry(entry, list, dccphtx_node)
359 if (entry->dccphtx_seqno == seq) {
360 packet = entry;
361 break;
362 }
363
364 return packet;
365}
366
367EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
368
/*
 * Free every entry on @list that comes AFTER @packet in list order
 * (i.e. older, since the list is kept newest first).  @packet itself is
 * kept: list_for_each_entry_safe_continue starts at its successor.
 */
void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
			      struct list_head *list,
			      struct dccp_tx_hist_entry *packet)
{
	struct dccp_tx_hist_entry *next;

	list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
		list_del_init(&packet->dccphtx_node);
		dccp_tx_hist_entry_delete(hist, packet);
	}
}

EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
382
383void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
384{
385 struct dccp_tx_hist_entry *entry, *next;
386
387 list_for_each_entry_safe(entry, next, list, dccphtx_node) {
388 list_del_init(&entry->dccphtx_node);
389 dccp_tx_hist_entry_delete(hist, entry);
390 }
391}
392
393EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
394
395MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
396 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
397MODULE_DESCRIPTION("DCCP TFRC library");
398MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
new file mode 100644
index 000000000000..fb90a91aa93d
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -0,0 +1,199 @@
1/*
2 * net/dccp/packet_history.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#ifndef _DCCP_PKT_HIST_
38#define _DCCP_PKT_HIST_
39
40#include <linux/config.h>
41#include <linux/list.h>
42#include <linux/slab.h>
43#include <linux/time.h>
44
45#include "../../dccp.h"
46
47/* Number of later packets received before one is considered lost */
48#define TFRC_RECV_NUM_LATE_LOSS 3
49
50#define TFRC_WIN_COUNT_PER_RTT 4
51#define TFRC_WIN_COUNT_LIMIT 16
52
53struct dccp_tx_hist_entry {
54 struct list_head dccphtx_node;
55 u64 dccphtx_seqno:48,
56 dccphtx_ccval:4,
57 dccphtx_sent:1;
58 u32 dccphtx_rtt;
59 struct timeval dccphtx_tstamp;
60};
61
62struct dccp_rx_hist_entry {
63 struct list_head dccphrx_node;
64 u64 dccphrx_seqno:48,
65 dccphrx_ccval:4,
66 dccphrx_type:4;
67 u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
68 struct timeval dccphrx_tstamp;
69};
70
71struct dccp_tx_hist {
72 kmem_cache_t *dccptxh_slab;
73};
74
75extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
76extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
77
78struct dccp_rx_hist {
79 kmem_cache_t *dccprxh_slab;
80};
81
82extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
83extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
84extern struct dccp_rx_hist_entry *
85 dccp_rx_hist_find_data_packet(const struct list_head *list);
86
/*
 * Allocate a TX history entry from @hist's slab cache.  Only the
 * dccphtx_sent flag is initialised (cleared); all other fields are the
 * caller's responsibility.  Returns NULL on allocation failure.
 */
static inline struct dccp_tx_hist_entry *
	dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
			       const unsigned int __nocast prio)
{
	struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
							    prio);

	if (entry != NULL)
		entry->dccphtx_sent = 0;

	return entry;
}
99
100static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
101 struct dccp_tx_hist_entry *entry)
102{
103 if (entry != NULL)
104 kmem_cache_free(hist->dccptxh_slab, entry);
105}
106
107extern struct dccp_tx_hist_entry *
108 dccp_tx_hist_find_entry(const struct list_head *list,
109 const u64 seq);
110
/*
 * Link @entry at the head of @list: the TX history is kept newest
 * first.
 */
static inline void dccp_tx_hist_add_entry(struct list_head *list,
					  struct dccp_tx_hist_entry *entry)
{
	list_add(&entry->dccphtx_node, list);
}
116
117extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
118 struct list_head *list,
119 struct dccp_tx_hist_entry *next);
120
121extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
122 struct list_head *list);
123
124static inline struct dccp_tx_hist_entry *
125 dccp_tx_hist_head(struct list_head *list)
126{
127 struct dccp_tx_hist_entry *head = NULL;
128
129 if (!list_empty(list))
130 head = list_entry(list->next, struct dccp_tx_hist_entry,
131 dccphtx_node);
132 return head;
133}
134
/*
 * Allocate an RX history entry and fill it from @skb's DCCP header:
 * sequence number, window counter (ccval), packet type, plus the
 * caller-supplied @ndp count.  The receive timestamp is taken here via
 * do_gettimeofday().  Returns NULL on allocation failure.
 */
static inline struct dccp_rx_hist_entry *
	dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
			       const u32 ndp,
			       const struct sk_buff *skb,
			       const unsigned int __nocast prio)
{
	struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
							    prio);

	if (entry != NULL) {
		const struct dccp_hdr *dh = dccp_hdr(skb);

		entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
		entry->dccphrx_ccval = dh->dccph_ccval;
		entry->dccphrx_type = dh->dccph_type;
		entry->dccphrx_ndp = ndp;
		do_gettimeofday(&(entry->dccphrx_tstamp));
	}

	return entry;
}
156
157static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
158 struct dccp_rx_hist_entry *entry)
159{
160 if (entry != NULL)
161 kmem_cache_free(hist->dccprxh_slab, entry);
162}
163
164extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
165 struct list_head *list);
166
/*
 * Link @entry at the head of @list: the RX history is kept newest
 * first.
 */
static inline void dccp_rx_hist_add_entry(struct list_head *list,
					  struct dccp_rx_hist_entry *entry)
{
	list_add(&entry->dccphrx_node, list);
}
172
173static inline struct dccp_rx_hist_entry *
174 dccp_rx_hist_head(struct list_head *list)
175{
176 struct dccp_rx_hist_entry *head = NULL;
177
178 if (!list_empty(list))
179 head = list_entry(list->next, struct dccp_rx_hist_entry,
180 dccphrx_node);
181 return head;
182}
183
184static inline int
185 dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
186{
187 return entry->dccphrx_type == DCCP_PKT_DATA ||
188 entry->dccphrx_type == DCCP_PKT_DATAACK;
189}
190
191extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
192 struct list_head *rx_list,
193 struct list_head *li_list,
194 struct dccp_rx_hist_entry *packet);
195
196extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
197 struct list_head *li_list, u8 *win_loss);
198
199#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
new file mode 100644
index 000000000000..130c4c40cfe3
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -0,0 +1,22 @@
1#ifndef _TFRC_H_
2#define _TFRC_H_
3/*
4 * net/dccp/ccids/lib/tfrc.h
5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#include <linux/types.h>
18
19extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
20extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
21
22#endif /* _TFRC_H_ */
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
new file mode 100644
index 000000000000..d2b5933b4510
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -0,0 +1,644 @@
1/*
2 * net/dccp/ccids/lib/tfrc_equation.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 */
14
15#include <linux/config.h>
16#include <linux/module.h>
17
18#include <asm/bug.h>
19#include <asm/div64.h>
20
21#include "tfrc.h"
22
23#define TFRC_CALC_X_ARRSIZE 500
24
25#define TFRC_CALC_X_SPLIT 50000
26/* equivalent to 0.05 */
27
28static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
29 { 37172, 8172 },
30 { 53499, 11567 },
31 { 66664, 14180 },
32 { 78298, 16388 },
33 { 89021, 18339 },
34 { 99147, 20108 },
35 { 108858, 21738 },
36 { 118273, 23260 },
37 { 127474, 24693 },
38 { 136520, 26052 },
39 { 145456, 27348 },
40 { 154316, 28589 },
41 { 163130, 29783 },
42 { 171919, 30935 },
43 { 180704, 32049 },
44 { 189502, 33130 },
45 { 198328, 34180 },
46 { 207194, 35202 },
47 { 216114, 36198 },
48 { 225097, 37172 },
49 { 234153, 38123 },
50 { 243294, 39055 },
51 { 252527, 39968 },
52 { 261861, 40864 },
53 { 271305, 41743 },
54 { 280866, 42607 },
55 { 290553, 43457 },
56 { 300372, 44293 },
57 { 310333, 45117 },
58 { 320441, 45929 },
59 { 330705, 46729 },
60 { 341131, 47518 },
61 { 351728, 48297 },
62 { 362501, 49066 },
63 { 373460, 49826 },
64 { 384609, 50577 },
65 { 395958, 51320 },
66 { 407513, 52054 },
67 { 419281, 52780 },
68 { 431270, 53499 },
69 { 443487, 54211 },
70 { 455940, 54916 },
71 { 468635, 55614 },
72 { 481581, 56306 },
73 { 494785, 56991 },
74 { 508254, 57671 },
75 { 521996, 58345 },
76 { 536019, 59014 },
77 { 550331, 59677 },
78 { 564939, 60335 },
79 { 579851, 60988 },
80 { 595075, 61636 },
81 { 610619, 62279 },
82 { 626491, 62918 },
83 { 642700, 63553 },
84 { 659253, 64183 },
85 { 676158, 64809 },
86 { 693424, 65431 },
87 { 711060, 66050 },
88 { 729073, 66664 },
89 { 747472, 67275 },
90 { 766266, 67882 },
91 { 785464, 68486 },
92 { 805073, 69087 },
93 { 825103, 69684 },
94 { 845562, 70278 },
95 { 866460, 70868 },
96 { 887805, 71456 },
97 { 909606, 72041 },
98 { 931873, 72623 },
99 { 954614, 73202 },
100 { 977839, 73778 },
101 { 1001557, 74352 },
102 { 1025777, 74923 },
103 { 1050508, 75492 },
104 { 1075761, 76058 },
105 { 1101544, 76621 },
106 { 1127867, 77183 },
107 { 1154739, 77741 },
108 { 1182172, 78298 },
109 { 1210173, 78852 },
110 { 1238753, 79405 },
111 { 1267922, 79955 },
112 { 1297689, 80503 },
113 { 1328066, 81049 },
114 { 1359060, 81593 },
115 { 1390684, 82135 },
116 { 1422947, 82675 },
117 { 1455859, 83213 },
118 { 1489430, 83750 },
119 { 1523671, 84284 },
120 { 1558593, 84817 },
121 { 1594205, 85348 },
122 { 1630518, 85878 },
123 { 1667543, 86406 },
124 { 1705290, 86932 },
125 { 1743770, 87457 },
126 { 1782994, 87980 },
127 { 1822973, 88501 },
128 { 1863717, 89021 },
129 { 1905237, 89540 },
130 { 1947545, 90057 },
131 { 1990650, 90573 },
132 { 2034566, 91087 },
133 { 2079301, 91600 },
134 { 2124869, 92111 },
135 { 2171279, 92622 },
136 { 2218543, 93131 },
137 { 2266673, 93639 },
138 { 2315680, 94145 },
139 { 2365575, 94650 },
140 { 2416371, 95154 },
141 { 2468077, 95657 },
142 { 2520707, 96159 },
143 { 2574271, 96660 },
144 { 2628782, 97159 },
145 { 2684250, 97658 },
146 { 2740689, 98155 },
147 { 2798110, 98651 },
148 { 2856524, 99147 },
149 { 2915944, 99641 },
150 { 2976382, 100134 },
151 { 3037850, 100626 },
152 { 3100360, 101117 },
153 { 3163924, 101608 },
154 { 3228554, 102097 },
155 { 3294263, 102586 },
156 { 3361063, 103073 },
157 { 3428966, 103560 },
158 { 3497984, 104045 },
159 { 3568131, 104530 },
160 { 3639419, 105014 },
161 { 3711860, 105498 },
162 { 3785467, 105980 },
163 { 3860253, 106462 },
164 { 3936229, 106942 },
165 { 4013410, 107422 },
166 { 4091808, 107902 },
167 { 4171435, 108380 },
168 { 4252306, 108858 },
169 { 4334431, 109335 },
170 { 4417825, 109811 },
171 { 4502501, 110287 },
172 { 4588472, 110762 },
173 { 4675750, 111236 },
174 { 4764349, 111709 },
175 { 4854283, 112182 },
176 { 4945564, 112654 },
177 { 5038206, 113126 },
178 { 5132223, 113597 },
179 { 5227627, 114067 },
180 { 5324432, 114537 },
181 { 5422652, 115006 },
182 { 5522299, 115474 },
183 { 5623389, 115942 },
184 { 5725934, 116409 },
185 { 5829948, 116876 },
186 { 5935446, 117342 },
187 { 6042439, 117808 },
188 { 6150943, 118273 },
189 { 6260972, 118738 },
190 { 6372538, 119202 },
191 { 6485657, 119665 },
192 { 6600342, 120128 },
193 { 6716607, 120591 },
194 { 6834467, 121053 },
195 { 6953935, 121514 },
196 { 7075025, 121976 },
197 { 7197752, 122436 },
198 { 7322131, 122896 },
199 { 7448175, 123356 },
200 { 7575898, 123815 },
201 { 7705316, 124274 },
202 { 7836442, 124733 },
203 { 7969291, 125191 },
204 { 8103877, 125648 },
205 { 8240216, 126105 },
206 { 8378321, 126562 },
207 { 8518208, 127018 },
208 { 8659890, 127474 },
209 { 8803384, 127930 },
210 { 8948702, 128385 },
211 { 9095861, 128840 },
212 { 9244875, 129294 },
213 { 9395760, 129748 },
214 { 9548529, 130202 },
215 { 9703198, 130655 },
216 { 9859782, 131108 },
217 { 10018296, 131561 },
218 { 10178755, 132014 },
219 { 10341174, 132466 },
220 { 10505569, 132917 },
221 { 10671954, 133369 },
222 { 10840345, 133820 },
223 { 11010757, 134271 },
224 { 11183206, 134721 },
225 { 11357706, 135171 },
226 { 11534274, 135621 },
227 { 11712924, 136071 },
228 { 11893673, 136520 },
229 { 12076536, 136969 },
230 { 12261527, 137418 },
231 { 12448664, 137867 },
232 { 12637961, 138315 },
233 { 12829435, 138763 },
234 { 13023101, 139211 },
235 { 13218974, 139658 },
236 { 13417071, 140106 },
237 { 13617407, 140553 },
238 { 13819999, 140999 },
239 { 14024862, 141446 },
240 { 14232012, 141892 },
241 { 14441465, 142339 },
242 { 14653238, 142785 },
243 { 14867346, 143230 },
244 { 15083805, 143676 },
245 { 15302632, 144121 },
246 { 15523842, 144566 },
247 { 15747453, 145011 },
248 { 15973479, 145456 },
249 { 16201939, 145900 },
250 { 16432847, 146345 },
251 { 16666221, 146789 },
252 { 16902076, 147233 },
253 { 17140429, 147677 },
254 { 17381297, 148121 },
255 { 17624696, 148564 },
256 { 17870643, 149007 },
257 { 18119154, 149451 },
258 { 18370247, 149894 },
259 { 18623936, 150336 },
260 { 18880241, 150779 },
261 { 19139176, 151222 },
262 { 19400759, 151664 },
263 { 19665007, 152107 },
264 { 19931936, 152549 },
265 { 20201564, 152991 },
266 { 20473907, 153433 },
267 { 20748982, 153875 },
268 { 21026807, 154316 },
269 { 21307399, 154758 },
270 { 21590773, 155199 },
271 { 21876949, 155641 },
272 { 22165941, 156082 },
273 { 22457769, 156523 },
274 { 22752449, 156964 },
275 { 23049999, 157405 },
276 { 23350435, 157846 },
277 { 23653774, 158287 },
278 { 23960036, 158727 },
279 { 24269236, 159168 },
280 { 24581392, 159608 },
281 { 24896521, 160049 },
282 { 25214642, 160489 },
283 { 25535772, 160929 },
284 { 25859927, 161370 },
285 { 26187127, 161810 },
286 { 26517388, 162250 },
287 { 26850728, 162690 },
288 { 27187165, 163130 },
289 { 27526716, 163569 },
290 { 27869400, 164009 },
291 { 28215234, 164449 },
292 { 28564236, 164889 },
293 { 28916423, 165328 },
294 { 29271815, 165768 },
295 { 29630428, 166208 },
296 { 29992281, 166647 },
297 { 30357392, 167087 },
298 { 30725779, 167526 },
299 { 31097459, 167965 },
300 { 31472452, 168405 },
301 { 31850774, 168844 },
302 { 32232445, 169283 },
303 { 32617482, 169723 },
304 { 33005904, 170162 },
305 { 33397730, 170601 },
306 { 33792976, 171041 },
307 { 34191663, 171480 },
308 { 34593807, 171919 },
309 { 34999428, 172358 },
310 { 35408544, 172797 },
311 { 35821174, 173237 },
312 { 36237335, 173676 },
313 { 36657047, 174115 },
314 { 37080329, 174554 },
315 { 37507197, 174993 },
316 { 37937673, 175433 },
317 { 38371773, 175872 },
318 { 38809517, 176311 },
319 { 39250924, 176750 },
320 { 39696012, 177190 },
321 { 40144800, 177629 },
322 { 40597308, 178068 },
323 { 41053553, 178507 },
324 { 41513554, 178947 },
325 { 41977332, 179386 },
326 { 42444904, 179825 },
327 { 42916290, 180265 },
328 { 43391509, 180704 },
329 { 43870579, 181144 },
330 { 44353520, 181583 },
331 { 44840352, 182023 },
332 { 45331092, 182462 },
333 { 45825761, 182902 },
334 { 46324378, 183342 },
335 { 46826961, 183781 },
336 { 47333531, 184221 },
337 { 47844106, 184661 },
338 { 48358706, 185101 },
339 { 48877350, 185541 },
340 { 49400058, 185981 },
341 { 49926849, 186421 },
342 { 50457743, 186861 },
343 { 50992759, 187301 },
344 { 51531916, 187741 },
345 { 52075235, 188181 },
346 { 52622735, 188622 },
347 { 53174435, 189062 },
348 { 53730355, 189502 },
349 { 54290515, 189943 },
350 { 54854935, 190383 },
351 { 55423634, 190824 },
352 { 55996633, 191265 },
353 { 56573950, 191706 },
354 { 57155606, 192146 },
355 { 57741621, 192587 },
356 { 58332014, 193028 },
357 { 58926806, 193470 },
358 { 59526017, 193911 },
359 { 60129666, 194352 },
360 { 60737774, 194793 },
361 { 61350361, 195235 },
362 { 61967446, 195677 },
363 { 62589050, 196118 },
364 { 63215194, 196560 },
365 { 63845897, 197002 },
366 { 64481179, 197444 },
367 { 65121061, 197886 },
368 { 65765563, 198328 },
369 { 66414705, 198770 },
370 { 67068508, 199213 },
371 { 67726992, 199655 },
372 { 68390177, 200098 },
373 { 69058085, 200540 },
374 { 69730735, 200983 },
375 { 70408147, 201426 },
376 { 71090343, 201869 },
377 { 71777343, 202312 },
378 { 72469168, 202755 },
379 { 73165837, 203199 },
380 { 73867373, 203642 },
381 { 74573795, 204086 },
382 { 75285124, 204529 },
383 { 76001380, 204973 },
384 { 76722586, 205417 },
385 { 77448761, 205861 },
386 { 78179926, 206306 },
387 { 78916102, 206750 },
388 { 79657310, 207194 },
389 { 80403571, 207639 },
390 { 81154906, 208084 },
391 { 81911335, 208529 },
392 { 82672880, 208974 },
393 { 83439562, 209419 },
394 { 84211402, 209864 },
395 { 84988421, 210309 },
396 { 85770640, 210755 },
397 { 86558080, 211201 },
398 { 87350762, 211647 },
399 { 88148708, 212093 },
400 { 88951938, 212539 },
401 { 89760475, 212985 },
402 { 90574339, 213432 },
403 { 91393551, 213878 },
404 { 92218133, 214325 },
405 { 93048107, 214772 },
406 { 93883493, 215219 },
407 { 94724314, 215666 },
408 { 95570590, 216114 },
409 { 96422343, 216561 },
410 { 97279594, 217009 },
411 { 98142366, 217457 },
412 { 99010679, 217905 },
413 { 99884556, 218353 },
414 { 100764018, 218801 },
415 { 101649086, 219250 },
416 { 102539782, 219698 },
417 { 103436128, 220147 },
418 { 104338146, 220596 },
419 { 105245857, 221046 },
420 { 106159284, 221495 },
421 { 107078448, 221945 },
422 { 108003370, 222394 },
423 { 108934074, 222844 },
424 { 109870580, 223294 },
425 { 110812910, 223745 },
426 { 111761087, 224195 },
427 { 112715133, 224646 },
428 { 113675069, 225097 },
429 { 114640918, 225548 },
430 { 115612702, 225999 },
431 { 116590442, 226450 },
432 { 117574162, 226902 },
433 { 118563882, 227353 },
434 { 119559626, 227805 },
435 { 120561415, 228258 },
436 { 121569272, 228710 },
437 { 122583219, 229162 },
438 { 123603278, 229615 },
439 { 124629471, 230068 },
440 { 125661822, 230521 },
441 { 126700352, 230974 },
442 { 127745083, 231428 },
443 { 128796039, 231882 },
444 { 129853241, 232336 },
445 { 130916713, 232790 },
446 { 131986475, 233244 },
447 { 133062553, 233699 },
448 { 134144966, 234153 },
449 { 135233739, 234608 },
450 { 136328894, 235064 },
451 { 137430453, 235519 },
452 { 138538440, 235975 },
453 { 139652876, 236430 },
454 { 140773786, 236886 },
455 { 141901190, 237343 },
456 { 143035113, 237799 },
457 { 144175576, 238256 },
458 { 145322604, 238713 },
459 { 146476218, 239170 },
460 { 147636442, 239627 },
461 { 148803298, 240085 },
462 { 149976809, 240542 },
463 { 151156999, 241000 },
464 { 152343890, 241459 },
465 { 153537506, 241917 },
466 { 154737869, 242376 },
467 { 155945002, 242835 },
468 { 157158929, 243294 },
469 { 158379673, 243753 },
470 { 159607257, 244213 },
471 { 160841704, 244673 },
472 { 162083037, 245133 },
473 { 163331279, 245593 },
474 { 164586455, 246054 },
475 { 165848586, 246514 },
476 { 167117696, 246975 },
477 { 168393810, 247437 },
478 { 169676949, 247898 },
479 { 170967138, 248360 },
480 { 172264399, 248822 },
481 { 173568757, 249284 },
482 { 174880235, 249747 },
483 { 176198856, 250209 },
484 { 177524643, 250672 },
485 { 178857621, 251136 },
486 { 180197813, 251599 },
487 { 181545242, 252063 },
488 { 182899933, 252527 },
489 { 184261908, 252991 },
490 { 185631191, 253456 },
491 { 187007807, 253920 },
492 { 188391778, 254385 },
493 { 189783129, 254851 },
494 { 191181884, 255316 },
495 { 192588065, 255782 },
496 { 194001698, 256248 },
497 { 195422805, 256714 },
498 { 196851411, 257181 },
499 { 198287540, 257648 },
500 { 199731215, 258115 },
501 { 201182461, 258582 },
502 { 202641302, 259050 },
503 { 204107760, 259518 },
504 { 205581862, 259986 },
505 { 207063630, 260454 },
506 { 208553088, 260923 },
507 { 210050262, 261392 },
508 { 211555174, 261861 },
509 { 213067849, 262331 },
510 { 214588312, 262800 },
511 { 216116586, 263270 },
512 { 217652696, 263741 },
513 { 219196666, 264211 },
514 { 220748520, 264682 },
515 { 222308282, 265153 },
516 { 223875978, 265625 },
517 { 225451630, 266097 },
518 { 227035265, 266569 },
519 { 228626905, 267041 },
520 { 230226576, 267514 },
521 { 231834302, 267986 },
522 { 233450107, 268460 },
523 { 235074016, 268933 },
524 { 236706054, 269407 },
525 { 238346244, 269881 },
526 { 239994613, 270355 },
527 { 241651183, 270830 },
528 { 243315981, 271305 }
529};
530
531/* Calculate the send rate as per section 3.1 of RFC3448
532
533Returns send rate in bytes per second
534
535Integer maths and lookups are used as not allowed floating point in kernel
536
537The function for Xcalc as per section 3.1 of RFC3448 is:
538
539X = s
540 -------------------------------------------------------------
541 R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
542
543where
 544X is the transmit rate in bytes/second
545s is the packet size in bytes
546R is the round trip time in seconds
547p is the loss event rate, between 0 and 1.0, of the number of loss events
548 as a fraction of the number of packets transmitted
549t_RTO is the TCP retransmission timeout value in seconds
550b is the number of packets acknowledged by a single TCP acknowledgement
551
552we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
553
554X = s
555 -----------------------------------------------------------------------
556 R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
557
558
559which we can break down into:
560
561X = s
562 --------
563 R * f(p)
564
565where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
566
567Function parameters:
568s - bytes
569R - RTT in usecs
570p - loss rate (decimal fraction multiplied by 1,000,000)
571
572Returns Xcalc in bytes per second
573
574DON'T alter this code unless you run test cases against it as the code
 575has been manipulated to stop underflow/overflow.
576
577*/
/*
 * tfrc_calc_x - TFRC transmit rate X = s / (R * f(p)), RFC 3448 sec. 3.1
 * @s: packet size in bytes
 * @R: RTT in microseconds (clamped to 1 to avoid division by zero)
 * @p: loss event rate, scaled by 1,000,000
 *
 * f(p) comes from tfrc_calc_x_lookup[]: column 1 holds the fine-grained
 * values for p < TFRC_CALC_X_SPLIT (0.05), column 0 the coarse values
 * for the full range.  Returns X in bytes per second.  The 1e8/1e4
 * scaling below was tuned against test cases to avoid 32-bit
 * overflow/underflow — see the warning in the comment block above.
 */
u32 tfrc_calc_x(u16 s, u32 R, u32 p)
{
	int index;
	u32 f;
	u64 tmp1, tmp2;

	/* Map p onto a table index; each branch uses its column's scale. */
	if (p < TFRC_CALC_X_SPLIT)
		index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
	else
		index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;

	if (index < 0)
		/* p should be 0 unless there is a bug in my code */
		index = 0;

	if (R == 0)
		R = 1; /* RTT can't be zero or else divide by zero */

	BUG_ON(index >= TFRC_CALC_X_ARRSIZE);

	if (p >= TFRC_CALC_X_SPLIT)
		f = tfrc_calc_x_lookup[index][0];
	else
		f = tfrc_calc_x_lookup[index][1];

	tmp1 = ((u64)s * 100000000);
	tmp2 = ((u64)R * (u64)f);
	do_div(tmp2, 10000);
	do_div(tmp1, tmp2);
	/* Don't alter above math unless you test due to overflow on 32 bit */

	return (u32)tmp1;
}

EXPORT_SYMBOL_GPL(tfrc_calc_x);
613
614/*
615 * args: fvalue - function value to match
616 * returns: p closest to that value
617 *
618 * both fvalue and p are multiplied by 1,000,000 to use ints
619 */
620u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
621{
622 int ctr = 0;
623 int small;
624
625 if (fvalue < tfrc_calc_x_lookup[0][1])
626 return 0;
627
628 if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
629 small = 1;
630 else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
631 return 1000000;
632 else
633 small = 0;
634
635 while (fvalue > tfrc_calc_x_lookup[ctr][small])
636 ctr++;
637
638 if (small)
639 return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
640 else
641 return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
642}
643
644EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
new file mode 100644
index 000000000000..33456c0d5937
--- /dev/null
+++ b/net/dccp/dccp.h
@@ -0,0 +1,493 @@
1#ifndef _DCCP_H
2#define _DCCP_H
3/*
4 * net/dccp/dccp.h
5 *
6 * An implementation of the DCCP protocol
7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/config.h>
16#include <linux/dccp.h>
17#include <net/snmp.h>
18#include <net/sock.h>
19#include <net/tcp.h>
20
21#ifdef CONFIG_IP_DCCP_DEBUG
22extern int dccp_debug;
23
24#define dccp_pr_debug(format, a...) \
25 do { if (dccp_debug) \
26 printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
27 } while (0)
28#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
29 printk(format, ##a); } while (0)
30#else
31#define dccp_pr_debug(format, a...)
32#define dccp_pr_debug_cat(format, a...)
33#endif
34
35extern struct inet_hashinfo dccp_hashinfo;
36
37extern atomic_t dccp_orphan_count;
38extern int dccp_tw_count;
39extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
40
41extern void dccp_time_wait(struct sock *sk, int state, int timeo);
42
43/* FIXME: Right size this */
44#define DCCP_MAX_OPT_LEN 128
45
46#define DCCP_MAX_PACKET_HDR 32
47
48#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
49
50#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
51 * state, about 60 seconds */
52
53/* draft-ietf-dccp-spec-11.txt initial RTO value */
54#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
55
56/* Maximal interval between probes for local resources. */
57#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
58
59#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
60
61extern struct proto dccp_v4_prot;
62
63/* is seq1 < seq2 ? */
64static inline int before48(const u64 seq1, const u64 seq2)
65{
66 return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
67}
68
69/* is seq1 > seq2 ? */
70static inline int after48(const u64 seq1, const u64 seq2)
71{
72 return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
73}
74
75/* is seq2 <= seq1 <= seq3 ? */
76static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
77{
78 return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
79}
80
81static inline u64 max48(const u64 seq1, const u64 seq2)
82{
83 return after48(seq1, seq2) ? seq1 : seq2;
84}
85
86enum {
87 DCCP_MIB_NUM = 0,
88 DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
89 DCCP_MIB_ESTABRESETS, /* EstabResets */
90 DCCP_MIB_CURRESTAB, /* CurrEstab */
91 DCCP_MIB_OUTSEGS, /* OutSegs */
92 DCCP_MIB_OUTRSTS,
93 DCCP_MIB_ABORTONTIMEOUT,
94 DCCP_MIB_TIMEOUTS,
95 DCCP_MIB_ABORTFAILED,
96 DCCP_MIB_PASSIVEOPENS,
97 DCCP_MIB_ATTEMPTFAILS,
98 DCCP_MIB_OUTDATAGRAMS,
99 DCCP_MIB_INERRS,
100 DCCP_MIB_OPTMANDATORYERROR,
101 DCCP_MIB_INVALIDOPT,
102 __DCCP_MIB_MAX
103};
104
105#define DCCP_MIB_MAX __DCCP_MIB_MAX
106struct dccp_mib {
107 unsigned long mibs[DCCP_MIB_MAX];
108} __SNMP_MIB_ALIGN__;
109
110DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
111#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
112#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
113#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
114#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
115#define DCCP_ADD_STATS_BH(field, val) \
116 SNMP_ADD_STATS_BH(dccp_statistics, field, val)
117#define DCCP_ADD_STATS_USER(field, val) \
118 SNMP_ADD_STATS_USER(dccp_statistics, field, val)
119
120extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
121extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
122
123extern int dccp_send_response(struct sock *sk);
124extern void dccp_send_ack(struct sock *sk);
125extern void dccp_send_delayed_ack(struct sock *sk);
126extern void dccp_send_sync(struct sock *sk, const u64 seq,
127 const enum dccp_pkt_type pkt_type);
128
129extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
130extern void dccp_write_space(struct sock *sk);
131
132extern void dccp_init_xmit_timers(struct sock *sk);
/* Cancel all inet_connection_sock transmit timers for this socket;
 * used when tearing a connection down (see dccp_done()). */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
137
138extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
139
140extern const char *dccp_packet_name(const int type);
141extern const char *dccp_state_name(const int state);
142
/*
 * Transition the socket to @state, keeping the SNMP MIB counters
 * (CurrEstab, EstabResets) in step and unhashing the socket when it
 * enters DCCP_CLOSED.  Modelled on tcp_set_state().
 */
static inline void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	/* Callers are expected not to request a no-op transition. */
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		/* Remove from lookup tables; release the local port unless
		 * the user explicitly bound it (SOCK_BINDPORT_LOCK). */
		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
177
/*
 * Final teardown of a connection: move to DCCP_CLOSED, stop all
 * transmit timers and either notify a live owner (who will then close
 * the socket) or destroy an orphaned socket outright.
 */
static inline void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);	/* wake up anyone sleeping on the socket */
	else
		inet_csk_destroy_sock(sk);	/* no user left: free it now */
}
190
/*
 * Initialise a request_sock for an incoming connection attempt from the
 * DCCP-Request in @skb: record the remote port and mark it unacked.
 * @dp is currently unused here.
 */
static inline void dccp_openreq_init(struct request_sock *req,
				     struct dccp_sock *dp,
				     struct sk_buff *skb)
{
	/*
	 * FIXME: fill in the other req fields from the DCCP options
	 * received
	 */
	inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
	inet_rsk(req)->acked	= 0;
	req->rcv_wnd = 0;
}
203
204extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
205
206extern struct sock *dccp_create_openreq_child(struct sock *sk,
207 const struct request_sock *req,
208 const struct sk_buff *skb);
209
210extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
211
212extern void dccp_v4_err(struct sk_buff *skb, u32);
213
214extern int dccp_v4_rcv(struct sk_buff *skb);
215
216extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
217 struct sk_buff *skb,
218 struct request_sock *req,
219 struct dst_entry *dst);
220extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
221 struct request_sock *req,
222 struct request_sock **prev);
223
224extern int dccp_child_process(struct sock *parent, struct sock *child,
225 struct sk_buff *skb);
226extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
227 struct dccp_hdr *dh, unsigned len);
228extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
229 const struct dccp_hdr *dh, const unsigned len);
230
231extern void dccp_close(struct sock *sk, long timeout);
232extern struct sk_buff *dccp_make_response(struct sock *sk,
233 struct dst_entry *dst,
234 struct request_sock *req);
235extern struct sk_buff *dccp_make_reset(struct sock *sk,
236 struct dst_entry *dst,
237 enum dccp_reset_codes code);
238
239extern int dccp_connect(struct sock *sk);
240extern int dccp_disconnect(struct sock *sk, int flags);
241extern int dccp_getsockopt(struct sock *sk, int level, int optname,
242 char __user *optval, int __user *optlen);
243extern int dccp_setsockopt(struct sock *sk, int level, int optname,
244 char __user *optval, int optlen);
245extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
246extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
247 struct msghdr *msg, size_t size);
248extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
249 struct msghdr *msg, size_t len, int nonblock,
250 int flags, int *addr_len);
251extern void dccp_shutdown(struct sock *sk, int how);
252
253extern int dccp_v4_checksum(const struct sk_buff *skb,
254 const u32 saddr, const u32 daddr);
255
256extern int dccp_v4_send_reset(struct sock *sk,
257 enum dccp_reset_codes code);
258extern void dccp_send_close(struct sock *sk, const int active);
259
260struct dccp_skb_cb {
261 __u8 dccpd_type;
262 __u8 dccpd_reset_code;
263 __u8 dccpd_service;
264 __u8 dccpd_ccval;
265 __u64 dccpd_seq;
266 __u64 dccpd_ack_seq;
267 int dccpd_opt_len;
268};
269
270#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
271
272static inline int dccp_non_data_packet(const struct sk_buff *skb)
273{
274 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
275
276 return type == DCCP_PKT_ACK ||
277 type == DCCP_PKT_CLOSE ||
278 type == DCCP_PKT_CLOSEREQ ||
279 type == DCCP_PKT_RESET ||
280 type == DCCP_PKT_SYNC ||
281 type == DCCP_PKT_SYNCACK;
282}
283
284static inline int dccp_packet_without_ack(const struct sk_buff *skb)
285{
286 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
287
288 return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
289}
290
291#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
292#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
293
/* Store @value into *@seqno, wrapping it once into 48-bit sequence
 * space if it overflowed DCCP_MAX_SEQNO. */
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
	if (value > DCCP_MAX_SEQNO)
		value -= DCCP_MAX_SEQNO + 1;
	*seqno = value;
}
300
/* Circular distance seqno2 - seqno1 in 48-bit sequence space; the
 * shift up and back confines the subtraction to 48 bits. */
static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
{
	return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
}
305
/* Advance a 48-bit sequence number by one, wrapping to zero at
 * DCCP_MAX_SEQNO. */
static inline void dccp_inc_seqno(u64 *seqno)
{
	if (++*seqno > DCCP_MAX_SEQNO)
		*seqno = 0;
}
311
/*
 * Write the 48-bit sequence number @gss into the wire header: the high
 * 16 bits go into the bitfield dccph_seq, the low 32 bits into the
 * extended-header word that immediately follows the generic header.
 * The extra ">> 8" in the little-endian case compensates for the
 * bitfield layout differing between endiannesses.
 */
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
	struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
							   sizeof(*dh));

#if defined(__LITTLE_ENDIAN_BITFIELD)
	dh->dccph_seq	   = htonl((gss >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
	dh->dccph_seq	   = htonl((gss >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
326
/*
 * Write the 48-bit acknowledgement number @gsr into the ack subheader,
 * split into a high 16-bit bitfield and a low 32-bit word; the
 * little-endian ">> 8" mirrors dccp_hdr_set_seq() above.
 */
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
				    const u64 gsr)
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
	dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
#elif defined(__BIG_ENDIAN_BITFIELD)
	dhack->dccph_ack_nr_high = htonl((gsr >> 32));
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	dhack->dccph_ack_nr_low  = htonl(gsr & 0xffffffff);
}
339
/*
 * Record @seq as the Greatest Sequence number Received and recompute
 * the valid receive window [SWL, SWH] around it:
 *   SWL = GSR + 1 - W/4,  SWH = GSR + 3W/4  (W = sequence window).
 */
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_gsr = seq;
	dccp_set_seqno(&dp->dccps_swl,
		       (dp->dccps_gsr + 1 -
		        (dp->dccps_options.dccpo_sequence_window / 4)));
	dccp_set_seqno(&dp->dccps_swh,
		       (dp->dccps_gsr +
		        (3 * dp->dccps_options.dccpo_sequence_window) / 4));
}
352
/*
 * Record @seq as the Greatest Sequence number Sent (and acknowledgeable
 * window high mark AWH), and recompute the ack window low mark:
 *   AWL = GSS - W + 1  (W = sequence window).
 */
static inline void dccp_update_gss(struct sock *sk, u64 seq)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_awh = dp->dccps_gss = seq;
	dccp_set_seqno(&dp->dccps_awl,
		       (dp->dccps_gss -
			dp->dccps_options.dccpo_sequence_window + 1));
}
362
363extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
364extern void dccp_insert_option_elapsed_time(struct sock *sk,
365 struct sk_buff *skb,
366 u32 elapsed_time);
367extern void dccp_insert_option_timestamp(struct sock *sk,
368 struct sk_buff *skb);
369extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
370 unsigned char option,
371 const void *value, unsigned char len);
372
373extern struct socket *dccp_ctl_socket;
374
375#define DCCP_ACKPKTS_STATE_RECEIVED 0
376#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
377#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
378
379#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
380#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
381
382/** struct dccp_ackpkts - acknowledgeable packets
383 *
384 * This data structure is the one defined in the DCCP draft
385 * Appendix A.
386 *
387 * @dccpap_buf_head - circular buffer head
388 * @dccpap_buf_tail - circular buffer tail
389 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
390 * buffer (i.e. %dccpap_buf_head)
391 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
392 * by the buffer with State 0
393 *
394 * Additionally, the HC-Receiver must keep some information about the
395 * Ack Vectors it has recently sent. For each packet sent carrying an
396 * Ack Vector, it remembers four variables:
397 *
398 * @dccpap_ack_seqno - the Sequence Number used for the packet
399 * (HC-Receiver seqno)
400 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
401 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
402 * (HC-Sender seqno)
403 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
404 *
405 * @dccpap_buf_len - circular buffer length
406 * @dccpap_time - the time in usecs
407 * @dccpap_buf - circular buffer of acknowledgeable packets
408 */
/* Field semantics are documented in the kernel-doc comment above (DCCP
 * draft, Appendix A).  Instances are created by dccp_ackpkts_alloc(),
 * with the circular buffer appended after the struct. */
struct dccp_ackpkts {
	unsigned int		dccpap_buf_head;
	unsigned int		dccpap_buf_tail;
	u64			dccpap_buf_ackno;
	u64			dccpap_ack_seqno;
	u64			dccpap_ack_ackno;
	unsigned int		dccpap_ack_ptr;
	/* NOTE(review): the two *_vector_len fields are not covered by the
	 * kernel-doc above; presumably the lengths of the buffered and the
	 * last-sent Ack Vectors — confirm against options.c. */
	unsigned int		dccpap_buf_vector_len;
	unsigned int		dccpap_ack_vector_len;
	unsigned int		dccpap_buf_len;
	struct timeval		dccpap_time;
	u8			dccpap_buf_nonce;
	u8			dccpap_ack_nonce;
	u8			dccpap_buf[0];	/* old-style flexible array member */
};
424
425extern struct dccp_ackpkts *
426 dccp_ackpkts_alloc(unsigned int len,
427 const unsigned int __nocast priority);
428extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
429extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
430extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
431 struct sock *sk, u64 ackno);
432
/* Collapse a struct timeval into a single microsecond count. */
static inline suseconds_t timeval_usecs(const struct timeval *tv)
{
	return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
}
437
438static inline suseconds_t timeval_delta(const struct timeval *large,
439 const struct timeval *small)
440{
441 time_t secs = large->tv_sec - small->tv_sec;
442 suseconds_t usecs = large->tv_usec - small->tv_usec;
443
444 if (usecs < 0) {
445 secs--;
446 usecs += USEC_PER_SEC;
447 }
448 return secs * USEC_PER_SEC + usecs;
449}
450
/* Advance @tv by @usecs, renormalizing so tv_usec stays within
 * [0, USEC_PER_SEC). */
static inline void timeval_add_usecs(struct timeval *tv,
				     const suseconds_t usecs)
{
	tv->tv_usec += usecs;
	while (tv->tv_usec >= USEC_PER_SEC) {
		tv->tv_sec++;
		tv->tv_usec -= USEC_PER_SEC;
	}
}
460
/* Move @tv back by @usecs, borrowing from tv_sec so tv_usec stays
 * within [0, USEC_PER_SEC). */
static inline void timeval_sub_usecs(struct timeval *tv,
				     const suseconds_t usecs)
{
	tv->tv_usec -= usecs;
	while (tv->tv_usec < 0) {
		tv->tv_sec--;
		tv->tv_usec += USEC_PER_SEC;
	}
}
470
471/*
472 * Returns the difference in usecs between timeval
473 * passed in and current time
474 */
/*
 * Returns the difference in usecs between timeval
 * passed in and current time
 */
static inline suseconds_t timeval_now_delta(const struct timeval *tv)
{
	struct timeval now;
	do_gettimeofday(&now);
	return timeval_delta(&now, tv);
}
481
482#ifdef CONFIG_IP_DCCP_DEBUG
483extern void dccp_ackvector_print(const u64 ackno,
484 const unsigned char *vector, int len);
485extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
486#else
487static inline void dccp_ackvector_print(const u64 ackno,
488 const unsigned char *vector,
489 int len) { }
490static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
491#endif
492
493#endif /* _DCCP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
new file mode 100644
index 000000000000..f675d8e642d3
--- /dev/null
+++ b/net/dccp/diag.c
@@ -0,0 +1,71 @@
1/*
2 * net/dccp/diag.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@mandriva.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13
14#include <linux/module.h>
15#include <linux/inet_diag.h>
16
17#include "ccid.h"
18#include "dccp.h"
19
/*
 * Fill a (reused) struct tcp_info with DCCP socket state for the
 * inet_diag interface; fields without a DCCP equivalent stay zero.
 * The CCID modules may add their own rx/tx statistics at the end.
 */
static void dccp_get_info(struct sock *sk, struct tcp_info *info)
{
	struct dccp_sock *dp = dccp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);

	memset(info, 0, sizeof(*info));

	info->tcpi_state	= sk->sk_state;
	info->tcpi_retransmits	= icsk->icsk_retransmits;
	info->tcpi_probes	= icsk->icsk_probes_out;
	info->tcpi_backoff	= icsk->icsk_backoff;
	info->tcpi_pmtu		= dp->dccps_pmtu_cookie;

	/* Ack Vectors are reported through the closest TCP analogue. */
	if (dp->dccps_options.dccpo_send_ack_vector)
		info->tcpi_options |= TCPI_OPT_SACK;

	ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
	ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
}
39
40static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
41 void *_info)
42{
43 r->idiag_rqueue = r->idiag_wqueue = 0;
44
45 if (_info != NULL)
46 dccp_get_info(sk, _info);
47}
48
/* Registration record hooking the DCCP hash tables into the generic
 * inet_diag socket-dump machinery (netlink DCCPDIAG_GETSOCK). */
static struct inet_diag_handler dccp_diag_handler = {
	.idiag_hashinfo	 = &dccp_hashinfo,
	.idiag_get_info	 = dccp_diag_get_info,
	.idiag_type	 = DCCPDIAG_GETSOCK,
	.idiag_info_size = sizeof(struct tcp_info),
};
55
/* Module init: register the DCCP handler with inet_diag; returns the
 * registration error code, if any. */
static int __init dccp_diag_init(void)
{
	return inet_diag_register(&dccp_diag_handler);
}
60
/* Module exit: unhook the handler registered in dccp_diag_init(). */
static void __exit dccp_diag_fini(void)
{
	inet_diag_unregister(&dccp_diag_handler);
}
65
66module_init(dccp_diag_init);
67module_exit(dccp_diag_fini);
68
69MODULE_LICENSE("GPL");
70MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
71MODULE_DESCRIPTION("DCCP inet_diag handler");
diff --git a/net/dccp/input.c b/net/dccp/input.c
new file mode 100644
index 000000000000..ef29cef1dafe
--- /dev/null
+++ b/net/dccp/input.c
@@ -0,0 +1,600 @@
1/*
2 * net/dccp/input.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
/*
 * Queue @skb as the DCCP equivalent of a TCP FIN: mark receive-side
 * shutdown and put the packet on the receive queue so a blocked
 * dccp_recvmsg() wakes up and sees end-of-stream.
 */
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);
	__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);	/* strip the header */
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);
	sk->sk_data_ready(sk, 0);
}
31
/*
 * Handle an incoming Close: answer with Reset(Closed), queue the
 * FIN-equivalent for the reader, enter DCCP_CLOSED and signal hangup
 * to any async waiters.
 */
static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
{
	dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
	dccp_fin(sk, skb);
	dccp_set_state(sk, DCCP_CLOSED);
	sk_wake_async(sk, 1, POLL_HUP);
}
39
/*
 * Handle an incoming CloseReq.  Only a client may receive one; a server
 * treats it as an unexpected packet type (Step 7) and answers with a
 * Sync.  A client moves to CLOSING and sends its own Close.
 */
static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
{
	/*
	 *   Step 7: Check for unexpected packet types
	 *      If (S.is_server and P.type == CloseReq)
	 *	  Send Sync packet acknowledging P.seqno
	 *	  Drop packet and return
	 */
	if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
		return;
	}

	dccp_set_state(sk, DCCP_CLOSING);
	dccp_send_close(sk, 0);
}
56
/* An ack number arrived: if Ack Vectors are in use, let the ack-vector
 * state machine retire entries the peer has now seen. */
static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);

	if (dp->dccps_options.dccpo_send_ack_vector)
		dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
					     DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
65
/*
 * Validate an incoming packet's sequence and acknowledgement numbers
 * against the current windows (Steps 5 and 6 of the draft's packet
 * processing pseudo-code).
 *
 * Returns 0 if the packet is acceptable (GSR/SWL/SWH and possibly GAR
 * have been updated), -1 if it must be dropped; in the Step 6 failure
 * case a Sync has already been sent before returning.
 */
static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	struct dccp_sock *dp = dccp_sk(sk);
	u64 lswl, lawl;	/* per-packet-type lower bounds for seqno/ackno */

	/*
	 *   Step 5: Prepare sequence numbers for Sync
	 *     If P.type == Sync or P.type == SyncAck,
	 *	  If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
	 *	     / * P is valid, so update sequence number variables
	 *		 accordingly.  After this update, P will pass the tests
	 *		 in Step 6.  A SyncAck is generated if necessary in
	 *		 Step 15 * /
	 *	     Update S.GSR, S.SWL, S.SWH
	 *	  Otherwise,
	 *	     Drop packet and return
	 */
	if (dh->dccph_type == DCCP_PKT_SYNC ||
	    dh->dccph_type == DCCP_PKT_SYNCACK) {
		if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
			      dp->dccps_awl, dp->dccps_awh) &&
		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
			dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
		else
			return -1;
	}

	/*
	 *   Step 6: Check sequence numbers
	 *      Let LSWL = S.SWL and LAWL = S.AWL
	 *      If P.type == CloseReq or P.type == Close or P.type == Reset,
	 *	  LSWL := S.GSR + 1, LAWL := S.GAR
	 *      If LSWL <= P.seqno <= S.SWH
	 *	     and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
	 *	  Update S.GSR, S.SWL, S.SWH
	 *	  If P.type != Sync,
	 *	     Update S.GAR
	 *      Otherwise,
	 *	  Send Sync packet acknowledging P.seqno
	 *	  Drop packet and return
	 */
	lswl = dp->dccps_swl;
	lawl = dp->dccps_awl;

	/* Teardown packets get the tighter bounds mandated above. */
	if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
	    dh->dccph_type == DCCP_PKT_CLOSE ||
	    dh->dccph_type == DCCP_PKT_RESET) {
		lswl = dp->dccps_gsr;
		dccp_inc_seqno(&lswl);
		lawl = dp->dccps_gar;
	}

	if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
	    (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
	     between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
		       lawl, dp->dccps_awh))) {
		dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);

		if (dh->dccph_type != DCCP_PKT_SYNC &&
		    (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
		     DCCP_PKT_WITHOUT_ACK_SEQ))
			dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
	} else {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
					    "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
					    "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
					    "sending SYNC...\n",
			       dccp_packet_name(dh->dccph_type),
			       (unsigned long long) lswl,
			       (unsigned long long)
			       DCCP_SKB_CB(skb)->dccpd_seq,
			       (unsigned long long) dp->dccps_swh,
			       (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
			        DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
			       (unsigned long long) lawl,
			       (unsigned long long)
			       DCCP_SKB_CB(skb)->dccpd_ack_seq,
			       (unsigned long long) dp->dccps_awh);
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
		return -1;
	}

	return 0;
}
151
/*
 * Fast path: process a packet arriving on a socket in the OPEN state.
 * Validates sequence numbers, parses options, records the packet in the
 * Ack Vector buffer (if in use), notifies both CCIDs and then dispatches
 * on packet type.  Returns 0 in all cases; ownership of @skb passes to
 * this function (queued to the receive queue or freed).
 */
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct dccp_hdr *dh, const unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);

	if (dccp_check_seqno(sk, skb))
		goto discard;

	if (dccp_parse_options(sk, skb))
		goto discard;

	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_event_ack_recv(sk, skb);

	/*
	 * FIXME: check ECN to see if we should use
	 * DCCP_ACKPKTS_STATE_ECN_MARKED
	 */
	if (dp->dccps_options.dccpo_send_ack_vector) {
		struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;

		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
				     DCCP_SKB_CB(skb)->dccpd_seq,
				     DCCP_ACKPKTS_STATE_RECEIVED)) {
			/* Buffer full: force an early ack so the peer's
			 * acknowledgement lets us free ack-vector space. */
			LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
						    "packets buffer full!\n");
			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MIN,
						  DCCP_RTO_MAX);
			goto discard;
		}

		/*
		 * FIXME: this activation is probably wrong, have to study more
		 * TCP delack machinery and how it fits into DCCP draft, but
		 * for now it kinda "works" 8)
		 */
		if (!inet_csk_ack_scheduled(sk)) {
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
						  DCCP_RTO_MAX);
		}
	}

	/* Let both half-connection CCIDs see every received packet. */
	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

	switch (dccp_hdr(skb)->dccph_type) {
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_DATA:
		/*
		 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
		 * option if it is.
		 */
		__skb_pull(skb, dh->dccph_doff * 4);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk, 0);
		return 0;
	case DCCP_PKT_ACK:
		goto discard;
	case DCCP_PKT_RESET:
		/*
		 *  Step 9: Process Reset
		 *	If P.type == Reset,
		 *		Tear down connection
		 *		S.state := TIMEWAIT
		 *		Set TIMEWAIT timer
		 *		Drop packet and return
		 */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;
	case DCCP_PKT_CLOSEREQ:
		dccp_rcv_closereq(sk, skb);
		goto discard;
	case DCCP_PKT_CLOSE:
		dccp_rcv_close(sk, skb);
		return 0;
	case DCCP_PKT_REQUEST:
		/* Step 7
		 *   or (S.is_server and P.type == Response)
		 *   or (S.is_client and P.type == Request)
		 *   or (S.state >= OPEN and P.type == Request
		 *	and P.seqno >= S.OSR)
		 *    or (S.state >= OPEN and P.type == Response
		 *	and P.seqno >= S.OSR)
		 *    or (S.state == RESPOND and P.type == Data),
		 *  Send Sync packet acknowledging P.seqno
		 *  Drop packet and return
		 */
		if (dp->dccps_role != DCCP_ROLE_LISTEN)
			goto send_sync;
		goto check_seq;
	case DCCP_PKT_RESPONSE:
		if (dp->dccps_role != DCCP_ROLE_CLIENT)
			goto send_sync;
check_seq:
		/* Only re-Sync for packets at or beyond the first OPEN
		 * sequence number (S.OSR); older ones are stale. */
		if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
send_sync:
			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
				       DCCP_PKT_SYNC);
		}
		break;
	case DCCP_PKT_SYNC:
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
			       DCCP_PKT_SYNCACK);
		/*
		 * From the draft:
		 *
		 * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
		 * MAY have non-zero-length application data areas, whose
		 * contents * receivers MUST ignore.
		 */
		goto discard;
	}

	DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
discard:
	__kfree_skb(skb);
	return 0;
}
276
/*
 * Handle a packet arriving while we are a client in REQUEST state.
 * A valid Response completes Step 4 (seqno setup) and Step 10 (move to
 * PARTOPEN, ack the server).
 *
 * Return values: 0 = skb consumed/queued, -1 = caller should free skb,
 * 1 = invalid packet, caller (dccp_v4_do_rcv) sends a Reset.
 */
static int dccp_rcv_request_sent_state_process(struct sock *sk,
					       struct sk_buff *skb,
					       const struct dccp_hdr *dh,
					       const unsigned len)
{
	/*
	 *  Step 4: Prepare sequence numbers in REQUEST
	 *     If S.state == REQUEST,
	 *	  If (P.type == Response or P.type == Reset)
	 *		and S.AWL <= P.ackno <= S.AWH,
	 *	     / * Set sequence number variables corresponding to the
	 *		other endpoint, so P will pass the tests in Step 6 * /
	 *	     Set S.GSR, S.ISR, S.SWL, S.SWH
	 *	     / * Response processing continues in Step 10; Reset
	 *		processing continues in Step 9 * /
	 */
	if (dh->dccph_type == DCCP_PKT_RESPONSE) {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		struct dccp_sock *dp = dccp_sk(sk);

		/* Stop the REQUEST timer */
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
		BUG_TRAP(sk->sk_send_head != NULL);
		__kfree_skb(sk->sk_send_head);	/* drop the retransmittable Request */
		sk->sk_send_head = NULL;

		if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
			       dp->dccps_awl, dp->dccps_awh)) {
			dccp_pr_debug("invalid ackno: S.AWL=%llu, "
				      "P.ackno=%llu, S.AWH=%llu \n",
				      (unsigned long long)dp->dccps_awl,
			   (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
				      (unsigned long long)dp->dccps_awh);
			goto out_invalid_packet;
		}

		/* The server's seqno becomes our Initial Sequence Received. */
		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
		dccp_update_gsr(sk, dp->dccps_isr);
		/*
		 * SWL and AWL are initially adjusted so that they are not less than
		 * the initial Sequence Numbers received and sent, respectively:
		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
		 *	AWL := max(GSS - W' + 1, ISS).
		 * These adjustments MUST be applied only at the beginning of the
		 * connection.
		 *
		 * AWL was adjusted in dccp_v4_connect -acme
		 */
		dccp_set_seqno(&dp->dccps_swl,
			       max48(dp->dccps_swl, dp->dccps_isr));

		if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
		    ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
			ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
			/* FIXME: send appropriate RESET code */
			goto out_invalid_packet;
		}

		dccp_sync_mss(sk, dp->dccps_pmtu_cookie);

		/*
		 *    Step 10: Process REQUEST state (second part)
		 *       If S.state == REQUEST,
		 *	  / * If we get here, P is a valid Response from the
		 *	      server (see Step 4), and we should move to
		 *	      PARTOPEN state.  PARTOPEN means send an Ack,
		 *	      don't send Data packets, retransmit Acks
		 *	      periodically, and always include any Init Cookie
		 *	      from the Response * /
		 *	  S.state := PARTOPEN
		 *	  Set PARTOPEN timer
		 * 	  Continue with S.state == PARTOPEN
		 *	  / * Step 12 will send the Ack completing the
		 *	      three-way handshake * /
		 */
		dccp_set_state(sk, DCCP_PARTOPEN);

		/* Make sure socket is routed, for correct metrics. */
		inet_sk_rebuild_header(sk);

		if (!sock_flag(sk, SOCK_DEAD)) {
			sk->sk_state_change(sk);
			sk_wake_async(sk, 0, POLL_OUT);
		}

		if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
		    icsk->icsk_accept_queue.rskq_defer_accept) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			/*
			 * OK, in DCCP we can as well do a similar trick, its
			 * even in the draft, but there is no need for us to
			 * schedule an ack here, as dccp_sendmsg does this for
			 * us, also stated in the draft. -acme
			 */
			__kfree_skb(skb);
			return 0;
		}
		dccp_send_ack(sk);
		return -1;
	}

out_invalid_packet:
	return 1; /* dccp_v4_do_rcv will send a reset, but...
		     FIXME: the reset code should be
			    DCCP_RESET_CODE_PACKET_ERROR */
}
390
/*
 * Handle packets arriving in the RESPOND or PARTOPEN states.  An Ack or
 * DataAck completes the handshake and moves the socket to OPEN.
 * Returns 1 if the skb was queued (by dccp_rcv_established for a
 * DataAck payload), 0 if the caller should free it.
 */
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
						   struct sk_buff *skb,
						   const struct dccp_hdr *dh,
						   const unsigned len)
{
	int queued = 0;

	switch (dh->dccph_type) {
	case DCCP_PKT_RESET:
		/* Connection is going away; Reset itself is handled by the
		 * caller, just cancel the pending delayed-ack. */
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		break;
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_ACK:
		/*
		 * FIXME: we should be reseting the PARTOPEN (DELACK) timer
		 * here but only if we haven't used the DELACK timer for
		 * something else, like sending a delayed ack for a TIMESTAMP
		 * echo, etc, for now were not clearing it, sending an extra
		 * ACK when there is nothing else to do in DELACK is not a big
		 * deal after all.
		 */

		/* Stop the PARTOPEN timer */
		if (sk->sk_state == DCCP_PARTOPEN)
			inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);

		/* Record the first OPEN Sequence Received (S.OSR). */
		dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
		dccp_set_state(sk, DCCP_OPEN);

		if (dh->dccph_type == DCCP_PKT_DATAACK) {
			/* The payload takes the normal OPEN-state path. */
			dccp_rcv_established(sk, skb, dh, len);
			queued = 1; /* packet was queued
				       (by dccp_rcv_established) */
		}
		break;
	}

	return queued;
}
430
/*
 * Slow path: process a packet for any socket NOT in the OPEN state
 * (LISTEN, REQUEST, RESPOND, PARTOPEN, CLOSING, ...), implementing the
 * numbered steps of the draft's packet-processing pseudo-code.
 *
 * Returns 0 when the skb was consumed (queued or freed here), non-zero
 * when the caller (dccp_v4_do_rcv) must generate a Reset.
 */
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			   struct dccp_hdr *dh, unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	const int old_state = sk->sk_state;	/* for the PARTOPEN wakeup below */
	int queued = 0;

	/*
	 *  Step 3: Process LISTEN state
	 *     (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
	 *
	 *     If S.state == LISTEN,
	 *	  If P.type == Request or P contains a valid Init Cookie
	 *	  	option,
	 *	     * Must scan the packet's options to check for an Init
	 *		Cookie.  Only the Init Cookie is processed here,
	 *		however; other options are processed in Step 8.  This
	 *		scan need only be performed if the endpoint uses Init
	 *		Cookies *
	 *	     * Generate a new socket and switch to that socket *
	 *	     Set S := new socket for this port pair
	 *	     S.state = RESPOND
	 *	     Choose S.ISS (initial seqno) or set from Init Cookie
	 *	     Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *	     Continue with S.state == RESPOND
	 *	     * A Response packet will be generated in Step 11 *
	 *	  Otherwise,
	 *	     Generate Reset(No Connection) unless P.type == Reset
	 *	     Drop packet and return
	 *
	 * NOTE: the check for the packet types is done in
	 *	 dccp_rcv_state_process
	 */
	if (sk->sk_state == DCCP_LISTEN) {
		if (dh->dccph_type == DCCP_PKT_REQUEST) {
			if (dccp_v4_conn_request(sk, skb) < 0)
				return 1;

			/* FIXME: do congestion control initialization */
			goto discard;
		}
		if (dh->dccph_type == DCCP_PKT_RESET)
			goto discard;

		/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
		return 1;
	}

	/* In REQUEST state the windows are not yet set up; Step 4 is
	 * handled in dccp_rcv_request_sent_state_process() below. */
	if (sk->sk_state != DCCP_REQUESTING) {
		if (dccp_check_seqno(sk, skb))
			goto discard;

		/*
		 * Step 8: Process options and mark acknowledgeable
		 */
		if (dccp_parse_options(sk, skb))
			goto discard;

		if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
		    DCCP_PKT_WITHOUT_ACK_SEQ)
			dccp_event_ack_recv(sk, skb);

		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

		/*
		 * FIXME: check ECN to see if we should use
		 * DCCP_ACKPKTS_STATE_ECN_MARKED
		 */
		if (dp->dccps_options.dccpo_send_ack_vector) {
			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
					     DCCP_SKB_CB(skb)->dccpd_seq,
					     DCCP_ACKPKTS_STATE_RECEIVED))
				goto discard;
			/*
			 * FIXME: this activation is probably wrong, have to
			 * study more TCP delack machinery and how it fits into
			 * DCCP draft, but for now it kinda "works" 8)
			 */
			if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
			     DCCP_MAX_SEQNO + 1) &&
			    !inet_csk_ack_scheduled(sk)) {
				inet_csk_schedule_ack(sk);
				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
							  TCP_DELACK_MIN,
							  DCCP_RTO_MAX);
			}
		}
	}

	/*
	 *  Step 9: Process Reset
	 *	If P.type == Reset,
	 *		Tear down connection
	 *		S.state := TIMEWAIT
	 *		Set TIMEWAIT timer
	 *		Drop packet and return
	*/
	if (dh->dccph_type == DCCP_PKT_RESET) {
		/*
		 * Queue the equivalent of TCP fin so that dccp_recvmsg
		 * exits the loop
		 */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;
		/*
		 *   Step 7: Check for unexpected packet types
		 *      If (S.is_server and P.type == CloseReq)
		 *	    or (S.is_server and P.type == Response)
		 *	    or (S.is_client and P.type == Request)
		 *	    or (S.state == RESPOND and P.type == Data),
		 *	  Send Sync packet acknowledging P.seqno
		 *	  Drop packet and return
		 */
	} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
		    (dh->dccph_type == DCCP_PKT_RESPONSE ||
		     dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
		    (dp->dccps_role == DCCP_ROLE_CLIENT &&
		     dh->dccph_type == DCCP_PKT_REQUEST) ||
		    (sk->sk_state == DCCP_RESPOND &&
		     dh->dccph_type == DCCP_PKT_DATA)) {
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
			       DCCP_PKT_SYNC);
		goto discard;
	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
		/* Only reached by clients: the server case was caught by
		 * the Step 7 branch above. */
		dccp_rcv_closereq(sk, skb);
		goto discard;
	} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
		dccp_rcv_close(sk, skb);
		return 0;
	}

	switch (sk->sk_state) {
	case DCCP_CLOSED:
		return 1;

	case DCCP_REQUESTING:
		/* FIXME: do congestion control initialization */

		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
		if (queued >= 0)
			return queued;

		/* queued == -1: processed but not queued; free it here. */
		__kfree_skb(skb);
		return 0;

	case DCCP_RESPOND:
	case DCCP_PARTOPEN:
		queued = dccp_rcv_respond_partopen_state_process(sk, skb,
								 dh, len);
		break;
	}

	/* Leaving PARTOPEN on an (Data)Ack: tell waiters the socket is
	 * now fully writable. */
	if (dh->dccph_type == DCCP_PKT_ACK ||
	    dh->dccph_type == DCCP_PKT_DATAACK) {
		switch (old_state) {
		case DCCP_PARTOPEN:
			sk->sk_state_change(sk);
			sk_wake_async(sk, 0, POLL_OUT);
			break;
		}
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
new file mode 100644
index 000000000000..3fc75dbee4b8
--- /dev/null
+++ b/net/dccp/ipv4.c
@@ -0,0 +1,1356 @@
1/*
2 * net/dccp/ipv4.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/icmp.h>
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/random.h>
19
20#include <net/icmp.h>
21#include <net/inet_hashtables.h>
22#include <net/sock.h>
23#include <net/tcp_states.h>
24#include <net/xfrm.h>
25
26#include "ccid.h"
27#include "dccp.h"
28
/*
 * Global DCCP socket hash table (bind/established/listen chains), the
 * DCCP counterpart of TCP's tcp_hashinfo.  Only statically-initializable
 * members are set here; ehash/bhash are presumably sized and allocated
 * at protocol init time — not visible in this chunk, TODO confirm.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
	/* Ephemeral port search in dccp_v4_hash_connect() starts just
	 * above this value (rover is pre-incremented). */
	.port_rover	= 1024 - 1,
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
38
/* Bind @sk to local port @snum (0 = pick one) via the generic
 * inet_connection_sock port allocator, using DCCP's own bind table. */
static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
{
	return inet_csk_get_port(&dccp_hashinfo, sk, snum);
}
43
/* Insert @sk into the DCCP lookup hash tables. */
static void dccp_v4_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}
48
/* Remove @sk from the DCCP lookup hash tables. */
static void dccp_v4_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}
53
/* called with local bh disabled */
/*
 * Check whether the connection 4-tuple (local addr/@lport, remote
 * addr/port taken from @sk) is unique in the established and time-wait
 * chains, and if so hash @sk into the established table under the chain
 * lock.
 *
 * Returns 0 on success; a matching time-wait socket, if found, is either
 * handed back through @twp for the caller to recycle, or descheduled and
 * released here when @twp is NULL.  Returns -EADDRNOTAVAIL if the
 * identity is already in use by a live socket.
 */
static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
				       struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	/* From the remote peer's point of view our source is its
	 * destination, hence the swapped naming below. */
	const u32 daddr = inet->rcv_saddr;
	const u32 saddr = inet->daddr;
	const int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
				      dccp_hashinfo.ehash_size);
	struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
	const struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first.  The time-wait chains live in
	 * the second half of the ehash table, ehash_size entries on. */
	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
		tw = inet_twsk(sk2);

		if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity. */
	inet->num = lport;
	inet->sport = htons(lport);
	sk->sk_hashent = hash;
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp != NULL) {
		/* Caller takes over the time-wait socket (may be NULL). */
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw != NULL) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &dccp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}

	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
115
/*
 * Bind a port for a connect operation and hash it.
 *
 * If the socket has no local port yet (snum == 0) an ephemeral port is
 * searched in [sysctl_local_port_range], skipping buckets whose owners
 * allow reuse (those need the full established-table uniqueness check),
 * otherwise the already-bound port is validated.  Mirrors TCP's
 * tcp_v4_hash_connect.  Returns 0 on success or -EADDRNOTAVAIL when no
 * usable port exists.
 */
static int dccp_v4_hash_connect(struct sock *sk)
{
	const unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (snum == 0) {
		int rover;
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();

		/* TODO. Actually it is not so bad idea to remove
		 * dccp_hashinfo.portalloc_lock before next submission to
		 * Linus.
		 * As soon as we touch this place at all it is time to think.
		 *
		 * Now it protects single _advisory_ variable
		 * dccp_hashinfo.port_rover, hence it is mostly useless.
		 * Code will work nicely if we just delete it, but
		 * I am afraid in contented case it will work not better or
		 * even worse: another cpu just will hit the same bucket
		 * and spin there.
		 * So some cpu salt could remove both contention and
		 * memory pingpong. Any ideas how to do this in a nice way?
		 */
		spin_lock(&dccp_hashinfo.portalloc_lock);
		rover = dccp_hashinfo.port_rover;

		do {
			rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
					            dccp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == rover) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					/* fastreuse >= 0 means reusable
					 * owners: must not share, try the
					 * next port. */
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__dccp_v4_check_established(sk,
									 rover,
									 &tw))
						goto ok;
					goto next_port;
				}
			}

			/* Port bucket unused: claim it. */
			tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
						     head, rover);
			if (tb == NULL) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		dccp_hashinfo.port_rover = rover;
		spin_unlock(&dccp_hashinfo.portalloc_lock);

		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		/* All locks still held and bhs disabled */
		dccp_hashinfo.port_rover = rover;
		spin_unlock(&dccp_hashinfo.portalloc_lock);

		inet_bind_hash(sk, tb, rover);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(rover);
			__inet_hash(&dccp_hashinfo, sk, 0);
		}
		spin_unlock(&head->lock);

		if (tw != NULL) {
			/* Recycle the matching time-wait socket found by
			 * __dccp_v4_check_established(). */
			inet_twsk_deschedule(tw, &dccp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	/* Socket already has a port: just validate and hash it. */
	head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
						 dccp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
		/* We are the sole owner of this port: no conflict. */
		__inet_hash(&dccp_hashinfo, sk, 0);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		/* bh stays disabled until local_bh_enable() at out:. */
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __dccp_v4_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
235
/*
 * Active-open (client) connect: route to the peer, bind/hash a local
 * port, pick the initial sequence number and send the DCCP-Request.
 * Closely follows tcp_v4_connect().  Returns 0 or a negative errno;
 * on failure the socket is moved back to DCCP_CLOSED and unhashed.
 */
static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
			   int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	u32 daddr, nexthop;
	int tmp;
	int err;

	dp->dccps_role = DCCP_ROLE_CLIENT;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With strict source routing, route to the first hop, not the
	 * final destination. */
	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt != NULL && inet->opt->srr) {
		if (daddr == 0)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_DCCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (inet->opt == NULL || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (inet->saddr == 0)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	/* Account for IP options in the per-packet header overhead. */
	dp->dccps_ext_header_len = 0;
	if (inet->opt != NULL)
		dp->dccps_ext_header_len = inet->opt->optlen;
	/*
	 * Socket identity is still unknown (sport may be zero).
	 * However we set state to DCCP_REQUESTING and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	dccp_set_state(sk, DCCP_REQUESTING);
	err = dccp_v4_hash_connect(sk);
	if (err != 0)
		goto failure;

	err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
	if (err != 0)
		goto failure;

	/* OK, now commit destination to socket. */
	sk_setup_caps(sk, &rt->u.dst);

	/* ISS doubles as the initial Greatest Ack Received. */
	dp->dccps_gar =
		dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
							    inet->daddr,
							    inet->sport,
							    usin->sin_port);
	dccp_update_gss(sk, dp->dccps_iss);

	/*
	 * SWL and AWL are initially adjusted so that they are not less than
	 * the initial Sequence Numbers received and sent, respectively:
	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
	 *	AWL := max(GSS - W' + 1, ISS).
	 * These adjustments MUST be applied only at the beginning of the
	 * connection.
	 */
	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));

	inet->id = dp->dccps_iss ^ jiffies;

	err = dccp_connect(sk);
	/* Route reference now owned by the socket; NULL rt so the
	 * failure path's ip_rt_put() becomes a no-op. */
	rt = NULL;
	if (err != 0)
		goto failure;
out:
	return err;
failure:
	/*
	 * This unhashes the socket and releases the local port, if necessary.
	 */
	dccp_set_state(sk, DCCP_CLOSED);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	goto out;
}
340
/*
 * This routine does path mtu discovery as defined in RFC1191.
 *
 * Invoked from dccp_v4_err() on ICMP_FRAG_NEEDED: shrink the cached
 * path MTU to @mtu and, if our segments no longer fit, resync the MSS
 * and probe with a DCCP-Sync.
 */
static inline void dccp_do_pmtu_discovery(struct sock *sk,
					  const struct iphdr *iph,
					  u32 mtu)
{
	struct dst_entry *dst;
	const struct inet_sock *inet = inet_sk(sk);
	const struct dccp_sock *dp = dccp_sk(sk);

	/* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
	 * send out by Linux are always < 576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == DCCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	/* Re-read: update_pmtu() may have clamped the value. */
	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    dp->dccps_pmtu_cookie > mtu) {
		dccp_sync_mss(sk, mtu);

		/*
		 * From: draft-ietf-dccp-spec-11.txt
		 *
		 *	DCCP-Sync packets are the best choice for upward
		 *	probing, since DCCP-Sync probes do not risk application
		 *	data loss.
		 */
		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
	} /* else let the usual retransmit timer handle it */
}
392
393static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
394{
395 int err;
396 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
397 const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
398 sizeof(struct dccp_hdr_ext) +
399 sizeof(struct dccp_hdr_ack_bits);
400 struct sk_buff *skb;
401
402 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
403 return;
404
405 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
406 if (skb == NULL)
407 return;
408
409 /* Reserve space for headers. */
410 skb_reserve(skb, MAX_DCCP_HEADER);
411
412 skb->dst = dst_clone(rxskb->dst);
413
414 skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
415 dh = dccp_hdr(skb);
416 memset(dh, 0, dccp_hdr_ack_len);
417
418 /* Build DCCP header and checksum it. */
419 dh->dccph_type = DCCP_PKT_ACK;
420 dh->dccph_sport = rxdh->dccph_dport;
421 dh->dccph_dport = rxdh->dccph_sport;
422 dh->dccph_doff = dccp_hdr_ack_len / 4;
423 dh->dccph_x = 1;
424
425 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
426 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
427 DCCP_SKB_CB(rxskb)->dccpd_seq);
428
429 bh_lock_sock(dccp_ctl_socket->sk);
430 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
431 rxskb->nh.iph->daddr,
432 rxskb->nh.iph->saddr, NULL);
433 bh_unlock_sock(dccp_ctl_socket->sk);
434
435 if (err == NET_XMIT_CN || err == 0) {
436 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
437 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
438 }
439}
440
/* request_sock_ops hook: ack a packet matching a pending request_sock.
 * The request itself is unused; the reply is built purely from @skb. */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	dccp_v4_ctl_send_ack(skb);
}
446
447static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
448 struct dst_entry *dst)
449{
450 int err = -1;
451 struct sk_buff *skb;
452
453 /* First, grab a route. */
454
455 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
456 goto out;
457
458 skb = dccp_make_response(sk, dst, req);
459 if (skb != NULL) {
460 const struct inet_request_sock *ireq = inet_rsk(req);
461
462 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
463 ireq->rmt_addr,
464 ireq->opt);
465 if (err == NET_XMIT_CN)
466 err = 0;
467 }
468
469out:
470 dst_release(dst);
471 return err;
472}
473
474/*
475 * This routine is called by the ICMP module when it gets some sort of error
476 * condition. If err < 0 then the socket should be closed and the error
477 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
478 * After adjustment header points to the first 8 bytes of the tcp header. We
479 * need to find the appropriate port.
480 *
481 * The locking strategy used here is very "optimistic". When someone else
482 * accesses the socket the ICMP is just dropped and for some paths there is no
483 * check at all. A more general error queue to queue errors for later handling
484 * is probably better.
485 */
486void dccp_v4_err(struct sk_buff *skb, u32 info)
487{
488 const struct iphdr *iph = (struct iphdr *)skb->data;
489 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
490 (iph->ihl << 2));
491 struct dccp_sock *dp;
492 struct inet_sock *inet;
493 const int type = skb->h.icmph->type;
494 const int code = skb->h.icmph->code;
495 struct sock *sk;
496 __u64 seq;
497 int err;
498
499 if (skb->len < (iph->ihl << 2) + 8) {
500 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
501 return;
502 }
503
504 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
505 iph->saddr, dh->dccph_sport, inet_iif(skb));
506 if (sk == NULL) {
507 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
508 return;
509 }
510
511 if (sk->sk_state == DCCP_TIME_WAIT) {
512 inet_twsk_put((struct inet_timewait_sock *)sk);
513 return;
514 }
515
516 bh_lock_sock(sk);
517 /* If too many ICMPs get dropped on busy
518 * servers this needs to be solved differently.
519 */
520 if (sock_owned_by_user(sk))
521 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
522
523 if (sk->sk_state == DCCP_CLOSED)
524 goto out;
525
526 dp = dccp_sk(sk);
527 seq = dccp_hdr_seq(skb);
528 if (sk->sk_state != DCCP_LISTEN &&
529 !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
530 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
531 goto out;
532 }
533
534 switch (type) {
535 case ICMP_SOURCE_QUENCH:
536 /* Just silently ignore these. */
537 goto out;
538 case ICMP_PARAMETERPROB:
539 err = EPROTO;
540 break;
541 case ICMP_DEST_UNREACH:
542 if (code > NR_ICMP_UNREACH)
543 goto out;
544
545 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
546 if (!sock_owned_by_user(sk))
547 dccp_do_pmtu_discovery(sk, iph, info);
548 goto out;
549 }
550
551 err = icmp_err_convert[code].errno;
552 break;
553 case ICMP_TIME_EXCEEDED:
554 err = EHOSTUNREACH;
555 break;
556 default:
557 goto out;
558 }
559
560 switch (sk->sk_state) {
561 struct request_sock *req , **prev;
562 case DCCP_LISTEN:
563 if (sock_owned_by_user(sk))
564 goto out;
565 req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
566 iph->daddr, iph->saddr);
567 if (!req)
568 goto out;
569
570 /*
571 * ICMPs are not backlogged, hence we cannot get an established
572 * socket here.
573 */
574 BUG_TRAP(!req->sk);
575
576 if (seq != dccp_rsk(req)->dreq_iss) {
577 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
578 goto out;
579 }
580 /*
581 * Still in RESPOND, just remove it silently.
582 * There is no good way to pass the error to the newly
583 * created socket, and POSIX does not want network
584 * errors returned from accept().
585 */
586 inet_csk_reqsk_queue_drop(sk, req, prev);
587 goto out;
588
589 case DCCP_REQUESTING:
590 case DCCP_RESPOND:
591 if (!sock_owned_by_user(sk)) {
592 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
593 sk->sk_err = err;
594
595 sk->sk_error_report(sk);
596
597 dccp_done(sk);
598 } else
599 sk->sk_err_soft = err;
600 goto out;
601 }
602
603 /* If we've already connected we will keep trying
604 * until we time out, or the user gives up.
605 *
606 * rfc1122 4.2.3.9 allows to consider as hard errors
607 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
608 * but it is obsoleted by pmtu discovery).
609 *
610 * Note, that in modern internet, where routing is unreliable
611 * and in each dark corner broken firewalls sit, sending random
612 * errors ordered by their masters even this two messages finally lose
613 * their original sense (even Linux sends invalid PORT_UNREACHs)
614 *
615 * Now we are in compliance with RFCs.
616 * --ANK (980905)
617 */
618
619 inet = inet_sk(sk);
620 if (!sock_owned_by_user(sk) && inet->recverr) {
621 sk->sk_err = err;
622 sk->sk_error_report(sk);
623 } else /* Only an error on timeout */
624 sk->sk_err_soft = err;
625out:
626 bh_unlock_sock(sk);
627 sock_put(sk);
628}
629
/*
 * Send a DCCP-Reset with reset code @code on established socket @sk and
 * tear down both half-connection CCIDs.  Returns 0 on success (local
 * congestion counts as success) or a negative error.
 */
int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	struct sk_buff *skb;
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 */
	int err = inet_sk_rebuild_header(sk);

	if (err != 0)
		return err;

	skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
	if (skb != NULL) {
		const struct dccp_sock *dp = dccp_sk(sk);
		const struct inet_sock *inet = inet_sk(sk);

		err = ip_build_and_send_pkt(skb, sk,
					    inet->saddr, inet->daddr, NULL);
		if (err == NET_XMIT_CN)
			err = 0;

		/* Connection is going away: shut down the CCIDs for both
		 * half-connections. */
		ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
		ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
	}

	return err;
}
658
/* Derive a hard-to-guess initial sequence number for the connection
 * identified by the received packet's address/port 4-tuple (our side
 * first, hence daddr/dport before saddr/sport). */
static inline u64 dccp_v4_init_sequence(const struct sock *sk,
					const struct sk_buff *skb)
{
	return secure_dccp_sequence_number(skb->nh.iph->daddr,
					   skb->nh.iph->saddr,
					   dccp_hdr(skb)->dccph_dport,
					   dccp_hdr(skb)->dccph_sport);
}
667
/*
 * Handle a DCCP-Request arriving on a LISTEN socket: allocate a
 * request_sock, record the peer's initial sequence number, pick ours
 * and send the DCCP-Response.  Returns 0 on success, -1 on drop.
 */
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	/* NOTE(review): on-stack dccp_sock, never initialized here, handed
	 * to dccp_openreq_init(); presumably that helper only writes into
	 * it or reads fields it sets first — TODO confirm it does not read
	 * uninitialized state. */
	struct dccp_sock dp;
	struct request_sock *req;
	struct dccp_request_sock *dreq;
	const __u32 saddr = skb->nh.iph->saddr;
	const __u32 daddr = skb->nh.iph->daddr;
	struct dst_entry *dst = NULL;

	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/*
	 * TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk))
		goto drop;

	/*
	 * Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(sk->sk_prot->rsk_prot);
	if (req == NULL)
		goto drop;

	/* FIXME: process options */

	dccp_openreq_init(req, &dp, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	/* FIXME: Merge Aristeu's option parsing code when ready */
	req->rcv_wnd = 100; /* Fake, option parsing will get the
			       right value */
	ireq->opt = NULL;

	/*
	 * Step 3: Process LISTEN state
	 *
	 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *
	 * In fact we defer setting S.GSR, S.SWL, S.SWH to
	 * dccp_create_openreq_child.
	 */
	dreq = dccp_rsk(req);
	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;

	if (dccp_v4_send_response(sk, req, dst))
		goto drop_and_free;

	/* Park the request until the final Ack (or timeout/retransmit). */
	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
	return 0;

drop_and_free:
	/*
	 * FIXME: should be reqsk_free after implementing req->rsk_ops
	 */
	__reqsk_free(req);
drop:
	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
	return -1;
}
744
/*
 * The three way handshake has completed - we got a valid ACK or DATAACK -
 * now create the new socket.
 *
 * This is the equivalent of TCP's tcp_v4_syn_recv_sock
 *
 * Returns the new (locked-into-hash) child socket, or NULL if the accept
 * queue is full, no route exists, or allocation fails.
 */
struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
				       struct request_sock *req,
				       struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct dccp_sock *newdp;
	struct sock *newsk;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = dccp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto exit;

	sk_setup_caps(newsk, dst);

	/* Copy the connection identity from the request into the child. */
	newdp = dccp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->daddr = ireq->rmt_addr;
	newinet->rcv_saddr = ireq->loc_addr;
	newinet->saddr = ireq->loc_addr;
	/* IP options ownership moves to the child. */
	newinet->opt = ireq->opt;
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = skb->nh.iph->ttl;
	newinet->id = jiffies;

	dccp_sync_mss(newsk, dst_mtu(dst));

	/* Make the child visible to lookups and inherit the local port. */
	__inet_hash(&dccp_hashinfo, newsk, 0);
	__inet_inherit_port(&dccp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
798
/*
 * For a packet arriving on a LISTEN socket, find who should really
 * process it: a pending request_sock (handshake continuation), an
 * already-established child socket, or the listener itself.  Returns
 * NULL when the packet should be discarded (e.g. time-wait hit).
 */
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev,
						       dh->dccph_sport,
						       iph->saddr, iph->daddr);
	if (req != NULL)
		return dccp_check_req(sk, skb, req, prev);

	nsk = __inet_lookup_established(&dccp_hashinfo,
					iph->saddr, dh->dccph_sport,
					iph->daddr, ntohs(dh->dccph_dport),
					inet_iif(skb));
	if (nsk != NULL) {
		if (nsk->sk_state != DCCP_TIME_WAIT) {
			/* Caller expects the returned socket locked. */
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

	return sk;
}
827
/*
 * Compute the DCCP checksum over @skb with the IPv4 pseudo-header
 * (saddr, daddr, protocol, length).  Coverage follows the header's
 * CsCov field: 0 means the whole packet, otherwise
 * (CsCov + X) 32-bit words, clamped to the packet length.
 * NOTE(review): per draft-ietf-dccp-spec this coverage formula may need
 * revisiting — kept identical to dccp_v4_verify_checksum.
 */
int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
		     const u32 daddr)
{
	const struct dccp_hdr* dh = dccp_hdr(skb);
	int checksum_len;
	u32 tmp;

	if (dh->dccph_cscov == 0)
		checksum_len = skb->len;
	else {
		checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
		checksum_len = checksum_len < skb->len ? checksum_len :
			       skb->len;
	}

	tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
	return csum_tcpudp_magic(saddr, daddr, checksum_len,
				 IPPROTO_DCCP, tmp);
}
847
848static int dccp_v4_verify_checksum(struct sk_buff *skb,
849 const u32 saddr, const u32 daddr)
850{
851 struct dccp_hdr *dh = dccp_hdr(skb);
852 int checksum_len;
853 u32 tmp;
854
855 if (dh->dccph_cscov == 0)
856 checksum_len = skb->len;
857 else {
858 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
859 checksum_len = checksum_len < skb->len ? checksum_len :
860 skb->len;
861 }
862 tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
863 return csum_tcpudp_magic(saddr, daddr, checksum_len,
864 IPPROTO_DCCP, tmp) == 0 ? 0 : -1;
865}
866
/*
 * Find a route for replying to @skb on behalf of @sk (the control
 * socket): the flow is the exact reverse of the received packet
 * (addresses and ports swapped, same interface).  Returns the route's
 * dst_entry (caller releases) or NULL on failure.
 */
static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
					   struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
			    .nl_u = { .ip4_u =
				      { .daddr = skb->nh.iph->saddr,
					.saddr = skb->nh.iph->daddr,
					.tos = RT_CONN_FLAGS(sk) } },
			    .proto = sk->sk_protocol,
			    .uli_u = { .ports =
				       { .sport = dccp_hdr(skb)->dccph_dport,
					 .dport = dccp_hdr(skb)->dccph_sport }
				     }
			  };

	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}

	return &rt->u.dst;
}
890
/*
 * Send a DCCP-Reset from the protocol control socket in reply to
 * @rxskb when no connection exists for it.  Header fields mirror the
 * received packet with ports swapped; never replies to a Reset (to
 * avoid reset storms).
 */
static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
{
	int err;
	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
				       sizeof(struct dccp_hdr_ext) +
				       sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb;
	struct dst_entry *dst;
	u64 seqno;

	/* Never send a reset in response to a reset. */
	if (rxdh->dccph_type == DCCP_PKT_RESET)
		return;

	/* Only reply to packets that were actually addressed to us. */
	if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
		return;

	dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
	if (dst == NULL)
		return;

	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
	if (skb == NULL)
		goto out;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);
	skb->dst = dst_clone(dst);

	skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_hdr_reset_len);

	/* Build DCCP header and checksum it. */
	dh->dccph_type = DCCP_PKT_RESET;
	dh->dccph_sport = rxdh->dccph_dport;
	dh->dccph_dport = rxdh->dccph_sport;
	dh->dccph_doff = dccp_hdr_reset_len / 4;
	dh->dccph_x = 1;
	dccp_hdr_reset(skb)->dccph_reset_code =
		DCCP_SKB_CB(rxskb)->dccpd_reset_code;

	/* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
	seqno = 0;
	if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);

	dccp_hdr_set_seq(dh, seqno);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
			 DCCP_SKB_CB(rxskb)->dccpd_seq);

	dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
					      rxskb->nh.iph->daddr);

	bh_lock_sock(dccp_ctl_socket->sk);
	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
				    rxskb->nh.iph->daddr,
				    rxskb->nh.iph->saddr, NULL);
	bh_unlock_sock(dccp_ctl_socket->sk);

	if (err == NET_XMIT_CN || err == 0) {
		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
	}
out:
	 dst_release(dst);
}
959
/*
 * Main per-socket receive entry (called with the socket locked, or via
 * the backlog).  Dispatches to the established fast path, the LISTEN
 * handling, or the generic state machine; a non-zero return from those
 * triggers a Reset(No Connection) back to the sender.
 * Always returns 0 — the skb is consumed on every path.
 */
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_hdr *dh = dccp_hdr(skb);

	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
		if (dccp_rcv_established(sk, skb, dh, skb->len))
			goto reset;
		return 0;
	}

	/*
	 *  Step 3: Process LISTEN state
	 *     If S.state == LISTEN,
	 *	  If P.type == Request or P contains a valid Init Cookie
	 *	  	option,
	 *	     * Must scan the packet's options to check for an Init
	 *		Cookie.  Only the Init Cookie is processed here,
	 *		however; other options are processed in Step 8.  This
	 *		scan need only be performed if the endpoint uses Init
	 *		Cookies *
	 *	     * Generate a new socket and switch to that socket *
	 *	     Set S := new socket for this port pair
	 *	     S.state = RESPOND
	 *	     Choose S.ISS (initial seqno) or set from Init Cookie
	 *	     Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *	     Continue with S.state == RESPOND
	 *	     * A Response packet will be generated in Step 11 *
	 *	  Otherwise,
	 *	     Generate Reset(No Connection) unless P.type == Reset
	 *	     Drop packet and return
	 *
	 * NOTE: the check for the packet types is done in
	 *	 dccp_rcv_state_process
	 */
	if (sk->sk_state == DCCP_LISTEN) {
		struct sock *nsk = dccp_v4_hnd_req(sk, skb);

		if (nsk == NULL)
			goto discard;

		if (nsk != sk) {
			/* Packet belongs to a child socket created by the
			 * handshake; process it there. */
			if (dccp_child_process(sk, nsk, skb))
				goto reset;
			return 0;
		}
	}

	if (dccp_rcv_state_process(sk, skb, dh, skb->len))
		goto reset;
	return 0;

reset:
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
	dccp_v4_ctl_send_reset(skb);
discard:
	kfree_skb(skb);
	return 0;
}
1018
/*
 * "Step 1" header sanity checks of the receive state machine: validate
 * packet type, Data Offset, short-sequence-number rules and the
 * checksum.  Returns 1 if the packet must be dropped, 0 if it looks
 * valid.  Note the ordering: each pskb_may_pull() must precede the
 * header accesses it makes linear, and dh is reloaded afterwards since
 * the pull may relocate skb->data.
 */
static inline int dccp_invalid_packet(struct sk_buff *skb)
{
	const struct dccp_hdr *dh;

	if (skb->pkt_type != PACKET_HOST)
		return 1;

	if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
		return 1;
	}

	dh = dccp_hdr(skb);

	/* If the packet type is not understood, drop packet and return */
	if (dh->dccph_type >= DCCP_PKT_INVALID) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
		return 1;
	}

	/*
	 * If P.Data Offset is too small for packet type, or too large for
	 * packet, drop packet and return
	 */
	if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
					    "too small 1\n",
			       dh->dccph_doff);
		return 1;
	}

	if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
					    "too small 2\n",
			       dh->dccph_doff);
		return 1;
	}

	/* Reload: pskb_may_pull() may have moved skb->data. */
	dh = dccp_hdr(skb);

	/*
	 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
	 * has short sequence numbers), drop packet and return
	 */
	if (dh->dccph_x == 0 &&
	    dh->dccph_type != DCCP_PKT_DATA &&
	    dh->dccph_type != DCCP_PKT_ACK &&
	    dh->dccph_type != DCCP_PKT_DATAACK) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
					    "nor DataAck and P.X == 0\n",
			       dccp_packet_name(dh->dccph_type));
		return 1;
	}

	/* If the header checksum is incorrect, drop packet and return */
	if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
				    skb->nh.iph->daddr) < 0) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
					    "incorrect\n");
		return 1;
	}

	return 0;
}
1083
/*
 * dccp_v4_rcv - main IPv4 receive entry point for DCCP
 *
 * Called from the IP layer with a freshly delivered skb.  Validates the
 * header, looks up the owning socket, and either processes the packet
 * directly or queues it on the socket backlog when the socket is owned
 * by a user context.  Consumes the skb on every path.
 */
int dccp_v4_rcv(struct sk_buff *skb)
{
	const struct dccp_hdr *dh;
	struct sock *sk;
	int rc;

	/* Step 1: Check header basics: */

	if (dccp_invalid_packet(skb))
		goto discard_it;

	dh = dccp_hdr(skb);
#if 0
	/*
	 * Use something like this to simulate some DATA/DATAACK loss to test
	 * dccp_ackpkts_add, you'll get something like this on a session that
	 * sends 10 DATA/DATAACK packets:
	 *
	 * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
	 *
	 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
	 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
	 *		 with the same state
	 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
	 *
	 * So...
	 *
	 * 281473596467422 was received
	 * 281473596467421 was not received
	 * 281473596467420 was received
	 * 281473596467419 was not received
	 * 281473596467418 was received
	 * 281473596467417 was not received
	 * 281473596467416 was received
	 * 281473596467415 was not received
	 * 281473596467414 was received
	 * 281473596467413 was received (this one was the 3way handshake
	 *				 RESPONSE)
	 *
	 */
	if (dh->dccph_type == DCCP_PKT_DATA ||
	    dh->dccph_type == DCCP_PKT_DATAACK) {
		static int discard = 0;

		if (discard) {
			discard = 0;
			goto discard_it;
		}
		discard = 1;
	}
#endif
	/* Cache seq/type in the skb control block for the rest of rx path */
	DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;

	dccp_pr_debug("%8.8s "
		      "src=%u.%u.%u.%u@%-5d "
		      "dst=%u.%u.%u.%u@%-5d seq=%llu",
		      dccp_packet_name(dh->dccph_type),
		      NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
		      NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);

	if (dccp_packet_without_ack(skb)) {
		/* No ack field present on this packet type */
		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
		dccp_pr_debug_cat("\n");
	} else {
		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
		dccp_pr_debug_cat(", ack=%llu\n",
				  (unsigned long long)
				  DCCP_SKB_CB(skb)->dccpd_ack_seq);
	}

	/* Step 2:
	 *	Look up flow ID in table and get corresponding socket */
	sk = __inet_lookup(&dccp_hashinfo,
			   skb->nh.iph->saddr, dh->dccph_sport,
			   skb->nh.iph->daddr, ntohs(dh->dccph_dport),
			   inet_iif(skb));

	/*
	 * Step 2:
	 * 	If no socket ...
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */
	if (sk == NULL) {
		dccp_pr_debug("failed to look up flow ID in table and "
			      "get corresponding socket\n");
		goto no_dccp_socket;
	}

	/*
	 * Step 2:
	 * 	... or S.state == TIMEWAIT,
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */

	if (sk->sk_state == DCCP_TIME_WAIT) {
		dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
			      "do_time_wait\n");
		goto do_time_wait;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
		dccp_pr_debug("xfrm4_policy_check failed\n");
		goto discard_and_relse;
	}

	if (sk_filter(sk, skb, 0)) {
		dccp_pr_debug("sk_filter failed\n");
		goto discard_and_relse;
	}

	skb->dev = NULL;

	/*
	 * Process in softirq context only when the socket is not owned by
	 * a user context; otherwise defer via the backlog, which the owner
	 * drains (through .backlog_rcv == dccp_v4_do_rcv) on release.
	 */
	bh_lock_sock(sk);
	rc = 0;
	if (!sock_owned_by_user(sk))
		rc = dccp_v4_do_rcv(sk, skb);
	else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);	/* drop the reference taken by __inet_lookup */
	return rc;

no_dccp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;
	/*
	 * Step 2:
	 *	Generate Reset(No Connection) unless P.type == Reset
	 *	Drop packet and return
	 */
	if (dh->dccph_type != DCCP_PKT_RESET) {
		DCCP_SKB_CB(skb)->dccpd_reset_code =
					DCCP_RESET_CODE_NO_CONNECTION;
		dccp_v4_ctl_send_reset(skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	/* Timewait sockets carry only an inet_timewait_sock refcount */
	inet_twsk_put((struct inet_timewait_sock *)sk);
	goto no_dccp_socket;
}
1239
/*
 * dccp_v4_init_sock - initialize per-socket DCCP state (.init of dccp_v4_prot)
 *
 * Sets up default options, the rx ack-vector buffer (if enabled), the CCID
 * control blocks, timers and initial protocol state.  Returns 0 or -ENOMEM.
 */
static int dccp_v4_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	/*
	 * First socket created is the internal control socket; it must not
	 * get CCID blocks, so this flag is true exactly once.
	 */
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);

	if (dp->dccps_options.dccpo_send_ack_vector) {
		dp->dccps_hc_rx_ackpkts =
			dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
					   GFP_KERNEL);

		if (dp->dccps_hc_rx_ackpkts == NULL)
			return -ENOMEM;
	}

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
						 sk);
		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
						 sk);
		if (dp->dccps_hc_rx_ccid == NULL ||
		    dp->dccps_hc_tx_ccid == NULL) {
			/* ccid_exit/dccp_ackpkts_free accept NULL safely */
			ccid_exit(dp->dccps_hc_rx_ccid, sk);
			ccid_exit(dp->dccps_hc_tx_ccid, sk);
			dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
			dp->dccps_hc_rx_ackpkts = NULL;
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else
		dccp_ctl_socket_init = 0;

	dccp_init_xmit_timers(sk);
	inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
	sk->sk_state = DCCP_CLOSED;
	sk->sk_write_space = dccp_write_space;
	dp->dccps_mss_cache = 536;	/* conservative default MSS */
	dp->dccps_role = DCCP_ROLE_UNDEFINED;

	return 0;
}
1289
/*
 * dccp_v4_destroy_sock - release per-socket DCCP resources (.destroy)
 *
 * Frees the pending retransmit skb, the bind bucket reference, the rx
 * ack-vector buffer and both CCID control blocks.  Always returns 0.
 */
static int dccp_v4_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	/* Half-connection teardown first, then drop the CCID objects */
	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
	dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
	dp->dccps_hc_rx_ackpkts = NULL;
	ccid_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_exit(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	return 0;
}
1317
/* Free the IP options copied into the request sock at connection-request
 * time; kfree(NULL) is a no-op when none were present. */
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}
1322
/* Operations for embryonic (request_sock) DCCP connections */
static struct request_sock_ops dccp_request_sock_ops = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct dccp_request_sock),
	.rtx_syn_ack	= dccp_v4_send_response,	/* (re)send RESPONSE */
	.send_ack	= dccp_v4_reqsk_send_ack,
	.destructor	= dccp_v4_reqsk_destructor,
	.send_reset	= dccp_v4_ctl_send_reset,
};
1331
/*
 * DCCP-over-IPv4 protocol descriptor, registered with the socket layer.
 * Maps the generic proto operations onto the DCCP implementations above.
 */
struct proto dccp_v4_prot = {
	.name			= "DCCP",
	.owner			= THIS_MODULE,
	.close			= dccp_close,
	.connect		= dccp_v4_connect,
	.disconnect		= dccp_disconnect,
	.ioctl			= dccp_ioctl,
	.init			= dccp_v4_init_sock,
	.setsockopt		= dccp_setsockopt,
	.getsockopt		= dccp_getsockopt,
	.sendmsg		= dccp_sendmsg,
	.recvmsg		= dccp_recvmsg,
	.backlog_rcv		= dccp_v4_do_rcv,	/* drains rx backlog */
	.hash			= dccp_v4_hash,
	.unhash			= dccp_v4_unhash,
	.accept			= inet_csk_accept,
	.get_port		= dccp_v4_get_port,
	.shutdown		= dccp_shutdown,
	.destroy		= dccp_v4_destroy_sock,
	.orphan_count		= &dccp_orphan_count,
	.max_header		= MAX_DCCP_HEADER,
	.obj_size		= sizeof(struct dccp_sock),
	.rsk_prot		= &dccp_request_sock_ops,
	.twsk_obj_size		= sizeof(struct inet_timewait_sock),
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
new file mode 100644
index 000000000000..ce5dff4ac22e
--- /dev/null
+++ b/net/dccp/minisocks.c
@@ -0,0 +1,264 @@
1/*
2 * net/dccp/minisocks.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16#include <linux/timer.h>
17
18#include <net/sock.h>
19#include <net/xfrm.h>
20#include <net/inet_timewait_sock.h>
21
22#include "ccid.h"
23#include "dccp.h"
24
/*
 * TIME_WAIT bookkeeping for DCCP, reusing the generic inet timewait
 * machinery (hangman timer + twkill work + short-timeout calendar).
 */
struct inet_timewait_death_row dccp_death_row = {
	.sysctl_max_tw_buckets = NR_FILE * 2,
	.period		= DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
	.death_lock	= SPIN_LOCK_UNLOCKED,
	.hashinfo	= &dccp_hashinfo,
	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
					    (unsigned long)&dccp_death_row),
	.twkill_work	= __WORK_INITIALIZER(dccp_death_row.twkill_work,
					     inet_twdr_twkill_work,
					     &dccp_death_row),
/* Short-time timewait calendar */

	.twcal_hand	= -1,
	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
					    (unsigned long)&dccp_death_row),
};
41
/*
 * dccp_time_wait - move a closing socket into a timewait bucket
 * @sk:    socket being closed (always finished off via dccp_done())
 * @state: timewait substate to record
 * @timeo: requested timeout; clamped to at least ~3.5*RTO, and forced to
 *         DCCP_TIMEWAIT_LEN when state == DCCP_TIME_WAIT
 *
 * If the timewait table is full, the socket is simply closed without
 * graceful timewait handling.
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
	struct inet_timewait_sock *tw = NULL;

	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
		tw = inet_twsk_alloc(sk, state);

	if (tw != NULL) {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		/* rto*4 - rto/2 == 3.5 * rto, the minimum linger time */
		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);

		/* Linkage updates. */
		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);

		/* Get the TIME_WAIT timeout firing. */
		if (timeo < rto)
			timeo = rto;

		tw->tw_timeout = DCCP_TIMEWAIT_LEN;
		if (state == DCCP_TIME_WAIT)
			timeo = DCCP_TIMEWAIT_LEN;

		inet_twsk_schedule(tw, &dccp_death_row, timeo,
				   DCCP_TIMEWAIT_LEN);
		inet_twsk_put(tw);	/* drop alloc reference */
	} else {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings.
		 */
		LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
			       "table overflow\n");
	}

	dccp_done(sk);
}
78
79struct sock *dccp_create_openreq_child(struct sock *sk,
80 const struct request_sock *req,
81 const struct sk_buff *skb)
82{
83 /*
84 * Step 3: Process LISTEN state
85 *
86 * // Generate a new socket and switch to that socket
87 * Set S := new socket for this port pair
88 */
89 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
90
91 if (newsk != NULL) {
92 const struct dccp_request_sock *dreq = dccp_rsk(req);
93 struct inet_connection_sock *newicsk = inet_csk(sk);
94 struct dccp_sock *newdp = dccp_sk(newsk);
95
96 newdp->dccps_hc_rx_ackpkts = NULL;
97 newdp->dccps_role = DCCP_ROLE_SERVER;
98 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
99
100 if (newdp->dccps_options.dccpo_send_ack_vector) {
101 newdp->dccps_hc_rx_ackpkts =
102 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
103 GFP_ATOMIC);
104 /*
105 * XXX: We're using the same CCIDs set on the parent,
106 * i.e. sk_clone copied the master sock and left the
107 * CCID pointers for this child, that is why we do the
108 * __ccid_get calls.
109 */
110 if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
111 goto out_free;
112 }
113
114 if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid,
115 newsk) != 0 ||
116 ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
117 newsk) != 0)) {
118 dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
119 ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
120 ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
121out_free:
122 /* It is still raw copy of parent, so invalidate
123 * destructor and make plain sk_free() */
124 newsk->sk_destruct = NULL;
125 sk_free(newsk);
126 return NULL;
127 }
128
129 __ccid_get(newdp->dccps_hc_rx_ccid);
130 __ccid_get(newdp->dccps_hc_tx_ccid);
131
132 /*
133 * Step 3: Process LISTEN state
134 *
135 * Choose S.ISS (initial seqno) or set from Init Cookie
136 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
137 * Cookie
138 */
139
140 /* See dccp_v4_conn_request */
141 newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
142
143 newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
144 dccp_update_gsr(newsk, dreq->dreq_isr);
145
146 newdp->dccps_iss = dreq->dreq_iss;
147 dccp_update_gss(newsk, dreq->dreq_iss);
148
149 /*
150 * SWL and AWL are initially adjusted so that they are not less than
151 * the initial Sequence Numbers received and sent, respectively:
152 * SWL := max(GSR + 1 - floor(W/4), ISR),
153 * AWL := max(GSS - W' + 1, ISS).
154 * These adjustments MUST be applied only at the beginning of the
155 * connection.
156 */
157 dccp_set_seqno(&newdp->dccps_swl,
158 max48(newdp->dccps_swl, newdp->dccps_isr));
159 dccp_set_seqno(&newdp->dccps_awl,
160 max48(newdp->dccps_awl, newdp->dccps_iss));
161
162 dccp_init_xmit_timers(newsk);
163
164 DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
165 }
166 return newsk;
167}
168
/*
 * Process an incoming packet for RESPOND sockets represented
 * as a request_sock.
 *
 * Handles retransmitted REQUESTs (resend RESPONSE), validates the
 * handshake-completing ACK/DATAACK, and promotes the request into a full
 * child socket.  Returns the child socket, or NULL when the packet was a
 * duplicate, invalid, or the request was dropped.
 */
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
			    struct request_sock *req,
			    struct request_sock **prev)
{
	struct sock *child = NULL;

	/* Check for retransmitted REQUEST */
	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
		if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
			    dccp_rsk(req)->dreq_isr)) {
			struct dccp_request_sock *dreq = dccp_rsk(req);

			dccp_pr_debug("Retransmitted REQUEST\n");
			/* Send another RESPONSE packet */
			dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
			dccp_set_seqno(&dreq->dreq_isr,
				       DCCP_SKB_CB(skb)->dccpd_seq);
			req->rsk_ops->rtx_syn_ack(sk, req, NULL);
		}
		/* Network Duplicate, discard packet */
		return NULL;
	}

	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;

	/* Only ACK or DATAACK may complete the handshake */
	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
	    dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
		goto drop;

	/* Invalid ACK */
	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
		dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
			      "dreq_iss=%llu\n",
			      (unsigned long long)
			      DCCP_SKB_CB(skb)->dccpd_ack_seq,
			      (unsigned long long)
			      dccp_rsk(req)->dreq_iss);
		goto drop;
	}

	child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
	if (child == NULL)
		goto listen_overflow;

	/* FIXME: deal with options */

	/* Move the request from the listen queue onto the accept queue */
	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_removed(sk, req);
	inet_csk_reqsk_queue_add(sk, req, child);
out:
	return child;
listen_overflow:
	dccp_pr_debug("listen_overflow!\n");
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
		req->rsk_ops->send_reset(skb);

	inet_csk_reqsk_queue_drop(sk, req, prev);
	goto out;
}
234
/*
 * Queue segment on the new socket if the new socket is active,
 * otherwise we just shortcircuit this and continue with
 * the new socket.
 *
 * Called with the child socket bh-locked and referenced; unlocks and
 * drops the reference before returning the rcv_state_process result.
 */
int dccp_child_process(struct sock *parent, struct sock *child,
		       struct sk_buff *skb)
{
	int ret = 0;
	const int state = child->sk_state;

	if (!sock_owned_by_user(child)) {
		ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
					     skb->len);

		/* Wakeup parent, send SIGIO */
		if (state == DCCP_RESPOND && child->sk_state != state)
			parent->sk_data_ready(parent, 0);
	} else {
		/* Alas, it is possible again, because we do lookup
		 * in main socket hash table and lock on listening
		 * socket does not protect us more.
		 */
		sk_add_backlog(child, skb);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return ret;
}
diff --git a/net/dccp/options.c b/net/dccp/options.c
new file mode 100644
index 000000000000..382c5894acb2
--- /dev/null
+++ b/net/dccp/options.c
@@ -0,0 +1,855 @@
1/*
2 * net/dccp/options.c
3 *
4 * An implementation of the DCCP protocol
5 * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14#include <linux/config.h>
15#include <linux/dccp.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/skbuff.h>
20
21#include "ccid.h"
22#include "dccp.h"
23
24static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
25 struct sock *sk,
26 const u64 ackno,
27 const unsigned char len,
28 const unsigned char *vector);
29
/* stores the default values for new connection. may be changed with sysctl */
static const struct dccp_options dccpo_default_values = {
	.dccpo_sequence_window	= DCCPF_INITIAL_SEQUENCE_WINDOW,
	.dccpo_ccid		= DCCPF_INITIAL_CCID,
	.dccpo_send_ack_vector	= DCCPF_INITIAL_SEND_ACK_VECTOR,
	.dccpo_send_ndp_count	= DCCPF_INITIAL_SEND_NDP_COUNT,
};
37
38void dccp_options_init(struct dccp_options *dccpo)
39{
40 memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
41}
42
43static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
44{
45 u32 value = 0;
46
47 if (len > 3)
48 value += *bf++ << 24;
49 if (len > 2)
50 value += *bf++ << 16;
51 if (len > 1)
52 value += *bf++ << 8;
53 if (len > 0)
54 value += *bf;
55
56 return value;
57}
58
/*
 * dccp_parse_options - walk and process the option area of a received packet
 *
 * Parses from the end of the fixed header to dccph_doff*4, dispatching each
 * option (NDP count, Ack Vector, timestamps, elapsed time, CCID-specific).
 * Returns 0 on success; on a malformed option returns -1 after setting the
 * reset code to Option Error and bumping the INVALIDOPT MIB counter.
 */
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
#ifdef CONFIG_IP_DCCP_DEBUG
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT rx opt: " : "server rx opt: ";
#endif
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
	unsigned char *opt_ptr = options;
	const unsigned char *opt_end = (unsigned char *)dh +
					(dh->dccph_doff * 4);
	struct dccp_options_received *opt_recv = &dp->dccps_options_received;
	unsigned char opt, len;
	unsigned char *value;

	memset(opt_recv, 0, sizeof(*opt_recv));

	while (opt_ptr != opt_end) {
		opt = *opt_ptr++;
		len = 0;
		value = NULL;

		/* Check if this isn't a single byte option */
		if (opt > DCCPO_MAX_RESERVED) {
			if (opt_ptr == opt_end)
				goto out_invalid_option;

			len = *opt_ptr++;
			if (len < 3)
				goto out_invalid_option;
			/*
			 * Remove the type and len fields, leaving
			 * just the value size
			 */
			len -= 2;
			value = opt_ptr;
			opt_ptr += len;

			/* Value must not run past the option area */
			if (opt_ptr > opt_end)
				goto out_invalid_option;
		}

		switch (opt) {
		case DCCPO_PADDING:
			break;
		case DCCPO_NDP_COUNT:
			if (len > 3)
				goto out_invalid_option;

			opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
			dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
				      opt_recv->dccpor_ndp);
			break;
		case DCCPO_ACK_VECTOR_0:
			if (len > DCCP_MAX_ACK_VECTOR_LEN)
				goto out_invalid_option;

			/* Ack Vectors are ignored on pure DATA packets;
			 * 'continue' resumes the enclosing while loop */
			if (pkt_type == DCCP_PKT_DATA)
				continue;

			opt_recv->dccpor_ack_vector_len = len;
			opt_recv->dccpor_ack_vector_idx = value - options;

			dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
				      debug_prefix, len,
				      (unsigned long long)
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
			dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
					     value, len);
			dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
							 sk,
						 DCCP_SKB_CB(skb)->dccpd_ack_seq,
							 len, value);
			break;
		case DCCPO_TIMESTAMP:
			if (len != 4)
				goto out_invalid_option;

			opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);

			/* Remember peer timestamp + local arrival time so we
			 * can echo it back with an elapsed-time option */
			dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
			do_gettimeofday(&dp->dccps_timestamp_time);

			dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
				      debug_prefix, opt_recv->dccpor_timestamp,
				      (unsigned long long)
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
			break;
		case DCCPO_TIMESTAMP_ECHO:
			/* 4 bytes echo, optionally + 2 or 4 of elapsed time */
			if (len != 4 && len != 6 && len != 8)
				goto out_invalid_option;

			opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);

			dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
				      debug_prefix,
				      opt_recv->dccpor_timestamp_echo,
				      len + 2,
				      (unsigned long long)
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);

			if (len > 4) {
				if (len == 6)
					opt_recv->dccpor_elapsed_time =
						 ntohs(*(u16 *)(value + 4));
				else
					opt_recv->dccpor_elapsed_time =
						 ntohl(*(u32 *)(value + 4));

				dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n",
					      debug_prefix,
					      opt_recv->dccpor_elapsed_time);
			}
			break;
		case DCCPO_ELAPSED_TIME:
			if (len != 2 && len != 4)
				goto out_invalid_option;

			/* Ignored on pure DATA packets */
			if (pkt_type == DCCP_PKT_DATA)
				continue;

			if (len == 2)
				opt_recv->dccpor_elapsed_time =
							ntohs(*(u16 *)value);
			else
				opt_recv->dccpor_elapsed_time =
							ntohl(*(u32 *)value);

			dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
				      opt_recv->dccpor_elapsed_time);
			break;
			/*
			 * From draft-ietf-dccp-spec-11.txt:
			 *
			 *	Option numbers 128 through 191 are for
			 *	options sent from the HC-Sender to the
			 *	HC-Receiver; option numbers 192 through 255
			 *	are for options sent from the HC-Receiver to
			 *	the HC-Sender.
			 */
		case 128 ... 191: {
			const u16 idx = value - options;

			if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
						     opt, len, idx,
						     value) != 0)
				goto out_invalid_option;
		}
			break;
		case 192 ... 255: {
			const u16 idx = value - options;

			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
						     opt, len, idx,
						     value) != 0)
				goto out_invalid_option;
		}
			break;
		default:
			/* Unknown options are skipped, not fatal */
			pr_info("DCCP(%p): option %d(len=%d) not "
				"implemented, ignoring\n",
				sk, opt, len);
			break;
		}
	}

	return 0;

out_invalid_option:
	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
	pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
	return -1;
}
235
236static void dccp_encode_value_var(const u32 value, unsigned char *to,
237 const unsigned int len)
238{
239 if (len > 3)
240 *to++ = (value & 0xFF000000) >> 24;
241 if (len > 2)
242 *to++ = (value & 0xFF0000) >> 16;
243 if (len > 1)
244 *to++ = (value & 0xFF00) >> 8;
245 if (len > 0)
246 *to++ = (value & 0xFF);
247}
248
/* Number of bytes needed to encode an NDP count (1, 2 or 3). */
static inline int dccp_ndp_len(const int ndp)
{
	if (ndp <= 0xFF)
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}
253
/*
 * dccp_insert_option - prepend a generic TLV option to an outgoing skb
 * @option: option type byte
 * @value/@len: option payload (total wire size is len + 2 for type+len)
 *
 * Silently skips the option (with a ratelimited warning) when it would
 * not fit in the remaining option space.
 */
void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
			const unsigned char option,
			const void *value, const unsigned char len)
{
	unsigned char *to;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
			       "%d option!\n", option);
		return;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;

	/* Options are built headwards: push type, total length, then value */
	to    = skb_push(skb, len + 2);
	*to++ = option;
	*to++ = len + 2;

	memcpy(to, value, len);
}

EXPORT_SYMBOL_GPL(dccp_insert_option);
276
/*
 * dccp_insert_option_ndp - maintain and emit the NDP Count option
 *
 * Updates the running count of consecutive non-data packets, and inserts
 * an NDP Count option carrying the count as it stood *before* this packet.
 */
static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int ndp = dp->dccps_ndp_count;

	if (dccp_non_data_packet(skb))
		++dp->dccps_ndp_count;
	else
		dp->dccps_ndp_count = 0;

	if (ndp > 0) {
		unsigned char *ptr;
		const int ndp_len = dccp_ndp_len(ndp);
		const int len = ndp_len + 2;	/* + type and length bytes */

		if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
			return;

		DCCP_SKB_CB(skb)->dccpd_opt_len += len;

		ptr = skb_push(skb, len);
		*ptr++ = DCCPO_NDP_COUNT;
		*ptr++ = len;
		dccp_encode_value_var(ndp, ptr, ndp_len);
	}
}
303
304static inline int dccp_elapsed_time_len(const u32 elapsed_time)
305{
306 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
307}
308
/*
 * dccp_insert_option_elapsed_time - insert an Elapsed Time option
 * @elapsed_time: elapsed time in tens of microseconds; 0 means "omit"
 *
 * Encodes the value as 2 or 4 bytes depending on magnitude; silently
 * skipped when it does not fit in the remaining option space.
 */
void dccp_insert_option_elapsed_time(struct sock *sk,
				     struct sk_buff *skb,
				     u32 elapsed_time)
{
#ifdef CONFIG_IP_DCCP_DEBUG
	struct dccp_sock *dp = dccp_sk(sk);
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT TX opt: " : "server TX opt: ";
#endif
	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
	const int len = 2 + elapsed_time_len;
	unsigned char *to;

	if (elapsed_time_len == 0)
		return;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
			       "insert elapsed time!\n");
		return;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to    = skb_push(skb, len);
	*to++ = DCCPO_ELAPSED_TIME;
	*to++ = len;

	/* Value goes out in network byte order, 16 or 32 bits wide */
	if (elapsed_time_len == 2) {
		const u16 var16 = htons((u16)elapsed_time);
		memcpy(to, &var16, 2);
	} else {
		const u32 var32 = htonl(elapsed_time);
		memcpy(to, &var32, 4);
	}

	dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
		      debug_prefix, elapsed_time,
		      len,
		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
}

EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
352
/*
 * dccp_insert_option_ack_vector - insert Ack Vector option from the rx
 * ack-vector circular buffer
 *
 * Also inserts an Elapsed Time option for the age of the newest buffered
 * ack state, copies the (possibly wrapped) circular buffer into the skb,
 * and records the single outstanding ack record (see comments below).
 */
static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
#ifdef CONFIG_IP_DCCP_DEBUG
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT TX opt: " : "server TX opt: ";
#endif
	struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
	int len = ap->dccpap_buf_vector_len + 2;
	/* dccpap_time delta is in usecs; /10 yields tens-of-usec units */
	const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10;
	unsigned char *to, *from;

	if (elapsed_time != 0)
		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
			       "insert ACK Vector!\n");
		return;
	}

	/*
	 * XXX: now we have just one ack vector sent record, so
	 * we have to wait for it to be cleared.
	 *
	 * Of course this is not acceptable, but this is just for
	 * basic testing now.
	 */
	if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
		return;

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to    = skb_push(skb, len);
	*to++ = DCCPO_ACK_VECTOR_0;
	*to++ = len;

	len  = ap->dccpap_buf_vector_len;
	from = ap->dccpap_buf + ap->dccpap_buf_head;

	/* Check if buf_head wraps */
	if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
		const unsigned int tailsize = (ap->dccpap_buf_len -
					       ap->dccpap_buf_head);

		memcpy(to, from, tailsize);
		to	+= tailsize;
		len	-= tailsize;
		from	= ap->dccpap_buf;
	}

	memcpy(to, from, len);
	/*
	 *	From draft-ietf-dccp-spec-11.txt:
	 *
	 *	For each acknowledgement it sends, the HC-Receiver will add an
	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
	 *	sequence number it used for the ack packet; ack_ptr will equal
	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
	 *	equal buf_nonce.
	 *
	 * This implementation uses just one ack record for now.
	 */
	ap->dccpap_ack_seqno	  = DCCP_SKB_CB(skb)->dccpd_seq;
	ap->dccpap_ack_ptr	  = ap->dccpap_buf_head;
	ap->dccpap_ack_ackno	  = ap->dccpap_buf_ackno;
	ap->dccpap_ack_nonce	  = ap->dccpap_buf_nonce;
	ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;

	dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
		      "ack_ackno=%llu\n",
		      debug_prefix, ap->dccpap_ack_vector_len,
		      (unsigned long long) ap->dccpap_ack_seqno,
		      (unsigned long long) ap->dccpap_ack_ackno);
}
428
/*
 * dccp_insert_option_timestamp - insert a Timestamp option with the
 * current time in tens of microseconds (network byte order).
 */
void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
{
	struct timeval tv;
	u32 now;

	do_gettimeofday(&tv);
	now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10;
	/* yes this will overflow but that is the point as we want a
	 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */

	now = htonl(now);
	dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
}

EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
444
/*
 * dccp_insert_option_timestamp_echo - echo the peer's last timestamp
 *
 * Emits a Timestamp Echo option: 4 bytes of echoed timestamp plus an
 * optional 2- or 4-byte elapsed time since the timestamp was received.
 * Clears the pending echo state once sent.
 */
static void dccp_insert_option_timestamp_echo(struct sock *sk,
					      struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
#ifdef CONFIG_IP_DCCP_DEBUG
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT TX opt: " : "server TX opt: ";
#endif
	u32 tstamp_echo;
	const u32 elapsed_time =
			timeval_now_delta(&dp->dccps_timestamp_time) / 10;
	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
	const int len = 6 + elapsed_time_len;	/* type+len+4-byte echo + et */
	unsigned char *to;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
			       "timestamp echo!\n");
		return;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to    = skb_push(skb, len);
	*to++ = DCCPO_TIMESTAMP_ECHO;
	*to++ = len;

	tstamp_echo = htonl(dp->dccps_timestamp_echo);
	memcpy(to, &tstamp_echo, 4);
	to += 4;

	if (elapsed_time_len == 2) {
		const u16 var16 = htons((u16)elapsed_time);
		memcpy(to, &var16, 2);
	} else if (elapsed_time_len == 4) {
		const u32 var32 = htonl(elapsed_time);
		memcpy(to, &var32, 4);
	}

	dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
		      debug_prefix, dp->dccps_timestamp_echo,
		      len,
		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);

	/* Echo consumed: reset until the next TIMESTAMP arrives */
	dp->dccps_timestamp_echo = 0;
	dp->dccps_timestamp_time.tv_sec = 0;
	dp->dccps_timestamp_time.tv_usec = 0;
}
493
/*
 * dccp_insert_options - build the complete option area of an outgoing packet
 *
 * Inserts NDP count, Ack Vector and Timestamp Echo options as applicable,
 * lets both CCID half-connections add theirs, then pads the option area
 * with zero (Padding) bytes to a multiple of 4.
 */
void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);

	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;

	if (dp->dccps_options.dccpo_send_ndp_count)
		dccp_insert_option_ndp(sk, skb);

	if (!dccp_packet_without_ack(skb)) {
		/* Only ack-bearing packets may carry Ack Vector / ts echo */
		if (dp->dccps_options.dccpo_send_ack_vector &&
		    (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
		     DCCP_MAX_SEQNO + 1))
			dccp_insert_option_ack_vector(sk, skb);

		if (dp->dccps_timestamp_echo != 0)
			dccp_insert_option_timestamp_echo(sk, skb);
	}

	ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
	ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);

	/* XXX: insert other options when appropriate */

	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
		/* The length of all options has to be a multiple of 4 */
		int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;

		if (padding != 0) {
			padding = 4 - padding;
			memset(skb_push(skb, padding), 0, padding);
			DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
		}
	}
}
529
530struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
531 const unsigned int __nocast priority)
532{
533 struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
534
535 if (ap != NULL) {
536#ifdef CONFIG_IP_DCCP_DEBUG
537 memset(ap->dccpap_buf, 0xFF, len);
538#endif
539 ap->dccpap_buf_len = len;
540 ap->dccpap_buf_head =
541 ap->dccpap_buf_tail =
542 ap->dccpap_buf_len - 1;
543 ap->dccpap_buf_ackno =
544 ap->dccpap_ack_ackno =
545 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
546 ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
547 ap->dccpap_ack_ptr = 0;
548 ap->dccpap_time.tv_sec = 0;
549 ap->dccpap_time.tv_usec = 0;
550 ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
551 }
552
553 return ap;
554}
555
/*
 * dccp_ackpkts_free - free an ack-vector buffer; NULL is a no-op.
 * In debug builds the memory is poisoned first to catch use-after-free.
 */
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
	if (ap != NULL) {
#ifdef CONFIG_IP_DCCP_DEBUG
		memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
		kfree(ap);
	}
}
565
/* State bits (received/not-received/...) of the buffer entry at @index */
static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
				    const unsigned int index)
{
	return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
}
571
/* Run-length field of the buffer entry at @index (0 == one packet) */
static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
				  const unsigned int index)
{
	return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
}
577
/*
 * If several packets are missing, the HC-Receiver may prefer to enter multiple
 * bytes with run length 0, rather than a single byte with a larger run length;
 * this simplifies table updates if one of the missing packets arrives.
 *
 * Advances the circular buffer head by @packets entries: @packets - 1
 * NOT_RECEIVED filler bytes followed by one byte carrying @state, handling
 * wrap-around at the start of the buffer.  Returns 0 or -ENOBUFS when the
 * buffer cannot hold @packets more entries.
 */
static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
						  const unsigned int packets,
						  const unsigned char state)
{
	unsigned int gap;
	/* Signed so the wrap-around test below (new_head < 0) works */
	signed long new_head;

	if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
		return -ENOBUFS;

	gap	 = packets - 1;
	new_head = ap->dccpap_buf_head - packets;

	if (new_head < 0) {
		/* Head wrapped: fill the tail of the buffer first */
		if (gap > 0) {
			memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
			       gap + new_head + 1);
			gap = -new_head;
		}
		new_head += ap->dccpap_buf_len;
	}

	ap->dccpap_buf_head = new_head;

	if (gap > 0)
		memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
		       DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);

	ap->dccpap_buf[ap->dccpap_buf_head] = state;
	ap->dccpap_buf_vector_len += packets;
	return 0;
}
615
616/*
617 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
618 */
619int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
620{
621 /*
622 * Check at the right places if the buffer is full, if it is, tell the
623 * caller to start dropping packets till the HC-Sender acks our ACK
624 * vectors, when we will free up space in dccpap_buf.
625 *
626 * We may well decide to do buffer compression, etc, but for now lets
627 * just drop.
628 *
629 * From Appendix A:
630 *
631 * Of course, the circular buffer may overflow, either when the
632 * HC-Sender is sending data at a very high rate, when the
633 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
634 * or when the HC-Sender is forgetting to acknowledge those acks
635 * (so the HC-Receiver is unable to clean up old state). In this
636 * case, the HC-Receiver should either compress the buffer (by
637 * increasing run lengths when possible), transfer its state to
638 * a larger buffer, or, as a last resort, drop all received
639 * packets, without processing them whatsoever, until its buffer
640 * shrinks again.
641 */
642
643 /* See if this is the first ackno being inserted */
644 if (ap->dccpap_buf_vector_len == 0) {
645 ap->dccpap_buf[ap->dccpap_buf_head] = state;
646 ap->dccpap_buf_vector_len = 1;
647 } else if (after48(ackno, ap->dccpap_buf_ackno)) {
648 const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
649 ackno);
650
651 /*
652 * Look if the state of this packet is the same as the
653 * previous ackno and if so if we can bump the head len.
654 */
655 if (delta == 1 &&
656 dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
657 (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
658 DCCP_ACKPKTS_LEN_MASK))
659 ap->dccpap_buf[ap->dccpap_buf_head]++;
660 else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
661 return -ENOBUFS;
662 } else {
663 /*
664 * A.1.2. Old Packets
665 *
666 * When a packet with Sequence Number S arrives, and
667 * S <= buf_ackno, the HC-Receiver will scan the table
668 * for the byte corresponding to S. (Indexing structures
669 * could reduce the complexity of this scan.)
670 */
671 u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
672 unsigned int index = ap->dccpap_buf_head;
673
674 while (1) {
675 const u8 len = dccp_ackpkts_len(ap, index);
676 const u8 state = dccp_ackpkts_state(ap, index);
677 /*
678 * valid packets not yet in dccpap_buf have a reserved
679 * entry, with a len equal to 0.
680 */
681 if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
682 len == 0 && delta == 0) { /* Found our
683 reserved seat! */
684 dccp_pr_debug("Found %llu reserved seat!\n",
685 (unsigned long long) ackno);
686 ap->dccpap_buf[index] = state;
687 goto out;
688 }
689 /* len == 0 means one packet */
690 if (delta < len + 1)
691 goto out_duplicate;
692
693 delta -= len + 1;
694 if (++index == ap->dccpap_buf_len)
695 index = 0;
696 }
697 }
698
699 ap->dccpap_buf_ackno = ackno;
700 do_gettimeofday(&ap->dccpap_time);
701out:
702 dccp_pr_debug("");
703 dccp_ackpkts_print(ap);
704 return 0;
705
706out_duplicate:
707 /* Duplicate packet */
708 dccp_pr_debug("Received a dup or already considered lost "
709 "packet: %llu\n", (unsigned long long) ackno);
710 return -EILSEQ;
711}
712
713#ifdef CONFIG_IP_DCCP_DEBUG
714void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
715 int len)
716{
717 if (!dccp_debug)
718 return;
719
720 printk("ACK vector len=%d, ackno=%llu |", len,
721 (unsigned long long) ackno);
722
723 while (len--) {
724 const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
725 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
726
727 printk("%d,%d|", state, rl);
728 ++vector;
729 }
730
731 printk("\n");
732}
733
/* Dump the currently stored ack vector, starting at the buffer head. */
void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
{
	dccp_ackvector_print(ap->dccpap_buf_ackno,
			     ap->dccpap_buf + ap->dccpap_buf_head,
			     ap->dccpap_buf_vector_len);
}
740#endif
741
/*
 * Discard the portion of the ack vector that the HC-Sender has now
 * acknowledged, by shrinking the live vector length.
 */
static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
{
	/*
	 * As we're keeping track of the ack vector size
	 * (dccpap_buf_vector_len) and the sent ack vector size
	 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
	 * keep this code here as in the future we'll implement a vector of
	 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
	 * Appendix A. -acme
	 */
#if 0
	ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
	if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
		ap->dccpap_buf_tail -= ap->dccpap_buf_len;
#endif
	ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
}
759
/*
 * Handle an incoming ackno: if it acknowledges the packet that carried our
 * last ACK vector, the corresponding ack record can be thrown away.
 */
void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
				  u64 ackno)
{
	/* Check if we actually sent an ACK vector */
	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
		return;

	if (ackno == ap->dccpap_ack_seqno) {
#ifdef CONFIG_IP_DCCP_DEBUG
		struct dccp_sock *dp = dccp_sk(sk);
		const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT rx ack: " : "server rx ack: ";
#endif
		/* debug_prefix is only evaluated when debugging is compiled in */
		dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
			      "ack_ackno=%llu, ACKED!\n",
			      debug_prefix, 1,
			      (unsigned long long) ap->dccpap_ack_seqno,
			      (unsigned long long) ap->dccpap_ack_ackno);
		dccp_ackpkts_trow_away_ack_record(ap);
		/* DCCP_MAX_SEQNO + 1 is the "no ACK vector outstanding" sentinel */
		ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
	}
}
782
/*
 * Walk a received ACK vector (@vector, @len bytes, newest seqno @ackno) and,
 * if it covers the packet that carried our own outstanding ACK vector
 * (dccpap_ack_seqno), retire the matching ack record.
 */
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
					     struct sock *sk, u64 ackno,
					     const unsigned char len,
					     const unsigned char *vector)
{
	unsigned char i;

	/* Check if we actually sent an ACK vector */
	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
		return;
	/*
	 * We're in the receiver half connection, so if the received an ACK
	 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
	 * not interested.
	 *
	 * Extra explanation with example:
	 *
	 * if we received an ACK vector with ackno 50, it can only be acking
	 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
	 */
	/* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
	if (before48(ackno, ap->dccpap_ack_seqno)) {
		/* dccp_pr_debug_cat("yes\n"); */
		return;
	}
	/* dccp_pr_debug_cat("no\n"); */

	/* Each vector byte covers seqnos [ackno - rl, ackno] */
	i = len;
	while (i--) {
		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
		u64 ackno_end_rl;

		dccp_set_seqno(&ackno_end_rl, ackno - rl);

		/*
		 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
		 * ap->dccpap_ack_seqno, ackno);
		 */
		if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
			const u8 state = (*vector &
					  DCCP_ACKPKTS_STATE_MASK) >> 6;
			/* dccp_pr_debug_cat("yes\n"); */

			/* Only a received (non-NOT_RECEIVED) state retires
			 * the ack record */
			if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
#ifdef CONFIG_IP_DCCP_DEBUG
				struct dccp_sock *dp = dccp_sk(sk);
				const char *debug_prefix =
					dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT rx ack: " : "server rx ack: ";
#endif
				dccp_pr_debug("%sACK vector 0, len=%d, "
					      "ack_seqno=%llu, ack_ackno=%llu, "
					      "ACKED!\n",
					      debug_prefix, len,
					      (unsigned long long)
					      ap->dccpap_ack_seqno,
					      (unsigned long long)
					      ap->dccpap_ack_ackno);
				dccp_ackpkts_trow_away_ack_record(ap);
			}
			/*
			 * If dccpap_ack_seqno was not received, no problem
			 * we'll send another ACK vector.
			 */
			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
			break;
		}
		/* dccp_pr_debug_cat("no\n"); */

		/* Advance to the next (older) run */
		dccp_set_seqno(&ackno, ackno_end_rl - 1);
		++vector;
	}
}
diff --git a/net/dccp/output.c b/net/dccp/output.c
new file mode 100644
index 000000000000..28de157a4326
--- /dev/null
+++ b/net/dccp/output.c
@@ -0,0 +1,528 @@
1/*
2 * net/dccp/output.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
/* An ACK is going out: the pending delayed-ACK timer is no longer needed. */
static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
26
/*
 * All SKB's seen here are completely headerless. It is our
 * job to build the DCCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * Returns 0 (or a negative error) as for ip_queue_xmit(); NET_XMIT_CN is
 * reported as success.  A NULL @skb yields -ENOBUFS.
 */
int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (likely(skb != NULL)) {
		const struct inet_sock *inet = inet_sk(sk);
		struct dccp_sock *dp = dccp_sk(sk);
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		struct dccp_hdr *dh;
		/* XXX For now we're using only 48 bits sequence numbers */
		const int dccp_header_size = sizeof(*dh) +
					     sizeof(struct dccp_hdr_ext) +
					  dccp_packet_hdr_len(dcb->dccpd_type);
		int err, set_ack = 1;
		u64 ackno = dp->dccps_gsr;

		/* Every outgoing packet consumes a fresh sequence number */
		dccp_inc_seqno(&dp->dccps_gss);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_DATA:
			/* Pure DATA carries no acknowledgement */
			set_ack = 0;
			break;
		case DCCP_PKT_SYNC:
		case DCCP_PKT_SYNCACK:
			/* SYNC/SYNCACK ack the seqno that triggered them */
			ackno = dcb->dccpd_seq;
			break;
		}

		dcb->dccpd_seq = dp->dccps_gss;
		/* Options must go in before the fixed header is pushed */
		dccp_insert_options(sk, skb);

		skb->h.raw = skb_push(skb, dccp_header_size);
		dh = dccp_hdr(skb);
		/*
		 * Data packets are not cloned as they are never retransmitted
		 */
		if (skb_cloned(skb))
			skb_set_owner_w(skb, sk);

		/* Build DCCP header and checksum it. */
		memset(dh, 0, dccp_header_size);
		dh->dccph_type = dcb->dccpd_type;
		dh->dccph_sport = inet->sport;
		dh->dccph_dport = inet->dport;
		dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
		dh->dccph_ccval = dcb->dccpd_ccval;
		/* XXX For now we're using only 48 bits sequence numbers */
		dh->dccph_x = 1;

		dp->dccps_awh = dp->dccps_gss;
		dccp_hdr_set_seq(dh, dp->dccps_gss);
		if (set_ack)
			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_REQUEST:
			dccp_hdr_request(skb)->dccph_req_service =
							dcb->dccpd_service;
			break;
		case DCCP_PKT_RESET:
			dccp_hdr_reset(skb)->dccph_reset_code =
							dcb->dccpd_reset_code;
			break;
		}

		dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
						      inet->daddr);

		if (set_ack)
			dccp_event_ack_sent(sk);

		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);

		err = ip_queue_xmit(skb, 0);
		if (err <= 0)
			return err;

		/* NET_XMIT_CN is special. It does not guarantee,
		 * that this packet is lost. It tells that device
		 * is about to start to drop packets or already
		 * drops some packets of the same priority and
		 * invokes us to send less aggressively.
		 */
		return err == NET_XMIT_CN ? 0 : err;
	}
	return -ENOBUFS;
}
118
119unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
120{
121 struct dccp_sock *dp = dccp_sk(sk);
122 int mss_now;
123
124 /*
125 * FIXME: we really should be using the af_specific thing to support
126 * IPv6.
127 * mss_now = pmtu - tp->af_specific->net_header_len -
128 * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
129 */
130 mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
131 sizeof(struct dccp_hdr_ext);
132
133 /* Now subtract optional transport overhead */
134 mss_now -= dp->dccps_ext_header_len;
135
136 /*
137 * FIXME: this should come from the CCID infrastructure, where, say,
138 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
139 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
140 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
141 * make it a multiple of 4
142 */
143
144 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
145
146 /* And store cached results */
147 dp->dccps_pmtu_cookie = pmtu;
148 dp->dccps_mss_cache = mss_now;
149
150 return mss_now;
151}
152
/*
 * sk->sk_write_space callback: wake up writers sleeping on this socket and,
 * when it is actually writeable, deliver SIGIO to async waiters.
 */
void dccp_write_space(struct sock *sk)
{
	/* sk_callback_lock protects sk_sleep against concurrent teardown */
	read_lock(&sk->sk_callback_lock);

	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	/* Should agree with poll, otherwise some programs break */
	if (sock_writeable(sk))
		sk_wake_async(sk, 2, POLL_OUT);

	read_unlock(&sk->sk_callback_lock);
}
165
/**
 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
 * @sk: socket to wait for
 * @skb: the packet we want to send
 * @timeo: for how long (in/out, decremented by the time slept)
 *
 * Sleeps until the CCID allows @skb to be sent, the socket errors out,
 * the timeout expires (-EAGAIN) or a signal arrives.  Called with the
 * socket locked; the lock is dropped while sleeping.
 */
static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
			      long *timeo)
{
	struct dccp_sock *dp = dccp_sk(sk);
	DEFINE_WAIT(wait);
	long delay;
	int rc;

	while (1) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo)
			goto do_nonblock;
		if (signal_pending(current))
			goto do_interrupted;

		/* rc > 0 is a delay hint (ms) from the CCID; rc <= 0 means
		 * "send now" (or a hard error) */
		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					    skb->len);
		if (rc <= 0)
			break;
		delay = msecs_to_jiffies(rc);
		if (delay > *timeo || delay < 0)
			goto do_nonblock;

		/* Drop the socket lock while sleeping off the CCID delay */
		sk->sk_write_pending++;
		release_sock(sk);
		*timeo -= schedule_timeout(delay);
		lock_sock(sk);
		sk->sk_write_pending--;
	}
out:
	finish_wait(sk->sk_sleep, &wait);
	return rc;

do_error:
	rc = -EPIPE;
	goto out;
do_nonblock:
	rc = -EAGAIN;
	goto out;
do_interrupted:
	rc = sock_intr_errno(*timeo);
	goto out;
}
217
/*
 * Ask the CCID for permission (waiting up to @timeo if necessary), decide
 * whether the packet should be DATA or DATAACK, then transmit @skb and
 * notify the CCID.  Returns 0 on success or a negative error.
 */
int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					 skb->len);

	/* err > 0 is a CCID delay hint; sleep until sending is allowed */
	if (err > 0)
		err = dccp_wait_for_ccid(sk, skb, timeo);

	if (err == 0) {
		const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		const int len = skb->len;

		if (sk->sk_state == DCCP_PARTOPEN) {
			/* See 8.1.5. Handshake Completion */
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  inet_csk(sk)->icsk_rto,
						  DCCP_RTO_MAX);
			dcb->dccpd_type = DCCP_PKT_DATAACK;
			/*
			 * FIXME: we really should have a
			 * dccps_ack_pending or use icsk.
			 */
		} else if (inet_csk_ack_scheduled(sk) ||
			   dp->dccps_timestamp_echo != 0 ||
			   (dp->dccps_options.dccpo_send_ack_vector &&
			    ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
			    ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
			/* Something to acknowledge: piggyback it on the data */
			dcb->dccpd_type = DCCP_PKT_DATAACK;
		else
			dcb->dccpd_type = DCCP_PKT_DATA;

		err = dccp_transmit_skb(sk, skb);
		/* Tell the CCID how much we actually put on the wire */
		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
	}

	return err;
}
258
259int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
260{
261 if (inet_sk_rebuild_header(sk) != 0)
262 return -EHOSTUNREACH; /* Routing failure or similar. */
263
264 return dccp_transmit_skb(sk, (skb_cloned(skb) ?
265 pskb_copy(skb, GFP_ATOMIC):
266 skb_clone(skb, GFP_ATOMIC)));
267}
268
/*
 * Build a RESPONSE packet answering the connection @req received on the
 * listening socket @sk.  Returns the skb ready for transmission, or NULL
 * on allocation failure.
 */
struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
				   struct request_sock *req)
{
	struct dccp_hdr *dh;
	const int dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_response);
	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
					       dccp_header_size, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
	DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
	/* Options first, then the fixed header is pushed in front of them */
	dccp_insert_options(sk, skb);

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_rsk(req)->rmt_port;
	dh->dccph_doff = (dccp_header_size +
			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESPONSE;
	/* 48-bit (extended) sequence numbers */
	dh->dccph_x = 1;
	/* Echo the request's ISS/ISR as our seq/ack */
	dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);

	dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
					      inet_rsk(req)->rmt_addr);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}
312
/*
 * Build a RESET packet carrying @code for connection @sk via route @dst.
 * Consumes a sequence number.  Returns the skb or NULL on allocation
 * failure.
 */
struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
				const enum dccp_reset_codes code)

{
	struct dccp_hdr *dh;
	struct dccp_sock *dp = dccp_sk(sk);
	const int dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
					       dccp_header_size, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	/* A RESET consumes a sequence number of its own */
	dccp_inc_seqno(&dp->dccps_gss);

	DCCP_SKB_CB(skb)->dccpd_reset_code = code;
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
	DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
	/* Options first, then the fixed header is pushed in front of them */
	dccp_insert_options(sk, skb);

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_sk(sk)->dport;
	dh->dccph_doff = (dccp_header_size +
			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESET;
	/* 48-bit (extended) sequence numbers */
	dh->dccph_x = 1;
	dccp_hdr_set_seq(dh, dp->dccps_gss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);

	dccp_hdr_reset(skb)->dccph_reset_code = code;

	dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
					      inet_sk(sk)->daddr);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}
363
/*
 * Do all connect socket setups that can be done AF independent.
 */
static inline void dccp_connect_init(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	/* Fresh start: clear any stale error/done state */
	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);

	/* Derive the MSS from the route's MTU */
	dccp_sync_mss(sk, dst_mtu(dst));

	/*
	 * FIXME: set dp->{dccps_swh,dccps_swl}, with
	 * something like dccp_inc_seq
	 */

	icsk->icsk_retransmits = 0;
}
384
/*
 * Start the active (client) handshake: build and send the REQUEST packet
 * and arm the retransmit timer.  Returns 0 or -ENOBUFS.
 */
int dccp_connect(struct sock *sk)
{
	struct sk_buff *skb;
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_connect_init(sk);

	/* +15 gives alignment slack for the header push below */
	skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(skb == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
	/* FIXME: set service to something meaningful, coming
	 * from userspace*/
	DCCP_SKB_CB(skb)->dccpd_service = 0;
	skb->csum = 0;
	skb_set_owner_w(skb, sk);

	/* Keep the original for retransmission; transmit a clone */
	BUG_TRAP(sk->sk_send_head == NULL);
	sk->sk_send_head = skb;
	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the REQUEST until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  icsk->icsk_rto, DCCP_RTO_MAX);
	return 0;
}
416
417void dccp_send_ack(struct sock *sk)
418{
419 /* If we have been reset, we may not send again. */
420 if (sk->sk_state != DCCP_CLOSED) {
421 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
422
423 if (skb == NULL) {
424 inet_csk_schedule_ack(sk);
425 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
426 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
427 TCP_DELACK_MAX,
428 DCCP_RTO_MAX);
429 return;
430 }
431
432 /* Reserve space for headers */
433 skb_reserve(skb, MAX_DCCP_HEADER);
434 skb->csum = 0;
435 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
436 skb_set_owner_w(skb, sk);
437 dccp_transmit_skb(sk, skb);
438 }
439}
440
441EXPORT_SYMBOL_GPL(dccp_send_ack);
442
/*
 * Schedule an ACK to go out later via the delayed-ACK timer, sending it
 * immediately if the timer is currently blocked.
 */
void dccp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	/*
	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
	 * with using 2s, and active senders also piggyback the ACK into a
	 * DATAACK packet, so this is really for quiescent senders.
	 */
	unsigned long timeout = jiffies + 2 * HZ;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 *
		 * FIXME: check the "about to expire" part
		 */
		if (icsk->icsk_ack.blocked) {
			dccp_send_ack(sk);
			return;
		}

		/* Keep the earlier deadline */
		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
472
473void dccp_send_sync(struct sock *sk, const u64 seq,
474 const enum dccp_pkt_type pkt_type)
475{
476 /*
477 * We are not putting this on the write queue, so
478 * dccp_transmit_skb() will set the ownership to this
479 * sock.
480 */
481 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
482
483 if (skb == NULL)
484 /* FIXME: how to make sure the sync is sent? */
485 return;
486
487 /* Reserve space for headers and prepare control bits. */
488 skb_reserve(skb, MAX_DCCP_HEADER);
489 skb->csum = 0;
490 DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
491 DCCP_SKB_CB(skb)->dccpd_seq = seq;
492
493 skb_set_owner_w(skb, sk);
494 dccp_transmit_skb(sk, skb);
495}
496
/*
 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
 * any circumstances.
 */
void dccp_send_close(struct sock *sk, const int active)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	/* Active close may sleep; passive close runs in softirq context */
	const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC;

	skb = alloc_skb(sk->sk_prot->max_header, prio);
	if (skb == NULL)
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	/* Clients send CLOSE, servers ask the client to close via CLOSEREQ */
	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;

	skb_set_owner_w(skb, sk);
	if (active) {
		/* Keep the original queued for retransmission */
		BUG_TRAP(sk->sk_send_head == NULL);
		sk->sk_send_head = skb;
		dccp_transmit_skb(sk, skb_clone(skb, prio));
	} else
		dccp_transmit_skb(sk, skb);

	/* The half-connections are done: let the CCIDs clean up */
	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
new file mode 100644
index 000000000000..18a0e69c9dc7
--- /dev/null
+++ b/net/dccp/proto.c
@@ -0,0 +1,826 @@
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13#include <linux/dccp.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/if_arp.h>
22#include <linux/init.h>
23#include <linux/random.h>
24#include <net/checksum.h>
25
26#include <net/inet_common.h>
27#include <net/ip.h>
28#include <net/protocol.h>
29#include <net/sock.h>
30#include <net/xfrm.h>
31
32#include <asm/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/timer.h>
35#include <linux/delay.h>
36#include <linux/poll.h>
37#include <linux/dccp.h>
38
39#include "ccid.h"
40#include "dccp.h"
41
/* Per-CPU SNMP (MIB) counters for the DCCP protocol. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

/* Number of DCCP sockets whose owning process has gone away. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/* Hooks DCCP into the IPv4 protocol demultiplexer. */
static struct net_protocol dccp_protocol = {
	.handler = dccp_v4_rcv,
	.err_handler = dccp_v4_err,
};
50
51const char *dccp_packet_name(const int type)
52{
53 static const char *dccp_packet_names[] = {
54 [DCCP_PKT_REQUEST] = "REQUEST",
55 [DCCP_PKT_RESPONSE] = "RESPONSE",
56 [DCCP_PKT_DATA] = "DATA",
57 [DCCP_PKT_ACK] = "ACK",
58 [DCCP_PKT_DATAACK] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE] = "CLOSE",
61 [DCCP_PKT_RESET] = "RESET",
62 [DCCP_PKT_SYNC] = "SYNC",
63 [DCCP_PKT_SYNCACK] = "SYNCACK",
64 };
65
66 if (type >= DCCP_NR_PKT_TYPES)
67 return "INVALID";
68 else
69 return dccp_packet_names[type];
70}
71
72EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74const char *dccp_state_name(const int state)
75{
76 static char *dccp_state_names[] = {
77 [DCCP_OPEN] = "OPEN",
78 [DCCP_REQUESTING] = "REQUESTING",
79 [DCCP_PARTOPEN] = "PARTOPEN",
80 [DCCP_LISTEN] = "LISTEN",
81 [DCCP_RESPOND] = "RESPOND",
82 [DCCP_CLOSING] = "CLOSING",
83 [DCCP_TIME_WAIT] = "TIME_WAIT",
84 [DCCP_CLOSED] = "CLOSED",
85 };
86
87 if (state >= DCCP_MAX_STATES)
88 return "INVALID STATE!";
89 else
90 return dccp_state_names[state];
91}
92
93EXPORT_SYMBOL_GPL(dccp_state_name);
94
/* Switch the socket to the listening role and start the accept queue. */
static inline int dccp_listen_start(struct sock *sk)
{
	dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
100
/*
 * Abort the connection and return the socket to CLOSED so it can be
 * reused (e.g. connect(AF_UNSPEC)).  Caller holds the socket lock.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	/* Drop all timers and any data still queued in either direction */
	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the local address unless the user explicitly bound one */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
142
/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
198
/* No protocol-specific ioctls implemented yet. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
204
205int dccp_setsockopt(struct sock *sk, int level, int optname,
206 char __user *optval, int optlen)
207{
208 struct dccp_sock *dp;
209 int err;
210 int val;
211
212 if (level != SOL_DCCP)
213 return ip_setsockopt(sk, level, optname, optval, optlen);
214
215 if (optlen < sizeof(int))
216 return -EINVAL;
217
218 if (get_user(val, (int __user *)optval))
219 return -EFAULT;
220
221 lock_sock(sk);
222
223 dp = dccp_sk(sk);
224 err = 0;
225
226 switch (optname) {
227 case DCCP_SOCKOPT_PACKET_SIZE:
228 dp->dccps_packet_size = val;
229 break;
230 default:
231 err = -ENOPROTOOPT;
232 break;
233 }
234
235 release_sock(sk);
236 return err;
237}
238
239int dccp_getsockopt(struct sock *sk, int level, int optname,
240 char __user *optval, int __user *optlen)
241{
242 struct dccp_sock *dp;
243 int val, len;
244
245 if (level != SOL_DCCP)
246 return ip_getsockopt(sk, level, optname, optval, optlen);
247
248 if (get_user(len, optlen))
249 return -EFAULT;
250
251 len = min_t(unsigned int, len, sizeof(int));
252 if (len < 0)
253 return -EINVAL;
254
255 dp = dccp_sk(sk);
256
257 switch (optname) {
258 case DCCP_SOCKOPT_PACKET_SIZE:
259 val = dp->dccps_packet_size;
260 break;
261 default:
262 return -ENOPROTOOPT;
263 }
264
265 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
266 return -EFAULT;
267
268 return 0;
269}
270
/*
 * Send one datagram of at most MSS bytes.  DCCP preserves packet
 * boundaries, so there is no coalescing: each sendmsg maps to one packet.
 * Returns the byte count sent or a negative error.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* One datagram per packet: it must fit in the cached MSS */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock while allocating; sock_alloc_send_skb may sleep */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorith similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
	if (rc != 0)
		goto out_discard;
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
328
/*
 * Receive one datagram.  Returns the number of bytes copied (possibly
 * truncated, with MSG_TRUNC set), 0 on EOF/RESET/CLOSE, or a negative
 * error.  Blocks up to the socket receive timeout unless @nonblock.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		/*
		 * NOTE(review): non-data packets are eaten here even with
		 * MSG_PEEK set (the !MSG_PEEK guard only covers the
		 * found_fin_ok path) - confirm this asymmetry is intended.
		 */
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Nothing usable queued: sleep until data or timeout */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* Copy at most the caller's buffer size; flag truncation */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
425
426static int inet_dccp_listen(struct socket *sock, int backlog)
427{
428 struct sock *sk = sock->sk;
429 unsigned char old_state;
430 int err;
431
432 lock_sock(sk);
433
434 err = -EINVAL;
435 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
436 goto out;
437
438 old_state = sk->sk_state;
439 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
440 goto out;
441
442 /* Really, if the socket is already in listen state
443 * we can only allow the backlog to be adjusted.
444 */
445 if (old_state != DCCP_LISTEN) {
446 /*
447 * FIXME: here it probably should be sk->sk_prot->listen_start
448 * see tcp_listen_start
449 */
450 err = dccp_listen_start(sk);
451 if (err)
452 goto out;
453 }
454 sk->sk_max_ack_backlog = backlog;
455 err = 0;
456
457out:
458 release_sock(sk);
459 return err;
460}
461
/*
 * State-transition table for dccp_close_state(): indexed by the current
 * sk_state, each entry encodes the next state (low bits, DCCP_STATE_MASK)
 * optionally OR'ed with DCCP_ACTION_FIN, which tells the caller that a
 * CLOSE/CLOSEREQ must be sent before the socket can die.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:        new state:           action: */
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
474
475static int dccp_close_state(struct sock *sk)
476{
477 const int next = dccp_new_state[sk->sk_state];
478 const int ns = next & DCCP_STATE_MASK;
479
480 if (ns != sk->sk_state)
481 dccp_set_state(sk, ns);
482
483 return next & DCCP_ACTION_FIN;
484}
485
/*
 * dccp_close - close a DCCP socket (user-level close(2)/shutdown path).
 *
 * Flushes the receive queue, sends CLOSE/CLOSEREQ if the state machine
 * requires it, then orphans the socket and either destroys it (if already
 * CLOSED) or leaves it to the protocol timers.  The locking dance below
 * (release_sock, then bh_lock_sock) is deliberate and order-sensitive.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: stop the listen queue, no CLOSE needed. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* State machine says a CLOSE/CLOSEREQ must go out. */
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
566
/*
 * dccp_shutdown - shutdown(2) handler; not implemented yet, only logs
 * the call.  @how (SHUT_RD/SHUT_WR/SHUT_RDWR) is currently ignored.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
571
/*
 * BSD socket-layer operations for AF_INET/SOCK_DCCP sockets; mostly the
 * generic inet_* handlers, with DCCP-specific poll and listen.
 */
static struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
594
extern struct net_proto_family inet_family_ops;

/*
 * Registration entry tying SOCK_DCCP/IPPROTO_DCCP to dccp_v4_prot and
 * the inet_dccp_ops socket operations; passed to inet_register_protosw().
 */
static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= 0,
};
606
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

/* __initdata: discarded after boot, used only by dccp_ctl_sock_init(). */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
616
/*
 * Create the kernel-internal control socket used to answer OOTB packets.
 * Returns 0 on success or the negative errno from sock_create_kern().
 */
static int __init dccp_ctl_sock_init(void)
{
	int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
				  &dccp_ctl_socket);
	if (rc < 0)
		printk(dccp_ctl_socket_err_msg);
	else {
		/* May be used from softirq context, so no sleeping allocs. */
		dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
		inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

		/* Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
	}

	return rc;
}
636
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/*
 * Release the OOTB control socket on module unload; idempotent (the
 * pointer is NULLed so a second call is a no-op).
 */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
648
649static int __init init_dccp_v4_mibs(void)
650{
651 int rc = -ENOMEM;
652
653 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
654 if (dccp_statistics[0] == NULL)
655 goto out;
656
657 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
658 if (dccp_statistics[1] == NULL)
659 goto out_free_one;
660
661 rc = 0;
662out:
663 return rc;
664out_free_one:
665 free_percpu(dccp_statistics[0]);
666 dccp_statistics[0] = NULL;
667 goto out;
668
669}
670
/* Optional override for the established-hash sizing in dccp_init(). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Non-zero enables dccp_pr_debug() output. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
680
681static int __init dccp_init(void)
682{
683 unsigned long goal;
684 int ehash_order, bhash_order, i;
685 int rc = proto_register(&dccp_v4_prot, 1);
686
687 if (rc)
688 goto out;
689
690 dccp_hashinfo.bind_bucket_cachep =
691 kmem_cache_create("dccp_bind_bucket",
692 sizeof(struct inet_bind_bucket), 0,
693 SLAB_HWCACHE_ALIGN, NULL, NULL);
694 if (!dccp_hashinfo.bind_bucket_cachep)
695 goto out_proto_unregister;
696
697 /*
698 * Size and allocate the main established and bind bucket
699 * hash tables.
700 *
701 * The methodology is similar to that of the buffer cache.
702 */
703 if (num_physpages >= (128 * 1024))
704 goal = num_physpages >> (21 - PAGE_SHIFT);
705 else
706 goal = num_physpages >> (23 - PAGE_SHIFT);
707
708 if (thash_entries)
709 goal = (thash_entries *
710 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
711 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
712 ;
713 do {
714 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
715 sizeof(struct inet_ehash_bucket);
716 dccp_hashinfo.ehash_size >>= 1;
717 while (dccp_hashinfo.ehash_size &
718 (dccp_hashinfo.ehash_size - 1))
719 dccp_hashinfo.ehash_size--;
720 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
721 __get_free_pages(GFP_ATOMIC, ehash_order);
722 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
723
724 if (!dccp_hashinfo.ehash) {
725 printk(KERN_CRIT "Failed to allocate DCCP "
726 "established hash table\n");
727 goto out_free_bind_bucket_cachep;
728 }
729
730 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
731 rwlock_init(&dccp_hashinfo.ehash[i].lock);
732 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
733 }
734
735 bhash_order = ehash_order;
736
737 do {
738 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
739 sizeof(struct inet_bind_hashbucket);
740 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
741 bhash_order > 0)
742 continue;
743 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
744 __get_free_pages(GFP_ATOMIC, bhash_order);
745 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
746
747 if (!dccp_hashinfo.bhash) {
748 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
749 goto out_free_dccp_ehash;
750 }
751
752 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
753 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
754 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
755 }
756
757 if (init_dccp_v4_mibs())
758 goto out_free_dccp_bhash;
759
760 rc = -EAGAIN;
761 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
762 goto out_free_dccp_v4_mibs;
763
764 inet_register_protosw(&dccp_v4_protosw);
765
766 rc = dccp_ctl_sock_init();
767 if (rc)
768 goto out_unregister_protosw;
769out:
770 return rc;
771out_unregister_protosw:
772 inet_unregister_protosw(&dccp_v4_protosw);
773 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
774out_free_dccp_v4_mibs:
775 free_percpu(dccp_statistics[0]);
776 free_percpu(dccp_statistics[1]);
777 dccp_statistics[0] = dccp_statistics[1] = NULL;
778out_free_dccp_bhash:
779 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
780 dccp_hashinfo.bhash = NULL;
781out_free_dccp_ehash:
782 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
783 dccp_hashinfo.ehash = NULL;
784out_free_bind_bucket_cachep:
785 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
786 dccp_hashinfo.bind_bucket_cachep = NULL;
787out_proto_unregister:
788 proto_unregister(&dccp_v4_prot);
789 goto out;
790}
791
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

/*
 * dccp_fini - module exit: tear down everything dccp_init() set up, in
 * reverse order of registration.
 */
static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}
813
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
 * IPPROTO_DCCP (33) values directly. Also cover the case where the
 * protocol is not specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
new file mode 100644
index 000000000000..aa34b576e228
--- /dev/null
+++ b/net/dccp/timer.c
@@ -0,0 +1,255 @@
1/*
2 * net/dccp/timer.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include "dccp.h"
18
static void dccp_write_timer(unsigned long data);
static void dccp_keepalive_timer(unsigned long data);
static void dccp_delack_timer(unsigned long data);

/*
 * Install the three DCCP per-socket timers (retransmit, delayed-ACK,
 * keepalive) via the generic inet_connection_sock helper.
 */
void dccp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
				  &dccp_keepalive_timer);
}
28
/*
 * Abort the connection after a write timeout: report the error to the
 * user, send RESET(aborted) to the peer and mark the socket done.
 */
static void dccp_write_err(struct sock *sk)
{
	/* Prefer the soft error collected earlier, else plain timeout. */
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);

	dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
	dccp_done(sk);
	DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
38
/* A write timeout has occurred. Process the after effects. */
static int dccp_write_timeout(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int retry_until;

	if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
		/* Handshake retries: nudge routing after the first retry. */
		if (icsk->icsk_retransmits != 0)
			dst_negative_advice(&sk->sk_dst_cache);
		retry_until = icsk->icsk_syn_retries ? :
			    /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
	} else {
		if (icsk->icsk_retransmits >=
		     /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
			   black hole detection. :-(

			   It is place to make it. It is not made. I do not want
			   to make it. It is disgusting. It does not work in any
			   case. Let me to cite the same draft, which requires for
			   us to implement this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages.  It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols.  The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it.  It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

			   Golden words :-).
			 */

			dst_negative_advice(&sk->sk_dst_cache);
		}

		retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
		/*
		 * FIXME: see tcp_write_timout and tcp_out_of_resources
		 */
	}

	if (icsk->icsk_retransmits >= retry_until) {
		/* Has it gone just too far? Abort the connection. */
		dccp_write_err(sk);
		return 1;
	}
	return 0;
}
89
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
static void dccp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		icsk->icsk_ack.blocked = 1;
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
		sk_reset_timer(sk, &icsk->icsk_delack_timer,
			       jiffies + TCP_DELACK_MIN);
		goto out;
	}

	if (sk->sk_state == DCCP_CLOSED ||
	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;
	/* Fired early: re-arm for the real deadline. */
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer,
			       icsk->icsk_ack.timeout);
		goto out;
	}

	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		if (!icsk->icsk_ack.pingpong) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
						 icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			icsk->icsk_ack.pingpong = 0;
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		}
		dccp_send_ack(sk);
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
136
/*
 * The DCCP retransmit timer.
 */
static void dccp_retransmit_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/*
	 * sk->sk_send_head has to have one skb with
	 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
	 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
	 * (PARTOPEN timer), etc).
	 */
	BUG_TRAP(sk->sk_send_head != NULL);

	/*
	 * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
	 * sent, no need to retransmit, this sock is dead.
	 */
	if (dccp_write_timeout(sk))
		goto out;

	/*
	 * We want to know the number of packets retransmitted, not the
	 * total number of retransmissions of clones of original packets.
	 */
	if (icsk->icsk_retransmits == 0)
		DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);

	if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
		/*
		 * Retransmission failed because of local congestion,
		 * do not backoff.
		 */
		if (icsk->icsk_retransmits == 0)
			icsk->icsk_retransmits = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  DCCP_RTO_MAX);
		goto out;
	}

	/* Exponential backoff, capped at DCCP_RTO_MAX. */
	icsk->icsk_backoff++;
	icsk->icsk_retransmits++;

	icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
				  DCCP_RTO_MAX);
	if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
		__sk_dst_reset(sk);
out:;
}
190
/*
 * Timer callback dispatching the pending transmit event (currently only
 * ICSK_TIME_RETRANS) once the socket is free of user-context ownership.
 */
static void dccp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event = 0;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later */
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
			       jiffies + (HZ / 20));
		goto out;
	}

	if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
		goto out;

	/* Fired early: re-arm for the real deadline. */
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
			       icsk->icsk_timeout);
		goto out;
	}

	event = icsk->icsk_pending;
	icsk->icsk_pending = 0;

	switch (event) {
	case ICSK_TIME_RETRANS:
		dccp_retransmit_timer(sk);
		break;
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
226
/*
 * Timer for listening sockets: prunes the request-sock queue,
 * retransmitting or dropping pending connection RESPONSEs.
 */
static void dccp_response_timer(struct sock *sk)
{
	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
				   DCCP_RTO_MAX);
}
235
236static void dccp_keepalive_timer(unsigned long data)
237{
238 struct sock *sk = (struct sock *)data;
239
240 /* Only process if socket is not in use. */
241 bh_lock_sock(sk);
242 if (sock_owned_by_user(sk)) {
243 /* Try again later. */
244 inet_csk_reset_keepalive_timer(sk, HZ / 20);
245 goto out;
246 }
247
248 if (sk->sk_state == DCCP_LISTEN) {
249 dccp_response_timer(sk);
250 goto out;
251 }
252out:
253 bh_unlock_sock(sk);
254 sock_put(sk);
255}