aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig50
-rw-r--r--net/dccp/Makefile10
-rw-r--r--net/dccp/ccid.c139
-rw-r--r--net/dccp/ccid.h180
-rw-r--r--net/dccp/ccids/Kconfig29
-rw-r--r--net/dccp/ccids/Makefile5
-rw-r--r--net/dccp/ccids/ccid3.c1214
-rw-r--r--net/dccp/ccids/ccid3.h143
-rw-r--r--net/dccp/ccids/lib/Makefile3
-rw-r--r--net/dccp/ccids/lib/loss_interval.c144
-rw-r--r--net/dccp/ccids/lib/loss_interval.h61
-rw-r--r--net/dccp/ccids/lib/packet_history.c398
-rw-r--r--net/dccp/ccids/lib/packet_history.h200
-rw-r--r--net/dccp/ccids/lib/tfrc.h22
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c644
-rw-r--r--net/dccp/dccp.h485
-rw-r--r--net/dccp/diag.c71
-rw-r--r--net/dccp/input.c600
-rw-r--r--net/dccp/ipv4.c1353
-rw-r--r--net/dccp/minisocks.c265
-rw-r--r--net/dccp/options.c887
-rw-r--r--net/dccp/output.c525
-rw-r--r--net/dccp/proto.c826
-rw-r--r--net/dccp/timer.c255
24 files changed, 8509 insertions, 0 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
new file mode 100644
index 000000000000..187ac182e24b
--- /dev/null
+++ b/net/dccp/Kconfig
@@ -0,0 +1,50 @@
1menu "DCCP Configuration (EXPERIMENTAL)"
2 depends on INET && EXPERIMENTAL
3
4config IP_DCCP
5 tristate "The DCCP Protocol (EXPERIMENTAL)"
6 ---help---
7 Datagram Congestion Control Protocol
8
9 From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
10
11 The Datagram Congestion Control Protocol (DCCP) is a transport
12 protocol that implements bidirectional, unicast connections of
13 congestion-controlled, unreliable datagrams. It should be suitable
14 for use by applications such as streaming media, Internet telephony,
15 and on-line games.
16
17 To compile this protocol support as a module, choose M here: the
18 module will be called dccp.
19
20 If in doubt, say N.
21
22config INET_DCCP_DIAG
23 depends on IP_DCCP && INET_DIAG
24 def_tristate y if (IP_DCCP = y && INET_DIAG = y)
25 def_tristate m
26
27source "net/dccp/ccids/Kconfig"
28
29menu "DCCP Kernel Hacking"
30 depends on IP_DCCP && DEBUG_KERNEL=y
31
32config IP_DCCP_DEBUG
33 bool "DCCP debug messages"
34 ---help---
35 Only use this if you're hacking DCCP.
36
37 Just say N.
38
39config IP_DCCP_UNLOAD_HACK
40 depends on IP_DCCP=m && IP_DCCP_CCID3=m
41 bool "DCCP control sock unload hack"
42 ---help---
43 Enable this to be able to unload the dccp module when it
44 has only one refcount held, the control sock one. Just execute
45 "rmmod dccp_ccid3 dccp"
46
47 Just say N.
48endmenu
49
50endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
new file mode 100644
index 000000000000..fb97bb042455
--- /dev/null
+++ b/net/dccp/Makefile
@@ -0,0 +1,10 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o
2
3dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
4 timer.o
5
6obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
7
8dccp_diag-y := diag.o
9
10obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
new file mode 100644
index 000000000000..9d8fc0e289ea
--- /dev/null
+++ b/net/dccp/ccid.c
@@ -0,0 +1,139 @@
1/*
2 * net/dccp/ccid.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * CCID infrastructure
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "ccid.h"
15
16static struct ccid *ccids[CCID_MAX];
17#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
18static atomic_t ccids_lockct = ATOMIC_INIT(0);
19static DEFINE_SPINLOCK(ccids_lock);
20
21/*
22 * The strategy is: modifications to the ccids vector are short, do not
23 * sleep and are very rare, but read access should be free of any exclusive locks.
24 */
25static void ccids_write_lock(void)
26{
27 spin_lock(&ccids_lock);
28 while (atomic_read(&ccids_lockct) != 0) {
29 spin_unlock(&ccids_lock);
30 yield();
31 spin_lock(&ccids_lock);
32 }
33}
34
35static inline void ccids_write_unlock(void)
36{
37 spin_unlock(&ccids_lock);
38}
39
40static inline void ccids_read_lock(void)
41{
42 atomic_inc(&ccids_lockct);
43 spin_unlock_wait(&ccids_lock);
44}
45
46static inline void ccids_read_unlock(void)
47{
48 atomic_dec(&ccids_lockct);
49}
50
51#else
52#define ccids_write_lock() do { } while(0)
53#define ccids_write_unlock() do { } while(0)
54#define ccids_read_lock() do { } while(0)
55#define ccids_read_unlock() do { } while(0)
56#endif
57
58int ccid_register(struct ccid *ccid)
59{
60 int err;
61
62 if (ccid->ccid_init == NULL)
63 return -1;
64
65 ccids_write_lock();
66 err = -EEXIST;
67 if (ccids[ccid->ccid_id] == NULL) {
68 ccids[ccid->ccid_id] = ccid;
69 err = 0;
70 }
71 ccids_write_unlock();
72 if (err == 0)
73 pr_info("CCID: Registered CCID %d (%s)\n",
74 ccid->ccid_id, ccid->ccid_name);
75 return err;
76}
77
78EXPORT_SYMBOL_GPL(ccid_register);
79
80int ccid_unregister(struct ccid *ccid)
81{
82 ccids_write_lock();
83 ccids[ccid->ccid_id] = NULL;
84 ccids_write_unlock();
85 pr_info("CCID: Unregistered CCID %d (%s)\n",
86 ccid->ccid_id, ccid->ccid_name);
87 return 0;
88}
89
90EXPORT_SYMBOL_GPL(ccid_unregister);
91
92struct ccid *ccid_init(unsigned char id, struct sock *sk)
93{
94 struct ccid *ccid;
95
96#ifdef CONFIG_KMOD
97 if (ccids[id] == NULL)
98 request_module("net-dccp-ccid-%d", id);
99#endif
100 ccids_read_lock();
101
102 ccid = ccids[id];
103 if (ccid == NULL)
104 goto out;
105
106 if (!try_module_get(ccid->ccid_owner))
107 goto out_err;
108
109 if (ccid->ccid_init(sk) != 0)
110 goto out_module_put;
111out:
112 ccids_read_unlock();
113 return ccid;
114out_module_put:
115 module_put(ccid->ccid_owner);
116out_err:
117 ccid = NULL;
118 goto out;
119}
120
121EXPORT_SYMBOL_GPL(ccid_init);
122
123void ccid_exit(struct ccid *ccid, struct sock *sk)
124{
125 if (ccid == NULL)
126 return;
127
128 ccids_read_lock();
129
130 if (ccids[ccid->ccid_id] != NULL) {
131 if (ccid->ccid_exit != NULL)
132 ccid->ccid_exit(sk);
133 module_put(ccid->ccid_owner);
134 }
135
136 ccids_read_unlock();
137}
138
139EXPORT_SYMBOL_GPL(ccid_exit);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
new file mode 100644
index 000000000000..962f1e9e2f7e
--- /dev/null
+++ b/net/dccp/ccid.h
@@ -0,0 +1,180 @@
1#ifndef _CCID_H
2#define _CCID_H
3/*
4 * net/dccp/ccid.h
5 *
6 * An implementation of the DCCP protocol
7 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 *
9 * CCID infrastructure
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <net/sock.h>
17#include <linux/dccp.h>
18#include <linux/list.h>
19#include <linux/module.h>
20
21#define CCID_MAX 255
22
23struct ccid {
24 unsigned char ccid_id;
25 const char *ccid_name;
26 struct module *ccid_owner;
27 int (*ccid_init)(struct sock *sk);
28 void (*ccid_exit)(struct sock *sk);
29 int (*ccid_hc_rx_init)(struct sock *sk);
30 int (*ccid_hc_tx_init)(struct sock *sk);
31 void (*ccid_hc_rx_exit)(struct sock *sk);
32 void (*ccid_hc_tx_exit)(struct sock *sk);
33 void (*ccid_hc_rx_packet_recv)(struct sock *sk,
34 struct sk_buff *skb);
35 int (*ccid_hc_rx_parse_options)(struct sock *sk,
36 unsigned char option,
37 unsigned char len, u16 idx,
38 unsigned char* value);
39 void (*ccid_hc_rx_insert_options)(struct sock *sk,
40 struct sk_buff *skb);
41 void (*ccid_hc_tx_insert_options)(struct sock *sk,
42 struct sk_buff *skb);
43 void (*ccid_hc_tx_packet_recv)(struct sock *sk,
44 struct sk_buff *skb);
45 int (*ccid_hc_tx_parse_options)(struct sock *sk,
46 unsigned char option,
47 unsigned char len, u16 idx,
48 unsigned char* value);
49 int (*ccid_hc_tx_send_packet)(struct sock *sk,
50 struct sk_buff *skb, int len);
51 void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
52 int len);
53 void (*ccid_hc_rx_get_info)(struct sock *sk,
54 struct tcp_info *info);
55 void (*ccid_hc_tx_get_info)(struct sock *sk,
56 struct tcp_info *info);
57};
58
59extern int ccid_register(struct ccid *ccid);
60extern int ccid_unregister(struct ccid *ccid);
61
62extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
63extern void ccid_exit(struct ccid *ccid, struct sock *sk);
64
65static inline void __ccid_get(struct ccid *ccid)
66{
67 __module_get(ccid->ccid_owner);
68}
69
70static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
71 struct sk_buff *skb, int len)
72{
73 int rc = 0;
74 if (ccid->ccid_hc_tx_send_packet != NULL)
75 rc = ccid->ccid_hc_tx_send_packet(sk, skb, len);
76 return rc;
77}
78
79static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
80 int more, int len)
81{
82 if (ccid->ccid_hc_tx_packet_sent != NULL)
83 ccid->ccid_hc_tx_packet_sent(sk, more, len);
84}
85
86static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
87{
88 int rc = 0;
89 if (ccid->ccid_hc_rx_init != NULL)
90 rc = ccid->ccid_hc_rx_init(sk);
91 return rc;
92}
93
94static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
95{
96 int rc = 0;
97 if (ccid->ccid_hc_tx_init != NULL)
98 rc = ccid->ccid_hc_tx_init(sk);
99 return rc;
100}
101
102static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
103{
104 if (ccid->ccid_hc_rx_exit != NULL &&
105 dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL)
106 ccid->ccid_hc_rx_exit(sk);
107}
108
109static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
110{
111 if (ccid->ccid_hc_tx_exit != NULL &&
112 dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL)
113 ccid->ccid_hc_tx_exit(sk);
114}
115
116static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
117 struct sk_buff *skb)
118{
119 if (ccid->ccid_hc_rx_packet_recv != NULL)
120 ccid->ccid_hc_rx_packet_recv(sk, skb);
121}
122
123static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
124 struct sk_buff *skb)
125{
126 if (ccid->ccid_hc_tx_packet_recv != NULL)
127 ccid->ccid_hc_tx_packet_recv(sk, skb);
128}
129
130static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
131 unsigned char option,
132 unsigned char len, u16 idx,
133 unsigned char* value)
134{
135 int rc = 0;
136 if (ccid->ccid_hc_tx_parse_options != NULL)
137 rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx,
138 value);
139 return rc;
140}
141
142static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
143 unsigned char option,
144 unsigned char len, u16 idx,
145 unsigned char* value)
146{
147 int rc = 0;
148 if (ccid->ccid_hc_rx_parse_options != NULL)
149 rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
150 return rc;
151}
152
153static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
154 struct sk_buff *skb)
155{
156 if (ccid->ccid_hc_tx_insert_options != NULL)
157 ccid->ccid_hc_tx_insert_options(sk, skb);
158}
159
160static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
161 struct sk_buff *skb)
162{
163 if (ccid->ccid_hc_rx_insert_options != NULL)
164 ccid->ccid_hc_rx_insert_options(sk, skb);
165}
166
167static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
168 struct tcp_info *info)
169{
170 if (ccid->ccid_hc_rx_get_info != NULL)
171 ccid->ccid_hc_rx_get_info(sk, info);
172}
173
174static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
175 struct tcp_info *info)
176{
177 if (ccid->ccid_hc_tx_get_info != NULL)
178 ccid->ccid_hc_tx_get_info(sk, info);
179}
180#endif /* _CCID_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
new file mode 100644
index 000000000000..7684d83946a4
--- /dev/null
+++ b/net/dccp/ccids/Kconfig
@@ -0,0 +1,29 @@
1menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
2 depends on IP_DCCP && EXPERIMENTAL
3
4config IP_DCCP_CCID3
5 tristate "CCID3 (TFRC) (EXPERIMENTAL)"
6 depends on IP_DCCP
7 ---help---
8 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
9 rate-controlled congestion control mechanism. TFRC is designed to
10 be reasonably fair when competing for bandwidth with TCP-like flows,
11 where a flow is "reasonably fair" if its sending rate is generally
12 within a factor of two of the sending rate of a TCP flow under the
13 same conditions. However, TFRC has a much lower variation of
14 throughput over time compared with TCP, which makes CCID 3 more
15 suitable than CCID 2 for applications such as streaming media where a
16 relatively smooth sending rate is of importance.
17
18 CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
19 congestion control algorithms were initially described in RFC 3448.
20
21 This text was extracted from draft-ietf-dccp-spec-11.txt.
22
23 If in doubt, say M.
24
25config IP_DCCP_TFRC_LIB
26 depends on IP_DCCP_CCID3
27 def_tristate IP_DCCP_CCID3
28
29endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
new file mode 100644
index 000000000000..956f79f50743
--- /dev/null
+++ b/net/dccp/ccids/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
2
3dccp_ccid3-y := ccid3.o
4
5obj-y += lib/
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
new file mode 100644
index 000000000000..38aa84986118
--- /dev/null
+++ b/net/dccp/ccids/ccid3.c
@@ -0,0 +1,1214 @@
1/*
2 * net/dccp/ccids/ccid3.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 *
7 * An implementation of the DCCP protocol
8 *
9 * This code has been developed by the University of Waikato WAND
10 * research group. For further information please see http://www.wand.net.nz/
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#include <linux/config.h>
38#include "../ccid.h"
39#include "../dccp.h"
40#include "lib/packet_history.h"
41#include "lib/loss_interval.h"
42#include "lib/tfrc.h"
43#include "ccid3.h"
44
45/*
46 * Reason for maths here is to avoid 32 bit overflow when a is big.
47 * With this we get close to the limit.
48 */
49static inline u32 usecs_div(const u32 a, const u32 b)
50{
51 const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 :
52 a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 :
53 a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 :
54 a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 :
55 a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 :
56 a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 :
57 a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 :
58 a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 :
59 100000;
60 const u32 tmp = a * (USEC_PER_SEC / div);
61 return (b >= 2 * div) ? tmp / (b / div) : tmp;
62}
63
64static int ccid3_debug;
65
66#ifdef CCID3_DEBUG
67#define ccid3_pr_debug(format, a...) \
68 do { if (ccid3_debug) \
69 printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
70 } while (0)
71#else
72#define ccid3_pr_debug(format, a...)
73#endif
74
75static struct dccp_tx_hist *ccid3_tx_hist;
76static struct dccp_rx_hist *ccid3_rx_hist;
77static struct dccp_li_hist *ccid3_li_hist;
78
/* Per-socket CCID3 setup hook: nothing to do, always reports success. */
static int ccid3_init(struct sock *sk)
{
	return 0;
}
83
/* Per-socket CCID3 teardown hook: intentionally empty. */
static void ccid3_exit(struct sock *sk)
{
}
87
/*
 * TFRC sender states.
 *
 * Numbering starts at 1, so a zeroed state field matches none of them.
 */
enum ccid3_hc_tx_states {
	TFRC_SSTATE_NO_SENT  = 1,	/* no data packet sent yet */
	TFRC_SSTATE_NO_FBACK = 2,	/* sending, no feedback received yet */
	TFRC_SSTATE_FBACK    = 3,	/* feedback has been received */
	TFRC_SSTATE_TERM     = 4,	/* terminating; tx paths BUG_ON() it */
};
95
96#ifdef CCID3_DEBUG
97static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
98{
99 static char *ccid3_state_names[] = {
100 [TFRC_SSTATE_NO_SENT] = "NO_SENT",
101 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
102 [TFRC_SSTATE_FBACK] = "FBACK",
103 [TFRC_SSTATE_TERM] = "TERM",
104 };
105
106 return ccid3_state_names[state];
107}
108#endif
109
110static inline void ccid3_hc_tx_set_state(struct sock *sk,
111 enum ccid3_hc_tx_states state)
112{
113 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
114 enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
115
116 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
117 dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
118 ccid3_tx_state_name(state));
119 WARN_ON(state == oldstate);
120 hctx->ccid3hctx_state = state;
121}
122
123/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
124static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
125{
126 /*
127 * If no feedback spec says t_ipi is 1 second (set elsewhere and then
128 * doubles after every no feedback timer (separate function)
129 */
130 if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
131 hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
132 hctx->ccid3hctx_x);
133}
134
135/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
136static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
137{
138 hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
139 TFRC_OPSYS_HALF_TIME_GRAN);
140}
141
142/*
143 * Update X by
144 * If (p > 0)
145 * x_calc = calcX(s, R, p);
146 * X = max(min(X_calc, 2 * X_recv), s / t_mbi);
147 * Else
148 * If (now - tld >= R)
149 * X = max(min(2 * X, 2 * X_recv), s / R);
150 * tld = now;
151 */
152static void ccid3_hc_tx_update_x(struct sock *sk)
153{
154 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
155
156 /* To avoid large error in calcX */
157 if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
158 hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
159 hctx->ccid3hctx_rtt,
160 hctx->ccid3hctx_p);
161 hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc,
162 2 * hctx->ccid3hctx_x_recv),
163 (hctx->ccid3hctx_s /
164 TFRC_MAX_BACK_OFF_TIME));
165 } else {
166 struct timeval now;
167
168 dccp_timestamp(sk, &now);
169 if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
170 hctx->ccid3hctx_rtt) {
171 hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
172 hctx->ccid3hctx_x) * 2,
173 usecs_div(hctx->ccid3hctx_s,
174 hctx->ccid3hctx_rtt));
175 hctx->ccid3hctx_t_ld = now;
176 }
177 }
178}
179
180static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
181{
182 struct sock *sk = (struct sock *)data;
183 unsigned long next_tmout = 0;
184 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
185
186 bh_lock_sock(sk);
187 if (sock_owned_by_user(sk)) {
188 /* Try again later. */
189 /* XXX: set some sensible MIB */
190 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
191 jiffies + HZ / 5);
192 goto out;
193 }
194
195 ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
196 ccid3_tx_state_name(hctx->ccid3hctx_state));
197
198 switch (hctx->ccid3hctx_state) {
199 case TFRC_SSTATE_TERM:
200 goto out;
201 case TFRC_SSTATE_NO_FBACK:
202 /* Halve send rate */
203 hctx->ccid3hctx_x /= 2;
204 if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
205 TFRC_MAX_BACK_OFF_TIME))
206 hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
207 TFRC_MAX_BACK_OFF_TIME);
208
209 ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
210 "bytes/s\n",
211 dccp_role(sk), sk,
212 ccid3_tx_state_name(hctx->ccid3hctx_state),
213 hctx->ccid3hctx_x);
214 next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
215 hctx->ccid3hctx_x),
216 TFRC_INITIAL_TIMEOUT);
217 /*
218 * FIXME - not sure above calculation is correct. See section
219 * 5 of CCID3 11 should adjust tx_t_ipi and double that to
220 * achieve it really
221 */
222 break;
223 case TFRC_SSTATE_FBACK:
224 /*
225 * Check if IDLE since last timeout and recv rate is less than
226 * 4 packets per RTT
227 */
228 if (!hctx->ccid3hctx_idle ||
229 (hctx->ccid3hctx_x_recv >=
230 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
231 ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
232 dccp_role(sk), sk,
233 ccid3_tx_state_name(hctx->ccid3hctx_state));
234 /* Halve sending rate */
235
236 /* If (X_calc > 2 * X_recv)
237 * X_recv = max(X_recv / 2, s / (2 * t_mbi));
238 * Else
239 * X_recv = X_calc / 4;
240 */
241 BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
242 hctx->ccid3hctx_x_calc == 0);
243
244 /* check also if p is zero -> x_calc is infinity? */
245 if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
246 hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
247 hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
248 hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
249 else
250 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
251
252 /* Update sending rate */
253 ccid3_hc_tx_update_x(sk);
254 }
255 /*
256 * Schedule no feedback timer to expire in
257 * max(4 * R, 2 * s / X)
258 */
259 next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
260 2 * usecs_div(hctx->ccid3hctx_s,
261 hctx->ccid3hctx_x));
262 break;
263 default:
264 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
265 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
266 dump_stack();
267 goto out;
268 }
269
270 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
271 jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
272 hctx->ccid3hctx_idle = 1;
273out:
274 bh_unlock_sock(sk);
275 sock_put(sk);
276}
277
278static int ccid3_hc_tx_send_packet(struct sock *sk,
279 struct sk_buff *skb, int len)
280{
281 struct dccp_sock *dp = dccp_sk(sk);
282 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
283 struct dccp_tx_hist_entry *new_packet;
284 struct timeval now;
285 long delay;
286 int rc = -ENOTCONN;
287
288 BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
289
290 /* Check if pure ACK or Terminating*/
291 /*
292 * XXX: We only call this function for DATA and DATAACK, on, these
293 * packets can have zero length, but why the comment about "pure ACK"?
294 */
295 if (unlikely(len == 0))
296 goto out;
297
298 /* See if last packet allocated was not sent */
299 new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
300 if (new_packet == NULL || new_packet->dccphtx_sent) {
301 new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
302 SLAB_ATOMIC);
303
304 rc = -ENOBUFS;
305 if (unlikely(new_packet == NULL)) {
306 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, not enough "
307 "mem to add to history, send refused\n",
308 __FUNCTION__, dccp_role(sk), sk);
309 goto out;
310 }
311
312 dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
313 }
314
315 dccp_timestamp(sk, &now);
316
317 switch (hctx->ccid3hctx_state) {
318 case TFRC_SSTATE_NO_SENT:
319 hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
320 hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
321 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
322 jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
323 hctx->ccid3hctx_last_win_count = 0;
324 hctx->ccid3hctx_t_last_win_count = now;
325 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
326 hctx->ccid3hctx_t_ipi = TFRC_INITIAL_IPI;
327
328 /* Set nominal send time for initial packet */
329 hctx->ccid3hctx_t_nom = now;
330 timeval_add_usecs(&hctx->ccid3hctx_t_nom,
331 hctx->ccid3hctx_t_ipi);
332 ccid3_calc_new_delta(hctx);
333 rc = 0;
334 break;
335 case TFRC_SSTATE_NO_FBACK:
336 case TFRC_SSTATE_FBACK:
337 delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
338 hctx->ccid3hctx_delta);
339 delay /= -1000;
340 /* divide by -1000 is to convert to ms and get sign right */
341 rc = delay > 0 ? delay : 0;
342 break;
343 default:
344 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
345 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
346 dump_stack();
347 rc = -EINVAL;
348 break;
349 }
350
351 /* Can we send? if so add options and add to packet history */
352 if (rc == 0) {
353 dp->dccps_hc_tx_insert_options = 1;
354 new_packet->dccphtx_ccval =
355 DCCP_SKB_CB(skb)->dccpd_ccval =
356 hctx->ccid3hctx_last_win_count;
357 }
358out:
359 return rc;
360}
361
362static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
363{
364 const struct dccp_sock *dp = dccp_sk(sk);
365 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
366 struct timeval now;
367
368 BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
369
370 dccp_timestamp(sk, &now);
371
372 /* check if we have sent a data packet */
373 if (len > 0) {
374 unsigned long quarter_rtt;
375 struct dccp_tx_hist_entry *packet;
376
377 packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
378 if (unlikely(packet == NULL)) {
379 LIMIT_NETDEBUG(KERN_WARNING "%s: packet doesn't "
380 "exists in history!\n", __FUNCTION__);
381 return;
382 }
383 if (unlikely(packet->dccphtx_sent)) {
384 LIMIT_NETDEBUG(KERN_WARNING "%s: no unsent packet in "
385 "history!\n", __FUNCTION__);
386 return;
387 }
388 packet->dccphtx_tstamp = now;
389 packet->dccphtx_seqno = dp->dccps_gss;
390 /*
391 * Check if win_count have changed
392 * Algorithm in "8.1. Window Counter Valuer" in
393 * draft-ietf-dccp-ccid3-11.txt
394 */
395 quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
396 if (likely(hctx->ccid3hctx_rtt > 8))
397 quarter_rtt /= hctx->ccid3hctx_rtt / 4;
398
399 if (quarter_rtt > 0) {
400 hctx->ccid3hctx_t_last_win_count = now;
401 hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
402 min_t(unsigned long, quarter_rtt, 5)) % 16;
403 ccid3_pr_debug("%s, sk=%p, window changed from "
404 "%u to %u!\n",
405 dccp_role(sk), sk,
406 packet->dccphtx_ccval,
407 hctx->ccid3hctx_last_win_count);
408 }
409
410 hctx->ccid3hctx_idle = 0;
411 packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
412 packet->dccphtx_sent = 1;
413 } else
414 ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
415 dccp_role(sk), sk, dp->dccps_gss);
416
417 switch (hctx->ccid3hctx_state) {
418 case TFRC_SSTATE_NO_SENT:
419 /* if first wasn't pure ack */
420 if (len != 0)
421 printk(KERN_CRIT "%s: %s, First packet sent is noted "
422 "as a data packet\n",
423 __FUNCTION__, dccp_role(sk));
424 return;
425 case TFRC_SSTATE_NO_FBACK:
426 case TFRC_SSTATE_FBACK:
427 if (len > 0) {
428 hctx->ccid3hctx_t_nom = now;
429 ccid3_calc_new_t_ipi(hctx);
430 ccid3_calc_new_delta(hctx);
431 timeval_add_usecs(&hctx->ccid3hctx_t_nom,
432 hctx->ccid3hctx_t_ipi);
433 }
434 break;
435 default:
436 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
437 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
438 dump_stack();
439 break;
440 }
441}
442
443static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
444{
445 const struct dccp_sock *dp = dccp_sk(sk);
446 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
447 struct ccid3_options_received *opt_recv;
448 struct dccp_tx_hist_entry *packet;
449 struct timeval now;
450 unsigned long next_tmout;
451 u32 t_elapsed;
452 u32 pinv;
453 u32 x_recv;
454 u32 r_sample;
455
456 BUG_ON(hctx == NULL || hctx->ccid3hctx_state == TFRC_SSTATE_TERM);
457
458 /* we are only interested in ACKs */
459 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
460 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
461 return;
462
463 opt_recv = &hctx->ccid3hctx_options_received;
464
465 t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
466 x_recv = opt_recv->ccid3or_receive_rate;
467 pinv = opt_recv->ccid3or_loss_event_rate;
468
469 switch (hctx->ccid3hctx_state) {
470 case TFRC_SSTATE_NO_SENT:
471 /* FIXME: what to do here? */
472 return;
473 case TFRC_SSTATE_NO_FBACK:
474 case TFRC_SSTATE_FBACK:
475 /* Calculate new round trip sample by
476 * R_sample = (now - t_recvdata) - t_delay */
477 /* get t_recvdata from history */
478 packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
479 DCCP_SKB_CB(skb)->dccpd_ack_seq);
480 if (unlikely(packet == NULL)) {
481 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, seqno "
482 "%llu(%s) does't exist in history!\n",
483 __FUNCTION__, dccp_role(sk), sk,
484 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
485 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
486 return;
487 }
488
489 /* Update RTT */
490 dccp_timestamp(sk, &now);
491 r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
492 if (unlikely(r_sample <= t_elapsed))
493 LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, "
494 "t_elapsed=%uus\n",
495 __FUNCTION__, r_sample, t_elapsed);
496 else
497 r_sample -= t_elapsed;
498
499 /* Update RTT estimate by
500 * If (No feedback recv)
501 * R = R_sample;
502 * Else
503 * R = q * R + (1 - q) * R_sample;
504 *
505 * q is a constant, RFC 3448 recomments 0.9
506 */
507 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
508 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
509 hctx->ccid3hctx_rtt = r_sample;
510 } else
511 hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
512 r_sample / 10;
513
514 ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
515 "r_sample=%us\n", dccp_role(sk), sk,
516 hctx->ccid3hctx_rtt, r_sample);
517
518 /* Update timeout interval */
519 hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
520 USEC_PER_SEC);
521
522 /* Update receive rate */
523 hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
524
525 /* Update loss event rate */
526 if (pinv == ~0 || pinv == 0)
527 hctx->ccid3hctx_p = 0;
528 else {
529 hctx->ccid3hctx_p = 1000000 / pinv;
530
531 if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
532 hctx->ccid3hctx_p = TFRC_SMALLEST_P;
533 ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
534 dccp_role(sk), sk);
535 }
536 }
537
538 /* unschedule no feedback timer */
539 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
540
541 /* Update sending rate */
542 ccid3_hc_tx_update_x(sk);
543
544 /* Update next send time */
545 timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
546 hctx->ccid3hctx_t_ipi);
547 ccid3_calc_new_t_ipi(hctx);
548 timeval_add_usecs(&hctx->ccid3hctx_t_nom,
549 hctx->ccid3hctx_t_ipi);
550 ccid3_calc_new_delta(hctx);
551
552 /* remove all packets older than the one acked from history */
553 dccp_tx_hist_purge_older(ccid3_tx_hist,
554 &hctx->ccid3hctx_hist, packet);
555 /*
556 * As we have calculated new ipi, delta, t_nom it is possible that
557 * we now can send a packet, so wake up dccp_wait_for_ccids.
558 */
559 sk->sk_write_space(sk);
560
561 /*
562 * Schedule no feedback timer to expire in
563 * max(4 * R, 2 * s / X)
564 */
565 next_tmout = max(hctx->ccid3hctx_t_rto,
566 2 * usecs_div(hctx->ccid3hctx_s,
567 hctx->ccid3hctx_x));
568
569 ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
570 "expire in %lu jiffies (%luus)\n",
571 dccp_role(sk), sk,
572 usecs_to_jiffies(next_tmout), next_tmout);
573
574 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
575 jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
576
577 /* set idle flag */
578 hctx->ccid3hctx_idle = 1;
579 break;
580 default:
581 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
582 __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
583 dump_stack();
584 break;
585 }
586}
587
588static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
589{
590 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
591
592 BUG_ON(hctx == NULL);
593
594 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
595 return;
596
597 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
598}
599
600static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
601 unsigned char len, u16 idx,
602 unsigned char *value)
603{
604 int rc = 0;
605 const struct dccp_sock *dp = dccp_sk(sk);
606 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
607 struct ccid3_options_received *opt_recv;
608
609 BUG_ON(hctx == NULL);
610
611 opt_recv = &hctx->ccid3hctx_options_received;
612
613 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
614 opt_recv->ccid3or_seqno = dp->dccps_gsr;
615 opt_recv->ccid3or_loss_event_rate = ~0;
616 opt_recv->ccid3or_loss_intervals_idx = 0;
617 opt_recv->ccid3or_loss_intervals_len = 0;
618 opt_recv->ccid3or_receive_rate = 0;
619 }
620
621 switch (option) {
622 case TFRC_OPT_LOSS_EVENT_RATE:
623 if (unlikely(len != 4)) {
624 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid "
625 "len for TFRC_OPT_LOSS_EVENT_RATE\n",
626 __FUNCTION__, dccp_role(sk), sk);
627 rc = -EINVAL;
628 } else {
629 opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
630 ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
631 dccp_role(sk), sk,
632 opt_recv->ccid3or_loss_event_rate);
633 }
634 break;
635 case TFRC_OPT_LOSS_INTERVALS:
636 opt_recv->ccid3or_loss_intervals_idx = idx;
637 opt_recv->ccid3or_loss_intervals_len = len;
638 ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
639 dccp_role(sk), sk,
640 opt_recv->ccid3or_loss_intervals_idx,
641 opt_recv->ccid3or_loss_intervals_len);
642 break;
643 case TFRC_OPT_RECEIVE_RATE:
644 if (unlikely(len != 4)) {
645 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, invalid "
646 "len for TFRC_OPT_RECEIVE_RATE\n",
647 __FUNCTION__, dccp_role(sk), sk);
648 rc = -EINVAL;
649 } else {
650 opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
651 ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
652 dccp_role(sk), sk,
653 opt_recv->ccid3or_receive_rate);
654 }
655 break;
656 }
657
658 return rc;
659}
660
661static int ccid3_hc_tx_init(struct sock *sk)
662{
663 struct dccp_sock *dp = dccp_sk(sk);
664 struct ccid3_hc_tx_sock *hctx;
665
666 dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any());
667 if (dp->dccps_hc_tx_ccid_private == NULL)
668 return -ENOMEM;
669
670 hctx = ccid3_hc_tx_sk(sk);
671 memset(hctx, 0, sizeof(*hctx));
672
673 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
674 dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
675 hctx->ccid3hctx_s = dp->dccps_packet_size;
676 else
677 hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
678
679 /* Set transmission rate to 1 packet per second */
680 hctx->ccid3hctx_x = hctx->ccid3hctx_s;
681 hctx->ccid3hctx_t_rto = USEC_PER_SEC;
682 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
683 INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
684 init_timer(&hctx->ccid3hctx_no_feedback_timer);
685
686 return 0;
687}
688
689static void ccid3_hc_tx_exit(struct sock *sk)
690{
691 struct dccp_sock *dp = dccp_sk(sk);
692 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
693
694 BUG_ON(hctx == NULL);
695
696 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
697 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
698
699 /* Empty packet history */
700 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
701
702 kfree(dp->dccps_hc_tx_ccid_private);
703 dp->dccps_hc_tx_ccid_private = NULL;
704}
705
706/*
707 * RX Half Connection methods
708 */
709
/* TFRC receiver states */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA = 1,	/* set by ccid3_hc_rx_init() */
	TFRC_RSTATE_DATA    = 2,	/* entered after the initial feedback */
	TFRC_RSTATE_TERM    = 127,	/* set by ccid3_hc_rx_exit() */
};
716
#ifdef CCID3_DEBUG
/*
 * Map a receiver state to its name for debug output.
 *
 * Returns NULL for a value that is not one of the named states. A switch
 * is used rather than a table indexed by state so that a value above
 * TFRC_RSTATE_TERM cannot index past the end of an array.
 */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	switch (state) {
	case TFRC_RSTATE_NO_DATA:
		return "NO_DATA";
	case TFRC_RSTATE_DATA:
		return "DATA";
	case TFRC_RSTATE_TERM:
		return "TERM";
	default:
		return NULL;
	}
}
#endif
729
730static inline void ccid3_hc_rx_set_state(struct sock *sk,
731 enum ccid3_hc_rx_states state)
732{
733 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
734 enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
735
736 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
737 dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
738 ccid3_rx_state_name(state));
739 WARN_ON(state == oldstate);
740 hcrx->ccid3hcrx_state = state;
741}
742
743static void ccid3_hc_rx_send_feedback(struct sock *sk)
744{
745 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
746 struct dccp_sock *dp = dccp_sk(sk);
747 struct dccp_rx_hist_entry *packet;
748 struct timeval now;
749
750 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
751
752 dccp_timestamp(sk, &now);
753
754 switch (hcrx->ccid3hcrx_state) {
755 case TFRC_RSTATE_NO_DATA:
756 hcrx->ccid3hcrx_x_recv = 0;
757 break;
758 case TFRC_RSTATE_DATA: {
759 const u32 delta = timeval_delta(&now,
760 &hcrx->ccid3hcrx_tstamp_last_feedback);
761 hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv,
762 delta);
763 }
764 break;
765 default:
766 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
767 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
768 dump_stack();
769 return;
770 }
771
772 packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
773 if (unlikely(packet == NULL)) {
774 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, no data packet "
775 "in history!\n",
776 __FUNCTION__, dccp_role(sk), sk);
777 return;
778 }
779
780 hcrx->ccid3hcrx_tstamp_last_feedback = now;
781 hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval;
782 hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno;
783 hcrx->ccid3hcrx_bytes_recv = 0;
784
785 /* Convert to multiples of 10us */
786 hcrx->ccid3hcrx_elapsed_time =
787 timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
788 if (hcrx->ccid3hcrx_p == 0)
789 hcrx->ccid3hcrx_pinv = ~0;
790 else
791 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
792 dp->dccps_hc_rx_insert_options = 1;
793 dccp_send_ack(sk);
794}
795
796static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
797{
798 const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
799 u32 x_recv, pinv;
800
801 BUG_ON(hcrx == NULL);
802
803 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
804 return;
805
806 DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
807
808 if (dccp_packet_without_ack(skb))
809 return;
810
811 if (hcrx->ccid3hcrx_elapsed_time != 0)
812 dccp_insert_option_elapsed_time(sk, skb,
813 hcrx->ccid3hcrx_elapsed_time);
814 dccp_insert_option_timestamp(sk, skb);
815 x_recv = htonl(hcrx->ccid3hcrx_x_recv);
816 pinv = htonl(hcrx->ccid3hcrx_pinv);
817 dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
818 &pinv, sizeof(pinv));
819 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
820 &x_recv, sizeof(x_recv));
821}
822
823/* calculate first loss interval
824 *
825 * returns estimated loss interval in usecs */
826
827static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
828{
829 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
830 struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
831 u32 rtt, delta, x_recv, fval, p, tmp2;
832 struct timeval tstamp = { 0, };
833 int interval = 0;
834 int win_count = 0;
835 int step = 0;
836 u64 tmp1;
837
838 list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
839 dccphrx_node) {
840 if (dccp_rx_hist_entry_data_packet(entry)) {
841 tail = entry;
842
843 switch (step) {
844 case 0:
845 tstamp = entry->dccphrx_tstamp;
846 win_count = entry->dccphrx_ccval;
847 step = 1;
848 break;
849 case 1:
850 interval = win_count - entry->dccphrx_ccval;
851 if (interval < 0)
852 interval += TFRC_WIN_COUNT_LIMIT;
853 if (interval > 4)
854 goto found;
855 break;
856 }
857 }
858 }
859
860 if (unlikely(step == 0)) {
861 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, packet history "
862 "contains no data packets!\n",
863 __FUNCTION__, dccp_role(sk), sk);
864 return ~0;
865 }
866
867 if (unlikely(interval == 0)) {
868 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Could not find a "
869 "win_count interval > 0. Defaulting to 1\n",
870 __FUNCTION__, dccp_role(sk), sk);
871 interval = 1;
872 }
873found:
874 rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
875 ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
876 dccp_role(sk), sk, rtt);
877 if (rtt == 0)
878 rtt = 1;
879
880 dccp_timestamp(sk, &tstamp);
881 delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
882 x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
883
884 tmp1 = (u64)x_recv * (u64)rtt;
885 do_div(tmp1,10000000);
886 tmp2 = (u32)tmp1;
887 fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
888 /* do not alter order above or you will get overflow on 32 bit */
889 p = tfrc_calc_x_reverse_lookup(fval);
890 ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
891 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
892
893 if (p == 0)
894 return ~0;
895 else
896 return 1000000 / p;
897}
898
899static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
900{
901 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
902
903 if (seq_loss != DCCP_MAX_SEQNO + 1 &&
904 list_empty(&hcrx->ccid3hcrx_li_hist)) {
905 struct dccp_li_hist_entry *li_tail;
906
907 li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
908 &hcrx->ccid3hcrx_li_hist,
909 seq_loss, win_loss);
910 if (li_tail == NULL)
911 return;
912 li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
913 } else
914 LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of "
915 "interval\n", __FUNCTION__);
916}
917
918static void ccid3_hc_rx_detect_loss(struct sock *sk)
919{
920 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
921 u8 win_loss;
922 const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
923 &hcrx->ccid3hcrx_li_hist,
924 &win_loss);
925
926 ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
927}
928
929static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
930{
931 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
932 const struct dccp_options_received *opt_recv;
933 struct dccp_rx_hist_entry *packet;
934 struct timeval now;
935 u8 win_count;
936 u32 p_prev, r_sample, t_elapsed;
937 int ins;
938
939 BUG_ON(hcrx == NULL ||
940 !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
941 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
942
943 opt_recv = &dccp_sk(sk)->dccps_options_received;
944
945 switch (DCCP_SKB_CB(skb)->dccpd_type) {
946 case DCCP_PKT_ACK:
947 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
948 return;
949 case DCCP_PKT_DATAACK:
950 if (opt_recv->dccpor_timestamp_echo == 0)
951 break;
952 p_prev = hcrx->ccid3hcrx_rtt;
953 dccp_timestamp(sk, &now);
954 timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
955 r_sample = timeval_usecs(&now);
956 t_elapsed = opt_recv->dccpor_elapsed_time * 10;
957
958 if (unlikely(r_sample <= t_elapsed))
959 LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, "
960 "t_elapsed=%uus\n",
961 __FUNCTION__, r_sample, t_elapsed);
962 else
963 r_sample -= t_elapsed;
964
965 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
966 hcrx->ccid3hcrx_rtt = r_sample;
967 else
968 hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
969 r_sample / 10;
970
971 if (p_prev != hcrx->ccid3hcrx_rtt)
972 ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
973 dccp_role(sk), hcrx->ccid3hcrx_rtt,
974 opt_recv->dccpor_elapsed_time);
975 break;
976 case DCCP_PKT_DATA:
977 break;
978 default: /* We're not interested in other packet types, move along */
979 return;
980 }
981
982 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
983 skb, SLAB_ATOMIC);
984 if (unlikely(packet == NULL)) {
985 LIMIT_NETDEBUG(KERN_WARNING "%s: %s, sk=%p, Not enough mem to "
986 "add rx packet to history, consider it lost!\n",
987 __FUNCTION__, dccp_role(sk), sk);
988 return;
989 }
990
991 win_count = packet->dccphrx_ccval;
992
993 ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
994 &hcrx->ccid3hcrx_li_hist, packet);
995
996 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
997 return;
998
999 switch (hcrx->ccid3hcrx_state) {
1000 case TFRC_RSTATE_NO_DATA:
1001 ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
1002 "feedback\n",
1003 dccp_role(sk), sk,
1004 dccp_state_name(sk->sk_state), skb);
1005 ccid3_hc_rx_send_feedback(sk);
1006 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
1007 return;
1008 case TFRC_RSTATE_DATA:
1009 hcrx->ccid3hcrx_bytes_recv += skb->len -
1010 dccp_hdr(skb)->dccph_doff * 4;
1011 if (ins != 0)
1012 break;
1013
1014 dccp_timestamp(sk, &now);
1015 if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
1016 hcrx->ccid3hcrx_rtt) {
1017 hcrx->ccid3hcrx_tstamp_last_ack = now;
1018 ccid3_hc_rx_send_feedback(sk);
1019 }
1020 return;
1021 default:
1022 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1023 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
1024 dump_stack();
1025 return;
1026 }
1027
1028 /* Dealing with packet loss */
1029 ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
1030 dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1031
1032 ccid3_hc_rx_detect_loss(sk);
1033 p_prev = hcrx->ccid3hcrx_p;
1034
1035 /* Calculate loss event rate */
1036 if (!list_empty(&hcrx->ccid3hcrx_li_hist))
1037 /* Scaling up by 1000000 as fixed decimal */
1038 hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
1039
1040 if (hcrx->ccid3hcrx_p > p_prev) {
1041 ccid3_hc_rx_send_feedback(sk);
1042 return;
1043 }
1044}
1045
1046static int ccid3_hc_rx_init(struct sock *sk)
1047{
1048 struct dccp_sock *dp = dccp_sk(sk);
1049 struct ccid3_hc_rx_sock *hcrx;
1050
1051 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1052
1053 dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
1054 if (dp->dccps_hc_rx_ccid_private == NULL)
1055 return -ENOMEM;
1056
1057 hcrx = ccid3_hc_rx_sk(sk);
1058 memset(hcrx, 0, sizeof(*hcrx));
1059
1060 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
1061 dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
1062 hcrx->ccid3hcrx_s = dp->dccps_packet_size;
1063 else
1064 hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
1065
1066 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
1067 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1068 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1069 dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
1070 hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
1071 hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */
1072 return 0;
1073}
1074
1075static void ccid3_hc_rx_exit(struct sock *sk)
1076{
1077 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1078 struct dccp_sock *dp = dccp_sk(sk);
1079
1080 BUG_ON(hcrx == NULL);
1081
1082 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
1083
1084 /* Empty packet history */
1085 dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1086
1087 /* Empty loss interval history */
1088 dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1089
1090 kfree(dp->dccps_hc_rx_ccid_private);
1091 dp->dccps_hc_rx_ccid_private = NULL;
1092}
1093
1094static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
1095{
1096 const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
1097
1098 /* Listen socks doesn't have a private CCID block */
1099 if (sk->sk_state == DCCP_LISTEN)
1100 return;
1101
1102 BUG_ON(hcrx == NULL);
1103
1104 info->tcpi_ca_state = hcrx->ccid3hcrx_state;
1105 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1106 info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
1107}
1108
1109static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
1110{
1111 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1112
1113 /* Listen socks doesn't have a private CCID block */
1114 if (sk->sk_state == DCCP_LISTEN)
1115 return;
1116
1117 BUG_ON(hctx == NULL);
1118
1119 info->tcpi_rto = hctx->ccid3hctx_t_rto;
1120 info->tcpi_rtt = hctx->ccid3hctx_rtt;
1121}
1122
1123static struct ccid ccid3 = {
1124 .ccid_id = 3,
1125 .ccid_name = "ccid3",
1126 .ccid_owner = THIS_MODULE,
1127 .ccid_init = ccid3_init,
1128 .ccid_exit = ccid3_exit,
1129 .ccid_hc_tx_init = ccid3_hc_tx_init,
1130 .ccid_hc_tx_exit = ccid3_hc_tx_exit,
1131 .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
1132 .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
1133 .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
1134 .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
1135 .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
1136 .ccid_hc_rx_init = ccid3_hc_rx_init,
1137 .ccid_hc_rx_exit = ccid3_hc_rx_exit,
1138 .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
1139 .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
1140 .ccid_hc_rx_get_info = ccid3_hc_rx_get_info,
1141 .ccid_hc_tx_get_info = ccid3_hc_tx_get_info,
1142};
1143
1144module_param(ccid3_debug, int, 0444);
1145MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
1146
1147static __init int ccid3_module_init(void)
1148{
1149 int rc = -ENOBUFS;
1150
1151 ccid3_rx_hist = dccp_rx_hist_new("ccid3");
1152 if (ccid3_rx_hist == NULL)
1153 goto out;
1154
1155 ccid3_tx_hist = dccp_tx_hist_new("ccid3");
1156 if (ccid3_tx_hist == NULL)
1157 goto out_free_rx;
1158
1159 ccid3_li_hist = dccp_li_hist_new("ccid3");
1160 if (ccid3_li_hist == NULL)
1161 goto out_free_tx;
1162
1163 rc = ccid_register(&ccid3);
1164 if (rc != 0)
1165 goto out_free_loss_interval_history;
1166out:
1167 return rc;
1168
1169out_free_loss_interval_history:
1170 dccp_li_hist_delete(ccid3_li_hist);
1171 ccid3_li_hist = NULL;
1172out_free_tx:
1173 dccp_tx_hist_delete(ccid3_tx_hist);
1174 ccid3_tx_hist = NULL;
1175out_free_rx:
1176 dccp_rx_hist_delete(ccid3_rx_hist);
1177 ccid3_rx_hist = NULL;
1178 goto out;
1179}
1180module_init(ccid3_module_init);
1181
1182static __exit void ccid3_module_exit(void)
1183{
1184#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
1185 /*
1186 * Hack to use while developing, so that we get rid of the control
1187 * sock, that is what keeps a refcount on dccp.ko -acme
1188 */
1189 extern void dccp_ctl_sock_exit(void);
1190
1191 dccp_ctl_sock_exit();
1192#endif
1193 ccid_unregister(&ccid3);
1194
1195 if (ccid3_tx_hist != NULL) {
1196 dccp_tx_hist_delete(ccid3_tx_hist);
1197 ccid3_tx_hist = NULL;
1198 }
1199 if (ccid3_rx_hist != NULL) {
1200 dccp_rx_hist_delete(ccid3_rx_hist);
1201 ccid3_rx_hist = NULL;
1202 }
1203 if (ccid3_li_hist != NULL) {
1204 dccp_li_hist_delete(ccid3_li_hist);
1205 ccid3_li_hist = NULL;
1206 }
1207}
1208module_exit(ccid3_module_exit);
1209
1210MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
1211 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
1212MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
1213MODULE_LICENSE("GPL");
1214MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644
index 000000000000..eb248778eea3
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,143 @@
1/*
2 * net/dccp/ccids/ccid3.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36#ifndef _DCCP_CCID3_H_
37#define _DCCP_CCID3_H_
38
39#include <linux/config.h>
40#include <linux/list.h>
41#include <linux/time.h>
42#include <linux/types.h>
43
/* Accepted range for the packet size, and the value used outside of it */
#define TFRC_MIN_PACKET_SIZE	   16
#define TFRC_STD_PACKET_SIZE	  256
#define TFRC_MAX_PACKET_SIZE	65535

/* Two seconds as per CCID3 spec (expressed in usecs) */
#define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)

/* A quarter of a second, in usecs */
#define TFRC_INITIAL_IPI	   (USEC_PER_SEC / 4)

/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN  (USEC_PER_SEC / (2 * HZ))

/* In seconds */
#define TFRC_MAX_BACK_OFF_TIME	   64

/* Assigned to ccid3hctx_p, which is a loss event rate scaled by 1000000 */
#define TFRC_SMALLEST_P		   40
60
/* CCID3 option types handled by ccid3_hc_tx_parse_options() */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,	/* 4 byte value */
	TFRC_OPT_LOSS_INTERVALS	 = 193,	/* only index and length are stored */
	TFRC_OPT_RECEIVE_RATE	 = 194,	/* 4 byte value */
};
66
67struct ccid3_options_received {
68 u64 ccid3or_seqno:48,
69 ccid3or_loss_intervals_idx:16;
70 u16 ccid3or_loss_intervals_len;
71 u32 ccid3or_loss_event_rate;
72 u32 ccid3or_receive_rate;
73};
74
75/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
76 *
77 * @ccid3hctx_state - Sender state
78 * @ccid3hctx_x - Current sending rate
79 * @ccid3hctx_x_recv - Receive rate
80 * @ccid3hctx_x_calc - Calculated send (?) rate
81 * @ccid3hctx_s - Packet size
82 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
83 * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
84 * @ccid3hctx_last_win_count - Last window counter sent
85 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
86 * with last_win_count value sent
87 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
88 * @ccid3hctx_idle - FIXME
89 * @ccid3hctx_t_ld - Time last doubled during slow start
90 * @ccid3hctx_t_nom - Nominal send time of next packet
91 * @ccid3hctx_t_ipi - Interpacket (send) interval
92 * @ccid3hctx_delta - Send timer delta
93 * @ccid3hctx_hist - Packet history
94 */
95struct ccid3_hc_tx_sock {
96 u32 ccid3hctx_x;
97 u32 ccid3hctx_x_recv;
98 u32 ccid3hctx_x_calc;
99 u16 ccid3hctx_s;
100 u32 ccid3hctx_rtt;
101 u32 ccid3hctx_p;
102 u8 ccid3hctx_state;
103 u8 ccid3hctx_last_win_count;
104 u8 ccid3hctx_idle;
105 struct timeval ccid3hctx_t_last_win_count;
106 struct timer_list ccid3hctx_no_feedback_timer;
107 struct timeval ccid3hctx_t_ld;
108 struct timeval ccid3hctx_t_nom;
109 u32 ccid3hctx_t_rto;
110 u32 ccid3hctx_t_ipi;
111 u32 ccid3hctx_delta;
112 struct list_head ccid3hctx_hist;
113 struct ccid3_options_received ccid3hctx_options_received;
114};
115
116struct ccid3_hc_rx_sock {
117 u64 ccid3hcrx_seqno_last_counter:48,
118 ccid3hcrx_state:8,
119 ccid3hcrx_last_counter:4;
120 u32 ccid3hcrx_rtt;
121 u32 ccid3hcrx_p;
122 u32 ccid3hcrx_bytes_recv;
123 struct timeval ccid3hcrx_tstamp_last_feedback;
124 struct timeval ccid3hcrx_tstamp_last_ack;
125 struct list_head ccid3hcrx_hist;
126 struct list_head ccid3hcrx_li_hist;
127 u16 ccid3hcrx_s;
128 u32 ccid3hcrx_pinv;
129 u32 ccid3hcrx_elapsed_time;
130 u32 ccid3hcrx_x_recv;
131};
132
133static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
134{
135 return dccp_sk(sk)->dccps_hc_tx_ccid_private;
136}
137
138static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
139{
140 return dccp_sk(sk)->dccps_hc_rx_ccid_private;
141}
142
143#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
new file mode 100644
index 000000000000..5f940a6cbaca
--- /dev/null
+++ b/net/dccp/ccids/lib/Makefile
@@ -0,0 +1,3 @@
# dccp_tfrc_lib.o is built as selected by CONFIG_IP_DCCP_TFRC_LIB
obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o

# Objects that make up dccp_tfrc_lib
dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
new file mode 100644
index 000000000000..4c01a54143ad
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -0,0 +1,144 @@
1/*
2 * net/dccp/ccids/lib/loss_interval.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/config.h>
15#include <linux/module.h>
16
17#include "loss_interval.h"
18
19struct dccp_li_hist *dccp_li_hist_new(const char *name)
20{
21 struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
22 static const char dccp_li_hist_mask[] = "li_hist_%s";
23 char *slab_name;
24
25 if (hist == NULL)
26 goto out;
27
28 slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
29 GFP_ATOMIC);
30 if (slab_name == NULL)
31 goto out_free_hist;
32
33 sprintf(slab_name, dccp_li_hist_mask, name);
34 hist->dccplih_slab = kmem_cache_create(slab_name,
35 sizeof(struct dccp_li_hist_entry),
36 0, SLAB_HWCACHE_ALIGN,
37 NULL, NULL);
38 if (hist->dccplih_slab == NULL)
39 goto out_free_slab_name;
40out:
41 return hist;
42out_free_slab_name:
43 kfree(slab_name);
44out_free_hist:
45 kfree(hist);
46 hist = NULL;
47 goto out;
48}
49
50EXPORT_SYMBOL_GPL(dccp_li_hist_new);
51
52void dccp_li_hist_delete(struct dccp_li_hist *hist)
53{
54 const char* name = kmem_cache_name(hist->dccplih_slab);
55
56 kmem_cache_destroy(hist->dccplih_slab);
57 kfree(name);
58 kfree(hist);
59}
60
61EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
62
63void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
64{
65 struct dccp_li_hist_entry *entry, *next;
66
67 list_for_each_entry_safe(entry, next, list, dccplih_node) {
68 list_del_init(&entry->dccplih_node);
69 kmem_cache_free(hist->dccplih_slab, entry);
70 }
71}
72
73EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
74
75/* Weights used to calculate loss event rate */
76/*
77 * These are integers as per section 8 of RFC3448. We can then divide by 4 *
78 * when we use it.
79 */
80static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
81 4, 4, 4, 4, 3, 2, 1, 1,
82};
83
84u32 dccp_li_hist_calc_i_mean(struct list_head *list)
85{
86 struct dccp_li_hist_entry *li_entry, *li_next;
87 int i = 0;
88 u32 i_tot;
89 u32 i_tot0 = 0;
90 u32 i_tot1 = 0;
91 u32 w_tot = 0;
92
93 list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
94 if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
95 i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
96 w_tot += dccp_li_hist_w[i];
97 }
98
99 if (i != 0)
100 i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
101
102 if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
103 break;
104 }
105
106 if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
107 return 0;
108
109 i_tot = max(i_tot0, i_tot1);
110
111 /* FIXME: Why do we do this? -Ian McDonald */
112 if (i_tot * 4 < w_tot)
113 i_tot = w_tot * 4;
114
115 return i_tot * 4 / w_tot;
116}
117
118EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
119
120struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
121 struct list_head *list,
122 const u64 seq_loss,
123 const u8 win_loss)
124{
125 struct dccp_li_hist_entry *tail = NULL, *entry;
126 int i;
127
128 for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
129 entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
130 if (entry == NULL) {
131 dccp_li_hist_purge(hist, list);
132 return NULL;
133 }
134 if (tail == NULL)
135 tail = entry;
136 list_add(&entry->dccplih_node, list);
137 }
138
139 entry->dccplih_seqno = seq_loss;
140 entry->dccplih_win_count = win_loss;
141 return tail;
142}
143
144EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
new file mode 100644
index 000000000000..13ad47ba1420
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -0,0 +1,61 @@
1#ifndef _DCCP_LI_HIST_
2#define _DCCP_LI_HIST_
3/*
4 * net/dccp/ccids/lib/loss_interval.h
5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/list.h>
18#include <linux/slab.h>
19#include <linux/time.h>
20
21#define DCCP_LI_HIST_IVAL_F_LENGTH 8
22
23struct dccp_li_hist {
24 kmem_cache_t *dccplih_slab;
25};
26
27extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
28extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
29
30struct dccp_li_hist_entry {
31 struct list_head dccplih_node;
32 u64 dccplih_seqno:48,
33 dccplih_win_count:4;
34 u32 dccplih_interval;
35};
36
37static inline struct dccp_li_hist_entry *
38 dccp_li_hist_entry_new(struct dccp_li_hist *hist,
39 const unsigned int __nocast prio)
40{
41 return kmem_cache_alloc(hist->dccplih_slab, prio);
42}
43
44static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
45 struct dccp_li_hist_entry *entry)
46{
47 if (entry != NULL)
48 kmem_cache_free(hist->dccplih_slab, entry);
49}
50
51extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
52 struct list_head *list);
53
54extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
55
56extern struct dccp_li_hist_entry *
57 dccp_li_hist_interval_new(struct dccp_li_hist *hist,
58 struct list_head *list,
59 const u64 seq_loss,
60 const u8 win_loss);
61#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
new file mode 100644
index 000000000000..d3f9d2053830
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -0,0 +1,398 @@
1/*
2 * net/dccp/ccids/lib/packet_history.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#include <linux/config.h>
38#include <linux/module.h>
39#include <linux/string.h>
40
41#include "packet_history.h"
42
43struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
44{
45 struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
46 static const char dccp_rx_hist_mask[] = "rx_hist_%s";
47 char *slab_name;
48
49 if (hist == NULL)
50 goto out;
51
52 slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
53 GFP_ATOMIC);
54 if (slab_name == NULL)
55 goto out_free_hist;
56
57 sprintf(slab_name, dccp_rx_hist_mask, name);
58 hist->dccprxh_slab = kmem_cache_create(slab_name,
59 sizeof(struct dccp_rx_hist_entry),
60 0, SLAB_HWCACHE_ALIGN,
61 NULL, NULL);
62 if (hist->dccprxh_slab == NULL)
63 goto out_free_slab_name;
64out:
65 return hist;
66out_free_slab_name:
67 kfree(slab_name);
68out_free_hist:
69 kfree(hist);
70 hist = NULL;
71 goto out;
72}
73
74EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
75
76void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
77{
78 const char* name = kmem_cache_name(hist->dccprxh_slab);
79
80 kmem_cache_destroy(hist->dccprxh_slab);
81 kfree(name);
82 kfree(hist);
83}
84
85EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
86
87void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
88{
89 struct dccp_rx_hist_entry *entry, *next;
90
91 list_for_each_entry_safe(entry, next, list, dccphrx_node) {
92 list_del_init(&entry->dccphrx_node);
93 kmem_cache_free(hist->dccprxh_slab, entry);
94 }
95}
96
97EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
98
99struct dccp_rx_hist_entry *
100 dccp_rx_hist_find_data_packet(const struct list_head *list)
101{
102 struct dccp_rx_hist_entry *entry, *packet = NULL;
103
104 list_for_each_entry(entry, list, dccphrx_node)
105 if (entry->dccphrx_type == DCCP_PKT_DATA ||
106 entry->dccphrx_type == DCCP_PKT_DATAACK) {
107 packet = entry;
108 break;
109 }
110
111 return packet;
112}
113
114EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
115
116int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
117 struct list_head *rx_list,
118 struct list_head *li_list,
119 struct dccp_rx_hist_entry *packet)
120{
121 struct dccp_rx_hist_entry *entry, *next, *iter;
122 u8 num_later = 0;
123
124 iter = dccp_rx_hist_head(rx_list);
125 if (iter == NULL)
126 dccp_rx_hist_add_entry(rx_list, packet);
127 else {
128 const u64 seqno = packet->dccphrx_seqno;
129
130 if (after48(seqno, iter->dccphrx_seqno))
131 dccp_rx_hist_add_entry(rx_list, packet);
132 else {
133 if (dccp_rx_hist_entry_data_packet(iter))
134 num_later = 1;
135
136 list_for_each_entry_continue(iter, rx_list,
137 dccphrx_node) {
138 if (after48(seqno, iter->dccphrx_seqno)) {
139 dccp_rx_hist_add_entry(&iter->dccphrx_node,
140 packet);
141 goto trim_history;
142 }
143
144 if (dccp_rx_hist_entry_data_packet(iter))
145 num_later++;
146
147 if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
148 dccp_rx_hist_entry_delete(hist, packet);
149 return 1;
150 }
151 }
152
153 if (num_later < TFRC_RECV_NUM_LATE_LOSS)
154 dccp_rx_hist_add_entry(rx_list, packet);
155 /*
156 * FIXME: else what? should we destroy the packet
157 * like above?
158 */
159 }
160 }
161
162trim_history:
163 /*
164 * Trim history (remove all packets after the NUM_LATE_LOSS + 1
165 * data packets)
166 */
167 num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
168
169 if (!list_empty(li_list)) {
170 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
171 if (num_later == 0) {
172 list_del_init(&entry->dccphrx_node);
173 dccp_rx_hist_entry_delete(hist, entry);
174 } else if (dccp_rx_hist_entry_data_packet(entry))
175 --num_later;
176 }
177 } else {
178 int step = 0;
179 u8 win_count = 0; /* Not needed, but lets shut up gcc */
180 int tmp;
181 /*
182 * We have no loss interval history so we need at least one
183 * rtt:s of data packets to approximate rtt.
184 */
185 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
186 if (num_later == 0) {
187 switch (step) {
188 case 0:
189 step = 1;
190 /* OK, find next data packet */
191 num_later = 1;
192 break;
193 case 1:
194 step = 2;
195 /* OK, find next data packet */
196 num_later = 1;
197 win_count = entry->dccphrx_ccval;
198 break;
199 case 2:
200 tmp = win_count - entry->dccphrx_ccval;
201 if (tmp < 0)
202 tmp += TFRC_WIN_COUNT_LIMIT;
203 if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
204 /*
205 * We have found a packet older
206 * than one rtt remove the rest
207 */
208 step = 3;
209 } else /* OK, find next data packet */
210 num_later = 1;
211 break;
212 case 3:
213 list_del_init(&entry->dccphrx_node);
214 dccp_rx_hist_entry_delete(hist, entry);
215 break;
216 }
217 } else if (dccp_rx_hist_entry_data_packet(entry))
218 --num_later;
219 }
220 }
221
222 return 0;
223}
224
225EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
226
227u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
228 struct list_head *li_list, u8 *win_loss)
229{
230 struct dccp_rx_hist_entry *entry, *next, *packet;
231 struct dccp_rx_hist_entry *a_loss = NULL;
232 struct dccp_rx_hist_entry *b_loss = NULL;
233 u64 seq_loss = DCCP_MAX_SEQNO + 1;
234 u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
235
236 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
237 if (num_later == 0) {
238 b_loss = entry;
239 break;
240 } else if (dccp_rx_hist_entry_data_packet(entry))
241 --num_later;
242 }
243
244 if (b_loss == NULL)
245 goto out;
246
247 num_later = 1;
248 list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
249 if (num_later == 0) {
250 a_loss = entry;
251 break;
252 } else if (dccp_rx_hist_entry_data_packet(entry))
253 --num_later;
254 }
255
256 if (a_loss == NULL) {
257 if (list_empty(li_list)) {
258 /* no loss event have occured yet */
259 LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
260 "comparing to initial seqno\n",
261 __FUNCTION__);
262 goto out;
263 } else {
264 LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!",
265 __FUNCTION__);
266 goto out;
267 }
268 }
269
270 /* Locate a lost data packet */
271 entry = packet = b_loss;
272 list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
273 u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
274 packet->dccphrx_seqno);
275
276 if (delta != 0) {
277 if (dccp_rx_hist_entry_data_packet(packet))
278 --delta;
279 /*
280 * FIXME: check this, probably this % usage is because
281 * in earlier drafts the ndp count was just 8 bits
282 * long, but now it cam be up to 24 bits long.
283 */
284#if 0
285 if (delta % DCCP_NDP_LIMIT !=
286 (packet->dccphrx_ndp -
287 entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
288#endif
289 if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
290 seq_loss = entry->dccphrx_seqno;
291 dccp_inc_seqno(&seq_loss);
292 }
293 }
294 packet = entry;
295 if (packet == a_loss)
296 break;
297 }
298out:
299 if (seq_loss != DCCP_MAX_SEQNO + 1)
300 *win_loss = a_loss->dccphrx_ccval;
301 else
302 *win_loss = 0; /* Paranoia */
303
304 return seq_loss;
305}
306
307EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
308
309struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
310{
311 struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
312 static const char dccp_tx_hist_mask[] = "tx_hist_%s";
313 char *slab_name;
314
315 if (hist == NULL)
316 goto out;
317
318 slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
319 GFP_ATOMIC);
320 if (slab_name == NULL)
321 goto out_free_hist;
322
323 sprintf(slab_name, dccp_tx_hist_mask, name);
324 hist->dccptxh_slab = kmem_cache_create(slab_name,
325 sizeof(struct dccp_tx_hist_entry),
326 0, SLAB_HWCACHE_ALIGN,
327 NULL, NULL);
328 if (hist->dccptxh_slab == NULL)
329 goto out_free_slab_name;
330out:
331 return hist;
332out_free_slab_name:
333 kfree(slab_name);
334out_free_hist:
335 kfree(hist);
336 hist = NULL;
337 goto out;
338}
339
340EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
341
342void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
343{
344 const char* name = kmem_cache_name(hist->dccptxh_slab);
345
346 kmem_cache_destroy(hist->dccptxh_slab);
347 kfree(name);
348 kfree(hist);
349}
350
351EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
352
353struct dccp_tx_hist_entry *
354 dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
355{
356 struct dccp_tx_hist_entry *packet = NULL, *entry;
357
358 list_for_each_entry(entry, list, dccphtx_node)
359 if (entry->dccphtx_seqno == seq) {
360 packet = entry;
361 break;
362 }
363
364 return packet;
365}
366
367EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
368
369void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
370 struct list_head *list,
371 struct dccp_tx_hist_entry *packet)
372{
373 struct dccp_tx_hist_entry *next;
374
375 list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
376 list_del_init(&packet->dccphtx_node);
377 dccp_tx_hist_entry_delete(hist, packet);
378 }
379}
380
381EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
382
383void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
384{
385 struct dccp_tx_hist_entry *entry, *next;
386
387 list_for_each_entry_safe(entry, next, list, dccphtx_node) {
388 list_del_init(&entry->dccphtx_node);
389 dccp_tx_hist_entry_delete(hist, entry);
390 }
391}
392
393EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
394
395MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
396 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
397MODULE_DESCRIPTION("DCCP TFRC library");
398MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
new file mode 100644
index 000000000000..b375ebdb7dcf
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -0,0 +1,200 @@
1/*
2 * net/dccp/ccids/lib/packet_history.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, rereleased as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#ifndef _DCCP_PKT_HIST_
38#define _DCCP_PKT_HIST_
39
40#include <linux/config.h>
41#include <linux/list.h>
42#include <linux/slab.h>
43#include <linux/time.h>
44
45#include "../../dccp.h"
46
47/* Number of later packets received before one is considered lost */
48#define TFRC_RECV_NUM_LATE_LOSS 3
49
50#define TFRC_WIN_COUNT_PER_RTT 4
51#define TFRC_WIN_COUNT_LIMIT 16
52
53struct dccp_tx_hist_entry {
54 struct list_head dccphtx_node;
55 u64 dccphtx_seqno:48,
56 dccphtx_ccval:4,
57 dccphtx_sent:1;
58 u32 dccphtx_rtt;
59 struct timeval dccphtx_tstamp;
60};
61
62struct dccp_rx_hist_entry {
63 struct list_head dccphrx_node;
64 u64 dccphrx_seqno:48,
65 dccphrx_ccval:4,
66 dccphrx_type:4;
67 u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
68 struct timeval dccphrx_tstamp;
69};
70
71struct dccp_tx_hist {
72 kmem_cache_t *dccptxh_slab;
73};
74
75extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
76extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
77
78struct dccp_rx_hist {
79 kmem_cache_t *dccprxh_slab;
80};
81
82extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
83extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
84extern struct dccp_rx_hist_entry *
85 dccp_rx_hist_find_data_packet(const struct list_head *list);
86
87static inline struct dccp_tx_hist_entry *
88 dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
89 const unsigned int __nocast prio)
90{
91 struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
92 prio);
93
94 if (entry != NULL)
95 entry->dccphtx_sent = 0;
96
97 return entry;
98}
99
100static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
101 struct dccp_tx_hist_entry *entry)
102{
103 if (entry != NULL)
104 kmem_cache_free(hist->dccptxh_slab, entry);
105}
106
107extern struct dccp_tx_hist_entry *
108 dccp_tx_hist_find_entry(const struct list_head *list,
109 const u64 seq);
110
111static inline void dccp_tx_hist_add_entry(struct list_head *list,
112 struct dccp_tx_hist_entry *entry)
113{
114 list_add(&entry->dccphtx_node, list);
115}
116
117extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
118 struct list_head *list,
119 struct dccp_tx_hist_entry *next);
120
121extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
122 struct list_head *list);
123
124static inline struct dccp_tx_hist_entry *
125 dccp_tx_hist_head(struct list_head *list)
126{
127 struct dccp_tx_hist_entry *head = NULL;
128
129 if (!list_empty(list))
130 head = list_entry(list->next, struct dccp_tx_hist_entry,
131 dccphtx_node);
132 return head;
133}
134
135static inline struct dccp_rx_hist_entry *
136 dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
137 const struct sock *sk,
138 const u32 ndp,
139 const struct sk_buff *skb,
140 const unsigned int __nocast prio)
141{
142 struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
143 prio);
144
145 if (entry != NULL) {
146 const struct dccp_hdr *dh = dccp_hdr(skb);
147
148 entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
149 entry->dccphrx_ccval = dh->dccph_ccval;
150 entry->dccphrx_type = dh->dccph_type;
151 entry->dccphrx_ndp = ndp;
152 dccp_timestamp(sk, &entry->dccphrx_tstamp);
153 }
154
155 return entry;
156}
157
158static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
159 struct dccp_rx_hist_entry *entry)
160{
161 if (entry != NULL)
162 kmem_cache_free(hist->dccprxh_slab, entry);
163}
164
165extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
166 struct list_head *list);
167
168static inline void dccp_rx_hist_add_entry(struct list_head *list,
169 struct dccp_rx_hist_entry *entry)
170{
171 list_add(&entry->dccphrx_node, list);
172}
173
174static inline struct dccp_rx_hist_entry *
175 dccp_rx_hist_head(struct list_head *list)
176{
177 struct dccp_rx_hist_entry *head = NULL;
178
179 if (!list_empty(list))
180 head = list_entry(list->next, struct dccp_rx_hist_entry,
181 dccphrx_node);
182 return head;
183}
184
185static inline int
186 dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
187{
188 return entry->dccphrx_type == DCCP_PKT_DATA ||
189 entry->dccphrx_type == DCCP_PKT_DATAACK;
190}
191
192extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
193 struct list_head *rx_list,
194 struct list_head *li_list,
195 struct dccp_rx_hist_entry *packet);
196
197extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
198 struct list_head *li_list, u8 *win_loss);
199
200#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
new file mode 100644
index 000000000000..130c4c40cfe3
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -0,0 +1,22 @@
1#ifndef _TFRC_H_
2#define _TFRC_H_
3/*
4 * net/dccp/ccids/lib/tfrc.h
5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#include <linux/types.h>
18
19extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
20extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
21
22#endif /* _TFRC_H_ */
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
new file mode 100644
index 000000000000..d2b5933b4510
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -0,0 +1,644 @@
1/*
2 * net/dccp/ccids/lib/tfrc_equation.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 */
14
15#include <linux/config.h>
16#include <linux/module.h>
17
18#include <asm/bug.h>
19#include <asm/div64.h>
20
21#include "tfrc.h"
22
23#define TFRC_CALC_X_ARRSIZE 500
24
25#define TFRC_CALC_X_SPLIT 50000
26/* equivalent to 0.05 */
27
28static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
29 { 37172, 8172 },
30 { 53499, 11567 },
31 { 66664, 14180 },
32 { 78298, 16388 },
33 { 89021, 18339 },
34 { 99147, 20108 },
35 { 108858, 21738 },
36 { 118273, 23260 },
37 { 127474, 24693 },
38 { 136520, 26052 },
39 { 145456, 27348 },
40 { 154316, 28589 },
41 { 163130, 29783 },
42 { 171919, 30935 },
43 { 180704, 32049 },
44 { 189502, 33130 },
45 { 198328, 34180 },
46 { 207194, 35202 },
47 { 216114, 36198 },
48 { 225097, 37172 },
49 { 234153, 38123 },
50 { 243294, 39055 },
51 { 252527, 39968 },
52 { 261861, 40864 },
53 { 271305, 41743 },
54 { 280866, 42607 },
55 { 290553, 43457 },
56 { 300372, 44293 },
57 { 310333, 45117 },
58 { 320441, 45929 },
59 { 330705, 46729 },
60 { 341131, 47518 },
61 { 351728, 48297 },
62 { 362501, 49066 },
63 { 373460, 49826 },
64 { 384609, 50577 },
65 { 395958, 51320 },
66 { 407513, 52054 },
67 { 419281, 52780 },
68 { 431270, 53499 },
69 { 443487, 54211 },
70 { 455940, 54916 },
71 { 468635, 55614 },
72 { 481581, 56306 },
73 { 494785, 56991 },
74 { 508254, 57671 },
75 { 521996, 58345 },
76 { 536019, 59014 },
77 { 550331, 59677 },
78 { 564939, 60335 },
79 { 579851, 60988 },
80 { 595075, 61636 },
81 { 610619, 62279 },
82 { 626491, 62918 },
83 { 642700, 63553 },
84 { 659253, 64183 },
85 { 676158, 64809 },
86 { 693424, 65431 },
87 { 711060, 66050 },
88 { 729073, 66664 },
89 { 747472, 67275 },
90 { 766266, 67882 },
91 { 785464, 68486 },
92 { 805073, 69087 },
93 { 825103, 69684 },
94 { 845562, 70278 },
95 { 866460, 70868 },
96 { 887805, 71456 },
97 { 909606, 72041 },
98 { 931873, 72623 },
99 { 954614, 73202 },
100 { 977839, 73778 },
101 { 1001557, 74352 },
102 { 1025777, 74923 },
103 { 1050508, 75492 },
104 { 1075761, 76058 },
105 { 1101544, 76621 },
106 { 1127867, 77183 },
107 { 1154739, 77741 },
108 { 1182172, 78298 },
109 { 1210173, 78852 },
110 { 1238753, 79405 },
111 { 1267922, 79955 },
112 { 1297689, 80503 },
113 { 1328066, 81049 },
114 { 1359060, 81593 },
115 { 1390684, 82135 },
116 { 1422947, 82675 },
117 { 1455859, 83213 },
118 { 1489430, 83750 },
119 { 1523671, 84284 },
120 { 1558593, 84817 },
121 { 1594205, 85348 },
122 { 1630518, 85878 },
123 { 1667543, 86406 },
124 { 1705290, 86932 },
125 { 1743770, 87457 },
126 { 1782994, 87980 },
127 { 1822973, 88501 },
128 { 1863717, 89021 },
129 { 1905237, 89540 },
130 { 1947545, 90057 },
131 { 1990650, 90573 },
132 { 2034566, 91087 },
133 { 2079301, 91600 },
134 { 2124869, 92111 },
135 { 2171279, 92622 },
136 { 2218543, 93131 },
137 { 2266673, 93639 },
138 { 2315680, 94145 },
139 { 2365575, 94650 },
140 { 2416371, 95154 },
141 { 2468077, 95657 },
142 { 2520707, 96159 },
143 { 2574271, 96660 },
144 { 2628782, 97159 },
145 { 2684250, 97658 },
146 { 2740689, 98155 },
147 { 2798110, 98651 },
148 { 2856524, 99147 },
149 { 2915944, 99641 },
150 { 2976382, 100134 },
151 { 3037850, 100626 },
152 { 3100360, 101117 },
153 { 3163924, 101608 },
154 { 3228554, 102097 },
155 { 3294263, 102586 },
156 { 3361063, 103073 },
157 { 3428966, 103560 },
158 { 3497984, 104045 },
159 { 3568131, 104530 },
160 { 3639419, 105014 },
161 { 3711860, 105498 },
162 { 3785467, 105980 },
163 { 3860253, 106462 },
164 { 3936229, 106942 },
165 { 4013410, 107422 },
166 { 4091808, 107902 },
167 { 4171435, 108380 },
168 { 4252306, 108858 },
169 { 4334431, 109335 },
170 { 4417825, 109811 },
171 { 4502501, 110287 },
172 { 4588472, 110762 },
173 { 4675750, 111236 },
174 { 4764349, 111709 },
175 { 4854283, 112182 },
176 { 4945564, 112654 },
177 { 5038206, 113126 },
178 { 5132223, 113597 },
179 { 5227627, 114067 },
180 { 5324432, 114537 },
181 { 5422652, 115006 },
182 { 5522299, 115474 },
183 { 5623389, 115942 },
184 { 5725934, 116409 },
185 { 5829948, 116876 },
186 { 5935446, 117342 },
187 { 6042439, 117808 },
188 { 6150943, 118273 },
189 { 6260972, 118738 },
190 { 6372538, 119202 },
191 { 6485657, 119665 },
192 { 6600342, 120128 },
193 { 6716607, 120591 },
194 { 6834467, 121053 },
195 { 6953935, 121514 },
196 { 7075025, 121976 },
197 { 7197752, 122436 },
198 { 7322131, 122896 },
199 { 7448175, 123356 },
200 { 7575898, 123815 },
201 { 7705316, 124274 },
202 { 7836442, 124733 },
203 { 7969291, 125191 },
204 { 8103877, 125648 },
205 { 8240216, 126105 },
206 { 8378321, 126562 },
207 { 8518208, 127018 },
208 { 8659890, 127474 },
209 { 8803384, 127930 },
210 { 8948702, 128385 },
211 { 9095861, 128840 },
212 { 9244875, 129294 },
213 { 9395760, 129748 },
214 { 9548529, 130202 },
215 { 9703198, 130655 },
216 { 9859782, 131108 },
217 { 10018296, 131561 },
218 { 10178755, 132014 },
219 { 10341174, 132466 },
220 { 10505569, 132917 },
221 { 10671954, 133369 },
222 { 10840345, 133820 },
223 { 11010757, 134271 },
224 { 11183206, 134721 },
225 { 11357706, 135171 },
226 { 11534274, 135621 },
227 { 11712924, 136071 },
228 { 11893673, 136520 },
229 { 12076536, 136969 },
230 { 12261527, 137418 },
231 { 12448664, 137867 },
232 { 12637961, 138315 },
233 { 12829435, 138763 },
234 { 13023101, 139211 },
235 { 13218974, 139658 },
236 { 13417071, 140106 },
237 { 13617407, 140553 },
238 { 13819999, 140999 },
239 { 14024862, 141446 },
240 { 14232012, 141892 },
241 { 14441465, 142339 },
242 { 14653238, 142785 },
243 { 14867346, 143230 },
244 { 15083805, 143676 },
245 { 15302632, 144121 },
246 { 15523842, 144566 },
247 { 15747453, 145011 },
248 { 15973479, 145456 },
249 { 16201939, 145900 },
250 { 16432847, 146345 },
251 { 16666221, 146789 },
252 { 16902076, 147233 },
253 { 17140429, 147677 },
254 { 17381297, 148121 },
255 { 17624696, 148564 },
256 { 17870643, 149007 },
257 { 18119154, 149451 },
258 { 18370247, 149894 },
259 { 18623936, 150336 },
260 { 18880241, 150779 },
261 { 19139176, 151222 },
262 { 19400759, 151664 },
263 { 19665007, 152107 },
264 { 19931936, 152549 },
265 { 20201564, 152991 },
266 { 20473907, 153433 },
267 { 20748982, 153875 },
268 { 21026807, 154316 },
269 { 21307399, 154758 },
270 { 21590773, 155199 },
271 { 21876949, 155641 },
272 { 22165941, 156082 },
273 { 22457769, 156523 },
274 { 22752449, 156964 },
275 { 23049999, 157405 },
276 { 23350435, 157846 },
277 { 23653774, 158287 },
278 { 23960036, 158727 },
279 { 24269236, 159168 },
280 { 24581392, 159608 },
281 { 24896521, 160049 },
282 { 25214642, 160489 },
283 { 25535772, 160929 },
284 { 25859927, 161370 },
285 { 26187127, 161810 },
286 { 26517388, 162250 },
287 { 26850728, 162690 },
288 { 27187165, 163130 },
289 { 27526716, 163569 },
290 { 27869400, 164009 },
291 { 28215234, 164449 },
292 { 28564236, 164889 },
293 { 28916423, 165328 },
294 { 29271815, 165768 },
295 { 29630428, 166208 },
296 { 29992281, 166647 },
297 { 30357392, 167087 },
298 { 30725779, 167526 },
299 { 31097459, 167965 },
300 { 31472452, 168405 },
301 { 31850774, 168844 },
302 { 32232445, 169283 },
303 { 32617482, 169723 },
304 { 33005904, 170162 },
305 { 33397730, 170601 },
306 { 33792976, 171041 },
307 { 34191663, 171480 },
308 { 34593807, 171919 },
309 { 34999428, 172358 },
310 { 35408544, 172797 },
311 { 35821174, 173237 },
312 { 36237335, 173676 },
313 { 36657047, 174115 },
314 { 37080329, 174554 },
315 { 37507197, 174993 },
316 { 37937673, 175433 },
317 { 38371773, 175872 },
318 { 38809517, 176311 },
319 { 39250924, 176750 },
320 { 39696012, 177190 },
321 { 40144800, 177629 },
322 { 40597308, 178068 },
323 { 41053553, 178507 },
324 { 41513554, 178947 },
325 { 41977332, 179386 },
326 { 42444904, 179825 },
327 { 42916290, 180265 },
328 { 43391509, 180704 },
329 { 43870579, 181144 },
330 { 44353520, 181583 },
331 { 44840352, 182023 },
332 { 45331092, 182462 },
333 { 45825761, 182902 },
334 { 46324378, 183342 },
335 { 46826961, 183781 },
336 { 47333531, 184221 },
337 { 47844106, 184661 },
338 { 48358706, 185101 },
339 { 48877350, 185541 },
340 { 49400058, 185981 },
341 { 49926849, 186421 },
342 { 50457743, 186861 },
343 { 50992759, 187301 },
344 { 51531916, 187741 },
345 { 52075235, 188181 },
346 { 52622735, 188622 },
347 { 53174435, 189062 },
348 { 53730355, 189502 },
349 { 54290515, 189943 },
350 { 54854935, 190383 },
351 { 55423634, 190824 },
352 { 55996633, 191265 },
353 { 56573950, 191706 },
354 { 57155606, 192146 },
355 { 57741621, 192587 },
356 { 58332014, 193028 },
357 { 58926806, 193470 },
358 { 59526017, 193911 },
359 { 60129666, 194352 },
360 { 60737774, 194793 },
361 { 61350361, 195235 },
362 { 61967446, 195677 },
363 { 62589050, 196118 },
364 { 63215194, 196560 },
365 { 63845897, 197002 },
366 { 64481179, 197444 },
367 { 65121061, 197886 },
368 { 65765563, 198328 },
369 { 66414705, 198770 },
370 { 67068508, 199213 },
371 { 67726992, 199655 },
372 { 68390177, 200098 },
373 { 69058085, 200540 },
374 { 69730735, 200983 },
375 { 70408147, 201426 },
376 { 71090343, 201869 },
377 { 71777343, 202312 },
378 { 72469168, 202755 },
379 { 73165837, 203199 },
380 { 73867373, 203642 },
381 { 74573795, 204086 },
382 { 75285124, 204529 },
383 { 76001380, 204973 },
384 { 76722586, 205417 },
385 { 77448761, 205861 },
386 { 78179926, 206306 },
387 { 78916102, 206750 },
388 { 79657310, 207194 },
389 { 80403571, 207639 },
390 { 81154906, 208084 },
391 { 81911335, 208529 },
392 { 82672880, 208974 },
393 { 83439562, 209419 },
394 { 84211402, 209864 },
395 { 84988421, 210309 },
396 { 85770640, 210755 },
397 { 86558080, 211201 },
398 { 87350762, 211647 },
399 { 88148708, 212093 },
400 { 88951938, 212539 },
401 { 89760475, 212985 },
402 { 90574339, 213432 },
403 { 91393551, 213878 },
404 { 92218133, 214325 },
405 { 93048107, 214772 },
406 { 93883493, 215219 },
407 { 94724314, 215666 },
408 { 95570590, 216114 },
409 { 96422343, 216561 },
410 { 97279594, 217009 },
411 { 98142366, 217457 },
412 { 99010679, 217905 },
413 { 99884556, 218353 },
414 { 100764018, 218801 },
415 { 101649086, 219250 },
416 { 102539782, 219698 },
417 { 103436128, 220147 },
418 { 104338146, 220596 },
419 { 105245857, 221046 },
420 { 106159284, 221495 },
421 { 107078448, 221945 },
422 { 108003370, 222394 },
423 { 108934074, 222844 },
424 { 109870580, 223294 },
425 { 110812910, 223745 },
426 { 111761087, 224195 },
427 { 112715133, 224646 },
428 { 113675069, 225097 },
429 { 114640918, 225548 },
430 { 115612702, 225999 },
431 { 116590442, 226450 },
432 { 117574162, 226902 },
433 { 118563882, 227353 },
434 { 119559626, 227805 },
435 { 120561415, 228258 },
436 { 121569272, 228710 },
437 { 122583219, 229162 },
438 { 123603278, 229615 },
439 { 124629471, 230068 },
440 { 125661822, 230521 },
441 { 126700352, 230974 },
442 { 127745083, 231428 },
443 { 128796039, 231882 },
444 { 129853241, 232336 },
445 { 130916713, 232790 },
446 { 131986475, 233244 },
447 { 133062553, 233699 },
448 { 134144966, 234153 },
449 { 135233739, 234608 },
450 { 136328894, 235064 },
451 { 137430453, 235519 },
452 { 138538440, 235975 },
453 { 139652876, 236430 },
454 { 140773786, 236886 },
455 { 141901190, 237343 },
456 { 143035113, 237799 },
457 { 144175576, 238256 },
458 { 145322604, 238713 },
459 { 146476218, 239170 },
460 { 147636442, 239627 },
461 { 148803298, 240085 },
462 { 149976809, 240542 },
463 { 151156999, 241000 },
464 { 152343890, 241459 },
465 { 153537506, 241917 },
466 { 154737869, 242376 },
467 { 155945002, 242835 },
468 { 157158929, 243294 },
469 { 158379673, 243753 },
470 { 159607257, 244213 },
471 { 160841704, 244673 },
472 { 162083037, 245133 },
473 { 163331279, 245593 },
474 { 164586455, 246054 },
475 { 165848586, 246514 },
476 { 167117696, 246975 },
477 { 168393810, 247437 },
478 { 169676949, 247898 },
479 { 170967138, 248360 },
480 { 172264399, 248822 },
481 { 173568757, 249284 },
482 { 174880235, 249747 },
483 { 176198856, 250209 },
484 { 177524643, 250672 },
485 { 178857621, 251136 },
486 { 180197813, 251599 },
487 { 181545242, 252063 },
488 { 182899933, 252527 },
489 { 184261908, 252991 },
490 { 185631191, 253456 },
491 { 187007807, 253920 },
492 { 188391778, 254385 },
493 { 189783129, 254851 },
494 { 191181884, 255316 },
495 { 192588065, 255782 },
496 { 194001698, 256248 },
497 { 195422805, 256714 },
498 { 196851411, 257181 },
499 { 198287540, 257648 },
500 { 199731215, 258115 },
501 { 201182461, 258582 },
502 { 202641302, 259050 },
503 { 204107760, 259518 },
504 { 205581862, 259986 },
505 { 207063630, 260454 },
506 { 208553088, 260923 },
507 { 210050262, 261392 },
508 { 211555174, 261861 },
509 { 213067849, 262331 },
510 { 214588312, 262800 },
511 { 216116586, 263270 },
512 { 217652696, 263741 },
513 { 219196666, 264211 },
514 { 220748520, 264682 },
515 { 222308282, 265153 },
516 { 223875978, 265625 },
517 { 225451630, 266097 },
518 { 227035265, 266569 },
519 { 228626905, 267041 },
520 { 230226576, 267514 },
521 { 231834302, 267986 },
522 { 233450107, 268460 },
523 { 235074016, 268933 },
524 { 236706054, 269407 },
525 { 238346244, 269881 },
526 { 239994613, 270355 },
527 { 241651183, 270830 },
528 { 243315981, 271305 }
529};
530
531/* Calculate the send rate as per section 3.1 of RFC3448
532
533Returns send rate in bytes per second
534
535Integer maths and table lookups are used because floating point is not allowed in the kernel
536
537The function for Xcalc as per section 3.1 of RFC3448 is:
538
539X = s
540 -------------------------------------------------------------
541 R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
542
543where
544X is the transmit rate in bytes/second
545s is the packet size in bytes
546R is the round trip time in seconds
547p is the loss event rate, between 0 and 1.0, of the number of loss events
548 as a fraction of the number of packets transmitted
549t_RTO is the TCP retransmission timeout value in seconds
550b is the number of packets acknowledged by a single TCP acknowledgement
551
552we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes:
553
554X = s
555 -----------------------------------------------------------------------
556 R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
557
558
559which we can break down into:
560
561X = s
562 --------
563 R * f(p)
564
565where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
566
567Function parameters:
568s - bytes
569R - RTT in usecs
570p - loss rate (decimal fraction multiplied by 1,000,000)
571
572Returns Xcalc in bytes per second
573
574DON'T alter this code unless you run test cases against it as the code
575has been manipulated to stop underflow/overflow.
576
577*/
578u32 tfrc_calc_x(u16 s, u32 R, u32 p)
579{
580 int index;
581 u32 f;
582 u64 tmp1, tmp2;
583
584 if (p < TFRC_CALC_X_SPLIT)
585 index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
586 else
587 index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
588
589 if (index < 0)
590 /* p should be 0 unless there is a bug in my code */
591 index = 0;
592
593 if (R == 0)
594 R = 1; /* RTT can't be zero or else divide by zero */
595
596 BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
597
598 if (p >= TFRC_CALC_X_SPLIT)
599 f = tfrc_calc_x_lookup[index][0];
600 else
601 f = tfrc_calc_x_lookup[index][1];
602
603 tmp1 = ((u64)s * 100000000);
604 tmp2 = ((u64)R * (u64)f);
605 do_div(tmp2, 10000);
606 do_div(tmp1, tmp2);
607 /* Don't alter above math unless you test due to overflow on 32 bit */
608
609 return (u32)tmp1;
610}
611
612EXPORT_SYMBOL_GPL(tfrc_calc_x);
613
614/*
615 * args: fvalue - function value to match
616 * returns: p closest to that value
617 *
618 * both fvalue and p are multiplied by 1,000,000 to use ints
619 */
620u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
621{
622 int ctr = 0;
623 int small;
624
625 if (fvalue < tfrc_calc_x_lookup[0][1])
626 return 0;
627
628 if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
629 small = 1;
630 else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
631 return 1000000;
632 else
633 small = 0;
634
635 while (fvalue > tfrc_calc_x_lookup[ctr][small])
636 ctr++;
637
638 if (small)
639 return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
640 else
641 return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
642}
643
644EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
new file mode 100644
index 000000000000..95c4630b3b18
--- /dev/null
+++ b/net/dccp/dccp.h
@@ -0,0 +1,485 @@
1#ifndef _DCCP_H
2#define _DCCP_H
3/*
4 * net/dccp/dccp.h
5 *
6 * An implementation of the DCCP protocol
7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/config.h>
16#include <linux/dccp.h>
17#include <net/snmp.h>
18#include <net/sock.h>
19#include <net/tcp.h>
20
21#ifdef CONFIG_IP_DCCP_DEBUG
22extern int dccp_debug;
23
24#define dccp_pr_debug(format, a...) \
25 do { if (dccp_debug) \
26 printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
27 } while (0)
28#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
29 printk(format, ##a); } while (0)
30#else
31#define dccp_pr_debug(format, a...)
32#define dccp_pr_debug_cat(format, a...)
33#endif
34
35extern struct inet_hashinfo dccp_hashinfo;
36
37extern atomic_t dccp_orphan_count;
38extern int dccp_tw_count;
39extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
40
41extern void dccp_time_wait(struct sock *sk, int state, int timeo);
42
43/* FIXME: Right size this */
44#define DCCP_MAX_OPT_LEN 128
45
46#define DCCP_MAX_PACKET_HDR 32
47
48#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
49
50#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
51 * state, about 60 seconds */
52
53/* draft-ietf-dccp-spec-11.txt initial RTO value */
54#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
55
56/* Maximal interval between probes for local resources. */
57#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
58
59#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
60
61extern struct proto dccp_v4_prot;
62
63/* is seq1 < seq2 ? */
64static inline int before48(const u64 seq1, const u64 seq2)
65{
66 return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
67}
68
69/* is seq1 > seq2 ? */
70static inline int after48(const u64 seq1, const u64 seq2)
71{
72 return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
73}
74
75/* is seq2 <= seq1 <= seq3 ? */
76static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
77{
78 return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
79}
80
81static inline u64 max48(const u64 seq1, const u64 seq2)
82{
83 return after48(seq1, seq2) ? seq1 : seq2;
84}
85
86enum {
87 DCCP_MIB_NUM = 0,
88 DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
89 DCCP_MIB_ESTABRESETS, /* EstabResets */
90 DCCP_MIB_CURRESTAB, /* CurrEstab */
91 DCCP_MIB_OUTSEGS, /* OutSegs */
92 DCCP_MIB_OUTRSTS,
93 DCCP_MIB_ABORTONTIMEOUT,
94 DCCP_MIB_TIMEOUTS,
95 DCCP_MIB_ABORTFAILED,
96 DCCP_MIB_PASSIVEOPENS,
97 DCCP_MIB_ATTEMPTFAILS,
98 DCCP_MIB_OUTDATAGRAMS,
99 DCCP_MIB_INERRS,
100 DCCP_MIB_OPTMANDATORYERROR,
101 DCCP_MIB_INVALIDOPT,
102 __DCCP_MIB_MAX
103};
104
105#define DCCP_MIB_MAX __DCCP_MIB_MAX
106struct dccp_mib {
107 unsigned long mibs[DCCP_MIB_MAX];
108} __SNMP_MIB_ALIGN__;
109
110DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
111#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
112#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
113#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
114#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
115#define DCCP_ADD_STATS_BH(field, val) \
116 SNMP_ADD_STATS_BH(dccp_statistics, field, val)
117#define DCCP_ADD_STATS_USER(field, val) \
118 SNMP_ADD_STATS_USER(dccp_statistics, field, val)
119
120extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
121extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
122
123extern int dccp_send_response(struct sock *sk);
124extern void dccp_send_ack(struct sock *sk);
125extern void dccp_send_delayed_ack(struct sock *sk);
126extern void dccp_send_sync(struct sock *sk, const u64 seq,
127 const enum dccp_pkt_type pkt_type);
128
129extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
130extern void dccp_write_space(struct sock *sk);
131
132extern void dccp_init_xmit_timers(struct sock *sk);
/* Thin wrapper: hands @sk to inet_csk_clear_xmit_timers(). */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
137
138extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
139
140extern const char *dccp_packet_name(const int type);
141extern const char *dccp_state_name(const int state);
142
143static inline void dccp_set_state(struct sock *sk, const int state)
144{
145 const int oldstate = sk->sk_state;
146
147 dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
148 dccp_role(sk), sk,
149 dccp_state_name(oldstate), dccp_state_name(state));
150 WARN_ON(state == oldstate);
151
152 switch (state) {
153 case DCCP_OPEN:
154 if (oldstate != DCCP_OPEN)
155 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
156 break;
157
158 case DCCP_CLOSED:
159 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
160 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
161
162 sk->sk_prot->unhash(sk);
163 if (inet_csk(sk)->icsk_bind_hash != NULL &&
164 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
165 inet_put_port(&dccp_hashinfo, sk);
166 /* fall through */
167 default:
168 if (oldstate == DCCP_OPEN)
169 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
170 }
171
172 /* Change state AFTER socket is unhashed to avoid closed
173 * socket sitting in hash tables.
174 */
175 sk->sk_state = state;
176}
177
178static inline void dccp_done(struct sock *sk)
179{
180 dccp_set_state(sk, DCCP_CLOSED);
181 dccp_clear_xmit_timers(sk);
182
183 sk->sk_shutdown = SHUTDOWN_MASK;
184
185 if (!sock_flag(sk, SOCK_DEAD))
186 sk->sk_state_change(sk);
187 else
188 inet_csk_destroy_sock(sk);
189}
190
191static inline void dccp_openreq_init(struct request_sock *req,
192 struct dccp_sock *dp,
193 struct sk_buff *skb)
194{
195 /*
196 * FIXME: fill in the other req fields from the DCCP options
197 * received
198 */
199 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
200 inet_rsk(req)->acked = 0;
201 req->rcv_wnd = 0;
202}
203
204extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
205
206extern struct sock *dccp_create_openreq_child(struct sock *sk,
207 const struct request_sock *req,
208 const struct sk_buff *skb);
209
210extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
211
212extern void dccp_v4_err(struct sk_buff *skb, u32);
213
214extern int dccp_v4_rcv(struct sk_buff *skb);
215
216extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
217 struct sk_buff *skb,
218 struct request_sock *req,
219 struct dst_entry *dst);
220extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
221 struct request_sock *req,
222 struct request_sock **prev);
223
224extern int dccp_child_process(struct sock *parent, struct sock *child,
225 struct sk_buff *skb);
226extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
227 struct dccp_hdr *dh, unsigned len);
228extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
229 const struct dccp_hdr *dh, const unsigned len);
230
231extern void dccp_close(struct sock *sk, long timeout);
232extern struct sk_buff *dccp_make_response(struct sock *sk,
233 struct dst_entry *dst,
234 struct request_sock *req);
235extern struct sk_buff *dccp_make_reset(struct sock *sk,
236 struct dst_entry *dst,
237 enum dccp_reset_codes code);
238
239extern int dccp_connect(struct sock *sk);
240extern int dccp_disconnect(struct sock *sk, int flags);
241extern int dccp_getsockopt(struct sock *sk, int level, int optname,
242 char __user *optval, int __user *optlen);
243extern int dccp_setsockopt(struct sock *sk, int level, int optname,
244 char __user *optval, int optlen);
245extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
246extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
247 struct msghdr *msg, size_t size);
248extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
249 struct msghdr *msg, size_t len, int nonblock,
250 int flags, int *addr_len);
251extern void dccp_shutdown(struct sock *sk, int how);
252
253extern int dccp_v4_checksum(const struct sk_buff *skb,
254 const u32 saddr, const u32 daddr);
255
256extern int dccp_v4_send_reset(struct sock *sk,
257 enum dccp_reset_codes code);
258extern void dccp_send_close(struct sock *sk, const int active);
259
260struct dccp_skb_cb {
261 __u8 dccpd_type;
262 __u8 dccpd_reset_code;
263 __u8 dccpd_service;
264 __u8 dccpd_ccval;
265 __u64 dccpd_seq;
266 __u64 dccpd_ack_seq;
267 int dccpd_opt_len;
268};
269
270#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
271
272static inline int dccp_non_data_packet(const struct sk_buff *skb)
273{
274 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
275
276 return type == DCCP_PKT_ACK ||
277 type == DCCP_PKT_CLOSE ||
278 type == DCCP_PKT_CLOSEREQ ||
279 type == DCCP_PKT_RESET ||
280 type == DCCP_PKT_SYNC ||
281 type == DCCP_PKT_SYNCACK;
282}
283
284static inline int dccp_packet_without_ack(const struct sk_buff *skb)
285{
286 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
287
288 return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
289}
290
#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)

/*
 * Store @value in *@seqno reduced modulo 2^48.
 *
 * Callers pass the result of plain u64 arithmetic, which may have wrapped
 * below zero (e.g. GSS - W + 1 for a small GSS) as well as run past
 * DCCP_MAX_SEQNO.  Because 2^64 is a multiple of 2^48, masking with
 * DCCP_MAX_SEQNO yields the correct 48-bit value in both cases, whereas
 * subtracting 2^48 once only handles a single forward wrap.
 */
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
	*seqno = value & DCCP_MAX_SEQNO;
}
300
301static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
302{
303 return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
304}
305
306static inline void dccp_inc_seqno(u64 *seqno)
307{
308 if (++*seqno > DCCP_MAX_SEQNO)
309 *seqno = 0;
310}
311
312static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
313{
314 struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
315 sizeof(*dh));
316
317#if defined(__LITTLE_ENDIAN_BITFIELD)
318 dh->dccph_seq = htonl((gss >> 32)) >> 8;
319#elif defined(__BIG_ENDIAN_BITFIELD)
320 dh->dccph_seq = htonl((gss >> 32));
321#else
322#error "Adjust your <asm/byteorder.h> defines"
323#endif
324 dhx->dccph_seq_low = htonl(gss & 0xffffffff);
325}
326
327static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
328 const u64 gsr)
329{
330#if defined(__LITTLE_ENDIAN_BITFIELD)
331 dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
332#elif defined(__BIG_ENDIAN_BITFIELD)
333 dhack->dccph_ack_nr_high = htonl((gsr >> 32));
334#else
335#error "Adjust your <asm/byteorder.h> defines"
336#endif
337 dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
338}
339
340static inline void dccp_update_gsr(struct sock *sk, u64 seq)
341{
342 struct dccp_sock *dp = dccp_sk(sk);
343
344 dp->dccps_gsr = seq;
345 dccp_set_seqno(&dp->dccps_swl,
346 (dp->dccps_gsr + 1 -
347 (dp->dccps_options.dccpo_sequence_window / 4)));
348 dccp_set_seqno(&dp->dccps_swh,
349 (dp->dccps_gsr +
350 (3 * dp->dccps_options.dccpo_sequence_window) / 4));
351}
352
353static inline void dccp_update_gss(struct sock *sk, u64 seq)
354{
355 struct dccp_sock *dp = dccp_sk(sk);
356
357 dp->dccps_awh = dp->dccps_gss = seq;
358 dccp_set_seqno(&dp->dccps_awl,
359 (dp->dccps_gss -
360 dp->dccps_options.dccpo_sequence_window + 1));
361}
362
363extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
364extern void dccp_insert_option_elapsed_time(struct sock *sk,
365 struct sk_buff *skb,
366 u32 elapsed_time);
367extern void dccp_insert_option_timestamp(struct sock *sk,
368 struct sk_buff *skb);
369extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
370 unsigned char option,
371 const void *value, unsigned char len);
372
373extern struct socket *dccp_ctl_socket;
374
375#define DCCP_ACKPKTS_STATE_RECEIVED 0
376#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
377#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
378
379#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
380#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
381
382/** struct dccp_ackpkts - acknowledgeable packets
383 *
384 * This data structure is the one defined in the DCCP draft
385 * Appendix A.
386 *
387 * @dccpap_buf_head - circular buffer head
388 * @dccpap_buf_tail - circular buffer tail
389 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
390 * buffer (i.e. %dccpap_buf_head)
391 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
392 * by the buffer with State 0
393 *
394 * Additionally, the HC-Receiver must keep some information about the
395 * Ack Vectors it has recently sent. For each packet sent carrying an
396 * Ack Vector, it remembers four variables:
397 *
398 * @dccpap_ack_seqno - the Sequence Number used for the packet
399 * (HC-Receiver seqno)
400 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
401 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
402 * (HC-Sender seqno)
403 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
404 *
405 * @dccpap_buf_len - circular buffer length
406 * @dccpap_time - the time in usecs
407 * @dccpap_buf - circular buffer of acknowledgeable packets
408 */
409struct dccp_ackpkts {
410 unsigned int dccpap_buf_head;
411 unsigned int dccpap_buf_tail;
412 u64 dccpap_buf_ackno;
413 u64 dccpap_ack_seqno;
414 u64 dccpap_ack_ackno;
415 unsigned int dccpap_ack_ptr;
416 unsigned int dccpap_buf_vector_len;
417 unsigned int dccpap_ack_vector_len;
418 unsigned int dccpap_buf_len;
419 struct timeval dccpap_time;
420 u8 dccpap_buf_nonce;
421 u8 dccpap_ack_nonce;
422 u8 dccpap_buf[0];
423};
424
425extern struct dccp_ackpkts *
426 dccp_ackpkts_alloc(unsigned int len,
427 const unsigned int __nocast priority);
428extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
429extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
430 u64 ackno, u8 state);
431extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
432 struct sock *sk, u64 ackno);
433
434extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
435
436static inline suseconds_t timeval_usecs(const struct timeval *tv)
437{
438 return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
439}
440
441static inline suseconds_t timeval_delta(const struct timeval *large,
442 const struct timeval *small)
443{
444 time_t secs = large->tv_sec - small->tv_sec;
445 suseconds_t usecs = large->tv_usec - small->tv_usec;
446
447 if (usecs < 0) {
448 secs--;
449 usecs += USEC_PER_SEC;
450 }
451 return secs * USEC_PER_SEC + usecs;
452}
453
454static inline void timeval_add_usecs(struct timeval *tv,
455 const suseconds_t usecs)
456{
457 tv->tv_usec += usecs;
458 while (tv->tv_usec >= USEC_PER_SEC) {
459 tv->tv_sec++;
460 tv->tv_usec -= USEC_PER_SEC;
461 }
462}
463
464static inline void timeval_sub_usecs(struct timeval *tv,
465 const suseconds_t usecs)
466{
467 tv->tv_usec -= usecs;
468 while (tv->tv_usec < 0) {
469 tv->tv_sec--;
470 tv->tv_usec += USEC_PER_SEC;
471 }
472}
473
474#ifdef CONFIG_IP_DCCP_DEBUG
475extern void dccp_ackvector_print(const u64 ackno,
476 const unsigned char *vector, int len);
477extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
478#else
479static inline void dccp_ackvector_print(const u64 ackno,
480 const unsigned char *vector,
481 int len) { }
482static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
483#endif
484
485#endif /* _DCCP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
new file mode 100644
index 000000000000..f675d8e642d3
--- /dev/null
+++ b/net/dccp/diag.c
@@ -0,0 +1,71 @@
1/*
2 * net/dccp/diag.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@mandriva.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13
14#include <linux/module.h>
15#include <linux/inet_diag.h>
16
17#include "ccid.h"
18#include "dccp.h"
19
20static void dccp_get_info(struct sock *sk, struct tcp_info *info)
21{
22 struct dccp_sock *dp = dccp_sk(sk);
23 const struct inet_connection_sock *icsk = inet_csk(sk);
24
25 memset(info, 0, sizeof(*info));
26
27 info->tcpi_state = sk->sk_state;
28 info->tcpi_retransmits = icsk->icsk_retransmits;
29 info->tcpi_probes = icsk->icsk_probes_out;
30 info->tcpi_backoff = icsk->icsk_backoff;
31 info->tcpi_pmtu = dp->dccps_pmtu_cookie;
32
33 if (dp->dccps_options.dccpo_send_ack_vector)
34 info->tcpi_options |= TCPI_OPT_SACK;
35
36 ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
37 ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
38}
39
40static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
41 void *_info)
42{
43 r->idiag_rqueue = r->idiag_wqueue = 0;
44
45 if (_info != NULL)
46 dccp_get_info(sk, _info);
47}
48
49static struct inet_diag_handler dccp_diag_handler = {
50 .idiag_hashinfo = &dccp_hashinfo,
51 .idiag_get_info = dccp_diag_get_info,
52 .idiag_type = DCCPDIAG_GETSOCK,
53 .idiag_info_size = sizeof(struct tcp_info),
54};
55
56static int __init dccp_diag_init(void)
57{
58 return inet_diag_register(&dccp_diag_handler);
59}
60
61static void __exit dccp_diag_fini(void)
62{
63 inet_diag_unregister(&dccp_diag_handler);
64}
65
66module_init(dccp_diag_init);
67module_exit(dccp_diag_fini);
68
69MODULE_LICENSE("GPL");
70MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
71MODULE_DESCRIPTION("DCCP inet_diag handler");
diff --git a/net/dccp/input.c b/net/dccp/input.c
new file mode 100644
index 000000000000..c60bc3433f5e
--- /dev/null
+++ b/net/dccp/input.c
@@ -0,0 +1,600 @@
1/*
2 * net/dccp/input.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
22static void dccp_fin(struct sock *sk, struct sk_buff *skb)
23{
24 sk->sk_shutdown |= RCV_SHUTDOWN;
25 sock_set_flag(sk, SOCK_DONE);
26 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
27 __skb_queue_tail(&sk->sk_receive_queue, skb);
28 skb_set_owner_r(skb, sk);
29 sk->sk_data_ready(sk, 0);
30}
31
32static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
33{
34 dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
35 dccp_fin(sk, skb);
36 dccp_set_state(sk, DCCP_CLOSED);
37 sk_wake_async(sk, 1, POLL_HUP);
38}
39
40static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
41{
42 /*
43 * Step 7: Check for unexpected packet types
44 * If (S.is_server and P.type == CloseReq)
45 * Send Sync packet acknowledging P.seqno
46 * Drop packet and return
47 */
48 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
49 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
50 return;
51 }
52
53 dccp_set_state(sk, DCCP_CLOSING);
54 dccp_send_close(sk, 0);
55}
56
57static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
58{
59 struct dccp_sock *dp = dccp_sk(sk);
60
61 if (dp->dccps_options.dccpo_send_ack_vector)
62 dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
63 DCCP_SKB_CB(skb)->dccpd_ack_seq);
64}
65
66static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
67{
68 const struct dccp_hdr *dh = dccp_hdr(skb);
69 struct dccp_sock *dp = dccp_sk(sk);
70 u64 lswl, lawl;
71
72 /*
73 * Step 5: Prepare sequence numbers for Sync
74 * If P.type == Sync or P.type == SyncAck,
75 * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
76 * / * P is valid, so update sequence number variables
77 * accordingly. After this update, P will pass the tests
78 * in Step 6. A SyncAck is generated if necessary in
79 * Step 15 * /
80 * Update S.GSR, S.SWL, S.SWH
81 * Otherwise,
82 * Drop packet and return
83 */
84 if (dh->dccph_type == DCCP_PKT_SYNC ||
85 dh->dccph_type == DCCP_PKT_SYNCACK) {
86 if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
87 dp->dccps_awl, dp->dccps_awh) &&
88 !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
89 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
90 else
91 return -1;
92 }
93
94 /*
95 * Step 6: Check sequence numbers
96 * Let LSWL = S.SWL and LAWL = S.AWL
97 * If P.type == CloseReq or P.type == Close or P.type == Reset,
98 * LSWL := S.GSR + 1, LAWL := S.GAR
99 * If LSWL <= P.seqno <= S.SWH
100 * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
101 * Update S.GSR, S.SWL, S.SWH
102 * If P.type != Sync,
103 * Update S.GAR
104 * Otherwise,
105 * Send Sync packet acknowledging P.seqno
106 * Drop packet and return
107 */
108 lswl = dp->dccps_swl;
109 lawl = dp->dccps_awl;
110
111 if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
112 dh->dccph_type == DCCP_PKT_CLOSE ||
113 dh->dccph_type == DCCP_PKT_RESET) {
114 lswl = dp->dccps_gsr;
115 dccp_inc_seqno(&lswl);
116 lawl = dp->dccps_gar;
117 }
118
119 if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
120 (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
121 between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
122 lawl, dp->dccps_awh))) {
123 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
124
125 if (dh->dccph_type != DCCP_PKT_SYNC &&
126 (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
127 DCCP_PKT_WITHOUT_ACK_SEQ))
128 dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
129 } else {
130 LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
131 "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
132 "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
133 "sending SYNC...\n",
134 dccp_packet_name(dh->dccph_type),
135 (unsigned long long) lswl,
136 (unsigned long long)
137 DCCP_SKB_CB(skb)->dccpd_seq,
138 (unsigned long long) dp->dccps_swh,
139 (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
140 DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
141 (unsigned long long) lawl,
142 (unsigned long long)
143 DCCP_SKB_CB(skb)->dccpd_ack_seq,
144 (unsigned long long) dp->dccps_awh);
145 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
146 return -1;
147 }
148
149 return 0;
150}
151
152int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
153 const struct dccp_hdr *dh, const unsigned len)
154{
155 struct dccp_sock *dp = dccp_sk(sk);
156
157 if (dccp_check_seqno(sk, skb))
158 goto discard;
159
160 if (dccp_parse_options(sk, skb))
161 goto discard;
162
163 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
164 dccp_event_ack_recv(sk, skb);
165
166 /*
167 * FIXME: check ECN to see if we should use
168 * DCCP_ACKPKTS_STATE_ECN_MARKED
169 */
170 if (dp->dccps_options.dccpo_send_ack_vector) {
171 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
172
173 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
174 DCCP_SKB_CB(skb)->dccpd_seq,
175 DCCP_ACKPKTS_STATE_RECEIVED)) {
176 LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
177 "packets buffer full!\n");
178 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
179 inet_csk_schedule_ack(sk);
180 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
181 TCP_DELACK_MIN,
182 DCCP_RTO_MAX);
183 goto discard;
184 }
185
186 /*
187 * FIXME: this activation is probably wrong, have to study more
188 * TCP delack machinery and how it fits into DCCP draft, but
189 * for now it kinda "works" 8)
190 */
191 if (!inet_csk_ack_scheduled(sk)) {
192 inet_csk_schedule_ack(sk);
193 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
194 DCCP_RTO_MAX);
195 }
196 }
197
198 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
199 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
200
201 switch (dccp_hdr(skb)->dccph_type) {
202 case DCCP_PKT_DATAACK:
203 case DCCP_PKT_DATA:
204 /*
205 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
206 * option if it is.
207 */
208 __skb_pull(skb, dh->dccph_doff * 4);
209 __skb_queue_tail(&sk->sk_receive_queue, skb);
210 skb_set_owner_r(skb, sk);
211 sk->sk_data_ready(sk, 0);
212 return 0;
213 case DCCP_PKT_ACK:
214 goto discard;
215 case DCCP_PKT_RESET:
216 /*
217 * Step 9: Process Reset
218 * If P.type == Reset,
219 * Tear down connection
220 * S.state := TIMEWAIT
221 * Set TIMEWAIT timer
222 * Drop packet and return
223 */
224 dccp_fin(sk, skb);
225 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
226 return 0;
227 case DCCP_PKT_CLOSEREQ:
228 dccp_rcv_closereq(sk, skb);
229 goto discard;
230 case DCCP_PKT_CLOSE:
231 dccp_rcv_close(sk, skb);
232 return 0;
233 case DCCP_PKT_REQUEST:
234 /* Step 7
235 * or (S.is_server and P.type == Response)
236 * or (S.is_client and P.type == Request)
237 * or (S.state >= OPEN and P.type == Request
238 * and P.seqno >= S.OSR)
239 * or (S.state >= OPEN and P.type == Response
240 * and P.seqno >= S.OSR)
241 * or (S.state == RESPOND and P.type == Data),
242 * Send Sync packet acknowledging P.seqno
243 * Drop packet and return
244 */
245 if (dp->dccps_role != DCCP_ROLE_LISTEN)
246 goto send_sync;
247 goto check_seq;
248 case DCCP_PKT_RESPONSE:
249 if (dp->dccps_role != DCCP_ROLE_CLIENT)
250 goto send_sync;
251check_seq:
252 if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
253send_sync:
254 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
255 DCCP_PKT_SYNC);
256 }
257 break;
258 case DCCP_PKT_SYNC:
259 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
260 DCCP_PKT_SYNCACK);
261 /*
262 * From the draft:
263 *
264 * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
265 * MAY have non-zero-length application data areas, whose
266 * contents * receivers MUST ignore.
267 */
268 goto discard;
269 }
270
271 DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
272discard:
273 __kfree_skb(skb);
274 return 0;
275}
276
277static int dccp_rcv_request_sent_state_process(struct sock *sk,
278 struct sk_buff *skb,
279 const struct dccp_hdr *dh,
280 const unsigned len)
281{
282 /*
283 * Step 4: Prepare sequence numbers in REQUEST
284 * If S.state == REQUEST,
285 * If (P.type == Response or P.type == Reset)
286 * and S.AWL <= P.ackno <= S.AWH,
287 * / * Set sequence number variables corresponding to the
288 * other endpoint, so P will pass the tests in Step 6 * /
289 * Set S.GSR, S.ISR, S.SWL, S.SWH
290 * / * Response processing continues in Step 10; Reset
291 * processing continues in Step 9 * /
292 */
293 if (dh->dccph_type == DCCP_PKT_RESPONSE) {
294 const struct inet_connection_sock *icsk = inet_csk(sk);
295 struct dccp_sock *dp = dccp_sk(sk);
296
297 /* Stop the REQUEST timer */
298 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
299 BUG_TRAP(sk->sk_send_head != NULL);
300 __kfree_skb(sk->sk_send_head);
301 sk->sk_send_head = NULL;
302
303 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
304 dp->dccps_awl, dp->dccps_awh)) {
305 dccp_pr_debug("invalid ackno: S.AWL=%llu, "
306 "P.ackno=%llu, S.AWH=%llu \n",
307 (unsigned long long)dp->dccps_awl,
308 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
309 (unsigned long long)dp->dccps_awh);
310 goto out_invalid_packet;
311 }
312
313 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
314 dccp_update_gsr(sk, dp->dccps_isr);
315 /*
316 * SWL and AWL are initially adjusted so that they are not less than
317 * the initial Sequence Numbers received and sent, respectively:
318 * SWL := max(GSR + 1 - floor(W/4), ISR),
319 * AWL := max(GSS - W' + 1, ISS).
320 * These adjustments MUST be applied only at the beginning of the
321 * connection.
322 *
323 * AWL was adjusted in dccp_v4_connect -acme
324 */
325 dccp_set_seqno(&dp->dccps_swl,
326 max48(dp->dccps_swl, dp->dccps_isr));
327
328 if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
329 ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
330 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
331 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
332 /* FIXME: send appropriate RESET code */
333 goto out_invalid_packet;
334 }
335
336 dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
337
338 /*
339 * Step 10: Process REQUEST state (second part)
340 * If S.state == REQUEST,
341 * / * If we get here, P is a valid Response from the
342 * server (see Step 4), and we should move to
343 * PARTOPEN state. PARTOPEN means send an Ack,
344 * don't send Data packets, retransmit Acks
345 * periodically, and always include any Init Cookie
346 * from the Response * /
347 * S.state := PARTOPEN
348 * Set PARTOPEN timer
349 * Continue with S.state == PARTOPEN
350 * / * Step 12 will send the Ack completing the
351 * three-way handshake * /
352 */
353 dccp_set_state(sk, DCCP_PARTOPEN);
354
355 /* Make sure socket is routed, for correct metrics. */
356 inet_sk_rebuild_header(sk);
357
358 if (!sock_flag(sk, SOCK_DEAD)) {
359 sk->sk_state_change(sk);
360 sk_wake_async(sk, 0, POLL_OUT);
361 }
362
363 if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
364 icsk->icsk_accept_queue.rskq_defer_accept) {
365 /* Save one ACK. Data will be ready after
366 * several ticks, if write_pending is set.
367 *
368 * It may be deleted, but with this feature tcpdumps
369 * look so _wonderfully_ clever, that I was not able
370 * to stand against the temptation 8) --ANK
371 */
372 /*
373 * OK, in DCCP we can as well do a similar trick, its
374 * even in the draft, but there is no need for us to
375 * schedule an ack here, as dccp_sendmsg does this for
376 * us, also stated in the draft. -acme
377 */
378 __kfree_skb(skb);
379 return 0;
380 }
381 dccp_send_ack(sk);
382 return -1;
383 }
384
385out_invalid_packet:
386 return 1; /* dccp_v4_do_rcv will send a reset, but...
387 FIXME: the reset code should be
388 DCCP_RESET_CODE_PACKET_ERROR */
389}
390
391static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
392 struct sk_buff *skb,
393 const struct dccp_hdr *dh,
394 const unsigned len)
395{
396 int queued = 0;
397
398 switch (dh->dccph_type) {
399 case DCCP_PKT_RESET:
400 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
401 break;
402 case DCCP_PKT_DATAACK:
403 case DCCP_PKT_ACK:
404 /*
405 * FIXME: we should be reseting the PARTOPEN (DELACK) timer
406 * here but only if we haven't used the DELACK timer for
407 * something else, like sending a delayed ack for a TIMESTAMP
408 * echo, etc, for now were not clearing it, sending an extra
409 * ACK when there is nothing else to do in DELACK is not a big
410 * deal after all.
411 */
412
413 /* Stop the PARTOPEN timer */
414 if (sk->sk_state == DCCP_PARTOPEN)
415 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
416
417 dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
418 dccp_set_state(sk, DCCP_OPEN);
419
420 if (dh->dccph_type == DCCP_PKT_DATAACK) {
421 dccp_rcv_established(sk, skb, dh, len);
422 queued = 1; /* packet was queued
423 (by dccp_rcv_established) */
424 }
425 break;
426 }
427
428 return queued;
429}
430
431int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
432 struct dccp_hdr *dh, unsigned len)
433{
434 struct dccp_sock *dp = dccp_sk(sk);
435 const int old_state = sk->sk_state;
436 int queued = 0;
437
438 /*
439 * Step 3: Process LISTEN state
440 * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
441 *
442 * If S.state == LISTEN,
443 * If P.type == Request or P contains a valid Init Cookie
444 * option,
445 * * Must scan the packet's options to check for an Init
446 * Cookie. Only the Init Cookie is processed here,
447 * however; other options are processed in Step 8. This
448 * scan need only be performed if the endpoint uses Init
449 * Cookies *
450 * * Generate a new socket and switch to that socket *
451 * Set S := new socket for this port pair
452 * S.state = RESPOND
453 * Choose S.ISS (initial seqno) or set from Init Cookie
454 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
455 * Continue with S.state == RESPOND
456 * * A Response packet will be generated in Step 11 *
457 * Otherwise,
458 * Generate Reset(No Connection) unless P.type == Reset
459 * Drop packet and return
460 *
461 * NOTE: the check for the packet types is done in
462 * dccp_rcv_state_process
463 */
464 if (sk->sk_state == DCCP_LISTEN) {
465 if (dh->dccph_type == DCCP_PKT_REQUEST) {
466 if (dccp_v4_conn_request(sk, skb) < 0)
467 return 1;
468
469 /* FIXME: do congestion control initialization */
470 goto discard;
471 }
472 if (dh->dccph_type == DCCP_PKT_RESET)
473 goto discard;
474
475 /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
476 return 1;
477 }
478
479 if (sk->sk_state != DCCP_REQUESTING) {
480 if (dccp_check_seqno(sk, skb))
481 goto discard;
482
483 /*
484 * Step 8: Process options and mark acknowledgeable
485 */
486 if (dccp_parse_options(sk, skb))
487 goto discard;
488
489 if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
490 DCCP_PKT_WITHOUT_ACK_SEQ)
491 dccp_event_ack_recv(sk, skb);
492
493 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
494 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
495
496 /*
497 * FIXME: check ECN to see if we should use
498 * DCCP_ACKPKTS_STATE_ECN_MARKED
499 */
500 if (dp->dccps_options.dccpo_send_ack_vector) {
501 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
502 DCCP_SKB_CB(skb)->dccpd_seq,
503 DCCP_ACKPKTS_STATE_RECEIVED))
504 goto discard;
505 /*
506 * FIXME: this activation is probably wrong, have to
507 * study more TCP delack machinery and how it fits into
508 * DCCP draft, but for now it kinda "works" 8)
509 */
510 if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
511 DCCP_MAX_SEQNO + 1) &&
512 !inet_csk_ack_scheduled(sk)) {
513 inet_csk_schedule_ack(sk);
514 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
515 TCP_DELACK_MIN,
516 DCCP_RTO_MAX);
517 }
518 }
519 }
520
521 /*
522 * Step 9: Process Reset
523 * If P.type == Reset,
524 * Tear down connection
525 * S.state := TIMEWAIT
526 * Set TIMEWAIT timer
527 * Drop packet and return
528 */
529 if (dh->dccph_type == DCCP_PKT_RESET) {
530 /*
531 * Queue the equivalent of TCP fin so that dccp_recvmsg
532 * exits the loop
533 */
534 dccp_fin(sk, skb);
535 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
536 return 0;
537 /*
538 * Step 7: Check for unexpected packet types
539 * If (S.is_server and P.type == CloseReq)
540 * or (S.is_server and P.type == Response)
541 * or (S.is_client and P.type == Request)
542 * or (S.state == RESPOND and P.type == Data),
543 * Send Sync packet acknowledging P.seqno
544 * Drop packet and return
545 */
546 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
547 (dh->dccph_type == DCCP_PKT_RESPONSE ||
548 dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
549 (dp->dccps_role == DCCP_ROLE_CLIENT &&
550 dh->dccph_type == DCCP_PKT_REQUEST) ||
551 (sk->sk_state == DCCP_RESPOND &&
552 dh->dccph_type == DCCP_PKT_DATA)) {
553 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
554 DCCP_PKT_SYNC);
555 goto discard;
556 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
557 dccp_rcv_closereq(sk, skb);
558 goto discard;
559 } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
560 dccp_rcv_close(sk, skb);
561 return 0;
562 }
563
564 switch (sk->sk_state) {
565 case DCCP_CLOSED:
566 return 1;
567
568 case DCCP_REQUESTING:
569 /* FIXME: do congestion control initialization */
570
571 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
572 if (queued >= 0)
573 return queued;
574
575 __kfree_skb(skb);
576 return 0;
577
578 case DCCP_RESPOND:
579 case DCCP_PARTOPEN:
580 queued = dccp_rcv_respond_partopen_state_process(sk, skb,
581 dh, len);
582 break;
583 }
584
585 if (dh->dccph_type == DCCP_PKT_ACK ||
586 dh->dccph_type == DCCP_PKT_DATAACK) {
587 switch (old_state) {
588 case DCCP_PARTOPEN:
589 sk->sk_state_change(sk);
590 sk_wake_async(sk, 0, POLL_OUT);
591 break;
592 }
593 }
594
595 if (!queued) {
596discard:
597 __kfree_skb(skb);
598 }
599 return 0;
600}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
new file mode 100644
index 000000000000..2afaa464e7f0
--- /dev/null
+++ b/net/dccp/ipv4.c
@@ -0,0 +1,1353 @@
1/*
2 * net/dccp/ipv4.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/icmp.h>
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/random.h>
19
20#include <net/icmp.h>
21#include <net/inet_hashtables.h>
22#include <net/sock.h>
23#include <net/tcp_states.h>
24#include <net/xfrm.h>
25
26#include "ccid.h"
27#include "dccp.h"
28
29struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
30 .lhash_lock = RW_LOCK_UNLOCKED,
31 .lhash_users = ATOMIC_INIT(0),
32 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
33 .portalloc_lock = SPIN_LOCK_UNLOCKED,
34 .port_rover = 1024 - 1,
35};
36
37EXPORT_SYMBOL_GPL(dccp_hashinfo);
38
39static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
40{
41 return inet_csk_get_port(&dccp_hashinfo, sk, snum);
42}
43
44static void dccp_v4_hash(struct sock *sk)
45{
46 inet_hash(&dccp_hashinfo, sk);
47}
48
49static void dccp_v4_unhash(struct sock *sk)
50{
51 inet_unhash(&dccp_hashinfo, sk);
52}
53
54/* called with local bh disabled */
55static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
56 struct inet_timewait_sock **twp)
57{
58 struct inet_sock *inet = inet_sk(sk);
59 const u32 daddr = inet->rcv_saddr;
60 const u32 saddr = inet->daddr;
61 const int dif = sk->sk_bound_dev_if;
62 INET_ADDR_COOKIE(acookie, saddr, daddr)
63 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
64 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
65 dccp_hashinfo.ehash_size);
66 struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
67 const struct sock *sk2;
68 const struct hlist_node *node;
69 struct inet_timewait_sock *tw;
70
71 write_lock(&head->lock);
72
73 /* Check TIME-WAIT sockets first. */
74 sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
75 tw = inet_twsk(sk2);
76
77 if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
78 goto not_unique;
79 }
80 tw = NULL;
81
82 /* And established part... */
83 sk_for_each(sk2, node, &head->chain) {
84 if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
85 goto not_unique;
86 }
87
88 /* Must record num and sport now. Otherwise we will see
89 * in hash table socket with a funny identity. */
90 inet->num = lport;
91 inet->sport = htons(lport);
92 sk->sk_hashent = hash;
93 BUG_TRAP(sk_unhashed(sk));
94 __sk_add_node(sk, &head->chain);
95 sock_prot_inc_use(sk->sk_prot);
96 write_unlock(&head->lock);
97
98 if (twp != NULL) {
99 *twp = tw;
100 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
101 } else if (tw != NULL) {
102 /* Silly. Should hash-dance instead... */
103 inet_twsk_deschedule(tw, &dccp_death_row);
104 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
105
106 inet_twsk_put(tw);
107 }
108
109 return 0;
110
111not_unique:
112 write_unlock(&head->lock);
113 return -EADDRNOTAVAIL;
114}
115
116/*
117 * Bind a port for a connect operation and hash it.
118 */
119static int dccp_v4_hash_connect(struct sock *sk)
120{
121 const unsigned short snum = inet_sk(sk)->num;
122 struct inet_bind_hashbucket *head;
123 struct inet_bind_bucket *tb;
124 int ret;
125
126 if (snum == 0) {
127 int rover;
128 int low = sysctl_local_port_range[0];
129 int high = sysctl_local_port_range[1];
130 int remaining = (high - low) + 1;
131 struct hlist_node *node;
132 struct inet_timewait_sock *tw = NULL;
133
134 local_bh_disable();
135
136 /* TODO. Actually it is not so bad idea to remove
137 * dccp_hashinfo.portalloc_lock before next submission to
138 * Linus.
139 * As soon as we touch this place at all it is time to think.
140 *
141 * Now it protects single _advisory_ variable
142 * dccp_hashinfo.port_rover, hence it is mostly useless.
143 * Code will work nicely if we just delete it, but
144 * I am afraid in contented case it will work not better or
145 * even worse: another cpu just will hit the same bucket
146 * and spin there.
147 * So some cpu salt could remove both contention and
148 * memory pingpong. Any ideas how to do this in a nice way?
149 */
150 spin_lock(&dccp_hashinfo.portalloc_lock);
151 rover = dccp_hashinfo.port_rover;
152
153 do {
154 rover++;
155 if ((rover < low) || (rover > high))
156 rover = low;
157 head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
158 dccp_hashinfo.bhash_size)];
159 spin_lock(&head->lock);
160
161 /* Does not bother with rcv_saddr checks,
162 * because the established check is already
163 * unique enough.
164 */
165 inet_bind_bucket_for_each(tb, node, &head->chain) {
166 if (tb->port == rover) {
167 BUG_TRAP(!hlist_empty(&tb->owners));
168 if (tb->fastreuse >= 0)
169 goto next_port;
170 if (!__dccp_v4_check_established(sk,
171 rover,
172 &tw))
173 goto ok;
174 goto next_port;
175 }
176 }
177
178 tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
179 head, rover);
180 if (tb == NULL) {
181 spin_unlock(&head->lock);
182 break;
183 }
184 tb->fastreuse = -1;
185 goto ok;
186
187 next_port:
188 spin_unlock(&head->lock);
189 } while (--remaining > 0);
190 dccp_hashinfo.port_rover = rover;
191 spin_unlock(&dccp_hashinfo.portalloc_lock);
192
193 local_bh_enable();
194
195 return -EADDRNOTAVAIL;
196
197ok:
198 /* All locks still held and bhs disabled */
199 dccp_hashinfo.port_rover = rover;
200 spin_unlock(&dccp_hashinfo.portalloc_lock);
201
202 inet_bind_hash(sk, tb, rover);
203 if (sk_unhashed(sk)) {
204 inet_sk(sk)->sport = htons(rover);
205 __inet_hash(&dccp_hashinfo, sk, 0);
206 }
207 spin_unlock(&head->lock);
208
209 if (tw != NULL) {
210 inet_twsk_deschedule(tw, &dccp_death_row);
211 inet_twsk_put(tw);
212 }
213
214 ret = 0;
215 goto out;
216 }
217
218 head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
219 dccp_hashinfo.bhash_size)];
220 tb = inet_csk(sk)->icsk_bind_hash;
221 spin_lock_bh(&head->lock);
222 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
223 __inet_hash(&dccp_hashinfo, sk, 0);
224 spin_unlock_bh(&head->lock);
225 return 0;
226 } else {
227 spin_unlock(&head->lock);
228 /* No definite answer... Walk to established hash table */
229 ret = __dccp_v4_check_established(sk, snum, NULL);
230out:
231 local_bh_enable();
232 return ret;
233 }
234}
235
236static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
237 int addr_len)
238{
239 struct inet_sock *inet = inet_sk(sk);
240 struct dccp_sock *dp = dccp_sk(sk);
241 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
242 struct rtable *rt;
243 u32 daddr, nexthop;
244 int tmp;
245 int err;
246
247 dp->dccps_role = DCCP_ROLE_CLIENT;
248
249 if (addr_len < sizeof(struct sockaddr_in))
250 return -EINVAL;
251
252 if (usin->sin_family != AF_INET)
253 return -EAFNOSUPPORT;
254
255 nexthop = daddr = usin->sin_addr.s_addr;
256 if (inet->opt != NULL && inet->opt->srr) {
257 if (daddr == 0)
258 return -EINVAL;
259 nexthop = inet->opt->faddr;
260 }
261
262 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
263 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
264 IPPROTO_DCCP,
265 inet->sport, usin->sin_port, sk);
266 if (tmp < 0)
267 return tmp;
268
269 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
270 ip_rt_put(rt);
271 return -ENETUNREACH;
272 }
273
274 if (inet->opt == NULL || !inet->opt->srr)
275 daddr = rt->rt_dst;
276
277 if (inet->saddr == 0)
278 inet->saddr = rt->rt_src;
279 inet->rcv_saddr = inet->saddr;
280
281 inet->dport = usin->sin_port;
282 inet->daddr = daddr;
283
284 dp->dccps_ext_header_len = 0;
285 if (inet->opt != NULL)
286 dp->dccps_ext_header_len = inet->opt->optlen;
287 /*
288 * Socket identity is still unknown (sport may be zero).
289 * However we set state to DCCP_REQUESTING and not releasing socket
290 * lock select source port, enter ourselves into the hash tables and
291 * complete initialization after this.
292 */
293 dccp_set_state(sk, DCCP_REQUESTING);
294 err = dccp_v4_hash_connect(sk);
295 if (err != 0)
296 goto failure;
297
298 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
299 if (err != 0)
300 goto failure;
301
302 /* OK, now commit destination to socket. */
303 sk_setup_caps(sk, &rt->u.dst);
304
305 dp->dccps_gar =
306 dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
307 inet->daddr,
308 inet->sport,
309 usin->sin_port);
310 dccp_update_gss(sk, dp->dccps_iss);
311
312 /*
313 * SWL and AWL are initially adjusted so that they are not less than
314 * the initial Sequence Numbers received and sent, respectively:
315 * SWL := max(GSR + 1 - floor(W/4), ISR),
316 * AWL := max(GSS - W' + 1, ISS).
317 * These adjustments MUST be applied only at the beginning of the
318 * connection.
319 */
320 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
321
322 inet->id = dp->dccps_iss ^ jiffies;
323
324 err = dccp_connect(sk);
325 rt = NULL;
326 if (err != 0)
327 goto failure;
328out:
329 return err;
330failure:
331 /*
332 * This unhashes the socket and releases the local port, if necessary.
333 */
334 dccp_set_state(sk, DCCP_CLOSED);
335 ip_rt_put(rt);
336 sk->sk_route_caps = 0;
337 inet->dport = 0;
338 goto out;
339}
340
341/*
342 * This routine does path mtu discovery as defined in RFC1191.
343 */
344static inline void dccp_do_pmtu_discovery(struct sock *sk,
345 const struct iphdr *iph,
346 u32 mtu)
347{
348 struct dst_entry *dst;
349 const struct inet_sock *inet = inet_sk(sk);
350 const struct dccp_sock *dp = dccp_sk(sk);
351
352 /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
353 * send out by Linux are always < 576bytes so they should go through
354 * unfragmented).
355 */
356 if (sk->sk_state == DCCP_LISTEN)
357 return;
358
359 /* We don't check in the destentry if pmtu discovery is forbidden
360 * on this route. We just assume that no packet_to_big packets
361 * are send back when pmtu discovery is not active.
362 * There is a small race when the user changes this flag in the
363 * route, but I think that's acceptable.
364 */
365 if ((dst = __sk_dst_check(sk, 0)) == NULL)
366 return;
367
368 dst->ops->update_pmtu(dst, mtu);
369
370 /* Something is about to be wrong... Remember soft error
371 * for the case, if this connection will not able to recover.
372 */
373 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
374 sk->sk_err_soft = EMSGSIZE;
375
376 mtu = dst_mtu(dst);
377
378 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
379 dp->dccps_pmtu_cookie > mtu) {
380 dccp_sync_mss(sk, mtu);
381
382 /*
383 * From: draft-ietf-dccp-spec-11.txt
384 *
385 * DCCP-Sync packets are the best choice for upward
386 * probing, since DCCP-Sync probes do not risk application
387 * data loss.
388 */
389 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
390 } /* else let the usual retransmit timer handle it */
391}
392
393static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
394{
395 int err;
396 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
397 const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
398 sizeof(struct dccp_hdr_ext) +
399 sizeof(struct dccp_hdr_ack_bits);
400 struct sk_buff *skb;
401
402 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
403 return;
404
405 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
406 if (skb == NULL)
407 return;
408
409 /* Reserve space for headers. */
410 skb_reserve(skb, MAX_DCCP_HEADER);
411
412 skb->dst = dst_clone(rxskb->dst);
413
414 skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
415 dh = dccp_hdr(skb);
416 memset(dh, 0, dccp_hdr_ack_len);
417
418 /* Build DCCP header and checksum it. */
419 dh->dccph_type = DCCP_PKT_ACK;
420 dh->dccph_sport = rxdh->dccph_dport;
421 dh->dccph_dport = rxdh->dccph_sport;
422 dh->dccph_doff = dccp_hdr_ack_len / 4;
423 dh->dccph_x = 1;
424
425 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
426 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
427 DCCP_SKB_CB(rxskb)->dccpd_seq);
428
429 bh_lock_sock(dccp_ctl_socket->sk);
430 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
431 rxskb->nh.iph->daddr,
432 rxskb->nh.iph->saddr, NULL);
433 bh_unlock_sock(dccp_ctl_socket->sk);
434
435 if (err == NET_XMIT_CN || err == 0) {
436 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
437 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
438 }
439}
440
/*
 * request_sock flavour of the Ack sender: @req is not needed, the Ack is
 * built purely from the received packet @skb by dccp_v4_ctl_send_ack().
 */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	struct sk_buff *rxskb = skb;

	dccp_v4_ctl_send_ack(rxskb);
}
446
447static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
448 struct dst_entry *dst)
449{
450 int err = -1;
451 struct sk_buff *skb;
452
453 /* First, grab a route. */
454
455 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
456 goto out;
457
458 skb = dccp_make_response(sk, dst, req);
459 if (skb != NULL) {
460 const struct inet_request_sock *ireq = inet_rsk(req);
461
462 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
463 ireq->rmt_addr,
464 ireq->opt);
465 if (err == NET_XMIT_CN)
466 err = 0;
467 }
468
469out:
470 dst_release(dst);
471 return err;
472}
473
474/*
475 * This routine is called by the ICMP module when it gets some sort of error
476 * condition. If err < 0 then the socket should be closed and the error
477 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
478 * After adjustment header points to the first 8 bytes of the tcp header. We
479 * need to find the appropriate port.
480 *
481 * The locking strategy used here is very "optimistic". When someone else
482 * accesses the socket the ICMP is just dropped and for some paths there is no
483 * check at all. A more general error queue to queue errors for later handling
484 * is probably better.
485 */
486void dccp_v4_err(struct sk_buff *skb, u32 info)
487{
488 const struct iphdr *iph = (struct iphdr *)skb->data;
489 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
490 (iph->ihl << 2));
491 struct dccp_sock *dp;
492 struct inet_sock *inet;
493 const int type = skb->h.icmph->type;
494 const int code = skb->h.icmph->code;
495 struct sock *sk;
496 __u64 seq;
497 int err;
498
499 if (skb->len < (iph->ihl << 2) + 8) {
500 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
501 return;
502 }
503
504 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
505 iph->saddr, dh->dccph_sport, inet_iif(skb));
506 if (sk == NULL) {
507 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
508 return;
509 }
510
511 if (sk->sk_state == DCCP_TIME_WAIT) {
512 inet_twsk_put((struct inet_timewait_sock *)sk);
513 return;
514 }
515
516 bh_lock_sock(sk);
517 /* If too many ICMPs get dropped on busy
518 * servers this needs to be solved differently.
519 */
520 if (sock_owned_by_user(sk))
521 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
522
523 if (sk->sk_state == DCCP_CLOSED)
524 goto out;
525
526 dp = dccp_sk(sk);
527 seq = dccp_hdr_seq(skb);
528 if (sk->sk_state != DCCP_LISTEN &&
529 !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
530 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
531 goto out;
532 }
533
534 switch (type) {
535 case ICMP_SOURCE_QUENCH:
536 /* Just silently ignore these. */
537 goto out;
538 case ICMP_PARAMETERPROB:
539 err = EPROTO;
540 break;
541 case ICMP_DEST_UNREACH:
542 if (code > NR_ICMP_UNREACH)
543 goto out;
544
545 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
546 if (!sock_owned_by_user(sk))
547 dccp_do_pmtu_discovery(sk, iph, info);
548 goto out;
549 }
550
551 err = icmp_err_convert[code].errno;
552 break;
553 case ICMP_TIME_EXCEEDED:
554 err = EHOSTUNREACH;
555 break;
556 default:
557 goto out;
558 }
559
560 switch (sk->sk_state) {
561 struct request_sock *req , **prev;
562 case DCCP_LISTEN:
563 if (sock_owned_by_user(sk))
564 goto out;
565 req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
566 iph->daddr, iph->saddr);
567 if (!req)
568 goto out;
569
570 /*
571 * ICMPs are not backlogged, hence we cannot get an established
572 * socket here.
573 */
574 BUG_TRAP(!req->sk);
575
576 if (seq != dccp_rsk(req)->dreq_iss) {
577 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
578 goto out;
579 }
580 /*
581 * Still in RESPOND, just remove it silently.
582 * There is no good way to pass the error to the newly
583 * created socket, and POSIX does not want network
584 * errors returned from accept().
585 */
586 inet_csk_reqsk_queue_drop(sk, req, prev);
587 goto out;
588
589 case DCCP_REQUESTING:
590 case DCCP_RESPOND:
591 if (!sock_owned_by_user(sk)) {
592 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
593 sk->sk_err = err;
594
595 sk->sk_error_report(sk);
596
597 dccp_done(sk);
598 } else
599 sk->sk_err_soft = err;
600 goto out;
601 }
602
603 /* If we've already connected we will keep trying
604 * until we time out, or the user gives up.
605 *
606 * rfc1122 4.2.3.9 allows to consider as hard errors
607 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
608 * but it is obsoleted by pmtu discovery).
609 *
610 * Note, that in modern internet, where routing is unreliable
611 * and in each dark corner broken firewalls sit, sending random
612 * errors ordered by their masters even this two messages finally lose
613 * their original sense (even Linux sends invalid PORT_UNREACHs)
614 *
615 * Now we are in compliance with RFCs.
616 * --ANK (980905)
617 */
618
619 inet = inet_sk(sk);
620 if (!sock_owned_by_user(sk) && inet->recverr) {
621 sk->sk_err = err;
622 sk->sk_error_report(sk);
623 } else /* Only an error on timeout */
624 sk->sk_err_soft = err;
625out:
626 bh_unlock_sock(sk);
627 sock_put(sk);
628}
629
630int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
631{
632 struct sk_buff *skb;
633 /*
634 * FIXME: what if rebuild_header fails?
635 * Should we be doing a rebuild_header here?
636 */
637 int err = inet_sk_rebuild_header(sk);
638
639 if (err != 0)
640 return err;
641
642 skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
643 if (skb != NULL) {
644 const struct inet_sock *inet = inet_sk(sk);
645
646 err = ip_build_and_send_pkt(skb, sk,
647 inet->saddr, inet->daddr, NULL);
648 if (err == NET_XMIT_CN)
649 err = 0;
650 }
651
652 return err;
653}
654
655static inline u64 dccp_v4_init_sequence(const struct sock *sk,
656 const struct sk_buff *skb)
657{
658 return secure_dccp_sequence_number(skb->nh.iph->daddr,
659 skb->nh.iph->saddr,
660 dccp_hdr(skb)->dccph_dport,
661 dccp_hdr(skb)->dccph_sport);
662}
663
664int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
665{
666 struct inet_request_sock *ireq;
667 struct dccp_sock dp;
668 struct request_sock *req;
669 struct dccp_request_sock *dreq;
670 const __u32 saddr = skb->nh.iph->saddr;
671 const __u32 daddr = skb->nh.iph->daddr;
672 struct dst_entry *dst = NULL;
673
674 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
675 if (((struct rtable *)skb->dst)->rt_flags &
676 (RTCF_BROADCAST | RTCF_MULTICAST))
677 goto drop;
678
679 /*
680 * TW buckets are converted to open requests without
681 * limitations, they conserve resources and peer is
682 * evidently real one.
683 */
684 if (inet_csk_reqsk_queue_is_full(sk))
685 goto drop;
686
687 /*
688 * Accept backlog is full. If we have already queued enough
689 * of warm entries in syn queue, drop request. It is better than
690 * clogging syn queue with openreqs with exponentially increasing
691 * timeout.
692 */
693 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
694 goto drop;
695
696 req = reqsk_alloc(sk->sk_prot->rsk_prot);
697 if (req == NULL)
698 goto drop;
699
700 /* FIXME: process options */
701
702 dccp_openreq_init(req, &dp, skb);
703
704 ireq = inet_rsk(req);
705 ireq->loc_addr = daddr;
706 ireq->rmt_addr = saddr;
707 /* FIXME: Merge Aristeu's option parsing code when ready */
708 req->rcv_wnd = 100; /* Fake, option parsing will get the
709 right value */
710 ireq->opt = NULL;
711
712 /*
713 * Step 3: Process LISTEN state
714 *
715 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
716 *
717 * In fact we defer setting S.GSR, S.SWL, S.SWH to
718 * dccp_create_openreq_child.
719 */
720 dreq = dccp_rsk(req);
721 dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
722 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
723 dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
724
725 if (dccp_v4_send_response(sk, req, dst))
726 goto drop_and_free;
727
728 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
729 return 0;
730
731drop_and_free:
732 /*
733 * FIXME: should be reqsk_free after implementing req->rsk_ops
734 */
735 __reqsk_free(req);
736drop:
737 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
738 return -1;
739}
740
741/*
742 * The three way handshake has completed - we got a valid ACK or DATAACK -
743 * now create the new socket.
744 *
745 * This is the equivalent of TCP's tcp_v4_syn_recv_sock
746 */
747struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
748 struct request_sock *req,
749 struct dst_entry *dst)
750{
751 struct inet_request_sock *ireq;
752 struct inet_sock *newinet;
753 struct dccp_sock *newdp;
754 struct sock *newsk;
755
756 if (sk_acceptq_is_full(sk))
757 goto exit_overflow;
758
759 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
760 goto exit;
761
762 newsk = dccp_create_openreq_child(sk, req, skb);
763 if (newsk == NULL)
764 goto exit;
765
766 sk_setup_caps(newsk, dst);
767
768 newdp = dccp_sk(newsk);
769 newinet = inet_sk(newsk);
770 ireq = inet_rsk(req);
771 newinet->daddr = ireq->rmt_addr;
772 newinet->rcv_saddr = ireq->loc_addr;
773 newinet->saddr = ireq->loc_addr;
774 newinet->opt = ireq->opt;
775 ireq->opt = NULL;
776 newinet->mc_index = inet_iif(skb);
777 newinet->mc_ttl = skb->nh.iph->ttl;
778 newinet->id = jiffies;
779
780 dccp_sync_mss(newsk, dst_mtu(dst));
781
782 __inet_hash(&dccp_hashinfo, newsk, 0);
783 __inet_inherit_port(&dccp_hashinfo, sk, newsk);
784
785 return newsk;
786
787exit_overflow:
788 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
789exit:
790 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
791 dst_release(dst);
792 return NULL;
793}
794
795static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
796{
797 const struct dccp_hdr *dh = dccp_hdr(skb);
798 const struct iphdr *iph = skb->nh.iph;
799 struct sock *nsk;
800 struct request_sock **prev;
801 /* Find possible connection requests. */
802 struct request_sock *req = inet_csk_search_req(sk, &prev,
803 dh->dccph_sport,
804 iph->saddr, iph->daddr);
805 if (req != NULL)
806 return dccp_check_req(sk, skb, req, prev);
807
808 nsk = __inet_lookup_established(&dccp_hashinfo,
809 iph->saddr, dh->dccph_sport,
810 iph->daddr, ntohs(dh->dccph_dport),
811 inet_iif(skb));
812 if (nsk != NULL) {
813 if (nsk->sk_state != DCCP_TIME_WAIT) {
814 bh_lock_sock(nsk);
815 return nsk;
816 }
817 inet_twsk_put((struct inet_timewait_sock *)nsk);
818 return NULL;
819 }
820
821 return sk;
822}
823
824int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
825 const u32 daddr)
826{
827 const struct dccp_hdr* dh = dccp_hdr(skb);
828 int checksum_len;
829 u32 tmp;
830
831 if (dh->dccph_cscov == 0)
832 checksum_len = skb->len;
833 else {
834 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
835 checksum_len = checksum_len < skb->len ? checksum_len :
836 skb->len;
837 }
838
839 tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
840 return csum_tcpudp_magic(saddr, daddr, checksum_len,
841 IPPROTO_DCCP, tmp);
842}
843
844static int dccp_v4_verify_checksum(struct sk_buff *skb,
845 const u32 saddr, const u32 daddr)
846{
847 struct dccp_hdr *dh = dccp_hdr(skb);
848 int checksum_len;
849 u32 tmp;
850
851 if (dh->dccph_cscov == 0)
852 checksum_len = skb->len;
853 else {
854 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
855 checksum_len = checksum_len < skb->len ? checksum_len :
856 skb->len;
857 }
858 tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
859 return csum_tcpudp_magic(saddr, daddr, checksum_len,
860 IPPROTO_DCCP, tmp) == 0 ? 0 : -1;
861}
862
863static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
864 struct sk_buff *skb)
865{
866 struct rtable *rt;
867 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
868 .nl_u = { .ip4_u =
869 { .daddr = skb->nh.iph->saddr,
870 .saddr = skb->nh.iph->daddr,
871 .tos = RT_CONN_FLAGS(sk) } },
872 .proto = sk->sk_protocol,
873 .uli_u = { .ports =
874 { .sport = dccp_hdr(skb)->dccph_dport,
875 .dport = dccp_hdr(skb)->dccph_sport }
876 }
877 };
878
879 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
880 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
881 return NULL;
882 }
883
884 return &rt->u.dst;
885}
886
887static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
888{
889 int err;
890 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
891 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
892 sizeof(struct dccp_hdr_ext) +
893 sizeof(struct dccp_hdr_reset);
894 struct sk_buff *skb;
895 struct dst_entry *dst;
896 u64 seqno;
897
898 /* Never send a reset in response to a reset. */
899 if (rxdh->dccph_type == DCCP_PKT_RESET)
900 return;
901
902 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
903 return;
904
905 dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
906 if (dst == NULL)
907 return;
908
909 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
910 if (skb == NULL)
911 goto out;
912
913 /* Reserve space for headers. */
914 skb_reserve(skb, MAX_DCCP_HEADER);
915 skb->dst = dst_clone(dst);
916
917 skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
918 dh = dccp_hdr(skb);
919 memset(dh, 0, dccp_hdr_reset_len);
920
921 /* Build DCCP header and checksum it. */
922 dh->dccph_type = DCCP_PKT_RESET;
923 dh->dccph_sport = rxdh->dccph_dport;
924 dh->dccph_dport = rxdh->dccph_sport;
925 dh->dccph_doff = dccp_hdr_reset_len / 4;
926 dh->dccph_x = 1;
927 dccp_hdr_reset(skb)->dccph_reset_code =
928 DCCP_SKB_CB(rxskb)->dccpd_reset_code;
929
930 /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
931 seqno = 0;
932 if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
933 dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
934
935 dccp_hdr_set_seq(dh, seqno);
936 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
937 DCCP_SKB_CB(rxskb)->dccpd_seq);
938
939 dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
940 rxskb->nh.iph->daddr);
941
942 bh_lock_sock(dccp_ctl_socket->sk);
943 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
944 rxskb->nh.iph->daddr,
945 rxskb->nh.iph->saddr, NULL);
946 bh_unlock_sock(dccp_ctl_socket->sk);
947
948 if (err == NET_XMIT_CN || err == 0) {
949 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
950 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
951 }
952out:
953 dst_release(dst);
954}
955
956int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
957{
958 struct dccp_hdr *dh = dccp_hdr(skb);
959
960 if (sk->sk_state == DCCP_OPEN) { /* Fast path */
961 if (dccp_rcv_established(sk, skb, dh, skb->len))
962 goto reset;
963 return 0;
964 }
965
966 /*
967 * Step 3: Process LISTEN state
968 * If S.state == LISTEN,
969 * If P.type == Request or P contains a valid Init Cookie
970 * option,
971 * * Must scan the packet's options to check for an Init
972 * Cookie. Only the Init Cookie is processed here,
973 * however; other options are processed in Step 8. This
974 * scan need only be performed if the endpoint uses Init
975 * Cookies *
976 * * Generate a new socket and switch to that socket *
977 * Set S := new socket for this port pair
978 * S.state = RESPOND
979 * Choose S.ISS (initial seqno) or set from Init Cookie
980 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
981 * Continue with S.state == RESPOND
982 * * A Response packet will be generated in Step 11 *
983 * Otherwise,
984 * Generate Reset(No Connection) unless P.type == Reset
985 * Drop packet and return
986 *
987 * NOTE: the check for the packet types is done in
988 * dccp_rcv_state_process
989 */
990 if (sk->sk_state == DCCP_LISTEN) {
991 struct sock *nsk = dccp_v4_hnd_req(sk, skb);
992
993 if (nsk == NULL)
994 goto discard;
995
996 if (nsk != sk) {
997 if (dccp_child_process(sk, nsk, skb))
998 goto reset;
999 return 0;
1000 }
1001 }
1002
1003 if (dccp_rcv_state_process(sk, skb, dh, skb->len))
1004 goto reset;
1005 return 0;
1006
1007reset:
1008 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
1009 dccp_v4_ctl_send_reset(skb);
1010discard:
1011 kfree_skb(skb);
1012 return 0;
1013}
1014
1015static inline int dccp_invalid_packet(struct sk_buff *skb)
1016{
1017 const struct dccp_hdr *dh;
1018
1019 if (skb->pkt_type != PACKET_HOST)
1020 return 1;
1021
1022 if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
1023 LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
1024 return 1;
1025 }
1026
1027 dh = dccp_hdr(skb);
1028
1029 /* If the packet type is not understood, drop packet and return */
1030 if (dh->dccph_type >= DCCP_PKT_INVALID) {
1031 LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
1032 return 1;
1033 }
1034
1035 /*
1036 * If P.Data Offset is too small for packet type, or too large for
1037 * packet, drop packet and return
1038 */
1039 if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
1040 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
1041 "too small 1\n",
1042 dh->dccph_doff);
1043 return 1;
1044 }
1045
1046 if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
1047 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
1048 "too small 2\n",
1049 dh->dccph_doff);
1050 return 1;
1051 }
1052
1053 dh = dccp_hdr(skb);
1054
1055 /*
1056 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
1057 * has short sequence numbers), drop packet and return
1058 */
1059 if (dh->dccph_x == 0 &&
1060 dh->dccph_type != DCCP_PKT_DATA &&
1061 dh->dccph_type != DCCP_PKT_ACK &&
1062 dh->dccph_type != DCCP_PKT_DATAACK) {
1063 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
1064 "nor DataAck and P.X == 0\n",
1065 dccp_packet_name(dh->dccph_type));
1066 return 1;
1067 }
1068
1069 /* If the header checksum is incorrect, drop packet and return */
1070 if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
1071 skb->nh.iph->daddr) < 0) {
1072 LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
1073 "incorrect\n");
1074 return 1;
1075 }
1076
1077 return 0;
1078}
1079
1080/* this is called when real data arrives */
1081int dccp_v4_rcv(struct sk_buff *skb)
1082{
1083 const struct dccp_hdr *dh;
1084 struct sock *sk;
1085 int rc;
1086
1087 /* Step 1: Check header basics: */
1088
1089 if (dccp_invalid_packet(skb))
1090 goto discard_it;
1091
1092 dh = dccp_hdr(skb);
1093#if 0
1094 /*
1095 * Use something like this to simulate some DATA/DATAACK loss to test
1096 * dccp_ackpkts_add, you'll get something like this on a session that
1097 * sends 10 DATA/DATAACK packets:
1098 *
1099 * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
1100 *
1101 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
1102 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
1103 * with the same state
1104 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
1105 *
1106 * So...
1107 *
1108 * 281473596467422 was received
1109 * 281473596467421 was not received
1110 * 281473596467420 was received
1111 * 281473596467419 was not received
1112 * 281473596467418 was received
1113 * 281473596467417 was not received
1114 * 281473596467416 was received
1115 * 281473596467415 was not received
1116 * 281473596467414 was received
1117 * 281473596467413 was received (this one was the 3way handshake
1118 * RESPONSE)
1119 *
1120 */
1121 if (dh->dccph_type == DCCP_PKT_DATA ||
1122 dh->dccph_type == DCCP_PKT_DATAACK) {
1123 static int discard = 0;
1124
1125 if (discard) {
1126 discard = 0;
1127 goto discard_it;
1128 }
1129 discard = 1;
1130 }
1131#endif
1132 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
1133 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
1134
1135 dccp_pr_debug("%8.8s "
1136 "src=%u.%u.%u.%u@%-5d "
1137 "dst=%u.%u.%u.%u@%-5d seq=%llu",
1138 dccp_packet_name(dh->dccph_type),
1139 NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
1140 NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
1141 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
1142
1143 if (dccp_packet_without_ack(skb)) {
1144 DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
1145 dccp_pr_debug_cat("\n");
1146 } else {
1147 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
1148 dccp_pr_debug_cat(", ack=%llu\n",
1149 (unsigned long long)
1150 DCCP_SKB_CB(skb)->dccpd_ack_seq);
1151 }
1152
1153 /* Step 2:
1154 * Look up flow ID in table and get corresponding socket */
1155 sk = __inet_lookup(&dccp_hashinfo,
1156 skb->nh.iph->saddr, dh->dccph_sport,
1157 skb->nh.iph->daddr, ntohs(dh->dccph_dport),
1158 inet_iif(skb));
1159
1160 /*
1161 * Step 2:
1162 * If no socket ...
1163 * Generate Reset(No Connection) unless P.type == Reset
1164 * Drop packet and return
1165 */
1166 if (sk == NULL) {
1167 dccp_pr_debug("failed to look up flow ID in table and "
1168 "get corresponding socket\n");
1169 goto no_dccp_socket;
1170 }
1171
1172 /*
1173 * Step 2:
1174 * ... or S.state == TIMEWAIT,
1175 * Generate Reset(No Connection) unless P.type == Reset
1176 * Drop packet and return
1177 */
1178
1179 if (sk->sk_state == DCCP_TIME_WAIT) {
1180 dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
1181 "do_time_wait\n");
1182 goto do_time_wait;
1183 }
1184
1185 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
1186 dccp_pr_debug("xfrm4_policy_check failed\n");
1187 goto discard_and_relse;
1188 }
1189
1190 if (sk_filter(sk, skb, 0)) {
1191 dccp_pr_debug("sk_filter failed\n");
1192 goto discard_and_relse;
1193 }
1194
1195 skb->dev = NULL;
1196
1197 bh_lock_sock(sk);
1198 rc = 0;
1199 if (!sock_owned_by_user(sk))
1200 rc = dccp_v4_do_rcv(sk, skb);
1201 else
1202 sk_add_backlog(sk, skb);
1203 bh_unlock_sock(sk);
1204
1205 sock_put(sk);
1206 return rc;
1207
1208no_dccp_socket:
1209 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1210 goto discard_it;
1211 /*
1212 * Step 2:
1213 * Generate Reset(No Connection) unless P.type == Reset
1214 * Drop packet and return
1215 */
1216 if (dh->dccph_type != DCCP_PKT_RESET) {
1217 DCCP_SKB_CB(skb)->dccpd_reset_code =
1218 DCCP_RESET_CODE_NO_CONNECTION;
1219 dccp_v4_ctl_send_reset(skb);
1220 }
1221
1222discard_it:
1223 /* Discard frame. */
1224 kfree_skb(skb);
1225 return 0;
1226
1227discard_and_relse:
1228 sock_put(sk);
1229 goto discard_it;
1230
1231do_time_wait:
1232 inet_twsk_put((struct inet_timewait_sock *)sk);
1233 goto no_dccp_socket;
1234}
1235
1236static int dccp_v4_init_sock(struct sock *sk)
1237{
1238 struct dccp_sock *dp = dccp_sk(sk);
1239 static int dccp_ctl_socket_init = 1;
1240
1241 dccp_options_init(&dp->dccps_options);
1242 do_gettimeofday(&dp->dccps_epoch);
1243
1244 if (dp->dccps_options.dccpo_send_ack_vector) {
1245 dp->dccps_hc_rx_ackpkts =
1246 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
1247 GFP_KERNEL);
1248
1249 if (dp->dccps_hc_rx_ackpkts == NULL)
1250 return -ENOMEM;
1251 }
1252
1253 /*
1254 * FIXME: We're hardcoding the CCID, and doing this at this point makes
1255 * the listening (master) sock get CCID control blocks, which is not
1256 * necessary, but for now, to not mess with the test userspace apps,
1257 * lets leave it here, later the real solution is to do this in a
1258 * setsockopt(CCIDs-I-want/accept). -acme
1259 */
1260 if (likely(!dccp_ctl_socket_init)) {
1261 dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
1262 sk);
1263 dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
1264 sk);
1265 if (dp->dccps_hc_rx_ccid == NULL ||
1266 dp->dccps_hc_tx_ccid == NULL) {
1267 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1268 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1269 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
1270 dp->dccps_hc_rx_ackpkts = NULL;
1271 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1272 return -ENOMEM;
1273 }
1274 } else
1275 dccp_ctl_socket_init = 0;
1276
1277 dccp_init_xmit_timers(sk);
1278 inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
1279 sk->sk_state = DCCP_CLOSED;
1280 sk->sk_write_space = dccp_write_space;
1281 dp->dccps_mss_cache = 536;
1282 dp->dccps_role = DCCP_ROLE_UNDEFINED;
1283
1284 return 0;
1285}
1286
1287static int dccp_v4_destroy_sock(struct sock *sk)
1288{
1289 struct dccp_sock *dp = dccp_sk(sk);
1290
1291 /*
1292 * DCCP doesn't use sk_qrite_queue, just sk_send_head
1293 * for retransmissions
1294 */
1295 if (sk->sk_send_head != NULL) {
1296 kfree_skb(sk->sk_send_head);
1297 sk->sk_send_head = NULL;
1298 }
1299
1300 /* Clean up a referenced DCCP bind bucket. */
1301 if (inet_csk(sk)->icsk_bind_hash != NULL)
1302 inet_put_port(&dccp_hashinfo, sk);
1303
1304 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
1305 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
1306 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
1307 dp->dccps_hc_rx_ackpkts = NULL;
1308 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1309 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1310 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1311
1312 return 0;
1313}
1314
1315static void dccp_v4_reqsk_destructor(struct request_sock *req)
1316{
1317 kfree(inet_rsk(req)->opt);
1318}
1319
1320static struct request_sock_ops dccp_request_sock_ops = {
1321 .family = PF_INET,
1322 .obj_size = sizeof(struct dccp_request_sock),
1323 .rtx_syn_ack = dccp_v4_send_response,
1324 .send_ack = dccp_v4_reqsk_send_ack,
1325 .destructor = dccp_v4_reqsk_destructor,
1326 .send_reset = dccp_v4_ctl_send_reset,
1327};
1328
1329struct proto dccp_v4_prot = {
1330 .name = "DCCP",
1331 .owner = THIS_MODULE,
1332 .close = dccp_close,
1333 .connect = dccp_v4_connect,
1334 .disconnect = dccp_disconnect,
1335 .ioctl = dccp_ioctl,
1336 .init = dccp_v4_init_sock,
1337 .setsockopt = dccp_setsockopt,
1338 .getsockopt = dccp_getsockopt,
1339 .sendmsg = dccp_sendmsg,
1340 .recvmsg = dccp_recvmsg,
1341 .backlog_rcv = dccp_v4_do_rcv,
1342 .hash = dccp_v4_hash,
1343 .unhash = dccp_v4_unhash,
1344 .accept = inet_csk_accept,
1345 .get_port = dccp_v4_get_port,
1346 .shutdown = dccp_shutdown,
1347 .destroy = dccp_v4_destroy_sock,
1348 .orphan_count = &dccp_orphan_count,
1349 .max_header = MAX_DCCP_HEADER,
1350 .obj_size = sizeof(struct dccp_sock),
1351 .rsk_prot = &dccp_request_sock_ops,
1352 .twsk_obj_size = sizeof(struct inet_timewait_sock),
1353};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
new file mode 100644
index 000000000000..18461bc04cbe
--- /dev/null
+++ b/net/dccp/minisocks.c
@@ -0,0 +1,265 @@
1/*
2 * net/dccp/minisocks.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16#include <linux/timer.h>
17
18#include <net/sock.h>
19#include <net/xfrm.h>
20#include <net/inet_timewait_sock.h>
21
22#include "ccid.h"
23#include "dccp.h"
24
25struct inet_timewait_death_row dccp_death_row = {
26 .sysctl_max_tw_buckets = NR_FILE * 2,
27 .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
28 .death_lock = SPIN_LOCK_UNLOCKED,
29 .hashinfo = &dccp_hashinfo,
30 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
31 (unsigned long)&dccp_death_row),
32 .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
33 inet_twdr_twkill_work,
34 &dccp_death_row),
35/* Short-time timewait calendar */
36
37 .twcal_hand = -1,
38 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
39 (unsigned long)&dccp_death_row),
40};
41
42void dccp_time_wait(struct sock *sk, int state, int timeo)
43{
44 struct inet_timewait_sock *tw = NULL;
45
46 if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
47 tw = inet_twsk_alloc(sk, state);
48
49 if (tw != NULL) {
50 const struct inet_connection_sock *icsk = inet_csk(sk);
51 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
52
53 /* Linkage updates. */
54 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
55
56 /* Get the TIME_WAIT timeout firing. */
57 if (timeo < rto)
58 timeo = rto;
59
60 tw->tw_timeout = DCCP_TIMEWAIT_LEN;
61 if (state == DCCP_TIME_WAIT)
62 timeo = DCCP_TIMEWAIT_LEN;
63
64 inet_twsk_schedule(tw, &dccp_death_row, timeo,
65 DCCP_TIMEWAIT_LEN);
66 inet_twsk_put(tw);
67 } else {
68 /* Sorry, if we're out of memory, just CLOSE this
69 * socket up. We've got bigger problems than
70 * non-graceful socket closings.
71 */
72 LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
73 "table overflow\n");
74 }
75
76 dccp_done(sk);
77}
78
79struct sock *dccp_create_openreq_child(struct sock *sk,
80 const struct request_sock *req,
81 const struct sk_buff *skb)
82{
83 /*
84 * Step 3: Process LISTEN state
85 *
86 * // Generate a new socket and switch to that socket
87 * Set S := new socket for this port pair
88 */
89 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
90
91 if (newsk != NULL) {
92 const struct dccp_request_sock *dreq = dccp_rsk(req);
93 struct inet_connection_sock *newicsk = inet_csk(sk);
94 struct dccp_sock *newdp = dccp_sk(newsk);
95
96 newdp->dccps_hc_rx_ackpkts = NULL;
97 newdp->dccps_role = DCCP_ROLE_SERVER;
98 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
99 do_gettimeofday(&newdp->dccps_epoch);
100
101 if (newdp->dccps_options.dccpo_send_ack_vector) {
102 newdp->dccps_hc_rx_ackpkts =
103 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
104 GFP_ATOMIC);
105 /*
106 * XXX: We're using the same CCIDs set on the parent,
107 * i.e. sk_clone copied the master sock and left the
108 * CCID pointers for this child, that is why we do the
109 * __ccid_get calls.
110 */
111 if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
112 goto out_free;
113 }
114
115 if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid,
116 newsk) != 0 ||
117 ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
118 newsk) != 0)) {
119 dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
120 ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
121 ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
122out_free:
123 /* It is still raw copy of parent, so invalidate
124 * destructor and make plain sk_free() */
125 newsk->sk_destruct = NULL;
126 sk_free(newsk);
127 return NULL;
128 }
129
130 __ccid_get(newdp->dccps_hc_rx_ccid);
131 __ccid_get(newdp->dccps_hc_tx_ccid);
132
133 /*
134 * Step 3: Process LISTEN state
135 *
136 * Choose S.ISS (initial seqno) or set from Init Cookie
137 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
138 * Cookie
139 */
140
141 /* See dccp_v4_conn_request */
142 newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
143
144 newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
145 dccp_update_gsr(newsk, dreq->dreq_isr);
146
147 newdp->dccps_iss = dreq->dreq_iss;
148 dccp_update_gss(newsk, dreq->dreq_iss);
149
150 /*
151 * SWL and AWL are initially adjusted so that they are not less than
152 * the initial Sequence Numbers received and sent, respectively:
153 * SWL := max(GSR + 1 - floor(W/4), ISR),
154 * AWL := max(GSS - W' + 1, ISS).
155 * These adjustments MUST be applied only at the beginning of the
156 * connection.
157 */
158 dccp_set_seqno(&newdp->dccps_swl,
159 max48(newdp->dccps_swl, newdp->dccps_isr));
160 dccp_set_seqno(&newdp->dccps_awl,
161 max48(newdp->dccps_awl, newdp->dccps_iss));
162
163 dccp_init_xmit_timers(newsk);
164
165 DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
166 }
167 return newsk;
168}
169
170/*
171 * Process an incoming packet for RESPOND sockets represented
172 * as an request_sock.
173 */
174struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
175 struct request_sock *req,
176 struct request_sock **prev)
177{
178 struct sock *child = NULL;
179
180 /* Check for retransmitted REQUEST */
181 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
182 if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
183 dccp_rsk(req)->dreq_isr)) {
184 struct dccp_request_sock *dreq = dccp_rsk(req);
185
186 dccp_pr_debug("Retransmitted REQUEST\n");
187 /* Send another RESPONSE packet */
188 dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
189 dccp_set_seqno(&dreq->dreq_isr,
190 DCCP_SKB_CB(skb)->dccpd_seq);
191 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
192 }
193 /* Network Duplicate, discard packet */
194 return NULL;
195 }
196
197 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
198
199 if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
200 dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
201 goto drop;
202
203 /* Invalid ACK */
204 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
205 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
206 "dreq_iss=%llu\n",
207 (unsigned long long)
208 DCCP_SKB_CB(skb)->dccpd_ack_seq,
209 (unsigned long long)
210 dccp_rsk(req)->dreq_iss);
211 goto drop;
212 }
213
214 child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
215 if (child == NULL)
216 goto listen_overflow;
217
218 /* FIXME: deal with options */
219
220 inet_csk_reqsk_queue_unlink(sk, req, prev);
221 inet_csk_reqsk_queue_removed(sk, req);
222 inet_csk_reqsk_queue_add(sk, req, child);
223out:
224 return child;
225listen_overflow:
226 dccp_pr_debug("listen_overflow!\n");
227 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
228drop:
229 if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
230 req->rsk_ops->send_reset(skb);
231
232 inet_csk_reqsk_queue_drop(sk, req, prev);
233 goto out;
234}
235
236/*
237 * Queue segment on the new socket if the new socket is active,
238 * otherwise we just shortcircuit this and continue with
239 * the new socket.
240 */
241int dccp_child_process(struct sock *parent, struct sock *child,
242 struct sk_buff *skb)
243{
244 int ret = 0;
245 const int state = child->sk_state;
246
247 if (!sock_owned_by_user(child)) {
248 ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
249 skb->len);
250
251 /* Wakeup parent, send SIGIO */
252 if (state == DCCP_RESPOND && child->sk_state != state)
253 parent->sk_data_ready(parent, 0);
254 } else {
255 /* Alas, it is possible again, because we do lookup
256 * in main socket hash table and lock on listening
257 * socket does not protect us more.
258 */
259 sk_add_backlog(child, skb);
260 }
261
262 bh_unlock_sock(child);
263 sock_put(child);
264 return ret;
265}
diff --git a/net/dccp/options.c b/net/dccp/options.c
new file mode 100644
index 000000000000..d4c4242d8dd7
--- /dev/null
+++ b/net/dccp/options.c
@@ -0,0 +1,887 @@
1/*
2 * net/dccp/options.c
3 *
4 * An implementation of the DCCP protocol
5 * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14#include <linux/config.h>
15#include <linux/dccp.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/skbuff.h>
20
21#include "ccid.h"
22#include "dccp.h"
23
24static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
25 struct sock *sk,
26 const u64 ackno,
27 const unsigned char len,
28 const unsigned char *vector);
29
30/* stores the default values for new connection. may be changed with sysctl */
31static const struct dccp_options dccpo_default_values = {
32 .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
33 .dccpo_ccid = DCCPF_INITIAL_CCID,
34 .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
35 .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
36};
37
38void dccp_options_init(struct dccp_options *dccpo)
39{
40 memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
41}
42
43static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
44{
45 u32 value = 0;
46
47 if (len > 3)
48 value += *bf++ << 24;
49 if (len > 2)
50 value += *bf++ << 16;
51 if (len > 1)
52 value += *bf++ << 8;
53 if (len > 0)
54 value += *bf;
55
56 return value;
57}
58
59int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
60{
61 struct dccp_sock *dp = dccp_sk(sk);
62#ifdef CONFIG_IP_DCCP_DEBUG
63 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
64 "CLIENT rx opt: " : "server rx opt: ";
65#endif
66 const struct dccp_hdr *dh = dccp_hdr(skb);
67 const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
68 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
69 unsigned char *opt_ptr = options;
70 const unsigned char *opt_end = (unsigned char *)dh +
71 (dh->dccph_doff * 4);
72 struct dccp_options_received *opt_recv = &dp->dccps_options_received;
73 unsigned char opt, len;
74 unsigned char *value;
75 u32 elapsed_time;
76
77 memset(opt_recv, 0, sizeof(*opt_recv));
78
79 while (opt_ptr != opt_end) {
80 opt = *opt_ptr++;
81 len = 0;
82 value = NULL;
83
84 /* Check if this isn't a single byte option */
85 if (opt > DCCPO_MAX_RESERVED) {
86 if (opt_ptr == opt_end)
87 goto out_invalid_option;
88
89 len = *opt_ptr++;
90 if (len < 3)
91 goto out_invalid_option;
92 /*
93 * Remove the type and len fields, leaving
94 * just the value size
95 */
96 len -= 2;
97 value = opt_ptr;
98 opt_ptr += len;
99
100 if (opt_ptr > opt_end)
101 goto out_invalid_option;
102 }
103
104 switch (opt) {
105 case DCCPO_PADDING:
106 break;
107 case DCCPO_NDP_COUNT:
108 if (len > 3)
109 goto out_invalid_option;
110
111 opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
112 dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
113 opt_recv->dccpor_ndp);
114 break;
115 case DCCPO_ACK_VECTOR_0:
116 if (len > DCCP_MAX_ACK_VECTOR_LEN)
117 goto out_invalid_option;
118
119 if (pkt_type == DCCP_PKT_DATA)
120 continue;
121
122 opt_recv->dccpor_ack_vector_len = len;
123 opt_recv->dccpor_ack_vector_idx = value - options;
124
125 dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
126 debug_prefix, len,
127 (unsigned long long)
128 DCCP_SKB_CB(skb)->dccpd_ack_seq);
129 dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
130 value, len);
131 dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
132 sk,
133 DCCP_SKB_CB(skb)->dccpd_ack_seq,
134 len, value);
135 break;
136 case DCCPO_TIMESTAMP:
137 if (len != 4)
138 goto out_invalid_option;
139
140 opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
141
142 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
143 dccp_timestamp(sk, &dp->dccps_timestamp_time);
144
145 dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
146 debug_prefix, opt_recv->dccpor_timestamp,
147 (unsigned long long)
148 DCCP_SKB_CB(skb)->dccpd_ack_seq);
149 break;
150 case DCCPO_TIMESTAMP_ECHO:
151 if (len != 4 && len != 6 && len != 8)
152 goto out_invalid_option;
153
154 opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
155
156 dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
157 debug_prefix,
158 opt_recv->dccpor_timestamp_echo,
159 len + 2,
160 (unsigned long long)
161 DCCP_SKB_CB(skb)->dccpd_ack_seq);
162
163
164 if (len == 4)
165 break;
166
167 if (len == 6)
168 elapsed_time = ntohs(*(u16 *)(value + 4));
169 else
170 elapsed_time = ntohl(*(u32 *)(value + 4));
171
172 /* Give precedence to the biggest ELAPSED_TIME */
173 if (elapsed_time > opt_recv->dccpor_elapsed_time)
174 opt_recv->dccpor_elapsed_time = elapsed_time;
175 break;
176 case DCCPO_ELAPSED_TIME:
177 if (len != 2 && len != 4)
178 goto out_invalid_option;
179
180 if (pkt_type == DCCP_PKT_DATA)
181 continue;
182
183 if (len == 2)
184 elapsed_time = ntohs(*(u16 *)value);
185 else
186 elapsed_time = ntohl(*(u32 *)value);
187
188 if (elapsed_time > opt_recv->dccpor_elapsed_time)
189 opt_recv->dccpor_elapsed_time = elapsed_time;
190
191 dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
192 elapsed_time);
193 break;
194 /*
195 * From draft-ietf-dccp-spec-11.txt:
196 *
197 * Option numbers 128 through 191 are for
198 * options sent from the HC-Sender to the
199 * HC-Receiver; option numbers 192 through 255
200 * are for options sent from the HC-Receiver to
201 * the HC-Sender.
202 */
203 case 128 ... 191: {
204 const u16 idx = value - options;
205
206 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
207 opt, len, idx,
208 value) != 0)
209 goto out_invalid_option;
210 }
211 break;
212 case 192 ... 255: {
213 const u16 idx = value - options;
214
215 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
216 opt, len, idx,
217 value) != 0)
218 goto out_invalid_option;
219 }
220 break;
221 default:
222 pr_info("DCCP(%p): option %d(len=%d) not "
223 "implemented, ignoring\n",
224 sk, opt, len);
225 break;
226 }
227 }
228
229 return 0;
230
231out_invalid_option:
232 DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
233 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
234 pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
235 return -1;
236}
237
238static void dccp_encode_value_var(const u32 value, unsigned char *to,
239 const unsigned int len)
240{
241 if (len > 3)
242 *to++ = (value & 0xFF000000) >> 24;
243 if (len > 2)
244 *to++ = (value & 0xFF0000) >> 16;
245 if (len > 1)
246 *to++ = (value & 0xFF00) >> 8;
247 if (len > 0)
248 *to++ = (value & 0xFF);
249}
250
/* Number of bytes (1, 2 or 3) needed to encode the NDP count @ndp. */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}
255
256void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
257 const unsigned char option,
258 const void *value, const unsigned char len)
259{
260 unsigned char *to;
261
262 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
263 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
264 "%d option!\n", option);
265 return;
266 }
267
268 DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
269
270 to = skb_push(skb, len + 2);
271 *to++ = option;
272 *to++ = len + 2;
273
274 memcpy(to, value, len);
275}
276
277EXPORT_SYMBOL_GPL(dccp_insert_option);
278
279static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
280{
281 struct dccp_sock *dp = dccp_sk(sk);
282 int ndp = dp->dccps_ndp_count;
283
284 if (dccp_non_data_packet(skb))
285 ++dp->dccps_ndp_count;
286 else
287 dp->dccps_ndp_count = 0;
288
289 if (ndp > 0) {
290 unsigned char *ptr;
291 const int ndp_len = dccp_ndp_len(ndp);
292 const int len = ndp_len + 2;
293
294 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
295 return;
296
297 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
298
299 ptr = skb_push(skb, len);
300 *ptr++ = DCCPO_NDP_COUNT;
301 *ptr++ = len;
302 dccp_encode_value_var(ndp, ptr, ndp_len);
303 }
304}
305
306static inline int dccp_elapsed_time_len(const u32 elapsed_time)
307{
308 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
309}
310
311void dccp_insert_option_elapsed_time(struct sock *sk,
312 struct sk_buff *skb,
313 u32 elapsed_time)
314{
315#ifdef CONFIG_IP_DCCP_DEBUG
316 struct dccp_sock *dp = dccp_sk(sk);
317 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
318 "CLIENT TX opt: " : "server TX opt: ";
319#endif
320 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
321 const int len = 2 + elapsed_time_len;
322 unsigned char *to;
323
324 if (elapsed_time_len == 0)
325 return;
326
327 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
328 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
329 "insert elapsed time!\n");
330 return;
331 }
332
333 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
334
335 to = skb_push(skb, len);
336 *to++ = DCCPO_ELAPSED_TIME;
337 *to++ = len;
338
339 if (elapsed_time_len == 2) {
340 const u16 var16 = htons((u16)elapsed_time);
341 memcpy(to, &var16, 2);
342 } else {
343 const u32 var32 = htonl(elapsed_time);
344 memcpy(to, &var32, 4);
345 }
346
347 dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
348 debug_prefix, elapsed_time,
349 len,
350 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
351}
352
353EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
354
355static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
356{
357 struct dccp_sock *dp = dccp_sk(sk);
358#ifdef CONFIG_IP_DCCP_DEBUG
359 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
360 "CLIENT TX opt: " : "server TX opt: ";
361#endif
362 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
363 int len = ap->dccpap_buf_vector_len + 2;
364 struct timeval now;
365 u32 elapsed_time;
366 unsigned char *to, *from;
367
368 dccp_timestamp(sk, &now);
369 elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10;
370
371 if (elapsed_time != 0)
372 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
373
374 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
375 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
376 "insert ACK Vector!\n");
377 return;
378 }
379
380 /*
381 * XXX: now we have just one ack vector sent record, so
382 * we have to wait for it to be cleared.
383 *
384 * Of course this is not acceptable, but this is just for
385 * basic testing now.
386 */
387 if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
388 return;
389
390 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
391
392 to = skb_push(skb, len);
393 *to++ = DCCPO_ACK_VECTOR_0;
394 *to++ = len;
395
396 len = ap->dccpap_buf_vector_len;
397 from = ap->dccpap_buf + ap->dccpap_buf_head;
398
399 /* Check if buf_head wraps */
400 if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
401 const unsigned int tailsize = (ap->dccpap_buf_len -
402 ap->dccpap_buf_head);
403
404 memcpy(to, from, tailsize);
405 to += tailsize;
406 len -= tailsize;
407 from = ap->dccpap_buf;
408 }
409
410 memcpy(to, from, len);
411 /*
412 * From draft-ietf-dccp-spec-11.txt:
413 *
414 * For each acknowledgement it sends, the HC-Receiver will add an
415 * acknowledgement record. ack_seqno will equal the HC-Receiver
416 * sequence number it used for the ack packet; ack_ptr will equal
417 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
418 * equal buf_nonce.
419 *
420 * This implemention uses just one ack record for now.
421 */
422 ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
423 ap->dccpap_ack_ptr = ap->dccpap_buf_head;
424 ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
425 ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
426 ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
427
428 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
429 "ack_ackno=%llu\n",
430 debug_prefix, ap->dccpap_ack_vector_len,
431 (unsigned long long) ap->dccpap_ack_seqno,
432 (unsigned long long) ap->dccpap_ack_ackno);
433}
434
435void dccp_timestamp(const struct sock *sk, struct timeval *tv)
436{
437 const struct dccp_sock *dp = dccp_sk(sk);
438
439 do_gettimeofday(tv);
440 tv->tv_sec -= dp->dccps_epoch.tv_sec;
441 tv->tv_usec -= dp->dccps_epoch.tv_usec;
442
443 while (tv->tv_usec < 0) {
444 tv->tv_sec--;
445 tv->tv_usec += USEC_PER_SEC;
446 }
447}
448
449EXPORT_SYMBOL_GPL(dccp_timestamp);
450
451void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
452{
453 struct timeval tv;
454 u32 now;
455
456 dccp_timestamp(sk, &tv);
457 now = timeval_usecs(&tv) / 10;
458 /* yes this will overflow but that is the point as we want a
459 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */
460
461 now = htonl(now);
462 dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
463}
464
465EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
466
467static void dccp_insert_option_timestamp_echo(struct sock *sk,
468 struct sk_buff *skb)
469{
470 struct dccp_sock *dp = dccp_sk(sk);
471#ifdef CONFIG_IP_DCCP_DEBUG
472 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
473 "CLIENT TX opt: " : "server TX opt: ";
474#endif
475 struct timeval now;
476 u32 tstamp_echo;
477 u32 elapsed_time;
478 int len, elapsed_time_len;
479 unsigned char *to;
480
481 dccp_timestamp(sk, &now);
482 elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10;
483 elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
484 len = 6 + elapsed_time_len;
485
486 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
487 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
488 "timestamp echo!\n");
489 return;
490 }
491
492 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
493
494 to = skb_push(skb, len);
495 *to++ = DCCPO_TIMESTAMP_ECHO;
496 *to++ = len;
497
498 tstamp_echo = htonl(dp->dccps_timestamp_echo);
499 memcpy(to, &tstamp_echo, 4);
500 to += 4;
501
502 if (elapsed_time_len == 2) {
503 const u16 var16 = htons((u16)elapsed_time);
504 memcpy(to, &var16, 2);
505 } else if (elapsed_time_len == 4) {
506 const u32 var32 = htonl(elapsed_time);
507 memcpy(to, &var32, 4);
508 }
509
510 dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
511 debug_prefix, dp->dccps_timestamp_echo,
512 len,
513 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
514
515 dp->dccps_timestamp_echo = 0;
516 dp->dccps_timestamp_time.tv_sec = 0;
517 dp->dccps_timestamp_time.tv_usec = 0;
518}
519
520void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
521{
522 struct dccp_sock *dp = dccp_sk(sk);
523
524 DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
525
526 if (dp->dccps_options.dccpo_send_ndp_count)
527 dccp_insert_option_ndp(sk, skb);
528
529 if (!dccp_packet_without_ack(skb)) {
530 if (dp->dccps_options.dccpo_send_ack_vector &&
531 (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
532 DCCP_MAX_SEQNO + 1))
533 dccp_insert_option_ack_vector(sk, skb);
534 if (dp->dccps_timestamp_echo != 0)
535 dccp_insert_option_timestamp_echo(sk, skb);
536 }
537
538 if (dp->dccps_hc_rx_insert_options) {
539 ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
540 dp->dccps_hc_rx_insert_options = 0;
541 }
542 if (dp->dccps_hc_tx_insert_options) {
543 ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
544 dp->dccps_hc_tx_insert_options = 0;
545 }
546
547 /* XXX: insert other options when appropriate */
548
549 if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
550 /* The length of all options has to be a multiple of 4 */
551 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
552
553 if (padding != 0) {
554 padding = 4 - padding;
555 memset(skb_push(skb, padding), 0, padding);
556 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
557 }
558 }
559}
560
561struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
562 const unsigned int __nocast priority)
563{
564 struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
565
566 if (ap != NULL) {
567#ifdef CONFIG_IP_DCCP_DEBUG
568 memset(ap->dccpap_buf, 0xFF, len);
569#endif
570 ap->dccpap_buf_len = len;
571 ap->dccpap_buf_head =
572 ap->dccpap_buf_tail =
573 ap->dccpap_buf_len - 1;
574 ap->dccpap_buf_ackno =
575 ap->dccpap_ack_ackno =
576 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
577 ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
578 ap->dccpap_ack_ptr = 0;
579 ap->dccpap_time.tv_sec = 0;
580 ap->dccpap_time.tv_usec = 0;
581 ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
582 }
583
584 return ap;
585}
586
/* Free an ack vector buffer; a NULL @ap is accepted and ignored. */
void dccp_ackpkts_free(struct dccp_ackpkts *ap)
{
	if (ap == NULL)
		return;

#ifdef CONFIG_IP_DCCP_DEBUG
	/* Poison header and vector before handing the memory back */
	memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
#endif
	kfree(ap);
}
596
597static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
598 const unsigned int index)
599{
600 return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
601}
602
603static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
604 const unsigned int index)
605{
606 return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
607}
608
609/*
610 * If several packets are missing, the HC-Receiver may prefer to enter multiple
611 * bytes with run length 0, rather than a single byte with a larger run length;
612 * this simplifies table updates if one of the missing packets arrives.
613 */
614static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
615 const unsigned int packets,
616 const unsigned char state)
617{
618 unsigned int gap;
619 signed long new_head;
620
621 if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
622 return -ENOBUFS;
623
624 gap = packets - 1;
625 new_head = ap->dccpap_buf_head - packets;
626
627 if (new_head < 0) {
628 if (gap > 0) {
629 memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
630 gap + new_head + 1);
631 gap = -new_head;
632 }
633 new_head += ap->dccpap_buf_len;
634 }
635
636 ap->dccpap_buf_head = new_head;
637
638 if (gap > 0)
639 memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
640 DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
641
642 ap->dccpap_buf[ap->dccpap_buf_head] = state;
643 ap->dccpap_buf_vector_len += packets;
644 return 0;
645}
646
647/*
648 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
649 */
650int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
651 u64 ackno, u8 state)
652{
653 /*
654 * Check at the right places if the buffer is full, if it is, tell the
655 * caller to start dropping packets till the HC-Sender acks our ACK
656 * vectors, when we will free up space in dccpap_buf.
657 *
658 * We may well decide to do buffer compression, etc, but for now lets
659 * just drop.
660 *
661 * From Appendix A:
662 *
663 * Of course, the circular buffer may overflow, either when the
664 * HC-Sender is sending data at a very high rate, when the
665 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
666 * or when the HC-Sender is forgetting to acknowledge those acks
667 * (so the HC-Receiver is unable to clean up old state). In this
668 * case, the HC-Receiver should either compress the buffer (by
669 * increasing run lengths when possible), transfer its state to
670 * a larger buffer, or, as a last resort, drop all received
671 * packets, without processing them whatsoever, until its buffer
672 * shrinks again.
673 */
674
675 /* See if this is the first ackno being inserted */
676 if (ap->dccpap_buf_vector_len == 0) {
677 ap->dccpap_buf[ap->dccpap_buf_head] = state;
678 ap->dccpap_buf_vector_len = 1;
679 } else if (after48(ackno, ap->dccpap_buf_ackno)) {
680 const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
681 ackno);
682
683 /*
684 * Look if the state of this packet is the same as the
685 * previous ackno and if so if we can bump the head len.
686 */
687 if (delta == 1 &&
688 dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
689 (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
690 DCCP_ACKPKTS_LEN_MASK))
691 ap->dccpap_buf[ap->dccpap_buf_head]++;
692 else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
693 return -ENOBUFS;
694 } else {
695 /*
696 * A.1.2. Old Packets
697 *
698 * When a packet with Sequence Number S arrives, and
699 * S <= buf_ackno, the HC-Receiver will scan the table
700 * for the byte corresponding to S. (Indexing structures
701 * could reduce the complexity of this scan.)
702 */
703 u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
704 unsigned int index = ap->dccpap_buf_head;
705
706 while (1) {
707 const u8 len = dccp_ackpkts_len(ap, index);
708 const u8 state = dccp_ackpkts_state(ap, index);
709 /*
710 * valid packets not yet in dccpap_buf have a reserved
711 * entry, with a len equal to 0.
712 */
713 if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
714 len == 0 && delta == 0) { /* Found our
715 reserved seat! */
716 dccp_pr_debug("Found %llu reserved seat!\n",
717 (unsigned long long) ackno);
718 ap->dccpap_buf[index] = state;
719 goto out;
720 }
721 /* len == 0 means one packet */
722 if (delta < len + 1)
723 goto out_duplicate;
724
725 delta -= len + 1;
726 if (++index == ap->dccpap_buf_len)
727 index = 0;
728 }
729 }
730
731 ap->dccpap_buf_ackno = ackno;
732 dccp_timestamp(sk, &ap->dccpap_time);
733out:
734 dccp_pr_debug("");
735 dccp_ackpkts_print(ap);
736 return 0;
737
738out_duplicate:
739 /* Duplicate packet */
740 dccp_pr_debug("Received a dup or already considered lost "
741 "packet: %llu\n", (unsigned long long) ackno);
742 return -EILSEQ;
743}
744
#ifdef CONFIG_IP_DCCP_DEBUG
/* Print "state,runlength|" for @len consecutive ack vector cells. */
static void dccp_ackvector_print_cells(const unsigned char *vector, int len)
{
	while (len-- > 0) {
		const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);

		printk("%d,%d|", state, rl);
		++vector;
	}
}

/*
 * dccp_ackvector_print - dump a linear ack vector, if dccp_debug is set
 * @ackno: sequence number the first cell refers to
 * @vector: first cell of the vector
 * @len: number of cells to print
 */
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
			  int len)
{
	if (!dccp_debug)
		return;

	printk("ACK vector len=%d, ackno=%llu |", len,
	       (unsigned long long) ackno);
	dccp_ackvector_print_cells(vector, len);
	printk("\n");
}

/*
 * dccp_ackpkts_print - dump the ack vector held in @ap, if dccp_debug is set
 *
 * dccpap_buf is circular, so the dccpap_buf_vector_len cells that start at
 * dccpap_buf_head may continue at index 0. They are printed as two linear
 * pieces on one line; printing them as a single run from the head, as the
 * original did, reads past the end of the buffer once the vector wraps.
 */
void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
{
	const unsigned int total = ap->dccpap_buf_vector_len;
	/* Cells from the head up to and including the last buffer cell */
	unsigned int upper = ap->dccpap_buf_len - ap->dccpap_buf_head;

	if (!dccp_debug)
		return;

	if (upper > total)
		upper = total;

	printk("ACK vector len=%d, ackno=%llu |", (int) total,
	       (unsigned long long) ap->dccpap_buf_ackno);
	dccp_ackvector_print_cells(ap->dccpap_buf + ap->dccpap_buf_head,
				   upper);
	dccp_ackvector_print_cells(ap->dccpap_buf, total - upper);
	printk("\n");
}
#endif
773
774static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
775{
776 /*
777 * As we're keeping track of the ack vector size
778 * (dccpap_buf_vector_len) and the sent ack vector size
779 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
780 * keep this code here as in the future we'll implement a vector of
781 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
782 * Appendix A. -acme
783 */
784#if 0
785 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
786 if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
787 ap->dccpap_buf_tail -= ap->dccpap_buf_len;
788#endif
789 ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
790}
791
792void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
793 u64 ackno)
794{
795 /* Check if we actually sent an ACK vector */
796 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
797 return;
798
799 if (ackno == ap->dccpap_ack_seqno) {
800#ifdef CONFIG_IP_DCCP_DEBUG
801 struct dccp_sock *dp = dccp_sk(sk);
802 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
803 "CLIENT rx ack: " : "server rx ack: ";
804#endif
805 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
806 "ack_ackno=%llu, ACKED!\n",
807 debug_prefix, 1,
808 (unsigned long long) ap->dccpap_ack_seqno,
809 (unsigned long long) ap->dccpap_ack_ackno);
810 dccp_ackpkts_trow_away_ack_record(ap);
811 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
812 }
813}
814
815static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
816 struct sock *sk, u64 ackno,
817 const unsigned char len,
818 const unsigned char *vector)
819{
820 unsigned char i;
821
822 /* Check if we actually sent an ACK vector */
823 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
824 return;
825 /*
826 * We're in the receiver half connection, so if the received an ACK
827 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
828 * not interested.
829 *
830 * Extra explanation with example:
831 *
832 * if we received an ACK vector with ackno 50, it can only be acking
833 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
834 */
835 /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
836 if (before48(ackno, ap->dccpap_ack_seqno)) {
837 /* dccp_pr_debug_cat("yes\n"); */
838 return;
839 }
840 /* dccp_pr_debug_cat("no\n"); */
841
842 i = len;
843 while (i--) {
844 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
845 u64 ackno_end_rl;
846
847 dccp_set_seqno(&ackno_end_rl, ackno - rl);
848
849 /*
850 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
851 * ap->dccpap_ack_seqno, ackno);
852 */
853 if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
854 const u8 state = (*vector &
855 DCCP_ACKPKTS_STATE_MASK) >> 6;
856 /* dccp_pr_debug_cat("yes\n"); */
857
858 if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
859#ifdef CONFIG_IP_DCCP_DEBUG
860 struct dccp_sock *dp = dccp_sk(sk);
861 const char *debug_prefix =
862 dp->dccps_role == DCCP_ROLE_CLIENT ?
863 "CLIENT rx ack: " : "server rx ack: ";
864#endif
865 dccp_pr_debug("%sACK vector 0, len=%d, "
866 "ack_seqno=%llu, ack_ackno=%llu, "
867 "ACKED!\n",
868 debug_prefix, len,
869 (unsigned long long)
870 ap->dccpap_ack_seqno,
871 (unsigned long long)
872 ap->dccpap_ack_ackno);
873 dccp_ackpkts_trow_away_ack_record(ap);
874 }
875 /*
876 * If dccpap_ack_seqno was not received, no problem
877 * we'll send another ACK vector.
878 */
879 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
880 break;
881 }
882 /* dccp_pr_debug_cat("no\n"); */
883
884 dccp_set_seqno(&ackno, ackno_end_rl - 1);
885 ++vector;
886 }
887}
diff --git a/net/dccp/output.c b/net/dccp/output.c
new file mode 100644
index 000000000000..ea6d0e91e511
--- /dev/null
+++ b/net/dccp/output.c
@@ -0,0 +1,525 @@
1/*
2 * net/dccp/output.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
22static inline void dccp_event_ack_sent(struct sock *sk)
23{
24 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
25}
26
27/*
28 * All SKB's seen here are completely headerless. It is our
29 * job to build the DCCP header, and pass the packet down to
30 * IP so it can do the same plus pass the packet off to the
31 * device.
32 */
33int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
34{
35 if (likely(skb != NULL)) {
36 const struct inet_sock *inet = inet_sk(sk);
37 struct dccp_sock *dp = dccp_sk(sk);
38 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
39 struct dccp_hdr *dh;
40 /* XXX For now we're using only 48 bits sequence numbers */
41 const int dccp_header_size = sizeof(*dh) +
42 sizeof(struct dccp_hdr_ext) +
43 dccp_packet_hdr_len(dcb->dccpd_type);
44 int err, set_ack = 1;
45 u64 ackno = dp->dccps_gsr;
46
47 dccp_inc_seqno(&dp->dccps_gss);
48
49 switch (dcb->dccpd_type) {
50 case DCCP_PKT_DATA:
51 set_ack = 0;
52 break;
53 case DCCP_PKT_SYNC:
54 case DCCP_PKT_SYNCACK:
55 ackno = dcb->dccpd_seq;
56 break;
57 }
58
59 dcb->dccpd_seq = dp->dccps_gss;
60 dccp_insert_options(sk, skb);
61
62 skb->h.raw = skb_push(skb, dccp_header_size);
63 dh = dccp_hdr(skb);
64 /*
65 * Data packets are not cloned as they are never retransmitted
66 */
67 if (skb_cloned(skb))
68 skb_set_owner_w(skb, sk);
69
70 /* Build DCCP header and checksum it. */
71 memset(dh, 0, dccp_header_size);
72 dh->dccph_type = dcb->dccpd_type;
73 dh->dccph_sport = inet->sport;
74 dh->dccph_dport = inet->dport;
75 dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
76 dh->dccph_ccval = dcb->dccpd_ccval;
77 /* XXX For now we're using only 48 bits sequence numbers */
78 dh->dccph_x = 1;
79
80 dp->dccps_awh = dp->dccps_gss;
81 dccp_hdr_set_seq(dh, dp->dccps_gss);
82 if (set_ack)
83 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
84
85 switch (dcb->dccpd_type) {
86 case DCCP_PKT_REQUEST:
87 dccp_hdr_request(skb)->dccph_req_service =
88 dcb->dccpd_service;
89 break;
90 case DCCP_PKT_RESET:
91 dccp_hdr_reset(skb)->dccph_reset_code =
92 dcb->dccpd_reset_code;
93 break;
94 }
95
96 dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
97 inet->daddr);
98
99 if (set_ack)
100 dccp_event_ack_sent(sk);
101
102 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
103
104 err = ip_queue_xmit(skb, 0);
105 if (err <= 0)
106 return err;
107
108 /* NET_XMIT_CN is special. It does not guarantee,
109 * that this packet is lost. It tells that device
110 * is about to start to drop packets or already
111 * drops some packets of the same priority and
112 * invokes us to send less aggressively.
113 */
114 return err == NET_XMIT_CN ? 0 : err;
115 }
116 return -ENOBUFS;
117}
118
119unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
120{
121 struct dccp_sock *dp = dccp_sk(sk);
122 int mss_now;
123
124 /*
125 * FIXME: we really should be using the af_specific thing to support
126 * IPv6.
127 * mss_now = pmtu - tp->af_specific->net_header_len -
128 * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
129 */
130 mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
131 sizeof(struct dccp_hdr_ext);
132
133 /* Now subtract optional transport overhead */
134 mss_now -= dp->dccps_ext_header_len;
135
136 /*
137 * FIXME: this should come from the CCID infrastructure, where, say,
138 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
139 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
140 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
141 * make it a multiple of 4
142 */
143
144 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
145
146 /* And store cached results */
147 dp->dccps_pmtu_cookie = pmtu;
148 dp->dccps_mss_cache = mss_now;
149
150 return mss_now;
151}
152
153void dccp_write_space(struct sock *sk)
154{
155 read_lock(&sk->sk_callback_lock);
156
157 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
158 wake_up_interruptible(sk->sk_sleep);
159 /* Should agree with poll, otherwise some programs break */
160 if (sock_writeable(sk))
161 sk_wake_async(sk, 2, POLL_OUT);
162
163 read_unlock(&sk->sk_callback_lock);
164}
165
166/**
167 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
168 * @sk: socket to wait for
169 * @timeo: for how long
170 */
171static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
172 long *timeo)
173{
174 struct dccp_sock *dp = dccp_sk(sk);
175 DEFINE_WAIT(wait);
176 long delay;
177 int rc;
178
179 while (1) {
180 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
181
182 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
183 goto do_error;
184 if (!*timeo)
185 goto do_nonblock;
186 if (signal_pending(current))
187 goto do_interrupted;
188
189 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
190 skb->len);
191 if (rc <= 0)
192 break;
193 delay = msecs_to_jiffies(rc);
194 if (delay > *timeo || delay < 0)
195 goto do_nonblock;
196
197 sk->sk_write_pending++;
198 release_sock(sk);
199 *timeo -= schedule_timeout(delay);
200 lock_sock(sk);
201 sk->sk_write_pending--;
202 }
203out:
204 finish_wait(sk->sk_sleep, &wait);
205 return rc;
206
207do_error:
208 rc = -EPIPE;
209 goto out;
210do_nonblock:
211 rc = -EAGAIN;
212 goto out;
213do_interrupted:
214 rc = sock_intr_errno(*timeo);
215 goto out;
216}
217
218int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
219{
220 const struct dccp_sock *dp = dccp_sk(sk);
221 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
222 skb->len);
223
224 if (err > 0)
225 err = dccp_wait_for_ccid(sk, skb, timeo);
226
227 if (err == 0) {
228 const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
229 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
230 const int len = skb->len;
231
232 if (sk->sk_state == DCCP_PARTOPEN) {
233 /* See 8.1.5. Handshake Completion */
234 inet_csk_schedule_ack(sk);
235 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
236 inet_csk(sk)->icsk_rto,
237 DCCP_RTO_MAX);
238 dcb->dccpd_type = DCCP_PKT_DATAACK;
239 /*
240 * FIXME: we really should have a
241 * dccps_ack_pending or use icsk.
242 */
243 } else if (inet_csk_ack_scheduled(sk) ||
244 dp->dccps_timestamp_echo != 0 ||
245 (dp->dccps_options.dccpo_send_ack_vector &&
246 ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
247 ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
248 dcb->dccpd_type = DCCP_PKT_DATAACK;
249 else
250 dcb->dccpd_type = DCCP_PKT_DATA;
251
252 err = dccp_transmit_skb(sk, skb);
253 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
254 }
255
256 return err;
257}
258
259int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
260{
261 if (inet_sk_rebuild_header(sk) != 0)
262 return -EHOSTUNREACH; /* Routing failure or similar. */
263
264 return dccp_transmit_skb(sk, (skb_cloned(skb) ?
265 pskb_copy(skb, GFP_ATOMIC):
266 skb_clone(skb, GFP_ATOMIC)));
267}
268
269struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
270 struct request_sock *req)
271{
272 struct dccp_hdr *dh;
273 const int dccp_header_size = sizeof(struct dccp_hdr) +
274 sizeof(struct dccp_hdr_ext) +
275 sizeof(struct dccp_hdr_response);
276 struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
277 dccp_header_size, 1,
278 GFP_ATOMIC);
279 if (skb == NULL)
280 return NULL;
281
282 /* Reserve space for headers. */
283 skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
284
285 skb->dst = dst_clone(dst);
286 skb->csum = 0;
287
288 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
289 DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
290 dccp_insert_options(sk, skb);
291
292 skb->h.raw = skb_push(skb, dccp_header_size);
293
294 dh = dccp_hdr(skb);
295 memset(dh, 0, dccp_header_size);
296
297 dh->dccph_sport = inet_sk(sk)->sport;
298 dh->dccph_dport = inet_rsk(req)->rmt_port;
299 dh->dccph_doff = (dccp_header_size +
300 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
301 dh->dccph_type = DCCP_PKT_RESPONSE;
302 dh->dccph_x = 1;
303 dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
304 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
305
306 dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
307 inet_rsk(req)->rmt_addr);
308
309 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
310 return skb;
311}
312
313struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
314 const enum dccp_reset_codes code)
315
316{
317 struct dccp_hdr *dh;
318 struct dccp_sock *dp = dccp_sk(sk);
319 const int dccp_header_size = sizeof(struct dccp_hdr) +
320 sizeof(struct dccp_hdr_ext) +
321 sizeof(struct dccp_hdr_reset);
322 struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
323 dccp_header_size, 1,
324 GFP_ATOMIC);
325 if (skb == NULL)
326 return NULL;
327
328 /* Reserve space for headers. */
329 skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
330
331 skb->dst = dst_clone(dst);
332 skb->csum = 0;
333
334 dccp_inc_seqno(&dp->dccps_gss);
335
336 DCCP_SKB_CB(skb)->dccpd_reset_code = code;
337 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
338 DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
339 dccp_insert_options(sk, skb);
340
341 skb->h.raw = skb_push(skb, dccp_header_size);
342
343 dh = dccp_hdr(skb);
344 memset(dh, 0, dccp_header_size);
345
346 dh->dccph_sport = inet_sk(sk)->sport;
347 dh->dccph_dport = inet_sk(sk)->dport;
348 dh->dccph_doff = (dccp_header_size +
349 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
350 dh->dccph_type = DCCP_PKT_RESET;
351 dh->dccph_x = 1;
352 dccp_hdr_set_seq(dh, dp->dccps_gss);
353 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
354
355 dccp_hdr_reset(skb)->dccph_reset_code = code;
356
357 dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
358 inet_sk(sk)->daddr);
359
360 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
361 return skb;
362}
363
364/*
365 * Do all connect socket setups that can be done AF independent.
366 */
367static inline void dccp_connect_init(struct sock *sk)
368{
369 struct dst_entry *dst = __sk_dst_get(sk);
370 struct inet_connection_sock *icsk = inet_csk(sk);
371
372 sk->sk_err = 0;
373 sock_reset_flag(sk, SOCK_DONE);
374
375 dccp_sync_mss(sk, dst_mtu(dst));
376
377 /*
378 * FIXME: set dp->{dccps_swh,dccps_swl}, with
379 * something like dccp_inc_seq
380 */
381
382 icsk->icsk_retransmits = 0;
383}
384
385int dccp_connect(struct sock *sk)
386{
387 struct sk_buff *skb;
388 struct inet_connection_sock *icsk = inet_csk(sk);
389
390 dccp_connect_init(sk);
391
392 skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
393 if (unlikely(skb == NULL))
394 return -ENOBUFS;
395
396 /* Reserve space for headers. */
397 skb_reserve(skb, MAX_DCCP_HEADER);
398
399 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
400 /* FIXME: set service to something meaningful, coming
401 * from userspace*/
402 DCCP_SKB_CB(skb)->dccpd_service = 0;
403 skb->csum = 0;
404 skb_set_owner_w(skb, sk);
405
406 BUG_TRAP(sk->sk_send_head == NULL);
407 sk->sk_send_head = skb;
408 dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
409 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
410
411 /* Timer for repeating the REQUEST until an answer. */
412 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
413 icsk->icsk_rto, DCCP_RTO_MAX);
414 return 0;
415}
416
417void dccp_send_ack(struct sock *sk)
418{
419 /* If we have been reset, we may not send again. */
420 if (sk->sk_state != DCCP_CLOSED) {
421 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
422
423 if (skb == NULL) {
424 inet_csk_schedule_ack(sk);
425 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
426 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
427 TCP_DELACK_MAX,
428 DCCP_RTO_MAX);
429 return;
430 }
431
432 /* Reserve space for headers */
433 skb_reserve(skb, MAX_DCCP_HEADER);
434 skb->csum = 0;
435 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
436 skb_set_owner_w(skb, sk);
437 dccp_transmit_skb(sk, skb);
438 }
439}
440
441EXPORT_SYMBOL_GPL(dccp_send_ack);
442
443void dccp_send_delayed_ack(struct sock *sk)
444{
445 struct inet_connection_sock *icsk = inet_csk(sk);
446 /*
447 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
448 * with using 2s, and active senders also piggyback the ACK into a
449 * DATAACK packet, so this is really for quiescent senders.
450 */
451 unsigned long timeout = jiffies + 2 * HZ;
452
453 /* Use new timeout only if there wasn't a older one earlier. */
454 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
455 /* If delack timer was blocked or is about to expire,
456 * send ACK now.
457 *
458 * FIXME: check the "about to expire" part
459 */
460 if (icsk->icsk_ack.blocked) {
461 dccp_send_ack(sk);
462 return;
463 }
464
465 if (!time_before(timeout, icsk->icsk_ack.timeout))
466 timeout = icsk->icsk_ack.timeout;
467 }
468 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
469 icsk->icsk_ack.timeout = timeout;
470 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
471}
472
473void dccp_send_sync(struct sock *sk, const u64 seq,
474 const enum dccp_pkt_type pkt_type)
475{
476 /*
477 * We are not putting this on the write queue, so
478 * dccp_transmit_skb() will set the ownership to this
479 * sock.
480 */
481 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
482
483 if (skb == NULL)
484 /* FIXME: how to make sure the sync is sent? */
485 return;
486
487 /* Reserve space for headers and prepare control bits. */
488 skb_reserve(skb, MAX_DCCP_HEADER);
489 skb->csum = 0;
490 DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
491 DCCP_SKB_CB(skb)->dccpd_seq = seq;
492
493 skb_set_owner_w(skb, sk);
494 dccp_transmit_skb(sk, skb);
495}
496
497/*
498 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
499 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
500 * any circumstances.
501 */
502void dccp_send_close(struct sock *sk, const int active)
503{
504 struct dccp_sock *dp = dccp_sk(sk);
505 struct sk_buff *skb;
506 const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC;
507
508 skb = alloc_skb(sk->sk_prot->max_header, prio);
509 if (skb == NULL)
510 return;
511
512 /* Reserve space for headers and prepare control bits. */
513 skb_reserve(skb, sk->sk_prot->max_header);
514 skb->csum = 0;
515 DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
516 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
517
518 skb_set_owner_w(skb, sk);
519 if (active) {
520 BUG_TRAP(sk->sk_send_head == NULL);
521 sk->sk_send_head = skb;
522 dccp_transmit_skb(sk, skb_clone(skb, prio));
523 } else
524 dccp_transmit_skb(sk, skb);
525}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
new file mode 100644
index 000000000000..18a0e69c9dc7
--- /dev/null
+++ b/net/dccp/proto.c
@@ -0,0 +1,826 @@
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13#include <linux/dccp.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/if_arp.h>
22#include <linux/init.h>
23#include <linux/random.h>
24#include <net/checksum.h>
25
26#include <net/inet_common.h>
27#include <net/ip.h>
28#include <net/protocol.h>
29#include <net/sock.h>
30#include <net/xfrm.h>
31
32#include <asm/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/timer.h>
35#include <linux/delay.h>
36#include <linux/poll.h>
37#include <linux/dccp.h>
38
39#include "ccid.h"
40#include "dccp.h"
41
/*
 * DCCP MIB counters, a struct dccp_mib.
 * NOTE(review): presumably what DCCP_INC_STATS() updates - confirm in dccp.h.
 */
42DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
/* Orphan socket counter, starts at 0; not modified anywhere in this view. */
44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
/* IPv4 protocol hooks: receive and error handlers for DCCP packets. */
46static struct net_protocol dccp_protocol = {
47 .handler = dccp_v4_rcv,
48 .err_handler = dccp_v4_err,
49};
50
51const char *dccp_packet_name(const int type)
52{
53 static const char *dccp_packet_names[] = {
54 [DCCP_PKT_REQUEST] = "REQUEST",
55 [DCCP_PKT_RESPONSE] = "RESPONSE",
56 [DCCP_PKT_DATA] = "DATA",
57 [DCCP_PKT_ACK] = "ACK",
58 [DCCP_PKT_DATAACK] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE] = "CLOSE",
61 [DCCP_PKT_RESET] = "RESET",
62 [DCCP_PKT_SYNC] = "SYNC",
63 [DCCP_PKT_SYNCACK] = "SYNCACK",
64 };
65
66 if (type >= DCCP_NR_PKT_TYPES)
67 return "INVALID";
68 else
69 return dccp_packet_names[type];
70}
71
72EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74const char *dccp_state_name(const int state)
75{
76 static char *dccp_state_names[] = {
77 [DCCP_OPEN] = "OPEN",
78 [DCCP_REQUESTING] = "REQUESTING",
79 [DCCP_PARTOPEN] = "PARTOPEN",
80 [DCCP_LISTEN] = "LISTEN",
81 [DCCP_RESPOND] = "RESPOND",
82 [DCCP_CLOSING] = "CLOSING",
83 [DCCP_TIME_WAIT] = "TIME_WAIT",
84 [DCCP_CLOSED] = "CLOSED",
85 };
86
87 if (state >= DCCP_MAX_STATES)
88 return "INVALID STATE!";
89 else
90 return dccp_state_names[state];
91}
92
93EXPORT_SYMBOL_GPL(dccp_state_name);
94
95static inline int dccp_listen_start(struct sock *sk)
96{
97 dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
98 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
99}
100
101int dccp_disconnect(struct sock *sk, int flags)
102{
103 struct inet_connection_sock *icsk = inet_csk(sk);
104 struct inet_sock *inet = inet_sk(sk);
105 int err = 0;
106 const int old_state = sk->sk_state;
107
108 if (old_state != DCCP_CLOSED)
109 dccp_set_state(sk, DCCP_CLOSED);
110
111 /* ABORT function of RFC793 */
112 if (old_state == DCCP_LISTEN) {
113 inet_csk_listen_stop(sk);
114 /* FIXME: do the active reset thing */
115 } else if (old_state == DCCP_REQUESTING)
116 sk->sk_err = ECONNRESET;
117
118 dccp_clear_xmit_timers(sk);
119 __skb_queue_purge(&sk->sk_receive_queue);
120 if (sk->sk_send_head != NULL) {
121 __kfree_skb(sk->sk_send_head);
122 sk->sk_send_head = NULL;
123 }
124
125 inet->dport = 0;
126
127 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
128 inet_reset_saddr(sk);
129
130 sk->sk_shutdown = 0;
131 sock_reset_flag(sk, SOCK_DONE);
132
133 icsk->icsk_backoff = 0;
134 inet_csk_delack_init(sk);
135 __sk_dst_reset(sk);
136
137 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
138
139 sk->sk_error_report(sk);
140 return err;
141}
142
143/*
144 * Wait for a DCCP event.
145 *
146 * Note that we don't need to lock the socket, as the upper poll layers
147 * take care of normal races (between the test and the event) and we don't
148 * go look at any of the socket buffers directly.
149 */
150static unsigned int dccp_poll(struct file *file, struct socket *sock,
151 poll_table *wait)
152{
153 unsigned int mask;
154 struct sock *sk = sock->sk;
155
156 poll_wait(file, sk->sk_sleep, wait);
157 if (sk->sk_state == DCCP_LISTEN)
158 return inet_csk_listen_poll(sk);
159
160 /* Socket is not locked. We are protected from async events
161 by poll logic and correct handling of state changes
162 made by another threads is impossible in any case.
163 */
164
165 mask = 0;
166 if (sk->sk_err)
167 mask = POLLERR;
168
169 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
170 mask |= POLLHUP;
171 if (sk->sk_shutdown & RCV_SHUTDOWN)
172 mask |= POLLIN | POLLRDNORM;
173
174 /* Connected? */
175 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
176 if (atomic_read(&sk->sk_rmem_alloc) > 0)
177 mask |= POLLIN | POLLRDNORM;
178
179 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
180 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
181 mask |= POLLOUT | POLLWRNORM;
182 } else { /* send SIGIO later */
183 set_bit(SOCK_ASYNC_NOSPACE,
184 &sk->sk_socket->flags);
185 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
186
187 /* Race breaker. If space is freed after
188 * wspace test but before the flags are set,
189 * IO signal will be lost.
190 */
191 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
192 mask |= POLLOUT | POLLWRNORM;
193 }
194 }
195 }
196 return mask;
197}
198
199int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
200{
201 dccp_pr_debug("entry\n");
202 return -ENOIOCTLCMD;
203}
204
205int dccp_setsockopt(struct sock *sk, int level, int optname,
206 char __user *optval, int optlen)
207{
208 struct dccp_sock *dp;
209 int err;
210 int val;
211
212 if (level != SOL_DCCP)
213 return ip_setsockopt(sk, level, optname, optval, optlen);
214
215 if (optlen < sizeof(int))
216 return -EINVAL;
217
218 if (get_user(val, (int __user *)optval))
219 return -EFAULT;
220
221 lock_sock(sk);
222
223 dp = dccp_sk(sk);
224 err = 0;
225
226 switch (optname) {
227 case DCCP_SOCKOPT_PACKET_SIZE:
228 dp->dccps_packet_size = val;
229 break;
230 default:
231 err = -ENOPROTOOPT;
232 break;
233 }
234
235 release_sock(sk);
236 return err;
237}
238
239int dccp_getsockopt(struct sock *sk, int level, int optname,
240 char __user *optval, int __user *optlen)
241{
242 struct dccp_sock *dp;
243 int val, len;
244
245 if (level != SOL_DCCP)
246 return ip_getsockopt(sk, level, optname, optval, optlen);
247
248 if (get_user(len, optlen))
249 return -EFAULT;
250
251 len = min_t(unsigned int, len, sizeof(int));
252 if (len < 0)
253 return -EINVAL;
254
255 dp = dccp_sk(sk);
256
257 switch (optname) {
258 case DCCP_SOCKOPT_PACKET_SIZE:
259 val = dp->dccps_packet_size;
260 break;
261 default:
262 return -ENOPROTOOPT;
263 }
264
265 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
266 return -EFAULT;
267
268 return 0;
269}
270
271int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
272 size_t len)
273{
274 const struct dccp_sock *dp = dccp_sk(sk);
275 const int flags = msg->msg_flags;
276 const int noblock = flags & MSG_DONTWAIT;
277 struct sk_buff *skb;
278 int rc, size;
279 long timeo;
280
281 if (len > dp->dccps_mss_cache)
282 return -EMSGSIZE;
283
284 lock_sock(sk);
285 timeo = sock_sndtimeo(sk, noblock);
286
287 /*
288 * We have to use sk_stream_wait_connect here to set sk_write_pending,
289 * so that the trick in dccp_rcv_request_sent_state_process.
290 */
291 /* Wait for a connection to finish. */
292 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
293 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
294 goto out_release;
295
296 size = sk->sk_prot->max_header + len;
297 release_sock(sk);
298 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
299 lock_sock(sk);
300 if (skb == NULL)
301 goto out_release;
302
303 skb_reserve(skb, sk->sk_prot->max_header);
304 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
305 if (rc != 0)
306 goto out_discard;
307
308 rc = dccp_write_xmit(sk, skb, &timeo);
309 /*
310 * XXX we don't use sk_write_queue, so just discard the packet.
311 * Current plan however is to _use_ sk_write_queue with
312 * an algorith similar to tcp_sendmsg, where the main difference
313 * is that in DCCP we have to respect packet boundaries, so
314 * no coalescing of skbs.
315 *
316 * This bug was _quickly_ found & fixed by just looking at an OSTRA
317 * generated callgraph 8) -acme
318 */
319 if (rc != 0)
320 goto out_discard;
321out_release:
322 release_sock(sk);
323 return rc ? : len;
324out_discard:
325 kfree_skb(skb);
326 goto out_release;
327}
328
/*
 * dccp_recvmsg - receive the payload of one packet from the receive queue
 *
 * With the socket locked, the skb at the head of sk_receive_queue is
 * examined:
 *  - DATA and DATAACK: up to @len bytes are copied to @msg; MSG_TRUNC is
 *    set when the packet is longer than @len.
 *  - RESET and CLOSE: treated as end of stream, 0 is returned.
 *  - anything else: the skb is eaten and the socket status checked.
 * With an empty queue the socket status decides between returning and
 * sleeping in sk_wait_data() until the receive timeout runs out.
 *
 * The skb that was used is removed from the queue unless MSG_PEEK is set
 * in @flags.
 *
 * Returns the number of bytes copied, 0 at end of stream, or a negative
 * error. Note that the error is carried in @len, a size_t, and only becomes
 * negative again through the conversion to the int return type.
 */
329int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
330 size_t len, int nonblock, int flags, int *addr_len)
331{
332 const struct dccp_hdr *dh;
333 long timeo;
334
335 lock_sock(sk);
336
/* A listening socket has no data to read */
337 if (sk->sk_state == DCCP_LISTEN) {
338 len = -ENOTCONN;
339 goto out;
340 }
341
342 timeo = sock_rcvtimeo(sk, nonblock);
343
/* Runs until a packet was consumed or a reason to stop waiting shows up */
344 do {
345 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
346
347 if (skb == NULL)
348 goto verify_sock_status;
349
350 dh = dccp_hdr(skb);
351
352 if (dh->dccph_type == DCCP_PKT_DATA ||
353 dh->dccph_type == DCCP_PKT_DATAACK)
354 goto found_ok_skb;
355
356 if (dh->dccph_type == DCCP_PKT_RESET ||
357 dh->dccph_type == DCCP_PKT_CLOSE) {
358 dccp_pr_debug("found fin ok!\n");
359 len = 0;
360 goto found_fin_ok;
361 }
/* Not a packet type handled above: log it, drop it, check the socket */
362 dccp_pr_debug("packet_type=%s\n",
363 dccp_packet_name(dh->dccph_type));
364 sk_eat_skb(sk, skb);
365verify_sock_status:
366 if (sock_flag(sk, SOCK_DONE)) {
367 len = 0;
368 break;
369 }
370
371 if (sk->sk_err) {
372 len = sock_error(sk);
373 break;
374 }
375
376 if (sk->sk_shutdown & RCV_SHUTDOWN) {
377 len = 0;
378 break;
379 }
380
381 if (sk->sk_state == DCCP_CLOSED) {
382 if (!sock_flag(sk, SOCK_DONE)) {
383 /* This occurs when user tries to read
384 * from never connected socket.
385 */
386 len = -ENOTCONN;
387 break;
388 }
389 len = 0;
390 break;
391 }
392
/* A zero timeout means the caller does not want to wait */
393 if (!timeo) {
394 len = -EAGAIN;
395 break;
396 }
397
398 if (signal_pending(current)) {
399 len = sock_intr_errno(timeo);
400 break;
401 }
402
/* Sleep until data arrives or the timeout expires, then look again */
403 sk_wait_data(sk, &timeo);
404 continue;
405 found_ok_skb:
/* Hand out at most one packet; flag truncation if it does not fit */
406 if (len > skb->len)
407 len = skb->len;
408 else if (len < skb->len)
409 msg->msg_flags |= MSG_TRUNC;
410
411 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
412 /* Exception. Bailout! */
413 len = -EFAULT;
414 break;
415 }
416 found_fin_ok:
/* The packet stays queued when the caller only peeks */
417 if (!(flags & MSG_PEEK))
418 sk_eat_skb(sk, skb);
419 break;
420 } while (1);
421out:
422 release_sock(sk);
423 return len;
424}
425
426static int inet_dccp_listen(struct socket *sock, int backlog)
427{
428 struct sock *sk = sock->sk;
429 unsigned char old_state;
430 int err;
431
432 lock_sock(sk);
433
434 err = -EINVAL;
435 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
436 goto out;
437
438 old_state = sk->sk_state;
439 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
440 goto out;
441
442 /* Really, if the socket is already in listen state
443 * we can only allow the backlog to be adjusted.
444 */
445 if (old_state != DCCP_LISTEN) {
446 /*
447 * FIXME: here it probably should be sk->sk_prot->listen_start
448 * see tcp_listen_start
449 */
450 err = dccp_listen_start(sk);
451 if (err)
452 goto out;
453 }
454 sk->sk_max_ack_backlog = backlog;
455 err = 0;
456
457out:
458 release_sock(sk);
459 return err;
460}
461
462static const unsigned char dccp_new_state[] = {
463 /* current state: new state: action: */
464 [0] = DCCP_CLOSED,
465 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
466 [DCCP_REQUESTING] = DCCP_CLOSED,
467 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
468 [DCCP_LISTEN] = DCCP_CLOSED,
469 [DCCP_RESPOND] = DCCP_CLOSED,
470 [DCCP_CLOSING] = DCCP_CLOSED,
471 [DCCP_TIME_WAIT] = DCCP_CLOSED,
472 [DCCP_CLOSED] = DCCP_CLOSED,
473};
474
475static int dccp_close_state(struct sock *sk)
476{
477 const int next = dccp_new_state[sk->sk_state];
478 const int ns = next & DCCP_STATE_MASK;
479
480 if (ns != sk->sk_state)
481 dccp_set_state(sk, ns);
482
483 return next & DCCP_ACTION_FIN;
484}
485
486void dccp_close(struct sock *sk, long timeout)
487{
488 struct sk_buff *skb;
489
490 lock_sock(sk);
491
492 sk->sk_shutdown = SHUTDOWN_MASK;
493
494 if (sk->sk_state == DCCP_LISTEN) {
495 dccp_set_state(sk, DCCP_CLOSED);
496
497 /* Special case. */
498 inet_csk_listen_stop(sk);
499
500 goto adjudge_to_death;
501 }
502
503 /*
504 * We need to flush the recv. buffs. We do this only on the
505 * descriptor close, not protocol-sourced closes, because the
506 *reader process may not have drained the data yet!
507 */
508 /* FIXME: check for unread data */
509 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
510 __kfree_skb(skb);
511 }
512
513 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
514 /* Check zero linger _after_ checking for unread data. */
515 sk->sk_prot->disconnect(sk, 0);
516 } else if (dccp_close_state(sk)) {
517 dccp_send_close(sk, 1);
518 }
519
520 sk_stream_wait_close(sk, timeout);
521
522adjudge_to_death:
523 /*
524 * It is the last release_sock in its life. It will remove backlog.
525 */
526 release_sock(sk);
527 /*
528 * Now socket is owned by kernel and we acquire BH lock
529 * to finish close. No need to check for user refs.
530 */
531 local_bh_disable();
532 bh_lock_sock(sk);
533 BUG_TRAP(!sock_owned_by_user(sk));
534
535 sock_hold(sk);
536 sock_orphan(sk);
537
538 /*
539 * The last release_sock may have processed the CLOSE or RESET
540 * packet moving sock to CLOSED state, if not we have to fire
541 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
542 * in draft-ietf-dccp-spec-11. -acme
543 */
544 if (sk->sk_state == DCCP_CLOSING) {
545 /* FIXME: should start at 2 * RTT */
546 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
547 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
548 inet_csk(sk)->icsk_rto,
549 DCCP_RTO_MAX);
550#if 0
551 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
552 dccp_set_state(sk, DCCP_CLOSED);
553#endif
554 }
555
556 atomic_inc(sk->sk_prot->orphan_count);
557 if (sk->sk_state == DCCP_CLOSED)
558 inet_csk_destroy_sock(sk);
559
560 /* Otherwise, socket is reprieved until protocol close. */
561
562 bh_unlock_sock(sk);
563 local_bh_enable();
564 sock_put(sk);
565}
566
/*
 * dccp_shutdown - protocol shutdown hook.
 *
 * Currently a stub: it only emits a debug trace and ignores @how.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
571
572static struct proto_ops inet_dccp_ops = {
573 .family = PF_INET,
574 .owner = THIS_MODULE,
575 .release = inet_release,
576 .bind = inet_bind,
577 .connect = inet_stream_connect,
578 .socketpair = sock_no_socketpair,
579 .accept = inet_accept,
580 .getname = inet_getname,
581 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
582 .poll = dccp_poll,
583 .ioctl = inet_ioctl,
584 /* FIXME: work on inet_listen to rename it to sock_common_listen */
585 .listen = inet_dccp_listen,
586 .shutdown = inet_shutdown,
587 .setsockopt = sock_common_setsockopt,
588 .getsockopt = sock_common_getsockopt,
589 .sendmsg = inet_sendmsg,
590 .recvmsg = sock_common_recvmsg,
591 .mmap = sock_no_mmap,
592 .sendpage = sock_no_sendpage,
593};
594
595extern struct net_proto_family inet_family_ops;
596
597static struct inet_protosw dccp_v4_protosw = {
598 .type = SOCK_DCCP,
599 .protocol = IPPROTO_DCCP,
600 .prot = &dccp_v4_prot,
601 .ops = &inet_dccp_ops,
602 .capability = -1,
603 .no_check = 0,
604 .flags = 0,
605};
606
607/*
608 * This is the global socket data structure used for responding to
609 * the Out-of-the-blue (OOTB) packets. A control sock will be created
610 * for this socket at the initialization time.
611 */
612struct socket *dccp_ctl_socket;
613
614static char dccp_ctl_socket_err_msg[] __initdata =
615 KERN_ERR "DCCP: Failed to create the control socket.\n";
616
617static int __init dccp_ctl_sock_init(void)
618{
619 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
620 &dccp_ctl_socket);
621 if (rc < 0)
622 printk(dccp_ctl_socket_err_msg);
623 else {
624 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
625 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
626
627 /* Unhash it so that IP input processing does not even
628 * see it, we do not wish this socket to see incoming
629 * packets.
630 */
631 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
632 }
633
634 return rc;
635}
636
637#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
638void dccp_ctl_sock_exit(void)
639{
640 if (dccp_ctl_socket != NULL) {
641 sock_release(dccp_ctl_socket);
642 dccp_ctl_socket = NULL;
643 }
644}
645
646EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
647#endif
648
649static int __init init_dccp_v4_mibs(void)
650{
651 int rc = -ENOMEM;
652
653 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
654 if (dccp_statistics[0] == NULL)
655 goto out;
656
657 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
658 if (dccp_statistics[1] == NULL)
659 goto out_free_one;
660
661 rc = 0;
662out:
663 return rc;
664out_free_one:
665 free_percpu(dccp_statistics[0]);
666 dccp_statistics[0] = NULL;
667 goto out;
668
669}
670
671static int thash_entries;
672module_param(thash_entries, int, 0444);
673MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
674
675#ifdef CONFIG_IP_DCCP_DEBUG
676int dccp_debug;
677module_param(dccp_debug, int, 0444);
678MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
679#endif
680
681static int __init dccp_init(void)
682{
683 unsigned long goal;
684 int ehash_order, bhash_order, i;
685 int rc = proto_register(&dccp_v4_prot, 1);
686
687 if (rc)
688 goto out;
689
690 dccp_hashinfo.bind_bucket_cachep =
691 kmem_cache_create("dccp_bind_bucket",
692 sizeof(struct inet_bind_bucket), 0,
693 SLAB_HWCACHE_ALIGN, NULL, NULL);
694 if (!dccp_hashinfo.bind_bucket_cachep)
695 goto out_proto_unregister;
696
697 /*
698 * Size and allocate the main established and bind bucket
699 * hash tables.
700 *
701 * The methodology is similar to that of the buffer cache.
702 */
703 if (num_physpages >= (128 * 1024))
704 goal = num_physpages >> (21 - PAGE_SHIFT);
705 else
706 goal = num_physpages >> (23 - PAGE_SHIFT);
707
708 if (thash_entries)
709 goal = (thash_entries *
710 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
711 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
712 ;
713 do {
714 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
715 sizeof(struct inet_ehash_bucket);
716 dccp_hashinfo.ehash_size >>= 1;
717 while (dccp_hashinfo.ehash_size &
718 (dccp_hashinfo.ehash_size - 1))
719 dccp_hashinfo.ehash_size--;
720 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
721 __get_free_pages(GFP_ATOMIC, ehash_order);
722 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
723
724 if (!dccp_hashinfo.ehash) {
725 printk(KERN_CRIT "Failed to allocate DCCP "
726 "established hash table\n");
727 goto out_free_bind_bucket_cachep;
728 }
729
730 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
731 rwlock_init(&dccp_hashinfo.ehash[i].lock);
732 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
733 }
734
735 bhash_order = ehash_order;
736
737 do {
738 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
739 sizeof(struct inet_bind_hashbucket);
740 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
741 bhash_order > 0)
742 continue;
743 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
744 __get_free_pages(GFP_ATOMIC, bhash_order);
745 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
746
747 if (!dccp_hashinfo.bhash) {
748 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
749 goto out_free_dccp_ehash;
750 }
751
752 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
753 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
754 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
755 }
756
757 if (init_dccp_v4_mibs())
758 goto out_free_dccp_bhash;
759
760 rc = -EAGAIN;
761 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
762 goto out_free_dccp_v4_mibs;
763
764 inet_register_protosw(&dccp_v4_protosw);
765
766 rc = dccp_ctl_sock_init();
767 if (rc)
768 goto out_unregister_protosw;
769out:
770 return rc;
771out_unregister_protosw:
772 inet_unregister_protosw(&dccp_v4_protosw);
773 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
774out_free_dccp_v4_mibs:
775 free_percpu(dccp_statistics[0]);
776 free_percpu(dccp_statistics[1]);
777 dccp_statistics[0] = dccp_statistics[1] = NULL;
778out_free_dccp_bhash:
779 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
780 dccp_hashinfo.bhash = NULL;
781out_free_dccp_ehash:
782 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
783 dccp_hashinfo.ehash = NULL;
784out_free_bind_bucket_cachep:
785 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
786 dccp_hashinfo.bind_bucket_cachep = NULL;
787out_proto_unregister:
788 proto_unregister(&dccp_v4_prot);
789 goto out;
790}
791
792static const char dccp_del_proto_err_msg[] __exitdata =
793 KERN_ERR "can't remove dccp net_protocol\n";
794
795static void __exit dccp_fini(void)
796{
797 inet_unregister_protosw(&dccp_v4_protosw);
798
799 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
800 printk(dccp_del_proto_err_msg);
801
802 free_percpu(dccp_statistics[0]);
803 free_percpu(dccp_statistics[1]);
804 free_pages((unsigned long)dccp_hashinfo.bhash,
805 get_order(dccp_hashinfo.bhash_size *
806 sizeof(struct inet_bind_hashbucket)));
807 free_pages((unsigned long)dccp_hashinfo.ehash,
808 get_order(dccp_hashinfo.ehash_size *
809 sizeof(struct inet_ehash_bucket)));
810 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
811 proto_unregister(&dccp_v4_prot);
812}
813
814module_init(dccp_init);
815module_exit(dccp_fini);
816
817/*
818 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
819 * values directly. Also cover the case where the protocol is not specified,
820 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
821 */
822MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
823MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
824MODULE_LICENSE("GPL");
825MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
826MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
new file mode 100644
index 000000000000..aa34b576e228
--- /dev/null
+++ b/net/dccp/timer.c
@@ -0,0 +1,255 @@
1/*
2 * net/dccp/timer.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include "dccp.h"
18
19static void dccp_write_timer(unsigned long data);
20static void dccp_keepalive_timer(unsigned long data);
21static void dccp_delack_timer(unsigned long data);
22
23void dccp_init_xmit_timers(struct sock *sk)
24{
25 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
26 &dccp_keepalive_timer);
27}
28
29static void dccp_write_err(struct sock *sk)
30{
31 sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
32 sk->sk_error_report(sk);
33
34 dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
35 dccp_done(sk);
36 DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
37}
38
39/* A write timeout has occurred. Process the after effects. */
40static int dccp_write_timeout(struct sock *sk)
41{
42 const struct inet_connection_sock *icsk = inet_csk(sk);
43 int retry_until;
44
45 if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
46 if (icsk->icsk_retransmits != 0)
47 dst_negative_advice(&sk->sk_dst_cache);
48 retry_until = icsk->icsk_syn_retries ? :
49 /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
50 } else {
51 if (icsk->icsk_retransmits >=
52 /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
53 /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
54 black hole detection. :-(
55
56 It is place to make it. It is not made. I do not want
57 to make it. It is disguisting. It does not work in any
58 case. Let me to cite the same draft, which requires for
59 us to implement this:
60
61 "The one security concern raised by this memo is that ICMP black holes
62 are often caused by over-zealous security administrators who block
63 all ICMP messages. It is vitally important that those who design and
64 deploy security systems understand the impact of strict filtering on
65 upper-layer protocols. The safest web site in the world is worthless
66 if most TCP implementations cannot transfer data from it. It would
67 be far nicer to have all of the black holes fixed rather than fixing
68 all of the TCP implementations."
69
70 Golden words :-).
71 */
72
73 dst_negative_advice(&sk->sk_dst_cache);
74 }
75
76 retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
77 /*
78 * FIXME: see tcp_write_timout and tcp_out_of_resources
79 */
80 }
81
82 if (icsk->icsk_retransmits >= retry_until) {
83 /* Has it gone just too far? */
84 dccp_write_err(sk);
85 return 1;
86 }
87 return 0;
88}
89
90/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
91static void dccp_delack_timer(unsigned long data)
92{
93 struct sock *sk = (struct sock *)data;
94 struct inet_connection_sock *icsk = inet_csk(sk);
95
96 bh_lock_sock(sk);
97 if (sock_owned_by_user(sk)) {
98 /* Try again later. */
99 icsk->icsk_ack.blocked = 1;
100 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
101 sk_reset_timer(sk, &icsk->icsk_delack_timer,
102 jiffies + TCP_DELACK_MIN);
103 goto out;
104 }
105
106 if (sk->sk_state == DCCP_CLOSED ||
107 !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
108 goto out;
109 if (time_after(icsk->icsk_ack.timeout, jiffies)) {
110 sk_reset_timer(sk, &icsk->icsk_delack_timer,
111 icsk->icsk_ack.timeout);
112 goto out;
113 }
114
115 icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
116
117 if (inet_csk_ack_scheduled(sk)) {
118 if (!icsk->icsk_ack.pingpong) {
119 /* Delayed ACK missed: inflate ATO. */
120 icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
121 icsk->icsk_rto);
122 } else {
123 /* Delayed ACK missed: leave pingpong mode and
124 * deflate ATO.
125 */
126 icsk->icsk_ack.pingpong = 0;
127 icsk->icsk_ack.ato = TCP_ATO_MIN;
128 }
129 dccp_send_ack(sk);
130 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
131 }
132out:
133 bh_unlock_sock(sk);
134 sock_put(sk);
135}
136
137/*
138 * The DCCP retransmit timer.
139 */
140static void dccp_retransmit_timer(struct sock *sk)
141{
142 struct inet_connection_sock *icsk = inet_csk(sk);
143
144 /*
145 * sk->sk_send_head has to have one skb with
146 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
147 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
148 * (PARTOPEN timer), etc).
149 */
150 BUG_TRAP(sk->sk_send_head != NULL);
151
152 /*
153 * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
154 * sent, no need to retransmit, this sock is dead.
155 */
156 if (dccp_write_timeout(sk))
157 goto out;
158
159 /*
160 * We want to know the number of packets retransmitted, not the
161 * total number of retransmissions of clones of original packets.
162 */
163 if (icsk->icsk_retransmits == 0)
164 DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
165
166 if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
167 /*
168 * Retransmission failed because of local congestion,
169 * do not backoff.
170 */
171 if (icsk->icsk_retransmits == 0)
172 icsk->icsk_retransmits = 1;
173 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
174 min(icsk->icsk_rto,
175 TCP_RESOURCE_PROBE_INTERVAL),
176 DCCP_RTO_MAX);
177 goto out;
178 }
179
180 icsk->icsk_backoff++;
181 icsk->icsk_retransmits++;
182
183 icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
184 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
185 DCCP_RTO_MAX);
186 if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
187 __sk_dst_reset(sk);
188out:;
189}
190
191static void dccp_write_timer(unsigned long data)
192{
193 struct sock *sk = (struct sock *)data;
194 struct inet_connection_sock *icsk = inet_csk(sk);
195 int event = 0;
196
197 bh_lock_sock(sk);
198 if (sock_owned_by_user(sk)) {
199 /* Try again later */
200 sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
201 jiffies + (HZ / 20));
202 goto out;
203 }
204
205 if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
206 goto out;
207
208 if (time_after(icsk->icsk_timeout, jiffies)) {
209 sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
210 icsk->icsk_timeout);
211 goto out;
212 }
213
214 event = icsk->icsk_pending;
215 icsk->icsk_pending = 0;
216
217 switch (event) {
218 case ICSK_TIME_RETRANS:
219 dccp_retransmit_timer(sk);
220 break;
221 }
222out:
223 bh_unlock_sock(sk);
224 sock_put(sk);
225}
226
227/*
228 * Timer for listening sockets
229 */
230static void dccp_response_timer(struct sock *sk)
231{
232 inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
233 DCCP_RTO_MAX);
234}
235
236static void dccp_keepalive_timer(unsigned long data)
237{
238 struct sock *sk = (struct sock *)data;
239
240 /* Only process if socket is not in use. */
241 bh_lock_sock(sk);
242 if (sock_owned_by_user(sk)) {
243 /* Try again later. */
244 inet_csk_reset_keepalive_timer(sk, HZ / 20);
245 goto out;
246 }
247
248 if (sk->sk_state == DCCP_LISTEN) {
249 dccp_response_timer(sk);
250 goto out;
251 }
252out:
253 bh_unlock_sock(sk);
254 sock_put(sk);
255}