diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/Kconfig | 15 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_westwood.c | 259 |
3 files changed, 275 insertions, 0 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 712ebacacb62..adbe855d931a 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -452,6 +452,21 @@ config TCP_CONG_BIC | |||
452 | increase provides TCP friendliness. | 452 | increase provides TCP friendliness. |
453 | See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ | 453 | See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ |
454 | 454 | ||
455 | config TCP_CONG_WESTWOOD | ||
456 | tristate "TCP Westwood+" | ||
457 | depends on INET | ||
458 | default m | ||
459 | ---help--- | ||
460 | TCP Westwood+ is a sender-side only modification of the TCP Reno | ||
461 | protocol stack that optimizes the performance of TCP congestion | ||
462 | control. It is based on end-to-end bandwidth estimation to set | ||
463 | congestion window and slow start threshold after a congestion | ||
464 | episode. Using this estimation, TCP Westwood+ adaptively sets a | ||
465 | slow start threshold and a congestion window which takes into | ||
466 | account the bandwidth used at the time congestion is experienced. | ||
467 | TCP Westwood+ significantly increases fairness with respect to TCP | ||
468 | Reno in wired networks and throughput over wireless links. | ||
469 | |||
455 | endmenu | 470 | endmenu |
456 | 471 | ||
457 | source "net/ipv4/ipvs/Kconfig" | 472 | source "net/ipv4/ipvs/Kconfig" |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 1d1cac5ac06a..dedfbe62a104 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -32,6 +32,7 @@ obj-$(CONFIG_IP_VS) += ipvs/ | |||
32 | obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o | 32 | obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o |
33 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o | 33 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o |
34 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | 34 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o |
35 | obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o | ||
35 | 36 | ||
36 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 37 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
37 | xfrm4_output.o | 38 | xfrm4_output.o |
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c new file mode 100644 index 000000000000..ef827242c940 --- /dev/null +++ b/net/ipv4/tcp_westwood.c | |||
@@ -0,0 +1,259 @@ | |||
1 | /* | ||
2 | * TCP Westwood+ | ||
3 | * | ||
4 | * Angelo Dell'Aera: TCP Westwood+ support | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/skbuff.h> | ||
11 | #include <linux/tcp_diag.h> | ||
12 | #include <net/tcp.h> | ||
13 | |||
14 | /* Per-connection TCP Westwood+ state, obtained through tcp_ca(tp) */ | ||
15 | struct westwood { | ||
16 | u32 bw_ns_est; /* first-stage filtered bandwidth sample, not too smoothed */ | ||
17 | u32 bw_est; /* bandwidth estimate: bw_ns_est run through the filter again */ | ||
18 | u32 rtt_win_sx; /* tcp_time_stamp at which the current evaluation window started */ | ||
19 | u32 bk; /* amount acked so far in the current evaluation window */ | ||
20 | u32 snd_una; /* copy of tp->snd_una, used for evaluating the number of acked bytes */ | ||
21 | u32 cumul_ack; /* amount credited for the ack being processed */ | ||
22 | u32 accounted; /* amount already credited for dupacks, deducted from later acks */ | ||
23 | u32 rtt; /* last RTT sample, taken from tp->srtt >> 3 */ | ||
24 | u32 rtt_min; /* minimum observed RTT */ | ||
25 | }; | ||
26 | |||
27 | |||
28 | /* TCP Westwood functions and constants */ | ||
29 | #define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms: lower bound on the evaluation window length */ | ||
30 | #define TCP_WESTWOOD_INIT_RTT (20*HZ) /* initial rtt and rtt_min; maybe too conservative?! */ | ||
31 | |||
32 | /* | ||
33 | * @tcp_westwood_init | ||
34 | * This function initializes the fields used by TCP Westwood+. | ||
35 | * It is called after the initial SYN, so the sequence numbers | ||
36 | * are correct, but for new passive connections we have no | ||
37 | * information about RTTmin at this time so we simply set it to | ||
38 | * TCP_WESTWOOD_INIT_RTT. This value was chosen to be deliberately | ||
39 | * conservative, since in this way we're sure it will be updated in a | ||
40 | * consistent way as soon as possible. It will reasonably happen within | ||
41 | * the first RTT period of the connection lifetime. | ||
42 | */ | ||
43 | static void tcp_westwood_init(struct tcp_sock *tp) | ||
44 | { | ||
45 | struct westwood *w = tcp_ca(tp); | ||
46 | |||
47 | w->bk = 0; | ||
48 | w->bw_ns_est = 0; | ||
49 | w->bw_est = 0; | ||
50 | w->accounted = 0; | ||
51 | w->cumul_ack = 0; | ||
52 | w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; | ||
53 | w->rtt_win_sx = tcp_time_stamp; /* the first evaluation window starts now */ | ||
54 | w->snd_una = tp->snd_una; /* baseline for counting newly acked bytes */ | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * @westwood_do_filter | ||
59 | * Low-pass filter with constant coefficients: returns (7*a + b) / 8. | ||
60 | */ | ||
61 | static inline u32 westwood_do_filter(u32 a, u32 b) | ||
62 | { | ||
63 | return (((7 * a) + b) >> 3); | ||
64 | } | ||
65 | |||
66 | static inline void westwood_filter(struct westwood *w, u32 delta) /* delta is used as a divisor and must be non-zero */ | ||
67 | { | ||
68 | w->bw_ns_est = westwood_do_filter(w->bw_ns_est, w->bk / delta); /* new sample: bk divided by the window length */ | ||
69 | w->bw_est = westwood_do_filter(w->bw_est, w->bw_ns_est); /* second filter pass gives the final estimate */ | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * @tcp_westwood_pkts_acked | ||
74 | * Called after processing a group of packets, | ||
75 | * but all westwood needs is the last sample of srtt. | ||
76 | */ | ||
77 | static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) | ||
78 | { | ||
79 | struct westwood *w = tcp_ca(tp); | ||
80 | if (cnt > 0) | ||
81 | w->rtt = tp->srtt >> 3; | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * @westwood_update_window | ||
86 | * It updates the RTT evaluation window if it is the right moment to do | ||
87 | * so. In that case it calls the filter to evaluate the bandwidth. | ||
88 | */ | ||
89 | static void westwood_update_window(struct tcp_sock *tp) | ||
90 | { | ||
91 | struct westwood *w = tcp_ca(tp); | ||
92 | s32 delta = tcp_time_stamp - w->rtt_win_sx; /* time elapsed since the window started */ | ||
93 | |||
94 | /* | ||
95 | * See if a RTT-window has passed. | ||
96 | * Be careful since if RTT is less than | ||
97 | * 50ms we don't filter but we continue 'building the sample'. | ||
98 | * This minimum limit was chosen since an estimation on small | ||
99 | * time intervals is better to avoid... | ||
100 | * Obviously on a LAN we reasonably will always have | ||
101 | * right_bound = left_bound + TCP_WESTWOOD_RTT_MIN | ||
102 | */ | ||
103 | if (w->rtt && delta > max_t(u32, w->rtt, TCP_WESTWOOD_RTT_MIN)) { | ||
104 | westwood_filter(w, delta); | ||
105 | |||
106 | w->bk = 0; /* start accumulating a fresh sample */ | ||
107 | w->rtt_win_sx = tcp_time_stamp; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * @westwood_fast_bw | ||
113 | * It is called when we are in fast path. In particular it is called when | ||
114 | * header prediction is successful. In such a case the update is | ||
115 | * straightforward and doesn't need any particular care. | ||
116 | */ | ||
117 | static inline void westwood_fast_bw(struct tcp_sock *tp) | ||
118 | { | ||
119 | struct westwood *w = tcp_ca(tp); | ||
120 | |||
121 | westwood_update_window(tp); | ||
122 | |||
123 | w->bk += tp->snd_una - w->snd_una; /* credit everything acked since the last update */ | ||
124 | w->snd_una = tp->snd_una; | ||
125 | w->rtt_min = min(w->rtt, w->rtt_min); | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * @westwood_acked_count | ||
130 | * This function computes cumul_ack, the amount to be added to bk, | ||
131 | * handling dupacks and delayed or partial acks. Returns cumul_ack. | ||
132 | */ | ||
133 | static inline u32 westwood_acked_count(struct tcp_sock *tp) | ||
134 | { | ||
135 | struct westwood *w = tcp_ca(tp); | ||
136 | |||
137 | w->cumul_ack = tp->snd_una - w->snd_una; | ||
138 | |||
139 | /* If cumul_ack is 0 this is a dupack since it's not moving | ||
140 | * tp->snd_una. | ||
141 | */ | ||
142 | if (!w->cumul_ack) { | ||
143 | w->accounted += tp->mss_cache; /* remember one mss was credited for this dupack */ | ||
144 | w->cumul_ack = tp->mss_cache; | ||
145 | } | ||
146 | |||
147 | if (w->cumul_ack > tp->mss_cache) { | ||
148 | /* Partial or delayed ack */ | ||
149 | if (w->accounted >= w->cumul_ack) { | ||
150 | w->accounted -= w->cumul_ack; | ||
151 | w->cumul_ack = tp->mss_cache; | ||
152 | } else { | ||
153 | w->cumul_ack -= w->accounted; /* do not count again what dupacks already credited */ | ||
154 | w->accounted = 0; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | w->snd_una = tp->snd_una; | ||
159 | |||
160 | return w->cumul_ack; | ||
161 | } | ||
162 | |||
163 | static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) /* bw_est * rtt_min expressed in mss_cache units */ | ||
164 | { | ||
165 | struct westwood *w = tcp_ca(tp); | ||
166 | return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); /* never returns less than 2 */ | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * TCP Westwood | ||
171 | * Here limit is evaluated as Bw estimation*RTTmin (for obtaining it | ||
172 | * in packets we use mss_cache). The result is clamped to be >= 2, | ||
173 | * so this avoids ever returning 0. | ||
174 | */ | ||
175 | static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) | ||
176 | { | ||
177 | return westwood_bw_rttmin(tp); | ||
178 | } | ||
179 | |||
180 | static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) /* dispatches congestion-control events */ | ||
181 | { | ||
182 | struct westwood *w = tcp_ca(tp); | ||
183 | |||
184 | switch(event) { | ||
185 | case CA_EVENT_FAST_ACK: /* fast path: simple accounting */ | ||
186 | westwood_fast_bw(tp); | ||
187 | break; | ||
188 | |||
189 | case CA_EVENT_COMPLETE_CWR: /* both cwnd and ssthresh are set from the estimate */ | ||
190 | tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); | ||
191 | break; | ||
192 | |||
193 | case CA_EVENT_FRTO: /* only ssthresh is set from the estimate */ | ||
194 | tp->snd_ssthresh = westwood_bw_rttmin(tp); | ||
195 | break; | ||
196 | |||
197 | case CA_EVENT_SLOW_ACK: /* may be a dupack or a delayed/partial ack */ | ||
198 | westwood_update_window(tp); | ||
199 | w->bk += westwood_acked_count(tp); | ||
200 | w->rtt_min = min(w->rtt, w->rtt_min); | ||
201 | break; | ||
202 | |||
203 | default: | ||
204 | /* don't care */ | ||
205 | break; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | |||
210 | /* Export Westwood RTT info through the TCPDIAG_VEGASINFO netlink attribute when ext requests it. */ | ||
211 | static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, | ||
212 | struct sk_buff *skb) | ||
213 | { | ||
214 | const struct westwood *ca = tcp_ca(tp); | ||
215 | if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { | ||
216 | struct rtattr *rta; | ||
217 | struct tcpvegas_info *info; | ||
218 | |||
219 | rta = __RTA_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*info)); | ||
220 | info = RTA_DATA(rta); | ||
221 | info->tcpv_enabled = 1; | ||
222 | info->tcpv_rttcnt = 0; | ||
223 | info->tcpv_rtt = jiffies_to_usecs(ca->rtt); | ||
224 | info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); | ||
225 | rtattr_failure: ; /* NOTE(review): presumably the bail-out target of __RTA_PUT -- confirm */ | ||
226 | } | ||
227 | } | ||
228 | |||
229 | |||
230 | static struct tcp_congestion_ops tcp_westwood = { /* handler table registered at module init */ | ||
231 | .init = tcp_westwood_init, | ||
232 | .ssthresh = tcp_reno_ssthresh, /* reuses the Reno handler */ | ||
233 | .cong_avoid = tcp_reno_cong_avoid, /* reuses the Reno handler */ | ||
234 | .min_cwnd = tcp_westwood_cwnd_min, | ||
235 | .cwnd_event = tcp_westwood_event, | ||
236 | .get_info = tcp_westwood_info, | ||
237 | .pkts_acked = tcp_westwood_pkts_acked, | ||
238 | |||
239 | .owner = THIS_MODULE, | ||
240 | .name = "westwood" | ||
241 | }; | ||
242 | |||
243 | static int __init tcp_westwood_register(void) /* module init: registers the westwood ops table */ | ||
244 | { | ||
245 | BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); /* private state must not exceed TCP_CA_PRIV_SIZE */ | ||
246 | return tcp_register_congestion_control(&tcp_westwood); | ||
247 | } | ||
248 | |||
249 | static void __exit tcp_westwood_unregister(void) /* module exit: unregisters the westwood ops table */ | ||
250 | { | ||
251 | tcp_unregister_congestion_control(&tcp_westwood); | ||
252 | } | ||
253 | |||
254 | module_init(tcp_westwood_register); | ||
255 | module_exit(tcp_westwood_unregister); | ||
256 | |||
257 | MODULE_AUTHOR("Stephen Hemminger, Angelo Dell'Aera"); | ||
258 | MODULE_LICENSE("GPL"); | ||
259 | MODULE_DESCRIPTION("TCP Westwood+"); | ||