about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- net/ipv4/tcp_vegas.c 31
-rw-r--r-- net/ipv4/tcp_vegas.h 24
-rw-r--r-- net/ipv4/tcp_yeah.c 53
-rw-r--r-- net/ipv4/tcp_yeah.h 131
4 files changed, 61 insertions, 178 deletions
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index f4104eeb5f26..0f0ee7f732c3 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -38,6 +38,8 @@
38 38
39#include <net/tcp.h> 39#include <net/tcp.h>
40 40
41#include "tcp_vegas.h"
42
41/* Default values of the Vegas variables, in fixed-point representation 43/* Default values of the Vegas variables, in fixed-point representation
42 * with V_PARAM_SHIFT bits to the right of the binary point. 44 * with V_PARAM_SHIFT bits to the right of the binary point.
43 */ 45 */
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644);
54MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); 56MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
55 57
56 58
57/* Vegas variables */
58struct vegas {
59 u32 beg_snd_nxt; /* right edge during last RTT */
60 u32 beg_snd_una; /* left edge during last RTT */
61 u32 beg_snd_cwnd; /* saves the size of the cwnd */
62 u8 doing_vegas_now;/* if true, do vegas for this RTT */
63 u16 cntRTT; /* # of RTTs measured within last RTT */
64 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
65 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
66};
67
68/* There are several situations when we must "re-start" Vegas: 59/* There are several situations when we must "re-start" Vegas:
69 * 60 *
70 * o when a connection is established 61 * o when a connection is established
@@ -81,7 +72,7 @@ struct vegas {
81 * Instead we must wait until the completion of an RTT during 72 * Instead we must wait until the completion of an RTT during
82 * which we actually receive ACKs. 73 * which we actually receive ACKs.
83 */ 74 */
84static inline void vegas_enable(struct sock *sk) 75static void vegas_enable(struct sock *sk)
85{ 76{
86 const struct tcp_sock *tp = tcp_sk(sk); 77 const struct tcp_sock *tp = tcp_sk(sk);
87 struct vegas *vegas = inet_csk_ca(sk); 78 struct vegas *vegas = inet_csk_ca(sk);
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk)
104 vegas->doing_vegas_now = 0; 95 vegas->doing_vegas_now = 0;
105} 96}
106 97
107static void tcp_vegas_init(struct sock *sk) 98void tcp_vegas_init(struct sock *sk)
108{ 99{
109 struct vegas *vegas = inet_csk_ca(sk); 100 struct vegas *vegas = inet_csk_ca(sk);
110 101
111 vegas->baseRTT = 0x7fffffff; 102 vegas->baseRTT = 0x7fffffff;
112 vegas_enable(sk); 103 vegas_enable(sk);
113} 104}
105EXPORT_SYMBOL_GPL(tcp_vegas_init);
114 106
115/* Do RTT sampling needed for Vegas. 107/* Do RTT sampling needed for Vegas.
116 * Basically we: 108 * Basically we:
@@ -120,7 +112,7 @@ static void tcp_vegas_init(struct sock *sk)
120 * o min-filter RTT samples from a much longer window (forever for now) 112 * o min-filter RTT samples from a much longer window (forever for now)
121 * to find the propagation delay (baseRTT) 113 * to find the propagation delay (baseRTT)
122 */ 114 */
123static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) 115void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
124{ 116{
125 struct vegas *vegas = inet_csk_ca(sk); 117 struct vegas *vegas = inet_csk_ca(sk);
126 u32 vrtt; 118 u32 vrtt;
@@ -138,8 +130,9 @@ static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
138 vegas->minRTT = min(vegas->minRTT, vrtt); 130 vegas->minRTT = min(vegas->minRTT, vrtt);
139 vegas->cntRTT++; 131 vegas->cntRTT++;
140} 132}
133EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
141 134
142static void tcp_vegas_state(struct sock *sk, u8 ca_state) 135void tcp_vegas_state(struct sock *sk, u8 ca_state)
143{ 136{
144 137
145 if (ca_state == TCP_CA_Open) 138 if (ca_state == TCP_CA_Open)
@@ -147,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
147 else 140 else
148 vegas_disable(sk); 141 vegas_disable(sk);
149} 142}
143EXPORT_SYMBOL_GPL(tcp_vegas_state);
150 144
151/* 145/*
152 * If the connection is idle and we are restarting, 146 * If the connection is idle and we are restarting,
@@ -157,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
157 * packets, _then_ we can make Vegas calculations 151 * packets, _then_ we can make Vegas calculations
158 * again. 152 * again.
159 */ 153 */
160static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) 154void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
161{ 155{
162 if (event == CA_EVENT_CWND_RESTART || 156 if (event == CA_EVENT_CWND_RESTART ||
163 event == CA_EVENT_TX_START) 157 event == CA_EVENT_TX_START)
164 tcp_vegas_init(sk); 158 tcp_vegas_init(sk);
165} 159}
160EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
166 161
167static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, 162static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
168 u32 seq_rtt, u32 in_flight, int flag) 163 u32 seq_rtt, u32 in_flight, int flag)
@@ -339,8 +334,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
339} 334}
340 335
341/* Extract info for Tcp socket info provided via netlink. */ 336/* Extract info for Tcp socket info provided via netlink. */
342static void tcp_vegas_get_info(struct sock *sk, u32 ext, 337void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
343 struct sk_buff *skb)
344{ 338{
345 const struct vegas *ca = inet_csk_ca(sk); 339 const struct vegas *ca = inet_csk_ca(sk);
346 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 340 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
@@ -354,6 +348,7 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
354 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); 348 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
355 } 349 }
356} 350}
351EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
357 352
358static struct tcp_congestion_ops tcp_vegas = { 353static struct tcp_congestion_ops tcp_vegas = {
359 .flags = TCP_CONG_RTT_STAMP, 354 .flags = TCP_CONG_RTT_STAMP,
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
new file mode 100644
index 000000000000..502fa8183634
--- /dev/null
+++ b/net/ipv4/tcp_vegas.h
@@ -0,0 +1,24 @@
1/*
2 * TCP Vegas congestion control interface
3 */
4#ifndef __TCP_VEGAS_H
5#define __TCP_VEGAS_H 1
6
7/* Vegas variables */
8struct vegas {
9 u32 beg_snd_nxt; /* right edge during last RTT */
10 u32 beg_snd_una; /* left edge during last RTT */
11 u32 beg_snd_cwnd; /* saves the size of the cwnd */
12 u8 doing_vegas_now;/* if true, do vegas for this RTT */
13 u16 cntRTT; /* # of RTTs measured within last RTT */
14 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
15 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
16};
17
18extern void tcp_vegas_init(struct sock *sk);
19extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
20extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
21extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
22extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
23
24#endif /* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 81ef02c1649a..545ed237ab53 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -6,13 +6,14 @@
6 * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf 6 * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
7 * 7 *
8 */ 8 */
9#include <linux/mm.h>
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/inet_diag.h>
9 13
10#include "tcp_yeah.h" 14#include <net/tcp.h>
11 15
12/* Default values of the Vegas variables, in fixed-point representation 16#include "tcp_vegas.h"
13 * with V_PARAM_SHIFT bits to the right of the binary point.
14 */
15#define V_PARAM_SHIFT 1
16 17
17#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck 18#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck
18#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt 19#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt
@@ -26,14 +27,7 @@
26 27
27/* YeAH variables */ 28/* YeAH variables */
28struct yeah { 29struct yeah {
29 /* Vegas */ 30 struct vegas vegas; /* must be first */
30 u32 beg_snd_nxt; /* right edge during last RTT */
31 u32 beg_snd_una; /* left edge during last RTT */
32 u32 beg_snd_cwnd; /* saves the size of the cwnd */
33 u8 doing_vegas_now;/* if true, do vegas for this RTT */
34 u16 cntRTT; /* # of RTTs measured within last RTT */
35 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
36 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
37 31
38 /* YeAH */ 32 /* YeAH */
39 u32 lastQ; 33 u32 lastQ;
@@ -84,9 +78,10 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
84 if (!tcp_is_cwnd_limited(sk, in_flight)) 78 if (!tcp_is_cwnd_limited(sk, in_flight))
85 return; 79 return;
86 80
87 if (tp->snd_cwnd <= tp->snd_ssthresh) { 81 if (tp->snd_cwnd <= tp->snd_ssthresh)
88 tcp_slow_start(tp); 82 tcp_slow_start(tp);
89 } else if (!yeah->doing_reno_now) { 83
84 else if (!yeah->doing_reno_now) {
90 /* Scalable */ 85 /* Scalable */
91 86
92 tp->snd_cwnd_cnt+=yeah->pkts_acked; 87 tp->snd_cwnd_cnt+=yeah->pkts_acked;
@@ -110,19 +105,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
110 } 105 }
111 } 106 }
112 107
113 /* The key players are v_beg_snd_una and v_beg_snd_nxt. 108 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
114 * 109 *
115 * These are so named because they represent the approximate values 110 * These are so named because they represent the approximate values
116 * of snd_una and snd_nxt at the beginning of the current RTT. More 111 * of snd_una and snd_nxt at the beginning of the current RTT. More
117 * precisely, they represent the amount of data sent during the RTT. 112 * precisely, they represent the amount of data sent during the RTT.
118 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, 113 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
119 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding 114 * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding
120 * bytes of data have been ACKed during the course of the RTT, giving 115 * bytes of data have been ACKed during the course of the RTT, giving
121 * an "actual" rate of: 116 * an "actual" rate of:
122 * 117 *
123 * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) 118 * (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration)
124 * 119 *
125 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, 120 * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una,
126 * because delayed ACKs can cover more than one segment, so they 121 * because delayed ACKs can cover more than one segment, so they
127 * don't line up yeahly with the boundaries of RTTs. 122 * don't line up yeahly with the boundaries of RTTs.
128 * 123 *
@@ -132,7 +127,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
132 * So we keep track of our cwnd separately, in v_beg_snd_cwnd. 127 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
133 */ 128 */
134 129
135 if (after(ack, yeah->beg_snd_nxt)) { 130 if (after(ack, yeah->vegas.beg_snd_nxt)) {
136 131
137 /* We do the Vegas calculations only if we got enough RTT 132 /* We do the Vegas calculations only if we got enough RTT
138 * samples that we can be reasonably sure that we got 133 * samples that we can be reasonably sure that we got
@@ -143,7 +138,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
143 * If we have 3 samples, we should be OK. 138 * If we have 3 samples, we should be OK.
144 */ 139 */
145 140
146 if (yeah->cntRTT > 2) { 141 if (yeah->vegas.cntRTT > 2) {
147 u32 rtt, queue; 142 u32 rtt, queue;
148 u64 bw; 143 u64 bw;
149 144
@@ -158,18 +153,18 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
158 * of delayed ACKs, at the cost of noticing congestion 153 * of delayed ACKs, at the cost of noticing congestion
159 * a bit later. 154 * a bit later.
160 */ 155 */
161 rtt = yeah->minRTT; 156 rtt = yeah->vegas.minRTT;
162 157
163 /* Compute excess number of packets above bandwidth 158 /* Compute excess number of packets above bandwidth
164 * Avoid doing full 64 bit divide. 159 * Avoid doing full 64 bit divide.
165 */ 160 */
166 bw = tp->snd_cwnd; 161 bw = tp->snd_cwnd;
167 bw *= rtt - yeah->baseRTT; 162 bw *= rtt - yeah->vegas.baseRTT;
168 do_div(bw, rtt); 163 do_div(bw, rtt);
169 queue = bw; 164 queue = bw;
170 165
171 if (queue > TCP_YEAH_ALPHA || 166 if (queue > TCP_YEAH_ALPHA ||
172 rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) { 167 rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
173 if (queue > TCP_YEAH_ALPHA 168 if (queue > TCP_YEAH_ALPHA
174 && tp->snd_cwnd > yeah->reno_count) { 169 && tp->snd_cwnd > yeah->reno_count) {
175 u32 reduction = min(queue / TCP_YEAH_GAMMA , 170 u32 reduction = min(queue / TCP_YEAH_GAMMA ,
@@ -208,13 +203,13 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
208 /* Save the extent of the current window so we can use this 203 /* Save the extent of the current window so we can use this
209 * at the end of the next RTT. 204 * at the end of the next RTT.
210 */ 205 */
211 yeah->beg_snd_una = yeah->beg_snd_nxt; 206 yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
212 yeah->beg_snd_nxt = tp->snd_nxt; 207 yeah->vegas.beg_snd_nxt = tp->snd_nxt;
213 yeah->beg_snd_cwnd = tp->snd_cwnd; 208 yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
214 209
215 /* Wipe the slate clean for the next RTT. */ 210 /* Wipe the slate clean for the next RTT. */
216 yeah->cntRTT = 0; 211 yeah->vegas.cntRTT = 0;
217 yeah->minRTT = 0x7fffffff; 212 yeah->vegas.minRTT = 0x7fffffff;
218 } 213 }
219} 214}
220 215
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
index 33ad5385c188..ed3b7198f23c 100644
--- a/net/ipv4/tcp_yeah.h
+++ b/net/ipv4/tcp_yeah.h
@@ -5,134 +5,3 @@
5#include <asm/div64.h> 5#include <asm/div64.h>
6 6
7#include <net/tcp.h> 7#include <net/tcp.h>
8
9/* Vegas variables */
10struct vegas {
11 u32 beg_snd_nxt; /* right edge during last RTT */
12 u32 beg_snd_una; /* left edge during last RTT */
13 u32 beg_snd_cwnd; /* saves the size of the cwnd */
14 u8 doing_vegas_now;/* if true, do vegas for this RTT */
15 u16 cntRTT; /* # of RTTs measured within last RTT */
16 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
17 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
18};
19
20/* There are several situations when we must "re-start" Vegas:
21 *
22 * o when a connection is established
23 * o after an RTO
24 * o after fast recovery
25 * o when we send a packet and there is no outstanding
26 * unacknowledged data (restarting an idle connection)
27 *
28 * In these circumstances we cannot do a Vegas calculation at the
29 * end of the first RTT, because any calculation we do is using
30 * stale info -- both the saved cwnd and congestion feedback are
31 * stale.
32 *
33 * Instead we must wait until the completion of an RTT during
34 * which we actually receive ACKs.
35 */
36static inline void vegas_enable(struct sock *sk)
37{
38 const struct tcp_sock *tp = tcp_sk(sk);
39 struct vegas *vegas = inet_csk_ca(sk);
40
41 /* Begin taking Vegas samples next time we send something. */
42 vegas->doing_vegas_now = 1;
43
44 /* Set the beginning of the next send window. */
45 vegas->beg_snd_nxt = tp->snd_nxt;
46
47 vegas->cntRTT = 0;
48 vegas->minRTT = 0x7fffffff;
49}
50
51/* Stop taking Vegas samples for now. */
52static inline void vegas_disable(struct sock *sk)
53{
54 struct vegas *vegas = inet_csk_ca(sk);
55
56 vegas->doing_vegas_now = 0;
57}
58
59static void tcp_vegas_init(struct sock *sk)
60{
61 struct vegas *vegas = inet_csk_ca(sk);
62
63 vegas->baseRTT = 0x7fffffff;
64 vegas_enable(sk);
65}
66
67static void tcp_vegas_state(struct sock *sk, u8 ca_state)
68{
69
70 if (ca_state == TCP_CA_Open)
71 vegas_enable(sk);
72 else
73 vegas_disable(sk);
74}
75
76/* Do RTT sampling needed for Vegas.
77 * Basically we:
78 * o min-filter RTT samples from within an RTT to get the current
79 * propagation delay + queuing delay (we are min-filtering to try to
80 * avoid the effects of delayed ACKs)
81 * o min-filter RTT samples from a much longer window (forever for now)
82 * to find the propagation delay (baseRTT)
83 */
84static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
85{
86 struct vegas *vegas = inet_csk_ca(sk);
87 u32 vrtt;
88
89 /* Never allow zero rtt or baseRTT */
90 vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
91
92 /* Filter to find propagation delay: */
93 if (vrtt < vegas->baseRTT)
94 vegas->baseRTT = vrtt;
95
96 /* Find the min RTT during the last RTT to find
97 * the current prop. delay + queuing delay:
98 */
99 vegas->minRTT = min(vegas->minRTT, vrtt);
100 vegas->cntRTT++;
101}
102
103/*
104 * If the connection is idle and we are restarting,
105 * then we don't want to do any Vegas calculations
106 * until we get fresh RTT samples. So when we
107 * restart, we reset our Vegas state to a clean
108 * slate. After we get acks for this flight of
109 * packets, _then_ we can make Vegas calculations
110 * again.
111 */
112static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
113{
114 if (event == CA_EVENT_CWND_RESTART ||
115 event == CA_EVENT_TX_START)
116 tcp_vegas_init(sk);
117}
118
119/* Extract info for Tcp socket info provided via netlink. */
120static void tcp_vegas_get_info(struct sock *sk, u32 ext,
121 struct sk_buff *skb)
122{
123 const struct vegas *ca = inet_csk_ca(sk);
124 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
125 struct tcpvegas_info *info;
126
127 info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
128 sizeof(*info)));
129
130 info->tcpv_enabled = ca->doing_vegas_now;
131 info->tcpv_rttcnt = ca->cntRTT;
132 info->tcpv_rtt = ca->baseRTT;
133 info->tcpv_minrtt = ca->minRTT;
134 rtattr_failure: ;
135 }
136}
137
138