diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/netfilter/nf_nat_rule.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 22 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 82 |
3 files changed, 23 insertions, 83 deletions
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index bea54a685109..8d489e746b21 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -61,7 +61,7 @@ static struct | |||
61 | static struct xt_table nat_table = { | 61 | static struct xt_table nat_table = { |
62 | .name = "nat", | 62 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 63 | .valid_hooks = NAT_VALID_HOOKS, |
64 | .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), | 64 | .lock = __RW_LOCK_UNLOCKED(nat_table.lock), |
65 | .me = THIS_MODULE, | 65 | .me = THIS_MODULE, |
66 | .af = AF_INET, | 66 | .af = AF_INET, |
67 | }; | 67 | }; |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 85b07eba1879..fe3b4bdfd251 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -722,8 +722,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
722 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, | 722 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, |
723 | unsigned int mss_now) | 723 | unsigned int mss_now) |
724 | { | 724 | { |
725 | if (skb->len <= mss_now || !sk_can_gso(sk) || | 725 | if (skb->len <= mss_now || !sk_can_gso(sk)) { |
726 | tcp_urg_mode(tcp_sk(sk))) { | ||
727 | /* Avoid the costly divide in the normal | 726 | /* Avoid the costly divide in the normal |
728 | * non-TSO case. | 727 | * non-TSO case. |
729 | */ | 728 | */ |
@@ -1029,10 +1028,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
1029 | 1028 | ||
1030 | /* Compute the current effective MSS, taking SACKs and IP options, | 1029 | /* Compute the current effective MSS, taking SACKs and IP options, |
1031 | * and even PMTU discovery events into account. | 1030 | * and even PMTU discovery events into account. |
1032 | * | ||
1033 | * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up | ||
1034 | * cannot be large. However, taking into account rare use of URG, this | ||
1035 | * is not a big flaw. | ||
1036 | */ | 1031 | */ |
1037 | unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | 1032 | unsigned int tcp_current_mss(struct sock *sk, int large_allowed) |
1038 | { | 1033 | { |
@@ -1047,7 +1042,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
1047 | 1042 | ||
1048 | mss_now = tp->mss_cache; | 1043 | mss_now = tp->mss_cache; |
1049 | 1044 | ||
1050 | if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) | 1045 | if (large_allowed && sk_can_gso(sk)) |
1051 | doing_tso = 1; | 1046 | doing_tso = 1; |
1052 | 1047 | ||
1053 | if (dst) { | 1048 | if (dst) { |
@@ -1164,9 +1159,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, | |||
1164 | { | 1159 | { |
1165 | int tso_segs = tcp_skb_pcount(skb); | 1160 | int tso_segs = tcp_skb_pcount(skb); |
1166 | 1161 | ||
1167 | if (!tso_segs || | 1162 | if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { |
1168 | (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || | ||
1169 | tcp_urg_mode(tcp_sk(sk))))) { | ||
1170 | tcp_set_skb_tso_segs(sk, skb, mss_now); | 1163 | tcp_set_skb_tso_segs(sk, skb, mss_now); |
1171 | tso_segs = tcp_skb_pcount(skb); | 1164 | tso_segs = tcp_skb_pcount(skb); |
1172 | } | 1165 | } |
@@ -1519,6 +1512,10 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1519 | * send_head. This happens as incoming acks open up the remote | 1512 | * send_head. This happens as incoming acks open up the remote |
1520 | * window for us. | 1513 | * window for us. |
1521 | * | 1514 | * |
1515 | * LARGESEND note: !tcp_urg_mode is overkill, only frames between | ||
1516 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into | ||
1517 | * account rare use of URG, this is not a big flaw. | ||
1518 | * | ||
1522 | * Returns 1, if no segments are in flight and we have queued segments, but | 1519 | * Returns 1, if no segments are in flight and we have queued segments, but |
1523 | * cannot send anything now because of SWS or another problem. | 1520 | * cannot send anything now because of SWS or another problem. |
1524 | */ | 1521 | */ |
@@ -1570,7 +1567,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1570 | } | 1567 | } |
1571 | 1568 | ||
1572 | limit = mss_now; | 1569 | limit = mss_now; |
1573 | if (tso_segs > 1) | 1570 | if (tso_segs > 1 && !tcp_urg_mode(tp)) |
1574 | limit = tcp_mss_split_point(sk, skb, mss_now, | 1571 | limit = tcp_mss_split_point(sk, skb, mss_now, |
1575 | cwnd_quota); | 1572 | cwnd_quota); |
1576 | 1573 | ||
@@ -1619,6 +1616,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, | |||
1619 | */ | 1616 | */ |
1620 | void tcp_push_one(struct sock *sk, unsigned int mss_now) | 1617 | void tcp_push_one(struct sock *sk, unsigned int mss_now) |
1621 | { | 1618 | { |
1619 | struct tcp_sock *tp = tcp_sk(sk); | ||
1622 | struct sk_buff *skb = tcp_send_head(sk); | 1620 | struct sk_buff *skb = tcp_send_head(sk); |
1623 | unsigned int tso_segs, cwnd_quota; | 1621 | unsigned int tso_segs, cwnd_quota; |
1624 | 1622 | ||
@@ -1633,7 +1631,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
1633 | BUG_ON(!tso_segs); | 1631 | BUG_ON(!tso_segs); |
1634 | 1632 | ||
1635 | limit = mss_now; | 1633 | limit = mss_now; |
1636 | if (tso_segs > 1) | 1634 | if (tso_segs > 1 && !tcp_urg_mode(tp)) |
1637 | limit = tcp_mss_split_point(sk, skb, mss_now, | 1635 | limit = tcp_mss_split_point(sk, skb, mss_now, |
1638 | cwnd_quota); | 1636 | cwnd_quota); |
1639 | 1637 | ||
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 14504dada116..a453aac91bd3 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -40,18 +40,14 @@ | |||
40 | 40 | ||
41 | #include "tcp_vegas.h" | 41 | #include "tcp_vegas.h" |
42 | 42 | ||
43 | /* Default values of the Vegas variables, in fixed-point representation | 43 | static int alpha = 2; |
44 | * with V_PARAM_SHIFT bits to the right of the binary point. | 44 | static int beta = 4; |
45 | */ | 45 | static int gamma = 1; |
46 | #define V_PARAM_SHIFT 1 | ||
47 | static int alpha = 2<<V_PARAM_SHIFT; | ||
48 | static int beta = 4<<V_PARAM_SHIFT; | ||
49 | static int gamma = 1<<V_PARAM_SHIFT; | ||
50 | 46 | ||
51 | module_param(alpha, int, 0644); | 47 | module_param(alpha, int, 0644); |
52 | MODULE_PARM_DESC(alpha, "lower bound of packets in network (scale by 2)"); | 48 | MODULE_PARM_DESC(alpha, "lower bound of packets in network"); |
53 | module_param(beta, int, 0644); | 49 | module_param(beta, int, 0644); |
54 | MODULE_PARM_DESC(beta, "upper bound of packets in network (scale by 2)"); | 50 | MODULE_PARM_DESC(beta, "upper bound of packets in network"); |
55 | module_param(gamma, int, 0644); | 51 | module_param(gamma, int, 0644); |
56 | MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); | 52 | MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); |
57 | 53 | ||
@@ -172,49 +168,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
172 | return; | 168 | return; |
173 | } | 169 | } |
174 | 170 | ||
175 | /* The key players are v_beg_snd_una and v_beg_snd_nxt. | ||
176 | * | ||
177 | * These are so named because they represent the approximate values | ||
178 | * of snd_una and snd_nxt at the beginning of the current RTT. More | ||
179 | * precisely, they represent the amount of data sent during the RTT. | ||
180 | * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, | ||
181 | * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding | ||
182 | * bytes of data have been ACKed during the course of the RTT, giving | ||
183 | * an "actual" rate of: | ||
184 | * | ||
185 | * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) | ||
186 | * | ||
187 | * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, | ||
188 | * because delayed ACKs can cover more than one segment, so they | ||
189 | * don't line up nicely with the boundaries of RTTs. | ||
190 | * | ||
191 | * Another unfortunate fact of life is that delayed ACKs delay the | ||
192 | * advance of the left edge of our send window, so that the number | ||
193 | * of bytes we send in an RTT is often less than our cwnd will allow. | ||
194 | * So we keep track of our cwnd separately, in v_beg_snd_cwnd. | ||
195 | */ | ||
196 | |||
197 | if (after(ack, vegas->beg_snd_nxt)) { | 171 | if (after(ack, vegas->beg_snd_nxt)) { |
198 | /* Do the Vegas once-per-RTT cwnd adjustment. */ | 172 | /* Do the Vegas once-per-RTT cwnd adjustment. */ |
199 | u32 old_wnd, old_snd_cwnd; | ||
200 | |||
201 | |||
202 | /* Here old_wnd is essentially the window of data that was | ||
203 | * sent during the previous RTT, and has all | ||
204 | * been acknowledged in the course of the RTT that ended | ||
205 | * with the ACK we just received. Likewise, old_snd_cwnd | ||
206 | * is the cwnd during the previous RTT. | ||
207 | */ | ||
208 | old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / | ||
209 | tp->mss_cache; | ||
210 | old_snd_cwnd = vegas->beg_snd_cwnd; | ||
211 | 173 | ||
212 | /* Save the extent of the current window so we can use this | 174 | /* Save the extent of the current window so we can use this |
213 | * at the end of the next RTT. | 175 | * at the end of the next RTT. |
214 | */ | 176 | */ |
215 | vegas->beg_snd_una = vegas->beg_snd_nxt; | ||
216 | vegas->beg_snd_nxt = tp->snd_nxt; | 177 | vegas->beg_snd_nxt = tp->snd_nxt; |
217 | vegas->beg_snd_cwnd = tp->snd_cwnd; | ||
218 | 178 | ||
219 | /* We do the Vegas calculations only if we got enough RTT | 179 | /* We do the Vegas calculations only if we got enough RTT |
220 | * samples that we can be reasonably sure that we got | 180 | * samples that we can be reasonably sure that we got |
@@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
252 | * | 212 | * |
253 | * This is: | 213 | * This is: |
254 | * (actual rate in segments) * baseRTT | 214 | * (actual rate in segments) * baseRTT |
255 | * We keep it as a fixed point number with | ||
256 | * V_PARAM_SHIFT bits to the right of the binary point. | ||
257 | */ | 215 | */ |
258 | target_cwnd = ((u64)old_wnd * vegas->baseRTT); | 216 | target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; |
259 | target_cwnd <<= V_PARAM_SHIFT; | ||
260 | do_div(target_cwnd, rtt); | ||
261 | 217 | ||
262 | /* Calculate the difference between the window we had, | 218 | /* Calculate the difference between the window we had, |
263 | * and the window we would like to have. This quantity | 219 | * and the window we would like to have. This quantity |
264 | * is the "Diff" from the Arizona Vegas papers. | 220 | * is the "Diff" from the Arizona Vegas papers. |
265 | * | ||
266 | * Again, this is a fixed point number with | ||
267 | * V_PARAM_SHIFT bits to the right of the binary | ||
268 | * point. | ||
269 | */ | 221 | */ |
270 | diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; | 222 | diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; |
271 | 223 | ||
272 | if (diff > gamma && tp->snd_ssthresh > 2 ) { | 224 | if (diff > gamma && tp->snd_ssthresh > 2 ) { |
273 | /* Going too fast. Time to slow down | 225 | /* Going too fast. Time to slow down |
@@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
282 | * truncation robs us of full link | 234 | * truncation robs us of full link |
283 | * utilization. | 235 | * utilization. |
284 | */ | 236 | */ |
285 | tp->snd_cwnd = min(tp->snd_cwnd, | 237 | tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); |
286 | ((u32)target_cwnd >> | ||
287 | V_PARAM_SHIFT)+1); | ||
288 | 238 | ||
289 | } else if (tp->snd_cwnd <= tp->snd_ssthresh) { | 239 | } else if (tp->snd_cwnd <= tp->snd_ssthresh) { |
290 | /* Slow start. */ | 240 | /* Slow start. */ |
291 | tcp_slow_start(tp); | 241 | tcp_slow_start(tp); |
292 | } else { | 242 | } else { |
293 | /* Congestion avoidance. */ | 243 | /* Congestion avoidance. */ |
294 | u32 next_snd_cwnd; | ||
295 | 244 | ||
296 | /* Figure out where we would like cwnd | 245 | /* Figure out where we would like cwnd |
297 | * to be. | 246 | * to be. |
@@ -300,32 +249,25 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
300 | /* The old window was too fast, so | 249 | /* The old window was too fast, so |
301 | * we slow down. | 250 | * we slow down. |
302 | */ | 251 | */ |
303 | next_snd_cwnd = old_snd_cwnd - 1; | 252 | tp->snd_cwnd--; |
304 | } else if (diff < alpha) { | 253 | } else if (diff < alpha) { |
305 | /* We don't have enough extra packets | 254 | /* We don't have enough extra packets |
306 | * in the network, so speed up. | 255 | * in the network, so speed up. |
307 | */ | 256 | */ |
308 | next_snd_cwnd = old_snd_cwnd + 1; | 257 | tp->snd_cwnd++; |
309 | } else { | 258 | } else { |
310 | /* Sending just as fast as we | 259 | /* Sending just as fast as we |
311 | * should be. | 260 | * should be. |
312 | */ | 261 | */ |
313 | next_snd_cwnd = old_snd_cwnd; | ||
314 | } | 262 | } |
315 | |||
316 | /* Adjust cwnd upward or downward, toward the | ||
317 | * desired value. | ||
318 | */ | ||
319 | if (next_snd_cwnd > tp->snd_cwnd) | ||
320 | tp->snd_cwnd++; | ||
321 | else if (next_snd_cwnd < tp->snd_cwnd) | ||
322 | tp->snd_cwnd--; | ||
323 | } | 263 | } |
324 | 264 | ||
325 | if (tp->snd_cwnd < 2) | 265 | if (tp->snd_cwnd < 2) |
326 | tp->snd_cwnd = 2; | 266 | tp->snd_cwnd = 2; |
327 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) | 267 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) |
328 | tp->snd_cwnd = tp->snd_cwnd_clamp; | 268 | tp->snd_cwnd = tp->snd_cwnd_clamp; |
269 | |||
270 | tp->snd_ssthresh = tcp_current_ssthresh(sk); | ||
329 | } | 271 | } |
330 | 272 | ||
331 | /* Wipe the slate clean for the next RTT. */ | 273 | /* Wipe the slate clean for the next RTT. */ |