about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2011-03-15 18:15:17 -0400
committer David S. Miller <davem@davemloft.net> 2011-03-15 18:15:17 -0400
commit c337ffb68e1e71bad069b14d2246fa1e0c31699c (patch)
tree 7861a59b196adfd63758cc0921e4fb56030fbaf3 /net
parent 30df754dedebf27ef90452944a723ba058d23396 (diff)
parent 84c0c6933cb0303fa006992a6659c2b46de4eb17 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'net')
-rw-r--r-- net/bridge/br_stp.c 4
-rw-r--r-- net/ipv4/tcp_cubic.c 45
-rw-r--r-- net/ipv4/tcp_input.c 2
-rw-r--r-- net/unix/af_unix.c 2
4 files changed, 38 insertions, 15 deletions
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index a5badd0f8226..7370d14f634d 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -375,12 +375,12 @@ static void br_make_forwarding(struct net_bridge_port *p)
375 if (p->state != BR_STATE_BLOCKING) 375 if (p->state != BR_STATE_BLOCKING)
376 return; 376 return;
377 377
378 if (br->forward_delay == 0) { 378 if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) {
379 p->state = BR_STATE_FORWARDING; 379 p->state = BR_STATE_FORWARDING;
380 br_topology_change_detection(br); 380 br_topology_change_detection(br);
381 del_timer(&p->forward_delay_timer); 381 del_timer(&p->forward_delay_timer);
382 } 382 }
383 else if (p->br->stp_enabled == BR_KERNEL_STP) 383 else if (br->stp_enabled == BR_KERNEL_STP)
384 p->state = BR_STATE_LISTENING; 384 p->state = BR_STATE_LISTENING;
385 else 385 else
386 p->state = BR_STATE_LEARNING; 386 p->state = BR_STATE_LEARNING;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 62f775cb0863..34340c9c95fa 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -39,7 +39,7 @@
39 39
40/* Number of delay samples for detecting the increase of delay */ 40/* Number of delay samples for detecting the increase of delay */
41#define HYSTART_MIN_SAMPLES 8 41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (2U<<3) 42#define HYSTART_DELAY_MIN (4U<<3)
43#define HYSTART_DELAY_MAX (16U<<3) 43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) 44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45 45
@@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1;
52static int hystart __read_mostly = 1; 52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; 53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16; 54static int hystart_low_window __read_mostly = 16;
55static int hystart_ack_delta __read_mostly = 2;
55 56
56static u32 cube_rtt_scale __read_mostly; 57static u32 cube_rtt_scale __read_mostly;
57static u32 beta_scale __read_mostly; 58static u32 beta_scale __read_mostly;
@@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
75 " 1: packet-train 2: delay 3: both packet-train and delay"); 76 " 1: packet-train 2: delay 3: both packet-train and delay");
76module_param(hystart_low_window, int, 0644); 77module_param(hystart_low_window, int, 0644);
77MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); 78MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
79module_param(hystart_ack_delta, int, 0644);
80MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
78 81
79/* BIC TCP Parameters */ 82/* BIC TCP Parameters */
80struct bictcp { 83struct bictcp {
@@ -85,7 +88,7 @@ struct bictcp {
85 u32 last_time; /* time when updated last_cwnd */ 88 u32 last_time; /* time when updated last_cwnd */
86 u32 bic_origin_point;/* origin point of bic function */ 89 u32 bic_origin_point;/* origin point of bic function */
87 u32 bic_K; /* time to origin point from the beginning of the current epoch */ 90 u32 bic_K; /* time to origin point from the beginning of the current epoch */
88 u32 delay_min; /* min delay */ 91 u32 delay_min; /* min delay (msec << 3) */
89 u32 epoch_start; /* beginning of an epoch */ 92 u32 epoch_start; /* beginning of an epoch */
90 u32 ack_cnt; /* number of acks */ 93 u32 ack_cnt; /* number of acks */
91 u32 tcp_cwnd; /* estimated tcp cwnd */ 94 u32 tcp_cwnd; /* estimated tcp cwnd */
@@ -95,7 +98,7 @@ struct bictcp {
95 u8 found; /* the exit point is found? */ 98 u8 found; /* the exit point is found? */
96 u32 round_start; /* beginning of each round */ 99 u32 round_start; /* beginning of each round */
97 u32 end_seq; /* end_seq of the round */ 100 u32 end_seq; /* end_seq of the round */
98 u32 last_jiffies; /* last time when the ACK spacing is close */ 101 u32 last_ack; /* last time when the ACK spacing is close */
99 u32 curr_rtt; /* the minimum rtt of current round */ 102 u32 curr_rtt; /* the minimum rtt of current round */
100}; 103};
101 104
@@ -116,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca)
116 ca->found = 0; 119 ca->found = 0;
117} 120}
118 121
122static inline u32 bictcp_clock(void)
123{
124#if HZ < 1000
125 return ktime_to_ms(ktime_get_real());
126#else
127 return jiffies_to_msecs(jiffies);
128#endif
129}
130
119static inline void bictcp_hystart_reset(struct sock *sk) 131static inline void bictcp_hystart_reset(struct sock *sk)
120{ 132{
121 struct tcp_sock *tp = tcp_sk(sk); 133 struct tcp_sock *tp = tcp_sk(sk);
122 struct bictcp *ca = inet_csk_ca(sk); 134 struct bictcp *ca = inet_csk_ca(sk);
123 135
124 ca->round_start = ca->last_jiffies = jiffies; 136 ca->round_start = ca->last_ack = bictcp_clock();
125 ca->end_seq = tp->snd_nxt; 137 ca->end_seq = tp->snd_nxt;
126 ca->curr_rtt = 0; 138 ca->curr_rtt = 0;
127 ca->sample_cnt = 0; 139 ca->sample_cnt = 0;
@@ -236,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
236 */ 248 */
237 249
238 /* change the unit from HZ to bictcp_HZ */ 250 /* change the unit from HZ to bictcp_HZ */
239 t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) 251 t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
240 << BICTCP_HZ) / HZ; 252 - ca->epoch_start) << BICTCP_HZ) / HZ;
241 253
242 if (t < ca->bic_K) /* t - K */ 254 if (t < ca->bic_K) /* t - K */
243 offs = ca->bic_K - t; 255 offs = ca->bic_K - t;
@@ -258,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
258 ca->cnt = 100 * cwnd; /* very small increment*/ 270 ca->cnt = 100 * cwnd; /* very small increment*/
259 } 271 }
260 272
273 /*
274 * The initial growth of cubic function may be too conservative
275 * when the available bandwidth is still unknown.
276 */
277 if (ca->loss_cwnd == 0 && ca->cnt > 20)
278 ca->cnt = 20; /* increase cwnd 5% per RTT */
279
261 /* TCP Friendly */ 280 /* TCP Friendly */
262 if (tcp_friendliness) { 281 if (tcp_friendliness) {
263 u32 scale = beta_scale; 282 u32 scale = beta_scale;
@@ -339,12 +358,12 @@ static void hystart_update(struct sock *sk, u32 delay)
339 struct bictcp *ca = inet_csk_ca(sk); 358 struct bictcp *ca = inet_csk_ca(sk);
340 359
341 if (!(ca->found & hystart_detect)) { 360 if (!(ca->found & hystart_detect)) {
342 u32 curr_jiffies = jiffies; 361 u32 now = bictcp_clock();
343 362
344 /* first detection parameter - ack-train detection */ 363 /* first detection parameter - ack-train detection */
345 if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { 364 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
346 ca->last_jiffies = curr_jiffies; 365 ca->last_ack = now;
347 if (curr_jiffies - ca->round_start >= ca->delay_min>>4) 366 if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
348 ca->found |= HYSTART_ACK_TRAIN; 367 ca->found |= HYSTART_ACK_TRAIN;
349 } 368 }
350 369
@@ -391,7 +410,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
391 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) 410 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
392 return; 411 return;
393 412
394 delay = usecs_to_jiffies(rtt_us) << 3; 413 delay = (rtt_us << 3) / USEC_PER_MSEC;
395 if (delay == 0) 414 if (delay == 0)
396 delay = 1; 415 delay = 1;
397 416
@@ -447,6 +466,10 @@ static int __init cubictcp_register(void)
447 /* divide by bic_scale and by constant Srtt (100ms) */ 466 /* divide by bic_scale and by constant Srtt (100ms) */
448 do_div(cube_factor, bic_scale * 10); 467 do_div(cube_factor, bic_scale * 10);
449 468
469 /* hystart needs ms clock resolution */
470 if (hystart && HZ < 1000)
471 cubictcp.flags |= TCP_CONG_RTT_STAMP;
472
450 return tcp_register_congestion_control(&cubictcp); 473 return tcp_register_congestion_control(&cubictcp);
451} 474}
452 475
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 08ea735b9d72..da782e7ab16d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3350,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3350 net_invalid_timestamp())) 3350 net_invalid_timestamp()))
3351 rtt_us = ktime_us_delta(ktime_get_real(), 3351 rtt_us = ktime_us_delta(ktime_get_real(),
3352 last_ackt); 3352 last_ackt);
3353 else if (ca_seq_rtt > 0) 3353 else if (ca_seq_rtt >= 0)
3354 rtt_us = jiffies_to_usecs(ca_seq_rtt); 3354 rtt_us = jiffies_to_usecs(ca_seq_rtt);
3355 } 3355 }
3356 3356
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index de870184e457..ef70615d41a1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1124,7 +1124,7 @@ restart:
1124 1124
1125 /* Latch our state. 1125 /* Latch our state.
1126 1126
1127 It is tricky place. We need to grab write lock and cannot 1127 It is tricky place. We need to grab our state lock and cannot
1128 drop lock on peer. It is dangerous because deadlock is 1128 drop lock on peer. It is dangerous because deadlock is
1129 possible. Connect to self case and simultaneous 1129 possible. Connect to self case and simultaneous
1130 attempt to connect are eliminated by checking socket 1130 attempt to connect are eliminated by checking socket