path: root/net/ipv4/tcp_input.c
author		Dmitry Torokhov <dmitry.torokhov@gmail.com>	2012-01-09 02:38:23 -0500
committer	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2012-01-09 02:38:23 -0500
commit		da733563be5a9da26fe81d9f007262d00b846e22 (patch)
tree		db28291df94a2043af2123911984c5c173da4e6f /net/ipv4/tcp_input.c
parent		6ccbcf2cb41131f8d56ef0723bf3f7c1f8486076 (diff)
parent		dab78d7924598ea4031663dd10db814e2e324928 (diff)
Merge branch 'next' into for-linus
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	250
1 file changed, 155 insertions(+), 95 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea0d2183df4b..52b5c2d0ecd0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
 	tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
 {
 	if (tcp_hdr(skb)->cwr)
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -217,32 +217,41 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
 	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 }
 
-static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 {
-	if (tp->ecn_flags & TCP_ECN_OK) {
-		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
-			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		/* Funny extension: if ECT is not set on a segment,
-		 * it is surely retransmit. It is not in ECN RFC,
-		 * but Linux follows this rule. */
-		else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
-			tcp_enter_quickack_mode((struct sock *)tp);
+	if (!(tp->ecn_flags & TCP_ECN_OK))
+		return;
+
+	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+	case INET_ECN_NOT_ECT:
+		/* Funny extension: if ECT is not set on a segment,
+		 * and we have already seen ECT on a previous segment,
+		 * it is probably a retransmit.
+		 */
+		if (tp->ecn_flags & TCP_ECN_SEEN)
+			tcp_enter_quickack_mode((struct sock *)tp);
+		break;
+	case INET_ECN_CE:
+		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		/* fall through */
+	default:
+		tp->ecn_flags |= TCP_ECN_SEEN;
 	}
 }
 
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
+static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
 		return 1;
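
The rewritten TCP_ECN_check_ce() keys off the two ECN bits of the IP DS field (ip_dsfield) rather than the removed per-skb flags copy. A standalone sketch of that classification; the codepoint values match include/net/inet_ecn.h, the sample dsfield is invented:

    #include <stdio.h>

    /* ECN codepoints live in the low two bits of the IP DS field. */
    enum {
        INET_ECN_NOT_ECT = 0,   /* sender not ECN-capable */
        INET_ECN_ECT_1   = 1,   /* ECN-capable transport (1) */
        INET_ECN_ECT_0   = 2,   /* ECN-capable transport (0) */
        INET_ECN_CE      = 3,   /* congestion experienced */
        INET_ECN_MASK    = 3,
    };

    int main(void)
    {
        unsigned char dsfield = 0xb8 | INET_ECN_CE; /* DSCP EF, CE-marked */

        switch (dsfield & INET_ECN_MASK) {
        case INET_ECN_NOT_ECT:
            puts("not ECT: probably a retransmit if ECT was seen before");
            break;
        case INET_ECN_CE:
            puts("CE: a router signalled congestion; demand CWR from peer");
            break;
        default:
            puts("ECT(0)/ECT(1): ECN-capable, no congestion seen");
        }
        return 0;
    }

Note how the kernel version lets the CE case fall through into the default so that TCP_ECN_SEEN is recorded for any ECT-carrying segment.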
@@ -256,14 +265,11 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 
 static void tcp_fixup_sndbuf(struct sock *sk)
 {
-	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
-		     sizeof(struct sk_buff);
+	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
 
-	if (sk->sk_sndbuf < 3 * sndmem) {
-		sk->sk_sndbuf = 3 * sndmem;
-		if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
-			sk->sk_sndbuf = sysctl_tcp_wmem[2];
-	}
+	sndmem *= TCP_INIT_CWND;
+	if (sk->sk_sndbuf < sndmem)
+		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
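
tcp_fixup_sndbuf() now reserves space for a full initial congestion window: the truesize of one maximally sized skb times TCP_INIT_CWND (10), capped by tcp_wmem[2]. A back-of-envelope sketch; the overhead constants below are assumptions standing in for the aligned sk_buff and skb_shared_info bookkeeping the real SKB_TRUESIZE() adds:

    #include <stdio.h>

    #define SKB_OVERHEAD    (256 + 320)  /* assumed struct overheads, aligned */
    #define SKB_TRUESIZE(x) ((x) + SKB_OVERHEAD)
    #define MAX_TCP_HEADER  320          /* assumed */
    #define TCP_INIT_CWND   10

    int main(void)
    {
        int mss_clamp = 1460;
        int sndmem = SKB_TRUESIZE(mss_clamp + MAX_TCP_HEADER) * TCP_INIT_CWND;

        /* Roughly (1460 + 320 + 576) * 10, about 23 KB: enough queued
         * data to fill ten full-sized segments at connection start. */
        printf("initial sndbuf target: %d bytes\n", sndmem);
        return 0;
    }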
@@ -309,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -339,17 +345,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
 
 static void tcp_fixup_rcvbuf(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+	u32 mss = tcp_sk(sk)->advmss;
+	u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+	int rcvmem;
 
-	/* Try to select rcvbuf so that 4 mss-sized segments
-	 * will fit to window and corresponding skbs will fit to our rcvbuf.
-	 * (was 3; 4 is minimum to allow fast retransmit to work.)
+	/* Limit to 10 segments if mss <= 1460,
+	 * or 14600/mss segments, with a minimum of two segments.
 	 */
-	while (tcp_win_from_space(rcvmem) < tp->advmss)
+	if (mss > 1460)
+		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+
+	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
+	while (tcp_win_from_space(rcvmem) < mss)
 		rcvmem += 128;
-	if (sk->sk_rcvbuf < 4 * rcvmem)
-		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
+
+	rcvmem *= icwnd;
+
+	if (sk->sk_rcvbuf < rcvmem)
+		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
 }
 
 /* 4. Try to fixup all. It is made immediately after connection enters
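
The receive side mirrors the sender: tcp_fixup_rcvbuf() budgets for the peer's likely initial window, scaling the 10-segment default down for large MSS values but never below two segments. A sketch of just that clamp, assuming TCP_DEFAULT_INIT_RCVWND is 10 as in this kernel's include/net/tcp.h:

    #include <stdio.h>

    #define TCP_DEFAULT_INIT_RCVWND 10  /* assumed, per include/net/tcp.h */

    static unsigned int rcv_icwnd(unsigned int mss)
    {
        unsigned int icwnd = TCP_DEFAULT_INIT_RCVWND;

        if (mss > 1460) {
            icwnd = (1460 * TCP_DEFAULT_INIT_RCVWND) / mss;
            if (icwnd < 2)
                icwnd = 2;
        }
        return icwnd;
    }

    int main(void)
    {
        /* 1460 -> 10 segments, 4380 -> 3, 9000 (jumbo) -> clamped to 2 */
        printf("%u %u %u\n", rcv_icwnd(1460), rcv_icwnd(4380), rcv_icwnd(9000));
        return 0;
    }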
@@ -416,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk)
  */
 void tcp_initialize_rcv_mss(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd / 2);
@@ -531,8 +544,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		space /= tp->advmss;
 		if (!space)
 			space = 1;
-		rcvmem = (tp->advmss + MAX_TCP_HEADER +
-			  16 + sizeof(struct sk_buff));
+		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 		while (tcp_win_from_space(rcvmem) < tp->advmss)
 			rcvmem += 128;
 		space *= rcvmem;
@@ -812,7 +824,7 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
@@ -1124,7 +1136,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 		return 0;
 
 	/* ...Then it's D-SACK, and must reside below snd_una completely */
-	if (!after(end_seq, tp->snd_una))
+	if (after(end_seq, tp->snd_una))
 		return 0;
 
 	if (!before(start_seq, tp->undo_marker))
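
Dropping the `!` fixes an inverted test: a D-SACK block reports already-received data and must lie entirely below snd_una, so an end_seq after snd_una is precisely what invalidates it. A sketch of the wrap-safe comparison, with before()/after() written as in the kernel's include/net/tcp.h:

    #include <stdio.h>
    #include <stdint.h>

    /* Wrap-safe sequence comparison, as in include/net/tcp.h */
    static int before(uint32_t seq1, uint32_t seq2)
    {
        return (int32_t)(seq1 - seq2) < 0;
    }
    #define after(seq2, seq1) before(seq1, seq2)

    int main(void)
    {
        uint32_t snd_una = 1000;  /* invented sequence numbers */

        /* a valid D-SACK ends at or below snd_una... */
        printf("end_seq=900:  reject? %d\n", after(900, snd_una));   /* 0 */
        /* ...one ending past snd_una must be rejected (return 0) */
        printf("end_seq=1500: reject? %d\n", after(1500, snd_una));  /* 1 */
        return 0;
    }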
@@ -1204,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 		tp->lost_retrans_low = new_low_seq;
 }
 
-static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			   struct tcp_sack_block_wire *sp, int num_sacks,
 			   u32 prior_snd_una)
 {
@@ -1298,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 			  struct tcp_sacktag_state *state,
 			  int dup_sack, int pcount)
 {
@@ -1389,9 +1401,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Tweak before seqno plays */
-	if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
-	    !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1440,7 +1450,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
 	}
 
-	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
 	if (skb == tcp_highest_sack(sk))
 		tcp_advance_highest_sack(sk, skb);
 
@@ -1455,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 /* I wish gso_size would have a bit more sane initialization than
  * something-or-zero which complicates things
  */
-static int tcp_skb_seglen(struct sk_buff *skb)
+static int tcp_skb_seglen(const struct sk_buff *skb)
 {
 	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
 }
 
 /* Shifting pages past head area doesn't work */
-static int skb_can_shift(struct sk_buff *skb)
+static int skb_can_shift(const struct sk_buff *skb)
 {
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
@@ -1710,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 	return skb;
 }
 
-static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
 {
 	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
 }
 
 static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			u32 prior_snd_una)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = (skb_transport_header(ack_skb) +
-			      TCP_SKB_CB(ack_skb)->sacked);
+	const unsigned char *ptr = (skb_transport_header(ack_skb) +
+				    TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
@@ -2286,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
 	return 0;
 }
 
-static inline int tcp_fackets_out(struct tcp_sock *tp)
+static inline int tcp_fackets_out(const struct tcp_sock *tp)
 {
 	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
 }
@@ -2306,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
  * they differ. Since neither occurs due to loss, TCP should really
  * ignore them.
  */
-static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 {
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_skb_timedout(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
-static inline int tcp_head_timedout(struct sock *sk)
+static inline int tcp_head_timedout(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	return tp->packets_out &&
 	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2629,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
-static inline int tcp_packet_delayed(struct tcp_sock *tp)
+static inline int tcp_packet_delayed(const struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
 		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2690,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static inline int tcp_may_undo(struct tcp_sock *tp)
+static inline int tcp_may_undo(const struct tcp_sock *tp)
 {
 	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
 }
@@ -2754,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
  * that successive retransmissions of a segment must not advance
  * retrans_stamp under any conditions.
  */
-static int tcp_any_retrans_done(struct sock *sk)
+static int tcp_any_retrans_done(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	if (tp->retrans_out)
@@ -2830,9 +2841,13 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* Do not moderate cwnd if it's already undone in cwr or recovery */
-	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
-		tp->snd_cwnd = tp->snd_ssthresh;
+
+	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
+	if (tp->undo_marker) {
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		else /* PRR */
+			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
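
The split appears to exist because PRR (added further down) can finish recovery with snd_cwnd below ssthresh: in CWR the min() only ever lowers cwnd, while completing recovery snaps it to ssthresh. A toy rendering of the two branches, values invented:

    #include <stdio.h>

    static unsigned int complete_cwr(unsigned int cwnd, unsigned int ssthresh,
                                     int in_cwr_state)
    {
        if (in_cwr_state)               /* TCP_CA_CWR: never raise cwnd */
            return cwnd < ssthresh ? cwnd : ssthresh;
        return ssthresh;                /* recovery with PRR: land on ssthresh */
    }

    int main(void)
    {
        /* PRR may leave cwnd at 4 with ssthresh 5: the CWR branch keeps 4,
         * finishing recovery restores 5. */
        printf("CWR: %u, Recovery: %u\n",
               complete_cwr(4, 5, 1), complete_cwr(4, 5, 0));
        return 0;
    }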
@@ -2950,6 +2965,38 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
+/* This function implements the PRR algorithm, specifically the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *      cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *      losses and/or application stalls), do not perform any further cwnd
+ *      reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+					int fast_rexmit, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
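
A worked pass through the proportional branch above, with invented round numbers: ssthresh 5 after halving a prior cwnd of 10, four packets delivered and one already transmitted this recovery episode:

    #include <stdio.h>

    int main(void)
    {
        unsigned int ssthresh = 5, prior_cwnd = 10;   /* cwnd halved on entry */
        unsigned int prr_delivered = 4, prr_out = 1;  /* cumulative counters */
        unsigned int in_flight = 8;                   /* still above ssthresh */
        int sndcnt;

        /* Proportional phase: send about ssthresh/prior_cwnd packets per
         * packet delivered, so cwnd glides down to ssthresh in one RTT. */
        sndcnt = (int)((ssthresh * prr_delivered + prior_cwnd - 1) / prior_cwnd)
                 - (int)prr_out;

        printf("sndcnt = %d, next cwnd = %u\n", sndcnt, in_flight + sndcnt);
        /* -> sndcnt = 1, cwnd = 9: about one new packet per two delivered */
        return 0;
    }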
@@ -2961,7 +3008,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+				  int newly_acked_sacked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3111,13 +3159,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
+		tp->prior_cwnd = tp->snd_cwnd;
+		tp->prr_delivered = 0;
+		tp->prr_out = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 		fast_rexmit = 1;
 	}
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_down(sk, flag);
+	tp->prr_delivered += newly_acked_sacked;
+	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3194,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
  */
 static void tcp_rearm_rto(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -3298,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * connection startup slow start one packet too
 		 * quickly. This is severely frowned upon behavior.
 		 */
-		if (!(scb->flags & TCPHDR_SYN)) {
+		if (!(scb->tcp_flags & TCPHDR_SYN)) {
 			flag |= FLAG_DATA_ACKED;
 		} else {
 			flag |= FLAG_SYN_ACKED;
@@ -3446,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
 				 u32 ack_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3622,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3632,6 +3684,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets;
+	int prior_sacked = tp->sacked_out;
+	int newly_acked_sacked = 0;
 	int frto_cwnd = 0;
 
 	/* If the ack is older than previous acks
@@ -3703,6 +3757,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
+	newly_acked_sacked = (prior_packets - prior_sacked) -
+			     (tp->packets_out - tp->sacked_out);
+
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
 	/* Guarantee sacktag reordering detection against wrap-arounds */
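
This difference is the per-ACK delivery count PRR feeds on: packets that left the network via cumulative ACK plus those newly marked by SACK. A toy check of the arithmetic, numbers invented:

    #include <stdio.h>

    int main(void)
    {
        int prior_packets = 10, prior_sacked = 2;  /* before this ACK */
        int packets_out = 8, sacked_out = 3;       /* after processing it */

        /* previously unacked-and-unsacked minus what remains so */
        int newly_acked_sacked = (prior_packets - prior_sacked) -
                                 (packets_out - sacked_out);

        printf("delivered by this ACK: %d\n", newly_acked_sacked);  /* 3 */
        return 0;
    }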
@@ -3715,7 +3772,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
-				      flag);
+				      newly_acked_sacked, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3754,14 +3811,14 @@ old_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab)
+void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
+		       const u8 **hvpp, int estab)
 {
-	unsigned char *ptr;
-	struct tcphdr *th = tcp_hdr(skb);
+	const unsigned char *ptr;
+	const struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff * 4) - sizeof(struct tcphdr);
 
-	ptr = (unsigned char *)(th + 1);
+	ptr = (const unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
 	while (length > 0) {
@@ -3872,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 }
 EXPORT_SYMBOL(tcp_parse_options);
 
-static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
 {
-	__be32 *ptr = (__be32 *)(th + 1);
+	const __be32 *ptr = (const __be32 *)(th + 1);
 
 	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
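
The single-word compare in this fast path matches the canonical RFC 1323 layout for a timestamps-only option block: NOP, NOP, kind 8, length 10, i.e. the bytes 01 01 08 0a. A sketch that prints the matched pattern; the option constants are the standard TCP values:

    #include <stdio.h>
    #include <arpa/inet.h>

    #define TCPOPT_NOP        1
    #define TCPOPT_TIMESTAMP  8
    #define TCPOLEN_TIMESTAMP 10

    int main(void)
    {
        unsigned int pattern = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                     (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
        const unsigned char *p = (const unsigned char *)&pattern;

        /* prints 01 01 08 0a on any host endianness */
        printf("%02x %02x %02x %02x\n", p[0], p[1], p[2], p[3]);
        return 0;
    }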
@@ -3891,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
-static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-				  struct tcp_sock *tp, u8 **hvpp)
+static int tcp_fast_parse_options(const struct sk_buff *skb,
+				  const struct tcphdr *th,
+				  struct tcp_sock *tp, const u8 **hvpp)
 {
 	/* In the spirit of fast parsing, compare doff directly to constant
 	 * values. Because equality is used, short doff can be ignored here.
@@ -3913,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 /*
  * Parse MD5 Signature option
  */
-u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
 {
-	int length = (th->doff << 2) - sizeof (*th);
-	u8 *ptr = (u8*)(th + 1);
+	int length = (th->doff << 2) - sizeof(*th);
+	const u8 *ptr = (const u8 *)(th + 1);
 
 	/* If the TCP option is too short, we can short cut */
 	if (length < TCPOLEN_MD5SIG)
@@ -3993,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -4033,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk,
  * (borrowed from freebsd)
  */
 
-static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	return !before(end_seq, tp->rcv_wup) &&
 		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
@@ -4078,7 +4136,7 @@ static void tcp_reset(struct sock *sk)
  *
  * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
  */
-static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+static void tcp_fin(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4190,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 	tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
-static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4349,7 +4407,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tcp_hdr(skb)->fin)
-			tcp_fin(skb, sk, tcp_hdr(skb));
+			tcp_fin(sk);
 	}
 }
 
@@ -4377,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -4431,7 +4489,7 @@ queue_and_out:
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (th->fin)
-			tcp_fin(skb, sk, th);
+			tcp_fin(sk);
 
 		if (!skb_queue_empty(&tp->out_of_order_queue)) {
 			tcp_ofo_queue(sk);
@@ -4861,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk)
+static int tcp_should_expand_sndbuf(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
@@ -4897,8 +4955,10 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk)) {
-		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+		int sndmem = SKB_TRUESIZE(max_t(u32,
+						tp->rx_opt.mss_clamp,
+						tp->mss_cache) +
+					  MAX_TCP_HEADER);
 		int demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
 		sndmem *= 2 * demanded;
@@ -4970,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  * either form (or just set the sysctl tcp_stdurg).
  */
 
-static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
+static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
@@ -5036,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
 }
 
 /* This is the 'fast' part of urgent handling. */
-static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5157,9 +5217,9 @@ out:
  * play significant role here.
  */
 static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
-				 struct tcphdr *th, int syn_inerr)
+				 const struct tcphdr *th, int syn_inerr)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
@@ -5240,7 +5300,7 @@ discard:
  * tcp_data_queue when everything is OK.
  */
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			struct tcphdr *th, unsigned len)
+			const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int res;
@@ -5451,9 +5511,9 @@ discard:
 EXPORT_SYMBOL(tcp_rcv_established);
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-					 struct tcphdr *th, unsigned len)
+					 const struct tcphdr *th, unsigned int len)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
@@ -5728,7 +5788,7 @@ reset_and_undo:
  */
 
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-			  struct tcphdr *th, unsigned len)
+			  const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);