Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c | 244
1 file changed, 153 insertions(+), 91 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d73aab3fbfc0..52b5c2d0ecd0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
 	tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
 {
 	if (tcp_hdr(skb)->cwr)
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -217,32 +217,41 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
 	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 }
 
-static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
 {
-	if (tp->ecn_flags & TCP_ECN_OK) {
-		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
-			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		/* Funny extension: if ECT is not set on a segment,
-		 * it is surely retransmit. It is not in ECN RFC,
-		 * but Linux follows this rule. */
-		else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
-			tcp_enter_quickack_mode((struct sock *)tp);
+	if (!(tp->ecn_flags & TCP_ECN_OK))
+		return;
+
+	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+	case INET_ECN_NOT_ECT:
+		/* Funny extension: if ECT is not set on a segment,
+		 * and we have already seen ECT on a previous segment,
+		 * it is probably a retransmit.
+		 */
+		if (tp->ecn_flags & TCP_ECN_SEEN)
+			tcp_enter_quickack_mode((struct sock *)tp);
+		break;
+	case INET_ECN_CE:
+		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		/* fall through */
+	default:
+		tp->ecn_flags |= TCP_ECN_SEEN;
 	}
 }
 
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
+static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
 		return 1;
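Note: the reworked TCP_ECN_check_ce() above keys off the two ECN bits of the IP header (cached by the receive path in TCP_SKB_CB(skb)->ip_dsfield) rather than skb control-block flags, and only treats a non-ECT segment as a probable retransmit once ECT has actually been seen on the connection. A minimal userspace sketch of that dispatch follows; the flag and codepoint values mirror include/net/tcp.h and include/net/inet_ecn.h but should be taken as illustrative stand-ins, not the authoritative definitions.

/* Standalone sketch of the ECN codepoint dispatch; values are stand-ins. */
#include <stdio.h>

enum { INET_ECN_NOT_ECT = 0, INET_ECN_ECT_1 = 1,
       INET_ECN_ECT_0 = 2, INET_ECN_CE = 3, INET_ECN_MASK = 3 };

#define TCP_ECN_OK		1
#define TCP_ECN_DEMAND_CWR	4
#define TCP_ECN_SEEN		8

static void check_ce(unsigned int *ecn_flags, unsigned char dsfield)
{
	if (!(*ecn_flags & TCP_ECN_OK))
		return;

	switch (dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* non-ECT after ECT was seen: probably a retransmit */
		if (*ecn_flags & TCP_ECN_SEEN)
			printf("enter quickack mode\n");
		break;
	case INET_ECN_CE:
		*ecn_flags |= TCP_ECN_DEMAND_CWR;	/* echo ECE to the peer */
		/* fall through: CE also proves an ECT-capable path */
	default:
		*ecn_flags |= TCP_ECN_SEEN;
	}
}

int main(void)
{
	unsigned int flags = TCP_ECN_OK;

	check_ce(&flags, INET_ECN_ECT_0);	/* sets TCP_ECN_SEEN */
	check_ce(&flags, INET_ECN_CE);		/* demands CWR from the peer */
	check_ce(&flags, INET_ECN_NOT_ECT);	/* takes the quickack path */
	return 0;
}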
@@ -256,14 +265,11 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 
 static void tcp_fixup_sndbuf(struct sock *sk)
 {
-	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
-		     sizeof(struct sk_buff);
+	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
 
-	if (sk->sk_sndbuf < 3 * sndmem) {
-		sk->sk_sndbuf = 3 * sndmem;
-		if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
-			sk->sk_sndbuf = sysctl_tcp_wmem[2];
-	}
+	sndmem *= TCP_INIT_CWND;
+	if (sk->sk_sndbuf < sndmem)
+		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
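Note: tcp_fixup_sndbuf() used to approximate an skb's memory footprint by hand (mss_clamp + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff)) and budget three such buffers; it now defers the overhead estimate to the SKB_TRUESIZE() helper and budgets a full initial window of TCP_INIT_CWND (10) segments. Below is a rough, self-contained model of that arithmetic; the struct sizes are made up (the kernel's depend on configuration), so the printed figure is illustrative only.

/* Rough model of the new sndbuf budget; struct sizes are stand-ins. */
#include <stdio.h>

#define SKB_DATA_ALIGN(x)	(((x) + 63) & ~63UL)	/* cache-line align */
#define SKB_OVERHEAD		(SKB_DATA_ALIGN(232) +	/* ~sizeof(struct sk_buff) */ \
				 SKB_DATA_ALIGN(320))	/* ~sizeof(struct skb_shared_info) */
#define SKB_TRUESIZE(x)		((x) + SKB_OVERHEAD)
#define TCP_INIT_CWND		10

int main(void)
{
	unsigned long mss_clamp = 1460, max_tcp_header = 192;
	unsigned long sndmem = SKB_TRUESIZE(mss_clamp + max_tcp_header);

	sndmem *= TCP_INIT_CWND;	/* one initial window, not 3 skbs */
	printf("initial sk_sndbuf target: %lu bytes\n", sndmem);
	return 0;
}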
@@ -309,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -339,17 +345,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
 
 static void tcp_fixup_rcvbuf(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+	u32 mss = tcp_sk(sk)->advmss;
+	u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+	int rcvmem;
 
-	/* Try to select rcvbuf so that 4 mss-sized segments
-	 * will fit to window and corresponding skbs will fit to our rcvbuf.
-	 * (was 3; 4 is minimum to allow fast retransmit to work.)
+	/* Limit to 10 segments if mss <= 1460,
+	 * or 14600/mss segments, with a minimum of two segments.
 	 */
-	while (tcp_win_from_space(rcvmem) < tp->advmss)
+	if (mss > 1460)
+		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+
+	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
+	while (tcp_win_from_space(rcvmem) < mss)
 		rcvmem += 128;
-	if (sk->sk_rcvbuf < 4 * rcvmem)
-		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
+
+	rcvmem *= icwnd;
+
+	if (sk->sk_rcvbuf < rcvmem)
+		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
 }
 
 /* 4. Try to fixup all. It is made immediately after connection enters
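Note: the receive-buffer heuristic above sizes for the peer's likely initial congestion window rather than a fixed four segments: 10 segments while mss <= 1460, otherwise 14600/mss with a floor of two. A quick standalone check of that clamp; TCP_DEFAULT_INIT_RCVWND is 10 as in the kernel header, while the sample mss values are arbitrary.

/* Check of the initial-receive-window clamp; sample mss values invented. */
#include <stdio.h>

#define TCP_DEFAULT_INIT_RCVWND	10

static unsigned int init_rcvwnd_segs(unsigned int mss)
{
	unsigned int icwnd = TCP_DEFAULT_INIT_RCVWND;

	if (mss > 1460) {
		icwnd = (1460 * TCP_DEFAULT_INIT_RCVWND) / mss;
		if (icwnd < 2)
			icwnd = 2;	/* never budget below two segments */
	}
	return icwnd;
}

int main(void)
{
	unsigned int mss[] = { 536, 1460, 4380, 9000 };

	for (int i = 0; i < 4; i++)
		printf("mss %u -> %u segments\n", mss[i], init_rcvwnd_segs(mss[i]));
	/* prints 10, 10, 3 (14600/4380), and 2 (floor) */
	return 0;
}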
@@ -416,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk)
  */
 void tcp_initialize_rcv_mss(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd / 2);
@@ -531,8 +544,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		space /= tp->advmss;
 		if (!space)
 			space = 1;
-		rcvmem = (tp->advmss + MAX_TCP_HEADER +
-			  16 + sizeof(struct sk_buff));
+		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 		while (tcp_win_from_space(rcvmem) < tp->advmss)
 			rcvmem += 128;
 		space *= rcvmem;
@@ -812,7 +824,7 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
@@ -1204,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 	tp->lost_retrans_low = new_low_seq;
 }
 
-static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			   struct tcp_sack_block_wire *sp, int num_sacks,
 			   u32 prior_snd_una)
 {
@@ -1298,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 			  struct tcp_sacktag_state *state,
 			  int dup_sack, int pcount)
 {
@@ -1438,7 +1450,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
 	}
 
-	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
 	if (skb == tcp_highest_sack(sk))
 		tcp_advance_highest_sack(sk, skb);
 
@@ -1453,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 /* I wish gso_size would have a bit more sane initialization than
  * something-or-zero which complicates things
  */
-static int tcp_skb_seglen(struct sk_buff *skb)
+static int tcp_skb_seglen(const struct sk_buff *skb)
 {
 	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
 }
 
 /* Shifting pages past head area doesn't work */
-static int skb_can_shift(struct sk_buff *skb)
+static int skb_can_shift(const struct sk_buff *skb)
 {
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
@@ -1708,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 	return skb;
 }
 
-static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
 {
 	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
 }
 
 static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			u32 prior_snd_una)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = (skb_transport_header(ack_skb) +
-			      TCP_SKB_CB(ack_skb)->sacked);
+	const unsigned char *ptr = (skb_transport_header(ack_skb) +
+				    TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
@@ -2284,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
 	return 0;
 }
 
-static inline int tcp_fackets_out(struct tcp_sock *tp)
+static inline int tcp_fackets_out(const struct tcp_sock *tp)
 {
 	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
 }
@@ -2304,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
  * they differ. Since neither occurs due to loss, TCP should really
  * ignore them.
  */
-static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 {
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_skb_timedout(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
-static inline int tcp_head_timedout(struct sock *sk)
+static inline int tcp_head_timedout(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	return tp->packets_out &&
 	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2627,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
-static inline int tcp_packet_delayed(struct tcp_sock *tp)
+static inline int tcp_packet_delayed(const struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
 		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2688,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static inline int tcp_may_undo(struct tcp_sock *tp)
+static inline int tcp_may_undo(const struct tcp_sock *tp)
 {
 	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
 }
@@ -2752,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
  * that successive retransmissions of a segment must not advance
  * retrans_stamp under any conditions.
  */
-static int tcp_any_retrans_done(struct sock *sk)
+static int tcp_any_retrans_done(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	if (tp->retrans_out)
@@ -2828,9 +2841,13 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* Do not moderate cwnd if it's already undone in cwr or recovery */
-	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
-		tp->snd_cwnd = tp->snd_ssthresh;
+
+	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
+	if (tp->undo_marker) {
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		else /* PRR */
+			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2948,6 +2965,38 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
+/* This function implements the PRR algorithm, specifically the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *	cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *	losses and/or application stalls), do not perform any further cwnd
+ *	reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+					int fast_rexmit, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
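Note: the proportional branch of tcp_update_cwnd_in_recovery() is easiest to follow with numbers. The sketch below replays it outside the kernel for prior_cwnd = 10 and ssthresh = 5 (made-up values): the rounded-up dividend (ssthresh * prr_delivered + prior_cwnd - 1) lets roughly one packet out per two delivered, so prr_out converges on ssthresh, spreading the halving across one RTT of deliveries.

/* Worked example of the PRR-SSRB proportional branch; values invented. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int ssthresh = 5, prior_cwnd = 10;
	int prr_delivered = 0, prr_out = 0;

	for (int acked = 1; acked <= 10; acked++) {
		prr_delivered++;
		uint64_t dividend = (uint64_t)ssthresh * prr_delivered +
				    prior_cwnd - 1;	/* ceiling division */
		int sndcnt = (int)(dividend / prior_cwnd) - prr_out;

		prr_out += sndcnt;
		printf("delivered=%d sndcnt=%d total_out=%d\n",
		       prr_delivered, sndcnt, prr_out);
	}
	/* total_out ends at ceil(5 * 10 / 10) = 5 = ssthresh */
	return 0;
}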
@@ -2959,7 +3008,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+				  int newly_acked_sacked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3109,13 +3159,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
+		tp->prior_cwnd = tp->snd_cwnd;
+		tp->prr_delivered = 0;
+		tp->prr_out = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 		fast_rexmit = 1;
 	}
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_down(sk, flag);
+	tp->prr_delivered += newly_acked_sacked;
+	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3192,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
  */
 static void tcp_rearm_rto(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -3296,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			 * connection startup slow start one packet too
 			 * quickly. This is severely frowned upon behavior.
 			 */
-			if (!(scb->flags & TCPHDR_SYN)) {
+			if (!(scb->tcp_flags & TCPHDR_SYN)) {
 				flag |= FLAG_DATA_ACKED;
 			} else {
 				flag |= FLAG_SYN_ACKED;
@@ -3444,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
 				 u32 ack_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3620,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3630,6 +3684,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets;
+	int prior_sacked = tp->sacked_out;
+	int newly_acked_sacked = 0;
 	int frto_cwnd = 0;
 
 	/* If the ack is older than previous acks
@@ -3701,6 +3757,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
+	newly_acked_sacked = (prior_packets - prior_sacked) -
+			     (tp->packets_out - tp->sacked_out);
+
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
 	/* Guarantee sacktag reordering detection against wrap-arounds */
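Note: newly_acked_sacked is the drop in (packets_out - sacked_out) across this ACK, i.e. packets newly removed from the in-network estimate, whether cumulatively ACKed or newly SACKed. It becomes the "newly delivered" input that feeds PRR via tcp_fastretrans_alert(). A tiny self-check with invented numbers:

/* Sanity check of the newly_acked_sacked bookkeeping; numbers invented. */
#include <assert.h>

int main(void)
{
	int prior_packets = 10, prior_sacked = 2;	/* before the ACK */
	int packets_out   = 8,  sacked_out   = 3;	/* after the ACK  */

	int newly_acked_sacked = (prior_packets - prior_sacked) -
				 (packets_out - sacked_out);

	/* (10 - 2) - (8 - 3) = 3: two cumulatively ACKed + one newly SACKed */
	assert(newly_acked_sacked == 3);
	return 0;
}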
@@ -3713,7 +3772,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
-				      flag);
+				      newly_acked_sacked, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3752,14 +3811,14 @@ old_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab)
+void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
+		       const u8 **hvpp, int estab)
 {
-	unsigned char *ptr;
-	struct tcphdr *th = tcp_hdr(skb);
+	const unsigned char *ptr;
+	const struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff * 4) - sizeof(struct tcphdr);
 
-	ptr = (unsigned char *)(th + 1);
+	ptr = (const unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
 	while (length > 0) {
@@ -3870,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 }
 EXPORT_SYMBOL(tcp_parse_options);
 
-static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
 {
-	__be32 *ptr = (__be32 *)(th + 1);
+	const __be32 *ptr = (const __be32 *)(th + 1);
 
 	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
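Note: the aligned fast path above matches the entire recommended timestamp-option prefix with a single 32-bit compare: NOP (1), NOP (1), TCPOPT_TIMESTAMP (8), TCPOLEN_TIMESTAMP (10), i.e. the wire bytes 01 01 08 0a. The standalone snippet below just prints the word being matched; the option constants are restated locally so it builds outside the kernel.

/* Print the aligned timestamp-option word the fast path compares against. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define TCPOPT_NOP		1
#define TCPOPT_TIMESTAMP	8
#define TCPOLEN_TIMESTAMP	10

int main(void)
{
	uint32_t word = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			      (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);

	/* Host-order view of the on-wire prefix: 0x0101080a */
	printf("aligned timestamp prefix: 0x%08x\n", ntohl(word));
	return 0;
}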
@@ -3889,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
-static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-				  struct tcp_sock *tp, u8 **hvpp)
+static int tcp_fast_parse_options(const struct sk_buff *skb,
+				  const struct tcphdr *th,
+				  struct tcp_sock *tp, const u8 **hvpp)
 {
 	/* In the spirit of fast parsing, compare doff directly to constant
 	 * values. Because equality is used, short doff can be ignored here.
@@ -3911,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 /*
  * Parse MD5 Signature option
  */
-u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
 {
-	int length = (th->doff << 2) - sizeof (*th);
-	u8 *ptr = (u8*)(th + 1);
+	int length = (th->doff << 2) - sizeof(*th);
+	const u8 *ptr = (const u8 *)(th + 1);
 
 	/* If the TCP option is too short, we can short cut */
 	if (length < TCPOLEN_MD5SIG)
@@ -3991,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -4031,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk,
  * (borrowed from freebsd)
  */
 
-static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	return !before(end_seq, tp->rcv_wup) &&
 		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
@@ -4076,7 +4136,7 @@ static void tcp_reset(struct sock *sk)
  *
  *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
  */
-static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+static void tcp_fin(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4188,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
-static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4347,7 +4407,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tcp_hdr(skb)->fin)
-			tcp_fin(skb, sk, tcp_hdr(skb));
+			tcp_fin(sk);
 	}
 }
 
@@ -4375,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -4429,7 +4489,7 @@ queue_and_out:
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (th->fin)
-			tcp_fin(skb, sk, th);
+			tcp_fin(sk);
 
 		if (!skb_queue_empty(&tp->out_of_order_queue)) {
 			tcp_ofo_queue(sk);
@@ -4859,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk)
+static int tcp_should_expand_sndbuf(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
@@ -4895,8 +4955,10 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk)) {
-		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+		int sndmem = SKB_TRUESIZE(max_t(u32,
+						tp->rx_opt.mss_clamp,
+						tp->mss_cache) +
+					  MAX_TCP_HEADER);
 		int demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
 		sndmem *= 2 * demanded;
@@ -4968,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  * either form (or just set the sysctl tcp_stdurg).
  */
 
-static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
+static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
@@ -5034,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
 }
 
 /* This is the 'fast' part of urgent handling. */
-static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5155,9 +5217,9 @@ out:
  * play significant role here.
  */
 static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
-				 struct tcphdr *th, int syn_inerr)
+				 const struct tcphdr *th, int syn_inerr)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
@@ -5238,7 +5300,7 @@ discard:
  *	tcp_data_queue when everything is OK.
  */
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			struct tcphdr *th, unsigned len)
+			const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int res;
@@ -5449,9 +5511,9 @@ discard:
 EXPORT_SYMBOL(tcp_rcv_established);
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-					 struct tcphdr *th, unsigned len)
+					 const struct tcphdr *th, unsigned int len)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
@@ -5726,7 +5788,7 @@ reset_and_undo:
  */
 
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-			  struct tcphdr *th, unsigned len)
+			  const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);