Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 221
 1 file changed, 118 insertions(+), 103 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c26076fb890e..1a14191687ac 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -50,9 +50,9 @@
 *	Andi Kleen	:	Make sure we never ack data there is not
 *				enough room for. Also make this condition
 *				a fatal error if it might still happen.
 *	Andi Kleen	:	Add tcp_measure_rcv_mss to make
 *				connections with MSS<min(MTU,ann. MSS)
 *				work without delayed acks.
 *	Andi Kleen	:	Process packets with PSH set in the
 *				fast path.
 *	J Hadi Salim	:	ECN support
@@ -112,17 +112,17 @@ int sysctl_tcp_abc __read_mostly;
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
 /* Adapt the MSS value used to make delayed ack decision to the
  * real world.
  */
 static void tcp_measure_rcv_mss(struct sock *sk,
 				const struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const unsigned int lss = icsk->icsk_ack.last_seg_size;
 	unsigned int len;
 
 	icsk->icsk_ack.last_seg_size = 0;
 
 	/* skb->len may jitter because of SACKs, even if peer
 	 * sends good full-sized frames.
@@ -440,15 +440,15 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int time;
 	int space;
 
 	if (tp->rcvq_space.time == 0)
 		goto new_measure;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
 	if (time < (tp->rcv_rtt_est.rtt >> 3) ||
 	    tp->rcv_rtt_est.rtt == 0)
 		return;
 
 	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
 
 	space = max(tp->rcvq_space.space, space);
@@ -483,7 +483,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		}
 	}
 }
 
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
 	tp->rcvq_space.time = tcp_time_stamp;
@@ -509,7 +509,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 	tcp_measure_rcv_mss(sk, skb);
 
 	tcp_rcv_rtt_measure(tp);
 
 	now = tcp_time_stamp;
 
 	if (!icsk->icsk_ack.ato) {
@@ -561,7 +561,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	/* The following amusing code comes from Jacobson's
 	 * article in SIGCOMM '88. Note that rtt and mdev
 	 * are scaled versions of rtt and mean deviation.
 	 * This is designed to be as fast as possible
 	 * m stands for "measurement".
 	 *
 	 * On a 1990 paper the rto value is changed to:
@@ -936,28 +936,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
 	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
+	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
 	int dup_sack = 0;
+	int cached_fack_count;
 	int i;
+	int first_sack_index;
 
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
+	/* Check for D-SACK. */
+	if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) {
+		dup_sack = 1;
+		tp->rx_opt.sack_ok |= 4;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+	} else if (num_sacks > 1 &&
+			!after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) &&
+			!before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) {
+		dup_sack = 1;
+		tp->rx_opt.sack_ok |= 4;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+	}
+
+	/* D-SACK for already forgotten data...
+	 * Do dumb counting. */
+	if (dup_sack &&
+			!after(ntohl(sp[0].end_seq), prior_snd_una) &&
+			after(ntohl(sp[0].end_seq), tp->undo_marker))
+		tp->undo_retrans--;
+
+	/* Eliminate too old ACKs, but take into
+	 * account more or less fresh ones, they can
+	 * contain valid SACK info.
+	 */
+	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
+		return 0;
+
 	/* SACK fastpath:
 	 * if the only SACK change is the increase of the end_seq of
 	 * the first block then only apply that SACK block
 	 * and use retrans queue hinting otherwise slowpath */
 	flag = 1;
-	for (i = 0; i< num_sacks; i++) {
-		__u32 start_seq = ntohl(sp[i].start_seq);
-		__u32 end_seq = ntohl(sp[i].end_seq);
+	for (i = 0; i < num_sacks; i++) {
+		__be32 start_seq = sp[i].start_seq;
+		__be32 end_seq = sp[i].end_seq;
 
-		if (i == 0){
+		if (i == 0) {
 			if (tp->recv_sack_cache[i].start_seq != start_seq)
 				flag = 0;
 		} else {
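The hunk above hoists the D-SACK detection out of the per-block loop: it now runs once, on the first SACK block only, before the fast-path cache comparison. For reference, a minimal user-space sketch of that test (RFC 2883: the first block lies below the cumulative ACK, or is wholly contained in the second block). The struct, function names, and host-byte-order sequence numbers are illustration-only simplifications, not the kernel code; only the before()/after() helpers mirror the kernel's wraparound-safe comparisons.

/* Standalone sketch of the hoisted D-SACK test; simplified types/names. */
#include <stdint.h>
#include <stdio.h>

struct sack_block { uint32_t start_seq, end_seq; };	/* host byte order */

/* mirror of the kernel's wraparound-safe serial-number comparisons */
static int before(uint32_t seq1, uint32_t seq2) { return (int32_t)(seq1 - seq2) < 0; }
static int after(uint32_t seq1, uint32_t seq2)  { return before(seq2, seq1); }

static int is_dsack(const struct sack_block *sp, int num_sacks, uint32_t ack_seq)
{
	if (before(sp[0].start_seq, ack_seq))
		return 1;			/* case 1: reports already acked data */
	if (num_sacks > 1 &&
	    !after(sp[0].end_seq, sp[1].end_seq) &&
	    !before(sp[0].start_seq, sp[1].start_seq))
		return 1;			/* case 2: contained in the second block */
	return 0;
}

int main(void)
{
	struct sack_block sp[2] = { { 1000, 1500 }, { 900, 2000 } };

	printf("%d\n", is_dsack(sp, 2, 800));	/* 1: block 0 inside block 1 */
	printf("%d\n", is_dsack(sp, 1, 1200));	/* 1: block 0 below the ACK */
	printf("%d\n", is_dsack(sp, 1, 800));	/* 0: ordinary SACK */
	return 0;
}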
@@ -967,39 +997,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		}
 		tp->recv_sack_cache[i].start_seq = start_seq;
 		tp->recv_sack_cache[i].end_seq = end_seq;
-
-		/* Check for D-SACK. */
-		if (i == 0) {
-			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;
-
-			if (before(start_seq, ack)) {
-				dup_sack = 1;
-				tp->rx_opt.sack_ok |= 4;
-				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
-			} else if (num_sacks > 1 &&
-					!after(end_seq, ntohl(sp[1].end_seq)) &&
-					!before(start_seq, ntohl(sp[1].start_seq))) {
-				dup_sack = 1;
-				tp->rx_opt.sack_ok |= 4;
-				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
-			}
-
-			/* D-SACK for already forgotten data...
-			 * Do dumb counting. */
-			if (dup_sack &&
-					!after(end_seq, prior_snd_una) &&
-					after(end_seq, tp->undo_marker))
-				tp->undo_retrans--;
-
-			/* Eliminate too old ACKs, but take into
-			 * account more or less fresh ones, they can
-			 * contain valid SACK info.
-			 */
-			if (before(ack, prior_snd_una - tp->max_window))
-				return 0;
-		}
+	}
+	/* Clear the rest of the cache sack blocks so they won't match mistakenly. */
+	for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
+		tp->recv_sack_cache[i].start_seq = 0;
+		tp->recv_sack_cache[i].end_seq = 0;
 	}
 
+	first_sack_index = 0;
 	if (flag)
 		num_sacks = 1;
 	else {
@@ -1016,6 +1021,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				tmp = sp[j];
 				sp[j] = sp[j+1];
 				sp[j+1] = tmp;
+
+				/* Track where the first SACK block goes to */
+				if (j == first_sack_index)
+					first_sack_index = j+1;
 			}
 
 		}
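This hunk makes the bubble sort of the SACK blocks remember where the block the peer listed first ends up, so the fast-path hint can later be stored for that block only. A standalone sketch of the bookkeeping, using made-up sample start sequences:

/* Illustration only: sort up to four start_seq values while tracking the
 * final position of the block that was originally reported first.
 */
#include <stdio.h>

int main(void)
{
	unsigned int start_seq[4] = { 3000, 1000, 2000, 500 };
	int num_sacks = 4;
	int first_sack_index = 0;	/* index of the originally-first block */
	int i, j;

	for (i = num_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (start_seq[j] > start_seq[j + 1]) {
				unsigned int tmp = start_seq[j];

				start_seq[j] = start_seq[j + 1];
				start_seq[j + 1] = tmp;

				/* Track where the first SACK block goes to */
				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}

	/* prints 3: the originally-first block (3000) ended up last */
	printf("first block is now at index %d\n", first_sack_index);
	return 0;
}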
@@ -1025,20 +1034,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	/* clear flag as used for different purpose in following code */
 	flag = 0;
 
+	/* Use SACK fastpath hint if valid */
+	cached_skb = tp->fastpath_skb_hint;
+	cached_fack_count = tp->fastpath_cnt_hint;
+	if (!cached_skb) {
+		cached_skb = sk->sk_write_queue.next;
+		cached_fack_count = 0;
+	}
+
 	for (i=0; i<num_sacks; i++, sp++) {
 		struct sk_buff *skb;
 		__u32 start_seq = ntohl(sp->start_seq);
 		__u32 end_seq = ntohl(sp->end_seq);
 		int fack_count;
 
-		/* Use SACK fastpath hint if valid */
-		if (tp->fastpath_skb_hint) {
-			skb = tp->fastpath_skb_hint;
-			fack_count = tp->fastpath_cnt_hint;
-		} else {
-			skb = sk->sk_write_queue.next;
-			fack_count = 0;
-		}
+		skb = cached_skb;
+		fack_count = cached_fack_count;
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
@@ -1048,8 +1059,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			int in_sack, pcount;
 			u8 sacked;
 
-			tp->fastpath_skb_hint = skb;
-			tp->fastpath_cnt_hint = fack_count;
+			cached_skb = skb;
+			cached_fack_count = fack_count;
+			if (i == first_sack_index) {
+				tp->fastpath_skb_hint = skb;
+				tp->fastpath_cnt_hint = fack_count;
+			}
 
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
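Together with the previous hunks, the walk position now lives in the local cached_skb/cached_fack_count pair, so each SACK block (already sorted by start_seq) resumes scanning the ordered retransmit queue where the previous block stopped, while the persistent tp->fastpath_* hint is updated only for the first-reported block. A simplified, standalone sketch of the resume-from-cached-position idea; the segment list and range arrays below are invented for illustration and do not reflect kernel data structures:

/* Sketch: sorted ranges scanning an ordered segment list, resuming from
 * the position cached by the previous range instead of the list head.
 */
#include <stdio.h>

struct seg { unsigned int seq, end_seq; struct seg *next; };

int main(void)
{
	struct seg q[5] = {
		{ 100, 200, &q[1] }, { 200, 300, &q[2] }, { 300, 400, &q[3] },
		{ 400, 500, &q[4] }, { 500, 600, NULL }
	};
	unsigned int sack_start[2] = { 200, 400 };	/* sorted by start_seq */
	unsigned int sack_end[2]   = { 300, 600 };
	struct seg *cached = &q[0];			/* walk position carried over */
	int i;

	for (i = 0; i < 2; i++) {
		struct seg *skb;

		for (skb = cached; skb; skb = skb->next) {	/* resume, don't restart */
			cached = skb;
			if (skb->seq >= sack_end[i])
				break;			/* past this block: stop early */
			if (skb->seq >= sack_start[i] && skb->end_seq <= sack_end[i])
				printf("block %d marks segment %u-%u\n",
				       i, skb->seq, skb->end_seq);
		}
	}
	return 0;
}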
@@ -1234,8 +1249,8 @@ void tcp_enter_frto(struct sock *sk)
 	tp->frto_counter = 1;
 
 	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
 	    tp->snd_una == tp->high_seq ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_FRTO);
@@ -1954,11 +1969,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	 * 1. Reno does not count dupacks (sacked_out) automatically. */
 	if (!tp->packets_out)
 		tp->sacked_out = 0;
 	/* 2. SACK counts snd_fack in packets inaccurately. */
 	if (tp->sacked_out == 0)
 		tp->fackets_out = 0;
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
 	if (flag&FLAG_ECE)
 		tp->prior_ssthresh = 0;
@@ -2188,7 +2203,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 			  __u32 now, __s32 *seq_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 	__u32 seq = tp->snd_una;
 	__u32 packets_acked;
 	int acked = 0;
@@ -2264,7 +2279,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 	while ((skb = skb_peek(&sk->sk_write_queue)) &&
 	       skb != sk->sk_send_head) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		__u8 sacked = scb->sacked;
 
 		/* If our packet is before the ack sequence we can
@@ -2455,9 +2470,9 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_sync_left_out(tp);
 
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
 		/* RTO was caused by loss, start retransmitting in
@@ -2612,7 +2627,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 	opt_rx->saw_tstamp = 0;
 
 	while(length>0) {
 		int opcode=*ptr++;
 		int opsize;
 
 		switch (opcode) {
@@ -2627,7 +2642,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				return;
 			if (opsize > length)
 				return;	/* don't parse partial options */
 			switch(opcode) {
 			case TCPOPT_MSS:
 				if(opsize==TCPOLEN_MSS && th->syn && !estab) {
 					u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
@@ -2686,10 +2701,10 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			 */
 				break;
 #endif
 			};
 			ptr+=opsize-2;
 			length-=opsize;
 		};
 	}
 }
 
@@ -3248,7 +3263,7 @@ drop:
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
 
 		/* If window is closed, drop tail of packet. But after
 		 * remembering D-SACK for its head made in previous line.
 		 */
@@ -3327,7 +3342,7 @@ drop:
 		}
 	}
 	__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
 
 	/* And clean segments covered by new one as whole. */
 	while ((skb1 = skb->next) !=
 	       (struct sk_buff*)&tp->out_of_order_queue &&
@@ -3492,7 +3507,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  */
 static int tcp_prune_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
 
@@ -3602,7 +3617,7 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk, tp)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
@@ -3675,7 +3690,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  * For 1003.1g we should support a new option TCP_STDURG to permit
  * either form (or just set the sysctl tcp_stdurg).
  */
 
 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3756,7 +3771,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
 			  th->syn;
 
 		/* Is the urgent pointer pointing into this packet? */
 		if (ptr < skb->len) {
 			u8 tmp;
 			if (skb_copy_bits(skb, ptr, &tmp, 1))
@@ -3820,7 +3835,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	int copied_early = 0;
 
 	if (tp->ucopy.wakeup)
 		return 0;
 
 	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 		tp->ucopy.dma_chan = get_softnet_dma();
@@ -3856,26 +3871,26 @@ out:
 #endif /* CONFIG_NET_DMA */
 
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
  *	It is split into a fast path and a slow path. The fast path is
  *	disabled when:
  *	- A zero window was announced from us - zero window probing
  *	  is only handled properly in the slow path.
  *	- Out of order segments arrived.
  *	- Urgent data is expected.
  *	- There is no buffer space left
  *	- Unexpected TCP flags/window values/header lengths are received
  *	  (detected by checking the TCP header against pred_flags)
  *	- Data is sent in both directions. Fast path only supports pure senders
  *	  or pure receivers (this means either the sequence number or the ack
  *	  value must stay constant)
  *	- Unexpected TCP option.
  *
  *	When these conditions are not satisfied it drops into a standard
  *	receive procedure patterned after RFC793 to handle all cases.
  *	The first three cases are guaranteed by proper pred_flags setting,
  *	the rest is checked inline. Fast processing is turned on in
  *	tcp_data_queue when everything is OK.
  */
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
@@ -3885,15 +3900,15 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	/*
 	 *	Header prediction.
 	 *	The code loosely follows the one in the famous
 	 *	"30 instruction TCP receive" Van Jacobson mail.
 	 *
 	 *	Van's trick is to deposit buffers into socket queue
 	 *	on a device interrupt, to call tcp_recv function
 	 *	on the receive process context and checksum and copy
 	 *	the buffer to user space. smart...
 	 *
 	 *	Our current scheme is not silly either but we take the
 	 *	extra cost of the net_bh soft interrupt processing...
 	 *	We do checksum and copy also but from device to kernel.
 	 */
@@ -3904,7 +3919,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	 *	if header_prediction is to be made
 	 *	'S' will always be tp->tcp_header_len >> 2
 	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
 	 *	turn it off (when there are holes in the receive
 	 *	 space for instance)
 	 *	PSH flag is ignored.
 	 */
@@ -3928,7 +3943,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			goto slow_path;
 
 		tp->rx_opt.saw_tstamp = 1;
 		++ptr;
 		tp->rx_opt.rcv_tsval = ntohl(*ptr);
 		++ptr;
 		tp->rx_opt.rcv_tsecr = ntohl(*ptr);
@@ -3960,7 +3975,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				 * on entry.
 				 */
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk, tp);
 				return 0;
 			} else { /* Header too small */
@@ -4378,11 +4393,11 @@ reset_and_undo:
 
 /*
  *	This function implements the receiving procedure of RFC 793 for
  *	all states except ESTABLISHED and TIME_WAIT.
  *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
  *	address independent.
  */
 
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			  struct tcphdr *th, unsigned len)
 {
@@ -4407,19 +4422,19 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 			return 1;
 
 		/* Now we have several options: In theory there is
 		 * nothing else in the frame. KA9Q has an option to
 		 * send data with the syn, BSD accepts data with the
 		 * syn up to the [to be] advertised window and
 		 * Solaris 2.1 gives you a protocol error. For now
 		 * we just ignore it, that fits the spec precisely
 		 * and avoids incompatibilities. It would be nice in
 		 * future to drop through and process the data.
 		 *
 		 * Now that TTCP is starting to be used we ought to
 		 * queue this data.
 		 * But, this leaves one open to an easy denial of
 		 * service attack, and SYN cookies can't defend
 		 * against this problem. So, we drop the data
 		 * in the interest of security over speed unless
 		 * it's still in use.
@@ -4609,7 +4624,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	case TCP_FIN_WAIT1:
 	case TCP_FIN_WAIT2:
 		/* RFC 793 says to queue data in these states,
 		 * RFC 1122 says we MUST send a reset.
 		 * BSD 4.4 also does reset.
 		 */
 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
@@ -4621,7 +4636,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			}
 		}
 		/* Fall through */
 	case TCP_ESTABLISHED:
 		tcp_data_queue(sk, skb);
 		queued = 1;
 		break;
@@ -4633,7 +4648,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ack_snd_check(sk);
 	}
 
 	if (!queued) {
 discard:
 		__kfree_skb(skb);
 	}