Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 221
1 file changed, 118 insertions(+), 103 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c26076fb890e..1a14191687ac 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -50,9 +50,9 @@
  *		Andi Kleen	:	Make sure we never ack data there is not
  *					enough room for. Also make this condition
  *					a fatal error if it might still happen.
  *		Andi Kleen	:	Add tcp_measure_rcv_mss to make
  *					connections with MSS<min(MTU,ann. MSS)
  *					work without delayed acks.
  *		Andi Kleen	:	Process packets with PSH set in the
  *					fast path.
  *		J Hadi Salim	:	ECN support
@@ -112,17 +112,17 @@ int sysctl_tcp_abc __read_mostly;
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
 /* Adapt the MSS value used to make delayed ack decision to the
  * real world.
  */
 static void tcp_measure_rcv_mss(struct sock *sk,
 				const struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const unsigned int lss = icsk->icsk_ack.last_seg_size;
 	unsigned int len;
 
 	icsk->icsk_ack.last_seg_size = 0;
 
 	/* skb->len may jitter because of SACKs, even if peer
 	 * sends good full-sized frames.
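The function above feeds the delayed-ACK machinery: it keeps an estimate of the peer's effective segment size so that "did two full segments arrive?" style decisions keep working when the real MSS is smaller than min(MTU, announced MSS). A standalone sketch of that idea, with made-up names (this is not the kernel code, which also handles option-length jitter and PSH heuristics):

struct rcv_mss_est {
	unsigned int rcv_mss;	/* current estimate of the peer's segment size */
};

static void measure_rcv_mss(struct rcv_mss_est *est, unsigned int seg_len)
{
	/* The measured length can jitter (varying TCP options such as SACK
	 * blocks change the header size), so only let the estimate grow when
	 * a segment at least as large as the current estimate shows up. */
	if (seg_len >= est->rcv_mss)
		est->rcv_mss = seg_len;
}

/* Delayed-ACK style check: have at least two... er, more than one full
 * segment's worth of bytes arrived since the last window update we sent? */
static int should_ack_now(const struct rcv_mss_est *est,
			  unsigned int rcv_nxt, unsigned int rcv_wup)
{
	return rcv_nxt - rcv_wup > est->rcv_mss;
}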
@@ -440,15 +440,15 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int time;
 	int space;
 
 	if (tp->rcvq_space.time == 0)
 		goto new_measure;
 
 	time = tcp_time_stamp - tp->rcvq_space.time;
 	if (time < (tp->rcv_rtt_est.rtt >> 3) ||
 	    tp->rcv_rtt_est.rtt == 0)
 		return;
 
 	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
 
 	space = max(tp->rcvq_space.space, space);
@@ -483,7 +483,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 			}
 		}
 	}
 
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
 	tp->rcvq_space.time = tcp_time_stamp;
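tcp_rcv_space_adjust() above is receiver-side buffer auto-tuning: once per estimated RTT it looks at how much data the application actually copied, doubles it on the assumption that the sender can grow its window every RTT, and uses the result to size the receive buffer. A minimal userspace sketch of that loop, with simplified names (the kernel version additionally converts bytes to skb memory and clamps against the rmem sysctl limits):

struct rcvq_space_est {
	unsigned int space;	/* bytes copied during the previous RTT */
	unsigned int seq;	/* copied_seq at the start of the window */
	unsigned int time;	/* timestamp of the start of the window */
};

static unsigned int rcv_space_adjust(struct rcvq_space_est *est,
				     unsigned int copied_seq,
				     unsigned int now,
				     unsigned int rtt,
				     unsigned int rcvbuf)
{
	unsigned int space;

	if (rtt == 0 || now - est->time < rtt)
		return rcvbuf;			/* less than one RTT elapsed */

	/* Assume the sender can double its rate in the next RTT. */
	space = 2 * (copied_seq - est->seq);
	if (space > est->space) {
		est->space = space;
		if (space > rcvbuf)
			rcvbuf = space;		/* grow, never shrink here */
	}

	est->seq = copied_seq;			/* start a new measurement */
	est->time = now;
	return rcvbuf;
}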
@@ -509,7 +509,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 	tcp_measure_rcv_mss(sk, skb);
 
 	tcp_rcv_rtt_measure(tp);
 
 	now = tcp_time_stamp;
 
 	if (!icsk->icsk_ack.ato) {
@@ -561,7 +561,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	/* The following amusing code comes from Jacobson's
 	 * article in SIGCOMM '88. Note that rtt and mdev
 	 * are scaled versions of rtt and mean deviation.
 	 * This is designed to be as fast as possible
 	 * m stands for "measurement".
 	 *
 	 * On a 1990 paper the rto value is changed to:
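The comment refers to the Jacobson (SIGCOMM '88) smoothed-RTT estimator with a mean-deviation term, and to the RTO = srtt + 4*mdev rule from the 1990 revision it mentions. An unscaled floating-point sketch of that algorithm (the kernel keeps srtt and mdev pre-scaled so the divisions become shifts; this is only an illustration):

struct rtt_est {
	double srtt;	/* smoothed RTT */
	double mdev;	/* smoothed mean deviation */
};

static double rtt_update(struct rtt_est *e, double m /* measurement */)
{
	if (e->srtt == 0) {
		/* First measurement: seed the estimator. */
		e->srtt = m;
		e->mdev = m / 2;
	} else {
		double err = m - e->srtt;

		e->srtt += err / 8;			/* gain 1/8 */
		if (err < 0)
			err = -err;
		e->mdev += (err - e->mdev) / 4;		/* gain 1/4 */
	}
	/* RTO = srtt + 4 * mdev, per the 1990 revision mentioned above. */
	return e->srtt + 4 * e->mdev;
}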
@@ -936,28 +936,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
 	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
+	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
 	int dup_sack = 0;
+	int cached_fack_count;
 	int i;
+	int first_sack_index;
 
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
+	/* Check for D-SACK. */
+	if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) {
+		dup_sack = 1;
+		tp->rx_opt.sack_ok |= 4;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+	} else if (num_sacks > 1 &&
+			!after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) &&
+			!before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) {
+		dup_sack = 1;
+		tp->rx_opt.sack_ok |= 4;
+		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+	}
+
+	/* D-SACK for already forgotten data...
+	 * Do dumb counting. */
+	if (dup_sack &&
+			!after(ntohl(sp[0].end_seq), prior_snd_una) &&
+			after(ntohl(sp[0].end_seq), tp->undo_marker))
+		tp->undo_retrans--;
+
+	/* Eliminate too old ACKs, but take into
+	 * account more or less fresh ones, they can
+	 * contain valid SACK info.
+	 */
+	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
+		return 0;
+
 	/* SACK fastpath:
 	 * if the only SACK change is the increase of the end_seq of
 	 * the first block then only apply that SACK block
 	 * and use retrans queue hinting otherwise slowpath */
 	flag = 1;
-	for (i = 0; i< num_sacks; i++) {
-		__u32 start_seq = ntohl(sp[i].start_seq);
-		__u32 end_seq = ntohl(sp[i].end_seq);
+	for (i = 0; i < num_sacks; i++) {
+		__be32 start_seq = sp[i].start_seq;
+		__be32 end_seq = sp[i].end_seq;
 
-		if (i == 0){
+		if (i == 0) {
 			if (tp->recv_sack_cache[i].start_seq != start_seq)
 				flag = 0;
 		} else {
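The block added above performs the RFC 2883 D-SACK checks once per ACK, before the per-block loop, instead of inside it as the removed code in the next hunk did: the first SACK block is a duplicate report if it lies below the cumulative ACK, or if it is wholly contained in the second block. Reduced to plain sequence-number arithmetic as a standalone sketch (names are illustrative, not the kernel helpers):

#include <stdint.h>

static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* wrap-safe "a < b" */
}

static int seq_after(uint32_t a, uint32_t b)
{
	return seq_before(b, a);
}

/* Returns 1 if the first SACK block is a duplicate-SACK report. */
static int is_dsack(uint32_t ack_seq,
		    uint32_t start0, uint32_t end0,
		    uint32_t start1, uint32_t end1, int num_sacks)
{
	if (seq_before(start0, ack_seq))
		return 1;		/* block lies below the cumulative ACK */

	if (num_sacks > 1 &&
	    !seq_after(end0, end1) &&
	    !seq_before(start0, start1))
		return 1;		/* first block contained in the second */

	return 0;
}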
@@ -967,39 +997,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		}
 		tp->recv_sack_cache[i].start_seq = start_seq;
 		tp->recv_sack_cache[i].end_seq = end_seq;
-
-		/* Check for D-SACK. */
-		if (i == 0) {
-			u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;
-
-			if (before(start_seq, ack)) {
-				dup_sack = 1;
-				tp->rx_opt.sack_ok |= 4;
-				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
-			} else if (num_sacks > 1 &&
-				   !after(end_seq, ntohl(sp[1].end_seq)) &&
-				   !before(start_seq, ntohl(sp[1].start_seq))) {
-				dup_sack = 1;
-				tp->rx_opt.sack_ok |= 4;
-				NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
-			}
-
-			/* D-SACK for already forgotten data...
-			 * Do dumb counting. */
-			if (dup_sack &&
-			    !after(end_seq, prior_snd_una) &&
-			    after(end_seq, tp->undo_marker))
-				tp->undo_retrans--;
-
-			/* Eliminate too old ACKs, but take into
-			 * account more or less fresh ones, they can
-			 * contain valid SACK info.
-			 */
-			if (before(ack, prior_snd_una - tp->max_window))
-				return 0;
-		}
-	}
+	}
+	/* Clear the rest of the cache sack blocks so they won't match mistakenly. */
+	for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
+		tp->recv_sack_cache[i].start_seq = 0;
+		tp->recv_sack_cache[i].end_seq = 0;
+	}
 
+	first_sack_index = 0;
 	if (flag)
 		num_sacks = 1;
 	else {
@@ -1016,6 +1021,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 					tmp = sp[j];
 					sp[j] = sp[j+1];
 					sp[j+1] = tmp;
+
+					/* Track where the first SACK block goes to */
+					if (j == first_sack_index)
+						first_sack_index = j+1;
 				}
 
 			}
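The swap above sits inside the slow-path sort that puts the SACK blocks into ascending start_seq order so the retransmission queue can be walked front to back; the added lines make first_sack_index follow the block that originally came first in the option, since that is the block the fastpath hint must track. As a standalone sketch with simplified stand-in types (not the kernel code):

struct sack_block {
	unsigned int start_seq;
	unsigned int end_seq;
};

static int sort_sack_blocks(struct sack_block *sp, int num_sacks)
{
	int first_sack_index = 0;
	int i, j;

	for (i = num_sacks - 1; i > 0; i--) {
		for (j = 0; j < i; j++) {
			if (sp[j].start_seq > sp[j + 1].start_seq) {
				struct sack_block tmp = sp[j];

				sp[j] = sp[j + 1];
				sp[j + 1] = tmp;

				/* Track where the first SACK block goes to */
				if (j == first_sack_index)
					first_sack_index = j + 1;
			}
		}
	}
	return first_sack_index;
}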
@@ -1025,20 +1034,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	/* clear flag as used for different purpose in following code */
 	flag = 0;
 
+	/* Use SACK fastpath hint if valid */
+	cached_skb = tp->fastpath_skb_hint;
+	cached_fack_count = tp->fastpath_cnt_hint;
+	if (!cached_skb) {
+		cached_skb = sk->sk_write_queue.next;
+		cached_fack_count = 0;
+	}
+
 	for (i=0; i<num_sacks; i++, sp++) {
 		struct sk_buff *skb;
 		__u32 start_seq = ntohl(sp->start_seq);
 		__u32 end_seq = ntohl(sp->end_seq);
 		int fack_count;
 
-		/* Use SACK fastpath hint if valid */
-		if (tp->fastpath_skb_hint) {
-			skb = tp->fastpath_skb_hint;
-			fack_count = tp->fastpath_cnt_hint;
-		} else {
-			skb = sk->sk_write_queue.next;
-			fack_count = 0;
-		}
+		skb = cached_skb;
+		fack_count = cached_fack_count;
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
@@ -1048,8 +1059,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			int in_sack, pcount;
 			u8 sacked;
 
-			tp->fastpath_skb_hint = skb;
-			tp->fastpath_cnt_hint = fack_count;
+			cached_skb = skb;
+			cached_fack_count = fack_count;
+			if (i == first_sack_index) {
+				tp->fastpath_skb_hint = skb;
+				tp->fastpath_cnt_hint = fack_count;
+			}
 
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
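With the changes above, each SACK block resumes the queue walk from a per-ACK cache (cached_skb/cached_fack_count) left behind by the previous block, while the persistent tp->fastpath_skb_hint is only advanced while the block that was first in the option is being processed. The control flow, reduced to a toy list walk (toy types and a simplified stop condition, not the kernel's sk_buff queue):

struct seg {
	struct seg *next;
	unsigned int end_seq;
};

struct queue_hint {
	struct seg *skb;	/* persistent fastpath hint */
	int fack_count;
};

static void tag_sack_blocks(struct seg *queue_head, struct queue_hint *hint,
			    const unsigned int *block_end_seq, int num_blocks,
			    int first_sack_index)
{
	struct seg *cached_skb = hint->skb ? hint->skb : queue_head;
	int cached_fack_count = hint->skb ? hint->fack_count : 0;
	int i;

	for (i = 0; i < num_blocks; i++) {
		struct seg *skb = cached_skb;
		int fack_count = cached_fack_count;

		for (; skb; skb = skb->next, fack_count++) {
			cached_skb = skb;
			cached_fack_count = fack_count;
			if (i == first_sack_index) {
				/* Only the original first block moves the hint. */
				hint->skb = skb;
				hint->fack_count = fack_count;
			}
			if (skb->end_seq >= block_end_seq[i])
				break;	/* queue is in order: stop early */
		}
	}
}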
@@ -1234,8 +1249,8 @@ void tcp_enter_frto(struct sock *sk)
 	tp->frto_counter = 1;
 
 	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
 	    tp->snd_una == tp->high_seq ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_FRTO);
@@ -1954,11 +1969,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	 * 1. Reno does not count dupacks (sacked_out) automatically. */
 	if (!tp->packets_out)
 		tp->sacked_out = 0;
 	/* 2. SACK counts snd_fack in packets inaccurately. */
 	if (tp->sacked_out == 0)
 		tp->fackets_out = 0;
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
 	if (flag&FLAG_ECE)
 		tp->prior_ssthresh = 0;
@@ -2188,7 +2203,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 			 __u32 now, __s32 *seq_rtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 	__u32 seq = tp->snd_una;
 	__u32 packets_acked;
 	int acked = 0;
@@ -2264,7 +2279,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 	while ((skb = skb_peek(&sk->sk_write_queue)) &&
 	       skb != sk->sk_send_head) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		__u8 sacked = scb->sacked;
 
 		/* If our packet is before the ack sequence we can
@@ -2455,9 +2470,9 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_sync_left_out(tp);
 
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
 		/* RTO was caused by loss, start retransmitting in
@@ -2612,7 +2627,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 	opt_rx->saw_tstamp = 0;
 
 	while(length>0) {
 		int opcode=*ptr++;
 		int opsize;
 
 		switch (opcode) {
@@ -2627,7 +2642,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					return;
 				if (opsize > length)
 					return;	/* don't parse partial options */
 				switch(opcode) {
 				case TCPOPT_MSS:
 					if(opsize==TCPOLEN_MSS && th->syn && !estab) {
 						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
@@ -2686,10 +2701,10 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					 */
 					break;
 #endif
 				};
 				ptr+=opsize-2;
 				length-=opsize;
 		};
 	}
 }
 
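The option walk visible above follows the standard TCP option encoding: read a kind byte, and for anything other than EOL/NOP read a length byte and skip length-2 data bytes, refusing to parse options that run past the header. A self-contained parser for just the MSS option, under those assumptions (illustrative only, not the kernel routine):

#include <stdint.h>

#define TCPOPT_EOL  0
#define TCPOPT_NOP  1
#define TCPOPT_MSS  2
#define TCPOLEN_MSS 4

static int parse_mss_option(const uint8_t *ptr, int length, uint16_t *mss)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return 0;		/* end of option list */
		case TCPOPT_NOP:
			length--;		/* one byte of padding */
			continue;
		default:
			if (length < 2)
				return -1;
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return -1;	/* don't parse partial options */
			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS)
				*mss = (uint16_t)((ptr[0] << 8) | ptr[1]);
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return 0;
}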
@@ -3248,7 +3263,7 @@ drop:
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
 
 		/* If window is closed, drop tail of packet. But after
 		 * remembering D-SACK for its head made in previous line.
 		 */
@@ -3327,7 +3342,7 @@ drop:
 			}
 		}
 		__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
 
 		/* And clean segments covered by new one as whole. */
 		while ((skb1 = skb->next) !=
 		       (struct sk_buff*)&tp->out_of_order_queue &&
@@ -3492,7 +3507,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  */
 static int tcp_prune_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
 
@@ -3602,7 +3617,7 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk, tp)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
@@ -3675,7 +3690,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  * For 1003.1g we should support a new option TCP_STDURG to permit
  * either form (or just set the sysctl tcp_stdurg).
  */
 
 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3756,7 +3771,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
 			  th->syn;
 
 		/* Is the urgent pointer pointing into this packet? */
 		if (ptr < skb->len) {
 			u8 tmp;
 			if (skb_copy_bits(skb, ptr, &tmp, 1))
@@ -3820,7 +3835,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	int copied_early = 0;
 
 	if (tp->ucopy.wakeup)
 		return 0;
 
 	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 		tp->ucopy.dma_chan = get_softnet_dma();
@@ -3856,26 +3871,26 @@ out:
 #endif /* CONFIG_NET_DMA */
 
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
  *	It is split into a fast path and a slow path. The fast path is
  * 	disabled when:
  *	- A zero window was announced from us - zero window probing
  *	  is only handled properly in the slow path.
  *	- Out of order segments arrived.
  *	- Urgent data is expected.
  *	- There is no buffer space left
  *	- Unexpected TCP flags/window values/header lengths are received
  *	  (detected by checking the TCP header against pred_flags)
  *	- Data is sent in both directions. Fast path only supports pure senders
  *	  or pure receivers (this means either the sequence number or the ack
  *	  value must stay constant)
  *	- Unexpected TCP option.
  *
  *	When these conditions are not satisfied it drops into a standard
  *	receive procedure patterned after RFC793 to handle all cases.
  *	The first three cases are guaranteed by proper pred_flags setting,
  *	the rest is checked inline. Fast processing is turned on in
  *	tcp_data_queue when everything is OK.
  */
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
@@ -3885,15 +3900,15 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	/*
 	 *	Header prediction.
 	 *	The code loosely follows the one in the famous
 	 *	"30 instruction TCP receive" Van Jacobson mail.
 	 *
 	 *	Van's trick is to deposit buffers into socket queue
 	 *	on a device interrupt, to call tcp_recv function
 	 *	on the receive process context and checksum and copy
 	 *	the buffer to user space. smart...
 	 *
 	 *	Our current scheme is not silly either but we take the
 	 *	extra cost of the net_bh soft interrupt processing...
 	 *	We do checksum and copy also but from device to kernel.
 	 */
@@ -3904,7 +3919,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	 *	if header_prediction is to be made
 	 *	'S' will always be tp->tcp_header_len >> 2
 	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
 	 *	turn it off	(when there are holes in the receive
 	 *	 space for instance)
 	 *	PSH flag is ignored.
 	 */
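The comment above describes pred_flags: the fourth 32-bit word of the TCP header (data offset, flags, window) is compared in one operation against a precomputed value, and any mismatch (unexpected flags, a different header length, a changed window) sends the segment to the slow path, with PSH ignored. A standalone sketch of that check in host byte order for clarity (the kernel stores pred_flags in network order and masks with TCP_HP_BITS; names here are illustrative):

#include <stdint.h>

#define ACK_FLAG 0x10	/* ACK bit within the TCP flags byte */
#define PSH_FLAG 0x08	/* PSH bit, ignored by the prediction */

/* Build the expected "doff | flags | window" word for the fast path. */
static uint32_t build_pred_flags(unsigned int header_len_bytes, uint16_t snd_wnd)
{
	return ((uint32_t)(header_len_bytes >> 2) << 28) |
	       ((uint32_t)ACK_FLAG << 16) |
	       snd_wnd;
}

/* Fast-path check: does the received 4th header word match the prediction? */
static int header_predicted(uint32_t hdr_word4, uint32_t pred_flags)
{
	const uint32_t psh = (uint32_t)PSH_FLAG << 16;

	return (hdr_word4 & ~psh) == pred_flags;
}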
@@ -3928,7 +3943,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				goto slow_path;
 
 			tp->rx_opt.saw_tstamp = 1;
 			++ptr;
 			tp->rx_opt.rcv_tsval = ntohl(*ptr);
 			++ptr;
 			tp->rx_opt.rcv_tsecr = ntohl(*ptr);
@@ -3960,7 +3975,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				 * on entry.
 				 */
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk, tp);
 				return 0;
 			} else { /* Header too small */
@@ -4378,11 +4393,11 @@ reset_and_undo:
 
 /*
  *	This function implements the receiving procedure of RFC 793 for
  *	all states except ESTABLISHED and TIME_WAIT.
  *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
  *	address independent.
  */
 
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			  struct tcphdr *th, unsigned len)
 {
@@ -4407,19 +4422,19 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 			return 1;
 
 		/* Now we have several options: In theory there is
 		 * nothing else in the frame. KA9Q has an option to
 		 * send data with the syn, BSD accepts data with the
 		 * syn up to the [to be] advertised window and
 		 * Solaris 2.1 gives you a protocol error. For now
 		 * we just ignore it, that fits the spec precisely
 		 * and avoids incompatibilities. It would be nice in
 		 * future to drop through and process the data.
 		 *
 		 * Now that TTCP is starting to be used we ought to
 		 * queue this data.
 		 * But, this leaves one open to an easy denial of
 		 * service attack, and SYN cookies can't defend
 		 * against this problem. So, we drop the data
 		 * in the interest of security over speed unless
 		 * it's still in use.
@@ -4609,7 +4624,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	case TCP_FIN_WAIT1:
 	case TCP_FIN_WAIT2:
 		/* RFC 793 says to queue data in these states,
 		 * RFC 1122 says we MUST send a reset.
 		 * BSD 4.4 also does reset.
 		 */
 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
@@ -4621,7 +4636,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			}
 		}
 		/* Fall through */
 	case TCP_ESTABLISHED:
 		tcp_data_queue(sk, skb);
 		queued = 1;
 		break;
@@ -4633,7 +4648,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ack_snd_check(sk);
 	}
 
 	if (!queued) {
 discard:
 		__kfree_skb(skb);
 	}