diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 105 |
1 files changed, 60 insertions, 45 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c26076fb890e..c6109895bb5e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -936,28 +936,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
936 | struct tcp_sock *tp = tcp_sk(sk); | 936 | struct tcp_sock *tp = tcp_sk(sk); |
937 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; | 937 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; |
938 | struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); | 938 | struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); |
939 | struct sk_buff *cached_skb; | ||
939 | int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; | 940 | int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; |
940 | int reord = tp->packets_out; | 941 | int reord = tp->packets_out; |
941 | int prior_fackets; | 942 | int prior_fackets; |
942 | u32 lost_retrans = 0; | 943 | u32 lost_retrans = 0; |
943 | int flag = 0; | 944 | int flag = 0; |
944 | int dup_sack = 0; | 945 | int dup_sack = 0; |
946 | int cached_fack_count; | ||
945 | int i; | 947 | int i; |
948 | int first_sack_index; | ||
946 | 949 | ||
947 | if (!tp->sacked_out) | 950 | if (!tp->sacked_out) |
948 | tp->fackets_out = 0; | 951 | tp->fackets_out = 0; |
949 | prior_fackets = tp->fackets_out; | 952 | prior_fackets = tp->fackets_out; |
950 | 953 | ||
954 | /* Check for D-SACK. */ | ||
955 | if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { | ||
956 | dup_sack = 1; | ||
957 | tp->rx_opt.sack_ok |= 4; | ||
958 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); | ||
959 | } else if (num_sacks > 1 && | ||
960 | !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && | ||
961 | !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { | ||
962 | dup_sack = 1; | ||
963 | tp->rx_opt.sack_ok |= 4; | ||
964 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); | ||
965 | } | ||
966 | |||
967 | /* D-SACK for already forgotten data... | ||
968 | * Do dumb counting. */ | ||
969 | if (dup_sack && | ||
970 | !after(ntohl(sp[0].end_seq), prior_snd_una) && | ||
971 | after(ntohl(sp[0].end_seq), tp->undo_marker)) | ||
972 | tp->undo_retrans--; | ||
973 | |||
974 | /* Eliminate too old ACKs, but take into | ||
975 | * account more or less fresh ones, they can | ||
976 | * contain valid SACK info. | ||
977 | */ | ||
978 | if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) | ||
979 | return 0; | ||
980 | |||
951 | /* SACK fastpath: | 981 | /* SACK fastpath: |
952 | * if the only SACK change is the increase of the end_seq of | 982 | * if the only SACK change is the increase of the end_seq of |
953 | * the first block then only apply that SACK block | 983 | * the first block then only apply that SACK block |
954 | * and use retrans queue hinting otherwise slowpath */ | 984 | * and use retrans queue hinting otherwise slowpath */ |
955 | flag = 1; | 985 | flag = 1; |
956 | for (i = 0; i< num_sacks; i++) { | 986 | for (i = 0; i < num_sacks; i++) { |
957 | __u32 start_seq = ntohl(sp[i].start_seq); | 987 | __be32 start_seq = sp[i].start_seq; |
958 | __u32 end_seq = ntohl(sp[i].end_seq); | 988 | __be32 end_seq = sp[i].end_seq; |
959 | 989 | ||
960 | if (i == 0){ | 990 | if (i == 0) { |
961 | if (tp->recv_sack_cache[i].start_seq != start_seq) | 991 | if (tp->recv_sack_cache[i].start_seq != start_seq) |
962 | flag = 0; | 992 | flag = 0; |
963 | } else { | 993 | } else { |
@@ -967,39 +997,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
967 | } | 997 | } |
968 | tp->recv_sack_cache[i].start_seq = start_seq; | 998 | tp->recv_sack_cache[i].start_seq = start_seq; |
969 | tp->recv_sack_cache[i].end_seq = end_seq; | 999 | tp->recv_sack_cache[i].end_seq = end_seq; |
970 | 1000 | } | |
971 | /* Check for D-SACK. */ | 1001 | /* Clear the rest of the cache sack blocks so they won't match mistakenly. */ |
972 | if (i == 0) { | 1002 | for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) { |
973 | u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; | 1003 | tp->recv_sack_cache[i].start_seq = 0; |
974 | 1004 | tp->recv_sack_cache[i].end_seq = 0; | |
975 | if (before(start_seq, ack)) { | ||
976 | dup_sack = 1; | ||
977 | tp->rx_opt.sack_ok |= 4; | ||
978 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); | ||
979 | } else if (num_sacks > 1 && | ||
980 | !after(end_seq, ntohl(sp[1].end_seq)) && | ||
981 | !before(start_seq, ntohl(sp[1].start_seq))) { | ||
982 | dup_sack = 1; | ||
983 | tp->rx_opt.sack_ok |= 4; | ||
984 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); | ||
985 | } | ||
986 | |||
987 | /* D-SACK for already forgotten data... | ||
988 | * Do dumb counting. */ | ||
989 | if (dup_sack && | ||
990 | !after(end_seq, prior_snd_una) && | ||
991 | after(end_seq, tp->undo_marker)) | ||
992 | tp->undo_retrans--; | ||
993 | |||
994 | /* Eliminate too old ACKs, but take into | ||
995 | * account more or less fresh ones, they can | ||
996 | * contain valid SACK info. | ||
997 | */ | ||
998 | if (before(ack, prior_snd_una - tp->max_window)) | ||
999 | return 0; | ||
1000 | } | ||
1001 | } | 1005 | } |
1002 | 1006 | ||
1007 | first_sack_index = 0; | ||
1003 | if (flag) | 1008 | if (flag) |
1004 | num_sacks = 1; | 1009 | num_sacks = 1; |
1005 | else { | 1010 | else { |
@@ -1016,6 +1021,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1016 | tmp = sp[j]; | 1021 | tmp = sp[j]; |
1017 | sp[j] = sp[j+1]; | 1022 | sp[j] = sp[j+1]; |
1018 | sp[j+1] = tmp; | 1023 | sp[j+1] = tmp; |
1024 | |||
1025 | /* Track where the first SACK block goes to */ | ||
1026 | if (j == first_sack_index) | ||
1027 | first_sack_index = j+1; | ||
1019 | } | 1028 | } |
1020 | 1029 | ||
1021 | } | 1030 | } |
@@ -1025,20 +1034,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1025 | /* clear flag as used for different purpose in following code */ | 1034 | /* clear flag as used for different purpose in following code */ |
1026 | flag = 0; | 1035 | flag = 0; |
1027 | 1036 | ||
1037 | /* Use SACK fastpath hint if valid */ | ||
1038 | cached_skb = tp->fastpath_skb_hint; | ||
1039 | cached_fack_count = tp->fastpath_cnt_hint; | ||
1040 | if (!cached_skb) { | ||
1041 | cached_skb = sk->sk_write_queue.next; | ||
1042 | cached_fack_count = 0; | ||
1043 | } | ||
1044 | |||
1028 | for (i=0; i<num_sacks; i++, sp++) { | 1045 | for (i=0; i<num_sacks; i++, sp++) { |
1029 | struct sk_buff *skb; | 1046 | struct sk_buff *skb; |
1030 | __u32 start_seq = ntohl(sp->start_seq); | 1047 | __u32 start_seq = ntohl(sp->start_seq); |
1031 | __u32 end_seq = ntohl(sp->end_seq); | 1048 | __u32 end_seq = ntohl(sp->end_seq); |
1032 | int fack_count; | 1049 | int fack_count; |
1033 | 1050 | ||
1034 | /* Use SACK fastpath hint if valid */ | 1051 | skb = cached_skb; |
1035 | if (tp->fastpath_skb_hint) { | 1052 | fack_count = cached_fack_count; |
1036 | skb = tp->fastpath_skb_hint; | ||
1037 | fack_count = tp->fastpath_cnt_hint; | ||
1038 | } else { | ||
1039 | skb = sk->sk_write_queue.next; | ||
1040 | fack_count = 0; | ||
1041 | } | ||
1042 | 1053 | ||
1043 | /* Event "B" in the comment above. */ | 1054 | /* Event "B" in the comment above. */ |
1044 | if (after(end_seq, tp->high_seq)) | 1055 | if (after(end_seq, tp->high_seq)) |
@@ -1048,8 +1059,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1048 | int in_sack, pcount; | 1059 | int in_sack, pcount; |
1049 | u8 sacked; | 1060 | u8 sacked; |
1050 | 1061 | ||
1051 | tp->fastpath_skb_hint = skb; | 1062 | cached_skb = skb; |
1052 | tp->fastpath_cnt_hint = fack_count; | 1063 | cached_fack_count = fack_count; |
1064 | if (i == first_sack_index) { | ||
1065 | tp->fastpath_skb_hint = skb; | ||
1066 | tp->fastpath_cnt_hint = fack_count; | ||
1067 | } | ||
1053 | 1068 | ||
1054 | /* The retransmission queue is always in order, so | 1069 | /* The retransmission queue is always in order, so |
1055 | * we can short-circuit the walk early. | 1070 | * we can short-circuit the walk early. |