diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-24 18:51:40 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-24 18:51:40 -0500 |
commit | 701b259f446be2f3625fb852bceb93afe76e206d (patch) | |
tree | 93f15bcd00bd59c38b4e59fed9af7ddf6b06c8b3 /net/ipv4 | |
parent | d2346963bfcbb9a8ee783ca3c3b3bdd7448ec9d5 (diff) | |
parent | efc3dbc37412c027e363736b4f4c74ee5e8ecffc (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Davem says:
1) Fix JIT code generation on x86-64 for divide by zero, from Eric Dumazet.
2) tg3 header length computation correction from Eric Dumazet.
3) More build and reference counting fixes for socket memory cgroup
code from Glauber Costa.
4) module.h snuck back into a core header after all the hard work we
did to remove that, from Paul Gortmaker and Jesper Dangaard Brouer.
5) Fix PHY naming regression and add some new PCI IDs in stmmac, from
Alessandro Rubini.
6) Netlink message generation fix in new team driver, should only advertise
the entries that changed during events, from Jiri Pirko.
7) SRIOV VF registration and unregistration fixes, and also add a
missing PCI ID, from Roopa Prabhu.
8) Fix infinite loop in tx queue flush code of brcmsmac, from Stanislaw Gruszka.
9) ftgmac100/ftmac100 build fix, missing interrupt.h include.
10) Memory leak fix in net/hyperv do_set_multicast() handling, from Wei Yongjun.
11) Off by one fix in netem packet scheduler, from Vijay Subramanian.
12) TCP loss detection fix from Yuchung Cheng.
13) TCP reset packet MD5 calculation uses wrong address, fix from Shawn Lu.
14) skge carrier assertion and DMA mapping fixes from Stephen Hemminger.
15) Congestion recovery undo performed at the wrong spot in BIC and CUBIC
congestion control modules, fix from Neal Cardwell.
16) Ethtool ETHTOOL_GSSET_INFO is unnecessarily restrictive, from Michał Mirosław.
17) Fix triggerable race in ipv6 sysctl handling, from Francesco Ruggeri.
18) Statistics bug fixes in mlx4 from Eugenia Emantayev.
19) rds locking bug fix during info dumps, from yours truly.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (67 commits)
rds: Make rds_sock_lock BH rather than IRQ safe.
netprio_cgroup.h: dont include module.h from other includes
net: flow_dissector.c missing include linux/export.h
team: send only changed options/ports via netlink
net/hyperv: fix possible memory leak in do_set_multicast()
drivers/net: dsa/mv88e6xxx.c files need linux/module.h
stmmac: added PCI identifiers
llc: Fix race condition in llc_ui_recvmsg
stmmac: fix phy naming inconsistency
dsa: Add reporting of silicon revision for Marvell 88E6123/88E6161/88E6165 switches.
tg3: fix ipv6 header length computation
skge: add byte queue limit support
mv643xx_eth: Add Rx Discard and Rx Overrun statistics
bnx2x: fix compilation error with SOE in fw_dump
bnx2x: handle CHIP_REVISION during init_one
bnx2x: allow user to change ring size in ISCSI SD mode
bnx2x: fix Big-Endianess in ethtool -t
bnx2x: fixed ethtool statistics for MF modes
bnx2x: credit-leakage fixup on vlan_mac_del_all
macvlan: fix a possible use after free
...
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/proc.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_bic.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 41 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 |
5 files changed, 29 insertions, 36 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3569d8ecaeac..6afc807ee2ad 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
216 | SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), | 216 | SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), |
217 | SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), | 217 | SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), |
218 | SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), | 218 | SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), |
219 | SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS), | ||
220 | SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), | 219 | SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), |
221 | SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), | 220 | SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), |
222 | SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), | 221 | SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 6187eb4d1dcf..f45e1c242440 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -63,7 +63,6 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
63 | { | 63 | { |
64 | ca->cnt = 0; | 64 | ca->cnt = 0; |
65 | ca->last_max_cwnd = 0; | 65 | ca->last_max_cwnd = 0; |
66 | ca->loss_cwnd = 0; | ||
67 | ca->last_cwnd = 0; | 66 | ca->last_cwnd = 0; |
68 | ca->last_time = 0; | 67 | ca->last_time = 0; |
69 | ca->epoch_start = 0; | 68 | ca->epoch_start = 0; |
@@ -72,7 +71,11 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
72 | 71 | ||
73 | static void bictcp_init(struct sock *sk) | 72 | static void bictcp_init(struct sock *sk) |
74 | { | 73 | { |
75 | bictcp_reset(inet_csk_ca(sk)); | 74 | struct bictcp *ca = inet_csk_ca(sk); |
75 | |||
76 | bictcp_reset(ca); | ||
77 | ca->loss_cwnd = 0; | ||
78 | |||
76 | if (initial_ssthresh) | 79 | if (initial_ssthresh) |
77 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 80 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
78 | } | 81 | } |
@@ -127,7 +130,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
127 | } | 130 | } |
128 | 131 | ||
129 | /* if in slow start or link utilization is very low */ | 132 | /* if in slow start or link utilization is very low */ |
130 | if (ca->loss_cwnd == 0) { | 133 | if (ca->last_max_cwnd == 0) { |
131 | if (ca->cnt > 20) /* increase cwnd 5% per RTT */ | 134 | if (ca->cnt > 20) /* increase cwnd 5% per RTT */ |
132 | ca->cnt = 20; | 135 | ca->cnt = 20; |
133 | } | 136 | } |
@@ -185,7 +188,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) | |||
185 | { | 188 | { |
186 | const struct tcp_sock *tp = tcp_sk(sk); | 189 | const struct tcp_sock *tp = tcp_sk(sk); |
187 | const struct bictcp *ca = inet_csk_ca(sk); | 190 | const struct bictcp *ca = inet_csk_ca(sk); |
188 | return max(tp->snd_cwnd, ca->last_max_cwnd); | 191 | return max(tp->snd_cwnd, ca->loss_cwnd); |
189 | } | 192 | } |
190 | 193 | ||
191 | static void bictcp_state(struct sock *sk, u8 new_state) | 194 | static void bictcp_state(struct sock *sk, u8 new_state) |
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f376b05cca81..a9077f441cb2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -107,7 +107,6 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
107 | { | 107 | { |
108 | ca->cnt = 0; | 108 | ca->cnt = 0; |
109 | ca->last_max_cwnd = 0; | 109 | ca->last_max_cwnd = 0; |
110 | ca->loss_cwnd = 0; | ||
111 | ca->last_cwnd = 0; | 110 | ca->last_cwnd = 0; |
112 | ca->last_time = 0; | 111 | ca->last_time = 0; |
113 | ca->bic_origin_point = 0; | 112 | ca->bic_origin_point = 0; |
@@ -142,7 +141,10 @@ static inline void bictcp_hystart_reset(struct sock *sk) | |||
142 | 141 | ||
143 | static void bictcp_init(struct sock *sk) | 142 | static void bictcp_init(struct sock *sk) |
144 | { | 143 | { |
145 | bictcp_reset(inet_csk_ca(sk)); | 144 | struct bictcp *ca = inet_csk_ca(sk); |
145 | |||
146 | bictcp_reset(ca); | ||
147 | ca->loss_cwnd = 0; | ||
146 | 148 | ||
147 | if (hystart) | 149 | if (hystart) |
148 | bictcp_hystart_reset(sk); | 150 | bictcp_hystart_reset(sk); |
@@ -275,7 +277,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
275 | * The initial growth of cubic function may be too conservative | 277 | * The initial growth of cubic function may be too conservative |
276 | * when the available bandwidth is still unknown. | 278 | * when the available bandwidth is still unknown. |
277 | */ | 279 | */ |
278 | if (ca->loss_cwnd == 0 && ca->cnt > 20) | 280 | if (ca->last_max_cwnd == 0 && ca->cnt > 20) |
279 | ca->cnt = 20; /* increase cwnd 5% per RTT */ | 281 | ca->cnt = 20; /* increase cwnd 5% per RTT */ |
280 | 282 | ||
281 | /* TCP Friendly */ | 283 | /* TCP Friendly */ |
@@ -342,7 +344,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) | |||
342 | { | 344 | { |
343 | struct bictcp *ca = inet_csk_ca(sk); | 345 | struct bictcp *ca = inet_csk_ca(sk); |
344 | 346 | ||
345 | return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); | 347 | return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); |
346 | } | 348 | } |
347 | 349 | ||
348 | static void bictcp_state(struct sock *sk, u8 new_state) | 350 | static void bictcp_state(struct sock *sk, u8 new_state) |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2877c3e09587..976034f82320 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; | |||
105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ | 105 | #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ |
106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ | 106 | #define FLAG_DATA_SACKED 0x20 /* New SACK. */ |
107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ | 107 | #define FLAG_ECE 0x40 /* ECE in this ACK */ |
108 | #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ | ||
109 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ | 108 | #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ |
110 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ | 109 | #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ |
111 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ | 110 | #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ |
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
1040 | * These 6 states form finite state machine, controlled by the following events: | 1039 | * These 6 states form finite state machine, controlled by the following events: |
1041 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) | 1040 | * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) |
1042 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) | 1041 | * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) |
1043 | * 3. Loss detection event of one of three flavors: | 1042 | * 3. Loss detection event of two flavors: |
1044 | * A. Scoreboard estimator decided the packet is lost. | 1043 | * A. Scoreboard estimator decided the packet is lost. |
1045 | * A'. Reno "three dupacks" marks head of queue lost. | 1044 | * A'. Reno "three dupacks" marks head of queue lost. |
1046 | * A''. Its FACK modfication, head until snd.fack is lost. | 1045 | * A''. Its FACK modification, head until snd.fack is lost. |
1047 | * B. SACK arrives sacking data transmitted after never retransmitted | 1046 | * B. SACK arrives sacking SND.NXT at the moment, when the |
1048 | * hole was sent out. | ||
1049 | * C. SACK arrives sacking SND.NXT at the moment, when the | ||
1050 | * segment was retransmitted. | 1047 | * segment was retransmitted. |
1051 | * 4. D-SACK added new rule: D-SACK changes any tag to S. | 1048 | * 4. D-SACK added new rule: D-SACK changes any tag to S. |
1052 | * | 1049 | * |
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | |||
1153 | } | 1150 | } |
1154 | 1151 | ||
1155 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". | 1152 | /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". |
1156 | * Event "C". Later note: FACK people cheated me again 8), we have to account | 1153 | * Event "B". Later note: FACK people cheated me again 8), we have to account |
1157 | * for reordering! Ugly, but should help. | 1154 | * for reordering! Ugly, but should help. |
1158 | * | 1155 | * |
1159 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was | 1156 | * Search retransmitted skbs from write_queue that were sent when snd_nxt was |
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1844 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1841 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
1845 | next_dup = &sp[i + 1]; | 1842 | next_dup = &sp[i + 1]; |
1846 | 1843 | ||
1847 | /* Event "B" in the comment above. */ | ||
1848 | if (after(end_seq, tp->high_seq)) | ||
1849 | state.flag |= FLAG_DATA_LOST; | ||
1850 | |||
1851 | /* Skip too early cached blocks */ | 1844 | /* Skip too early cached blocks */ |
1852 | while (tcp_sack_cache_ok(tp, cache) && | 1845 | while (tcp_sack_cache_ok(tp, cache) && |
1853 | !before(start_seq, cache->end_seq)) | 1846 | !before(start_seq, cache->end_seq)) |
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk) | |||
2515 | tcp_verify_left_out(tp); | 2508 | tcp_verify_left_out(tp); |
2516 | } | 2509 | } |
2517 | 2510 | ||
2518 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2511 | /* Detect loss in event "A" above by marking head of queue up as lost. |
2519 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2512 | * For FACK or non-SACK(Reno) senders, the first "packets" number of segments |
2513 | * are considered lost. For RFC3517 SACK, a segment is considered lost if it | ||
2514 | * has at least tp->reordering SACKed seqments above it; "packets" refers to | ||
2515 | * the maximum SACKed segments to pass before reaching this limit. | ||
2520 | */ | 2516 | */ |
2521 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | 2517 | static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) |
2522 | { | 2518 | { |
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2525 | int cnt, oldcnt; | 2521 | int cnt, oldcnt; |
2526 | int err; | 2522 | int err; |
2527 | unsigned int mss; | 2523 | unsigned int mss; |
2524 | /* Use SACK to deduce losses of new sequences sent during recovery */ | ||
2525 | const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; | ||
2528 | 2526 | ||
2529 | WARN_ON(packets > tp->packets_out); | 2527 | WARN_ON(packets > tp->packets_out); |
2530 | if (tp->lost_skb_hint) { | 2528 | if (tp->lost_skb_hint) { |
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) | |||
2546 | tp->lost_skb_hint = skb; | 2544 | tp->lost_skb_hint = skb; |
2547 | tp->lost_cnt_hint = cnt; | 2545 | tp->lost_cnt_hint = cnt; |
2548 | 2546 | ||
2549 | if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) | 2547 | if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) |
2550 | break; | 2548 | break; |
2551 | 2549 | ||
2552 | oldcnt = cnt; | 2550 | oldcnt = cnt; |
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3033 | if (tcp_check_sack_reneging(sk, flag)) | 3031 | if (tcp_check_sack_reneging(sk, flag)) |
3034 | return; | 3032 | return; |
3035 | 3033 | ||
3036 | /* C. Process data loss notification, provided it is valid. */ | 3034 | /* C. Check consistency of the current state. */ |
3037 | if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && | ||
3038 | before(tp->snd_una, tp->high_seq) && | ||
3039 | icsk->icsk_ca_state != TCP_CA_Open && | ||
3040 | tp->fackets_out > tp->reordering) { | ||
3041 | tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); | ||
3042 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); | ||
3043 | } | ||
3044 | |||
3045 | /* D. Check consistency of the current state. */ | ||
3046 | tcp_verify_left_out(tp); | 3035 | tcp_verify_left_out(tp); |
3047 | 3036 | ||
3048 | /* E. Check state exit conditions. State can be terminated | 3037 | /* D. Check state exit conditions. State can be terminated |
3049 | * when high_seq is ACKed. */ | 3038 | * when high_seq is ACKed. */ |
3050 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 3039 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
3051 | WARN_ON(tp->retrans_out != 0); | 3040 | WARN_ON(tp->retrans_out != 0); |
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3077 | } | 3066 | } |
3078 | } | 3067 | } |
3079 | 3068 | ||
3080 | /* F. Process state. */ | 3069 | /* E. Process state. */ |
3081 | switch (icsk->icsk_ca_state) { | 3070 | switch (icsk->icsk_ca_state) { |
3082 | case TCP_CA_Recovery: | 3071 | case TCP_CA_Recovery: |
3083 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { | 3072 | if (!(flag & FLAG_SND_UNA_ADVANCED)) { |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1eb4ad57670e..337ba4cca052 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -631,7 +631,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
631 | arg.iov[0].iov_len = sizeof(rep.th); | 631 | arg.iov[0].iov_len = sizeof(rep.th); |
632 | 632 | ||
633 | #ifdef CONFIG_TCP_MD5SIG | 633 | #ifdef CONFIG_TCP_MD5SIG |
634 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; | 634 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; |
635 | if (key) { | 635 | if (key) { |
636 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | | 636 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | |
637 | (TCPOPT_NOP << 16) | | 637 | (TCPOPT_NOP << 16) | |