author		Linus Torvalds <torvalds@linux-foundation.org>	2012-01-24 18:51:40 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-24 18:51:40 -0500
commit		701b259f446be2f3625fb852bceb93afe76e206d (patch)
tree		93f15bcd00bd59c38b4e59fed9af7ddf6b06c8b3 /net/ipv4
parent		d2346963bfcbb9a8ee783ca3c3b3bdd7448ec9d5 (diff)
parent		efc3dbc37412c027e363736b4f4c74ee5e8ecffc (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Davem says:

1) Fix JIT code generation on x86-64 for divide by zero, from Eric Dumazet.
2) tg3 header length computation correction from Eric Dumazet.
3) More build and reference counting fixes for socket memory cgroup code from Glauber Costa.
4) module.h snuck back into a core header after all the hard work we did to remove that, from Paul Gortmaker and Jesper Dangaard Brouer.
5) Fix PHY naming regression and add some new PCI IDs in stmmac, from Alessandro Rubini.
6) Netlink message generation fix in new team driver, should only advertise the entries that changed during events, from Jiri Pirko.
7) SRIOV VF registration and unregistration fixes, and also add a missing PCI ID, from Roopa Prabhu.
8) Fix infinite loop in tx queue flush code of brcmsmac, from Stanislaw Gruszka.
9) ftgmac100/ftmac100 build fix, missing interrupt.h include.
10) Memory leak fix in net/hyperv do_set_multicast() handling, from Wei Yongjun.
11) Off by one fix in netem packet scheduler, from Vijay Subramanian.
12) TCP loss detection fix from Yuchung Cheng.
13) TCP reset packet MD5 calculation uses wrong address, fix from Shawn Lu.
14) skge carrier assertion and DMA mapping fixes from Stephen Hemminger.
15) Congestion recovery undo performed at the wrong spot in BIC and CUBIC congestion control modules, fix from Neal Cardwell.
16) Ethtool ETHTOOL_GSSET_INFO is unnecessarily restrictive, from Michał Mirosław.
17) Fix triggerable race in ipv6 sysctl handling, from Francesco Ruggeri.
18) Statistics bug fixes in mlx4 from Eugenia Emantayev.
19) rds locking bug fix during info dumps, from yours truly.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (67 commits)
  rds: Make rds_sock_lock BH rather than IRQ safe.
  netprio_cgroup.h: dont include module.h from other includes
  net: flow_dissector.c missing include linux/export.h
  team: send only changed options/ports via netlink
  net/hyperv: fix possible memory leak in do_set_multicast()
  drivers/net: dsa/mv88e6xxx.c files need linux/module.h
  stmmac: added PCI identifiers
  llc: Fix race condition in llc_ui_recvmsg
  stmmac: fix phy naming inconsistency
  dsa: Add reporting of silicon revision for Marvell 88E6123/88E6161/88E6165 switches.
  tg3: fix ipv6 header length computation
  skge: add byte queue limit support
  mv643xx_eth: Add Rx Discard and Rx Overrun statistics
  bnx2x: fix compilation error with SOE in fw_dump
  bnx2x: handle CHIP_REVISION during init_one
  bnx2x: allow user to change ring size in ISCSI SD mode
  bnx2x: fix Big-Endianess in ethtool -t
  bnx2x: fixed ethtool statistics for MF modes
  bnx2x: credit-leakage fixup on vlan_mac_del_all
  macvlan: fix a possible use after free
  ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/proc.c		 1
-rw-r--r--	net/ipv4/tcp_bic.c	11
-rw-r--r--	net/ipv4/tcp_cubic.c	10
-rw-r--r--	net/ipv4/tcp_input.c	41
-rw-r--r--	net/ipv4/tcp_ipv4.c	 2
5 files changed, 29 insertions, 36 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3569d8ecaeac..6afc807ee2ad 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO),
 	SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO),
 	SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO),
-	SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS),
 	SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT),
 	SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES),
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 6187eb4d1dcf..f45e1c242440 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -63,7 +63,6 @@ static inline void bictcp_reset(struct bictcp *ca)
 {
 	ca->cnt = 0;
 	ca->last_max_cwnd = 0;
-	ca->loss_cwnd = 0;
 	ca->last_cwnd = 0;
 	ca->last_time = 0;
 	ca->epoch_start = 0;
@@ -72,7 +71,11 @@ static inline void bictcp_reset(struct bictcp *ca)
 
 static void bictcp_init(struct sock *sk)
 {
-	bictcp_reset(inet_csk_ca(sk));
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	bictcp_reset(ca);
+	ca->loss_cwnd = 0;
+
 	if (initial_ssthresh)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
@@ -127,7 +130,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	}
 
 	/* if in slow start or link utilization is very low */
-	if (ca->loss_cwnd == 0) {
+	if (ca->last_max_cwnd == 0) {
 		if (ca->cnt > 20) /* increase cwnd 5% per RTT */
 			ca->cnt = 20;
 	}
@@ -185,7 +188,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct bictcp *ca = inet_csk_ca(sk);
-	return max(tp->snd_cwnd, ca->last_max_cwnd);
+	return max(tp->snd_cwnd, ca->loss_cwnd);
 }
 
 static void bictcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index f376b05cca81..a9077f441cb2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -107,7 +107,6 @@ static inline void bictcp_reset(struct bictcp *ca)
 {
 	ca->cnt = 0;
 	ca->last_max_cwnd = 0;
-	ca->loss_cwnd = 0;
 	ca->last_cwnd = 0;
 	ca->last_time = 0;
 	ca->bic_origin_point = 0;
@@ -142,7 +141,10 @@ static inline void bictcp_hystart_reset(struct sock *sk)
 
 static void bictcp_init(struct sock *sk)
 {
-	bictcp_reset(inet_csk_ca(sk));
+	struct bictcp *ca = inet_csk_ca(sk);
+
+	bictcp_reset(ca);
+	ca->loss_cwnd = 0;
 
 	if (hystart)
 		bictcp_hystart_reset(sk);
@@ -275,7 +277,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	 * The initial growth of cubic function may be too conservative
 	 * when the available bandwidth is still unknown.
 	 */
-	if (ca->loss_cwnd == 0 && ca->cnt > 20)
+	if (ca->last_max_cwnd == 0 && ca->cnt > 20)
 		ca->cnt = 20;	/* increase cwnd 5% per RTT */
 
 	/* TCP Friendly */
@@ -342,7 +344,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
 {
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
+	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
 }
 
 static void bictcp_state(struct sock *sk, u8 new_state)
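
The BIC and CUBIC hunks above keep ca->loss_cwnd across bictcp_reset() so that undoing a spurious loss can return to the window recorded at loss time, and they key the "slow start / low utilization" heuristic on last_max_cwnd instead. For illustration only, a minimal userspace sketch of that undo behaviour with hypothetical names (not the kernel's), assuming only that the window is saved when loss is signalled:

/* Userspace sketch only -- hypothetical names, not kernel code. */
#include <stdio.h>

struct ca_sketch {
	unsigned int loss_cwnd;		/* cwnd remembered at the last loss */
};

/* Loss detected: remember the window, then apply some reduction
 * (the real modules use their own beta; the factor here is illustrative). */
static unsigned int on_loss(struct ca_sketch *ca, unsigned int cwnd)
{
	ca->loss_cwnd = cwnd;
	return cwnd / 2;
}

/* Undo after a loss turns out to be spurious: never return less than the
 * window held when the loss was (wrongly) signalled. */
static unsigned int undo_cwnd(const struct ca_sketch *ca, unsigned int cwnd)
{
	return cwnd > ca->loss_cwnd ? cwnd : ca->loss_cwnd;
}

int main(void)
{
	struct ca_sketch ca = { 0 };
	unsigned int cwnd = on_loss(&ca, 100);	/* cwnd reduced to 50 */

	printf("undo restores %u\n", undo_cwnd(&ca, cwnd));	/* prints 100 */
	return 0;
}
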
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..976034f82320 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN. */
 #define FLAG_DATA_SACKED	0x20 /* New SACK. */
 #define FLAG_ECE		0x40 /* ECE in this ACK */
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage. */
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 * when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
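
The tcp_input.c changes above replace the FLAG_DATA_LOST heuristic with a bound applied directly in tcp_mark_head_lost(): SACK senders may deduce losses up to snd_nxt (data sent during recovery), non-SACK senders only up to high_seq. A small userspace sketch of that bound, with hypothetical names and the same wrap-safe sequence comparison idea as the kernel's after():

/* Userspace sketch only -- hypothetical names, not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "seq1 comes after seq2", analogous to the kernel's after(). */
static bool seq_after(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) > 0;
}

/* A segment ending at end_seq may still be marked lost if it does not
 * extend past loss_high: snd_nxt for SACK senders, high_seq otherwise. */
static bool may_mark_lost(bool is_sack, uint32_t end_seq,
			  uint32_t snd_nxt, uint32_t high_seq)
{
	uint32_t loss_high = is_sack ? snd_nxt : high_seq;

	return !seq_after(end_seq, loss_high);
}

int main(void)
{
	/* Data sent during recovery (beyond high_seq) is eligible only
	 * when SACK is in use: prints "sack: 1, reno: 0". */
	printf("sack: %d, reno: %d\n",
	       may_mark_lost(true, 2000, 3000, 1000),
	       may_mark_lost(false, 2000, 3000, 1000));
	return 0;
}
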
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1eb4ad57670e..337ba4cca052 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -631,7 +631,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
+	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
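
The tcp_ipv4.c change above makes the RST signing key lookup use the incoming segment's source address: the reset is sent back to the sender of the offending segment, so that sender is the peer whose MD5 key must sign it. A tiny sketch with made-up names (not the kernel's structures) of which header field identifies that peer:

/* Userspace sketch only -- hypothetical names, not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct iphdr_sketch {
	uint32_t saddr;		/* remote peer that sent the segment */
	uint32_t daddr;		/* our local address it was sent to */
};

/* The RST is addressed back to saddr, so the per-peer MD5 key lookup
 * must be keyed on saddr, not daddr. */
static uint32_t md5_peer_for_reset(const struct iphdr_sketch *iph)
{
	return iph->saddr;
}

int main(void)
{
	struct iphdr_sketch iph = { .saddr = 0x0a000001, .daddr = 0x0a000002 };

	printf("sign RST with key of peer 0x%08x\n", md5_peer_for_reset(&iph));
	return 0;
}
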