aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-08-19 20:22:46 -0400
committerDavid S. Miller <davem@davemloft.net>2012-08-21 17:42:23 -0400
commit144d56e91044181ec0ef67aeca91e9a8b5718348 (patch)
treee379655603668d66dbec27f446b68479cb18e6e2 /net/ipv4
parentfae6ef87faeb8853896920c68ee703d715799d28 (diff)
tcp: fix possible socket refcount problem
Commit 6f458dfb40 (tcp: improve latencies of timer triggered events) added bug leading to following trace : [ 2866.131281] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000 [ 2866.131726] [ 2866.132188] ========================= [ 2866.132281] [ BUG: held lock freed! ] [ 2866.132281] 3.6.0-rc1+ #622 Not tainted [ 2866.132281] ------------------------- [ 2866.132281] kworker/0:1/652 is freeing memory ffff880019ec0000-ffff880019ec0a1f, with a lock still held there! [ 2866.132281] (sk_lock-AF_INET-RPC){+.+...}, at: [<ffffffff81903619>] tcp_sendmsg+0x29/0xcc6 [ 2866.132281] 4 locks held by kworker/0:1/652: [ 2866.132281] #0: (rpciod){.+.+.+}, at: [<ffffffff81083567>] process_one_work+0x1de/0x47f [ 2866.132281] #1: ((&task->u.tk_work)){+.+.+.}, at: [<ffffffff81083567>] process_one_work+0x1de/0x47f [ 2866.132281] #2: (sk_lock-AF_INET-RPC){+.+...}, at: [<ffffffff81903619>] tcp_sendmsg+0x29/0xcc6 [ 2866.132281] #3: (&icsk->icsk_retransmit_timer){+.-...}, at: [<ffffffff81078017>] run_timer_softirq+0x1ad/0x35f [ 2866.132281] [ 2866.132281] stack backtrace: [ 2866.132281] Pid: 652, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #622 [ 2866.132281] Call Trace: [ 2866.132281] <IRQ> [<ffffffff810bc527>] debug_check_no_locks_freed+0x112/0x159 [ 2866.132281] [<ffffffff818a0839>] ? __sk_free+0xfd/0x114 [ 2866.132281] [<ffffffff811549fa>] kmem_cache_free+0x6b/0x13a [ 2866.132281] [<ffffffff818a0839>] __sk_free+0xfd/0x114 [ 2866.132281] [<ffffffff818a08c0>] sk_free+0x1c/0x1e [ 2866.132281] [<ffffffff81911e1c>] tcp_write_timer+0x51/0x56 [ 2866.132281] [<ffffffff81078082>] run_timer_softirq+0x218/0x35f [ 2866.132281] [<ffffffff81078017>] ? run_timer_softirq+0x1ad/0x35f [ 2866.132281] [<ffffffff810f5831>] ? rb_commit+0x58/0x85 [ 2866.132281] [<ffffffff81911dcb>] ? tcp_write_timer_handler+0x148/0x148 [ 2866.132281] [<ffffffff81070bd6>] __do_softirq+0xcb/0x1f9 [ 2866.132281] [<ffffffff81a0a00c>] ? _raw_spin_unlock+0x29/0x2e [ 2866.132281] [<ffffffff81a1227c>] call_softirq+0x1c/0x30 [ 2866.132281] [<ffffffff81039f38>] do_softirq+0x4a/0xa6 [ 2866.132281] [<ffffffff81070f2b>] irq_exit+0x51/0xad [ 2866.132281] [<ffffffff81a129cd>] do_IRQ+0x9d/0xb4 [ 2866.132281] [<ffffffff81a0a3ef>] common_interrupt+0x6f/0x6f [ 2866.132281] <EOI> [<ffffffff8109d006>] ? sched_clock_cpu+0x58/0xd1 [ 2866.132281] [<ffffffff81a0a172>] ? _raw_spin_unlock_irqrestore+0x4c/0x56 [ 2866.132281] [<ffffffff81078692>] mod_timer+0x178/0x1a9 [ 2866.132281] [<ffffffff818a00aa>] sk_reset_timer+0x19/0x26 [ 2866.132281] [<ffffffff8190b2cc>] tcp_rearm_rto+0x99/0xa4 [ 2866.132281] [<ffffffff8190dfba>] tcp_event_new_data_sent+0x6e/0x70 [ 2866.132281] [<ffffffff8190f7ea>] tcp_write_xmit+0x7de/0x8e4 [ 2866.132281] [<ffffffff818a565d>] ? __alloc_skb+0xa0/0x1a1 [ 2866.132281] [<ffffffff8190f952>] __tcp_push_pending_frames+0x2e/0x8a [ 2866.132281] [<ffffffff81904122>] tcp_sendmsg+0xb32/0xcc6 [ 2866.132281] [<ffffffff819229c2>] inet_sendmsg+0xaa/0xd5 [ 2866.132281] [<ffffffff81922918>] ? inet_autobind+0x5f/0x5f [ 2866.132281] [<ffffffff810ee7f1>] ? trace_clock_local+0x9/0xb [ 2866.132281] [<ffffffff8189adab>] sock_sendmsg+0xa3/0xc4 [ 2866.132281] [<ffffffff810f5de6>] ? rb_reserve_next_event+0x26f/0x2d5 [ 2866.132281] [<ffffffff8103e6a9>] ? native_sched_clock+0x29/0x6f [ 2866.132281] [<ffffffff8103e6f8>] ? sched_clock+0x9/0xd [ 2866.132281] [<ffffffff810ee7f1>] ? trace_clock_local+0x9/0xb [ 2866.132281] [<ffffffff8189ae03>] kernel_sendmsg+0x37/0x43 [ 2866.132281] [<ffffffff8199ce49>] xs_send_kvec+0x77/0x80 [ 2866.132281] [<ffffffff8199cec1>] xs_sendpages+0x6f/0x1a0 [ 2866.132281] [<ffffffff8107826d>] ? try_to_del_timer_sync+0x55/0x61 [ 2866.132281] [<ffffffff8199d0d2>] xs_tcp_send_request+0x55/0xf1 [ 2866.132281] [<ffffffff8199bb90>] xprt_transmit+0x89/0x1db [ 2866.132281] [<ffffffff81999bcd>] ? call_connect+0x3c/0x3c [ 2866.132281] [<ffffffff81999d92>] call_transmit+0x1c5/0x20e [ 2866.132281] [<ffffffff819a0d55>] __rpc_execute+0x6f/0x225 [ 2866.132281] [<ffffffff81999bcd>] ? call_connect+0x3c/0x3c [ 2866.132281] [<ffffffff819a0f33>] rpc_async_schedule+0x28/0x34 [ 2866.132281] [<ffffffff810835d6>] process_one_work+0x24d/0x47f [ 2866.132281] [<ffffffff81083567>] ? process_one_work+0x1de/0x47f [ 2866.132281] [<ffffffff819a0f0b>] ? __rpc_execute+0x225/0x225 [ 2866.132281] [<ffffffff81083a6d>] worker_thread+0x236/0x317 [ 2866.132281] [<ffffffff81083837>] ? process_scheduled_works+0x2f/0x2f [ 2866.132281] [<ffffffff8108b7b8>] kthread+0x9a/0xa2 [ 2866.132281] [<ffffffff81a12184>] kernel_thread_helper+0x4/0x10 [ 2866.132281] [<ffffffff81a0a4b0>] ? retint_restore_args+0x13/0x13 [ 2866.132281] [<ffffffff8108b71e>] ? __init_kthread_worker+0x5a/0x5a [ 2866.132281] [<ffffffff81a12180>] ? gs_change+0x13/0x13 [ 2866.308506] IPv4: Attempt to release TCP socket in state 1 ffff880019ec0000 [ 2866.309689] ============================================================================= [ 2866.310254] BUG TCP (Not tainted): Object already free [ 2866.310254] ----------------------------------------------------------------------------- [ 2866.310254] The bug comes from the fact that timer set in sk_reset_timer() can run before we actually do the sock_hold(). socket refcount reaches zero and we free the socket too soon. timer handler is not allowed to reduce socket refcnt if socket is owned by the user, or we need to change sk_reset_timer() implementation. We should take a reference on the socket in case TCP_DELACK_TIMER_DEFERRED or TCP_DELACK_TIMER_DEFERRED bit are set in tsq_flags Also fix a typo in tcp_delack_timer(), where TCP_WRITE_TIMER_DEFERRED was used instead of TCP_DELACK_TIMER_DEFERRED. For consistency, use same socket refcount change for TCP_MTU_REDUCED_DEFERRED, even if not fired from a timer. Reported-by: Fengguang Wu <fengguang.wu@intel.com> Tested-by: Fengguang Wu <fengguang.wu@intel.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_output.c14
-rw-r--r--net/ipv4/tcp_timer.c6
3 files changed, 18 insertions, 10 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5bf2040b25b1..00a748d14062 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
417 417
418 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 418 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
419 tp->mtu_info = info; 419 tp->mtu_info = info;
420 if (!sock_owned_by_user(sk)) 420 if (!sock_owned_by_user(sk)) {
421 tcp_v4_mtu_reduced(sk); 421 tcp_v4_mtu_reduced(sk);
422 else 422 } else {
423 set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); 423 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
424 sock_hold(sk);
425 }
424 goto out; 426 goto out;
425 } 427 }
426 428
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20dfd892c86f..d04632673a9e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
910 if (flags & (1UL << TCP_TSQ_DEFERRED)) 910 if (flags & (1UL << TCP_TSQ_DEFERRED))
911 tcp_tsq_handler(sk); 911 tcp_tsq_handler(sk);
912 912
913 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) 913 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
914 tcp_write_timer_handler(sk); 914 tcp_write_timer_handler(sk);
915 915 __sock_put(sk);
916 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) 916 }
917 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
917 tcp_delack_timer_handler(sk); 918 tcp_delack_timer_handler(sk);
918 919 __sock_put(sk);
919 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) 920 }
921 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
920 sk->sk_prot->mtu_reduced(sk); 922 sk->sk_prot->mtu_reduced(sk);
923 __sock_put(sk);
924 }
921} 925}
922EXPORT_SYMBOL(tcp_release_cb); 926EXPORT_SYMBOL(tcp_release_cb);
923 927
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad55a38..b774a03bd1dc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
252 inet_csk(sk)->icsk_ack.blocked = 1; 252 inet_csk(sk)->icsk_ack.blocked = 1;
253 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); 253 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
254 /* deleguate our work to tcp_release_cb() */ 254 /* deleguate our work to tcp_release_cb() */
255 set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); 255 if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
256 sock_hold(sk);
256 } 257 }
257 bh_unlock_sock(sk); 258 bh_unlock_sock(sk);
258 sock_put(sk); 259 sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
481 tcp_write_timer_handler(sk); 482 tcp_write_timer_handler(sk);
482 } else { 483 } else {
483 /* deleguate our work to tcp_release_cb() */ 484 /* deleguate our work to tcp_release_cb() */
484 set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); 485 if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
486 sock_hold(sk);
485 } 487 }
486 bh_unlock_sock(sk); 488 bh_unlock_sock(sk);
487 sock_put(sk); 489 sock_put(sk);