aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2012-07-16 05:13:51 -0400
committerDavid S. Miller <davem@davemloft.net>2012-07-17 01:32:26 -0400
commit2eebc1e188e9e45886ee00662519849339884d6d (patch)
treebd747ef244df741bc04a6c4c91dec024721ce681
parent310e158cc3b7a6adf41e778d52be746c4dc88561 (diff)
sctp: Fix list corruption resulting from freeing an association on a list
A few days ago Dave Jones reported this oops: [22766.294255] general protection fault: 0000 [#1] PREEMPT SMP [22766.295376] CPU 0 [22766.295384] Modules linked in: [22766.387137] ffffffffa169f292 6b6b6b6b6b6b6b6b ffff880147c03a90 ffff880147c03a74 [22766.387135] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 00000000000 [22766.387136] Process trinity-watchdo (pid: 10896, threadinfo ffff88013e7d2000, [22766.387137] Stack: [22766.387140] ffff880147c03a10 [22766.387140] ffffffffa169f2b6 [22766.387140] ffff88013ed95728 [22766.387143] 0000000000000002 [22766.387143] 0000000000000000 [22766.387143] ffff880003fad062 [22766.387144] ffff88013c120000 [22766.387144] [22766.387145] Call Trace: [22766.387145] <IRQ> [22766.387150] [<ffffffffa169f292>] ? __sctp_lookup_association+0x62/0xd0 [sctp] [22766.387154] [<ffffffffa169f2b6>] __sctp_lookup_association+0x86/0xd0 [sctp] [22766.387157] [<ffffffffa169f597>] sctp_rcv+0x207/0xbb0 [sctp] [22766.387161] [<ffffffff810d4da8>] ? trace_hardirqs_off_caller+0x28/0xd0 [22766.387163] [<ffffffff815827e3>] ? nf_hook_slow+0x133/0x210 [22766.387166] [<ffffffff815902fc>] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387168] [<ffffffff8159043d>] ip_local_deliver_finish+0x18d/0x4c0 [22766.387169] [<ffffffff815902fc>] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387171] [<ffffffff81590a07>] ip_local_deliver+0x47/0x80 [22766.387172] [<ffffffff8158fd80>] ip_rcv_finish+0x150/0x680 [22766.387174] [<ffffffff81590c54>] ip_rcv+0x214/0x320 [22766.387176] [<ffffffff81558c07>] __netif_receive_skb+0x7b7/0x910 [22766.387178] [<ffffffff8155856c>] ? __netif_receive_skb+0x11c/0x910 [22766.387180] [<ffffffff810d423e>] ? put_lock_stats.isra.25+0xe/0x40 [22766.387182] [<ffffffff81558f83>] netif_receive_skb+0x23/0x1f0 [22766.387183] [<ffffffff815596a9>] ? dev_gro_receive+0x139/0x440 [22766.387185] [<ffffffff81559280>] napi_skb_finish+0x70/0xa0 [22766.387187] [<ffffffff81559cb5>] napi_gro_receive+0xf5/0x130 [22766.387218] [<ffffffffa01c4679>] e1000_receive_skb+0x59/0x70 [e1000e] [22766.387242] [<ffffffffa01c5aab>] e1000_clean_rx_irq+0x28b/0x460 [e1000e] [22766.387266] [<ffffffffa01c9c18>] e1000e_poll+0x78/0x430 [e1000e] [22766.387268] [<ffffffff81559fea>] net_rx_action+0x1aa/0x3d0 [22766.387270] [<ffffffff810a495f>] ? account_system_vtime+0x10f/0x130 [22766.387273] [<ffffffff810734d0>] __do_softirq+0xe0/0x420 [22766.387275] [<ffffffff8169826c>] call_softirq+0x1c/0x30 [22766.387278] [<ffffffff8101db15>] do_softirq+0xd5/0x110 [22766.387279] [<ffffffff81073bc5>] irq_exit+0xd5/0xe0 [22766.387281] [<ffffffff81698b03>] do_IRQ+0x63/0xd0 [22766.387283] [<ffffffff8168ee2f>] common_interrupt+0x6f/0x6f [22766.387283] <EOI> [22766.387284] [22766.387285] [<ffffffff8168eed9>] ? retint_swapgs+0x13/0x1b [22766.387285] Code: c0 90 5d c3 66 0f 1f 44 00 00 4c 89 c8 5d c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 <0f> b7 87 98 00 00 00 48 89 fb 49 89 f5 66 c1 c0 08 66 39 46 02 [22766.387307] [22766.387307] RIP [22766.387311] [<ffffffffa168a2c9>] sctp_assoc_is_match+0x19/0x90 [sctp] [22766.387311] RSP <ffff880147c039b0> [22766.387142] ffffffffa16ab120 [22766.599537] ---[ end trace 3f6dae82e37b17f5 ]--- [22766.601221] Kernel panic - not syncing: Fatal exception in interrupt It appears from his analysis and some staring at the code that this is likely occuring because an association is getting freed while still on the sctp_assoc_hashtable. As a result, we get a gpf when traversing the hashtable while a freed node corrupts part of the list. Nominally I would think that an mibalanced refcount was responsible for this, but I can't seem to find any obvious imbalance. What I did note however was that the two places where we create an association using sctp_primitive_ASSOCIATE (__sctp_connect and sctp_sendmsg), have failure paths which free a newly created association after calling sctp_primitive_ASSOCIATE. sctp_primitive_ASSOCIATE brings us into the sctp_sf_do_prm_asoc path, which issues a SCTP_CMD_NEW_ASOC side effect, which in turn adds a new association to the aforementioned hash table. the sctp command interpreter that process side effects has not way to unwind previously processed commands, so freeing the association from the __sctp_connect or sctp_sendmsg error path would lead to a freed association remaining on this hash table. I've fixed this but modifying sctp_[un]hash_established to use hlist_del_init, which allows us to proerly use hlist_unhashed to check if the node is on a hashlist safely during a delete. That in turn alows us to safely call sctp_unhash_established in the __sctp_connect and sctp_sendmsg error paths before freeing them, regardles of what the associations state is on the hash list. I noted, while I was doing this, that the __sctp_unhash_endpoint was using hlist_unhsashed in a simmilar fashion, but never nullified any removed nodes pointers to make that function work properly, so I fixed that up in a simmilar fashion. I attempted to test this using a virtual guest running the SCTP_RR test from netperf in a loop while running the trinity fuzzer, both in a loop. I wasn't able to recreate the problem prior to this fix, nor was I able to trigger the failure after (neither of which I suppose is suprising). Given the trace above however, I think its likely that this is what we hit. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Reported-by: davej@redhat.com CC: davej@redhat.com CC: "David S. Miller" <davem@davemloft.net> CC: Vlad Yasevich <vyasevich@gmail.com> CC: Sridhar Samudrala <sri@us.ibm.com> CC: linux-sctp@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/sctp/input.c7
-rw-r--r--net/sctp/socket.c12
2 files changed, 12 insertions, 7 deletions
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 80564fe03024..8b9b6790a3df 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -736,15 +736,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
736 736
737 epb = &ep->base; 737 epb = &ep->base;
738 738
739 if (hlist_unhashed(&epb->node))
740 return;
741
742 epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); 739 epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
743 740
744 head = &sctp_ep_hashtable[epb->hashent]; 741 head = &sctp_ep_hashtable[epb->hashent];
745 742
746 sctp_write_lock(&head->lock); 743 sctp_write_lock(&head->lock);
747 __hlist_del(&epb->node); 744 hlist_del_init(&epb->node);
748 sctp_write_unlock(&head->lock); 745 sctp_write_unlock(&head->lock);
749} 746}
750 747
@@ -825,7 +822,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc)
825 head = &sctp_assoc_hashtable[epb->hashent]; 822 head = &sctp_assoc_hashtable[epb->hashent];
826 823
827 sctp_write_lock(&head->lock); 824 sctp_write_lock(&head->lock);
828 __hlist_del(&epb->node); 825 hlist_del_init(&epb->node);
829 sctp_write_unlock(&head->lock); 826 sctp_write_unlock(&head->lock);
830} 827}
831 828
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b3b8a8d813eb..31c7bfcd9b58 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1231,8 +1231,14 @@ out_free:
1231 SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" 1231 SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
1232 " kaddrs: %p err: %d\n", 1232 " kaddrs: %p err: %d\n",
1233 asoc, kaddrs, err); 1233 asoc, kaddrs, err);
1234 if (asoc) 1234 if (asoc) {
1235 /* sctp_primitive_ASSOCIATE may have added this association
1236 * To the hash table, try to unhash it, just in case, its a noop
1237 * if it wasn't hashed so we're safe
1238 */
1239 sctp_unhash_established(asoc);
1235 sctp_association_free(asoc); 1240 sctp_association_free(asoc);
1241 }
1236 return err; 1242 return err;
1237} 1243}
1238 1244
@@ -1942,8 +1948,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1942 goto out_unlock; 1948 goto out_unlock;
1943 1949
1944out_free: 1950out_free:
1945 if (new_asoc) 1951 if (new_asoc) {
1952 sctp_unhash_established(asoc);
1946 sctp_association_free(asoc); 1953 sctp_association_free(asoc);
1954 }
1947out_unlock: 1955out_unlock:
1948 sctp_release_sock(sk); 1956 sctp_release_sock(sk);
1949 1957