aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSven Wegener <sven.wegener@stealer.net>2010-06-09 10:10:57 -0400
committerPatrick McHardy <kaber@trash.net>2010-06-09 10:10:57 -0400
commitaea9d711f3d68c656ad31ab578ecfb0bb5cd7f97 (patch)
treeb4681f25962090e6449fc74a68085fae577b476d
parent7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 (diff)
ipvs: Add missing locking during connection table hashing and unhashing
The code that hashes and unhashes connections from the connection table is missing locking of the connection being modified, which opens up a race condition and results in memory corruption when this race condition is hit. Here is what happens in pretty verbose form: CPU 0 CPU 1 ------------ ------------ An active connection is terminated and we schedule ip_vs_conn_expire() on this CPU to expire this connection. IRQ assignment is changed to this CPU, but the expire timer stays scheduled on the other CPU. New connection from same ip:port comes in right before the timer expires, we find the inactive connection in our connection table and get a reference to it. We proper lock the connection in tcp_state_transition() and read the connection flags in set_tcp_state(). ip_vs_conn_expire() gets called, we unhash the connection from our connection table and remove the hashed flag in ip_vs_conn_unhash(), without proper locking! While still holding proper locks we write the connection flags in set_tcp_state() and this sets the hashed flag again. ip_vs_conn_expire() fails to expire the connection, because the other CPU has incremented the reference count. We try to re-insert the connection into our connection table, but this fails in ip_vs_conn_hash(), because the hashed flag has been set by the other CPU. We re-schedule execution of ip_vs_conn_expire(). Now this connection has the hashed flag set, but isn't actually hashed in our connection table and has a dangling list_head. We drop the reference we held on the connection and schedule the expire timer for timeouting the connection on this CPU. Further packets won't be able to find this connection in our connection table. ip_vs_conn_expire() gets called again, we think it's already hashed, but the list_head is dangling and while removing the connection from our connection table we write to the memory location where this list_head points to. The result will probably be a kernel oops at some other point in time. This race condition is pretty subtle, but it can be triggered remotely. It needs the IRQ assignment change or another circumstance where packets coming from the same ip:port for the same service are being processed on different CPUs. And it involves hitting the exact time at which ip_vs_conn_expire() gets called. It can be avoided by making sure that all packets from one connection are always processed on the same CPU and can be made harder to exploit by changing the connection timeouts to some custom values. Signed-off-by: Sven Wegener <sven.wegener@stealer.net> Cc: stable@kernel.org Acked-by: Simon Horman <horms@verge.net.au> Signed-off-by: Patrick McHardy <kaber@trash.net>
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c4
1 files changed, 4 insertions, 0 deletions
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8ef67b4..ff04e9edbed6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -162,6 +162,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
162 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 162 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
163 163
164 ct_write_lock(hash); 164 ct_write_lock(hash);
165 spin_lock(&cp->lock);
165 166
166 if (!(cp->flags & IP_VS_CONN_F_HASHED)) { 167 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
167 list_add(&cp->c_list, &ip_vs_conn_tab[hash]); 168 list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
@@ -174,6 +175,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
174 ret = 0; 175 ret = 0;
175 } 176 }
176 177
178 spin_unlock(&cp->lock);
177 ct_write_unlock(hash); 179 ct_write_unlock(hash);
178 180
179 return ret; 181 return ret;
@@ -193,6 +195,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
193 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 195 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
194 196
195 ct_write_lock(hash); 197 ct_write_lock(hash);
198 spin_lock(&cp->lock);
196 199
197 if (cp->flags & IP_VS_CONN_F_HASHED) { 200 if (cp->flags & IP_VS_CONN_F_HASHED) {
198 list_del(&cp->c_list); 201 list_del(&cp->c_list);
@@ -202,6 +205,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
202 } else 205 } else
203 ret = 0; 206 ret = 0;
204 207
208 spin_unlock(&cp->lock);
205 ct_write_unlock(hash); 209 ct_write_unlock(hash);
206 210
207 return ret; 211 return ret;