diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2013-05-01 01:24:03 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-05-01 15:13:49 -0400 |
commit | 60bc851ae59bfe99be6ee89d6bc50008c85ec75d (patch) | |
tree | 5046b97e73431933b0205f5b9381fe09979ef2f4 | |
parent | c3b28ea36946a22469a5519977a3b79428ded4af (diff) |
af_unix: fix a fatal race with bit fields
Using bit fields is dangerous on ppc64/sparc64, as the compiler [1]
uses 64bit instructions to manipulate them.
If the 64bit word includes any atomic_t or spinlock_t, we can lose
critical concurrent changes.
This is happening in af_unix, where unix_sk(sk)->gc_candidate/
gc_maybe_cycle/lock share the same 64bit word.
This leads to a fatal deadlock, as one or several cpus spin forever
on a spinlock that will never be available again.
A safer way would be to use a long to store flags.
This way we are sure the compiler/arch won't do bad things.
As we own unix_gc_lock spinlock when clearing or setting bits,
we can use the non-atomic __set_bit()/__clear_bit().
recursion_level can share the same 64bit location with the spinlock,
as it is set only with this spinlock held.
[1] bug fixed in gcc-4.8.0 :
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080
Reported-by: Ambrose Feinstein <ambrose@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/af_unix.h | 5 | ||||
-rw-r--r-- | net/unix/garbage.c | 12 |
2 files changed, 9 insertions, 8 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a8836e8445cc..dbdfd2b0f3b3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h | |||
@@ -57,9 +57,10 @@ struct unix_sock { | |||
57 | struct list_head link; | 57 | struct list_head link; |
58 | atomic_long_t inflight; | 58 | atomic_long_t inflight; |
59 | spinlock_t lock; | 59 | spinlock_t lock; |
60 | unsigned int gc_candidate : 1; | ||
61 | unsigned int gc_maybe_cycle : 1; | ||
62 | unsigned char recursion_level; | 60 | unsigned char recursion_level; |
61 | unsigned long gc_flags; | ||
62 | #define UNIX_GC_CANDIDATE 0 | ||
63 | #define UNIX_GC_MAYBE_CYCLE 1 | ||
63 | struct socket_wq peer_wq; | 64 | struct socket_wq peer_wq; |
64 | }; | 65 | }; |
65 | #define unix_sk(__sk) ((struct unix_sock *)__sk) | 66 | #define unix_sk(__sk) ((struct unix_sock *)__sk) |
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d0f6545b0010..9bc73f87f64a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c | |||
@@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), | |||
185 | * have been added to the queues after | 185 | * have been added to the queues after |
186 | * starting the garbage collection | 186 | * starting the garbage collection |
187 | */ | 187 | */ |
188 | if (u->gc_candidate) { | 188 | if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { |
189 | hit = true; | 189 | hit = true; |
190 | func(u); | 190 | func(u); |
191 | } | 191 | } |
@@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u) | |||
254 | * of the list, so that it's checked even if it was already | 254 | * of the list, so that it's checked even if it was already |
255 | * passed over | 255 | * passed over |
256 | */ | 256 | */ |
257 | if (u->gc_maybe_cycle) | 257 | if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags)) |
258 | list_move_tail(&u->link, &gc_candidates); | 258 | list_move_tail(&u->link, &gc_candidates); |
259 | } | 259 | } |
260 | 260 | ||
@@ -315,8 +315,8 @@ void unix_gc(void) | |||
315 | BUG_ON(total_refs < inflight_refs); | 315 | BUG_ON(total_refs < inflight_refs); |
316 | if (total_refs == inflight_refs) { | 316 | if (total_refs == inflight_refs) { |
317 | list_move_tail(&u->link, &gc_candidates); | 317 | list_move_tail(&u->link, &gc_candidates); |
318 | u->gc_candidate = 1; | 318 | __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); |
319 | u->gc_maybe_cycle = 1; | 319 | __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); |
320 | } | 320 | } |
321 | } | 321 | } |
322 | 322 | ||
@@ -344,7 +344,7 @@ void unix_gc(void) | |||
344 | 344 | ||
345 | if (atomic_long_read(&u->inflight) > 0) { | 345 | if (atomic_long_read(&u->inflight) > 0) { |
346 | list_move_tail(&u->link, &not_cycle_list); | 346 | list_move_tail(&u->link, &not_cycle_list); |
347 | u->gc_maybe_cycle = 0; | 347 | __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); |
348 | scan_children(&u->sk, inc_inflight_move_tail, NULL); | 348 | scan_children(&u->sk, inc_inflight_move_tail, NULL); |
349 | } | 349 | } |
350 | } | 350 | } |
@@ -356,7 +356,7 @@ void unix_gc(void) | |||
356 | */ | 356 | */ |
357 | while (!list_empty(&not_cycle_list)) { | 357 | while (!list_empty(&not_cycle_list)) { |
358 | u = list_entry(not_cycle_list.next, struct unix_sock, link); | 358 | u = list_entry(not_cycle_list.next, struct unix_sock, link); |
359 | u->gc_candidate = 0; | 359 | __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags); |
360 | list_move_tail(&u->link, &gc_inflight_list); | 360 | list_move_tail(&u->link, &gc_inflight_list); |
361 | } | 361 | } |
362 | 362 | ||