author    Eric Dumazet <eric.dumazet@gmail.com>    2013-05-01 01:24:03 -0400
committer David S. Miller <davem@davemloft.net>    2013-05-01 15:13:49 -0400
commit    60bc851ae59bfe99be6ee89d6bc50008c85ec75d (patch)
tree      5046b97e73431933b0205f5b9381fe09979ef2f4
parent    c3b28ea36946a22469a5519977a3b79428ded4af (diff)
af_unix: fix a fatal race with bit fields
Using bit fields is dangerous on ppc64/sparc64, as the compiler [1]
uses 64bit instructions to manipulate them. If the 64bit word
includes any atomic_t or spinlock_t, we can lose critical concurrent
changes.

This is happening in af_unix, where unix_sk(sk)->gc_candidate/
gc_maybe_cycle/lock share the same 64bit word.

This leads to a fatal deadlock, as one or several cpus spin forever
on a spinlock that will never be available again.

A safer way is to use a long to store flags. This way we are sure
the compiler/arch won't do bad things.

As we own the unix_gc_lock spinlock when clearing or setting bits,
we can use the non-atomic __set_bit()/__clear_bit().

recursion_level can share the same 64bit location with the spinlock,
as it is set only with this spinlock held.

[1] bug fixed in gcc-4.8.0:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080

Reported-by: Ambrose Feinstein <ambrose@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
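For readers outside the kernel tree, the following is a minimal sketch of the hazard and of the shape of the fix. It is an illustration only, not af_unix code: the struct names, the GC_* bit numbers, gc_lock and mark_candidate() are invented for this example, a pthread spinlock stands in for spinlock_t, and plain bit masking stands in for __set_bit().

/* Hedged sketch, not kernel code: models the layout hazard described in
 * the commit message and the shape of the fix.
 */
#include <pthread.h>

struct racy_sock {			/* "before" layout */
	pthread_spinlock_t lock;	/* typically 4 bytes */
	/* These bit fields can end up in the same aligned 64bit word as
	 * 'lock'; a compiler that updates them with a 64bit
	 * read-modify-write (the pre-gcc-4.8 behaviour cited above) can
	 * silently undo a concurrent store to 'lock'.
	 */
	unsigned int gc_candidate : 1;
	unsigned int gc_maybe_cycle : 1;
};

struct fixed_sock {			/* "after" layout */
	pthread_spinlock_t lock;
	unsigned long gc_flags;		/* the flag bits get a long of their own */
#define GC_CANDIDATE	0
#define GC_MAYBE_CYCLE	1
};

static pthread_spinlock_t gc_lock;	/* stands in for unix_gc_lock;
					 * pthread_spin_init(&gc_lock, 0)
					 * must run before first use.
					 */

static void mark_candidate(struct fixed_sock *s)
{
	/* All writers hold gc_lock, so a non-atomic read-modify-write of
	 * gc_flags (what __set_bit()/__clear_bit() do in the patch) is
	 * safe here.
	 */
	pthread_spin_lock(&gc_lock);
	s->gc_flags |= 1UL << GC_CANDIDATE;
	s->gc_flags |= 1UL << GC_MAYBE_CYCLE;
	pthread_spin_unlock(&gc_lock);
}

The point of the changed layout is that gc_flags is a full long of its own, so even a wide read-modify-write of it cannot touch the bytes holding the lock.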
-rw-r--r--  include/net/af_unix.h |  5
-rw-r--r--  net/unix/garbage.c    | 12
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index a8836e8445cc..dbdfd2b0f3b3 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -57,9 +57,10 @@ struct unix_sock {
 	struct list_head	link;
 	atomic_long_t		inflight;
 	spinlock_t		lock;
-	unsigned int		gc_candidate : 1;
-	unsigned int		gc_maybe_cycle : 1;
 	unsigned char		recursion_level;
+	unsigned long		gc_flags;
+#define UNIX_GC_CANDIDATE	0
+#define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d0f6545b0010..9bc73f87f64a 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 					 * have been added to the queues after
 					 * starting the garbage collection
 					 */
-					if (u->gc_candidate) {
+					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
 						hit = true;
 						func(u);
 					}
@@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 	 * of the list, so that it's checked even if it was already
 	 * passed over
 	 */
-	if (u->gc_maybe_cycle)
+	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -315,8 +315,8 @@ void unix_gc(void)
 		BUG_ON(total_refs < inflight_refs);
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
-			u->gc_candidate = 1;
-			u->gc_maybe_cycle = 1;
+			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 		}
 	}
 
@@ -344,7 +344,7 @@ void unix_gc(void)
 
 		if (atomic_long_read(&u->inflight) > 0) {
 			list_move_tail(&u->link, &not_cycle_list);
-			u->gc_maybe_cycle = 0;
+			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
@@ -356,7 +356,7 @@ void unix_gc(void)
 	 */
 	while (!list_empty(&not_cycle_list)) {
 		u = list_entry(not_cycle_list.next, struct unix_sock, link);
-		u->gc_candidate = 0;
+		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
 		list_move_tail(&u->link, &gc_inflight_list);
 	}
 