author     Dmitry Vyukov <dvyukov@google.com>  2017-03-09 19:17:32 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-03-09 20:01:10 -0500
commit     ce5bec54bb5debbbe51b40270d8f209a23cadae4
tree       2764bb13e341fa4265fd3634ee1e61052d1c53e5
parent     68fd814a3391c7e017ae6ace8855788748edd981
kasan: fix races in quarantine_remove_cache()
quarantine_remove_cache() frees all pending objects that belong to the
cache before we destroy the cache itself.  However, there are currently
two ways it can fail to do so.

First, another thread can hold some of the objects from the cache in its
temp list in quarantine_put().  quarantine_put() has a window of enabled
interrupts, and on_each_cpu() in quarantine_remove_cache() can finish
right in that window.  These objects will later be freed into the
destroyed cache.

Second, quarantine_reduce() has the same problem.  It grabs a batch of
objects from the global quarantine, then unlocks quarantine_lock and
then frees the batch.  quarantine_remove_cache() can finish while some
objects from the cache are still in the local to_free list in
quarantine_reduce().

Fix the race with quarantine_put() by disabling interrupts for the whole
duration of quarantine_put().  In combination with on_each_cpu() in
quarantine_remove_cache(), this ensures that quarantine_remove_cache()
either sees the objects in the per-cpu list or in the global list.

Fix the race with quarantine_reduce() by protecting quarantine_reduce()
with an srcu critical section and then doing synchronize_srcu() at the
end of quarantine_remove_cache().

I have done some assessment of how well synchronize_srcu() works in this
case.  On a 4-CPU VM I see that it blocks waiting for pending read
critical sections in about 2-3% of cases, which looks good to me.

I suspect that these races are the root cause of some GPFs that I
episodically hit; previously I did not have any explanation for them:

 BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8
 IP: qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155
 PGD 6aeea067
 PUD 60ed7067
 PMD 0
 Oops: 0000 [#1] SMP KASAN
 Dumping ftrace buffer:
    (ftrace buffer empty)
 Modules linked in:
 CPU: 0 PID: 13667 Comm: syz-executor2 Not tainted 4.10.0+ #60
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 task: ffff88005f948040 task.stack: ffff880069818000
 RIP: 0010:qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155
 RSP: 0018:ffff88006981f298 EFLAGS: 00010246
 RAX: ffffea0000ffff00 RBX: 0000000000000000 RCX: ffffea0000ffff1f
 RDX: 0000000000000000 RSI: ffff88003fffc3e0 RDI: 0000000000000000
 RBP: ffff88006981f2c0 R08: ffff88002fed7bd8 R09: 00000001001f000d
 R10: 00000000001f000d R11: ffff88006981f000 R12: ffff88003fffc3e0
 R13: ffff88006981f2d0 R14: ffffffff81877fae R15: 0000000080000000
 FS:  00007fb911a2d700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00000000000000c8 CR3: 0000000060ed6000 CR4: 00000000000006f0
 Call Trace:
  quarantine_reduce+0x10e/0x120 mm/kasan/quarantine.c:239
  kasan_kmalloc+0xca/0xe0 mm/kasan/kasan.c:590
  kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544
  slab_post_alloc_hook mm/slab.h:456 [inline]
  slab_alloc_node mm/slub.c:2718 [inline]
  kmem_cache_alloc_node+0x1d3/0x280 mm/slub.c:2754
  __alloc_skb+0x10f/0x770 net/core/skbuff.c:219
  alloc_skb include/linux/skbuff.h:932 [inline]
  _sctp_make_chunk+0x3b/0x260 net/sctp/sm_make_chunk.c:1388
  sctp_make_data net/sctp/sm_make_chunk.c:1420 [inline]
  sctp_make_datafrag_empty+0x208/0x360 net/sctp/sm_make_chunk.c:746
  sctp_datamsg_from_user+0x7e8/0x11d0 net/sctp/chunk.c:266
  sctp_sendmsg+0x2611/0x3970 net/sctp/socket.c:1962
  inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
  sock_sendmsg_nosec net/socket.c:633 [inline]
  sock_sendmsg+0xca/0x110 net/socket.c:643
  SYSC_sendto+0x660/0x810 net/socket.c:1685
  SyS_sendto+0x40/0x50 net/socket.c:1653

I am not sure about backporting.
The bug is quite hard to trigger; I've seen it a few times during our
massive continuous testing (however, it could be the cause of some other
episodic stray crashes, as it leads to memory corruption...).  If it is
triggered, the consequences are very bad -- almost certain memory
corruption.  The fix is non-trivial and has some chance of introducing
new bugs.  I am also not sure how actively people use KASAN on older
releases.

[dvyukov@google.com: sorted includes]
  Link: http://lkml.kernel.org/r/20170309094028.51088-1-dvyukov@google.com
Link: http://lkml.kernel.org/r/20170308151532.5070-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
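
To make the put-side race concrete before reading the diff, here is a
minimal user-space sketch of the *fixed* pattern. All names are
hypothetical and this is plain C with pthreads, not the kernel API: each
worker's mutex stands in for disabled interrupts on one CPU, and locking
every worker's mutex in turn stands in for on_each_cpu(). The point
mirrored from the patch is that the batch is published to the global
list while the worker-local protection is still held, so a scanner can
never observe an object that is in neither list.

#include <pthread.h>
#include <stddef.h>

struct item { struct item *next; };

struct worker {
	pthread_mutex_t lock;	/* stands in for local_irq_save() on one CPU */
	struct item *batch;	/* stands in for the per-CPU cpu_quarantine */
	size_t batch_len;
};

#define BATCH_LIMIT 32		/* stands in for QUARANTINE_PERCPU_SIZE */

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *global_list;	/* stands in for global_quarantine */

/* Fixed ordering: hold w->lock across the entire local -> global move. */
static void put_item(struct worker *w, struct item *it)
{
	pthread_mutex_lock(&w->lock);
	it->next = w->batch;
	w->batch = it;
	if (++w->batch_len > BATCH_LIMIT) {
		struct item *tail = w->batch;

		/*
		 * Publish while still holding w->lock. Dropping it first
		 * (the pre-patch ordering) opens a window in which the
		 * batch is in neither list and a scanner misses it.
		 */
		pthread_mutex_lock(&global_lock);
		while (tail->next)
			tail = tail->next;
		tail->next = global_list;
		global_list = w->batch;
		pthread_mutex_unlock(&global_lock);

		w->batch = NULL;
		w->batch_len = 0;
	}
	pthread_mutex_unlock(&w->lock);
}

/* Stands in for quarantine_remove_cache(): must see every item somewhere. */
static void scan_all(struct worker *workers, size_t n)
{
	for (size_t i = 0; i < n; i++) {	/* on_each_cpu() analogue */
		pthread_mutex_lock(&workers[i].lock);
		/* ... inspect workers[i].batch ... */
		pthread_mutex_unlock(&workers[i].lock);
	}
	pthread_mutex_lock(&global_lock);
	/* ... inspect global_list ... */
	pthread_mutex_unlock(&global_lock);
}

Note the lock nesting: the global lock is taken inside the worker-local
protection, which is exactly what the patch does by replacing
spin_lock_irqsave() with a plain spin_lock() nested inside the enlarged
local_irq_save()/local_irq_restore() region.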
-rw-r--r--  mm/kasan/quarantine.c | 42
1 file changed, 36 insertions(+), 6 deletions(-)
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 4ac39f20757a..3a8ddf8baf7d 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -25,6 +25,7 @@
 #include <linux/printk.h>
 #include <linux/shrinker.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/string.h>
 #include <linux/types.h>
 
@@ -103,6 +104,7 @@ static int quarantine_tail;
 /* Total size of all objects in global_quarantine across all batches. */
 static unsigned long quarantine_size;
 static DEFINE_SPINLOCK(quarantine_lock);
+DEFINE_STATIC_SRCU(remove_cache_srcu);
 
 /* Maximum size of the global queue. */
 static unsigned long quarantine_max_size;
@@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 	struct qlist_head *q;
 	struct qlist_head temp = QLIST_INIT;
 
+	/*
+	 * Note: irq must be disabled until after we move the batch to the
+	 * global quarantine. Otherwise quarantine_remove_cache() can miss
+	 * some objects belonging to the cache if they are in our local temp
+	 * list. quarantine_remove_cache() executes on_each_cpu() at the
+	 * beginning which ensures that it either sees the objects in per-cpu
+	 * lists or in the global quarantine.
+	 */
 	local_irq_save(flags);
 
 	q = this_cpu_ptr(&cpu_quarantine);
 	qlist_put(q, &info->quarantine_link, cache->size);
-	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
 		qlist_move_all(q, &temp);
 
-	local_irq_restore(flags);
-
-	if (unlikely(!qlist_empty(&temp))) {
-		spin_lock_irqsave(&quarantine_lock, flags);
+		spin_lock(&quarantine_lock);
 		WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
 		qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
 		if (global_quarantine[quarantine_tail].bytes >=
@@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 		if (new_tail != quarantine_head)
 			quarantine_tail = new_tail;
 		}
-		spin_unlock_irqrestore(&quarantine_lock, flags);
+		spin_unlock(&quarantine_lock);
 	}
+
+	local_irq_restore(flags);
 }
 
 void quarantine_reduce(void)
 {
 	size_t total_size, new_quarantine_size, percpu_quarantines;
 	unsigned long flags;
+	int srcu_idx;
 	struct qlist_head to_free = QLIST_INIT;
 
 	if (likely(READ_ONCE(quarantine_size) <=
 		   READ_ONCE(quarantine_max_size)))
 		return;
 
+	/*
+	 * srcu critical section ensures that quarantine_remove_cache()
+	 * will not miss objects belonging to the cache while they are in our
+	 * local to_free list. srcu is chosen because (1) it gives us private
+	 * grace period domain that does not interfere with anything else,
+	 * and (2) it allows synchronize_srcu() to return without waiting
+	 * if there are no pending read critical sections (which is the
+	 * expected case).
+	 */
+	srcu_idx = srcu_read_lock(&remove_cache_srcu);
 	spin_lock_irqsave(&quarantine_lock, flags);
 
 	/*
@@ -237,6 +257,7 @@ void quarantine_reduce(void)
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, NULL);
+	srcu_read_unlock(&remove_cache_srcu, srcu_idx);
 }
 
 static void qlist_move_cache(struct qlist_head *from,
@@ -280,6 +301,13 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	unsigned long flags, i;
 	struct qlist_head to_free = QLIST_INIT;
 
+	/*
+	 * Must be careful to not miss any objects that are being moved from
+	 * per-cpu list to the global quarantine in quarantine_put(),
+	 * nor objects being freed in quarantine_reduce(). on_each_cpu()
+	 * achieves the first goal, while synchronize_srcu() achieves the
+	 * second.
+	 */
 	on_each_cpu(per_cpu_remove_cache, cache, 1);
 
 	spin_lock_irqsave(&quarantine_lock, flags);
@@ -295,4 +323,6 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, cache);
+
+	synchronize_srcu(&remove_cache_srcu);
 }
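
The reduce-side fix relies only on the SRCU grace-period guarantee, and
that pattern can be reproduced outside the kernel. Below is a minimal
sketch using liburcu as a stand-in for the kernel's SRCU. All names are
hypothetical; it assumes the classic liburcu API (#include <urcu.h>,
link with -lurcu) and that every participating thread has called
rcu_register_thread(). As in the patch, reduce() frees a detached batch
outside the lock but inside a read-side critical section, and
remove_cache() waits for a grace period after draining the shared list.

#include <pthread.h>
#include <stdlib.h>
#include <urcu.h>	/* liburcu: rcu_read_lock(), synchronize_rcu(), ... */

struct item { struct item *next; int cache_id; };

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *global_list;	/* stands in for global_quarantine */

/* Stands in for quarantine_reduce(). */
static void reduce(void)
{
	struct item *to_free;

	rcu_read_lock();		/* analogue of srcu_read_lock() */
	pthread_mutex_lock(&global_lock);
	to_free = global_list;		/* detach the whole backlog */
	global_list = NULL;
	pthread_mutex_unlock(&global_lock);

	while (to_free) {		/* free outside the lock */
		struct item *next = to_free->next;
		free(to_free);
		to_free = next;
	}
	rcu_read_unlock();		/* analogue of srcu_read_unlock() */
}

/* Stands in for quarantine_remove_cache(). */
static void remove_cache(int cache_id)
{
	struct item **pp;

	pthread_mutex_lock(&global_lock);
	for (pp = &global_list; *pp; ) {	/* drain matching objects */
		struct item *it = *pp;

		if (it->cache_id == cache_id) {
			*pp = it->next;
			free(it);
		} else {
			pp = &it->next;
		}
	}
	pthread_mutex_unlock(&global_lock);

	/*
	 * Wait for any in-flight reduce(): once this returns, no reader
	 * that might still hold our objects in its local to_free list is
	 * running, mirroring synchronize_srcu() in the patch.
	 */
	synchronize_rcu();
}

The kernel uses a private SRCU domain rather than plain RCU for the two
reasons stated in the patch comment: the grace period does not interfere
with anything else, and synchronize_srcu() returns immediately in the
common case of no pending readers.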