summaryrefslogtreecommitdiffstats
path: root/drivers/block/zram
diff options
context:
space:
mode:
authorMinchan Kim <minchan@kernel.org>2018-12-28 03:36:33 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 15:11:49 -0500
commit3c9959e025472122a61faebb208525cf26b305d1 (patch)
tree899b7879fec008e1f7b4a605ef4cbfc22c377137 /drivers/block/zram
parentfed84c78527009d4f799a3ed9a566502fa026d82 (diff)
zram: fix lockdep warning of free block handling
Patch series "zram idle page writeback", v3. Inherently, swap device has many idle pages which are rare touched since it was allocated. It is never problem if we use storage device as swap. However, it's just waste for zram-swap. This patchset supports zram idle page writeback feature. * Admin can define what is idle page "no access since X time ago" * Admin can define when zram should writeback them * Admin can define when zram should stop writeback to prevent wearout Details are in each patch's description. This patch (of 7): ================================ WARNING: inconsistent lock state 4.19.0+ #390 Not tainted -------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. zram_verify/2095 [HC0[0]:SC1[1]:HE1:SE0] takes: 00000000b1828693 (&(&zram->bitmap_lock)->rlock){+.?.}, at: put_entry_bdev+0x1e/0x50 {SOFTIRQ-ON-W} state was registered at: _raw_spin_lock+0x2c/0x40 zram_make_request+0x755/0xdc9 generic_make_request+0x373/0x6a0 submit_bio+0x6c/0x140 __swap_writepage+0x3a8/0x480 shrink_page_list+0x1102/0x1a60 shrink_inactive_list+0x21b/0x3f0 shrink_node_memcg.constprop.99+0x4f8/0x7e0 shrink_node+0x7d/0x2f0 do_try_to_free_pages+0xe0/0x300 try_to_free_pages+0x116/0x2b0 __alloc_pages_slowpath+0x3f4/0xf80 __alloc_pages_nodemask+0x2a2/0x2f0 __handle_mm_fault+0x42e/0xb50 handle_mm_fault+0x55/0xb0 __do_page_fault+0x235/0x4b0 page_fault+0x1e/0x30 irq event stamp: 228412 hardirqs last enabled at (228412): [<ffffffff98245846>] __slab_free+0x3e6/0x600 hardirqs last disabled at (228411): [<ffffffff98245625>] __slab_free+0x1c5/0x600 softirqs last enabled at (228396): [<ffffffff98e0031e>] __do_softirq+0x31e/0x427 softirqs last disabled at (228403): [<ffffffff98072051>] irq_exit+0xd1/0xe0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&zram->bitmap_lock)->rlock); <Interrupt> lock(&(&zram->bitmap_lock)->rlock); *** DEADLOCK *** no locks held by zram_verify/2095. stack backtrace: CPU: 5 PID: 2095 Comm: zram_verify Not tainted 4.19.0+ #390 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: <IRQ> dump_stack+0x67/0x9b print_usage_bug+0x1bd/0x1d3 mark_lock+0x4aa/0x540 __lock_acquire+0x51d/0x1300 lock_acquire+0x90/0x180 _raw_spin_lock+0x2c/0x40 put_entry_bdev+0x1e/0x50 zram_free_page+0xf6/0x110 zram_slot_free_notify+0x42/0xa0 end_swap_bio_read+0x5b/0x170 blk_update_request+0x8f/0x340 scsi_end_request+0x2c/0x1e0 scsi_io_completion+0x98/0x650 blk_done_softirq+0x9e/0xd0 __do_softirq+0xcc/0x427 irq_exit+0xd1/0xe0 do_IRQ+0x93/0x120 common_interrupt+0xf/0xf </IRQ> With writeback feature, zram_slot_free_notify could be called in softirq context by end_swap_bio_read. However, bitmap_lock is not aware of that so lockdep yell out: get_entry_bdev spin_lock(bitmap->lock); irq softirq end_swap_bio_read zram_slot_free_notify zram_slot_lock <-- deadlock prone zram_free_page put_entry_bdev spin_lock(bitmap->lock); <-- deadlock prone With akpm's suggestion (i.e. bitmap operation is already atomic), we could remove bitmap lock. It might fail to find a empty slot if serious contention happens. However, it's not severe problem because huge page writeback has already possiblity to fail if there is severe memory pressure. Worst case is just keeping the incompressible in memory, not storage. The other problem is zram_slot_lock in zram_slot_slot_free_notify. To make it safe is this patch introduces zram_slot_trylock where zram_slot_free_notify uses it. Although it's rare to be contented, this patch adds new debug stat "miss_free" to keep monitoring how often it happens. Link: http://lkml.kernel.org/r/20181127055429.251614-2-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Reviewed-by: Joey Pabalinas <joeypabalinas@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/zram')
-rw-r--r--drivers/block/zram/zram_drv.c38
-rw-r--r--drivers/block/zram/zram_drv.h2
2 files changed, 22 insertions, 18 deletions
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 4879595200e1..21a7046958a3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -53,6 +53,11 @@ static size_t huge_class_size;
53 53
54static void zram_free_page(struct zram *zram, size_t index); 54static void zram_free_page(struct zram *zram, size_t index);
55 55
56static int zram_slot_trylock(struct zram *zram, u32 index)
57{
58 return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].value);
59}
60
56static void zram_slot_lock(struct zram *zram, u32 index) 61static void zram_slot_lock(struct zram *zram, u32 index)
57{ 62{
58 bit_spin_lock(ZRAM_LOCK, &zram->table[index].value); 63 bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
@@ -399,7 +404,6 @@ static ssize_t backing_dev_store(struct device *dev,
399 goto out; 404 goto out;
400 405
401 reset_bdev(zram); 406 reset_bdev(zram);
402 spin_lock_init(&zram->bitmap_lock);
403 407
404 zram->old_block_size = old_block_size; 408 zram->old_block_size = old_block_size;
405 zram->bdev = bdev; 409 zram->bdev = bdev;
@@ -443,29 +447,24 @@ out:
443 447
444static unsigned long get_entry_bdev(struct zram *zram) 448static unsigned long get_entry_bdev(struct zram *zram)
445{ 449{
446 unsigned long entry; 450 unsigned long blk_idx = 1;
447 451retry:
448 spin_lock(&zram->bitmap_lock);
449 /* skip 0 bit to confuse zram.handle = 0 */ 452 /* skip 0 bit to confuse zram.handle = 0 */
450 entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); 453 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
451 if (entry == zram->nr_pages) { 454 if (blk_idx == zram->nr_pages)
452 spin_unlock(&zram->bitmap_lock);
453 return 0; 455 return 0;
454 }
455 456
456 set_bit(entry, zram->bitmap); 457 if (test_and_set_bit(blk_idx, zram->bitmap))
457 spin_unlock(&zram->bitmap_lock); 458 goto retry;
458 459
459 return entry; 460 return blk_idx;
460} 461}
461 462
462static void put_entry_bdev(struct zram *zram, unsigned long entry) 463static void put_entry_bdev(struct zram *zram, unsigned long entry)
463{ 464{
464 int was_set; 465 int was_set;
465 466
466 spin_lock(&zram->bitmap_lock);
467 was_set = test_and_clear_bit(entry, zram->bitmap); 467 was_set = test_and_clear_bit(entry, zram->bitmap);
468 spin_unlock(&zram->bitmap_lock);
469 WARN_ON_ONCE(!was_set); 468 WARN_ON_ONCE(!was_set);
470} 469}
471 470
@@ -886,9 +885,10 @@ static ssize_t debug_stat_show(struct device *dev,
886 885
887 down_read(&zram->init_lock); 886 down_read(&zram->init_lock);
888 ret = scnprintf(buf, PAGE_SIZE, 887 ret = scnprintf(buf, PAGE_SIZE,
889 "version: %d\n%8llu\n", 888 "version: %d\n%8llu %8llu\n",
890 version, 889 version,
891 (u64)atomic64_read(&zram->stats.writestall)); 890 (u64)atomic64_read(&zram->stats.writestall),
891 (u64)atomic64_read(&zram->stats.miss_free));
892 up_read(&zram->init_lock); 892 up_read(&zram->init_lock);
893 893
894 return ret; 894 return ret;
@@ -1400,10 +1400,14 @@ static void zram_slot_free_notify(struct block_device *bdev,
1400 1400
1401 zram = bdev->bd_disk->private_data; 1401 zram = bdev->bd_disk->private_data;
1402 1402
1403 zram_slot_lock(zram, index); 1403 atomic64_inc(&zram->stats.notify_free);
1404 if (!zram_slot_trylock(zram, index)) {
1405 atomic64_inc(&zram->stats.miss_free);
1406 return;
1407 }
1408
1404 zram_free_page(zram, index); 1409 zram_free_page(zram, index);
1405 zram_slot_unlock(zram, index); 1410 zram_slot_unlock(zram, index);
1406 atomic64_inc(&zram->stats.notify_free);
1407} 1411}
1408 1412
1409static int zram_rw_page(struct block_device *bdev, sector_t sector, 1413static int zram_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 72c8584b6dff..d1095dfdffa8 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -79,6 +79,7 @@ struct zram_stats {
79 atomic64_t pages_stored; /* no. of pages currently stored */ 79 atomic64_t pages_stored; /* no. of pages currently stored */
80 atomic_long_t max_used_pages; /* no. of maximum pages stored */ 80 atomic_long_t max_used_pages; /* no. of maximum pages stored */
81 atomic64_t writestall; /* no. of write slow paths */ 81 atomic64_t writestall; /* no. of write slow paths */
82 atomic64_t miss_free; /* no. of missed free */
82}; 83};
83 84
84struct zram { 85struct zram {
@@ -110,7 +111,6 @@ struct zram {
110 unsigned int old_block_size; 111 unsigned int old_block_size;
111 unsigned long *bitmap; 112 unsigned long *bitmap;
112 unsigned long nr_pages; 113 unsigned long nr_pages;
113 spinlock_t bitmap_lock;
114#endif 114#endif
115#ifdef CONFIG_ZRAM_MEMORY_TRACKING 115#ifdef CONFIG_ZRAM_MEMORY_TRACKING
116 struct dentry *debugfs_dir; 116 struct dentry *debugfs_dir;