diff options
author | Tejun Heo <tj@kernel.org> | 2014-07-01 16:41:03 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-07-02 12:32:09 -0400 |
commit | ecca47ce8294843045e7465d76fee84dbf07a004 (patch) | |
tree | fdd191b4517786683da541715670c08e77a3fb6a /fs | |
parent | 4c834452aad01531db949414f94f817a86348d59 (diff) |
kernfs: kernfs_notify() must be useable from non-sleepable contexts
d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events
too") added fsnotify triggering to kernfs_notify() which requires a
sleepable context. There are already existing users of
kernfs_notify() which invoke it from an atomic context and in general
it's silly to require a sleepable context for triggering a
notification.
The following is an invalid context bug triggered by md invoking
sysfs_notify() from the IO completion path.
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586
in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
2 locks held by swapper/1/0:
#0: (&(&vblk->vq_lock)->rlock){-.-...}, at: [<ffffffffa0039042>] virtblk_done+0x42/0xe0 [virtio_blk]
#1: (&(&bitmap->counts.lock)->rlock){-.....}, at: [<ffffffff81633718>] bitmap_endwrite+0x68/0x240
irq event stamp: 33518
hardirqs last enabled at (33515): [<ffffffff8102544f>] default_idle+0x1f/0x230
hardirqs last disabled at (33516): [<ffffffff818122ed>] common_interrupt+0x6d/0x72
softirqs last enabled at (33518): [<ffffffff810a1272>] _local_bh_enable+0x22/0x50
softirqs last disabled at (33517): [<ffffffff810a29e0>] irq_enter+0x60/0x80
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-0.rc2.git2.1.fc21.x86_64 #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
0000000000000000 f90db13964f4ee05 ffff88007d403b80 ffffffff81807b4c
0000000000000000 ffff88007d403ba8 ffffffff810d4f14 0000000000000000
0000000000441800 ffff880078fa1780 ffff88007d403c38 ffffffff8180caf2
Call Trace:
<IRQ> [<ffffffff81807b4c>] dump_stack+0x4d/0x66
[<ffffffff810d4f14>] __might_sleep+0x184/0x240
[<ffffffff8180caf2>] mutex_lock_nested+0x42/0x440
[<ffffffff812d76a0>] kernfs_notify+0x90/0x150
[<ffffffff8163377c>] bitmap_endwrite+0xcc/0x240
[<ffffffffa00de863>] close_write+0x93/0xb0 [raid1]
[<ffffffffa00df029>] r1_bio_write_done+0x29/0x50 [raid1]
[<ffffffffa00e0474>] raid1_end_write_request+0xe4/0x260 [raid1]
[<ffffffff813acb8b>] bio_endio+0x6b/0xa0
[<ffffffff813b46c4>] blk_update_request+0x94/0x420
[<ffffffff813bf0ea>] blk_mq_end_io+0x1a/0x70
[<ffffffffa00392c2>] virtblk_request_done+0x32/0x80 [virtio_blk]
[<ffffffff813c0648>] __blk_mq_complete_request+0x88/0x120
[<ffffffff813c070a>] blk_mq_complete_request+0x2a/0x30
[<ffffffffa0039066>] virtblk_done+0x66/0xe0 [virtio_blk]
[<ffffffffa002535a>] vring_interrupt+0x3a/0xa0 [virtio_ring]
[<ffffffff81116177>] handle_irq_event_percpu+0x77/0x340
[<ffffffff8111647d>] handle_irq_event+0x3d/0x60
[<ffffffff81119436>] handle_edge_irq+0x66/0x130
[<ffffffff8101c3e4>] handle_irq+0x84/0x150
[<ffffffff818146ad>] do_IRQ+0x4d/0xe0
[<ffffffff818122f2>] common_interrupt+0x72/0x72
<EOI> [<ffffffff8105f706>] ? native_safe_halt+0x6/0x10
[<ffffffff81025454>] default_idle+0x24/0x230
[<ffffffff81025f9f>] arch_cpu_idle+0xf/0x20
[<ffffffff810f5adc>] cpu_startup_entry+0x37c/0x7b0
[<ffffffff8104df1b>] start_secondary+0x25b/0x300
This patch fixes it by punting the notification delivery through a
work item. This ends up adding an extra pointer to kernfs_elem_attr
enlarging kernfs_node by a pointer, which is not ideal but not a very
big deal either. If this turns out to be an actual issue, we can move
kernfs_elem_attr->size to kernfs_node->iattr later.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Cc: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/kernfs/file.c | 69 |
1 files changed, 55 insertions, 14 deletions
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e3d37f607f97..d895b4b7b661 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
@@ -39,6 +39,19 @@ struct kernfs_open_node { | |||
39 | struct list_head files; /* goes through kernfs_open_file.list */ | 39 | struct list_head files; /* goes through kernfs_open_file.list */ |
40 | }; | 40 | }; |
41 | 41 | ||
42 | /* | ||
43 | * kernfs_notify() may be called from any context and bounces notifications | ||
44 | * through a work item. To minimize space overhead in kernfs_node, the | ||
45 | * pending queue is implemented as a singly linked list of kernfs_nodes. | ||
46 | * The list is terminated with the self pointer so that whether a | ||
47 | * kernfs_node is on the list or not can be determined by testing the next | ||
48 | * pointer for NULL. | ||
49 | */ | ||
50 | #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) | ||
51 | |||
52 | static DEFINE_SPINLOCK(kernfs_notify_lock); | ||
53 | static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; | ||
54 | |||
42 | static struct kernfs_open_file *kernfs_of(struct file *file) | 55 | static struct kernfs_open_file *kernfs_of(struct file *file) |
43 | { | 56 | { |
44 | return ((struct seq_file *)file->private_data)->private; | 57 | return ((struct seq_file *)file->private_data)->private; |
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) | |||
783 | return DEFAULT_POLLMASK|POLLERR|POLLPRI; | 796 | return DEFAULT_POLLMASK|POLLERR|POLLPRI; |
784 | } | 797 | } |
785 | 798 | ||
786 | /** | 799 | static void kernfs_notify_workfn(struct work_struct *work) |
787 | * kernfs_notify - notify a kernfs file | ||
788 | * @kn: file to notify | ||
789 | * | ||
790 | * Notify @kn such that poll(2) on @kn wakes up. | ||
791 | */ | ||
792 | void kernfs_notify(struct kernfs_node *kn) | ||
793 | { | 800 | { |
794 | struct kernfs_root *root = kernfs_root(kn); | 801 | struct kernfs_node *kn; |
795 | struct kernfs_open_node *on; | 802 | struct kernfs_open_node *on; |
796 | struct kernfs_super_info *info; | 803 | struct kernfs_super_info *info; |
797 | unsigned long flags; | 804 | repeat: |
798 | 805 | /* pop one off the notify_list */ | |
799 | if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) | 806 | spin_lock_irq(&kernfs_notify_lock); |
807 | kn = kernfs_notify_list; | ||
808 | if (kn == KERNFS_NOTIFY_EOL) { | ||
809 | spin_unlock_irq(&kernfs_notify_lock); | ||
800 | return; | 810 | return; |
811 | } | ||
812 | kernfs_notify_list = kn->attr.notify_next; | ||
813 | kn->attr.notify_next = NULL; | ||
814 | spin_unlock_irq(&kernfs_notify_lock); | ||
801 | 815 | ||
802 | /* kick poll */ | 816 | /* kick poll */ |
803 | spin_lock_irqsave(&kernfs_open_node_lock, flags); | 817 | spin_lock_irq(&kernfs_open_node_lock); |
804 | 818 | ||
805 | on = kn->attr.open; | 819 | on = kn->attr.open; |
806 | if (on) { | 820 | if (on) { |
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn) | |||
808 | wake_up_interruptible(&on->poll); | 822 | wake_up_interruptible(&on->poll); |
809 | } | 823 | } |
810 | 824 | ||
811 | spin_unlock_irqrestore(&kernfs_open_node_lock, flags); | 825 | spin_unlock_irq(&kernfs_open_node_lock); |
812 | 826 | ||
813 | /* kick fsnotify */ | 827 | /* kick fsnotify */ |
814 | mutex_lock(&kernfs_mutex); | 828 | mutex_lock(&kernfs_mutex); |
815 | 829 | ||
816 | list_for_each_entry(info, &root->supers, node) { | 830 | list_for_each_entry(info, &kernfs_root(kn)->supers, node) { |
817 | struct inode *inode; | 831 | struct inode *inode; |
818 | struct dentry *dentry; | 832 | struct dentry *dentry; |
819 | 833 | ||
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn) | |||
833 | } | 847 | } |
834 | 848 | ||
835 | mutex_unlock(&kernfs_mutex); | 849 | mutex_unlock(&kernfs_mutex); |
850 | kernfs_put(kn); | ||
851 | goto repeat; | ||
852 | } | ||
853 | |||
854 | /** | ||
855 | * kernfs_notify - notify a kernfs file | ||
856 | * @kn: file to notify | ||
857 | * | ||
858 | * Notify @kn such that poll(2) on @kn wakes up. May be called from any | |
859 | * context. | ||
860 | */ | ||
861 | void kernfs_notify(struct kernfs_node *kn) | ||
862 | { | ||
863 | static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); | ||
864 | unsigned long flags; | ||
865 | |||
866 | if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) | ||
867 | return; | ||
868 | |||
869 | spin_lock_irqsave(&kernfs_notify_lock, flags); | ||
870 | if (!kn->attr.notify_next) { | ||
871 | kernfs_get(kn); | ||
872 | kn->attr.notify_next = kernfs_notify_list; | ||
873 | kernfs_notify_list = kn; | ||
874 | schedule_work(&kernfs_notify_work); | ||
875 | } | ||
876 | spin_unlock_irqrestore(&kernfs_notify_lock, flags); | ||
836 | } | 877 | } |
837 | EXPORT_SYMBOL_GPL(kernfs_notify); | 878 | EXPORT_SYMBOL_GPL(kernfs_notify); |
838 | 879 | ||