aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2014-03-04 15:38:46 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-03-09 01:08:29 -0500
commit b7ce40cff0b9f6597f8318fd761accd92727f61f (patch)
tree 03f743474c481d9c9c5f931675dba0d79a546ba8
parent 92d585ef067da7a966d6ce78c601bd1562b62619 (diff)
kernfs: cache atomic_write_len in kernfs_open_file
While implementing atomic_write_len, 4d3773c4bb41 ("kernfs: implement
kernfs_ops->atomic_write_len") moved data copy from userland inside
kernfs_get_active() and kernfs_open_file->mutex so that
kernfs_ops->atomic_write_len can be accessed before copying buffer
from userland; unfortunately, this could lead to locking order
inversion involving mmap_sem if copy_from_user() takes a page fault.

  ======================================================
  [ INFO: possible circular locking dependency detected ]
  3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 Tainted: G W
  -------------------------------------------------------
  trinity-c236/10658 is trying to acquire lock:
   (&of->mutex#2){+.+.+.}, at: [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120

  but task is already holding lock:
   (&mm->mmap_sem){++++++}, at: [<mm/util.c:397>] vm_mmap_pgoff+0x6e/0xe0

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (&mm->mmap_sem){++++++}:
         [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0
         [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0
         [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0
         [<mm/memory.c:4188>] might_fault+0x7e/0xb0
         [<arch/x86/include/asm/uaccess.h:713 fs/kernfs/file.c:291>] kernfs_fop_write+0xd8/0x190
         [<fs/read_write.c:473>] vfs_write+0xe3/0x1d0
         [<fs/read_write.c:523 fs/read_write.c:515>] SyS_write+0x5d/0xa0
         [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2

  -> #0 (&of->mutex#2){+.+.+.}:
         [<kernel/locking/lockdep.c:1840>] check_prev_add+0x13f/0x560
         [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0
         [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0
         [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0
         [<kernel/locking/mutex.c:470 kernel/locking/mutex.c:571>] mutex_lock_nested+0x6a/0x510
         [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120
         [<mm/mmap.c:1573>] mmap_region+0x310/0x5c0
         [<mm/mmap.c:1365>] do_mmap_pgoff+0x385/0x430
         [<mm/util.c:399>] vm_mmap_pgoff+0x8f/0xe0
         [<mm/mmap.c:1416 mm/mmap.c:1374>] SyS_mmap_pgoff+0x1b0/0x210
         [<arch/x86/kernel/sys_x86_64.c:72>] SyS_mmap+0x1d/0x20
         [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2

  other info that might help us debug this:

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&mm->mmap_sem);
                                 lock(&of->mutex#2);
                                 lock(&mm->mmap_sem);
    lock(&of->mutex#2);

   *** DEADLOCK ***

  1 lock held by trinity-c236/10658:
   #0: (&mm->mmap_sem){++++++}, at: [<mm/util.c:397>] vm_mmap_pgoff+0x6e/0xe0

  stack backtrace:
  CPU: 2 PID: 10658 Comm: trinity-c236 Tainted: G W 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26
   0000000000000000 ffff88011911fa48 ffffffff8438e945 0000000000000000
   0000000000000000 ffff88011911fa98 ffffffff811a0109 ffff88011911fab8
   ffff88011911fab8 ffff88011911fa98 ffff880119128cc0 ffff880119128cf8
  Call Trace:
   [<lib/dump_stack.c:52>] dump_stack+0x52/0x7f
   [<kernel/locking/lockdep.c:1213>] print_circular_bug+0x129/0x160
   [<kernel/locking/lockdep.c:1840>] check_prev_add+0x13f/0x560
   [<include/linux/spinlock.h:343 mm/slub.c:1933>] ? deactivate_slab+0x511/0x550
   [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0
   [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0
   [<mm/mmap.c:1552>] ? mmap_region+0x24a/0x5c0
   [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0
   [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120
   [<kernel/locking/mutex.c:470 kernel/locking/mutex.c:571>] mutex_lock_nested+0x6a/0x510
   [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120
   [<kernel/sched/core.c:2477>] ? get_parent_ip+0x11/0x50
   [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120
   [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120
   [<mm/mmap.c:1573>] mmap_region+0x310/0x5c0
   [<mm/mmap.c:1365>] do_mmap_pgoff+0x385/0x430
   [<mm/util.c:397>] ? vm_mmap_pgoff+0x6e/0xe0
   [<mm/util.c:399>] vm_mmap_pgoff+0x8f/0xe0
   [<kernel/rcu/update.c:97>] ? __rcu_read_unlock+0x44/0xb0
   [<fs/file.c:641>] ? dup_fd+0x3c0/0x3c0
   [<mm/mmap.c:1416 mm/mmap.c:1374>] SyS_mmap_pgoff+0x1b0/0x210
   [<arch/x86/kernel/sys_x86_64.c:72>] SyS_mmap+0x1d/0x20
   [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2

Fix it by caching atomic_write_len in kernfs_open_file during open so
that it can be determined without accessing kernfs_ops in
kernfs_fop_write(). This restores the structure of kernfs_fop_write()
before 4d3773c4bb41 with updated @len determination logic.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
References: http://lkml.kernel.org/g/53113485.2090407@oracle.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- fs/kernfs/file.c 63
-rw-r--r-- include/linux/kernfs.h 1
2 files changed, 33 insertions, 31 deletions
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index ddcb471b9cc9..8034706a7af8 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -253,55 +253,50 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
253{ 253{
254 struct kernfs_open_file *of = kernfs_of(file); 254 struct kernfs_open_file *of = kernfs_of(file);
255 const struct kernfs_ops *ops; 255 const struct kernfs_ops *ops;
256 char *buf = NULL; 256 size_t len;
257 ssize_t len; 257 char *buf;
258
259 /*
260 * @of->mutex nests outside active ref and is just to ensure that
261 * the ops aren't called concurrently for the same open file.
262 */
263 mutex_lock(&of->mutex);
264 if (!kernfs_get_active(of->kn)) {
265 mutex_unlock(&of->mutex);
266 return -ENODEV;
267 }
268
269 ops = kernfs_ops(of->kn);
270 if (!ops->write) {
271 len = -EINVAL;
272 goto out_unlock;
273 }
274 258
275 if (ops->atomic_write_len) { 259 if (of->atomic_write_len) {
276 len = count; 260 len = count;
277 if (len > ops->atomic_write_len) { 261 if (len > of->atomic_write_len)
278 len = -E2BIG; 262 return -E2BIG;
279 goto out_unlock;
280 }
281 } else { 263 } else {
282 len = min_t(size_t, count, PAGE_SIZE); 264 len = min_t(size_t, count, PAGE_SIZE);
283 } 265 }
284 266
285 buf = kmalloc(len + 1, GFP_KERNEL); 267 buf = kmalloc(len + 1, GFP_KERNEL);
286 if (!buf) { 268 if (!buf)
287 len = -ENOMEM; 269 return -ENOMEM;
288 goto out_unlock;
289 }
290 270
291 if (copy_from_user(buf, user_buf, len)) { 271 if (copy_from_user(buf, user_buf, len)) {
292 len = -EFAULT; 272 len = -EFAULT;
293 goto out_unlock; 273 goto out_free;
294 } 274 }
295 buf[len] = '\0'; /* guarantee string termination */ 275 buf[len] = '\0'; /* guarantee string termination */
296 276
297 len = ops->write(of, buf, len, *ppos); 277 /*
298out_unlock: 278 * @of->mutex nests outside active ref and is just to ensure that
279 * the ops aren't called concurrently for the same open file.
280 */
281 mutex_lock(&of->mutex);
282 if (!kernfs_get_active(of->kn)) {
283 mutex_unlock(&of->mutex);
284 len = -ENODEV;
285 goto out_free;
286 }
287
288 ops = kernfs_ops(of->kn);
289 if (ops->write)
290 len = ops->write(of, buf, len, *ppos);
291 else
292 len = -EINVAL;
293
299 kernfs_put_active(of->kn); 294 kernfs_put_active(of->kn);
300 mutex_unlock(&of->mutex); 295 mutex_unlock(&of->mutex);
301 296
302 if (len > 0) 297 if (len > 0)
303 *ppos += len; 298 *ppos += len;
304 299out_free:
305 kfree(buf); 300 kfree(buf);
306 return len; 301 return len;
307} 302}
@@ -666,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
666 of->file = file; 661 of->file = file;
667 662
668 /* 663 /*
 664 * Write path needs to access atomic_write_len outside active reference.
665 * Cache it in open_file. See kernfs_fop_write() for details.
666 */
667 of->atomic_write_len = ops->atomic_write_len;
668
669 /*
669 * Always instantiate seq_file even if read access doesn't use 670 * Always instantiate seq_file even if read access doesn't use
670 * seq_file or is not requested. This unifies private data access 671 * seq_file or is not requested. This unifies private data access
671 * and readable regular files are the vast majority anyway. 672 * and readable regular files are the vast majority anyway.
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 09669d092748..b0122dc6f96a 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -158,6 +158,7 @@ struct kernfs_open_file {
158 int event; 158 int event;
159 struct list_head list; 159 struct list_head list;
160 160
161 size_t atomic_write_len;
161 bool mmapped; 162 bool mmapped;
162 const struct vm_operations_struct *vm_ops; 163 const struct vm_operations_struct *vm_ops;
163}; 164};