diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 21:32:21 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 21:32:21 -0500 |
commit | 8b0fdf631cf6a31f60a9ed3e1c0f37a9715de807 (patch) | |
tree | 50bab0b8c054df37f397d581251ba7df1484e061 /ipc | |
parent | 168fe32a072a4b8dc81a3aebf0e5e588d38e2955 (diff) | |
parent | 36735a6a2b5e042db1af956ce4bcc13f3ff99e21 (diff) |
Merge branch 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull mqueue/bpf vfs cleanups from Al Viro:
"mqueue and bpf go through rather painful and similar contortions to
create objects in their dentry trees. Provide a primitive for doing
that without abusing ->mknod(), switch bpf and mqueue to it.
Another mqueue-related thing that has ended up in that branch is
on-demand creation of internal mount (based upon the work of Giuseppe
Scrivano)"
* 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
mqueue: switch to on-demand creation of internal mount
tidy do_mq_open() up a bit
mqueue: clean prepare_open() up
do_mq_open(): move all work prior to dentry_open() into a helper
mqueue: fold mq_attr_ok() into mqueue_get_inode()
move dentry_open() calls up into do_mq_open()
mqueue: switch to vfs_mkobj(), quit abusing ->d_fsdata
bpf_obj_do_pin(): switch to vfs_mkobj(), quit abusing ->mknod()
new primitive: vfs_mkobj()
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/mqueue.c | 241 |
1 file changed, 111 insertions, 130 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3bc5bb7d6827..690ae6665500 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
@@ -270,13 +270,30 @@ static struct inode *mqueue_get_inode(struct super_block *sb, | |||
270 | * that means the min(mq_maxmsg, max_priorities) * struct | 270 | * that means the min(mq_maxmsg, max_priorities) * struct |
271 | * posix_msg_tree_node. | 271 | * posix_msg_tree_node. |
272 | */ | 272 | */ |
273 | |||
274 | ret = -EINVAL; | ||
275 | if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0) | ||
276 | goto out_inode; | ||
277 | if (capable(CAP_SYS_RESOURCE)) { | ||
278 | if (info->attr.mq_maxmsg > HARD_MSGMAX || | ||
279 | info->attr.mq_msgsize > HARD_MSGSIZEMAX) | ||
280 | goto out_inode; | ||
281 | } else { | ||
282 | if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max || | ||
283 | info->attr.mq_msgsize > ipc_ns->mq_msgsize_max) | ||
284 | goto out_inode; | ||
285 | } | ||
286 | ret = -EOVERFLOW; | ||
287 | /* check for overflow */ | ||
288 | if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg) | ||
289 | goto out_inode; | ||
273 | mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + | 290 | mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + |
274 | min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * | 291 | min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * |
275 | sizeof(struct posix_msg_tree_node); | 292 | sizeof(struct posix_msg_tree_node); |
276 | 293 | mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize; | |
277 | mq_bytes = mq_treesize + (info->attr.mq_maxmsg * | 294 | if (mq_bytes + mq_treesize < mq_bytes) |
278 | info->attr.mq_msgsize); | 295 | goto out_inode; |
279 | 296 | mq_bytes += mq_treesize; | |
280 | spin_lock(&mq_lock); | 297 | spin_lock(&mq_lock); |
281 | if (u->mq_bytes + mq_bytes < u->mq_bytes || | 298 | if (u->mq_bytes + mq_bytes < u->mq_bytes || |
282 | u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { | 299 | u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { |
@@ -308,8 +325,9 @@ err: | |||
308 | static int mqueue_fill_super(struct super_block *sb, void *data, int silent) | 325 | static int mqueue_fill_super(struct super_block *sb, void *data, int silent) |
309 | { | 326 | { |
310 | struct inode *inode; | 327 | struct inode *inode; |
311 | struct ipc_namespace *ns = sb->s_fs_info; | 328 | struct ipc_namespace *ns = data; |
312 | 329 | ||
330 | sb->s_fs_info = ns; | ||
313 | sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; | 331 | sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; |
314 | sb->s_blocksize = PAGE_SIZE; | 332 | sb->s_blocksize = PAGE_SIZE; |
315 | sb->s_blocksize_bits = PAGE_SHIFT; | 333 | sb->s_blocksize_bits = PAGE_SHIFT; |
@@ -326,18 +344,44 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) | |||
326 | return 0; | 344 | return 0; |
327 | } | 345 | } |
328 | 346 | ||
347 | static struct file_system_type mqueue_fs_type; | ||
348 | /* | ||
349 | * Return value is pinned only by reference in ->mq_mnt; it will | ||
350 | * live until ipcns dies. Caller does not need to drop it. | ||
351 | */ | ||
352 | static struct vfsmount *mq_internal_mount(void) | ||
353 | { | ||
354 | struct ipc_namespace *ns = current->nsproxy->ipc_ns; | ||
355 | struct vfsmount *m = ns->mq_mnt; | ||
356 | if (m) | ||
357 | return m; | ||
358 | m = kern_mount_data(&mqueue_fs_type, ns); | ||
359 | spin_lock(&mq_lock); | ||
360 | if (unlikely(ns->mq_mnt)) { | ||
361 | spin_unlock(&mq_lock); | ||
362 | if (!IS_ERR(m)) | ||
363 | kern_unmount(m); | ||
364 | return ns->mq_mnt; | ||
365 | } | ||
366 | if (!IS_ERR(m)) | ||
367 | ns->mq_mnt = m; | ||
368 | spin_unlock(&mq_lock); | ||
369 | return m; | ||
370 | } | ||
371 | |||
329 | static struct dentry *mqueue_mount(struct file_system_type *fs_type, | 372 | static struct dentry *mqueue_mount(struct file_system_type *fs_type, |
330 | int flags, const char *dev_name, | 373 | int flags, const char *dev_name, |
331 | void *data) | 374 | void *data) |
332 | { | 375 | { |
333 | struct ipc_namespace *ns; | 376 | struct vfsmount *m; |
334 | if (flags & SB_KERNMOUNT) { | 377 | if (flags & SB_KERNMOUNT) |
335 | ns = data; | 378 | return mount_nodev(fs_type, flags, data, mqueue_fill_super); |
336 | data = NULL; | 379 | m = mq_internal_mount(); |
337 | } else { | 380 | if (IS_ERR(m)) |
338 | ns = current->nsproxy->ipc_ns; | 381 | return ERR_CAST(m); |
339 | } | 382 | atomic_inc(&m->mnt_sb->s_active); |
340 | return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); | 383 | down_write(&m->mnt_sb->s_umount); |
384 | return dget(m->mnt_root); | ||
341 | } | 385 | } |
342 | 386 | ||
343 | static void init_once(void *foo) | 387 | static void init_once(void *foo) |
@@ -416,11 +460,11 @@ static void mqueue_evict_inode(struct inode *inode) | |||
416 | put_ipc_ns(ipc_ns); | 460 | put_ipc_ns(ipc_ns); |
417 | } | 461 | } |
418 | 462 | ||
419 | static int mqueue_create(struct inode *dir, struct dentry *dentry, | 463 | static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg) |
420 | umode_t mode, bool excl) | ||
421 | { | 464 | { |
465 | struct inode *dir = dentry->d_parent->d_inode; | ||
422 | struct inode *inode; | 466 | struct inode *inode; |
423 | struct mq_attr *attr = dentry->d_fsdata; | 467 | struct mq_attr *attr = arg; |
424 | int error; | 468 | int error; |
425 | struct ipc_namespace *ipc_ns; | 469 | struct ipc_namespace *ipc_ns; |
426 | 470 | ||
@@ -461,6 +505,12 @@ out_unlock: | |||
461 | return error; | 505 | return error; |
462 | } | 506 | } |
463 | 507 | ||
508 | static int mqueue_create(struct inode *dir, struct dentry *dentry, | ||
509 | umode_t mode, bool excl) | ||
510 | { | ||
511 | return mqueue_create_attr(dentry, mode, NULL); | ||
512 | } | ||
513 | |||
464 | static int mqueue_unlink(struct inode *dir, struct dentry *dentry) | 514 | static int mqueue_unlink(struct inode *dir, struct dentry *dentry) |
465 | { | 515 | { |
466 | struct inode *inode = d_inode(dentry); | 516 | struct inode *inode = d_inode(dentry); |
@@ -691,96 +741,46 @@ static void remove_notification(struct mqueue_inode_info *info) | |||
691 | info->notify_user_ns = NULL; | 741 | info->notify_user_ns = NULL; |
692 | } | 742 | } |
693 | 743 | ||
694 | static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) | 744 | static int prepare_open(struct dentry *dentry, int oflag, int ro, |
695 | { | 745 | umode_t mode, struct filename *name, |
696 | int mq_treesize; | ||
697 | unsigned long total_size; | ||
698 | |||
699 | if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) | ||
700 | return -EINVAL; | ||
701 | if (capable(CAP_SYS_RESOURCE)) { | ||
702 | if (attr->mq_maxmsg > HARD_MSGMAX || | ||
703 | attr->mq_msgsize > HARD_MSGSIZEMAX) | ||
704 | return -EINVAL; | ||
705 | } else { | ||
706 | if (attr->mq_maxmsg > ipc_ns->mq_msg_max || | ||
707 | attr->mq_msgsize > ipc_ns->mq_msgsize_max) | ||
708 | return -EINVAL; | ||
709 | } | ||
710 | /* check for overflow */ | ||
711 | if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) | ||
712 | return -EOVERFLOW; | ||
713 | mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) + | ||
714 | min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) * | ||
715 | sizeof(struct posix_msg_tree_node); | ||
716 | total_size = attr->mq_maxmsg * attr->mq_msgsize; | ||
717 | if (total_size + mq_treesize < total_size) | ||
718 | return -EOVERFLOW; | ||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | /* | ||
723 | * Invoked when creating a new queue via sys_mq_open | ||
724 | */ | ||
725 | static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, | ||
726 | struct path *path, int oflag, umode_t mode, | ||
727 | struct mq_attr *attr) | 746 | struct mq_attr *attr) |
728 | { | 747 | { |
729 | const struct cred *cred = current_cred(); | ||
730 | int ret; | ||
731 | |||
732 | if (attr) { | ||
733 | ret = mq_attr_ok(ipc_ns, attr); | ||
734 | if (ret) | ||
735 | return ERR_PTR(ret); | ||
736 | /* store for use during create */ | ||
737 | path->dentry->d_fsdata = attr; | ||
738 | } else { | ||
739 | struct mq_attr def_attr; | ||
740 | |||
741 | def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max, | ||
742 | ipc_ns->mq_msg_default); | ||
743 | def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, | ||
744 | ipc_ns->mq_msgsize_default); | ||
745 | ret = mq_attr_ok(ipc_ns, &def_attr); | ||
746 | if (ret) | ||
747 | return ERR_PTR(ret); | ||
748 | } | ||
749 | |||
750 | mode &= ~current_umask(); | ||
751 | ret = vfs_create(dir, path->dentry, mode, true); | ||
752 | path->dentry->d_fsdata = NULL; | ||
753 | if (ret) | ||
754 | return ERR_PTR(ret); | ||
755 | return dentry_open(path, oflag, cred); | ||
756 | } | ||
757 | |||
758 | /* Opens existing queue */ | ||
759 | static struct file *do_open(struct path *path, int oflag) | ||
760 | { | ||
761 | static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, | 748 | static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, |
762 | MAY_READ | MAY_WRITE }; | 749 | MAY_READ | MAY_WRITE }; |
763 | int acc; | 750 | int acc; |
751 | |||
752 | if (d_really_is_negative(dentry)) { | ||
753 | if (!(oflag & O_CREAT)) | ||
754 | return -ENOENT; | ||
755 | if (ro) | ||
756 | return ro; | ||
757 | audit_inode_parent_hidden(name, dentry->d_parent); | ||
758 | return vfs_mkobj(dentry, mode & ~current_umask(), | ||
759 | mqueue_create_attr, attr); | ||
760 | } | ||
761 | /* it already existed */ | ||
762 | audit_inode(name, dentry, 0); | ||
763 | if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) | ||
764 | return -EEXIST; | ||
764 | if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) | 765 | if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) |
765 | return ERR_PTR(-EINVAL); | 766 | return -EINVAL; |
766 | acc = oflag2acc[oflag & O_ACCMODE]; | 767 | acc = oflag2acc[oflag & O_ACCMODE]; |
767 | if (inode_permission(d_inode(path->dentry), acc)) | 768 | return inode_permission(d_inode(dentry), acc); |
768 | return ERR_PTR(-EACCES); | ||
769 | return dentry_open(path, oflag, current_cred()); | ||
770 | } | 769 | } |
771 | 770 | ||
772 | static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, | 771 | static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, |
773 | struct mq_attr *attr) | 772 | struct mq_attr *attr) |
774 | { | 773 | { |
775 | struct path path; | 774 | struct vfsmount *mnt = mq_internal_mount(); |
776 | struct file *filp; | 775 | struct dentry *root; |
777 | struct filename *name; | 776 | struct filename *name; |
777 | struct path path; | ||
778 | int fd, error; | 778 | int fd, error; |
779 | struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; | ||
780 | struct vfsmount *mnt = ipc_ns->mq_mnt; | ||
781 | struct dentry *root = mnt->mnt_root; | ||
782 | int ro; | 779 | int ro; |
783 | 780 | ||
781 | if (IS_ERR(mnt)) | ||
782 | return PTR_ERR(mnt); | ||
783 | |||
784 | audit_mq_open(oflag, mode, attr); | 784 | audit_mq_open(oflag, mode, attr); |
785 | 785 | ||
786 | if (IS_ERR(name = getname(u_name))) | 786 | if (IS_ERR(name = getname(u_name))) |
@@ -791,7 +791,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, | |||
791 | goto out_putname; | 791 | goto out_putname; |
792 | 792 | ||
793 | ro = mnt_want_write(mnt); /* we'll drop it in any case */ | 793 | ro = mnt_want_write(mnt); /* we'll drop it in any case */ |
794 | error = 0; | 794 | root = mnt->mnt_root; |
795 | inode_lock(d_inode(root)); | 795 | inode_lock(d_inode(root)); |
796 | path.dentry = lookup_one_len(name->name, root, strlen(name->name)); | 796 | path.dentry = lookup_one_len(name->name, root, strlen(name->name)); |
797 | if (IS_ERR(path.dentry)) { | 797 | if (IS_ERR(path.dentry)) { |
@@ -799,38 +799,14 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, | |||
799 | goto out_putfd; | 799 | goto out_putfd; |
800 | } | 800 | } |
801 | path.mnt = mntget(mnt); | 801 | path.mnt = mntget(mnt); |
802 | 802 | error = prepare_open(path.dentry, oflag, ro, mode, name, attr); | |
803 | if (oflag & O_CREAT) { | 803 | if (!error) { |
804 | if (d_really_is_positive(path.dentry)) { /* entry already exists */ | 804 | struct file *file = dentry_open(&path, oflag, current_cred()); |
805 | audit_inode(name, path.dentry, 0); | 805 | if (!IS_ERR(file)) |
806 | if (oflag & O_EXCL) { | 806 | fd_install(fd, file); |
807 | error = -EEXIST; | 807 | else |
808 | goto out; | 808 | error = PTR_ERR(file); |
809 | } | ||
810 | filp = do_open(&path, oflag); | ||
811 | } else { | ||
812 | if (ro) { | ||
813 | error = ro; | ||
814 | goto out; | ||
815 | } | ||
816 | audit_inode_parent_hidden(name, root); | ||
817 | filp = do_create(ipc_ns, d_inode(root), &path, | ||
818 | oflag, mode, attr); | ||
819 | } | ||
820 | } else { | ||
821 | if (d_really_is_negative(path.dentry)) { | ||
822 | error = -ENOENT; | ||
823 | goto out; | ||
824 | } | ||
825 | audit_inode(name, path.dentry, 0); | ||
826 | filp = do_open(&path, oflag); | ||
827 | } | 809 | } |
828 | |||
829 | if (!IS_ERR(filp)) | ||
830 | fd_install(fd, filp); | ||
831 | else | ||
832 | error = PTR_ERR(filp); | ||
833 | out: | ||
834 | path_put(&path); | 810 | path_put(&path); |
835 | out_putfd: | 811 | out_putfd: |
836 | if (error) { | 812 | if (error) { |
@@ -864,6 +840,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) | |||
864 | struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; | 840 | struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; |
865 | struct vfsmount *mnt = ipc_ns->mq_mnt; | 841 | struct vfsmount *mnt = ipc_ns->mq_mnt; |
866 | 842 | ||
843 | if (!mnt) | ||
844 | return -ENOENT; | ||
845 | |||
867 | name = getname(u_name); | 846 | name = getname(u_name); |
868 | if (IS_ERR(name)) | 847 | if (IS_ERR(name)) |
869 | return PTR_ERR(name); | 848 | return PTR_ERR(name); |
@@ -1590,28 +1569,26 @@ int mq_init_ns(struct ipc_namespace *ns) | |||
1590 | ns->mq_msgsize_max = DFLT_MSGSIZEMAX; | 1569 | ns->mq_msgsize_max = DFLT_MSGSIZEMAX; |
1591 | ns->mq_msg_default = DFLT_MSG; | 1570 | ns->mq_msg_default = DFLT_MSG; |
1592 | ns->mq_msgsize_default = DFLT_MSGSIZE; | 1571 | ns->mq_msgsize_default = DFLT_MSGSIZE; |
1572 | ns->mq_mnt = NULL; | ||
1593 | 1573 | ||
1594 | ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); | ||
1595 | if (IS_ERR(ns->mq_mnt)) { | ||
1596 | int err = PTR_ERR(ns->mq_mnt); | ||
1597 | ns->mq_mnt = NULL; | ||
1598 | return err; | ||
1599 | } | ||
1600 | return 0; | 1574 | return 0; |
1601 | } | 1575 | } |
1602 | 1576 | ||
1603 | void mq_clear_sbinfo(struct ipc_namespace *ns) | 1577 | void mq_clear_sbinfo(struct ipc_namespace *ns) |
1604 | { | 1578 | { |
1605 | ns->mq_mnt->mnt_sb->s_fs_info = NULL; | 1579 | if (ns->mq_mnt) |
1580 | ns->mq_mnt->mnt_sb->s_fs_info = NULL; | ||
1606 | } | 1581 | } |
1607 | 1582 | ||
1608 | void mq_put_mnt(struct ipc_namespace *ns) | 1583 | void mq_put_mnt(struct ipc_namespace *ns) |
1609 | { | 1584 | { |
1610 | kern_unmount(ns->mq_mnt); | 1585 | if (ns->mq_mnt) |
1586 | kern_unmount(ns->mq_mnt); | ||
1611 | } | 1587 | } |
1612 | 1588 | ||
1613 | static int __init init_mqueue_fs(void) | 1589 | static int __init init_mqueue_fs(void) |
1614 | { | 1590 | { |
1591 | struct vfsmount *m; | ||
1615 | int error; | 1592 | int error; |
1616 | 1593 | ||
1617 | mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", | 1594 | mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", |
@@ -1633,6 +1610,10 @@ static int __init init_mqueue_fs(void) | |||
1633 | if (error) | 1610 | if (error) |
1634 | goto out_filesystem; | 1611 | goto out_filesystem; |
1635 | 1612 | ||
1613 | m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns); | ||
1614 | if (IS_ERR(m)) | ||
1615 | goto out_filesystem; | ||
1616 | init_ipc_ns.mq_mnt = m; | ||
1636 | return 0; | 1617 | return 0; |
1637 | 1618 | ||
1638 | out_filesystem: | 1619 | out_filesystem: |