11 files changed, 191 insertions, 42 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index da3f32f1a4e4..6ce72d8d1ee1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -46,6 +46,12 @@ config FS_DAX
          or if unsure, say N.  Saying Y will increase the size of the kernel
          by about 5kB.
+config FS_DAX_PMD
+        bool
+        default FS_DAX
+        depends on FS_DAX
+        depends on BROKEN
 endif # BLOCK
 # Posix ACL utility routines
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bb0dfb1c7af1..c25639e907bd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
                        struct page *page)
 {
        const struct block_device_operations *ops = bdev->bd_disk->fops;
+        int result = -EOPNOTSUPP;
        if (!ops->rw_page || bdev_get_integrity(bdev))
-                return -EOPNOTSUPP;
+                return result;
-        return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+        result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+        if (result)
+                return result;
+        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+        blk_queue_exit(bdev->bd_queue);
+        return result;
 }
 EXPORT_SYMBOL_GPL(bdev_read_page);
@@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
        int result;
        int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
        const struct block_device_operations *ops = bdev->bd_disk->fops;
        if (!ops->rw_page || bdev_get_integrity(bdev))
                return -EOPNOTSUPP;
+        result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+        if (result)
+                return result;
        set_page_writeback(page);
        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
        if (result)
                end_page_writeback(page);
        else
                unlock_page(page);
+        blk_queue_exit(bdev->bd_queue);
        return result;
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 7a6b02f72787..c0f3da3926a0 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -879,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
        loff_t pos, eof;
        size_t len;
        void *data;
-        int ret;
+        int ret = -ENOBUFS;
        ASSERT(op != NULL);
        ASSERT(page != NULL);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c81ce7f200a6..a7a1b218f308 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = {
        .iterate        = configfs_readdir,
 };
+/**
+ * configfs_register_group() - create a parent-child relation between two groups
+ * @parent_group:       parent group
+ * @group:              child group
+ *
+ * Links the groups, creates a dentry for the child and attaches it to the
+ * parent dentry.  NOTE(review): link_group() is not undone here on failure.
+ *
+ * Return: 0 on success, negative errno code on error
+ */
+int configfs_register_group(struct config_group *parent_group,
+                            struct config_group *group)
+{
+        struct configfs_subsystem *subsys = parent_group->cg_subsys;
+        struct dentry *parent;
+        int ret;
+        mutex_lock(&subsys->su_mutex);
+        link_group(parent_group, group);
+        mutex_unlock(&subsys->su_mutex);
+        parent = parent_group->cg_item.ci_dentry;
+        mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+        ret = create_default_group(parent_group, group);
+        if (!ret) {
+                spin_lock(&configfs_dirent_lock);
+                configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+                spin_unlock(&configfs_dirent_lock);
+        }
+        mutex_unlock(&d_inode(parent)->i_mutex);
+        return ret;
+}
+EXPORT_SYMBOL(configfs_register_group);
+/**
+ * configfs_unregister_group() - unregisters a child group from its parent
+ * @group: child group to be unregistered
+ *
+ * Undoes configfs_register_group(): removes the dentry, then unlinks @group.
+ */
+void configfs_unregister_group(struct config_group *group)
+{
+        struct configfs_subsystem *subsys = group->cg_subsys;
+        struct dentry *dentry = group->cg_item.ci_dentry;
+        struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
+        mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+        spin_lock(&configfs_dirent_lock);
+        configfs_detach_prep(dentry, NULL);
+        spin_unlock(&configfs_dirent_lock);
+        configfs_detach_group(&group->cg_item);
+        d_inode(dentry)->i_flags |= S_DEAD;
+        dont_mount(dentry);
+        d_delete(dentry);
+        mutex_unlock(&d_inode(parent)->i_mutex);
+        dput(dentry);
+        mutex_lock(&subsys->su_mutex);
+        unlink_group(group);
+        mutex_unlock(&subsys->su_mutex);
+}
+EXPORT_SYMBOL(configfs_unregister_group);
+/**
+ * configfs_register_default_group() - allocates and registers a child group
+ * @parent_group:       parent group
+ * @name:               child group name
+ * @item_type:          child item type description
+ *
+ * Boilerplate to allocate and register a child group with its parent. We need
+ * kzalloc'ed memory because the child's default_group is initially empty.
+ *
+ * Return: allocated config group or ERR_PTR() on error
+ */
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+                                const char *name,
+                                struct config_item_type *item_type)
+{
+        int ret;
+        struct config_group *group;
+        group = kzalloc(sizeof(*group), GFP_KERNEL);
+        if (!group)
+                return ERR_PTR(-ENOMEM);
+        config_group_init_type_name(group, name, item_type);
+        ret = configfs_register_group(parent_group, group);
+        if (ret) {
+                kfree(group);
+                return ERR_PTR(ret);
+        }
+        return group;
+}
+EXPORT_SYMBOL(configfs_register_default_group);
+/**
+ * configfs_unregister_default_group() - unregisters and frees a child group
+ * @group:      child group to unregister and kfree()
+ */
+void configfs_unregister_default_group(struct config_group *group)
+{
+        configfs_unregister_group(group);
+        kfree(group);
+}
+EXPORT_SYMBOL(configfs_unregister_default_group);
 int configfs_register_subsystem(struct configfs_subsystem *subsys)
 {
        int err;
diff --git a/fs/dax.c b/fs/dax.c
index d1e5cb7311a1..43671b68220e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        unsigned long pfn;
        int result = 0;
+        /* dax pmd mappings are broken wrt gup and fork */
+        if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
+                return VM_FAULT_FALLBACK;
        /* Fall back to PTEs if we're going to COW */
        if (write && !(vma->vm_flags & VM_SHARED))
                return VM_FAULT_FALLBACK;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3a71cea68420..748d35afc902 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb)
                        /* Fall through */
                case Opt_dax:
 #ifdef CONFIG_FS_DAX
+                        ext2_msg(sb, KERN_WARNING,
+                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
                        set_opt(sbi->s_mount_opt, DAX);
 #else
                        ext2_msg(sb, KERN_INFO, "dax option not supported");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 753f4e68b820..c9ab67da6e5a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                }
                sbi->s_jquota_fmt = m->mount_opt;
 #endif
-#ifndef CONFIG_FS_DAX
        } else if (token == Opt_dax) {
+#ifdef CONFIG_FS_DAX
+                ext4_msg(sb, KERN_WARNING,
+                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+                        sbi->s_mount_opt |= m->mount_opt;
+#else
                ext4_msg(sb, KERN_INFO, "dax option not supported");
                return -1;
 #endif
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4afc4d9d2e41..8b2127ffb226 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -610,9 +610,9 @@ parse_record:
                int status = fat_parse_long(inode, &cpos, &bh, &de,
                                            &unicode, &nr_slots);
                if (status < 0) {
-                        ctx->pos = cpos;
+                        bh = NULL;
                        ret = status;
-                        goto out;
+                        goto end_of_dir;
                } else if (status == PARSE_INVALID)
                        goto record_end;
                else if (status == PARSE_NOT_LONGNAME)
@@ -654,8 +654,9 @@ parse_record:
        fill_len = short_len;
 start_filldir:
-        if (!fake_offset)
+        ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
-                ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
+        if (fake_offset && ctx->pos < 2)
+                ctx->pos = 2;
        if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
                if (!dir_emit_dot(file, ctx))
@@ -681,14 +682,19 @@ record_end:
        fake_offset = 0;
        ctx->pos = cpos;
        goto get_new;
 end_of_dir:
-        ctx->pos = cpos;
+        if (fake_offset && cpos < 2)
+                ctx->pos = 2;
+        else
+                ctx->pos = cpos;
 fill_failed:
        brelse(bh);
        if (unicode)
                __putname(unicode);
 out:
        mutex_unlock(&sbi->s_lock);
        return ret;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 316adb968b65..de4bdfac0cec 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page)
 * truncation is indicated by end of range being LLONG_MAX
 *      In this case, we first scan the range and release found pages.
 *      After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- *      maps and global counts.
+ *      maps and global counts.  Page faults can not race with truncation
+ *      in this routine.  hugetlb_no_page() prevents page faults in the
+ *      truncated range.  It checks i_size before allocation, and again after
+ *      with the page table lock for the page held.  The same lock must be
+ *      acquired to unmap a page.
 * hole punch is indicated if end is not LLONG_MAX
 *      In the hole punch case we scan the range and release found pages.
 *      Only when releasing a page is the associated region/reserv map
 *      deleted.  The region/reserv map for ranges without associated
- *      pages are not modified.
+ *      pages are not modified.  Page faults can race with hole punch.
+ *      This is indicated if we find a mapped page.
 * Note: If the passed end of range value is beyond the end of file, but
 * not LLONG_MAX this routine still performs a hole punch operation.
 */
@@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
        next = start;
        while (next < end) {
                /*
-                 * Make sure to never grab more pages that we
+                 * Don't grab more pages than the number left in the range.
-                 * might possibly need.
                 */
                if (end - next < lookup_nr)
                        lookup_nr = end - next;
                /*
-                 * This pagevec_lookup() may return pages past 'end',
+                 * When no more pages are found, we are done.
-                 * so we must check for page->index > end.
                 */
-                if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
+                if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
-                        if (next == start)
+                        break;
-                                break;
-                        next = start;
-                        continue;
-                }
                for (i = 0; i < pagevec_count(&pvec); ++i) {
                        struct page *page = pvec.pages[i];
                        u32 hash;
+                        /*
+                         * The page (index) could be beyond end.  This is
+                         * only possible in the punch hole case as end is
+                         * max page offset in the truncate case.
+                         */
+                        next = page->index;
+                        if (next >= end)
+                                break;
                        hash = hugetlb_fault_mutex_hash(h, current->mm,
                                                        &pseudo_vma,
                                                        mapping, next, 0);
                        mutex_lock(&hugetlb_fault_mutex_table[hash]);
                        lock_page(page);
-                        if (page->index >= end) {
+                        if (likely(!page_mapped(page))) {
-                                unlock_page(page);
-                                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-                                next = end;     /* we are done */
-                                break;
-                        }
-                        /*
-                         * If page is mapped, it was faulted in after being
-                         * unmapped.  Do nothing in this race case.  In the
-                         * normal case page is not mapped.
-                         */
-                        if (!page_mapped(page)) {
                                bool rsv_on_error = !PagePrivate(page);
                                /*
                                 * We must free the huge page and remove
@@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
                                                hugetlb_fix_reserve_counts(
                                                        inode, rsv_on_error);
                                }
+                        } else {
+                                /*
+                                 * If page is mapped, it was faulted in after
+                                 * being unmapped.  It indicates a race between
+                                 * hole punch and page fault.  Do nothing in
+                                 * this case.  Getting here in a truncate
+                                 * operation is a bug.
+                                 */
+                                BUG_ON(truncate_op);
                        }
-                        if (page->index > next)
-                                next = page->index;
-                        ++next;
                        unlock_page(page);
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                }
+                ++next;
                huge_pagevec_release(&pvec);
+                cond_resched();
        }
        if (truncate_op)
@@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
        if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
                i_size_write(inode, offset + len);
        inode->i_ctime = CURRENT_TIME;
-        spin_lock(&inode->i_lock);
-        inode->i_private = NULL;
-        spin_unlock(&inode->i_lock);
 out:
        mutex_unlock(&inode->i_mutex);
        return error;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 79b113048eac..0a3f9b594602 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                        switch (rqdata.cmd) {
                                case NCP_LOCK_EX:
                                case NCP_LOCK_SH:
+                                                if (rqdata.timeout < 0)
+                                                        return -EINVAL;
                                                if (rqdata.timeout == 0)
                                                        rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;
                                                else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3b48ac25d8a7..a03f6f433075 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -372,6 +372,8 @@ static int ocfs2_mknod(struct inode *dir,
                mlog_errno(status);
                goto leave;
        }
+        /* update inode->i_mode after mask with "umask". */
+        inode->i_mode = mode;
        handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
                                                            S_ISDIR(mode),

diff --git a/fs/Kconfig b/fs/Kconfig index da3f32f1a4e4..6ce72d8d1ee1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig
@@ -46,6 +46,12 @@ config FS_DAX
46	or if unsure, say N. Saying Y will increase the size of the kernel	46	or if unsure, say N. Saying Y will increase the size of the kernel
47	by about 5kB.	47	by about 5kB.
48		48
		49	config FS_DAX_PMD
		50	bool
		51	default FS_DAX
		52	depends on FS_DAX
		53	depends on BROKEN
		54
49	endif # BLOCK	55	endif # BLOCK
50		56
51	# Posix ACL utility routines	57	# Posix ACL utility routines


diff --git a/fs/block_dev.c b/fs/block_dev.c index bb0dfb1c7af1..c25639e907bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c
@@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
390	struct page *page)	390	struct page *page)
391	{	391	{
392	const struct block_device_operations *ops = bdev->bd_disk->fops;	392	const struct block_device_operations *ops = bdev->bd_disk->fops;
		393	int result = -EOPNOTSUPP;
		394
393	if (!ops->rw_page \|\| bdev_get_integrity(bdev))	395	if (!ops->rw_page \|\| bdev_get_integrity(bdev))
394	return -EOPNOTSUPP;	396	return result;
395	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);	397
		398	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
		399	if (result)
		400	return result;
		401	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
		402	blk_queue_exit(bdev->bd_queue);
		403	return result;
396	}	404	}
397	EXPORT_SYMBOL_GPL(bdev_read_page);	405	EXPORT_SYMBOL_GPL(bdev_read_page);
398		406
@@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
421	int result;	429	int result;
422	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;	430	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
423	const struct block_device_operations *ops = bdev->bd_disk->fops;	431	const struct block_device_operations *ops = bdev->bd_disk->fops;
		432
424	if (!ops->rw_page \|\| bdev_get_integrity(bdev))	433	if (!ops->rw_page \|\| bdev_get_integrity(bdev))
425	return -EOPNOTSUPP;	434	return -EOPNOTSUPP;
		435	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
		436	if (result)
		437	return result;
		438
426	set_page_writeback(page);	439	set_page_writeback(page);
427	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);	440	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
428	if (result)	441	if (result)
429	end_page_writeback(page);	442	end_page_writeback(page);
430	else	443	else
431	unlock_page(page);	444	unlock_page(page);
		445	blk_queue_exit(bdev->bd_queue);
432	return result;	446	return result;
433	}	447	}
434	EXPORT_SYMBOL_GPL(bdev_write_page);	448	EXPORT_SYMBOL_GPL(bdev_write_page);


diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 7a6b02f72787..c0f3da3926a0 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c
@@ -879,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage op, struct page page)
879	loff_t pos, eof;	879	loff_t pos, eof;
880	size_t len;	880	size_t len;
881	void *data;	881	void *data;
882	int ret;	882	int ret = -ENOBUFS;
883		883
884	ASSERT(op != NULL);	884	ASSERT(op != NULL);
885	ASSERT(page != NULL);	885	ASSERT(page != NULL);


diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c81ce7f200a6..a7a1b218f308 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c
@@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = {
1636	.iterate = configfs_readdir,	1636	.iterate = configfs_readdir,
1637	};	1637	};
1638		1638
		1639	/**
		1640	* configfs_register_group - creates a parent-child relation between two groups
		1641	* @parent_group: parent group
		1642	* @group: child group
		1643	*
		1644	* link groups, creates dentry for the child and attaches it to the
		1645	* parent dentry.
		1646	*
		1647	* Return: 0 on success, negative errno code on error
		1648	*/
		1649	int configfs_register_group(struct config_group *parent_group,
		1650	struct config_group *group)
		1651	{
		1652	struct configfs_subsystem *subsys = parent_group->cg_subsys;
		1653	struct dentry *parent;
		1654	int ret;
		1655
		1656	mutex_lock(&subsys->su_mutex);
		1657	link_group(parent_group, group);
		1658	mutex_unlock(&subsys->su_mutex);
		1659
		1660	parent = parent_group->cg_item.ci_dentry;
		1661
		1662	mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
		1663	ret = create_default_group(parent_group, group);
		1664	if (!ret) {
		1665	spin_lock(&configfs_dirent_lock);
		1666	configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
		1667	spin_unlock(&configfs_dirent_lock);
		1668	}
		1669	mutex_unlock(&d_inode(parent)->i_mutex);
		1670	return ret;
		1671	}
		1672	EXPORT_SYMBOL(configfs_register_group);
		1673
		1674	/**
		1675	* configfs_unregister_group() - unregisters a child group from its parent
		1676	* @group: parent group to be unregistered
		1677	*
		1678	* Undoes configfs_register_group()
		1679	*/
		1680	void configfs_unregister_group(struct config_group *group)
		1681	{
		1682	struct configfs_subsystem *subsys = group->cg_subsys;
		1683	struct dentry *dentry = group->cg_item.ci_dentry;
		1684	struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
		1685
		1686	mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
		1687	spin_lock(&configfs_dirent_lock);
		1688	configfs_detach_prep(dentry, NULL);
		1689	spin_unlock(&configfs_dirent_lock);
		1690
		1691	configfs_detach_group(&group->cg_item);
		1692	d_inode(dentry)->i_flags \|= S_DEAD;
		1693	dont_mount(dentry);
		1694	d_delete(dentry);
		1695	mutex_unlock(&d_inode(parent)->i_mutex);
		1696
		1697	dput(dentry);
		1698
		1699	mutex_lock(&subsys->su_mutex);
		1700	unlink_group(group);
		1701	mutex_unlock(&subsys->su_mutex);
		1702	}
		1703	EXPORT_SYMBOL(configfs_unregister_group);
		1704
		1705	/**
		1706	* configfs_register_default_group() - allocates and registers a child group
		1707	* @parent_group: parent group
		1708	* @name: child group name
		1709	* @item_type: child item type description
		1710	*
		1711	* boilerplate to allocate and register a child group with its parent. We need
		1712	* kzalloc'ed memory because child's default_group is initially empty.
		1713	*
		1714	* Return: allocated config group or ERR_PTR() on error
		1715	*/
		1716	struct config_group *
		1717	configfs_register_default_group(struct config_group *parent_group,
		1718	const char *name,
		1719	struct config_item_type *item_type)
		1720	{
		1721	int ret;
		1722	struct config_group *group;
		1723
		1724	group = kzalloc(sizeof(*group), GFP_KERNEL);
		1725	if (!group)
		1726	return ERR_PTR(-ENOMEM);
		1727	config_group_init_type_name(group, name, item_type);
		1728
		1729	ret = configfs_register_group(parent_group, group);
		1730	if (ret) {
		1731	kfree(group);
		1732	return ERR_PTR(ret);
		1733	}
		1734	return group;
		1735	}
		1736	EXPORT_SYMBOL(configfs_register_default_group);
		1737
		1738	/**
		1739	* configfs_unregister_default_group() - unregisters and frees a child group
		1740	* @group: the group to act on
		1741	*/
		1742	void configfs_unregister_default_group(struct config_group *group)
		1743	{
		1744	configfs_unregister_group(group);
		1745	kfree(group);
		1746	}
		1747	EXPORT_SYMBOL(configfs_unregister_default_group);
		1748
1639	int configfs_register_subsystem(struct configfs_subsystem *subsys)	1749	int configfs_register_subsystem(struct configfs_subsystem *subsys)
1640	{	1750	{
1641	int err;	1751	int err;


diff --git a/fs/dax.c b/fs/dax.c index d1e5cb7311a1..43671b68220e 100644 --- a/fs/dax.c +++ b/fs/dax.c
@@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
541	unsigned long pfn;	541	unsigned long pfn;
542	int result = 0;	542	int result = 0;
543		543
		544	/* dax pmd mappings are broken wrt gup and fork */
		545	if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
		546	return VM_FAULT_FALLBACK;
		547
544	/* Fall back to PTEs if we're going to COW */	548	/* Fall back to PTEs if we're going to COW */
545	if (write && !(vma->vm_flags & VM_SHARED))	549	if (write && !(vma->vm_flags & VM_SHARED))
546	return VM_FAULT_FALLBACK;	550	return VM_FAULT_FALLBACK;


diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3a71cea68420..748d35afc902 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c
@@ -569,6 +569,8 @@ static int parse_options(char options, struct super_block sb)
569	/* Fall through */	569	/* Fall through */
570	case Opt_dax:	570	case Opt_dax:
571	#ifdef CONFIG_FS_DAX	571	#ifdef CONFIG_FS_DAX
		572	ext2_msg(sb, KERN_WARNING,
		573	"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
572	set_opt(sbi->s_mount_opt, DAX);	574	set_opt(sbi->s_mount_opt, DAX);
573	#else	575	#else
574	ext2_msg(sb, KERN_INFO, "dax option not supported");	576	ext2_msg(sb, KERN_INFO, "dax option not supported");


diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 753f4e68b820..c9ab67da6e5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c
@@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block sb, char opt, int token,
1664	}	1664	}
1665	sbi->s_jquota_fmt = m->mount_opt;	1665	sbi->s_jquota_fmt = m->mount_opt;
1666	#endif	1666	#endif
1667	#ifndef CONFIG_FS_DAX
1668	} else if (token == Opt_dax) {	1667	} else if (token == Opt_dax) {
		1668	#ifdef CONFIG_FS_DAX
		1669	ext4_msg(sb, KERN_WARNING,
		1670	"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
		1671	sbi->s_mount_opt \|= m->mount_opt;
		1672	#else
1669	ext4_msg(sb, KERN_INFO, "dax option not supported");	1673	ext4_msg(sb, KERN_INFO, "dax option not supported");
1670	return -1;	1674	return -1;
1671	#endif	1675	#endif


diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4afc4d9d2e41..8b2127ffb226 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c
@@ -610,9 +610,9 @@ parse_record:
610	int status = fat_parse_long(inode, &cpos, &bh, &de,	610	int status = fat_parse_long(inode, &cpos, &bh, &de,
611	&unicode, &nr_slots);	611	&unicode, &nr_slots);
612	if (status < 0) {	612	if (status < 0) {
613	ctx->pos = cpos;	613	bh = NULL;
614	ret = status;	614	ret = status;
615	goto out;	615	goto end_of_dir;
616	} else if (status == PARSE_INVALID)	616	} else if (status == PARSE_INVALID)
617	goto record_end;	617	goto record_end;
618	else if (status == PARSE_NOT_LONGNAME)	618	else if (status == PARSE_NOT_LONGNAME)
@@ -654,8 +654,9 @@ parse_record:
654	fill_len = short_len;	654	fill_len = short_len;
655		655
656	start_filldir:	656	start_filldir:
657	if (!fake_offset)	657	ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
658	ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);	658	if (fake_offset && ctx->pos < 2)
		659	ctx->pos = 2;
659		660
660	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {	661	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
661	if (!dir_emit_dot(file, ctx))	662	if (!dir_emit_dot(file, ctx))
@@ -681,14 +682,19 @@ record_end:
681	fake_offset = 0;	682	fake_offset = 0;
682	ctx->pos = cpos;	683	ctx->pos = cpos;
683	goto get_new;	684	goto get_new;
		685
684	end_of_dir:	686	end_of_dir:
685	ctx->pos = cpos;	687	if (fake_offset && cpos < 2)
		688	ctx->pos = 2;
		689	else
		690	ctx->pos = cpos;
686	fill_failed:	691	fill_failed:
687	brelse(bh);	692	brelse(bh);
688	if (unicode)	693	if (unicode)
689	__putname(unicode);	694	__putname(unicode);
690	out:	695	out:
691	mutex_unlock(&sbi->s_lock);	696	mutex_unlock(&sbi->s_lock);
		697
692	return ret;	698	return ret;
693	}	699	}
694		700


diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 316adb968b65..de4bdfac0cec 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c
@@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page)
332	* truncation is indicated by end of range being LLONG_MAX	332	* truncation is indicated by end of range being LLONG_MAX
333	* In this case, we first scan the range and release found pages.	333	* In this case, we first scan the range and release found pages.
334	* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv	334	* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
335	* maps and global counts.	335	* maps and global counts. Page faults can not race with truncation
		336	* in this routine. hugetlb_no_page() prevents page faults in the
		337	* truncated range. It checks i_size before allocation, and again after
		338	* with the page table lock for the page held. The same lock must be
		339	* acquired to unmap a page.
336	* hole punch is indicated if end is not LLONG_MAX	340	* hole punch is indicated if end is not LLONG_MAX
337	* In the hole punch case we scan the range and release found pages.	341	* In the hole punch case we scan the range and release found pages.
338	* Only when releasing a page is the associated region/reserv map	342	* Only when releasing a page is the associated region/reserv map
339	* deleted. The region/reserv map for ranges without associated	343	* deleted. The region/reserv map for ranges without associated
340	* pages are not modified.	344	* pages are not modified. Page faults can race with hole punch.
		345	* This is indicated if we find a mapped page.
341	* Note: If the passed end of range value is beyond the end of file, but	346	* Note: If the passed end of range value is beyond the end of file, but
342	* not LLONG_MAX this routine still performs a hole punch operation.	347	* not LLONG_MAX this routine still performs a hole punch operation.
343	*/	348	*/
@@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
361	next = start;	366	next = start;
362	while (next < end) {	367	while (next < end) {
363	/*	368	/*
364	* Make sure to never grab more pages that we	369	* Don't grab more pages than the number left in the range.
365	* might possibly need.
366	*/	370	*/
367	if (end - next < lookup_nr)	371	if (end - next < lookup_nr)
368	lookup_nr = end - next;	372	lookup_nr = end - next;
369		373
370	/*	374	/*
371	* This pagevec_lookup() may return pages past 'end',	375	* When no more pages are found, we are done.
372	* so we must check for page->index > end.
373	*/	376	*/
374	if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {	377	if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
375	if (next == start)	378	break;
376	break;
377	next = start;
378	continue;
379	}
380		379
381	for (i = 0; i < pagevec_count(&pvec); ++i) {	380	for (i = 0; i < pagevec_count(&pvec); ++i) {
382	struct page *page = pvec.pages[i];	381	struct page *page = pvec.pages[i];
383	u32 hash;	382	u32 hash;
384		383
		384	/*
		385	* The page (index) could be beyond end. This is
		386	* only possible in the punch hole case as end is
		387	* max page offset in the truncate case.
		388	*/
		389	next = page->index;
		390	if (next >= end)
		391	break;
		392
385	hash = hugetlb_fault_mutex_hash(h, current->mm,	393	hash = hugetlb_fault_mutex_hash(h, current->mm,
386	&pseudo_vma,	394	&pseudo_vma,
387	mapping, next, 0);	395	mapping, next, 0);
388	mutex_lock(&hugetlb_fault_mutex_table[hash]);	396	mutex_lock(&hugetlb_fault_mutex_table[hash]);
389		397
390	lock_page(page);	398	lock_page(page);
391	if (page->index >= end) {	399	if (likely(!page_mapped(page))) {
392	unlock_page(page);
393	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
394	next = end; /* we are done */
395	break;
396	}
397
398	/*
399	* If page is mapped, it was faulted in after being
400	* unmapped. Do nothing in this race case. In the
401	* normal case page is not mapped.
402	*/
403	if (!page_mapped(page)) {
404	bool rsv_on_error = !PagePrivate(page);	400	bool rsv_on_error = !PagePrivate(page);
405	/*	401	/*
406	* We must free the huge page and remove	402	* We must free the huge page and remove
@@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
421	hugetlb_fix_reserve_counts(	417	hugetlb_fix_reserve_counts(
422	inode, rsv_on_error);	418	inode, rsv_on_error);
423	}	419	}
		420	} else {
		421	/*
		422	* If page is mapped, it was faulted in after
		423	* being unmapped. It indicates a race between
		424	* hole punch and page fault. Do nothing in
		425	* this case. Getting here in a truncate
		426	* operation is a bug.
		427	*/
		428	BUG_ON(truncate_op);
424	}	429	}
425		430
426	if (page->index > next)
427	next = page->index;
428
429	++next;
430	unlock_page(page);	431	unlock_page(page);
431
432	mutex_unlock(&hugetlb_fault_mutex_table[hash]);	432	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
433	}	433	}
		434	++next;
434	huge_pagevec_release(&pvec);	435	huge_pagevec_release(&pvec);
		436	cond_resched();
435	}	437	}
436		438
437	if (truncate_op)	439	if (truncate_op)
@@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
647	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)	649	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
648	i_size_write(inode, offset + len);	650	i_size_write(inode, offset + len);
649	inode->i_ctime = CURRENT_TIME;	651	inode->i_ctime = CURRENT_TIME;
650	spin_lock(&inode->i_lock);
651	inode->i_private = NULL;
652	spin_unlock(&inode->i_lock);
653	out:	652	out:
654	mutex_unlock(&inode->i_mutex);	653	mutex_unlock(&inode->i_mutex);
655	return error;	654	return error;


diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 79b113048eac..0a3f9b594602 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c
@@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
525	switch (rqdata.cmd) {	525	switch (rqdata.cmd) {
526	case NCP_LOCK_EX:	526	case NCP_LOCK_EX:
527	case NCP_LOCK_SH:	527	case NCP_LOCK_SH:
		528	if (rqdata.timeout < 0)
		529	return -EINVAL;
528	if (rqdata.timeout == 0)	530	if (rqdata.timeout == 0)
529	rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;	531	rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;
530	else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)	532	else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)


diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b48ac25d8a7..a03f6f433075 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c
@@ -372,6 +372,8 @@ static int ocfs2_mknod(struct inode *dir,
372	mlog_errno(status);	372	mlog_errno(status);
373	goto leave;	373	goto leave;
374	}	374	}
		375	/* update inode->i_mode after mask with "umask". */
		376	inode->i_mode = mode;
375		377
376	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,	378	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
377	S_ISDIR(mode),	379	S_ISDIR(mode),