about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 6
-rw-r--r-- fs/block_dev.c 18
-rw-r--r-- fs/cachefiles/rdwr.c 2
-rw-r--r-- fs/configfs/dir.c 110
-rw-r--r-- fs/dax.c 4
-rw-r--r-- fs/ext2/super.c 2
-rw-r--r-- fs/ext4/super.c 6
-rw-r--r-- fs/fat/dir.c 16
-rw-r--r-- fs/hugetlbfs/inode.c 65
-rw-r--r-- fs/ncpfs/ioctl.c 2
-rw-r--r-- fs/ocfs2/namei.c 2
11 files changed, 191 insertions, 42 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index da3f32f1a4e4..6ce72d8d1ee1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -46,6 +46,12 @@ config FS_DAX
46 or if unsure, say N. Saying Y will increase the size of the kernel 46 or if unsure, say N. Saying Y will increase the size of the kernel
47 by about 5kB. 47 by about 5kB.
48 48
49config FS_DAX_PMD
50 bool
51 default FS_DAX
52 depends on FS_DAX
53 depends on BROKEN
54
49endif # BLOCK 55endif # BLOCK
50 56
51# Posix ACL utility routines 57# Posix ACL utility routines
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bb0dfb1c7af1..c25639e907bd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
390 struct page *page) 390 struct page *page)
391{ 391{
392 const struct block_device_operations *ops = bdev->bd_disk->fops; 392 const struct block_device_operations *ops = bdev->bd_disk->fops;
393 int result = -EOPNOTSUPP;
394
393 if (!ops->rw_page || bdev_get_integrity(bdev)) 395 if (!ops->rw_page || bdev_get_integrity(bdev))
394 return -EOPNOTSUPP; 396 return result;
395 return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); 397
398 result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
399 if (result)
400 return result;
401 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
402 blk_queue_exit(bdev->bd_queue);
403 return result;
396} 404}
397EXPORT_SYMBOL_GPL(bdev_read_page); 405EXPORT_SYMBOL_GPL(bdev_read_page);
398 406
@@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
421 int result; 429 int result;
422 int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; 430 int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
423 const struct block_device_operations *ops = bdev->bd_disk->fops; 431 const struct block_device_operations *ops = bdev->bd_disk->fops;
432
424 if (!ops->rw_page || bdev_get_integrity(bdev)) 433 if (!ops->rw_page || bdev_get_integrity(bdev))
425 return -EOPNOTSUPP; 434 return -EOPNOTSUPP;
435 result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
436 if (result)
437 return result;
438
426 set_page_writeback(page); 439 set_page_writeback(page);
427 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); 440 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
428 if (result) 441 if (result)
429 end_page_writeback(page); 442 end_page_writeback(page);
430 else 443 else
431 unlock_page(page); 444 unlock_page(page);
445 blk_queue_exit(bdev->bd_queue);
432 return result; 446 return result;
433} 447}
434EXPORT_SYMBOL_GPL(bdev_write_page); 448EXPORT_SYMBOL_GPL(bdev_write_page);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 7a6b02f72787..c0f3da3926a0 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -879,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
879 loff_t pos, eof; 879 loff_t pos, eof;
880 size_t len; 880 size_t len;
881 void *data; 881 void *data;
882 int ret; 882 int ret = -ENOBUFS;
883 883
884 ASSERT(op != NULL); 884 ASSERT(op != NULL);
885 ASSERT(page != NULL); 885 ASSERT(page != NULL);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c81ce7f200a6..a7a1b218f308 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = {
1636 .iterate = configfs_readdir, 1636 .iterate = configfs_readdir,
1637}; 1637};
1638 1638
1639/**
1640 * configfs_register_group - creates a parent-child relation between two groups
1641 * @parent_group: parent group
1642 * @group: child group
1643 *
1644 * Links the groups, creates a dentry for the child and attaches it to the
1645 * parent dentry.
1646 *
1647 * Return: 0 on success, negative errno code on error
1648 */
1649int configfs_register_group(struct config_group *parent_group,
1650 struct config_group *group)
1651{
1652 struct configfs_subsystem *subsys = parent_group->cg_subsys;
1653 struct dentry *parent;
1654 int ret;
1655
1656 mutex_lock(&subsys->su_mutex);
1657 link_group(parent_group, group);
1658 mutex_unlock(&subsys->su_mutex);
1659
1660 parent = parent_group->cg_item.ci_dentry;
1661
1662 mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
1663 ret = create_default_group(parent_group, group);
1664 if (!ret) {
1665 spin_lock(&configfs_dirent_lock);
1666 configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
1667 spin_unlock(&configfs_dirent_lock);
1668 }
1669 mutex_unlock(&d_inode(parent)->i_mutex);
1670 return ret;
1671}
1672EXPORT_SYMBOL(configfs_register_group);
1673
1674/**
1675 * configfs_unregister_group() - unregisters a child group from its parent
1676 * @group: parent group to be unregistered
1677 *
1678 * Undoes configfs_register_group()
1679 */
1680void configfs_unregister_group(struct config_group *group)
1681{
1682 struct configfs_subsystem *subsys = group->cg_subsys;
1683 struct dentry *dentry = group->cg_item.ci_dentry;
1684 struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
1685
1686 mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
1687 spin_lock(&configfs_dirent_lock);
1688 configfs_detach_prep(dentry, NULL);
1689 spin_unlock(&configfs_dirent_lock);
1690
1691 configfs_detach_group(&group->cg_item);
1692 d_inode(dentry)->i_flags |= S_DEAD;
1693 dont_mount(dentry);
1694 d_delete(dentry);
1695 mutex_unlock(&d_inode(parent)->i_mutex);
1696
1697 dput(dentry);
1698
1699 mutex_lock(&subsys->su_mutex);
1700 unlink_group(group);
1701 mutex_unlock(&subsys->su_mutex);
1702}
1703EXPORT_SYMBOL(configfs_unregister_group);
1704
1705/**
1706 * configfs_register_default_group() - allocates and registers a child group
1707 * @parent_group: parent group
1708 * @name: child group name
1709 * @item_type: child item type description
1710 *
1711 * boilerplate to allocate and register a child group with its parent. We need
1712 * kzalloc'ed memory because child's default_group is initially empty.
1713 *
1714 * Return: allocated config group or ERR_PTR() on error
1715 */
1716struct config_group *
1717configfs_register_default_group(struct config_group *parent_group,
1718 const char *name,
1719 struct config_item_type *item_type)
1720{
1721 int ret;
1722 struct config_group *group;
1723
1724 group = kzalloc(sizeof(*group), GFP_KERNEL);
1725 if (!group)
1726 return ERR_PTR(-ENOMEM);
1727 config_group_init_type_name(group, name, item_type);
1728
1729 ret = configfs_register_group(parent_group, group);
1730 if (ret) {
1731 kfree(group);
1732 return ERR_PTR(ret);
1733 }
1734 return group;
1735}
1736EXPORT_SYMBOL(configfs_register_default_group);
1737
1738/**
1739 * configfs_unregister_default_group() - unregisters and frees a child group
1740 * @group: the group to act on
1741 */
1742void configfs_unregister_default_group(struct config_group *group)
1743{
1744 configfs_unregister_group(group);
1745 kfree(group);
1746}
1747EXPORT_SYMBOL(configfs_unregister_default_group);
1748
1639int configfs_register_subsystem(struct configfs_subsystem *subsys) 1749int configfs_register_subsystem(struct configfs_subsystem *subsys)
1640{ 1750{
1641 int err; 1751 int err;
diff --git a/fs/dax.c b/fs/dax.c
index d1e5cb7311a1..43671b68220e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
541 unsigned long pfn; 541 unsigned long pfn;
542 int result = 0; 542 int result = 0;
543 543
544 /* dax pmd mappings are broken wrt gup and fork */
545 if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
546 return VM_FAULT_FALLBACK;
547
544 /* Fall back to PTEs if we're going to COW */ 548 /* Fall back to PTEs if we're going to COW */
545 if (write && !(vma->vm_flags & VM_SHARED)) 549 if (write && !(vma->vm_flags & VM_SHARED))
546 return VM_FAULT_FALLBACK; 550 return VM_FAULT_FALLBACK;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3a71cea68420..748d35afc902 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb)
569 /* Fall through */ 569 /* Fall through */
570 case Opt_dax: 570 case Opt_dax:
571#ifdef CONFIG_FS_DAX 571#ifdef CONFIG_FS_DAX
572 ext2_msg(sb, KERN_WARNING,
573 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
572 set_opt(sbi->s_mount_opt, DAX); 574 set_opt(sbi->s_mount_opt, DAX);
573#else 575#else
574 ext2_msg(sb, KERN_INFO, "dax option not supported"); 576 ext2_msg(sb, KERN_INFO, "dax option not supported");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 753f4e68b820..c9ab67da6e5a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1664 } 1664 }
1665 sbi->s_jquota_fmt = m->mount_opt; 1665 sbi->s_jquota_fmt = m->mount_opt;
1666#endif 1666#endif
1667#ifndef CONFIG_FS_DAX
1668 } else if (token == Opt_dax) { 1667 } else if (token == Opt_dax) {
1668#ifdef CONFIG_FS_DAX
1669 ext4_msg(sb, KERN_WARNING,
1670 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1671 sbi->s_mount_opt |= m->mount_opt;
1672#else
1669 ext4_msg(sb, KERN_INFO, "dax option not supported"); 1673 ext4_msg(sb, KERN_INFO, "dax option not supported");
1670 return -1; 1674 return -1;
1671#endif 1675#endif
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4afc4d9d2e41..8b2127ffb226 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -610,9 +610,9 @@ parse_record:
610 int status = fat_parse_long(inode, &cpos, &bh, &de, 610 int status = fat_parse_long(inode, &cpos, &bh, &de,
611 &unicode, &nr_slots); 611 &unicode, &nr_slots);
612 if (status < 0) { 612 if (status < 0) {
613 ctx->pos = cpos; 613 bh = NULL;
614 ret = status; 614 ret = status;
615 goto out; 615 goto end_of_dir;
616 } else if (status == PARSE_INVALID) 616 } else if (status == PARSE_INVALID)
617 goto record_end; 617 goto record_end;
618 else if (status == PARSE_NOT_LONGNAME) 618 else if (status == PARSE_NOT_LONGNAME)
@@ -654,8 +654,9 @@ parse_record:
654 fill_len = short_len; 654 fill_len = short_len;
655 655
656start_filldir: 656start_filldir:
657 if (!fake_offset) 657 ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
658 ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); 658 if (fake_offset && ctx->pos < 2)
659 ctx->pos = 2;
659 660
660 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { 661 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
661 if (!dir_emit_dot(file, ctx)) 662 if (!dir_emit_dot(file, ctx))
@@ -681,14 +682,19 @@ record_end:
681 fake_offset = 0; 682 fake_offset = 0;
682 ctx->pos = cpos; 683 ctx->pos = cpos;
683 goto get_new; 684 goto get_new;
685
684end_of_dir: 686end_of_dir:
685 ctx->pos = cpos; 687 if (fake_offset && cpos < 2)
688 ctx->pos = 2;
689 else
690 ctx->pos = cpos;
686fill_failed: 691fill_failed:
687 brelse(bh); 692 brelse(bh);
688 if (unicode) 693 if (unicode)
689 __putname(unicode); 694 __putname(unicode);
690out: 695out:
691 mutex_unlock(&sbi->s_lock); 696 mutex_unlock(&sbi->s_lock);
697
692 return ret; 698 return ret;
693} 699}
694 700
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 316adb968b65..de4bdfac0cec 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page)
332 * truncation is indicated by end of range being LLONG_MAX 332 * truncation is indicated by end of range being LLONG_MAX
333 * In this case, we first scan the range and release found pages. 333 * In this case, we first scan the range and release found pages.
334 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv 334 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
335 * maps and global counts. 335 * maps and global counts. Page faults can not race with truncation
336 * in this routine. hugetlb_no_page() prevents page faults in the
337 * truncated range. It checks i_size before allocation, and again after
338 * with the page table lock for the page held. The same lock must be
339 * acquired to unmap a page.
336 * hole punch is indicated if end is not LLONG_MAX 340 * hole punch is indicated if end is not LLONG_MAX
337 * In the hole punch case we scan the range and release found pages. 341 * In the hole punch case we scan the range and release found pages.
338 * Only when releasing a page is the associated region/reserv map 342 * Only when releasing a page is the associated region/reserv map
339 * deleted. The region/reserv map for ranges without associated 343 * deleted. The region/reserv map for ranges without associated
340 * pages are not modified. 344 * pages are not modified. Page faults can race with hole punch.
345 * This is indicated if we find a mapped page.
341 * Note: If the passed end of range value is beyond the end of file, but 346 * Note: If the passed end of range value is beyond the end of file, but
342 * not LLONG_MAX this routine still performs a hole punch operation. 347 * not LLONG_MAX this routine still performs a hole punch operation.
343 */ 348 */
@@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
361 next = start; 366 next = start;
362 while (next < end) { 367 while (next < end) {
363 /* 368 /*
364 * Make sure to never grab more pages that we 369 * Don't grab more pages than the number left in the range.
365 * might possibly need.
366 */ 370 */
367 if (end - next < lookup_nr) 371 if (end - next < lookup_nr)
368 lookup_nr = end - next; 372 lookup_nr = end - next;
369 373
370 /* 374 /*
371 * This pagevec_lookup() may return pages past 'end', 375 * When no more pages are found, we are done.
372 * so we must check for page->index > end.
373 */ 376 */
374 if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) { 377 if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
375 if (next == start) 378 break;
376 break;
377 next = start;
378 continue;
379 }
380 379
381 for (i = 0; i < pagevec_count(&pvec); ++i) { 380 for (i = 0; i < pagevec_count(&pvec); ++i) {
382 struct page *page = pvec.pages[i]; 381 struct page *page = pvec.pages[i];
383 u32 hash; 382 u32 hash;
384 383
384 /*
385 * The page (index) could be beyond end. This is
386 * only possible in the punch hole case as end is
387 * max page offset in the truncate case.
388 */
389 next = page->index;
390 if (next >= end)
391 break;
392
385 hash = hugetlb_fault_mutex_hash(h, current->mm, 393 hash = hugetlb_fault_mutex_hash(h, current->mm,
386 &pseudo_vma, 394 &pseudo_vma,
387 mapping, next, 0); 395 mapping, next, 0);
388 mutex_lock(&hugetlb_fault_mutex_table[hash]); 396 mutex_lock(&hugetlb_fault_mutex_table[hash]);
389 397
390 lock_page(page); 398 lock_page(page);
391 if (page->index >= end) { 399 if (likely(!page_mapped(page))) {
392 unlock_page(page);
393 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
394 next = end; /* we are done */
395 break;
396 }
397
398 /*
399 * If page is mapped, it was faulted in after being
400 * unmapped. Do nothing in this race case. In the
401 * normal case page is not mapped.
402 */
403 if (!page_mapped(page)) {
404 bool rsv_on_error = !PagePrivate(page); 400 bool rsv_on_error = !PagePrivate(page);
405 /* 401 /*
406 * We must free the huge page and remove 402 * We must free the huge page and remove
@@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
421 hugetlb_fix_reserve_counts( 417 hugetlb_fix_reserve_counts(
422 inode, rsv_on_error); 418 inode, rsv_on_error);
423 } 419 }
420 } else {
421 /*
422 * If page is mapped, it was faulted in after
423 * being unmapped. It indicates a race between
424 * hole punch and page fault. Do nothing in
425 * this case. Getting here in a truncate
426 * operation is a bug.
427 */
428 BUG_ON(truncate_op);
424 } 429 }
425 430
426 if (page->index > next)
427 next = page->index;
428
429 ++next;
430 unlock_page(page); 431 unlock_page(page);
431
432 mutex_unlock(&hugetlb_fault_mutex_table[hash]); 432 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
433 } 433 }
434 ++next;
434 huge_pagevec_release(&pvec); 435 huge_pagevec_release(&pvec);
436 cond_resched();
435 } 437 }
436 438
437 if (truncate_op) 439 if (truncate_op)
@@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
647 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) 649 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
648 i_size_write(inode, offset + len); 650 i_size_write(inode, offset + len);
649 inode->i_ctime = CURRENT_TIME; 651 inode->i_ctime = CURRENT_TIME;
650 spin_lock(&inode->i_lock);
651 inode->i_private = NULL;
652 spin_unlock(&inode->i_lock);
653out: 652out:
654 mutex_unlock(&inode->i_mutex); 653 mutex_unlock(&inode->i_mutex);
655 return error; 654 return error;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 79b113048eac..0a3f9b594602 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
525 switch (rqdata.cmd) { 525 switch (rqdata.cmd) {
526 case NCP_LOCK_EX: 526 case NCP_LOCK_EX:
527 case NCP_LOCK_SH: 527 case NCP_LOCK_SH:
528 if (rqdata.timeout < 0)
529 return -EINVAL;
528 if (rqdata.timeout == 0) 530 if (rqdata.timeout == 0)
529 rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; 531 rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;
530 else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT) 532 else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3b48ac25d8a7..a03f6f433075 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -372,6 +372,8 @@ static int ocfs2_mknod(struct inode *dir,
372 mlog_errno(status); 372 mlog_errno(status);
373 goto leave; 373 goto leave;
374 } 374 }
375 /* update inode->i_mode after mask with "umask". */
376 inode->i_mode = mode;
375 377
376 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 378 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
377 S_ISDIR(mode), 379 S_ISDIR(mode),