author		David S. Miller <davem@davemloft.net>	2018-03-23 11:24:57 -0400
committer	David S. Miller <davem@davemloft.net>	2018-03-23 11:31:58 -0400
commit		03fe2debbb2771fb90881e4ce8109b09cf772a5c
tree		fbaf8738296b2e9dcba81c6daef2d515b6c4948c /fs
parent		6686c459e1449a3ee5f3fd313b0a559ace7a700e
parent		f36b7534b83357cf52e747905de6d65b4f7c2512
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Fun set of conflict resolutions here...
For the mac80211 stuff, these were fortunately just parallel
adds. Trivially resolved.
In drivers/net/phy/phy.c we had a bug fix in 'net' that moved the
function phy_disable_interrupts() earlier in the file, whilst in
'net-next' the phy_error() call from this function was removed.
In net/ipv4/xfrm4_policy.c, David Ahern's changes to remove the
'rt_table_id' member of rtable collided with a bug fix in 'net' that
added a new struct member, 'rt_mtu_locked', which needs to be copied
over here.
The mlxsw driver conflict consisted of net-next separating
the span code and definitions into separate files, whilst
a 'net' bug fix made some changes to that moved code.
The mlx5 infiniband conflict resolution was quite non-trivial; the
RDMA tree's merge commit was used as a guide here, and here are
their notes:
====================
Bug fixes found by the syzkaller bot were taken into the for-rc
branch after development for the 4.17 merge window had already started
being taken into the for-next branch, so there were fairly non-trivial
merge issues that needed to be resolved between the for-rc branch
and the for-next branch. This merge resolves those conflicts and
provides a unified base upon which ongoing development for 4.17 can
be based.
Conflicts:
drivers/infiniband/hw/mlx5/main.c - Commit 42cea83f9524
(IB/mlx5: Fix cleanup order on unload) added to for-rc and
commit b5ca15ad7e61 (IB/mlx5: Add proper representors support)
added as part of the devel cycle both needed to modify the
init/de-init functions used by mlx5. To support the new
representors, the new functions added by the cleanup patch
needed to be made non-static, and the init/de-init list
added by the representors patch needed to be modified to
match the init/de-init list changes made by the cleanup
patch.
Updates:
drivers/infiniband/hw/mlx5/mlx5_ib.h - Update function
prototypes added by representors patch to reflect new function
names as changed by cleanup patch
drivers/infiniband/hw/mlx5/ib_rep.c - Update init/de-init
stage list to match new order from cleanup patch
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs')
-rw-r--r--	fs/aio.c			 44
-rw-r--r--	fs/btrfs/backref.c		  1
-rw-r--r--	fs/btrfs/raid56.c		  1
-rw-r--r--	fs/btrfs/sysfs.c		  8
-rw-r--r--	fs/btrfs/transaction.c		 20
-rw-r--r--	fs/dcache.c			 21
-rw-r--r--	fs/gfs2/bmap.c			  3
-rw-r--r--	fs/hugetlbfs/inode.c		 17
-rw-r--r--	fs/namei.c			  5
-rw-r--r--	fs/nfs/direct.c			  2
-rw-r--r--	fs/nfs/pnfs.c			 13
-rw-r--r--	fs/nfs/super.c			  2
-rw-r--r--	fs/nfs/write.c			 83
-rw-r--r--	fs/nfsd/nfs4state.c		 62
-rw-r--r--	fs/overlayfs/Kconfig		 14
-rw-r--r--	fs/overlayfs/export.c		216
-rw-r--r--	fs/overlayfs/inode.c		 58
-rw-r--r--	fs/overlayfs/namei.c		  6
-rw-r--r--	fs/overlayfs/overlayfs.h	  1
-rw-r--r--	fs/overlayfs/super.c		  1
-rw-r--r--	fs/sysfs/symlink.c		  1
-rw-r--r--	fs/xfs/xfs_iomap.c		 42
22 files changed, 436 insertions, 185 deletions
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
 #define AIO_RING_PAGES	8
 
 struct kioctx_table {
 	struct rcu_head		rcu;
 	unsigned		nr;
-	struct kioctx		*table[];
+	struct kioctx __rcu	*table[];
 };
 
 struct kioctx_cpu {
@@ -115,7 +115,8 @@ struct kioctx {
 	struct page		**ring_pages;
 	long			nr_pages;
 
-	struct work_struct	free_work;
+	struct rcu_head		free_rcu;
+	struct work_struct	free_work;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -329,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
 	for (i = 0; i < table->nr; i++) {
 		struct kioctx *ctx;
 
-		ctx = table->table[i];
+		ctx = rcu_dereference(table->table[i]);
 		if (ctx && ctx->aio_ring_file == file) {
 			if (!atomic_read(&ctx->dead)) {
 				ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
 	return cancel(&kiocb->common);
 }
 
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
+ * ->free_work.
+ */
 static void free_ioctx(struct work_struct *work)
 {
 	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work)
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
+static void free_ioctx_rcufn(struct rcu_head *head)
+{
+	struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
+
+	INIT_WORK(&ctx->free_work, free_ioctx);
+	schedule_work(&ctx->free_work);
+}
+
 static void free_ioctx_reqs(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
 		complete(&ctx->rq_wait->comp);
 
-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
+	/* Synchronize against RCU protected table->table[] dereferences */
+	call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
 }
 
 /*
@@ -651,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 	while (1) {
 		if (table)
 			for (i = 0; i < table->nr; i++)
-				if (!table->table[i]) {
+				if (!rcu_access_pointer(table->table[i])) {
 					ctx->id = i;
-					table->table[i] = ctx;
+					rcu_assign_pointer(table->table[i], ctx);
 					spin_unlock(&mm->ioctx_lock);
 
 					/* While kioctx setup is in progress,
@@ -834,11 +849,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	}
 
 	table = rcu_dereference_raw(mm->ioctx_table);
-	WARN_ON(ctx != table->table[ctx->id]);
-	table->table[ctx->id] = NULL;
+	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+	RCU_INIT_POINTER(table->table[ctx->id], NULL);
 	spin_unlock(&mm->ioctx_lock);
 
-	/* percpu_ref_kill() will do the necessary call_rcu() */
+	/* free_ioctx_reqs() will do the necessary RCU synchronization */
 	wake_up_all(&ctx->wait);
 
 	/*
@@ -880,7 +895,8 @@ void exit_aio(struct mm_struct *mm)
 
 	skipped = 0;
 	for (i = 0; i < table->nr; ++i) {
-		struct kioctx *ctx = table->table[i];
+		struct kioctx *ctx =
+			rcu_dereference_protected(table->table[i], true);
 
 		if (!ctx) {
 			skipped++;
@@ -1069,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	if (!table || id >= table->nr)
 		goto out;
 
-	ctx = table->table[id];
+	ctx = rcu_dereference(table->table[id]);
 	if (ctx && ctx->user_id == ctx_id) {
 		percpu_ref_get(&ctx->users);
 		ret = ctx;
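
The fs/aio.c change above uses a common kernel pattern worth calling out: an object looked up under rcu_read_lock() must have its free deferred past an RCU grace period, but the final teardown (here aio_free_ring()) can sleep, which is not allowed in the softirq context where RCU callbacks run. The fix therefore bounces the free through call_rcu() and then a workqueue. A minimal sketch of that pattern, with a hypothetical struct foo standing in for struct kioctx:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo {
	struct rcu_head		rcu;	/* stage 1: wait out RCU readers */
	struct work_struct	work;	/* stage 2: get process context */
};

static void foo_free_workfn(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, work);

	/* Worker (process) context: sleeping is allowed here. */
	kfree(f);
}

static void foo_free_rcufn(struct rcu_head *head)
{
	struct foo *f = container_of(head, struct foo, rcu);

	/* Softirq context: must not sleep, so punt to a workqueue. */
	INIT_WORK(&f->work, foo_free_workfn);
	schedule_work(&f->work);
}

static void foo_release(struct foo *f)
{
	/* After the grace period, no RCU reader can still see f. */
	call_rcu(&f->rcu, foo_free_rcufn);
}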
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f94b2d8c744a..26484648d090 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1519,6 +1519,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
 		if (!node)
 			break;
 		bytenr = node->val;
+		shared.share_count = 0;
 		cond_resched();
 	}
 
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dec0907dfb8a..fcfc20de2df3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1370,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 		stripe_start = stripe->physical;
 		if (physical >= stripe_start &&
 		    physical < stripe_start + rbio->stripe_len &&
+		    stripe->dev->bdev &&
 		    bio->bi_disk == stripe->dev->bdev->bd_disk &&
 		    bio->bi_partno == stripe->dev->bdev->bd_partno) {
 			return i;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index d11c70bff5a9..a8bafed931f4 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -423,7 +423,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->nodesize);
+	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
 }
 
 BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
@@ -433,7 +433,8 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize);
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			fs_info->super_copy->sectorsize);
 }
 
 BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
@@ -443,7 +444,8 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize);
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			fs_info->super_copy->sectorsize);
 }
 
 BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9220f004001c..04f07144b45c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1722,23 +1722,19 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
 
 	super = fs_info->super_copy;
 
-	/* update latest btrfs_super_block::chunk_root refs */
 	root_item = &fs_info->chunk_root->root_item;
-	btrfs_set_super_chunk_root(super, root_item->bytenr);
-	btrfs_set_super_chunk_root_generation(super, root_item->generation);
-	btrfs_set_super_chunk_root_level(super, root_item->level);
+	super->chunk_root = root_item->bytenr;
+	super->chunk_root_generation = root_item->generation;
+	super->chunk_root_level = root_item->level;
 
-	/* update latest btrfs_super_block::root refs */
 	root_item = &fs_info->tree_root->root_item;
-	btrfs_set_super_root(super, root_item->bytenr);
-	btrfs_set_super_generation(super, root_item->generation);
-	btrfs_set_super_root_level(super, root_item->level);
-
+	super->root = root_item->bytenr;
+	super->generation = root_item->generation;
+	super->root_level = root_item->level;
 	if (btrfs_test_opt(fs_info, SPACE_CACHE))
-		btrfs_set_super_cache_generation(super, root_item->generation);
+		super->cache_generation = root_item->generation;
 	if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
-		btrfs_set_super_uuid_tree_generation(super,
-						     root_item->generation);
+		super->uuid_tree_generation = root_item->generation;
 }
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
diff --git a/fs/dcache.c b/fs/dcache.c
index 7c38f39958bc..8945e6cabd93 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -647,11 +647,16 @@ again:
 		spin_unlock(&parent->d_lock);
 		goto again;
 	}
-	rcu_read_unlock();
-	if (parent != dentry)
+	if (parent != dentry) {
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-	else
+		if (unlikely(dentry->d_lockref.count < 0)) {
+			spin_unlock(&parent->d_lock);
+			parent = NULL;
+		}
+	} else {
 		parent = NULL;
+	}
+	rcu_read_unlock();
 	return parent;
 }
 
@@ -2474,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 
 retry:
 	rcu_read_lock();
-	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+	seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
 	r_seq = read_seqbegin(&rename_lock);
 	dentry = __d_lookup_rcu(parent, name, &d_seq);
 	if (unlikely(dentry)) {
@@ -2495,8 +2500,14 @@ retry:
 		rcu_read_unlock();
 		goto retry;
 	}
+
+	if (unlikely(seq & 1)) {
+		rcu_read_unlock();
+		goto retry;
+	}
+
 	hlist_bl_lock(b);
-	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+	if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
 		hlist_bl_unlock(b);
 		rcu_read_unlock();
 		goto retry;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 86d6a4435c87..51f940e76c5e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -807,9 +807,6 @@ do_alloc:
 			iomap->length = hole_size(inode, lblock, &mp);
 		else
 			iomap->length = size - pos;
-	} else {
-		if (height <= ip->i_height)
-			iomap->length = hole_size(inode, lblock, &mp);
 	}
 	goto out_release;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8fe1b0aa2896..b9a254dcc0e7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+	(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_ops = &hugetlb_vm_ops;
 
 	/*
-	 * Offset passed to mmap (before page shift) could have been
-	 * negative when represented as a (l)off_t.
+	 * page based offset in vm_pgoff could be sufficiently large to
+	 * overflow a (l)off_t when converted to byte offset.
 	 */
-	if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+	if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
 		return -EINVAL;
 
+	/* must be huge page aligned */
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
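
The PGOFF_LOFFT_MAX arithmetic above is easy to sanity-check in userspace. A small standalone demo, assuming a 64-bit build with 4K pages (PAGE_SHIFT = 12); the pgoff values are illustrative:

#include <stdio.h>

#define BITS_PER_LONG	64
#define PAGE_SHIFT	12
#define PGOFF_LOFFT_MAX \
	(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))

/* Mirrors the new hugetlbfs_file_mmap() check: reject any pgoff whose
 * byte offset (pgoff << PAGE_SHIFT) would not fit in a signed loff_t. */
static int pgoff_overflows(unsigned long pgoff)
{
	return (pgoff & PGOFF_LOFFT_MAX) != 0;
}

int main(void)
{
	/* Byte offset would be 1UL << 63: negative as a signed loff_t. */
	unsigned long bad = 1UL << (BITS_PER_LONG - PAGE_SHIFT - 1);
	/* Byte offset stays below 2^62: fits comfortably in a loff_t. */
	unsigned long ok = (1UL << (BITS_PER_LONG - PAGE_SHIFT - 2)) - 1;

	printf("bad pgoff %#lx rejected: %d\n", bad, pgoff_overflows(bad));
	printf("ok  pgoff %#lx rejected: %d\n", ok, pgoff_overflows(ok));
	return 0;
}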
diff --git a/fs/namei.c b/fs/namei.c
index 921ae32dbc80..cafa365eeb70 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -559,9 +559,10 @@ static int __nd_alloc_stack(struct nameidata *nd)
 static bool path_connected(const struct path *path)
 {
 	struct vfsmount *mnt = path->mnt;
+	struct super_block *sb = mnt->mnt_sb;
 
-	/* Only bind mounts can have disconnected paths */
-	if (mnt->mnt_root == mnt->mnt_sb->s_root)
+	/* Bind mounts and multi-root filesystems can have disconnected paths */
+	if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
 		return true;
 
 	return is_subdir(path->dentry, mnt->mnt_root);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8c10b0562e75..621c517b325c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -86,10 +86,10 @@ struct nfs_direct_req {
 	struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
 	int			mirror_count;
 
+	loff_t			io_start;	/* Start offset for I/O */
 	ssize_t			count,		/* bytes actually processed */
 				max_count,	/* max expected count */
 				bytes_left,	/* bytes left to be sent */
-				io_start,	/* start of IO */
 				error;		/* any reported error */
 	struct completion	completion;	/* wait for i/o completion */
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c13e826614b5..ee723aa153a3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
 void
 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-	struct inode *inode = lo->plh_inode;
+	struct inode *inode;
 
+	if (!lo)
+		return;
+	inode = lo->plh_inode;
 	pnfs_layoutreturn_before_put_layout_hdr(lo);
 
 	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
@@ -1241,10 +1244,12 @@ retry:
 	spin_lock(&ino->i_lock);
 	lo = nfsi->layout;
 	if (!lo || !pnfs_layout_is_valid(lo) ||
-	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		lo = NULL;
 		goto out_noroc;
+	}
+	pnfs_get_layout_hdr(lo);
 	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
-		pnfs_get_layout_hdr(lo);
 		spin_unlock(&ino->i_lock);
 		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
 			    TASK_UNINTERRUPTIBLE);
@@ -1312,10 +1317,12 @@ out_noroc:
 		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
 		if (ld->prepare_layoutreturn)
 			ld->prepare_layoutreturn(args);
+		pnfs_put_layout_hdr(lo);
 		return true;
 	}
 	if (layoutreturn)
 		pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+	pnfs_put_layout_hdr(lo);
 	return false;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 29bacdc56f6a..5e470e233c83 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2631,6 +2631,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
 		/* initial superblock/root creation */
 		mount_info->fill_super(s, mount_info);
 		nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
+		if (!(server->flags & NFS_MOUNT_UNSHARED))
+			s->s_iflags |= SB_I_MULTIROOT;
 	}
 
 	mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7428a669d7a7..e7d8ceae8f26 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 	return status;
 }
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+		struct writeback_control *wbc)
 {
 	LIST_HEAD(head);
 	struct nfs_commit_info cinfo;
 	int may_wait = how & FLUSH_SYNC;
-	int error = 0;
-	int res;
+	int ret, nscan;
 
 	nfs_init_cinfo_from_inode(&cinfo, inode);
 	nfs_commit_begin(cinfo.mds);
-	res = nfs_scan_commit(inode, &head, &cinfo);
-	if (res)
-		error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+	for (;;) {
+		ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+		if (ret <= 0)
+			break;
+		ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+		if (ret < 0)
+			break;
+		ret = 0;
+		if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+			if (nscan < wbc->nr_to_write)
+				wbc->nr_to_write -= nscan;
+			else
+				wbc->nr_to_write = 0;
+		}
+		if (nscan < INT_MAX)
+			break;
+		cond_resched();
+	}
 	nfs_commit_end(cinfo.mds);
-	if (res == 0)
-		return res;
-	if (error < 0)
-		goto out_error;
-	if (!may_wait)
-		goto out_mark_dirty;
-	error = wait_on_commit(cinfo.mds);
-	if (error < 0)
-		return error;
-	return res;
-out_error:
-	res = error;
-	/* Note: If we exit without ensuring that the commit is complete,
-	 * we must mark the inode as dirty. Otherwise, future calls to
-	 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
-	 * that the data is on the disk.
-	 */
-out_mark_dirty:
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	return res;
+	if (ret || !may_wait)
+		return ret;
+	return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+	return __nfs_commit_inode(inode, how, NULL);
 }
 EXPORT_SYMBOL_GPL(nfs_commit_inode);
 
@@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	int flags = FLUSH_SYNC;
 	int ret = 0;
 
-	/* no commits means nothing needs to be done */
-	if (!atomic_long_read(&nfsi->commit_info.ncommit))
-		return ret;
-
 	if (wbc->sync_mode == WB_SYNC_NONE) {
+		/* no commits means nothing needs to be done */
+		if (!atomic_long_read(&nfsi->commit_info.ncommit))
+			goto check_requests_outstanding;
+
 		/* Don't commit yet if this is a non-blocking flush and there
 		 * are a lot of outstanding writes for this mapping.
 		 */
@@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 			flags = 0;
 	}
 
-	ret = nfs_commit_inode(inode, flags);
-	if (ret >= 0) {
-		if (wbc->sync_mode == WB_SYNC_NONE) {
-			if (ret < wbc->nr_to_write)
-				wbc->nr_to_write -= ret;
-			else
-				wbc->nr_to_write = 0;
-		}
-		return 0;
-	}
+	ret = __nfs_commit_inode(inode, flags, wbc);
+	if (!ret) {
+		if (flags & FLUSH_SYNC)
+			return 0;
+	} else if (atomic_long_read(&nfsi->commit_info.ncommit))
+		goto out_mark_dirty;
+
+check_requests_outstanding:
+	if (!atomic_read(&nfsi->commit_info.rpcs_out))
+		return ret;
 out_mark_dirty:
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	return ret;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 150521c9671b..61b770e39809 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -268,6 +268,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
 	kfree(nbl);
 }
 
+static void
+remove_blocked_locks(struct nfs4_lockowner *lo)
+{
+	struct nfs4_client *clp = lo->lo_owner.so_client;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+	struct nfsd4_blocked_lock *nbl;
+	LIST_HEAD(reaplist);
+
+	/* Dequeue all blocked locks */
+	spin_lock(&nn->blocked_locks_lock);
+	while (!list_empty(&lo->lo_blocked)) {
+		nbl = list_first_entry(&lo->lo_blocked,
+					struct nfsd4_blocked_lock,
+					nbl_list);
+		list_del_init(&nbl->nbl_list);
+		list_move(&nbl->nbl_lru, &reaplist);
+	}
+	spin_unlock(&nn->blocked_locks_lock);
+
+	/* Now free them */
+	while (!list_empty(&reaplist)) {
+		nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
+					nbl_lru);
+		list_del_init(&nbl->nbl_lru);
+		posix_unblock_lock(&nbl->nbl_lock);
+		free_blocked_lock(nbl);
+	}
+}
+
 static int
 nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
 {
@@ -1866,6 +1895,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
 static void
 __destroy_client(struct nfs4_client *clp)
 {
+	int i;
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
 	struct list_head reaplist;
@@ -1895,6 +1925,16 @@ __destroy_client(struct nfs4_client *clp)
 		nfs4_get_stateowner(&oo->oo_owner);
 		release_openowner(oo);
 	}
+	for (i = 0; i < OWNER_HASH_SIZE; i++) {
+		struct nfs4_stateowner *so, *tmp;
+
+		list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
+					 so_strhash) {
+			/* Should be no openowners at this point */
+			WARN_ON_ONCE(so->so_is_open_owner);
+			remove_blocked_locks(lockowner(so));
+		}
+	}
 	nfsd4_return_all_client_layouts(clp);
 	nfsd4_shutdown_callback(clp);
 	if (clp->cl_cb_conn.cb_xprt)
@@ -6355,6 +6395,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 	}
 	spin_unlock(&clp->cl_lock);
 	free_ol_stateid_reaplist(&reaplist);
+	remove_blocked_locks(lo);
 	nfs4_put_stateowner(&lo->lo_owner);
 
 	return status;
@@ -7140,6 +7181,8 @@ nfs4_state_destroy_net(struct net *net)
 		}
 	}
 
+	WARN_ON(!list_empty(&nn->blocked_locks_lru));
+
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&nn->unconf_id_hashtbl[i])) {
 			clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -7206,7 +7249,6 @@ nfs4_state_shutdown_net(struct net *net)
 	struct nfs4_delegation *dp = NULL;
 	struct list_head *pos, *next, reaplist;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct nfsd4_blocked_lock *nbl;
 
 	cancel_delayed_work_sync(&nn->laundromat_work);
 	locks_end_grace(&nn->nfsd4_manager);
@@ -7227,24 +7269,6 @@ nfs4_state_shutdown_net(struct net *net)
 		nfs4_put_stid(&dp->dl_stid);
 	}
 
-	BUG_ON(!list_empty(&reaplist));
-	spin_lock(&nn->blocked_locks_lock);
-	while (!list_empty(&nn->blocked_locks_lru)) {
-		nbl = list_first_entry(&nn->blocked_locks_lru,
-					struct nfsd4_blocked_lock, nbl_lru);
-		list_move(&nbl->nbl_lru, &reaplist);
-		list_del_init(&nbl->nbl_list);
-	}
-	spin_unlock(&nn->blocked_locks_lock);
-
-	while (!list_empty(&reaplist)) {
-		nbl = list_first_entry(&reaplist,
-					struct nfsd4_blocked_lock, nbl_lru);
-		list_del_init(&nbl->nbl_lru);
-		posix_unblock_lock(&nbl->nbl_lock);
-		free_blocked_lock(nbl);
-	}
-
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
 }
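
The new remove_blocked_locks() follows the usual two-phase teardown: detach entries onto a private reaplist while holding the spinlock, then unblock and free them with the lock dropped, so the freeing work never runs under blocked_locks_lock. A minimal sketch of the general pattern (hypothetical struct item, not the nfsd types; the nfsd code walks entry by entry because each lock sits on two lists, where this sketch splices the whole list at once):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head entry;
};

static void reap_all(struct list_head *src, spinlock_t *lock)
{
	struct item *it, *tmp;
	LIST_HEAD(reaplist);

	/* Phase 1: detach everything while holding the lock. */
	spin_lock(lock);
	list_splice_init(src, &reaplist);
	spin_unlock(lock);

	/* Phase 2: free at leisure, with the lock dropped. */
	list_for_each_entry_safe(it, tmp, &reaplist, entry) {
		list_del(&it->entry);
		kfree(it);
	}
}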
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 406e72de88f6..ce6ff5a0a6e4 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,8 @@ config OVERLAY_FS_REDIRECT_DIR
 	  an overlay which has redirects on a kernel that doesn't support this
 	  feature will have unexpected results.
 
+	  If unsure, say N.
+
 config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
 	bool "Overlayfs: follow redirects even if redirects are turned off"
 	default y
@@ -32,8 +34,13 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
 	  Disable this to get a possibly more secure configuration, but that
 	  might not be backward compatible with previous kernels.
 
+	  If backward compatibility is not an issue, then it is safe and
+	  recommended to say N here.
+
 	  For more information, see Documentation/filesystems/overlayfs.txt
 
+	  If unsure, say Y.
+
 config OVERLAY_FS_INDEX
 	bool "Overlayfs: turn on inodes index feature by default"
 	depends on OVERLAY_FS
@@ -51,6 +58,8 @@ config OVERLAY_FS_INDEX
 	  That is, mounting an overlay which has an inodes index on a kernel
 	  that doesn't support this feature will have unexpected results.
 
+	  If unsure, say N.
+
 config OVERLAY_FS_NFS_EXPORT
 	bool "Overlayfs: turn on NFS export feature by default"
 	depends on OVERLAY_FS
@@ -72,3 +81,8 @@ config OVERLAY_FS_NFS_EXPORT
 	  Note, that the NFS export feature is not backward compatible.
 	  That is, mounting an overlay which has a full index on a kernel
 	  that doesn't support this feature will have unexpected results.
+
+	  Most users should say N here and enable this feature on a case-by-
+	  case basis with the "nfs_export=on" mount option.
+
+	  Say N unless you fully understand the consequences.
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index bb94ce9da5c8..87bd4148f4fb 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -19,6 +19,142 @@
 #include <linux/ratelimit.h>
 #include "overlayfs.h"
 
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+	int err;
+
+	if (ovl_dentry_upper(dentry))
+		return 0;
+
+	err = ovl_want_write(dentry);
+	if (!err) {
+		err = ovl_copy_up(dentry);
+		ovl_drop_write(dentry);
+	}
+
+	if (err) {
+		pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+				    dentry, err);
+	}
+
+	return err;
+}
+
+/*
+ * Before encoding a non-upper directory file handle from real layer N, we need
+ * to check if it will be possible to reconnect an overlay dentry from the real
+ * lower decoded dentry. This is done by following the overlay ancestry up to a
+ * "layer N connected" ancestor and verifying that all parents along the way are
+ * "layer N connectable". If an ancestor that is NOT "layer N connectable" is
+ * found, we need to copy up an ancestor, which is "layer N connectable", thus
+ * making that ancestor "layer N connected". For example:
+ *
+ * layer 1: /a
+ * layer 2: /a/b/c
+ *
+ * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
+ * copied up and renamed, upper dir /a will be indexed by lower dir /a from
+ * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
+ * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
+ * dentry from the connected lower dentry /a/b/c.
+ *
+ * To avoid this problem on decode time, we need to copy up an ancestor of
+ * /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is
+ * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
+ * and when the time comes to decode the file handle from lower dentry /a/b/c,
+ * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
+ * a connected overlay dentry will be accomplished.
+ *
+ * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
+ * entry /a in the lower layers above layer N and find the indexed dir /a from
+ * layer 1. If that improvement is made, then the check for "layer N connected"
+ * will need to verify there are no redirects in lower layers above N. In the
+ * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
+ * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
+ *
+ * layer 1: /A (redirect = /a)
+ * layer 2: /a/b/c
+ */
+
+/* Return the lowest layer for encoding a connectable file handle */
+static int ovl_connectable_layer(struct dentry *dentry)
+{
+	struct ovl_entry *oe = OVL_E(dentry);
+
+	/* We can get overlay root from root of any layer */
+	if (dentry == dentry->d_sb->s_root)
+		return oe->numlower;
+
+	/*
+	 * If it's an unindexed merge dir, then it's not connectable with any
+	 * lower layer
+	 */
+	if (ovl_dentry_upper(dentry) &&
+	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		return 0;
+
+	/* We can get upper/overlay path from indexed/lower dentry */
+	return oe->lowerstack[0].layer->idx;
+}
+
+/*
+ * @dentry is "connected" if all ancestors up to root or a "connected" ancestor
+ * have the same uppermost lower layer as the origin's layer. We may need to
+ * copy up a "connectable" ancestor to make it "connected". A "connected" dentry
+ * cannot become non "connected", so cache positive result in dentry flags.
+ *
+ * Return the connected origin layer or < 0 on error.
+ */
+static int ovl_connect_layer(struct dentry *dentry)
+{
+	struct dentry *next, *parent = NULL;
+	int origin_layer;
+	int err = 0;
+
+	if (WARN_ON(dentry == dentry->d_sb->s_root) ||
+	    WARN_ON(!ovl_dentry_lower(dentry)))
+		return -EIO;
+
+	origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+	if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
+		return origin_layer;
+
+	/* Find the topmost origin layer connectable ancestor of @dentry */
+	next = dget(dentry);
+	for (;;) {
+		parent = dget_parent(next);
+		if (WARN_ON(parent == next)) {
+			err = -EIO;
+			break;
+		}
+
+		/*
+		 * If @parent is not origin layer connectable, then copy up
+		 * @next which is origin layer connectable and we are done.
+		 */
+		if (ovl_connectable_layer(parent) < origin_layer) {
+			err = ovl_encode_maybe_copy_up(next);
+			break;
+		}
+
+		/* If @parent is connected or indexed we are done */
+		if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
+		    ovl_test_flag(OVL_INDEX, d_inode(parent)))
+			break;
+
+		dput(next);
+		next = parent;
+	}
+
+	dput(parent);
+	dput(next);
+
+	if (!err)
+		ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
+
+	return err ?: origin_layer;
+}
+
 /*
  * We only need to encode origin if there is a chance that the same object was
  * encoded pre copy up and then we need to stay consistent with the same
@@ -41,73 +177,59 @@
  * L = lower file handle
  *
  * (*) Connecting an overlay dir from real lower dentry is not always
- * possible when there are redirects in lower layers. To mitigate this case,
- * we copy up the lower dir first and then encode an upper dir file handle.
+ * possible when there are redirects in lower layers and non-indexed merge dirs.
+ * To mitigate those case, we may copy up the lower dir ancestor before encode
+ * a lower dir file handle.
+ *
+ * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
  */
-static bool ovl_should_encode_origin(struct dentry *dentry)
+static int ovl_check_encode_origin(struct dentry *dentry)
 {
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 
+	/* Upper file handle for pure upper */
 	if (!ovl_dentry_lower(dentry))
-		return false;
+		return 0;
 
 	/*
-	 * Decoding a merge dir, whose origin's parent is under a redirected
-	 * lower dir is not always possible. As a simple aproximation, we do
-	 * not encode lower dir file handles when overlay has multiple lower
-	 * layers and origin is below the topmost lower layer.
+	 * Upper file handle for non-indexed upper.
 	 *
-	 * TODO: copy up only the parent that is under redirected lower.
+	 * Root is never indexed, so if there's an upper layer, encode upper for
+	 * root.
 	 */
-	if (d_is_dir(dentry) && ofs->upper_mnt &&
-	    OVL_E(dentry)->lowerstack[0].layer->idx > 1)
-		return false;
-
-	/* Decoding a non-indexed upper from origin is not implemented */
 	if (ovl_dentry_upper(dentry) &&
 	    !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
-		return false;
-
-	return true;
-}
-
-static int ovl_encode_maybe_copy_up(struct dentry *dentry)
-{
-	int err;
-
-	if (ovl_dentry_upper(dentry))
 		return 0;
 
-	err = ovl_want_write(dentry);
-	if (err)
-		return err;
-
-	err = ovl_copy_up(dentry);
+	/*
+	 * Decoding a merge dir, whose origin's ancestor is under a redirected
+	 * lower dir or under a non-indexed upper is not always possible.
+	 * ovl_connect_layer() will try to make origin's layer "connected" by
+	 * copying up a "connectable" ancestor.
+	 */
+	if (d_is_dir(dentry) && ofs->upper_mnt)
+		return ovl_connect_layer(dentry);
 
-	ovl_drop_write(dentry);
-	return err;
+	/* Lower file handle for indexed and non-upper dir/non-dir */
+	return 1;
 }
 
 static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 {
-	struct dentry *origin = ovl_dentry_lower(dentry);
 	struct ovl_fh *fh = NULL;
-	int err;
+	int err, enc_lower;
 
 	/*
-	 * If we should not encode a lower dir file handle, copy up and encode
-	 * an upper dir file handle.
+	 * Check if we should encode a lower or upper file handle and maybe
+	 * copy up an ancestor to make lower file handle connectable.
 	 */
-	if (!ovl_should_encode_origin(dentry)) {
-		err = ovl_encode_maybe_copy_up(dentry);
-		if (err)
-			goto fail;
-
-		origin = NULL;
-	}
+	err = enc_lower = ovl_check_encode_origin(dentry);
+	if (enc_lower < 0)
+		goto fail;
 
-	/* Encode an upper or origin file handle */
-	fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
+	/* Encode an upper or lower file handle */
+	fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) :
+			   ovl_dentry_upper(dentry), !enc_lower);
 	err = PTR_ERR(fh);
 	if (IS_ERR(fh))
 		goto fail;
@@ -355,8 +477,8 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
 		dput(upper);
 	}
 
-	if (!this)
-		return NULL;
+	if (IS_ERR_OR_NULL(this))
+		return this;
 
 	if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
 		dput(this);
@@ -498,7 +620,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
 			if (err == -ECHILD) {
 				this = ovl_lookup_real_ancestor(sb, real,
 								layer);
-				err = IS_ERR(this) ? PTR_ERR(this) : 0;
+				err = PTR_ERR_OR_ZERO(this);
 			}
 			if (!err) {
 				dput(connected);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index fcd97b783fa1..3b1bd469accd 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
 	return inode;
 }
 
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+			     struct dentry *lower, struct dentry *index)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	/* No, if pure upper */
+	if (!lower)
+		return false;
+
+	/* Yes, if already indexed */
+	if (index)
+		return true;
+
+	/* Yes, if won't be copied up */
+	if (!ofs->upper_mnt)
+		return true;
+
+	/* No, if lower hardlink is or will be broken on copy up */
+	if ((upper || !ovl_indexdir(sb)) &&
+	    !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+		return false;
+
+	/* No, if non-indexed upper with NFS export */
+	if (sb->s_export_op && upper)
+		return false;
+
+	/* Otherwise, hash by lower inode for fsnotify */
+	return true;
+}
+
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower)
 {
-	struct ovl_fs *ofs = sb->s_fs_info;
 	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
 	struct inode *inode;
-	/* Already indexed or could be indexed on copy up? */
-	bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
-	struct dentry *origin = indexed ? lowerdentry : NULL;
+	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
 	bool is_dir;
 
-	if (WARN_ON(upperdentry && indexed && !lowerdentry))
-		return ERR_PTR(-EIO);
-
 	if (!realinode)
 		realinode = d_inode(lowerdentry);
 
 	/*
-	 * Copy up origin (lower) may exist for non-indexed non-dir upper, but
-	 * we must not use lower as hash key in that case.
-	 * Hash non-dir that is or could be indexed by origin inode.
-	 * Hash dir that is or could be merged by origin inode.
-	 * Hash pure upper and non-indexed non-dir by upper inode.
-	 * Hash non-indexed dir by upper inode for NFS export.
+	 * Copy up origin (lower) may exist for non-indexed upper, but we must
+	 * not use lower as hash key if this is a broken hardlink.
 	 */
 	is_dir = S_ISDIR(realinode->i_mode);
-	if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
-		origin = lowerdentry;
-
-	if (upperdentry || origin) {
-		struct inode *key = d_inode(origin ?: upperdentry);
+	if (upperdentry || bylower) {
+		struct inode *key = d_inode(bylower ? lowerdentry :
+					    upperdentry);
 		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
 
 		inode = iget5_locked(sb, (unsigned long) key,
@@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 		nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
 		set_nlink(inode, nlink);
 	} else {
+		/* Lower hardlink that will be broken on copy up */
 		inode = new_inode(sb);
 		if (!inode)
 			goto out_nomem;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index de3e6da1d5a5..70fcfcc684cc 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -913,9 +913,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		stack[ctr].layer = lower.layer;
 		ctr++;
 
-		if (d.stop)
-			break;
-
 		/*
 		 * Following redirects can have security consequences: it's like
 		 * a symlink into the lower layer without the permission checks.
@@ -933,6 +930,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			goto out_put;
 		}
 
+		if (d.stop)
+			break;
+
 		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
 			poe = roe;
 			/* Find the current layer on the root dentry */
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 0df25a9c94bd..225ff1171147 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -40,6 +40,7 @@ enum ovl_inode_flag {
 enum ovl_entry_flag {
 	OVL_E_UPPER_ALIAS,
 	OVL_E_OPAQUE,
+	OVL_E_CONNECTED,
 };
 
 /*
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9ee37c76091d..7c24619ae7fc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1359,6 +1359,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* Root is always merge -> can have whiteouts */
 	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
+	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
 	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
 		       ovl_dentry_lower(root_dentry));
 
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 8664db25a9a6..215c225b2ca1 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
 {
 	return sysfs_do_create_link(kobj, target, name, 0);
 }
+EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn);
 
 /**
  * sysfs_delete_link - remove symlink in object's directory.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 66e1edbfb2b2..046469fcc1b8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -955,15 +955,29 @@ static inline bool imap_needs_alloc(struct inode *inode,
 		(IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
 }
 
+static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return nimaps &&
+		imap->br_startblock != HOLESTARTBLOCK &&
+		imap->br_state != XFS_EXT_UNWRITTEN;
+}
+
 static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
 {
 	/*
-	 * COW writes will allocate delalloc space, so we need to make sure
-	 * to take the lock exclusively here.
+	 * COW writes may allocate delalloc space or convert unwritten COW
+	 * extents, so we need to make sure to take the lock exclusively here.
 	 */
 	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
 		return true;
-	if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+
+	/*
+	 * Extents not yet cached requires exclusive access, don't block.
+	 * This is an opencoded xfs_ilock_data_map_shared() to cater for the
+	 * non-blocking behaviour.
+	 */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+	    !(ip->i_df.if_flags & XFS_IFEXTENTS))
 		return true;
 	return false;
 }
@@ -993,16 +1007,18 @@ xfs_file_iomap_begin(
 		return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
 	}
 
-	if (need_excl_ilock(ip, flags)) {
+	if (need_excl_ilock(ip, flags))
 		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
+	else
+		lockmode = XFS_ILOCK_SHARED;
 
-	if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
-		error = -EAGAIN;
-		goto out_unlock;
+	if (flags & IOMAP_NOWAIT) {
+		if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+			return -EAGAIN;
+		if (!xfs_ilock_nowait(ip, lockmode))
+			return -EAGAIN;
+	} else {
+		xfs_ilock(ip, lockmode);
 	}
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -1024,7 +1040,9 @@ xfs_file_iomap_begin(
 		goto out_unlock;
 	}
 
-	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+	if (xfs_is_reflink_inode(ip) &&
+	    ((flags & IOMAP_WRITE) ||
+	     ((flags & IOMAP_ZERO) && needs_cow_for_zeroing(&imap, nimaps)))) {
 		if (flags & IOMAP_DIRECT) {
 			/*
 			 * A reflinked inode will result in CoW alloc.
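
The IOMAP_NOWAIT handling above swaps a blocking xfs_ilock() for xfs_ilock_nowait(), returning -EAGAIN when the lock cannot be taken immediately so the caller can retry from a context where sleeping is allowed. A minimal userspace analogue of that try-lock-or-EAGAIN shape, using POSIX threads (illustrative only, not the XFS locking API):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;

/* NOWAIT caller: try the lock and bail with -EAGAIN instead of sleeping. */
static int iomap_begin(int nowait)
{
	if (nowait) {
		if (pthread_mutex_trylock(&ilock) != 0)
			return -EAGAIN;
	} else {
		pthread_mutex_lock(&ilock);
	}
	/* ... do the extent-mapping work under the lock ... */
	pthread_mutex_unlock(&ilock);
	return 0;
}

int main(void)
{
	pthread_mutex_lock(&ilock);	/* simulate a contended lock */
	printf("nowait  -> %d (expect %d)\n", iomap_begin(1), -EAGAIN);
	pthread_mutex_unlock(&ilock);
	printf("blocking -> %d\n", iomap_begin(0));
	return 0;
}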