aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent-tree.c31
-rw-r--r--fs/btrfs/extent_io.c19
-rw-r--r--fs/btrfs/free-space-cache.c16
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/efivarfs/super.c2
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/ext4_jbd2.c6
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/f2fs/data.c7
-rw-r--r--fs/f2fs/f2fs.h1
-rw-r--r--fs/f2fs/namei.c8
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/jbd2/recovery.c10
-rw-r--r--fs/jbd2/revoke.c18
-rw-r--r--fs/jbd2/transaction.c25
-rw-r--r--fs/kernfs/dir.c9
-rw-r--r--fs/namei.c22
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfsd/blocklayout.c11
-rw-r--r--fs/nfsd/nfs4callback.c119
-rw-r--r--fs/nfsd/nfs4state.c147
-rw-r--r--fs/nfsd/state.h19
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/nilfs2/btree.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c13
-rw-r--r--fs/splice.c12
31 files changed, 406 insertions, 135 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0ec8e228b89f..7effed6f2fa6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
3180 btrfs_mark_buffer_dirty(leaf); 3180 btrfs_mark_buffer_dirty(leaf);
3181fail: 3181fail:
3182 btrfs_release_path(path); 3182 btrfs_release_path(path);
3183 if (ret)
3184 btrfs_abort_transaction(trans, root, ret);
3185 return ret; 3183 return ret;
3186 3184
3187} 3185}
@@ -3487,8 +3485,30 @@ again:
3487 ret = 0; 3485 ret = 0;
3488 } 3486 }
3489 } 3487 }
3490 if (!ret) 3488 if (!ret) {
3491 ret = write_one_cache_group(trans, root, path, cache); 3489 ret = write_one_cache_group(trans, root, path, cache);
3490 /*
3491 * Our block group might still be attached to the list
3492 * of new block groups in the transaction handle of some
3493 * other task (struct btrfs_trans_handle->new_bgs). This
3494 * means its block group item isn't yet in the extent
3495 * tree. If this happens ignore the error, as we will
3496 * try again later in the critical section of the
3497 * transaction commit.
3498 */
3499 if (ret == -ENOENT) {
3500 ret = 0;
3501 spin_lock(&cur_trans->dirty_bgs_lock);
3502 if (list_empty(&cache->dirty_list)) {
3503 list_add_tail(&cache->dirty_list,
3504 &cur_trans->dirty_bgs);
3505 btrfs_get_block_group(cache);
3506 }
3507 spin_unlock(&cur_trans->dirty_bgs_lock);
3508 } else if (ret) {
3509 btrfs_abort_transaction(trans, root, ret);
3510 }
3511 }
3492 3512
3493 /* if it's not on the io list, we need to put the block group */ 3513 /* if it's not on the io list, we need to put the block group */
3494 if (should_put) 3514 if (should_put)
@@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3597 ret = 0; 3617 ret = 0;
3598 } 3618 }
3599 } 3619 }
3600 if (!ret) 3620 if (!ret) {
3601 ret = write_one_cache_group(trans, root, path, cache); 3621 ret = write_one_cache_group(trans, root, path, cache);
3622 if (ret)
3623 btrfs_abort_transaction(trans, root, ret);
3624 }
3602 3625
3603 /* if it's not on the io list, we need to put the block group */ 3626 /* if it's not on the io list, we need to put the block group */
3604 if (should_put) 3627 if (should_put)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 43af5a61ad25..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4772 start >> PAGE_CACHE_SHIFT); 4772 start >> PAGE_CACHE_SHIFT);
4773 if (eb && atomic_inc_not_zero(&eb->refs)) { 4773 if (eb && atomic_inc_not_zero(&eb->refs)) {
4774 rcu_read_unlock(); 4774 rcu_read_unlock();
4775 /*
4776 * Lock our eb's refs_lock to avoid races with
4777 * free_extent_buffer. When we get our eb it might be flagged
4778 * with EXTENT_BUFFER_STALE and another task running
4779 * free_extent_buffer might have seen that flag set,
4780 * eb->refs == 2, that the buffer isn't under IO (dirty and
4781 * writeback flags not set) and it's still in the tree (flag
4782 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
4783 * of decrementing the extent buffer's reference count twice.
4784 * So here we could race and increment the eb's reference count,
4785 * clear its stale flag, mark it as dirty and drop our reference
4786 * before the other task finishes executing free_extent_buffer,
4787 * which would later result in an attempt to free an extent
4788 * buffer that is dirty.
4789 */
4790 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4791 spin_lock(&eb->refs_lock);
4792 spin_unlock(&eb->refs_lock);
4793 }
4775 mark_extent_buffer_accessed(eb, NULL); 4794 mark_extent_buffer_accessed(eb, NULL);
4776 return eb; 4795 return eb;
4777 } 4796 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 41c510b7cc11..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
86 86
87 mapping_set_gfp_mask(inode->i_mapping, 87 mapping_set_gfp_mask(inode->i_mapping,
88 mapping_gfp_mask(inode->i_mapping) & 88 mapping_gfp_mask(inode->i_mapping) &
89 ~(GFP_NOFS & ~__GFP_HIGHMEM)); 89 ~(__GFP_FS | __GFP_HIGHMEM));
90 90
91 return inode; 91 return inode;
92} 92}
@@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3466 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3466 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3467 int ret; 3467 int ret;
3468 struct btrfs_io_ctl io_ctl; 3468 struct btrfs_io_ctl io_ctl;
3469 bool release_metadata = true;
3469 3470
3470 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3471 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
3471 return 0; 3472 return 0;
@@ -3473,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3473 memset(&io_ctl, 0, sizeof(io_ctl)); 3474 memset(&io_ctl, 0, sizeof(io_ctl));
3474 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, 3475 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
3475 trans, path, 0); 3476 trans, path, 0);
3476 if (!ret) 3477 if (!ret) {
3478 /*
3479 * At this point writepages() didn't error out, so our metadata
3480 * reservation is released when the writeback finishes, at
3481 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
3482 * with or without an error.
3483 */
3484 release_metadata = false;
3477 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); 3485 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
3486 }
3478 3487
3479 if (ret) { 3488 if (ret) {
3480 btrfs_delalloc_release_metadata(inode, inode->i_size); 3489 if (release_metadata)
3490 btrfs_delalloc_release_metadata(inode, inode->i_size);
3481#ifdef DEBUG 3491#ifdef DEBUG
3482 btrfs_err(root->fs_info, 3492 btrfs_err(root->fs_info,
3483 "failed to write free ino cache for root %llu", 3493 "failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
722int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 722int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
723{ 723{
724 int ret = 0; 724 int ret = 0;
725 int ret_wb = 0;
725 u64 end; 726 u64 end;
726 u64 orig_end; 727 u64 orig_end;
727 struct btrfs_ordered_extent *ordered; 728 struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
741 if (ret) 742 if (ret)
742 return ret; 743 return ret;
743 744
744 ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); 745 /*
745 if (ret) 746 * If we have a writeback error don't return immediately. Wait first
746 return ret; 747 * for any ordered extents that haven't completed yet. This is to make
748 * sure no one can dirty the same page ranges and call writepages()
749 * before the ordered extents complete - to avoid failures (-EEXIST)
750 * when adding the new ordered extents to the ordered tree.
751 */
752 ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
747 753
748 end = orig_end; 754 end = orig_end;
749 while (1) { 755 while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
767 break; 773 break;
768 end--; 774 end--;
769 } 775 }
770 return ret; 776 return ret_wb ? ret_wb : ret;
771} 777}
772 778
773/* 779/*
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index da94e41bdbf6..537356742091 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -173,5 +173,5 @@ MODULE_LICENSE("GPL");
173MODULE_VERSION("0.0.2"); 173MODULE_VERSION("0.0.2");
174MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); 174MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
175 175
176module_init(configfs_init); 176core_initcall(configfs_init);
177module_exit(configfs_exit); 177module_exit(configfs_exit);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 59fedbcf8798..86a2121828c3 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
121 int len, i; 121 int len, i;
122 int err = -ENOMEM; 122 int err = -ENOMEM;
123 123
124 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 124 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
125 if (!entry) 125 if (!entry)
126 return err; 126 return err;
127 127
diff --git a/fs/exec.c b/fs/exec.c
index 49a1c61433b7..1977c2a553ac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -659,6 +659,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
659 if (stack_base > STACK_SIZE_MAX) 659 if (stack_base > STACK_SIZE_MAX)
660 stack_base = STACK_SIZE_MAX; 660 stack_base = STACK_SIZE_MAX;
661 661
662 /* Add space for stack randomization. */
663 stack_base += (STACK_RND_MASK << PAGE_SHIFT);
664
662 /* Make sure we didn't let the argument array grow too large. */ 665 /* Make sure we didn't let the argument array grow too large. */
663 if (vma->vm_end - vma->vm_start > stack_base) 666 if (vma->vm_end - vma->vm_start > stack_base)
664 return -ENOMEM; 667 return -ENOMEM;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 009a0590b20f..9a83f149ac85 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2889,7 +2889,6 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
2889 struct ext4_map_blocks *map, int flags); 2889 struct ext4_map_blocks *map, int flags);
2890extern int ext4_ext_calc_metadata_amount(struct inode *inode, 2890extern int ext4_ext_calc_metadata_amount(struct inode *inode,
2891 ext4_lblk_t lblocks); 2891 ext4_lblk_t lblocks);
2892extern int ext4_extent_tree_init(handle_t *, struct inode *);
2893extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 2892extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
2894 int num, 2893 int num,
2895 struct ext4_ext_path *path); 2894 struct ext4_ext_path *path);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 3445035c7e01..d41843181818 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -87,6 +87,12 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
87 ext4_put_nojournal(handle); 87 ext4_put_nojournal(handle);
88 return 0; 88 return 0;
89 } 89 }
90
91 if (!handle->h_transaction) {
92 err = jbd2_journal_stop(handle);
93 return handle->h_err ? handle->h_err : err;
94 }
95
90 sb = handle->h_transaction->t_journal->j_private; 96 sb = handle->h_transaction->t_journal->j_private;
91 err = handle->h_err; 97 err = handle->h_err;
92 rc = jbd2_journal_stop(handle); 98 rc = jbd2_journal_stop(handle);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d74e08029643..e003a1e81dc3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
377 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); 377 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
378 ext4_lblk_t last = lblock + len - 1; 378 ext4_lblk_t last = lblock + len - 1;
379 379
380 if (lblock > last) 380 if (len == 0 || lblock > last)
381 return 0; 381 return 0;
382 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 382 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
383} 383}
@@ -5396,6 +5396,14 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5396 loff_t new_size, ioffset; 5396 loff_t new_size, ioffset;
5397 int ret; 5397 int ret;
5398 5398
5399 /*
5400 * We need to test this early because xfstests assumes that a
5401 * collapse range of (0, 1) will return EOPNOTSUPP if the file
5402 * system does not support collapse range.
5403 */
5404 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5405 return -EOPNOTSUPP;
5406
5399 /* Collapse range works only on fs block size aligned offsets. */ 5407 /* Collapse range works only on fs block size aligned offsets. */
5400 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || 5408 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
5401 len & (EXT4_CLUSTER_SIZE(sb) - 1)) 5409 len & (EXT4_CLUSTER_SIZE(sb) - 1))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 55b187c3bac1..0554b0b5957b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4345,7 +4345,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
4345 int inode_size = EXT4_INODE_SIZE(sb); 4345 int inode_size = EXT4_INODE_SIZE(sb);
4346 4346
4347 oi.orig_ino = orig_ino; 4347 oi.orig_ino = orig_ino;
4348 ino = orig_ino & ~(inodes_per_block - 1); 4348 ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
4349 for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { 4349 for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
4350 if (ino == orig_ino) 4350 if (ino == orig_ino)
4351 continue; 4351 continue;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f06d0589ddba..ca9d4a2fed41 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -294,6 +294,8 @@ static void __save_error_info(struct super_block *sb, const char *func,
294 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 294 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
295 295
296 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 296 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
297 if (bdev_read_only(sb->s_bdev))
298 return;
297 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 299 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
298 es->s_last_error_time = cpu_to_le32(get_seconds()); 300 es->s_last_error_time = cpu_to_le32(get_seconds());
299 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 301 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b91b0e10678e..1e1aae669fa8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
1513{ 1513{
1514 struct inode *inode = mapping->host; 1514 struct inode *inode = mapping->host;
1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1516 bool locked = false;
1516 int ret; 1517 int ret;
1517 long diff; 1518 long diff;
1518 1519
@@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
1533 1534
1534 diff = nr_pages_to_write(sbi, DATA, wbc); 1535 diff = nr_pages_to_write(sbi, DATA, wbc);
1535 1536
1537 if (!S_ISDIR(inode->i_mode)) {
1538 mutex_lock(&sbi->writepages);
1539 locked = true;
1540 }
1536 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1541 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
1542 if (locked)
1543 mutex_unlock(&sbi->writepages);
1537 1544
1538 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1545 f2fs_submit_merged_bio(sbi, DATA, WRITE);
1539 1546
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d8921cf2ba9a..8de34ab6d5b1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -625,6 +625,7 @@ struct f2fs_sb_info {
625 struct mutex cp_mutex; /* checkpoint procedure lock */ 625 struct mutex cp_mutex; /* checkpoint procedure lock */
626 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 626 struct rw_semaphore cp_rwsem; /* blocking FS operations */
627 struct rw_semaphore node_write; /* locking node writes */ 627 struct rw_semaphore node_write; /* locking node writes */
628 struct mutex writepages; /* mutex for writepages() */
628 wait_queue_head_t cp_wait; 629 wait_queue_head_t cp_wait;
629 630
630 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 631 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7e3794edae42..658e8079aaf9 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -298,16 +298,14 @@ fail:
298 298
299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) 299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
300{ 300{
301 struct page *page; 301 struct page *page = page_follow_link_light(dentry, nd);
302 302
303 page = page_follow_link_light(dentry, nd); 303 if (IS_ERR_OR_NULL(page))
304 if (IS_ERR(page))
305 return page; 304 return page;
306 305
307 /* this is broken symlink case */ 306 /* this is broken symlink case */
308 if (*nd_get_link(nd) == 0) { 307 if (*nd_get_link(nd) == 0) {
309 kunmap(page); 308 page_put_link(dentry, nd, page);
310 page_cache_release(page);
311 return ERR_PTR(-ENOENT); 309 return ERR_PTR(-ENOENT);
312 } 310 }
313 return page; 311 return page;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 160b88346b24..b2dd1b01f076 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1035,6 +1035,7 @@ try_onemore:
1035 sbi->raw_super = raw_super; 1035 sbi->raw_super = raw_super;
1036 sbi->raw_super_buf = raw_super_buf; 1036 sbi->raw_super_buf = raw_super_buf;
1037 mutex_init(&sbi->gc_mutex); 1037 mutex_init(&sbi->gc_mutex);
1038 mutex_init(&sbi->writepages);
1038 mutex_init(&sbi->cp_mutex); 1039 mutex_init(&sbi->cp_mutex);
1039 init_rwsem(&sbi->node_write); 1040 init_rwsem(&sbi->node_write);
1040 clear_sbi_flag(sbi, SBI_POR_DOING); 1041 clear_sbi_flag(sbi, SBI_POR_DOING);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ef263174acd2..07d8d8f52faf 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -581,7 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
581 if (name == NULL) 581 if (name == NULL)
582 goto out_put; 582 goto out_put;
583 583
584 fd = file_create(name, mode & S_IFMT); 584 fd = file_create(name, mode & 0777);
585 if (fd < 0) 585 if (fd < 0)
586 error = fd; 586 error = fd;
587 else 587 else
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b5128c6e63ad..a9079d035ae5 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -842,15 +842,23 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
842{ 842{
843 jbd2_journal_revoke_header_t *header; 843 jbd2_journal_revoke_header_t *header;
844 int offset, max; 844 int offset, max;
845 int csum_size = 0;
846 __u32 rcount;
845 int record_len = 4; 847 int record_len = 4;
846 848
847 header = (jbd2_journal_revoke_header_t *) bh->b_data; 849 header = (jbd2_journal_revoke_header_t *) bh->b_data;
848 offset = sizeof(jbd2_journal_revoke_header_t); 850 offset = sizeof(jbd2_journal_revoke_header_t);
849 max = be32_to_cpu(header->r_count); 851 rcount = be32_to_cpu(header->r_count);
850 852
851 if (!jbd2_revoke_block_csum_verify(journal, header)) 853 if (!jbd2_revoke_block_csum_verify(journal, header))
852 return -EINVAL; 854 return -EINVAL;
853 855
856 if (jbd2_journal_has_csum_v2or3(journal))
857 csum_size = sizeof(struct jbd2_journal_revoke_tail);
858 if (rcount > journal->j_blocksize - csum_size)
859 return -EINVAL;
860 max = rcount;
861
854 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 862 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
855 record_len = 8; 863 record_len = 8;
856 864
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index c6cbaef2bda1..14214da80eb8 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -577,7 +577,7 @@ static void write_one_revoke_record(journal_t *journal,
577{ 577{
578 int csum_size = 0; 578 int csum_size = 0;
579 struct buffer_head *descriptor; 579 struct buffer_head *descriptor;
580 int offset; 580 int sz, offset;
581 journal_header_t *header; 581 journal_header_t *header;
582 582
583 /* If we are already aborting, this all becomes a noop. We 583 /* If we are already aborting, this all becomes a noop. We
@@ -594,9 +594,14 @@ static void write_one_revoke_record(journal_t *journal,
594 if (jbd2_journal_has_csum_v2or3(journal)) 594 if (jbd2_journal_has_csum_v2or3(journal))
595 csum_size = sizeof(struct jbd2_journal_revoke_tail); 595 csum_size = sizeof(struct jbd2_journal_revoke_tail);
596 596
597 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
598 sz = 8;
599 else
600 sz = 4;
601
597 /* Make sure we have a descriptor with space left for the record */ 602 /* Make sure we have a descriptor with space left for the record */
598 if (descriptor) { 603 if (descriptor) {
599 if (offset >= journal->j_blocksize - csum_size) { 604 if (offset + sz > journal->j_blocksize - csum_size) {
600 flush_descriptor(journal, descriptor, offset, write_op); 605 flush_descriptor(journal, descriptor, offset, write_op);
601 descriptor = NULL; 606 descriptor = NULL;
602 } 607 }
@@ -619,16 +624,13 @@ static void write_one_revoke_record(journal_t *journal,
619 *descriptorp = descriptor; 624 *descriptorp = descriptor;
620 } 625 }
621 626
622 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { 627 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
623 * ((__be64 *)(&descriptor->b_data[offset])) = 628 * ((__be64 *)(&descriptor->b_data[offset])) =
624 cpu_to_be64(record->blocknr); 629 cpu_to_be64(record->blocknr);
625 offset += 8; 630 else
626
627 } else {
628 * ((__be32 *)(&descriptor->b_data[offset])) = 631 * ((__be32 *)(&descriptor->b_data[offset])) =
629 cpu_to_be32(record->blocknr); 632 cpu_to_be32(record->blocknr);
630 offset += 4; 633 offset += sz;
631 }
632 634
633 *offsetp = offset; 635 *offsetp = offset;
634} 636}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 5f09370c90a8..ff2f2e6ad311 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -551,7 +551,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
551 int result; 551 int result;
552 int wanted; 552 int wanted;
553 553
554 WARN_ON(!transaction);
555 if (is_handle_aborted(handle)) 554 if (is_handle_aborted(handle))
556 return -EROFS; 555 return -EROFS;
557 journal = transaction->t_journal; 556 journal = transaction->t_journal;
@@ -627,7 +626,6 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
627 tid_t tid; 626 tid_t tid;
628 int need_to_start, ret; 627 int need_to_start, ret;
629 628
630 WARN_ON(!transaction);
631 /* If we've had an abort of any type, don't even think about 629 /* If we've had an abort of any type, don't even think about
632 * actually doing the restart! */ 630 * actually doing the restart! */
633 if (is_handle_aborted(handle)) 631 if (is_handle_aborted(handle))
@@ -785,7 +783,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
785 int need_copy = 0; 783 int need_copy = 0;
786 unsigned long start_lock, time_lock; 784 unsigned long start_lock, time_lock;
787 785
788 WARN_ON(!transaction);
789 if (is_handle_aborted(handle)) 786 if (is_handle_aborted(handle))
790 return -EROFS; 787 return -EROFS;
791 journal = transaction->t_journal; 788 journal = transaction->t_journal;
@@ -1051,7 +1048,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
1051 int err; 1048 int err;
1052 1049
1053 jbd_debug(5, "journal_head %p\n", jh); 1050 jbd_debug(5, "journal_head %p\n", jh);
1054 WARN_ON(!transaction);
1055 err = -EROFS; 1051 err = -EROFS;
1056 if (is_handle_aborted(handle)) 1052 if (is_handle_aborted(handle))
1057 goto out; 1053 goto out;
@@ -1266,7 +1262,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1266 struct journal_head *jh; 1262 struct journal_head *jh;
1267 int ret = 0; 1263 int ret = 0;
1268 1264
1269 WARN_ON(!transaction);
1270 if (is_handle_aborted(handle)) 1265 if (is_handle_aborted(handle))
1271 return -EROFS; 1266 return -EROFS;
1272 journal = transaction->t_journal; 1267 journal = transaction->t_journal;
@@ -1397,7 +1392,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1397 int err = 0; 1392 int err = 0;
1398 int was_modified = 0; 1393 int was_modified = 0;
1399 1394
1400 WARN_ON(!transaction);
1401 if (is_handle_aborted(handle)) 1395 if (is_handle_aborted(handle))
1402 return -EROFS; 1396 return -EROFS;
1403 journal = transaction->t_journal; 1397 journal = transaction->t_journal;
@@ -1530,8 +1524,22 @@ int jbd2_journal_stop(handle_t *handle)
1530 tid_t tid; 1524 tid_t tid;
1531 pid_t pid; 1525 pid_t pid;
1532 1526
1533 if (!transaction) 1527 if (!transaction) {
1534 goto free_and_exit; 1528 /*
1529 * Handle is already detached from the transaction so
1530 * there is nothing to do other than decrease a refcount,
1531 * or free the handle if refcount drops to zero
1532 */
1533 if (--handle->h_ref > 0) {
1534 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1535 handle->h_ref);
1536 return err;
1537 } else {
1538 if (handle->h_rsv_handle)
1539 jbd2_free_handle(handle->h_rsv_handle);
1540 goto free_and_exit;
1541 }
1542 }
1535 journal = transaction->t_journal; 1543 journal = transaction->t_journal;
1536 1544
1537 J_ASSERT(journal_current_handle() == handle); 1545 J_ASSERT(journal_current_handle() == handle);
@@ -2373,7 +2381,6 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
2373 transaction_t *transaction = handle->h_transaction; 2381 transaction_t *transaction = handle->h_transaction;
2374 journal_t *journal; 2382 journal_t *journal;
2375 2383
2376 WARN_ON(!transaction);
2377 if (is_handle_aborted(handle)) 2384 if (is_handle_aborted(handle))
2378 return -EROFS; 2385 return -EROFS;
2379 journal = transaction->t_journal; 2386 journal = transaction->t_journal;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index f131fc23ffc4..fffca9517321 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -518,7 +518,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
518 if (!kn) 518 if (!kn)
519 goto err_out1; 519 goto err_out1;
520 520
521 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); 521 /*
522 * If the ino of the sysfs entry created for a kmem cache gets
523 * allocated from an ida layer, which is accounted to the memcg that
524 * owns the cache, the memcg will get pinned forever. So do not account
525 * ino ida allocations.
526 */
527 ret = ida_simple_get(&root->ino_ida, 1, 0,
528 GFP_KERNEL | __GFP_NOACCOUNT);
522 if (ret < 0) 529 if (ret < 0)
523 goto err_out2; 530 goto err_out2;
524 kn->ino = ret; 531 kn->ino = ret;
diff --git a/fs/namei.c b/fs/namei.c
index 4a8d998b7274..fe30d3be43a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1415,6 +1415,7 @@ static int lookup_fast(struct nameidata *nd,
1415 */ 1415 */
1416 if (nd->flags & LOOKUP_RCU) { 1416 if (nd->flags & LOOKUP_RCU) {
1417 unsigned seq; 1417 unsigned seq;
1418 bool negative;
1418 dentry = __d_lookup_rcu(parent, &nd->last, &seq); 1419 dentry = __d_lookup_rcu(parent, &nd->last, &seq);
1419 if (!dentry) 1420 if (!dentry)
1420 goto unlazy; 1421 goto unlazy;
@@ -1424,8 +1425,11 @@ static int lookup_fast(struct nameidata *nd,
1424 * the dentry name information from lookup. 1425 * the dentry name information from lookup.
1425 */ 1426 */
1426 *inode = dentry->d_inode; 1427 *inode = dentry->d_inode;
1428 negative = d_is_negative(dentry);
1427 if (read_seqcount_retry(&dentry->d_seq, seq)) 1429 if (read_seqcount_retry(&dentry->d_seq, seq))
1428 return -ECHILD; 1430 return -ECHILD;
1431 if (negative)
1432 return -ENOENT;
1429 1433
1430 /* 1434 /*
1431 * This sequence count validates that the parent had no 1435 * This sequence count validates that the parent had no
@@ -1472,6 +1476,10 @@ unlazy:
1472 goto need_lookup; 1476 goto need_lookup;
1473 } 1477 }
1474 1478
1479 if (unlikely(d_is_negative(dentry))) {
1480 dput(dentry);
1481 return -ENOENT;
1482 }
1475 path->mnt = mnt; 1483 path->mnt = mnt;
1476 path->dentry = dentry; 1484 path->dentry = dentry;
1477 err = follow_managed(path, nd->flags); 1485 err = follow_managed(path, nd->flags);
@@ -1583,10 +1591,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1583 goto out_err; 1591 goto out_err;
1584 1592
1585 inode = path->dentry->d_inode; 1593 inode = path->dentry->d_inode;
1594 err = -ENOENT;
1595 if (d_is_negative(path->dentry))
1596 goto out_path_put;
1586 } 1597 }
1587 err = -ENOENT;
1588 if (d_is_negative(path->dentry))
1589 goto out_path_put;
1590 1598
1591 if (should_follow_link(path->dentry, follow)) { 1599 if (should_follow_link(path->dentry, follow)) {
1592 if (nd->flags & LOOKUP_RCU) { 1600 if (nd->flags & LOOKUP_RCU) {
@@ -3036,14 +3044,13 @@ retry_lookup:
3036 3044
3037 BUG_ON(nd->flags & LOOKUP_RCU); 3045 BUG_ON(nd->flags & LOOKUP_RCU);
3038 inode = path->dentry->d_inode; 3046 inode = path->dentry->d_inode;
3039finish_lookup:
3040 /* we _can_ be in RCU mode here */
3041 error = -ENOENT; 3047 error = -ENOENT;
3042 if (d_is_negative(path->dentry)) { 3048 if (d_is_negative(path->dentry)) {
3043 path_to_nameidata(path, nd); 3049 path_to_nameidata(path, nd);
3044 goto out; 3050 goto out;
3045 } 3051 }
3046 3052finish_lookup:
3053 /* we _can_ be in RCU mode here */
3047 if (should_follow_link(path->dentry, !symlink_ok)) { 3054 if (should_follow_link(path->dentry, !symlink_ok)) {
3048 if (nd->flags & LOOKUP_RCU) { 3055 if (nd->flags & LOOKUP_RCU) {
3049 if (unlikely(nd->path.mnt != path->mnt || 3056 if (unlikely(nd->path.mnt != path->mnt ||
@@ -3226,7 +3233,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3226 3233
3227 if (unlikely(file->f_flags & __O_TMPFILE)) { 3234 if (unlikely(file->f_flags & __O_TMPFILE)) {
3228 error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); 3235 error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
3229 goto out; 3236 goto out2;
3230 } 3237 }
3231 3238
3232 error = path_init(dfd, pathname, flags, nd); 3239 error = path_init(dfd, pathname, flags, nd);
@@ -3256,6 +3263,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3256 } 3263 }
3257out: 3264out:
3258 path_cleanup(nd); 3265 path_cleanup(nd);
3266out2:
3259 if (!(opened & FILE_OPENED)) { 3267 if (!(opened & FILE_OPENED)) {
3260 BUG_ON(!error); 3268 BUG_ON(!error);
3261 put_filp(file); 3269 put_filp(file);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1f4f9dac6e5a..1b9e11167bae 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3179,6 +3179,12 @@ bool fs_fully_visible(struct file_system_type *type)
3179 if (mnt->mnt.mnt_sb->s_type != type) 3179 if (mnt->mnt.mnt_sb->s_type != type)
3180 continue; 3180 continue;
3181 3181
3182 /* This mount is not fully visible if its root directory
3183 * is not the root directory of the filesystem.
3184 */
3185 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3186 continue;
3187
3182 /* This mount is not fully visible if there are any child mounts 3188 /* This mount is not fully visible if there are any child mounts
3183 * that cover anything except for empty directories. 3189 * that cover anything except for empty directories.
3184 */ 3190 */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 03d647bf195d..cdefaa331a07 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
181} 181}
182 182
183const struct nfsd4_layout_ops bl_layout_ops = { 183const struct nfsd4_layout_ops bl_layout_ops = {
184 /*
185 * Pretend that we send notification to the client. This is a blatant
186 * lie to force recent Linux clients to cache our device IDs.
187 * We rarely ever change the device ID, so the harm of leaking deviceids
188 * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
189 * in this regard, but I filed errata 4119 for this a while ago, and
190 * hopefully the Linux client will eventually start caching deviceids
191 * without this again.
192 */
193 .notify_types =
194 NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
184 .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, 195 .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
185 .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, 196 .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
186 .proc_layoutget = nfsd4_block_proc_layoutget, 197 .proc_layoutget = nfsd4_block_proc_layoutget,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 58277859a467..5694cfb7a47b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
224} 224}
225 225
226static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, 226static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
227 enum nfsstat4 *status) 227 int *status)
228{ 228{
229 __be32 *p; 229 __be32 *p;
230 u32 op; 230 u32 op;
@@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
235 op = be32_to_cpup(p++); 235 op = be32_to_cpup(p++);
236 if (unlikely(op != expected)) 236 if (unlikely(op != expected))
237 goto out_unexpected; 237 goto out_unexpected;
238 *status = be32_to_cpup(p); 238 *status = nfs_cb_stat_to_errno(be32_to_cpup(p));
239 return 0; 239 return 0;
240out_overflow: 240out_overflow:
241 print_overflow_msg(__func__, xdr); 241 print_overflow_msg(__func__, xdr);
@@ -446,22 +446,16 @@ out_overflow:
446static int decode_cb_sequence4res(struct xdr_stream *xdr, 446static int decode_cb_sequence4res(struct xdr_stream *xdr,
447 struct nfsd4_callback *cb) 447 struct nfsd4_callback *cb)
448{ 448{
449 enum nfsstat4 nfserr;
450 int status; 449 int status;
451 450
452 if (cb->cb_minorversion == 0) 451 if (cb->cb_minorversion == 0)
453 return 0; 452 return 0;
454 453
455 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr); 454 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
456 if (unlikely(status)) 455 if (unlikely(status || cb->cb_status))
457 goto out; 456 return status;
458 if (unlikely(nfserr != NFS4_OK)) 457
459 goto out_default; 458 return decode_cb_sequence4resok(xdr, cb);
460 status = decode_cb_sequence4resok(xdr, cb);
461out:
462 return status;
463out_default:
464 return nfs_cb_stat_to_errno(nfserr);
465} 459}
466 460
467/* 461/*
@@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
524 struct nfsd4_callback *cb) 518 struct nfsd4_callback *cb)
525{ 519{
526 struct nfs4_cb_compound_hdr hdr; 520 struct nfs4_cb_compound_hdr hdr;
527 enum nfsstat4 nfserr;
528 int status; 521 int status;
529 522
530 status = decode_cb_compound4res(xdr, &hdr); 523 status = decode_cb_compound4res(xdr, &hdr);
531 if (unlikely(status)) 524 if (unlikely(status))
532 goto out; 525 return status;
533 526
534 if (cb != NULL) { 527 if (cb != NULL) {
535 status = decode_cb_sequence4res(xdr, cb); 528 status = decode_cb_sequence4res(xdr, cb);
536 if (unlikely(status)) 529 if (unlikely(status || cb->cb_status))
537 goto out; 530 return status;
538 } 531 }
539 532
540 status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr); 533 return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
541 if (unlikely(status))
542 goto out;
543 if (unlikely(nfserr != NFS4_OK))
544 status = nfs_cb_stat_to_errno(nfserr);
545out:
546 return status;
547} 534}
548 535
549#ifdef CONFIG_NFSD_PNFS 536#ifdef CONFIG_NFSD_PNFS
@@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
621 struct nfsd4_callback *cb) 608 struct nfsd4_callback *cb)
622{ 609{
623 struct nfs4_cb_compound_hdr hdr; 610 struct nfs4_cb_compound_hdr hdr;
624 enum nfsstat4 nfserr;
625 int status; 611 int status;
626 612
627 status = decode_cb_compound4res(xdr, &hdr); 613 status = decode_cb_compound4res(xdr, &hdr);
628 if (unlikely(status)) 614 if (unlikely(status))
629 goto out; 615 return status;
616
630 if (cb) { 617 if (cb) {
631 status = decode_cb_sequence4res(xdr, cb); 618 status = decode_cb_sequence4res(xdr, cb);
632 if (unlikely(status)) 619 if (unlikely(status || cb->cb_status))
633 goto out; 620 return status;
634 } 621 }
635 status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); 622 return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
636 if (unlikely(status))
637 goto out;
638 if (unlikely(nfserr != NFS4_OK))
639 status = nfs_cb_stat_to_errno(nfserr);
640out:
641 return status;
642} 623}
643#endif /* CONFIG_NFSD_PNFS */ 624#endif /* CONFIG_NFSD_PNFS */
644 625
@@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
898 if (!nfsd41_cb_get_slot(clp, task)) 879 if (!nfsd41_cb_get_slot(clp, task))
899 return; 880 return;
900 } 881 }
901 spin_lock(&clp->cl_lock);
902 if (list_empty(&cb->cb_per_client)) {
903 /* This is the first call, not a restart */
904 cb->cb_done = false;
905 list_add(&cb->cb_per_client, &clp->cl_callbacks);
906 }
907 spin_unlock(&clp->cl_lock);
908 rpc_call_start(task); 882 rpc_call_start(task);
909} 883}
910 884
@@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
918 892
919 if (clp->cl_minorversion) { 893 if (clp->cl_minorversion) {
920 /* No need for lock, access serialized in nfsd4_cb_prepare */ 894 /* No need for lock, access serialized in nfsd4_cb_prepare */
921 ++clp->cl_cb_session->se_cb_seq_nr; 895 if (!task->tk_status)
896 ++clp->cl_cb_session->se_cb_seq_nr;
922 clear_bit(0, &clp->cl_cb_slot_busy); 897 clear_bit(0, &clp->cl_cb_slot_busy);
923 rpc_wake_up_next(&clp->cl_cb_waitq); 898 rpc_wake_up_next(&clp->cl_cb_waitq);
924 dprintk("%s: freed slot, new seqid=%d\n", __func__, 899 dprintk("%s: freed slot, new seqid=%d\n", __func__,
925 clp->cl_cb_session->se_cb_seq_nr); 900 clp->cl_cb_session->se_cb_seq_nr);
926 } 901 }
927 902
928 if (clp->cl_cb_client != task->tk_client) { 903 /*
929 /* We're shutting down or changing cl_cb_client; leave 904 * If the backchannel connection was shut down while this
930 * it to nfsd4_process_cb_update to restart the call if 905 * task was queued, we need to resubmit it after setting up
931 * necessary. */ 906 * a new backchannel connection.
907 *
908 * Note that if we lost our callback connection permanently
909 * the submission code will error out, so we don't need to
910 * handle that case here.
911 */
912 if (task->tk_flags & RPC_TASK_KILLED) {
913 task->tk_status = 0;
914 cb->cb_need_restart = true;
932 return; 915 return;
933 } 916 }
934 917
935 if (cb->cb_done) 918 if (cb->cb_status) {
936 return; 919 WARN_ON_ONCE(task->tk_status);
920 task->tk_status = cb->cb_status;
921 }
937 922
938 switch (cb->cb_ops->done(cb, task)) { 923 switch (cb->cb_ops->done(cb, task)) {
939 case 0: 924 case 0:
@@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
949 default: 934 default:
950 BUG(); 935 BUG();
951 } 936 }
952 cb->cb_done = true;
953} 937}
954 938
955static void nfsd4_cb_release(void *calldata) 939static void nfsd4_cb_release(void *calldata)
956{ 940{
957 struct nfsd4_callback *cb = calldata; 941 struct nfsd4_callback *cb = calldata;
958 struct nfs4_client *clp = cb->cb_clp;
959
960 if (cb->cb_done) {
961 spin_lock(&clp->cl_lock);
962 list_del(&cb->cb_per_client);
963 spin_unlock(&clp->cl_lock);
964 942
943 if (cb->cb_need_restart)
944 nfsd4_run_cb(cb);
945 else
965 cb->cb_ops->release(cb); 946 cb->cb_ops->release(cb);
966 } 947
967} 948}
968 949
969static const struct rpc_call_ops nfsd4_cb_ops = { 950static const struct rpc_call_ops nfsd4_cb_ops = {
@@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
1058 nfsd4_mark_cb_down(clp, err); 1039 nfsd4_mark_cb_down(clp, err);
1059 return; 1040 return;
1060 } 1041 }
1061 /* Yay, the callback channel's back! Restart any callbacks: */
1062 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
1063 queue_work(callback_wq, &cb->cb_work);
1064} 1042}
1065 1043
1066static void 1044static void
@@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work)
1071 struct nfs4_client *clp = cb->cb_clp; 1049 struct nfs4_client *clp = cb->cb_clp;
1072 struct rpc_clnt *clnt; 1050 struct rpc_clnt *clnt;
1073 1051
1074 if (cb->cb_ops && cb->cb_ops->prepare) 1052 if (cb->cb_need_restart) {
1075 cb->cb_ops->prepare(cb); 1053 cb->cb_need_restart = false;
1054 } else {
1055 if (cb->cb_ops && cb->cb_ops->prepare)
1056 cb->cb_ops->prepare(cb);
1057 }
1076 1058
1077 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) 1059 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
1078 nfsd4_process_cb_update(cb); 1060 nfsd4_process_cb_update(cb);
@@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work)
1084 cb->cb_ops->release(cb); 1066 cb->cb_ops->release(cb);
1085 return; 1067 return;
1086 } 1068 }
1069
1070 /*
1071 * Don't send probe messages for 4.1 or later.
1072 */
1073 if (!cb->cb_ops && clp->cl_minorversion) {
1074 clp->cl_cb_state = NFSD4_CB_UP;
1075 return;
1076 }
1077
1087 cb->cb_msg.rpc_cred = clp->cl_cb_cred; 1078 cb->cb_msg.rpc_cred = clp->cl_cb_cred;
1088 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 1079 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
1089 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); 1080 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
@@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
1098 cb->cb_msg.rpc_resp = cb; 1089 cb->cb_msg.rpc_resp = cb;
1099 cb->cb_ops = ops; 1090 cb->cb_ops = ops;
1100 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); 1091 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
1101 INIT_LIST_HEAD(&cb->cb_per_client); 1092 cb->cb_status = 0;
1102 cb->cb_done = true; 1093 cb->cb_need_restart = false;
1103} 1094}
1104 1095
1105void nfsd4_run_cb(struct nfsd4_callback *cb) 1096void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 38f2d7abe3a7..039f9c8a95e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab;
94static struct kmem_cache *file_slab; 94static struct kmem_cache *file_slab;
95static struct kmem_cache *stateid_slab; 95static struct kmem_cache *stateid_slab;
96static struct kmem_cache *deleg_slab; 96static struct kmem_cache *deleg_slab;
97static struct kmem_cache *odstate_slab;
97 98
98static void free_session(struct nfsd4_session *); 99static void free_session(struct nfsd4_session *);
99 100
@@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi)
281 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { 282 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
282 hlist_del_rcu(&fi->fi_hash); 283 hlist_del_rcu(&fi->fi_hash);
283 spin_unlock(&state_lock); 284 spin_unlock(&state_lock);
285 WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
284 WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); 286 WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
285 call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); 287 call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
286 } 288 }
@@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
471 __nfs4_file_put_access(fp, O_RDONLY); 473 __nfs4_file_put_access(fp, O_RDONLY);
472} 474}
473 475
476/*
477 * Allocate a new open/delegation state counter. This is needed for
478 * pNFS for proper return on close semantics.
479 *
480 * Note that we only allocate it for pNFS-enabled exports, otherwise
481 * all pointers to struct nfs4_clnt_odstate are always NULL.
482 */
483static struct nfs4_clnt_odstate *
484alloc_clnt_odstate(struct nfs4_client *clp)
485{
486 struct nfs4_clnt_odstate *co;
487
488 co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
489 if (co) {
490 co->co_client = clp;
491 atomic_set(&co->co_odcount, 1);
492 }
493 return co;
494}
495
496static void
497hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
498{
499 struct nfs4_file *fp = co->co_file;
500
501 lockdep_assert_held(&fp->fi_lock);
502 list_add(&co->co_perfile, &fp->fi_clnt_odstate);
503}
504
505static inline void
506get_clnt_odstate(struct nfs4_clnt_odstate *co)
507{
508 if (co)
509 atomic_inc(&co->co_odcount);
510}
511
512static void
513put_clnt_odstate(struct nfs4_clnt_odstate *co)
514{
515 struct nfs4_file *fp;
516
517 if (!co)
518 return;
519
520 fp = co->co_file;
521 if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
522 list_del(&co->co_perfile);
523 spin_unlock(&fp->fi_lock);
524
525 nfsd4_return_all_file_layouts(co->co_client, fp);
526 kmem_cache_free(odstate_slab, co);
527 }
528}
529
530static struct nfs4_clnt_odstate *
531find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
532{
533 struct nfs4_clnt_odstate *co;
534 struct nfs4_client *cl;
535
536 if (!new)
537 return NULL;
538
539 cl = new->co_client;
540
541 spin_lock(&fp->fi_lock);
542 list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
543 if (co->co_client == cl) {
544 get_clnt_odstate(co);
545 goto out;
546 }
547 }
548 co = new;
549 co->co_file = fp;
550 hash_clnt_odstate_locked(new);
551out:
552 spin_unlock(&fp->fi_lock);
553 return co;
554}
555
474struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, 556struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
475 struct kmem_cache *slab) 557 struct kmem_cache *slab)
476{ 558{
@@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh)
606} 688}
607 689
608static struct nfs4_delegation * 690static struct nfs4_delegation *
609alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) 691alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
692 struct nfs4_clnt_odstate *odstate)
610{ 693{
611 struct nfs4_delegation *dp; 694 struct nfs4_delegation *dp;
612 long n; 695 long n;
@@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
631 INIT_LIST_HEAD(&dp->dl_perfile); 714 INIT_LIST_HEAD(&dp->dl_perfile);
632 INIT_LIST_HEAD(&dp->dl_perclnt); 715 INIT_LIST_HEAD(&dp->dl_perclnt);
633 INIT_LIST_HEAD(&dp->dl_recall_lru); 716 INIT_LIST_HEAD(&dp->dl_recall_lru);
717 dp->dl_clnt_odstate = odstate;
718 get_clnt_odstate(odstate);
634 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 719 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
635 dp->dl_retries = 1; 720 dp->dl_retries = 1;
636 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, 721 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
714 spin_lock(&state_lock); 799 spin_lock(&state_lock);
715 unhash_delegation_locked(dp); 800 unhash_delegation_locked(dp);
716 spin_unlock(&state_lock); 801 spin_unlock(&state_lock);
802 put_clnt_odstate(dp->dl_clnt_odstate);
717 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 803 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
718 nfs4_put_stid(&dp->dl_stid); 804 nfs4_put_stid(&dp->dl_stid);
719} 805}
@@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
724 810
725 WARN_ON(!list_empty(&dp->dl_recall_lru)); 811 WARN_ON(!list_empty(&dp->dl_recall_lru));
726 812
813 put_clnt_odstate(dp->dl_clnt_odstate);
727 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 814 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
728 815
729 if (clp->cl_minorversion == 0) 816 if (clp->cl_minorversion == 0)
@@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
933{ 1020{
934 struct nfs4_ol_stateid *stp = openlockstateid(stid); 1021 struct nfs4_ol_stateid *stp = openlockstateid(stid);
935 1022
1023 put_clnt_odstate(stp->st_clnt_odstate);
936 release_all_access(stp); 1024 release_all_access(stp);
937 if (stp->st_stateowner) 1025 if (stp->st_stateowner)
938 nfs4_put_stateowner(stp->st_stateowner); 1026 nfs4_put_stateowner(stp->st_stateowner);
@@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1538 INIT_LIST_HEAD(&clp->cl_openowners); 1626 INIT_LIST_HEAD(&clp->cl_openowners);
1539 INIT_LIST_HEAD(&clp->cl_delegations); 1627 INIT_LIST_HEAD(&clp->cl_delegations);
1540 INIT_LIST_HEAD(&clp->cl_lru); 1628 INIT_LIST_HEAD(&clp->cl_lru);
1541 INIT_LIST_HEAD(&clp->cl_callbacks);
1542 INIT_LIST_HEAD(&clp->cl_revoked); 1629 INIT_LIST_HEAD(&clp->cl_revoked);
1543#ifdef CONFIG_NFSD_PNFS 1630#ifdef CONFIG_NFSD_PNFS
1544 INIT_LIST_HEAD(&clp->cl_lo_states); 1631 INIT_LIST_HEAD(&clp->cl_lo_states);
@@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp)
1634 while (!list_empty(&reaplist)) { 1721 while (!list_empty(&reaplist)) {
1635 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1722 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1636 list_del_init(&dp->dl_recall_lru); 1723 list_del_init(&dp->dl_recall_lru);
1724 put_clnt_odstate(dp->dl_clnt_odstate);
1637 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 1725 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
1638 nfs4_put_stid(&dp->dl_stid); 1726 nfs4_put_stid(&dp->dl_stid);
1639 } 1727 }
@@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3057 spin_lock_init(&fp->fi_lock); 3145 spin_lock_init(&fp->fi_lock);
3058 INIT_LIST_HEAD(&fp->fi_stateids); 3146 INIT_LIST_HEAD(&fp->fi_stateids);
3059 INIT_LIST_HEAD(&fp->fi_delegations); 3147 INIT_LIST_HEAD(&fp->fi_delegations);
3148 INIT_LIST_HEAD(&fp->fi_clnt_odstate);
3060 fh_copy_shallow(&fp->fi_fhandle, fh); 3149 fh_copy_shallow(&fp->fi_fhandle, fh);
3061 fp->fi_deleg_file = NULL; 3150 fp->fi_deleg_file = NULL;
3062 fp->fi_had_conflict = false; 3151 fp->fi_had_conflict = false;
@@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3073void 3162void
3074nfsd4_free_slabs(void) 3163nfsd4_free_slabs(void)
3075{ 3164{
3165 kmem_cache_destroy(odstate_slab);
3076 kmem_cache_destroy(openowner_slab); 3166 kmem_cache_destroy(openowner_slab);
3077 kmem_cache_destroy(lockowner_slab); 3167 kmem_cache_destroy(lockowner_slab);
3078 kmem_cache_destroy(file_slab); 3168 kmem_cache_destroy(file_slab);
@@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void)
3103 sizeof(struct nfs4_delegation), 0, 0, NULL); 3193 sizeof(struct nfs4_delegation), 0, 0, NULL);
3104 if (deleg_slab == NULL) 3194 if (deleg_slab == NULL)
3105 goto out_free_stateid_slab; 3195 goto out_free_stateid_slab;
3196 odstate_slab = kmem_cache_create("nfsd4_odstate",
3197 sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
3198 if (odstate_slab == NULL)
3199 goto out_free_deleg_slab;
3106 return 0; 3200 return 0;
3107 3201
3202out_free_deleg_slab:
3203 kmem_cache_destroy(deleg_slab);
3108out_free_stateid_slab: 3204out_free_stateid_slab:
3109 kmem_cache_destroy(stateid_slab); 3205 kmem_cache_destroy(stateid_slab);
3110out_free_file_slab: 3206out_free_file_slab:
@@ -3581,6 +3677,14 @@ alloc_stateid:
3581 open->op_stp = nfs4_alloc_open_stateid(clp); 3677 open->op_stp = nfs4_alloc_open_stateid(clp);
3582 if (!open->op_stp) 3678 if (!open->op_stp)
3583 return nfserr_jukebox; 3679 return nfserr_jukebox;
3680
3681 if (nfsd4_has_session(cstate) &&
3682 (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
3683 open->op_odstate = alloc_clnt_odstate(clp);
3684 if (!open->op_odstate)
3685 return nfserr_jukebox;
3686 }
3687
3584 return nfs_ok; 3688 return nfs_ok;
3585} 3689}
3586 3690
@@ -3869,7 +3973,7 @@ out_fput:
3869 3973
3870static struct nfs4_delegation * 3974static struct nfs4_delegation *
3871nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, 3975nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3872 struct nfs4_file *fp) 3976 struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
3873{ 3977{
3874 int status; 3978 int status;
3875 struct nfs4_delegation *dp; 3979 struct nfs4_delegation *dp;
@@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3877 if (fp->fi_had_conflict) 3981 if (fp->fi_had_conflict)
3878 return ERR_PTR(-EAGAIN); 3982 return ERR_PTR(-EAGAIN);
3879 3983
3880 dp = alloc_init_deleg(clp, fh); 3984 dp = alloc_init_deleg(clp, fh, odstate);
3881 if (!dp) 3985 if (!dp)
3882 return ERR_PTR(-ENOMEM); 3986 return ERR_PTR(-ENOMEM);
3883 3987
@@ -3903,6 +4007,7 @@ out_unlock:
3903 spin_unlock(&state_lock); 4007 spin_unlock(&state_lock);
3904out: 4008out:
3905 if (status) { 4009 if (status) {
4010 put_clnt_odstate(dp->dl_clnt_odstate);
3906 nfs4_put_stid(&dp->dl_stid); 4011 nfs4_put_stid(&dp->dl_stid);
3907 return ERR_PTR(status); 4012 return ERR_PTR(status);
3908 } 4013 }
@@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
3980 default: 4085 default:
3981 goto out_no_deleg; 4086 goto out_no_deleg;
3982 } 4087 }
3983 dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); 4088 dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
3984 if (IS_ERR(dp)) 4089 if (IS_ERR(dp))
3985 goto out_no_deleg; 4090 goto out_no_deleg;
3986 4091
@@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
4069 release_open_stateid(stp); 4174 release_open_stateid(stp);
4070 goto out; 4175 goto out;
4071 } 4176 }
4177
4178 stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
4179 open->op_odstate);
4180 if (stp->st_clnt_odstate == open->op_odstate)
4181 open->op_odstate = NULL;
4072 } 4182 }
4073 update_stateid(&stp->st_stid.sc_stateid); 4183 update_stateid(&stp->st_stid.sc_stateid);
4074 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4184 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
4129 kmem_cache_free(file_slab, open->op_file); 4239 kmem_cache_free(file_slab, open->op_file);
4130 if (open->op_stp) 4240 if (open->op_stp)
4131 nfs4_put_stid(&open->op_stp->st_stid); 4241 nfs4_put_stid(&open->op_stp->st_stid);
4242 if (open->op_odstate)
4243 kmem_cache_free(odstate_slab, open->op_odstate);
4132} 4244}
4133 4245
4134__be32 4246__be32
@@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
4385 return nfserr_old_stateid; 4497 return nfserr_old_stateid;
4386} 4498}
4387 4499
4500static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
4501{
4502 if (ols->st_stateowner->so_is_open_owner &&
4503 !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
4504 return nfserr_bad_stateid;
4505 return nfs_ok;
4506}
4507
4388static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) 4508static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
4389{ 4509{
4390 struct nfs4_stid *s; 4510 struct nfs4_stid *s;
4391 struct nfs4_ol_stateid *ols;
4392 __be32 status = nfserr_bad_stateid; 4511 __be32 status = nfserr_bad_stateid;
4393 4512
4394 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4513 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
@@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
4418 break; 4537 break;
4419 case NFS4_OPEN_STID: 4538 case NFS4_OPEN_STID:
4420 case NFS4_LOCK_STID: 4539 case NFS4_LOCK_STID:
4421 ols = openlockstateid(s); 4540 status = nfsd4_check_openowner_confirmed(openlockstateid(s));
4422 if (ols->st_stateowner->so_is_open_owner
4423 && !(openowner(ols->st_stateowner)->oo_flags
4424 & NFS4_OO_CONFIRMED))
4425 status = nfserr_bad_stateid;
4426 else
4427 status = nfs_ok;
4428 break; 4541 break;
4429 default: 4542 default:
4430 printk("unknown stateid type %x\n", s->sc_type); 4543 printk("unknown stateid type %x\n", s->sc_type);
@@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
4516 status = nfs4_check_fh(current_fh, stp); 4629 status = nfs4_check_fh(current_fh, stp);
4517 if (status) 4630 if (status)
4518 goto out; 4631 goto out;
4519 if (stp->st_stateowner->so_is_open_owner 4632 status = nfsd4_check_openowner_confirmed(stp);
4520 && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) 4633 if (status)
4521 goto out; 4634 goto out;
4522 status = nfs4_check_openmode(stp, flags); 4635 status = nfs4_check_openmode(stp, flags);
4523 if (status) 4636 if (status)
@@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4852 update_stateid(&stp->st_stid.sc_stateid); 4965 update_stateid(&stp->st_stid.sc_stateid);
4853 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4966 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4854 4967
4855 nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
4856 stp->st_stid.sc_file);
4857
4858 nfsd4_close_open_stateid(stp); 4968 nfsd4_close_open_stateid(stp);
4859 4969
4860 /* put reference from nfs4_preprocess_seqid_op */ 4970 /* put reference from nfs4_preprocess_seqid_op */
@@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net)
6488 list_for_each_safe(pos, next, &reaplist) { 6598 list_for_each_safe(pos, next, &reaplist) {
6489 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6599 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6490 list_del_init(&dp->dl_recall_lru); 6600 list_del_init(&dp->dl_recall_lru);
6601 put_clnt_odstate(dp->dl_clnt_odstate);
6491 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 6602 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
6492 nfs4_put_stid(&dp->dl_stid); 6603 nfs4_put_stid(&dp->dl_stid);
6493 } 6604 }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4f3bfeb11766..dbc4f85a5008 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -63,12 +63,12 @@ typedef struct {
63 63
64struct nfsd4_callback { 64struct nfsd4_callback {
65 struct nfs4_client *cb_clp; 65 struct nfs4_client *cb_clp;
66 struct list_head cb_per_client;
67 u32 cb_minorversion; 66 u32 cb_minorversion;
68 struct rpc_message cb_msg; 67 struct rpc_message cb_msg;
69 struct nfsd4_callback_ops *cb_ops; 68 struct nfsd4_callback_ops *cb_ops;
70 struct work_struct cb_work; 69 struct work_struct cb_work;
71 bool cb_done; 70 int cb_status;
71 bool cb_need_restart;
72}; 72};
73 73
74struct nfsd4_callback_ops { 74struct nfsd4_callback_ops {
@@ -126,6 +126,7 @@ struct nfs4_delegation {
126 struct list_head dl_perfile; 126 struct list_head dl_perfile;
127 struct list_head dl_perclnt; 127 struct list_head dl_perclnt;
128 struct list_head dl_recall_lru; /* delegation recalled */ 128 struct list_head dl_recall_lru; /* delegation recalled */
129 struct nfs4_clnt_odstate *dl_clnt_odstate;
129 u32 dl_type; 130 u32 dl_type;
130 time_t dl_time; 131 time_t dl_time;
131/* For recall: */ 132/* For recall: */
@@ -332,7 +333,6 @@ struct nfs4_client {
332 int cl_cb_state; 333 int cl_cb_state;
333 struct nfsd4_callback cl_cb_null; 334 struct nfsd4_callback cl_cb_null;
334 struct nfsd4_session *cl_cb_session; 335 struct nfsd4_session *cl_cb_session;
335 struct list_head cl_callbacks; /* list of in-progress callbacks */
336 336
337 /* for all client information that callback code might need: */ 337 /* for all client information that callback code might need: */
338 spinlock_t cl_lock; 338 spinlock_t cl_lock;
@@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
465} 465}
466 466
467/* 467/*
468 * Per-client state indicating no. of opens and outstanding delegations
469 * on a file from a particular client. 'od' stands for 'open & delegation'
470 */
471struct nfs4_clnt_odstate {
472 struct nfs4_client *co_client;
473 struct nfs4_file *co_file;
474 struct list_head co_perfile;
475 atomic_t co_odcount;
476};
477
478/*
468 * nfs4_file: a file opened by some number of (open) nfs4_stateowners. 479 * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
469 * 480 *
470 * These objects are global. nfsd keeps one instance of a nfs4_file per 481 * These objects are global. nfsd keeps one instance of a nfs4_file per
@@ -485,6 +496,7 @@ struct nfs4_file {
485 struct list_head fi_delegations; 496 struct list_head fi_delegations;
486 struct rcu_head fi_rcu; 497 struct rcu_head fi_rcu;
487 }; 498 };
499 struct list_head fi_clnt_odstate;
488 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ 500 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
489 struct file * fi_fds[3]; 501 struct file * fi_fds[3];
490 /* 502 /*
@@ -526,6 +538,7 @@ struct nfs4_ol_stateid {
526 struct list_head st_perstateowner; 538 struct list_head st_perstateowner;
527 struct list_head st_locks; 539 struct list_head st_locks;
528 struct nfs4_stateowner * st_stateowner; 540 struct nfs4_stateowner * st_stateowner;
541 struct nfs4_clnt_odstate * st_clnt_odstate;
529 unsigned char st_access_bmap; 542 unsigned char st_access_bmap;
530 unsigned char st_deny_bmap; 543 unsigned char st_deny_bmap;
531 struct nfs4_ol_stateid * st_openstp; 544 struct nfs4_ol_stateid * st_openstp;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index f982ae84f0cd..2f8c092be2b3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -247,6 +247,7 @@ struct nfsd4_open {
247 struct nfs4_openowner *op_openowner; /* used during processing */ 247 struct nfs4_openowner *op_openowner; /* used during processing */
248 struct nfs4_file *op_file; /* used during processing */ 248 struct nfs4_file *op_file; /* used during processing */
249 struct nfs4_ol_stateid *op_stp; /* used during processing */ 249 struct nfs4_ol_stateid *op_stp; /* used during processing */
250 struct nfs4_clnt_odstate *op_odstate; /* used during processing */
250 struct nfs4_acl *op_acl; 251 struct nfs4_acl *op_acl;
251 struct xdr_netobj op_label; 252 struct xdr_netobj op_label;
252}; 253};
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 059f37137f9a..919fd5bb14a8 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
388 nchildren = nilfs_btree_node_get_nchildren(node); 388 nchildren = nilfs_btree_node_get_nchildren(node);
389 389
390 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 390 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
391 level > NILFS_BTREE_LEVEL_MAX || 391 level >= NILFS_BTREE_LEVEL_MAX ||
392 nchildren < 0 || 392 nchildren < 0 ||
393 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 393 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
394 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", 394 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a6944b25fd5b..fdf4b41d0609 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -757,6 +757,19 @@ lookup:
757 if (tmpres) { 757 if (tmpres) {
758 spin_unlock(&dlm->spinlock); 758 spin_unlock(&dlm->spinlock);
759 spin_lock(&tmpres->spinlock); 759 spin_lock(&tmpres->spinlock);
760
761 /*
762 * Right after dlm spinlock was released, dlm_thread could have
763 * purged the lockres. Check if lockres got unhashed. If so
764 * start over.
765 */
766 if (hlist_unhashed(&tmpres->hash_node)) {
767 spin_unlock(&tmpres->spinlock);
768 dlm_lockres_put(tmpres);
769 tmpres = NULL;
770 goto lookup;
771 }
772
760 /* Wait on the thread that is mastering the resource */ 773 /* Wait on the thread that is mastering the resource */
761 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 774 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
762 __dlm_wait_on_lockres(tmpres); 775 __dlm_wait_on_lockres(tmpres);
diff --git a/fs/splice.c b/fs/splice.c
index 476024bb6546..bfe62ae40f40 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1161 long ret, bytes; 1161 long ret, bytes;
1162 umode_t i_mode; 1162 umode_t i_mode;
1163 size_t len; 1163 size_t len;
1164 int i, flags; 1164 int i, flags, more;
1165 1165
1166 /* 1166 /*
1167 * We require the input being a regular file, as we don't want to 1167 * We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1204 * Don't block on output, we have to drain the direct pipe. 1204 * Don't block on output, we have to drain the direct pipe.
1205 */ 1205 */
1206 sd->flags &= ~SPLICE_F_NONBLOCK; 1206 sd->flags &= ~SPLICE_F_NONBLOCK;
1207 more = sd->flags & SPLICE_F_MORE;
1207 1208
1208 while (len) { 1209 while (len) {
1209 size_t read_len; 1210 size_t read_len;
@@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1217 sd->total_len = read_len; 1218 sd->total_len = read_len;
1218 1219
1219 /* 1220 /*
1221 * If more data is pending, set SPLICE_F_MORE.
1222 * If this is the last data and SPLICE_F_MORE was not set
1223 * initially, clear it.
1224 */
1225 if (read_len < len)
1226 sd->flags |= SPLICE_F_MORE;
1227 else if (!more)
1228 sd->flags &= ~SPLICE_F_MORE;
1229 /*
1220 * NOTE: nonblocking mode only applies to the input. We 1230 * NOTE: nonblocking mode only applies to the input. We
1221 * must not do the output in nonblocking mode as then we 1231 * must not do the output in nonblocking mode as then we
1222 * could get stuck data in the internal pipe: 1232 * could get stuck data in the internal pipe: