aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/extent-tree.c121
-rw-r--r--fs/btrfs/extent_io.c73
-rw-r--r--fs/btrfs/free-space-cache.c26
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/btrfs/ioctl.c3
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/btrfs/volumes.c15
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/efivarfs/super.c2
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext4/Kconfig9
-rw-r--r--fs/ext4/crypto_fname.c280
-rw-r--r--fs/ext4/crypto_key.c1
-rw-r--r--fs/ext4/crypto_policy.c14
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/ext4_crypto.h11
-rw-r--r--fs/ext4/ext4_jbd2.c6
-rw-r--r--fs/ext4/extents.c25
-rw-r--r--fs/ext4/extents_status.c8
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c72
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/f2fs/data.c7
-rw-r--r--fs/f2fs/f2fs.h1
-rw-r--r--fs/f2fs/namei.c8
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/jbd2/recovery.c10
-rw-r--r--fs/jbd2/revoke.c18
-rw-r--r--fs/jbd2/transaction.c25
-rw-r--r--fs/kernfs/dir.c9
-rw-r--r--fs/namei.c22
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfsd/blocklayout.c11
-rw-r--r--fs/nfsd/nfs4callback.c119
-rw-r--r--fs/nfsd/nfs4state.c147
-rw-r--r--fs/nfsd/state.h19
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/nilfs2/btree.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c13
-rw-r--r--fs/splice.c12
45 files changed, 734 insertions, 441 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cde698a07d21..a2ae42720a6a 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1802 set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); 1802 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1803 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1803 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1804 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1804 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1805 BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1806
1805 inode->i_version = btrfs_stack_inode_sequence(inode_item); 1807 inode->i_version = btrfs_stack_inode_sequence(inode_item);
1806 inode->i_rdev = 0; 1808 inode->i_rdev = 0;
1807 *rdev = btrfs_stack_inode_rdev(inode_item); 1809 *rdev = btrfs_stack_inode_rdev(inode_item);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1eef4ee01d1a..7effed6f2fa6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3178,10 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
3178 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 3178 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3179 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 3179 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3180 btrfs_mark_buffer_dirty(leaf); 3180 btrfs_mark_buffer_dirty(leaf);
3181 btrfs_release_path(path);
3182fail: 3181fail:
3183 if (ret) 3182 btrfs_release_path(path);
3184 btrfs_abort_transaction(trans, root, ret);
3185 return ret; 3183 return ret;
3186 3184
3187} 3185}
@@ -3305,8 +3303,7 @@ again:
3305 3303
3306 spin_lock(&block_group->lock); 3304 spin_lock(&block_group->lock);
3307 if (block_group->cached != BTRFS_CACHE_FINISHED || 3305 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3308 !btrfs_test_opt(root, SPACE_CACHE) || 3306 !btrfs_test_opt(root, SPACE_CACHE)) {
3309 block_group->delalloc_bytes) {
3310 /* 3307 /*
3311 * don't bother trying to write stuff out _if_ 3308 * don't bother trying to write stuff out _if_
3312 * a) we're not cached, 3309 * a) we're not cached,
@@ -3408,17 +3405,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3408 int loops = 0; 3405 int loops = 0;
3409 3406
3410 spin_lock(&cur_trans->dirty_bgs_lock); 3407 spin_lock(&cur_trans->dirty_bgs_lock);
3411 if (!list_empty(&cur_trans->dirty_bgs)) { 3408 if (list_empty(&cur_trans->dirty_bgs)) {
3412 list_splice_init(&cur_trans->dirty_bgs, &dirty); 3409 spin_unlock(&cur_trans->dirty_bgs_lock);
3410 return 0;
3413 } 3411 }
3412 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3414 spin_unlock(&cur_trans->dirty_bgs_lock); 3413 spin_unlock(&cur_trans->dirty_bgs_lock);
3415 3414
3416again: 3415again:
3417 if (list_empty(&dirty)) {
3418 btrfs_free_path(path);
3419 return 0;
3420 }
3421
3422 /* 3416 /*
3423 * make sure all the block groups on our dirty list actually 3417 * make sure all the block groups on our dirty list actually
3424 * exist 3418 * exist
@@ -3431,18 +3425,16 @@ again:
3431 return -ENOMEM; 3425 return -ENOMEM;
3432 } 3426 }
3433 3427
3428 /*
3429 * cache_write_mutex is here only to save us from balance or automatic
3430 * removal of empty block groups deleting this block group while we are
3431 * writing out the cache
3432 */
3433 mutex_lock(&trans->transaction->cache_write_mutex);
3434 while (!list_empty(&dirty)) { 3434 while (!list_empty(&dirty)) {
3435 cache = list_first_entry(&dirty, 3435 cache = list_first_entry(&dirty,
3436 struct btrfs_block_group_cache, 3436 struct btrfs_block_group_cache,
3437 dirty_list); 3437 dirty_list);
3438
3439 /*
3440 * cache_write_mutex is here only to save us from balance
3441 * deleting this block group while we are writing out the
3442 * cache
3443 */
3444 mutex_lock(&trans->transaction->cache_write_mutex);
3445
3446 /* 3438 /*
3447 * this can happen if something re-dirties a block 3439 * this can happen if something re-dirties a block
3448 * group that is already under IO. Just wait for it to 3440 * group that is already under IO. Just wait for it to
@@ -3493,9 +3485,30 @@ again:
3493 ret = 0; 3485 ret = 0;
3494 } 3486 }
3495 } 3487 }
3496 if (!ret) 3488 if (!ret) {
3497 ret = write_one_cache_group(trans, root, path, cache); 3489 ret = write_one_cache_group(trans, root, path, cache);
3498 mutex_unlock(&trans->transaction->cache_write_mutex); 3490 /*
3491 * Our block group might still be attached to the list
3492 * of new block groups in the transaction handle of some
3493 * other task (struct btrfs_trans_handle->new_bgs). This
3494 * means its block group item isn't yet in the extent
3495 * tree. If this happens ignore the error, as we will
3496 * try again later in the critical section of the
3497 * transaction commit.
3498 */
3499 if (ret == -ENOENT) {
3500 ret = 0;
3501 spin_lock(&cur_trans->dirty_bgs_lock);
3502 if (list_empty(&cache->dirty_list)) {
3503 list_add_tail(&cache->dirty_list,
3504 &cur_trans->dirty_bgs);
3505 btrfs_get_block_group(cache);
3506 }
3507 spin_unlock(&cur_trans->dirty_bgs_lock);
3508 } else if (ret) {
3509 btrfs_abort_transaction(trans, root, ret);
3510 }
3511 }
3499 3512
3500 /* if its not on the io list, we need to put the block group */ 3513 /* if its not on the io list, we need to put the block group */
3501 if (should_put) 3514 if (should_put)
@@ -3503,7 +3516,16 @@ again:
3503 3516
3504 if (ret) 3517 if (ret)
3505 break; 3518 break;
3519
3520 /*
3521 * Avoid blocking other tasks for too long. It might even save
3522 * us from writing caches for block groups that are going to be
3523 * removed.
3524 */
3525 mutex_unlock(&trans->transaction->cache_write_mutex);
3526 mutex_lock(&trans->transaction->cache_write_mutex);
3506 } 3527 }
3528 mutex_unlock(&trans->transaction->cache_write_mutex);
3507 3529
3508 /* 3530 /*
3509 * go through delayed refs for all the stuff we've just kicked off 3531 * go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3536,15 @@ again:
3514 loops++; 3536 loops++;
3515 spin_lock(&cur_trans->dirty_bgs_lock); 3537 spin_lock(&cur_trans->dirty_bgs_lock);
3516 list_splice_init(&cur_trans->dirty_bgs, &dirty); 3538 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3539 /*
3540 * dirty_bgs_lock protects us from concurrent block group
3541 * deletes too (not just cache_write_mutex).
3542 */
3543 if (!list_empty(&dirty)) {
3544 spin_unlock(&cur_trans->dirty_bgs_lock);
3545 goto again;
3546 }
3517 spin_unlock(&cur_trans->dirty_bgs_lock); 3547 spin_unlock(&cur_trans->dirty_bgs_lock);
3518 goto again;
3519 } 3548 }
3520 3549
3521 btrfs_free_path(path); 3550 btrfs_free_path(path);
@@ -3588,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3588 ret = 0; 3617 ret = 0;
3589 } 3618 }
3590 } 3619 }
3591 if (!ret) 3620 if (!ret) {
3592 ret = write_one_cache_group(trans, root, path, cache); 3621 ret = write_one_cache_group(trans, root, path, cache);
3622 if (ret)
3623 btrfs_abort_transaction(trans, root, ret);
3624 }
3593 3625
3594 /* if its not on the io list, we need to put the block group */ 3626 /* if its not on the io list, we need to put the block group */
3595 if (should_put) 3627 if (should_put)
@@ -7537,7 +7569,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
7537 * returns the key for the extent through ins, and a tree buffer for 7569 * returns the key for the extent through ins, and a tree buffer for
7538 * the first block of the extent through buf. 7570 * the first block of the extent through buf.
7539 * 7571 *
7540 * returns the tree buffer or NULL. 7572 * returns the tree buffer or an ERR_PTR on error.
7541 */ 7573 */
7542struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, 7574struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7543 struct btrfs_root *root, 7575 struct btrfs_root *root,
@@ -7548,6 +7580,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7548 struct btrfs_key ins; 7580 struct btrfs_key ins;
7549 struct btrfs_block_rsv *block_rsv; 7581 struct btrfs_block_rsv *block_rsv;
7550 struct extent_buffer *buf; 7582 struct extent_buffer *buf;
7583 struct btrfs_delayed_extent_op *extent_op;
7551 u64 flags = 0; 7584 u64 flags = 0;
7552 int ret; 7585 int ret;
7553 u32 blocksize = root->nodesize; 7586 u32 blocksize = root->nodesize;
@@ -7568,13 +7601,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7568 7601
7569 ret = btrfs_reserve_extent(root, blocksize, blocksize, 7602 ret = btrfs_reserve_extent(root, blocksize, blocksize,
7570 empty_size, hint, &ins, 0, 0); 7603 empty_size, hint, &ins, 0, 0);
7571 if (ret) { 7604 if (ret)
7572 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7605 goto out_unuse;
7573 return ERR_PTR(ret);
7574 }
7575 7606
7576 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); 7607 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
7577 BUG_ON(IS_ERR(buf)); /* -ENOMEM */ 7608 if (IS_ERR(buf)) {
7609 ret = PTR_ERR(buf);
7610 goto out_free_reserved;
7611 }
7578 7612
7579 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 7613 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
7580 if (parent == 0) 7614 if (parent == 0)
@@ -7584,9 +7618,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7584 BUG_ON(parent > 0); 7618 BUG_ON(parent > 0);
7585 7619
7586 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 7620 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
7587 struct btrfs_delayed_extent_op *extent_op;
7588 extent_op = btrfs_alloc_delayed_extent_op(); 7621 extent_op = btrfs_alloc_delayed_extent_op();
7589 BUG_ON(!extent_op); /* -ENOMEM */ 7622 if (!extent_op) {
7623 ret = -ENOMEM;
7624 goto out_free_buf;
7625 }
7590 if (key) 7626 if (key)
7591 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 7627 memcpy(&extent_op->key, key, sizeof(extent_op->key));
7592 else 7628 else
@@ -7601,13 +7637,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7601 extent_op->level = level; 7637 extent_op->level = level;
7602 7638
7603 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, 7639 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
7604 ins.objectid, 7640 ins.objectid, ins.offset,
7605 ins.offset, parent, root_objectid, 7641 parent, root_objectid, level,
7606 level, BTRFS_ADD_DELAYED_EXTENT, 7642 BTRFS_ADD_DELAYED_EXTENT,
7607 extent_op, 0); 7643 extent_op, 0);
7608 BUG_ON(ret); /* -ENOMEM */ 7644 if (ret)
7645 goto out_free_delayed;
7609 } 7646 }
7610 return buf; 7647 return buf;
7648
7649out_free_delayed:
7650 btrfs_free_delayed_extent_op(extent_op);
7651out_free_buf:
7652 free_extent_buffer(buf);
7653out_free_reserved:
7654 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
7655out_unuse:
7656 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
7657 return ERR_PTR(ret);
7611} 7658}
7612 7659
7613struct walk_control { 7660struct walk_control {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 782f3bc4651d..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4560 do { 4560 do {
4561 index--; 4561 index--;
4562 page = eb->pages[index]; 4562 page = eb->pages[index];
4563 if (page && mapped) { 4563 if (!page)
4564 continue;
4565 if (mapped)
4564 spin_lock(&page->mapping->private_lock); 4566 spin_lock(&page->mapping->private_lock);
4567 /*
4568 * We do this since we'll remove the pages after we've
4569 * removed the eb from the radix tree, so we could race
4570 * and have this page now attached to the new eb. So
4571 * only clear page_private if it's still connected to
4572 * this eb.
4573 */
4574 if (PagePrivate(page) &&
4575 page->private == (unsigned long)eb) {
4576 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4577 BUG_ON(PageDirty(page));
4578 BUG_ON(PageWriteback(page));
4565 /* 4579 /*
4566 * We do this since we'll remove the pages after we've 4580 * We need to make sure we haven't be attached
4567 * removed the eb from the radix tree, so we could race 4581 * to a new eb.
4568 * and have this page now attached to the new eb. So
4569 * only clear page_private if it's still connected to
4570 * this eb.
4571 */ 4582 */
4572 if (PagePrivate(page) && 4583 ClearPagePrivate(page);
4573 page->private == (unsigned long)eb) { 4584 set_page_private(page, 0);
4574 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); 4585 /* One for the page private */
4575 BUG_ON(PageDirty(page));
4576 BUG_ON(PageWriteback(page));
4577 /*
4578 * We need to make sure we haven't be attached
4579 * to a new eb.
4580 */
4581 ClearPagePrivate(page);
4582 set_page_private(page, 0);
4583 /* One for the page private */
4584 page_cache_release(page);
4585 }
4586 spin_unlock(&page->mapping->private_lock);
4587
4588 }
4589 if (page) {
4590 /* One for when we alloced the page */
4591 page_cache_release(page); 4586 page_cache_release(page);
4592 } 4587 }
4588
4589 if (mapped)
4590 spin_unlock(&page->mapping->private_lock);
4591
4592 /* One for when we alloced the page */
4593 page_cache_release(page);
4593 } while (index != 0); 4594 } while (index != 0);
4594} 4595}
4595 4596
@@ -4771,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4771 start >> PAGE_CACHE_SHIFT); 4772 start >> PAGE_CACHE_SHIFT);
4772 if (eb && atomic_inc_not_zero(&eb->refs)) { 4773 if (eb && atomic_inc_not_zero(&eb->refs)) {
4773 rcu_read_unlock(); 4774 rcu_read_unlock();
4775 /*
4776 * Lock our eb's refs_lock to avoid races with
4777 * free_extent_buffer. When we get our eb it might be flagged
4778 * with EXTENT_BUFFER_STALE and another task running
4779 * free_extent_buffer might have seen that flag set,
4780 * eb->refs == 2, that the buffer isn't under IO (dirty and
4781 * writeback flags not set) and it's still in the tree (flag
4782 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
4783 * of decrementing the extent buffer's reference count twice.
4784 * So here we could race and increment the eb's reference count,
4785 * clear its stale flag, mark it as dirty and drop our reference
4786 * before the other task finishes executing free_extent_buffer,
4787 * which would later result in an attempt to free an extent
4788 * buffer that is dirty.
4789 */
4790 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4791 spin_lock(&eb->refs_lock);
4792 spin_unlock(&eb->refs_lock);
4793 }
4774 mark_extent_buffer_accessed(eb, NULL); 4794 mark_extent_buffer_accessed(eb, NULL);
4775 return eb; 4795 return eb;
4776 } 4796 }
@@ -4870,6 +4890,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4870 mark_extent_buffer_accessed(exists, p); 4890 mark_extent_buffer_accessed(exists, p);
4871 goto free_eb; 4891 goto free_eb;
4872 } 4892 }
4893 exists = NULL;
4873 4894
4874 /* 4895 /*
4875 * Do this so attach doesn't complain and we need to 4896 * Do this so attach doesn't complain and we need to
@@ -4933,12 +4954,12 @@ again:
4933 return eb; 4954 return eb;
4934 4955
4935free_eb: 4956free_eb:
4957 WARN_ON(!atomic_dec_and_test(&eb->refs));
4936 for (i = 0; i < num_pages; i++) { 4958 for (i = 0; i < num_pages; i++) {
4937 if (eb->pages[i]) 4959 if (eb->pages[i])
4938 unlock_page(eb->pages[i]); 4960 unlock_page(eb->pages[i]);
4939 } 4961 }
4940 4962
4941 WARN_ON(!atomic_dec_and_test(&eb->refs));
4942 btrfs_release_extent_buffer(eb); 4963 btrfs_release_extent_buffer(eb);
4943 return exists; 4964 return exists;
4944} 4965}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81fa75a8e1f3..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
86 86
87 mapping_set_gfp_mask(inode->i_mapping, 87 mapping_set_gfp_mask(inode->i_mapping,
88 mapping_gfp_mask(inode->i_mapping) & 88 mapping_gfp_mask(inode->i_mapping) &
89 ~(GFP_NOFS & ~__GFP_HIGHMEM)); 89 ~(__GFP_FS | __GFP_HIGHMEM));
90 90
91 return inode; 91 return inode;
92} 92}
@@ -1218,7 +1218,7 @@ out:
1218 * 1218 *
1219 * This function writes out a free space cache struct to disk for quick recovery 1219 * This function writes out a free space cache struct to disk for quick recovery
1220 * on mount. This will return 0 if it was successfull in writing the cache out, 1220 * on mount. This will return 0 if it was successfull in writing the cache out,
1221 * and -1 if it was not. 1221 * or an errno if it was not.
1222 */ 1222 */
1223static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1223static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1224 struct btrfs_free_space_ctl *ctl, 1224 struct btrfs_free_space_ctl *ctl,
@@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1235 int must_iput = 0; 1235 int must_iput = 0;
1236 1236
1237 if (!i_size_read(inode)) 1237 if (!i_size_read(inode))
1238 return -1; 1238 return -EIO;
1239 1239
1240 WARN_ON(io_ctl->pages); 1240 WARN_ON(io_ctl->pages);
1241 ret = io_ctl_init(io_ctl, inode, root, 1); 1241 ret = io_ctl_init(io_ctl, inode, root, 1);
1242 if (ret) 1242 if (ret)
1243 return -1; 1243 return ret;
1244 1244
1245 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1245 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1246 down_write(&block_group->data_rwsem); 1246 down_write(&block_group->data_rwsem);
@@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1258 } 1258 }
1259 1259
1260 /* Lock all pages first so we can lock the extent safely. */ 1260 /* Lock all pages first so we can lock the extent safely. */
1261 io_ctl_prepare_pages(io_ctl, inode, 0); 1261 ret = io_ctl_prepare_pages(io_ctl, inode, 0);
1262 if (ret)
1263 goto out;
1262 1264
1263 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1265 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1264 0, &cached_state); 1266 0, &cached_state);
@@ -3464,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3464 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3466 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3465 int ret; 3467 int ret;
3466 struct btrfs_io_ctl io_ctl; 3468 struct btrfs_io_ctl io_ctl;
3469 bool release_metadata = true;
3467 3470
3468 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3471 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
3469 return 0; 3472 return 0;
@@ -3471,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3471 memset(&io_ctl, 0, sizeof(io_ctl)); 3474 memset(&io_ctl, 0, sizeof(io_ctl));
3472 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, 3475 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
3473 trans, path, 0); 3476 trans, path, 0);
3474 if (!ret) 3477 if (!ret) {
3478 /*
3479 * At this point writepages() didn't error out, so our metadata
3480 * reservation is released when the writeback finishes, at
3481 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
3482 * with or without an error.
3483 */
3484 release_metadata = false;
3475 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); 3485 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
3486 }
3476 3487
3477 if (ret) { 3488 if (ret) {
3478 btrfs_delalloc_release_metadata(inode, inode->i_size); 3489 if (release_metadata)
3490 btrfs_delalloc_release_metadata(inode, inode->i_size);
3479#ifdef DEBUG 3491#ifdef DEBUG
3480 btrfs_err(root->fs_info, 3492 btrfs_err(root->fs_info,
3481 "failed to write free ino cache for root %llu", 3493 "failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada4d24ed11b..8bb013672aee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3632,25 +3632,28 @@ static void btrfs_read_locked_inode(struct inode *inode)
3632 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 3632 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
3633 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); 3633 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
3634 3634
3635 inode->i_version = btrfs_inode_sequence(leaf, inode_item);
3636 inode->i_generation = BTRFS_I(inode)->generation;
3637 inode->i_rdev = 0;
3638 rdev = btrfs_inode_rdev(leaf, inode_item);
3639
3640 BTRFS_I(inode)->index_cnt = (u64)-1;
3641 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
3642
3643cache_index:
3635 /* 3644 /*
3636 * If we were modified in the current generation and evicted from memory 3645 * If we were modified in the current generation and evicted from memory
3637 * and then re-read we need to do a full sync since we don't have any 3646 * and then re-read we need to do a full sync since we don't have any
3638 * idea about which extents were modified before we were evicted from 3647 * idea about which extents were modified before we were evicted from
3639 * cache. 3648 * cache.
3649 *
3650 * This is required for both inode re-read from disk and delayed inode
3651 * in delayed_nodes_tree.
3640 */ 3652 */
3641 if (BTRFS_I(inode)->last_trans == root->fs_info->generation) 3653 if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
3642 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 3654 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3643 &BTRFS_I(inode)->runtime_flags); 3655 &BTRFS_I(inode)->runtime_flags);
3644 3656
3645 inode->i_version = btrfs_inode_sequence(leaf, inode_item);
3646 inode->i_generation = BTRFS_I(inode)->generation;
3647 inode->i_rdev = 0;
3648 rdev = btrfs_inode_rdev(leaf, inode_item);
3649
3650 BTRFS_I(inode)->index_cnt = (u64)-1;
3651 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
3652
3653cache_index:
3654 path->slots[0]++; 3657 path->slots[0]++;
3655 if (inode->i_nlink != 1 || 3658 if (inode->i_nlink != 1 ||
3656 path->slots[0] >= btrfs_header_nritems(leaf)) 3659 path->slots[0] >= btrfs_header_nritems(leaf))
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b05653f182c2..1c22c6518504 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2410 "Attempt to delete subvolume %llu during send", 2410 "Attempt to delete subvolume %llu during send",
2411 dest->root_key.objectid); 2411 dest->root_key.objectid);
2412 err = -EPERM; 2412 err = -EPERM;
2413 goto out_dput; 2413 goto out_unlock_inode;
2414 } 2414 }
2415 2415
2416 d_invalidate(dentry); 2416 d_invalidate(dentry);
@@ -2505,6 +2505,7 @@ out_up_write:
2505 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); 2505 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
2506 spin_unlock(&dest->root_item_lock); 2506 spin_unlock(&dest->root_item_lock);
2507 } 2507 }
2508out_unlock_inode:
2508 mutex_unlock(&inode->i_mutex); 2509 mutex_unlock(&inode->i_mutex);
2509 if (!err) { 2510 if (!err) {
2510 shrink_dcache_sb(root->fs_info->sb); 2511 shrink_dcache_sb(root->fs_info->sb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
722int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 722int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
723{ 723{
724 int ret = 0; 724 int ret = 0;
725 int ret_wb = 0;
725 u64 end; 726 u64 end;
726 u64 orig_end; 727 u64 orig_end;
727 struct btrfs_ordered_extent *ordered; 728 struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
741 if (ret) 742 if (ret)
742 return ret; 743 return ret;
743 744
744 ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); 745 /*
745 if (ret) 746 * If we have a writeback error don't return immediately. Wait first
746 return ret; 747 * for any ordered extents that haven't completed yet. This is to make
748 * sure no one can dirty the same page ranges and call writepages()
749 * before the ordered extents complete - to avoid failures (-EEXIST)
750 * when adding the new ordered extents to the ordered tree.
751 */
752 ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
747 753
748 end = orig_end; 754 end = orig_end;
749 while (1) { 755 while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
767 break; 773 break;
768 end--; 774 end--;
769 } 775 }
770 return ret; 776 return ret_wb ? ret_wb : ret;
771} 777}
772 778
773/* 779/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bcd2a007517..96aebf3bcd5b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
1058 struct extent_map *em; 1058 struct extent_map *em;
1059 struct list_head *search_list = &trans->transaction->pending_chunks; 1059 struct list_head *search_list = &trans->transaction->pending_chunks;
1060 int ret = 0; 1060 int ret = 0;
1061 u64 physical_start = *start;
1061 1062
1062again: 1063again:
1063 list_for_each_entry(em, search_list, list) { 1064 list_for_each_entry(em, search_list, list) {
@@ -1068,9 +1069,9 @@ again:
1068 for (i = 0; i < map->num_stripes; i++) { 1069 for (i = 0; i < map->num_stripes; i++) {
1069 if (map->stripes[i].dev != device) 1070 if (map->stripes[i].dev != device)
1070 continue; 1071 continue;
1071 if (map->stripes[i].physical >= *start + len || 1072 if (map->stripes[i].physical >= physical_start + len ||
1072 map->stripes[i].physical + em->orig_block_len <= 1073 map->stripes[i].physical + em->orig_block_len <=
1073 *start) 1074 physical_start)
1074 continue; 1075 continue;
1075 *start = map->stripes[i].physical + 1076 *start = map->stripes[i].physical +
1076 em->orig_block_len; 1077 em->orig_block_len;
@@ -1193,8 +1194,14 @@ again:
1193 */ 1194 */
1194 if (contains_pending_extent(trans, device, 1195 if (contains_pending_extent(trans, device,
1195 &search_start, 1196 &search_start,
1196 hole_size)) 1197 hole_size)) {
1197 hole_size = 0; 1198 if (key.offset >= search_start) {
1199 hole_size = key.offset - search_start;
1200 } else {
1201 WARN_ON_ONCE(1);
1202 hole_size = 0;
1203 }
1204 }
1198 1205
1199 if (hole_size > max_hole_size) { 1206 if (hole_size > max_hole_size) {
1200 max_hole_start = search_start; 1207 max_hole_start = search_start;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index da94e41bdbf6..537356742091 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -173,5 +173,5 @@ MODULE_LICENSE("GPL");
173MODULE_VERSION("0.0.2"); 173MODULE_VERSION("0.0.2");
174MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); 174MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
175 175
176module_init(configfs_init); 176core_initcall(configfs_init);
177module_exit(configfs_exit); 177module_exit(configfs_exit);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 59fedbcf8798..86a2121828c3 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
121 int len, i; 121 int len, i;
122 int err = -ENOMEM; 122 int err = -ENOMEM;
123 123
124 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 124 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
125 if (!entry) 125 if (!entry)
126 return err; 126 return err;
127 127
diff --git a/fs/exec.c b/fs/exec.c
index 49a1c61433b7..1977c2a553ac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -659,6 +659,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
659 if (stack_base > STACK_SIZE_MAX) 659 if (stack_base > STACK_SIZE_MAX)
660 stack_base = STACK_SIZE_MAX; 660 stack_base = STACK_SIZE_MAX;
661 661
662 /* Add space for stack randomization. */
663 stack_base += (STACK_RND_MASK << PAGE_SHIFT);
664
662 /* Make sure we didn't let the argument array grow too large. */ 665 /* Make sure we didn't let the argument array grow too large. */
663 if (vma->vm_end - vma->vm_start > stack_base) 666 if (vma->vm_end - vma->vm_start > stack_base)
664 return -ENOMEM; 667 return -ENOMEM;
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 18228c201f7f..024f2284d3f6 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -64,8 +64,8 @@ config EXT4_FS_SECURITY
64 If you are not using a security module that requires using 64 If you are not using a security module that requires using
65 extended attributes for file security labels, say N. 65 extended attributes for file security labels, say N.
66 66
67config EXT4_FS_ENCRYPTION 67config EXT4_ENCRYPTION
68 bool "Ext4 Encryption" 68 tristate "Ext4 Encryption"
69 depends on EXT4_FS 69 depends on EXT4_FS
70 select CRYPTO_AES 70 select CRYPTO_AES
71 select CRYPTO_CBC 71 select CRYPTO_CBC
@@ -81,6 +81,11 @@ config EXT4_FS_ENCRYPTION
81 efficient since it avoids caching the encrypted and 81 efficient since it avoids caching the encrypted and
82 decrypted pages in the page cache. 82 decrypted pages in the page cache.
83 83
84config EXT4_FS_ENCRYPTION
85 bool
86 default y
87 depends on EXT4_ENCRYPTION
88
84config EXT4_DEBUG 89config EXT4_DEBUG
85 bool "EXT4 debugging support" 90 bool "EXT4 debugging support"
86 depends on EXT4_FS 91 depends on EXT4_FS
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index ca2f5948c1ac..fded02f72299 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -66,6 +66,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
66 int res = 0; 66 int res = 0;
67 char iv[EXT4_CRYPTO_BLOCK_SIZE]; 67 char iv[EXT4_CRYPTO_BLOCK_SIZE];
68 struct scatterlist sg[1]; 68 struct scatterlist sg[1];
69 int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
69 char *workbuf; 70 char *workbuf;
70 71
71 if (iname->len <= 0 || iname->len > ctx->lim) 72 if (iname->len <= 0 || iname->len > ctx->lim)
@@ -73,6 +74,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
73 74
74 ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ? 75 ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
75 EXT4_CRYPTO_BLOCK_SIZE : iname->len; 76 EXT4_CRYPTO_BLOCK_SIZE : iname->len;
77 ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
76 ciphertext_len = (ciphertext_len > ctx->lim) 78 ciphertext_len = (ciphertext_len > ctx->lim)
77 ? ctx->lim : ciphertext_len; 79 ? ctx->lim : ciphertext_len;
78 80
@@ -101,7 +103,7 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
101 /* Create encryption request */ 103 /* Create encryption request */
102 sg_init_table(sg, 1); 104 sg_init_table(sg, 1);
103 sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0); 105 sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
104 ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv); 106 ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv);
105 res = crypto_ablkcipher_encrypt(req); 107 res = crypto_ablkcipher_encrypt(req);
106 if (res == -EINPROGRESS || res == -EBUSY) { 108 if (res == -EINPROGRESS || res == -EBUSY) {
107 BUG_ON(req->base.data != &ecr); 109 BUG_ON(req->base.data != &ecr);
@@ -198,106 +200,57 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
198 return oname->len; 200 return oname->len;
199} 201}
200 202
203static const char *lookup_table =
204 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
205
201/** 206/**
202 * ext4_fname_encode_digest() - 207 * ext4_fname_encode_digest() -
203 * 208 *
204 * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. 209 * Encodes the input digest using characters from the set [a-zA-Z0-9_+].
205 * The encoded string is roughly 4/3 times the size of the input string. 210 * The encoded string is roughly 4/3 times the size of the input string.
206 */ 211 */
207int ext4_fname_encode_digest(char *dst, char *src, u32 len) 212static int digest_encode(const char *src, int len, char *dst)
208{ 213{
209 static const char *lookup_table = 214 int i = 0, bits = 0, ac = 0;
210 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+"; 215 char *cp = dst;
211 u32 current_chunk, num_chunks, i; 216
212 char tmp_buf[3]; 217 while (i < len) {
213 u32 c0, c1, c2, c3; 218 ac += (((unsigned char) src[i]) << bits);
214 219 bits += 8;
215 current_chunk = 0; 220 do {
216 num_chunks = len/3; 221 *cp++ = lookup_table[ac & 0x3f];
217 for (i = 0; i < num_chunks; i++) { 222 ac >>= 6;
218 c0 = src[3*i] & 0x3f; 223 bits -= 6;
219 c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f; 224 } while (bits >= 6);
220 c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f;
221 c3 = (src[3*i+2]>>2) & 0x3f;
222 dst[4*i] = lookup_table[c0];
223 dst[4*i+1] = lookup_table[c1];
224 dst[4*i+2] = lookup_table[c2];
225 dst[4*i+3] = lookup_table[c3];
226 }
227 if (i*3 < len) {
228 memset(tmp_buf, 0, 3);
229 memcpy(tmp_buf, &src[3*i], len-3*i);
230 c0 = tmp_buf[0] & 0x3f;
231 c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f;
232 c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f;
233 c3 = (tmp_buf[2]>>2) & 0x3f;
234 dst[4*i] = lookup_table[c0];
235 dst[4*i+1] = lookup_table[c1];
236 dst[4*i+2] = lookup_table[c2];
237 dst[4*i+3] = lookup_table[c3];
238 i++; 225 i++;
239 } 226 }
240 return (i * 4); 227 if (bits)
228 *cp++ = lookup_table[ac & 0x3f];
229 return cp - dst;
241} 230}
242 231
243/** 232static int digest_decode(const char *src, int len, char *dst)
244 * ext4_fname_hash() -
245 *
246 * This function computes the hash of the input filename, and sets the output
247 * buffer to the *encoded* digest. It returns the length of the digest as its
248 * return value. Errors are returned as negative numbers. We trust the caller
249 * to allocate sufficient memory to oname string.
250 */
251static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx,
252 const struct ext4_str *iname,
253 struct ext4_str *oname)
254{ 233{
255 struct scatterlist sg; 234 int i = 0, bits = 0, ac = 0;
256 struct hash_desc desc = { 235 const char *p;
257 .tfm = (struct crypto_hash *)ctx->htfm, 236 char *cp = dst;
258 .flags = CRYPTO_TFM_REQ_MAY_SLEEP 237
259 }; 238 while (i < len) {
260 int res = 0; 239 p = strchr(lookup_table, src[i]);
261 240 if (p == NULL || src[i] == 0)
262 if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { 241 return -2;
263 res = ext4_fname_encode_digest(oname->name, iname->name, 242 ac += (p - lookup_table) << bits;
264 iname->len); 243 bits += 6;
265 oname->len = res; 244 if (bits >= 8) {
266 return res; 245 *cp++ = ac & 0xff;
267 } 246 ac >>= 8;
268 247 bits -= 8;
269 sg_init_one(&sg, iname->name, iname->len); 248 }
270 res = crypto_hash_init(&desc); 249 i++;
271 if (res) {
272 printk(KERN_ERR
273 "%s: Error initializing crypto hash; res = [%d]\n",
274 __func__, res);
275 goto out;
276 }
277 res = crypto_hash_update(&desc, &sg, iname->len);
278 if (res) {
279 printk(KERN_ERR
280 "%s: Error updating crypto hash; res = [%d]\n",
281 __func__, res);
282 goto out;
283 }
284 res = crypto_hash_final(&desc,
285 &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]);
286 if (res) {
287 printk(KERN_ERR
288 "%s: Error finalizing crypto hash; res = [%d]\n",
289 __func__, res);
290 goto out;
291 } 250 }
292 /* Encode the digest as a printable string--this will increase the 251 if (ac)
293 * size of the digest */ 252 return -1;
294 oname->name[0] = 'I'; 253 return cp - dst;
295 res = ext4_fname_encode_digest(oname->name+1,
296 &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE],
297 EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1;
298 oname->len = res;
299out:
300 return res;
301} 254}
302 255
303/** 256/**
@@ -405,6 +358,7 @@ struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
405 if (IS_ERR(ctx)) 358 if (IS_ERR(ctx))
406 return ctx; 359 return ctx;
407 360
361 ctx->flags = ei->i_crypt_policy_flags;
408 if (ctx->has_valid_key) { 362 if (ctx->has_valid_key) {
409 if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) { 363 if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
410 printk_once(KERN_WARNING 364 printk_once(KERN_WARNING
@@ -517,6 +471,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
517 u32 namelen) 471 u32 namelen)
518{ 472{
519 u32 ciphertext_len; 473 u32 ciphertext_len;
474 int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
520 475
521 if (ctx == NULL) 476 if (ctx == NULL)
522 return -EIO; 477 return -EIO;
@@ -524,6 +479,7 @@ int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
524 return -EACCES; 479 return -EACCES;
525 ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ? 480 ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
526 EXT4_CRYPTO_BLOCK_SIZE : namelen; 481 EXT4_CRYPTO_BLOCK_SIZE : namelen;
482 ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding);
527 ciphertext_len = (ciphertext_len > ctx->lim) 483 ciphertext_len = (ciphertext_len > ctx->lim)
528 ? ctx->lim : ciphertext_len; 484 ? ctx->lim : ciphertext_len;
529 return (int) ciphertext_len; 485 return (int) ciphertext_len;
@@ -539,10 +495,13 @@ int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
539 u32 ilen, struct ext4_str *crypto_str) 495 u32 ilen, struct ext4_str *crypto_str)
540{ 496{
541 unsigned int olen; 497 unsigned int olen;
498 int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK);
542 499
543 if (!ctx) 500 if (!ctx)
544 return -EIO; 501 return -EIO;
545 olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE); 502 if (padding < EXT4_CRYPTO_BLOCK_SIZE)
503 padding = EXT4_CRYPTO_BLOCK_SIZE;
504 olen = ext4_fname_crypto_round_up(ilen, padding);
546 crypto_str->len = olen; 505 crypto_str->len = olen;
547 if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) 506 if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
548 olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2; 507 olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
@@ -571,9 +530,13 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
571 * ext4_fname_disk_to_usr() - converts a filename from disk space to user space 530 * ext4_fname_disk_to_usr() - converts a filename from disk space to user space
572 */ 531 */
573int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, 532int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
574 const struct ext4_str *iname, 533 struct dx_hash_info *hinfo,
575 struct ext4_str *oname) 534 const struct ext4_str *iname,
535 struct ext4_str *oname)
576{ 536{
537 char buf[24];
538 int ret;
539
577 if (ctx == NULL) 540 if (ctx == NULL)
578 return -EIO; 541 return -EIO;
579 if (iname->len < 3) { 542 if (iname->len < 3) {
@@ -587,18 +550,33 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
587 } 550 }
588 if (ctx->has_valid_key) 551 if (ctx->has_valid_key)
589 return ext4_fname_decrypt(ctx, iname, oname); 552 return ext4_fname_decrypt(ctx, iname, oname);
590 else 553
591 return ext4_fname_hash(ctx, iname, oname); 554 if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
555 ret = digest_encode(iname->name, iname->len, oname->name);
556 oname->len = ret;
557 return ret;
558 }
559 if (hinfo) {
560 memcpy(buf, &hinfo->hash, 4);
561 memcpy(buf+4, &hinfo->minor_hash, 4);
562 } else
563 memset(buf, 0, 8);
564 memcpy(buf + 8, iname->name + iname->len - 16, 16);
565 oname->name[0] = '_';
566 ret = digest_encode(buf, 24, oname->name+1);
567 oname->len = ret + 1;
568 return ret + 1;
592} 569}
593 570
594int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, 571int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
572 struct dx_hash_info *hinfo,
595 const struct ext4_dir_entry_2 *de, 573 const struct ext4_dir_entry_2 *de,
596 struct ext4_str *oname) 574 struct ext4_str *oname)
597{ 575{
598 struct ext4_str iname = {.name = (unsigned char *) de->name, 576 struct ext4_str iname = {.name = (unsigned char *) de->name,
599 .len = de->name_len }; 577 .len = de->name_len };
600 578
601 return _ext4_fname_disk_to_usr(ctx, &iname, oname); 579 return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname);
602} 580}
603 581
604 582
@@ -640,10 +618,11 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
640 const struct qstr *iname, 618 const struct qstr *iname,
641 struct dx_hash_info *hinfo) 619 struct dx_hash_info *hinfo)
642{ 620{
643 struct ext4_str tmp, tmp2; 621 struct ext4_str tmp;
644 int ret = 0; 622 int ret = 0;
623 char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1];
645 624
646 if (!ctx || !ctx->has_valid_key || 625 if (!ctx ||
647 ((iname->name[0] == '.') && 626 ((iname->name[0] == '.') &&
648 ((iname->len == 1) || 627 ((iname->len == 1) ||
649 ((iname->name[1] == '.') && (iname->len == 2))))) { 628 ((iname->name[1] == '.') && (iname->len == 2))))) {
@@ -651,59 +630,90 @@ int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
651 return 0; 630 return 0;
652 } 631 }
653 632
633 if (!ctx->has_valid_key && iname->name[0] == '_') {
634 if (iname->len != 33)
635 return -ENOENT;
636 ret = digest_decode(iname->name+1, iname->len, buf);
637 if (ret != 24)
638 return -ENOENT;
639 memcpy(&hinfo->hash, buf, 4);
640 memcpy(&hinfo->minor_hash, buf + 4, 4);
641 return 0;
642 }
643
644 if (!ctx->has_valid_key && iname->name[0] != '_') {
645 if (iname->len > 43)
646 return -ENOENT;
647 ret = digest_decode(iname->name, iname->len, buf);
648 ext4fs_dirhash(buf, ret, hinfo);
649 return 0;
650 }
651
654 /* First encrypt the plaintext name */ 652 /* First encrypt the plaintext name */
655 ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp); 653 ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
656 if (ret < 0) 654 if (ret < 0)
657 return ret; 655 return ret;
658 656
659 ret = ext4_fname_encrypt(ctx, iname, &tmp); 657 ret = ext4_fname_encrypt(ctx, iname, &tmp);
660 if (ret < 0) 658 if (ret >= 0) {
661 goto out; 659 ext4fs_dirhash(tmp.name, tmp.len, hinfo);
662 660 ret = 0;
663 tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
664 tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL);
665 if (tmp2.name == NULL) {
666 ret = -ENOMEM;
667 goto out;
668 } 661 }
669 662
670 ret = ext4_fname_hash(ctx, &tmp, &tmp2);
671 if (ret > 0)
672 ext4fs_dirhash(tmp2.name, tmp2.len, hinfo);
673 ext4_fname_crypto_free_buffer(&tmp2);
674out:
675 ext4_fname_crypto_free_buffer(&tmp); 663 ext4_fname_crypto_free_buffer(&tmp);
676 return ret; 664 return ret;
677} 665}
678 666
679/** 667int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
680 * ext4_fname_disk_to_htree() - converts a filename from disk space to htree-access string 668 int len, const char * const name,
681 */ 669 struct ext4_dir_entry_2 *de)
682int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
683 const struct ext4_dir_entry_2 *de,
684 struct dx_hash_info *hinfo)
685{ 670{
686 struct ext4_str iname = {.name = (unsigned char *) de->name, 671 int ret = -ENOENT;
687 .len = de->name_len}; 672 int bigname = (*name == '_');
688 struct ext4_str tmp;
689 int ret;
690 673
691 if (!ctx || 674 if (ctx->has_valid_key) {
692 ((iname.name[0] == '.') && 675 if (cstr->name == NULL) {
693 ((iname.len == 1) || 676 struct qstr istr;
694 ((iname.name[1] == '.') && (iname.len == 2))))) { 677
695 ext4fs_dirhash(iname.name, iname.len, hinfo); 678 ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr);
696 return 0; 679 if (ret < 0)
680 goto errout;
681 istr.name = name;
682 istr.len = len;
683 ret = ext4_fname_encrypt(ctx, &istr, cstr);
684 if (ret < 0)
685 goto errout;
686 }
687 } else {
688 if (cstr->name == NULL) {
689 cstr->name = kmalloc(32, GFP_KERNEL);
690 if (cstr->name == NULL)
691 return -ENOMEM;
692 if ((bigname && (len != 33)) ||
693 (!bigname && (len > 43)))
694 goto errout;
695 ret = digest_decode(name+bigname, len-bigname,
696 cstr->name);
697 if (ret < 0) {
698 ret = -ENOENT;
699 goto errout;
700 }
701 cstr->len = ret;
702 }
703 if (bigname) {
704 if (de->name_len < 16)
705 return 0;
706 ret = memcmp(de->name + de->name_len - 16,
707 cstr->name + 8, 16);
708 return (ret == 0) ? 1 : 0;
709 }
697 } 710 }
698 711 if (de->name_len != cstr->len)
699 tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1; 712 return 0;
700 tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL); 713 ret = memcmp(de->name, cstr->name, cstr->len);
701 if (tmp.name == NULL) 714 return (ret == 0) ? 1 : 0;
702 return -ENOMEM; 715errout:
703 716 kfree(cstr->name);
704 ret = ext4_fname_hash(ctx, &iname, &tmp); 717 cstr->name = NULL;
705 if (ret > 0)
706 ext4fs_dirhash(tmp.name, tmp.len, hinfo);
707 ext4_fname_crypto_free_buffer(&tmp);
708 return ret; 718 return ret;
709} 719}
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index c8392af8abbb..52170d0b7c40 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -110,6 +110,7 @@ int ext4_generate_encryption_key(struct inode *inode)
110 } 110 }
111 res = 0; 111 res = 0;
112 112
113 ei->i_crypt_policy_flags = ctx.flags;
113 if (S_ISREG(inode->i_mode)) 114 if (S_ISREG(inode->i_mode))
114 crypt_key->mode = ctx.contents_encryption_mode; 115 crypt_key->mode = ctx.contents_encryption_mode;
115 else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 116 else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 30eaf9e9864a..a6d6291aea16 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -37,6 +37,8 @@ static int ext4_is_encryption_context_consistent_with_policy(
37 return 0; 37 return 0;
38 return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, 38 return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
39 EXT4_KEY_DESCRIPTOR_SIZE) == 0 && 39 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
40 (ctx.flags ==
41 policy->flags) &&
40 (ctx.contents_encryption_mode == 42 (ctx.contents_encryption_mode ==
41 policy->contents_encryption_mode) && 43 policy->contents_encryption_mode) &&
42 (ctx.filenames_encryption_mode == 44 (ctx.filenames_encryption_mode ==
@@ -56,25 +58,25 @@ static int ext4_create_encryption_context_from_policy(
56 printk(KERN_WARNING 58 printk(KERN_WARNING
57 "%s: Invalid contents encryption mode %d\n", __func__, 59 "%s: Invalid contents encryption mode %d\n", __func__,
58 policy->contents_encryption_mode); 60 policy->contents_encryption_mode);
59 res = -EINVAL; 61 return -EINVAL;
60 goto out;
61 } 62 }
62 if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { 63 if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
63 printk(KERN_WARNING 64 printk(KERN_WARNING
64 "%s: Invalid filenames encryption mode %d\n", __func__, 65 "%s: Invalid filenames encryption mode %d\n", __func__,
65 policy->filenames_encryption_mode); 66 policy->filenames_encryption_mode);
66 res = -EINVAL; 67 return -EINVAL;
67 goto out;
68 } 68 }
69 if (policy->flags & ~EXT4_POLICY_FLAGS_VALID)
70 return -EINVAL;
69 ctx.contents_encryption_mode = policy->contents_encryption_mode; 71 ctx.contents_encryption_mode = policy->contents_encryption_mode;
70 ctx.filenames_encryption_mode = policy->filenames_encryption_mode; 72 ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
73 ctx.flags = policy->flags;
71 BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE); 74 BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
72 get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); 75 get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
73 76
74 res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, 77 res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
75 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, 78 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
76 sizeof(ctx), 0); 79 sizeof(ctx), 0);
77out:
78 if (!res) 80 if (!res)
79 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 81 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
80 return res; 82 return res;
@@ -115,6 +117,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
115 policy->version = 0; 117 policy->version = 0;
116 policy->contents_encryption_mode = ctx.contents_encryption_mode; 118 policy->contents_encryption_mode = ctx.contents_encryption_mode;
117 policy->filenames_encryption_mode = ctx.filenames_encryption_mode; 119 policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
120 policy->flags = ctx.flags;
118 memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, 121 memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
119 EXT4_KEY_DESCRIPTOR_SIZE); 122 EXT4_KEY_DESCRIPTOR_SIZE);
120 return 0; 123 return 0;
@@ -176,6 +179,7 @@ int ext4_inherit_context(struct inode *parent, struct inode *child)
176 EXT4_ENCRYPTION_MODE_AES_256_XTS; 179 EXT4_ENCRYPTION_MODE_AES_256_XTS;
177 ctx.filenames_encryption_mode = 180 ctx.filenames_encryption_mode =
178 EXT4_ENCRYPTION_MODE_AES_256_CTS; 181 EXT4_ENCRYPTION_MODE_AES_256_CTS;
182 ctx.flags = 0;
179 memset(ctx.master_key_descriptor, 0x42, 183 memset(ctx.master_key_descriptor, 0x42,
180 EXT4_KEY_DESCRIPTOR_SIZE); 184 EXT4_KEY_DESCRIPTOR_SIZE);
181 res = 0; 185 res = 0;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 61db51a5ce4c..5665d82d2332 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -249,7 +249,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
249 } else { 249 } else {
250 /* Directory is encrypted */ 250 /* Directory is encrypted */
251 err = ext4_fname_disk_to_usr(enc_ctx, 251 err = ext4_fname_disk_to_usr(enc_ctx,
252 de, &fname_crypto_str); 252 NULL, de, &fname_crypto_str);
253 if (err < 0) 253 if (err < 0)
254 goto errout; 254 goto errout;
255 if (!dir_emit(ctx, 255 if (!dir_emit(ctx,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ef267adce19a..9a83f149ac85 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -911,6 +911,7 @@ struct ext4_inode_info {
911 911
912 /* on-disk additional length */ 912 /* on-disk additional length */
913 __u16 i_extra_isize; 913 __u16 i_extra_isize;
914 char i_crypt_policy_flags;
914 915
915 /* Indicate the inline data space. */ 916 /* Indicate the inline data space. */
916 u16 i_inline_off; 917 u16 i_inline_off;
@@ -1066,12 +1067,6 @@ extern void ext4_set_bits(void *bm, int cur, int len);
1066/* Metadata checksum algorithm codes */ 1067/* Metadata checksum algorithm codes */
1067#define EXT4_CRC32C_CHKSUM 1 1068#define EXT4_CRC32C_CHKSUM 1
1068 1069
1069/* Encryption algorithms */
1070#define EXT4_ENCRYPTION_MODE_INVALID 0
1071#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
1072#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
1073#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
1074
1075/* 1070/*
1076 * Structure of the super block 1071 * Structure of the super block
1077 */ 1072 */
@@ -2093,9 +2088,11 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
2093int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, 2088int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
2094 u32 ilen, struct ext4_str *crypto_str); 2089 u32 ilen, struct ext4_str *crypto_str);
2095int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, 2090int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
2091 struct dx_hash_info *hinfo,
2096 const struct ext4_str *iname, 2092 const struct ext4_str *iname,
2097 struct ext4_str *oname); 2093 struct ext4_str *oname);
2098int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, 2094int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
2095 struct dx_hash_info *hinfo,
2099 const struct ext4_dir_entry_2 *de, 2096 const struct ext4_dir_entry_2 *de,
2100 struct ext4_str *oname); 2097 struct ext4_str *oname);
2101int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, 2098int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
@@ -2104,11 +2101,12 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
2104int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, 2101int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
2105 const struct qstr *iname, 2102 const struct qstr *iname,
2106 struct dx_hash_info *hinfo); 2103 struct dx_hash_info *hinfo);
2107int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
2108 const struct ext4_dir_entry_2 *de,
2109 struct dx_hash_info *hinfo);
2110int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, 2104int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
2111 u32 namelen); 2105 u32 namelen);
2106int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr,
2107 int len, const char * const name,
2108 struct ext4_dir_entry_2 *de);
2109
2112 2110
2113#ifdef CONFIG_EXT4_FS_ENCRYPTION 2111#ifdef CONFIG_EXT4_FS_ENCRYPTION
2114void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx); 2112void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
@@ -2891,7 +2889,6 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
2891 struct ext4_map_blocks *map, int flags); 2889 struct ext4_map_blocks *map, int flags);
2892extern int ext4_ext_calc_metadata_amount(struct inode *inode, 2890extern int ext4_ext_calc_metadata_amount(struct inode *inode,
2893 ext4_lblk_t lblocks); 2891 ext4_lblk_t lblocks);
2894extern int ext4_extent_tree_init(handle_t *, struct inode *);
2895extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, 2892extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
2896 int num, 2893 int num,
2897 struct ext4_ext_path *path); 2894 struct ext4_ext_path *path);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index c2ba35a914b6..d75159c101ce 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -20,12 +20,20 @@ struct ext4_encryption_policy {
20 char version; 20 char version;
21 char contents_encryption_mode; 21 char contents_encryption_mode;
22 char filenames_encryption_mode; 22 char filenames_encryption_mode;
23 char flags;
23 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; 24 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
24} __attribute__((__packed__)); 25} __attribute__((__packed__));
25 26
26#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1 27#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1
27#define EXT4_KEY_DERIVATION_NONCE_SIZE 16 28#define EXT4_KEY_DERIVATION_NONCE_SIZE 16
28 29
30#define EXT4_POLICY_FLAGS_PAD_4 0x00
31#define EXT4_POLICY_FLAGS_PAD_8 0x01
32#define EXT4_POLICY_FLAGS_PAD_16 0x02
33#define EXT4_POLICY_FLAGS_PAD_32 0x03
34#define EXT4_POLICY_FLAGS_PAD_MASK 0x03
35#define EXT4_POLICY_FLAGS_VALID 0x03
36
29/** 37/**
30 * Encryption context for inode 38 * Encryption context for inode
31 * 39 *
@@ -41,7 +49,7 @@ struct ext4_encryption_context {
41 char format; 49 char format;
42 char contents_encryption_mode; 50 char contents_encryption_mode;
43 char filenames_encryption_mode; 51 char filenames_encryption_mode;
44 char reserved; 52 char flags;
45 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; 53 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
46 char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE]; 54 char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE];
47} __attribute__((__packed__)); 55} __attribute__((__packed__));
@@ -120,6 +128,7 @@ struct ext4_fname_crypto_ctx {
120 struct crypto_hash *htfm; 128 struct crypto_hash *htfm;
121 struct page *workpage; 129 struct page *workpage;
122 struct ext4_encryption_key key; 130 struct ext4_encryption_key key;
131 unsigned flags : 8;
123 unsigned has_valid_key : 1; 132 unsigned has_valid_key : 1;
124 unsigned ctfm_key_is_ready : 1; 133 unsigned ctfm_key_is_ready : 1;
125}; 134};
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 3445035c7e01..d41843181818 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -87,6 +87,12 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
87 ext4_put_nojournal(handle); 87 ext4_put_nojournal(handle);
88 return 0; 88 return 0;
89 } 89 }
90
91 if (!handle->h_transaction) {
92 err = jbd2_journal_stop(handle);
93 return handle->h_err ? handle->h_err : err;
94 }
95
90 sb = handle->h_transaction->t_journal->j_private; 96 sb = handle->h_transaction->t_journal->j_private;
91 err = handle->h_err; 97 err = handle->h_err;
92 rc = jbd2_journal_stop(handle); 98 rc = jbd2_journal_stop(handle);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 973816bfe4a9..e003a1e81dc3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
377 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); 377 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
378 ext4_lblk_t last = lblock + len - 1; 378 ext4_lblk_t last = lblock + len - 1;
379 379
380 if (lblock > last) 380 if (len == 0 || lblock > last)
381 return 0; 381 return 0;
382 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 382 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
383} 383}
@@ -4927,13 +4927,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4927 if (ret) 4927 if (ret)
4928 return ret; 4928 return ret;
4929 4929
4930 /*
4931 * currently supporting (pre)allocate mode for extent-based
4932 * files _only_
4933 */
4934 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4935 return -EOPNOTSUPP;
4936
4937 if (mode & FALLOC_FL_COLLAPSE_RANGE) 4930 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4938 return ext4_collapse_range(inode, offset, len); 4931 return ext4_collapse_range(inode, offset, len);
4939 4932
@@ -4955,6 +4948,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4955 4948
4956 mutex_lock(&inode->i_mutex); 4949 mutex_lock(&inode->i_mutex);
4957 4950
4951 /*
4952 * We only support preallocation for extent-based files only
4953 */
4954 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4955 ret = -EOPNOTSUPP;
4956 goto out;
4957 }
4958
4958 if (!(mode & FALLOC_FL_KEEP_SIZE) && 4959 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4959 offset + len > i_size_read(inode)) { 4960 offset + len > i_size_read(inode)) {
4960 new_size = offset + len; 4961 new_size = offset + len;
@@ -5395,6 +5396,14 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5395 loff_t new_size, ioffset; 5396 loff_t new_size, ioffset;
5396 int ret; 5397 int ret;
5397 5398
5399 /*
5400 * We need to test this early because xfstests assumes that a
5401 * collapse range of (0, 1) will return EOPNOTSUPP if the file
5402 * system does not support collapse range.
5403 */
5404 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5405 return -EOPNOTSUPP;
5406
5398 /* Collapse range works only on fs block size aligned offsets. */ 5407 /* Collapse range works only on fs block size aligned offsets. */
5399 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || 5408 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
5400 len & (EXT4_CLUSTER_SIZE(sb) - 1)) 5409 len & (EXT4_CLUSTER_SIZE(sb) - 1))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index d33d5a6852b9..26724aeece73 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -703,6 +703,14 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
703 703
704 BUG_ON(end < lblk); 704 BUG_ON(end < lblk);
705 705
706 if ((status & EXTENT_STATUS_DELAYED) &&
707 (status & EXTENT_STATUS_WRITTEN)) {
708 ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
709 " delayed and written which can potentially "
710 " cause data loss.\n", lblk, len);
711 WARN_ON(1);
712 }
713
706 newes.es_lblk = lblk; 714 newes.es_lblk = lblk;
707 newes.es_len = len; 715 newes.es_len = len;
708 ext4_es_store_pblock_status(&newes, pblk, status); 716 ext4_es_store_pblock_status(&newes, pblk, status);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbd0654a2675..0554b0b5957b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -531,6 +531,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
531 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 531 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
532 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 532 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
533 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 533 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
534 !(status & EXTENT_STATUS_WRITTEN) &&
534 ext4_find_delalloc_range(inode, map->m_lblk, 535 ext4_find_delalloc_range(inode, map->m_lblk,
535 map->m_lblk + map->m_len - 1)) 536 map->m_lblk + map->m_len - 1))
536 status |= EXTENT_STATUS_DELAYED; 537 status |= EXTENT_STATUS_DELAYED;
@@ -635,6 +636,7 @@ found:
635 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 636 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
636 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 637 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
637 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 638 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
639 !(status & EXTENT_STATUS_WRITTEN) &&
638 ext4_find_delalloc_range(inode, map->m_lblk, 640 ext4_find_delalloc_range(inode, map->m_lblk,
639 map->m_lblk + map->m_len - 1)) 641 map->m_lblk + map->m_len - 1))
640 status |= EXTENT_STATUS_DELAYED; 642 status |= EXTENT_STATUS_DELAYED;
@@ -4343,7 +4345,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
4343 int inode_size = EXT4_INODE_SIZE(sb); 4345 int inode_size = EXT4_INODE_SIZE(sb);
4344 4346
4345 oi.orig_ino = orig_ino; 4347 oi.orig_ino = orig_ino;
4346 ino = orig_ino & ~(inodes_per_block - 1); 4348 ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
4347 for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { 4349 for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
4348 if (ino == orig_ino) 4350 if (ino == orig_ino)
4349 continue; 4351 continue;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7223b0b4bc38..814f3beb4369 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -640,7 +640,7 @@ static struct stats dx_show_leaf(struct inode *dir,
640 ext4_put_fname_crypto_ctx(&ctx); 640 ext4_put_fname_crypto_ctx(&ctx);
641 ctx = NULL; 641 ctx = NULL;
642 } 642 }
643 res = ext4_fname_disk_to_usr(ctx, de, 643 res = ext4_fname_disk_to_usr(ctx, NULL, de,
644 &fname_crypto_str); 644 &fname_crypto_str);
645 if (res < 0) { 645 if (res < 0) {
646 printk(KERN_WARNING "Error " 646 printk(KERN_WARNING "Error "
@@ -653,15 +653,8 @@ static struct stats dx_show_leaf(struct inode *dir,
653 name = fname_crypto_str.name; 653 name = fname_crypto_str.name;
654 len = fname_crypto_str.len; 654 len = fname_crypto_str.len;
655 } 655 }
656 res = ext4_fname_disk_to_hash(ctx, de, 656 ext4fs_dirhash(de->name, de->name_len,
657 &h); 657 &h);
658 if (res < 0) {
659 printk(KERN_WARNING "Error "
660 "converting filename "
661 "from disk to htree"
662 "\n");
663 h.hash = 0xDEADBEEF;
664 }
665 printk("%*.s:(E)%x.%u ", len, name, 658 printk("%*.s:(E)%x.%u ", len, name,
666 h.hash, (unsigned) ((char *) de 659 h.hash, (unsigned) ((char *) de
667 - base)); 660 - base));
@@ -1008,15 +1001,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
1008 /* silently ignore the rest of the block */ 1001 /* silently ignore the rest of the block */
1009 break; 1002 break;
1010 } 1003 }
1011#ifdef CONFIG_EXT4_FS_ENCRYPTION
1012 err = ext4_fname_disk_to_hash(ctx, de, hinfo);
1013 if (err < 0) {
1014 count = err;
1015 goto errout;
1016 }
1017#else
1018 ext4fs_dirhash(de->name, de->name_len, hinfo); 1004 ext4fs_dirhash(de->name, de->name_len, hinfo);
1019#endif
1020 if ((hinfo->hash < start_hash) || 1005 if ((hinfo->hash < start_hash) ||
1021 ((hinfo->hash == start_hash) && 1006 ((hinfo->hash == start_hash) &&
1022 (hinfo->minor_hash < start_minor_hash))) 1007 (hinfo->minor_hash < start_minor_hash)))
@@ -1032,7 +1017,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
1032 &tmp_str); 1017 &tmp_str);
1033 } else { 1018 } else {
1034 /* Directory is encrypted */ 1019 /* Directory is encrypted */
1035 err = ext4_fname_disk_to_usr(ctx, de, 1020 err = ext4_fname_disk_to_usr(ctx, hinfo, de,
1036 &fname_crypto_str); 1021 &fname_crypto_str);
1037 if (err < 0) { 1022 if (err < 0) {
1038 count = err; 1023 count = err;
@@ -1193,26 +1178,10 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
1193 int count = 0; 1178 int count = 0;
1194 char *base = (char *) de; 1179 char *base = (char *) de;
1195 struct dx_hash_info h = *hinfo; 1180 struct dx_hash_info h = *hinfo;
1196#ifdef CONFIG_EXT4_FS_ENCRYPTION
1197 struct ext4_fname_crypto_ctx *ctx = NULL;
1198 int err;
1199
1200 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1201 if (IS_ERR(ctx))
1202 return PTR_ERR(ctx);
1203#endif
1204 1181
1205 while ((char *) de < base + blocksize) { 1182 while ((char *) de < base + blocksize) {
1206 if (de->name_len && de->inode) { 1183 if (de->name_len && de->inode) {
1207#ifdef CONFIG_EXT4_FS_ENCRYPTION
1208 err = ext4_fname_disk_to_hash(ctx, de, &h);
1209 if (err < 0) {
1210 ext4_put_fname_crypto_ctx(&ctx);
1211 return err;
1212 }
1213#else
1214 ext4fs_dirhash(de->name, de->name_len, &h); 1184 ext4fs_dirhash(de->name, de->name_len, &h);
1215#endif
1216 map_tail--; 1185 map_tail--;
1217 map_tail->hash = h.hash; 1186 map_tail->hash = h.hash;
1218 map_tail->offs = ((char *) de - base)>>2; 1187 map_tail->offs = ((char *) de - base)>>2;
@@ -1223,9 +1192,6 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
1223 /* XXX: do we need to check rec_len == 0 case? -Chris */ 1192 /* XXX: do we need to check rec_len == 0 case? -Chris */
1224 de = ext4_next_entry(de, blocksize); 1193 de = ext4_next_entry(de, blocksize);
1225 } 1194 }
1226#ifdef CONFIG_EXT4_FS_ENCRYPTION
1227 ext4_put_fname_crypto_ctx(&ctx);
1228#endif
1229 return count; 1195 return count;
1230} 1196}
1231 1197
@@ -1287,16 +1253,8 @@ static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
1287 return 0; 1253 return 0;
1288 1254
1289#ifdef CONFIG_EXT4_FS_ENCRYPTION 1255#ifdef CONFIG_EXT4_FS_ENCRYPTION
1290 if (ctx) { 1256 if (ctx)
1291 /* Directory is encrypted */ 1257 return ext4_fname_match(ctx, fname_crypto_str, len, name, de);
1292 res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str);
1293 if (res < 0)
1294 return res;
1295 if (len != res)
1296 return 0;
1297 res = memcmp(name, fname_crypto_str->name, len);
1298 return (res == 0) ? 1 : 0;
1299 }
1300#endif 1258#endif
1301 if (len != de->name_len) 1259 if (len != de->name_len)
1302 return 0; 1260 return 0;
@@ -1324,16 +1282,6 @@ int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1324 if (IS_ERR(ctx)) 1282 if (IS_ERR(ctx))
1325 return -1; 1283 return -1;
1326 1284
1327 if (ctx != NULL) {
1328 /* Allocate buffer to hold maximum name length */
1329 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1330 &fname_crypto_str);
1331 if (res < 0) {
1332 ext4_put_fname_crypto_ctx(&ctx);
1333 return -1;
1334 }
1335 }
1336
1337 de = (struct ext4_dir_entry_2 *)search_buf; 1285 de = (struct ext4_dir_entry_2 *)search_buf;
1338 dlimit = search_buf + buf_size; 1286 dlimit = search_buf + buf_size;
1339 while ((char *) de < dlimit) { 1287 while ((char *) de < dlimit) {
@@ -1872,14 +1820,6 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1872 return res; 1820 return res;
1873 } 1821 }
1874 reclen = EXT4_DIR_REC_LEN(res); 1822 reclen = EXT4_DIR_REC_LEN(res);
1875
1876 /* Allocate buffer to hold maximum name length */
1877 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1878 &fname_crypto_str);
1879 if (res < 0) {
1880 ext4_put_fname_crypto_ctx(&ctx);
1881 return -1;
1882 }
1883 } 1823 }
1884 1824
1885 de = (struct ext4_dir_entry_2 *)buf; 1825 de = (struct ext4_dir_entry_2 *)buf;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 8a8ec6293b19..cf0c472047e3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1432,12 +1432,15 @@ static int ext4_flex_group_add(struct super_block *sb,
1432 goto exit; 1432 goto exit;
1433 /* 1433 /*
1434 * We will always be modifying at least the superblock and GDT 1434 * We will always be modifying at least the superblock and GDT
1435 * block. If we are adding a group past the last current GDT block, 1435 * blocks. If we are adding a group past the last current GDT block,
1436 * we will also modify the inode and the dindirect block. If we 1436 * we will also modify the inode and the dindirect block. If we
1437 * are adding a group with superblock/GDT backups we will also 1437 * are adding a group with superblock/GDT backups we will also
1438 * modify each of the reserved GDT dindirect blocks. 1438 * modify each of the reserved GDT dindirect blocks.
1439 */ 1439 */
1440 credit = flex_gd->count * 4 + reserved_gdb; 1440 credit = 3; /* sb, resize inode, resize inode dindirect */
1441 /* GDT blocks */
1442 credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb));
1443 credit += reserved_gdb; /* Reserved GDT dindirect blocks */
1441 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit); 1444 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
1442 if (IS_ERR(handle)) { 1445 if (IS_ERR(handle)) {
1443 err = PTR_ERR(handle); 1446 err = PTR_ERR(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f06d0589ddba..ca9d4a2fed41 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -294,6 +294,8 @@ static void __save_error_info(struct super_block *sb, const char *func,
294 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 294 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
295 295
296 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 296 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
297 if (bdev_read_only(sb->s_bdev))
298 return;
297 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 299 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
298 es->s_last_error_time = cpu_to_le32(get_seconds()); 300 es->s_last_error_time = cpu_to_le32(get_seconds());
299 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 301 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 19f78f20975e..187b78920314 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -74,7 +74,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
74 goto errout; 74 goto errout;
75 } 75 }
76 pstr.name = paddr; 76 pstr.name = paddr;
77 res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr); 77 res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
78 if (res < 0) 78 if (res < 0)
79 goto errout; 79 goto errout;
80 /* Null-terminate the name */ 80 /* Null-terminate the name */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b91b0e10678e..1e1aae669fa8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
1513{ 1513{
1514 struct inode *inode = mapping->host; 1514 struct inode *inode = mapping->host;
1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1516 bool locked = false;
1516 int ret; 1517 int ret;
1517 long diff; 1518 long diff;
1518 1519
@@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
1533 1534
1534 diff = nr_pages_to_write(sbi, DATA, wbc); 1535 diff = nr_pages_to_write(sbi, DATA, wbc);
1535 1536
1537 if (!S_ISDIR(inode->i_mode)) {
1538 mutex_lock(&sbi->writepages);
1539 locked = true;
1540 }
1536 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1541 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
1542 if (locked)
1543 mutex_unlock(&sbi->writepages);
1537 1544
1538 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1545 f2fs_submit_merged_bio(sbi, DATA, WRITE);
1539 1546
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d8921cf2ba9a..8de34ab6d5b1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -625,6 +625,7 @@ struct f2fs_sb_info {
625 struct mutex cp_mutex; /* checkpoint procedure lock */ 625 struct mutex cp_mutex; /* checkpoint procedure lock */
626 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 626 struct rw_semaphore cp_rwsem; /* blocking FS operations */
627 struct rw_semaphore node_write; /* locking node writes */ 627 struct rw_semaphore node_write; /* locking node writes */
628 struct mutex writepages; /* mutex for writepages() */
628 wait_queue_head_t cp_wait; 629 wait_queue_head_t cp_wait;
629 630
630 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 631 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7e3794edae42..658e8079aaf9 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -298,16 +298,14 @@ fail:
298 298
299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) 299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
300{ 300{
301 struct page *page; 301 struct page *page = page_follow_link_light(dentry, nd);
302 302
303 page = page_follow_link_light(dentry, nd); 303 if (IS_ERR_OR_NULL(page))
304 if (IS_ERR(page))
305 return page; 304 return page;
306 305
307 /* this is broken symlink case */ 306 /* this is broken symlink case */
308 if (*nd_get_link(nd) == 0) { 307 if (*nd_get_link(nd) == 0) {
309 kunmap(page); 308 page_put_link(dentry, nd, page);
310 page_cache_release(page);
311 return ERR_PTR(-ENOENT); 309 return ERR_PTR(-ENOENT);
312 } 310 }
313 return page; 311 return page;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 160b88346b24..b2dd1b01f076 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1035,6 +1035,7 @@ try_onemore:
1035 sbi->raw_super = raw_super; 1035 sbi->raw_super = raw_super;
1036 sbi->raw_super_buf = raw_super_buf; 1036 sbi->raw_super_buf = raw_super_buf;
1037 mutex_init(&sbi->gc_mutex); 1037 mutex_init(&sbi->gc_mutex);
1038 mutex_init(&sbi->writepages);
1038 mutex_init(&sbi->cp_mutex); 1039 mutex_init(&sbi->cp_mutex);
1039 init_rwsem(&sbi->node_write); 1040 init_rwsem(&sbi->node_write);
1040 clear_sbi_flag(sbi, SBI_POR_DOING); 1041 clear_sbi_flag(sbi, SBI_POR_DOING);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ef263174acd2..07d8d8f52faf 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -581,7 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
581 if (name == NULL) 581 if (name == NULL)
582 goto out_put; 582 goto out_put;
583 583
584 fd = file_create(name, mode & S_IFMT); 584 fd = file_create(name, mode & 0777);
585 if (fd < 0) 585 if (fd < 0)
586 error = fd; 586 error = fd;
587 else 587 else
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b5128c6e63ad..a9079d035ae5 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -842,15 +842,23 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
842{ 842{
843 jbd2_journal_revoke_header_t *header; 843 jbd2_journal_revoke_header_t *header;
844 int offset, max; 844 int offset, max;
845 int csum_size = 0;
846 __u32 rcount;
845 int record_len = 4; 847 int record_len = 4;
846 848
847 header = (jbd2_journal_revoke_header_t *) bh->b_data; 849 header = (jbd2_journal_revoke_header_t *) bh->b_data;
848 offset = sizeof(jbd2_journal_revoke_header_t); 850 offset = sizeof(jbd2_journal_revoke_header_t);
849 max = be32_to_cpu(header->r_count); 851 rcount = be32_to_cpu(header->r_count);
850 852
851 if (!jbd2_revoke_block_csum_verify(journal, header)) 853 if (!jbd2_revoke_block_csum_verify(journal, header))
852 return -EINVAL; 854 return -EINVAL;
853 855
856 if (jbd2_journal_has_csum_v2or3(journal))
857 csum_size = sizeof(struct jbd2_journal_revoke_tail);
858 if (rcount > journal->j_blocksize - csum_size)
859 return -EINVAL;
860 max = rcount;
861
854 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 862 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
855 record_len = 8; 863 record_len = 8;
856 864
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index c6cbaef2bda1..14214da80eb8 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -577,7 +577,7 @@ static void write_one_revoke_record(journal_t *journal,
577{ 577{
578 int csum_size = 0; 578 int csum_size = 0;
579 struct buffer_head *descriptor; 579 struct buffer_head *descriptor;
580 int offset; 580 int sz, offset;
581 journal_header_t *header; 581 journal_header_t *header;
582 582
583 /* If we are already aborting, this all becomes a noop. We 583 /* If we are already aborting, this all becomes a noop. We
@@ -594,9 +594,14 @@ static void write_one_revoke_record(journal_t *journal,
594 if (jbd2_journal_has_csum_v2or3(journal)) 594 if (jbd2_journal_has_csum_v2or3(journal))
595 csum_size = sizeof(struct jbd2_journal_revoke_tail); 595 csum_size = sizeof(struct jbd2_journal_revoke_tail);
596 596
597 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
598 sz = 8;
599 else
600 sz = 4;
601
597 /* Make sure we have a descriptor with space left for the record */ 602 /* Make sure we have a descriptor with space left for the record */
598 if (descriptor) { 603 if (descriptor) {
599 if (offset >= journal->j_blocksize - csum_size) { 604 if (offset + sz > journal->j_blocksize - csum_size) {
600 flush_descriptor(journal, descriptor, offset, write_op); 605 flush_descriptor(journal, descriptor, offset, write_op);
601 descriptor = NULL; 606 descriptor = NULL;
602 } 607 }
@@ -619,16 +624,13 @@ static void write_one_revoke_record(journal_t *journal,
619 *descriptorp = descriptor; 624 *descriptorp = descriptor;
620 } 625 }
621 626
622 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { 627 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
623 * ((__be64 *)(&descriptor->b_data[offset])) = 628 * ((__be64 *)(&descriptor->b_data[offset])) =
624 cpu_to_be64(record->blocknr); 629 cpu_to_be64(record->blocknr);
625 offset += 8; 630 else
626
627 } else {
628 * ((__be32 *)(&descriptor->b_data[offset])) = 631 * ((__be32 *)(&descriptor->b_data[offset])) =
629 cpu_to_be32(record->blocknr); 632 cpu_to_be32(record->blocknr);
630 offset += 4; 633 offset += sz;
631 }
632 634
633 *offsetp = offset; 635 *offsetp = offset;
634} 636}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 5f09370c90a8..ff2f2e6ad311 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -551,7 +551,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
551 int result; 551 int result;
552 int wanted; 552 int wanted;
553 553
554 WARN_ON(!transaction);
555 if (is_handle_aborted(handle)) 554 if (is_handle_aborted(handle))
556 return -EROFS; 555 return -EROFS;
557 journal = transaction->t_journal; 556 journal = transaction->t_journal;
@@ -627,7 +626,6 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
627 tid_t tid; 626 tid_t tid;
628 int need_to_start, ret; 627 int need_to_start, ret;
629 628
630 WARN_ON(!transaction);
631 /* If we've had an abort of any type, don't even think about 629 /* If we've had an abort of any type, don't even think about
632 * actually doing the restart! */ 630 * actually doing the restart! */
633 if (is_handle_aborted(handle)) 631 if (is_handle_aborted(handle))
@@ -785,7 +783,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
785 int need_copy = 0; 783 int need_copy = 0;
786 unsigned long start_lock, time_lock; 784 unsigned long start_lock, time_lock;
787 785
788 WARN_ON(!transaction);
789 if (is_handle_aborted(handle)) 786 if (is_handle_aborted(handle))
790 return -EROFS; 787 return -EROFS;
791 journal = transaction->t_journal; 788 journal = transaction->t_journal;
@@ -1051,7 +1048,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
1051 int err; 1048 int err;
1052 1049
1053 jbd_debug(5, "journal_head %p\n", jh); 1050 jbd_debug(5, "journal_head %p\n", jh);
1054 WARN_ON(!transaction);
1055 err = -EROFS; 1051 err = -EROFS;
1056 if (is_handle_aborted(handle)) 1052 if (is_handle_aborted(handle))
1057 goto out; 1053 goto out;
@@ -1266,7 +1262,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1266 struct journal_head *jh; 1262 struct journal_head *jh;
1267 int ret = 0; 1263 int ret = 0;
1268 1264
1269 WARN_ON(!transaction);
1270 if (is_handle_aborted(handle)) 1265 if (is_handle_aborted(handle))
1271 return -EROFS; 1266 return -EROFS;
1272 journal = transaction->t_journal; 1267 journal = transaction->t_journal;
@@ -1397,7 +1392,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1397 int err = 0; 1392 int err = 0;
1398 int was_modified = 0; 1393 int was_modified = 0;
1399 1394
1400 WARN_ON(!transaction);
1401 if (is_handle_aborted(handle)) 1395 if (is_handle_aborted(handle))
1402 return -EROFS; 1396 return -EROFS;
1403 journal = transaction->t_journal; 1397 journal = transaction->t_journal;
@@ -1530,8 +1524,22 @@ int jbd2_journal_stop(handle_t *handle)
1530 tid_t tid; 1524 tid_t tid;
1531 pid_t pid; 1525 pid_t pid;
1532 1526
1533 if (!transaction) 1527 if (!transaction) {
1534 goto free_and_exit; 1528 /*
1529 * Handle is already detached from the transaction so
1530 * there is nothing to do other than decrease a refcount,
1531 * or free the handle if refcount drops to zero
1532 */
1533 if (--handle->h_ref > 0) {
1534 jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1535 handle->h_ref);
1536 return err;
1537 } else {
1538 if (handle->h_rsv_handle)
1539 jbd2_free_handle(handle->h_rsv_handle);
1540 goto free_and_exit;
1541 }
1542 }
1535 journal = transaction->t_journal; 1543 journal = transaction->t_journal;
1536 1544
1537 J_ASSERT(journal_current_handle() == handle); 1545 J_ASSERT(journal_current_handle() == handle);
@@ -2373,7 +2381,6 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
2373 transaction_t *transaction = handle->h_transaction; 2381 transaction_t *transaction = handle->h_transaction;
2374 journal_t *journal; 2382 journal_t *journal;
2375 2383
2376 WARN_ON(!transaction);
2377 if (is_handle_aborted(handle)) 2384 if (is_handle_aborted(handle))
2378 return -EROFS; 2385 return -EROFS;
2379 journal = transaction->t_journal; 2386 journal = transaction->t_journal;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index f131fc23ffc4..fffca9517321 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -518,7 +518,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
518 if (!kn) 518 if (!kn)
519 goto err_out1; 519 goto err_out1;
520 520
521 ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); 521 /*
522 * If the ino of the sysfs entry created for a kmem cache gets
523 * allocated from an ida layer, which is accounted to the memcg that
524 * owns the cache, the memcg will get pinned forever. So do not account
525 * ino ida allocations.
526 */
527 ret = ida_simple_get(&root->ino_ida, 1, 0,
528 GFP_KERNEL | __GFP_NOACCOUNT);
522 if (ret < 0) 529 if (ret < 0)
523 goto err_out2; 530 goto err_out2;
524 kn->ino = ret; 531 kn->ino = ret;
diff --git a/fs/namei.c b/fs/namei.c
index 4a8d998b7274..fe30d3be43a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1415,6 +1415,7 @@ static int lookup_fast(struct nameidata *nd,
1415 */ 1415 */
1416 if (nd->flags & LOOKUP_RCU) { 1416 if (nd->flags & LOOKUP_RCU) {
1417 unsigned seq; 1417 unsigned seq;
1418 bool negative;
1418 dentry = __d_lookup_rcu(parent, &nd->last, &seq); 1419 dentry = __d_lookup_rcu(parent, &nd->last, &seq);
1419 if (!dentry) 1420 if (!dentry)
1420 goto unlazy; 1421 goto unlazy;
@@ -1424,8 +1425,11 @@ static int lookup_fast(struct nameidata *nd,
1424 * the dentry name information from lookup. 1425 * the dentry name information from lookup.
1425 */ 1426 */
1426 *inode = dentry->d_inode; 1427 *inode = dentry->d_inode;
1428 negative = d_is_negative(dentry);
1427 if (read_seqcount_retry(&dentry->d_seq, seq)) 1429 if (read_seqcount_retry(&dentry->d_seq, seq))
1428 return -ECHILD; 1430 return -ECHILD;
1431 if (negative)
1432 return -ENOENT;
1429 1433
1430 /* 1434 /*
1431 * This sequence count validates that the parent had no 1435 * This sequence count validates that the parent had no
@@ -1472,6 +1476,10 @@ unlazy:
1472 goto need_lookup; 1476 goto need_lookup;
1473 } 1477 }
1474 1478
1479 if (unlikely(d_is_negative(dentry))) {
1480 dput(dentry);
1481 return -ENOENT;
1482 }
1475 path->mnt = mnt; 1483 path->mnt = mnt;
1476 path->dentry = dentry; 1484 path->dentry = dentry;
1477 err = follow_managed(path, nd->flags); 1485 err = follow_managed(path, nd->flags);
@@ -1583,10 +1591,10 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1583 goto out_err; 1591 goto out_err;
1584 1592
1585 inode = path->dentry->d_inode; 1593 inode = path->dentry->d_inode;
1594 err = -ENOENT;
1595 if (d_is_negative(path->dentry))
1596 goto out_path_put;
1586 } 1597 }
1587 err = -ENOENT;
1588 if (d_is_negative(path->dentry))
1589 goto out_path_put;
1590 1598
1591 if (should_follow_link(path->dentry, follow)) { 1599 if (should_follow_link(path->dentry, follow)) {
1592 if (nd->flags & LOOKUP_RCU) { 1600 if (nd->flags & LOOKUP_RCU) {
@@ -3036,14 +3044,13 @@ retry_lookup:
3036 3044
3037 BUG_ON(nd->flags & LOOKUP_RCU); 3045 BUG_ON(nd->flags & LOOKUP_RCU);
3038 inode = path->dentry->d_inode; 3046 inode = path->dentry->d_inode;
3039finish_lookup:
3040 /* we _can_ be in RCU mode here */
3041 error = -ENOENT; 3047 error = -ENOENT;
3042 if (d_is_negative(path->dentry)) { 3048 if (d_is_negative(path->dentry)) {
3043 path_to_nameidata(path, nd); 3049 path_to_nameidata(path, nd);
3044 goto out; 3050 goto out;
3045 } 3051 }
3046 3052finish_lookup:
3053 /* we _can_ be in RCU mode here */
3047 if (should_follow_link(path->dentry, !symlink_ok)) { 3054 if (should_follow_link(path->dentry, !symlink_ok)) {
3048 if (nd->flags & LOOKUP_RCU) { 3055 if (nd->flags & LOOKUP_RCU) {
3049 if (unlikely(nd->path.mnt != path->mnt || 3056 if (unlikely(nd->path.mnt != path->mnt ||
@@ -3226,7 +3233,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3226 3233
3227 if (unlikely(file->f_flags & __O_TMPFILE)) { 3234 if (unlikely(file->f_flags & __O_TMPFILE)) {
3228 error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); 3235 error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);
3229 goto out; 3236 goto out2;
3230 } 3237 }
3231 3238
3232 error = path_init(dfd, pathname, flags, nd); 3239 error = path_init(dfd, pathname, flags, nd);
@@ -3256,6 +3263,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3256 } 3263 }
3257out: 3264out:
3258 path_cleanup(nd); 3265 path_cleanup(nd);
3266out2:
3259 if (!(opened & FILE_OPENED)) { 3267 if (!(opened & FILE_OPENED)) {
3260 BUG_ON(!error); 3268 BUG_ON(!error);
3261 put_filp(file); 3269 put_filp(file);
diff --git a/fs/namespace.c b/fs/namespace.c
index 1f4f9dac6e5a..1b9e11167bae 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3179,6 +3179,12 @@ bool fs_fully_visible(struct file_system_type *type)
3179 if (mnt->mnt.mnt_sb->s_type != type) 3179 if (mnt->mnt.mnt_sb->s_type != type)
3180 continue; 3180 continue;
3181 3181
3182 /* This mount is not fully visible if it's root directory
3183 * is not the root directory of the filesystem.
3184 */
3185 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3186 continue;
3187
3182 /* This mount is not fully visible if there are any child mounts 3188 /* This mount is not fully visible if there are any child mounts
3183 * that cover anything except for empty directories. 3189 * that cover anything except for empty directories.
3184 */ 3190 */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 03d647bf195d..cdefaa331a07 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
181} 181}
182 182
183const struct nfsd4_layout_ops bl_layout_ops = { 183const struct nfsd4_layout_ops bl_layout_ops = {
184 /*
185 * Pretend that we send notification to the client. This is a blatant
186 * lie to force recent Linux clients to cache our device IDs.
187 * We rarely ever change the device ID, so the harm of leaking deviceids
188 * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
189 * in this regard, but I filed errata 4119 for this a while ago, and
190 * hopefully the Linux client will eventually start caching deviceids
191 * without this again.
192 */
193 .notify_types =
194 NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
184 .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, 195 .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
185 .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, 196 .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
186 .proc_layoutget = nfsd4_block_proc_layoutget, 197 .proc_layoutget = nfsd4_block_proc_layoutget,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 58277859a467..5694cfb7a47b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status)
224} 224}
225 225
226static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, 226static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
227 enum nfsstat4 *status) 227 int *status)
228{ 228{
229 __be32 *p; 229 __be32 *p;
230 u32 op; 230 u32 op;
@@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
235 op = be32_to_cpup(p++); 235 op = be32_to_cpup(p++);
236 if (unlikely(op != expected)) 236 if (unlikely(op != expected))
237 goto out_unexpected; 237 goto out_unexpected;
238 *status = be32_to_cpup(p); 238 *status = nfs_cb_stat_to_errno(be32_to_cpup(p));
239 return 0; 239 return 0;
240out_overflow: 240out_overflow:
241 print_overflow_msg(__func__, xdr); 241 print_overflow_msg(__func__, xdr);
@@ -446,22 +446,16 @@ out_overflow:
446static int decode_cb_sequence4res(struct xdr_stream *xdr, 446static int decode_cb_sequence4res(struct xdr_stream *xdr,
447 struct nfsd4_callback *cb) 447 struct nfsd4_callback *cb)
448{ 448{
449 enum nfsstat4 nfserr;
450 int status; 449 int status;
451 450
452 if (cb->cb_minorversion == 0) 451 if (cb->cb_minorversion == 0)
453 return 0; 452 return 0;
454 453
455 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr); 454 status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
456 if (unlikely(status)) 455 if (unlikely(status || cb->cb_status))
457 goto out; 456 return status;
458 if (unlikely(nfserr != NFS4_OK)) 457
459 goto out_default; 458 return decode_cb_sequence4resok(xdr, cb);
460 status = decode_cb_sequence4resok(xdr, cb);
461out:
462 return status;
463out_default:
464 return nfs_cb_stat_to_errno(nfserr);
465} 459}
466 460
467/* 461/*
@@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
524 struct nfsd4_callback *cb) 518 struct nfsd4_callback *cb)
525{ 519{
526 struct nfs4_cb_compound_hdr hdr; 520 struct nfs4_cb_compound_hdr hdr;
527 enum nfsstat4 nfserr;
528 int status; 521 int status;
529 522
530 status = decode_cb_compound4res(xdr, &hdr); 523 status = decode_cb_compound4res(xdr, &hdr);
531 if (unlikely(status)) 524 if (unlikely(status))
532 goto out; 525 return status;
533 526
534 if (cb != NULL) { 527 if (cb != NULL) {
535 status = decode_cb_sequence4res(xdr, cb); 528 status = decode_cb_sequence4res(xdr, cb);
536 if (unlikely(status)) 529 if (unlikely(status || cb->cb_status))
537 goto out; 530 return status;
538 } 531 }
539 532
540 status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr); 533 return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
541 if (unlikely(status))
542 goto out;
543 if (unlikely(nfserr != NFS4_OK))
544 status = nfs_cb_stat_to_errno(nfserr);
545out:
546 return status;
547} 534}
548 535
549#ifdef CONFIG_NFSD_PNFS 536#ifdef CONFIG_NFSD_PNFS
@@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
621 struct nfsd4_callback *cb) 608 struct nfsd4_callback *cb)
622{ 609{
623 struct nfs4_cb_compound_hdr hdr; 610 struct nfs4_cb_compound_hdr hdr;
624 enum nfsstat4 nfserr;
625 int status; 611 int status;
626 612
627 status = decode_cb_compound4res(xdr, &hdr); 613 status = decode_cb_compound4res(xdr, &hdr);
628 if (unlikely(status)) 614 if (unlikely(status))
629 goto out; 615 return status;
616
630 if (cb) { 617 if (cb) {
631 status = decode_cb_sequence4res(xdr, cb); 618 status = decode_cb_sequence4res(xdr, cb);
632 if (unlikely(status)) 619 if (unlikely(status || cb->cb_status))
633 goto out; 620 return status;
634 } 621 }
635 status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); 622 return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
636 if (unlikely(status))
637 goto out;
638 if (unlikely(nfserr != NFS4_OK))
639 status = nfs_cb_stat_to_errno(nfserr);
640out:
641 return status;
642} 623}
643#endif /* CONFIG_NFSD_PNFS */ 624#endif /* CONFIG_NFSD_PNFS */
644 625
@@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
898 if (!nfsd41_cb_get_slot(clp, task)) 879 if (!nfsd41_cb_get_slot(clp, task))
899 return; 880 return;
900 } 881 }
901 spin_lock(&clp->cl_lock);
902 if (list_empty(&cb->cb_per_client)) {
903 /* This is the first call, not a restart */
904 cb->cb_done = false;
905 list_add(&cb->cb_per_client, &clp->cl_callbacks);
906 }
907 spin_unlock(&clp->cl_lock);
908 rpc_call_start(task); 882 rpc_call_start(task);
909} 883}
910 884
@@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
918 892
919 if (clp->cl_minorversion) { 893 if (clp->cl_minorversion) {
920 /* No need for lock, access serialized in nfsd4_cb_prepare */ 894 /* No need for lock, access serialized in nfsd4_cb_prepare */
921 ++clp->cl_cb_session->se_cb_seq_nr; 895 if (!task->tk_status)
896 ++clp->cl_cb_session->se_cb_seq_nr;
922 clear_bit(0, &clp->cl_cb_slot_busy); 897 clear_bit(0, &clp->cl_cb_slot_busy);
923 rpc_wake_up_next(&clp->cl_cb_waitq); 898 rpc_wake_up_next(&clp->cl_cb_waitq);
924 dprintk("%s: freed slot, new seqid=%d\n", __func__, 899 dprintk("%s: freed slot, new seqid=%d\n", __func__,
925 clp->cl_cb_session->se_cb_seq_nr); 900 clp->cl_cb_session->se_cb_seq_nr);
926 } 901 }
927 902
928 if (clp->cl_cb_client != task->tk_client) { 903 /*
929 /* We're shutting down or changing cl_cb_client; leave 904 * If the backchannel connection was shut down while this
930 * it to nfsd4_process_cb_update to restart the call if 905 * task was queued, we need to resubmit it after setting up
931 * necessary. */ 906 * a new backchannel connection.
907 *
908 * Note that if we lost our callback connection permanently
909 * the submission code will error out, so we don't need to
910 * handle that case here.
911 */
912 if (task->tk_flags & RPC_TASK_KILLED) {
913 task->tk_status = 0;
914 cb->cb_need_restart = true;
932 return; 915 return;
933 } 916 }
934 917
935 if (cb->cb_done) 918 if (cb->cb_status) {
936 return; 919 WARN_ON_ONCE(task->tk_status);
920 task->tk_status = cb->cb_status;
921 }
937 922
938 switch (cb->cb_ops->done(cb, task)) { 923 switch (cb->cb_ops->done(cb, task)) {
939 case 0: 924 case 0:
@@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
949 default: 934 default:
950 BUG(); 935 BUG();
951 } 936 }
952 cb->cb_done = true;
953} 937}
954 938
955static void nfsd4_cb_release(void *calldata) 939static void nfsd4_cb_release(void *calldata)
956{ 940{
957 struct nfsd4_callback *cb = calldata; 941 struct nfsd4_callback *cb = calldata;
958 struct nfs4_client *clp = cb->cb_clp;
959
960 if (cb->cb_done) {
961 spin_lock(&clp->cl_lock);
962 list_del(&cb->cb_per_client);
963 spin_unlock(&clp->cl_lock);
964 942
943 if (cb->cb_need_restart)
944 nfsd4_run_cb(cb);
945 else
965 cb->cb_ops->release(cb); 946 cb->cb_ops->release(cb);
966 } 947
967} 948}
968 949
969static const struct rpc_call_ops nfsd4_cb_ops = { 950static const struct rpc_call_ops nfsd4_cb_ops = {
@@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
1058 nfsd4_mark_cb_down(clp, err); 1039 nfsd4_mark_cb_down(clp, err);
1059 return; 1040 return;
1060 } 1041 }
1061 /* Yay, the callback channel's back! Restart any callbacks: */
1062 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
1063 queue_work(callback_wq, &cb->cb_work);
1064} 1042}
1065 1043
1066static void 1044static void
@@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work)
1071 struct nfs4_client *clp = cb->cb_clp; 1049 struct nfs4_client *clp = cb->cb_clp;
1072 struct rpc_clnt *clnt; 1050 struct rpc_clnt *clnt;
1073 1051
1074 if (cb->cb_ops && cb->cb_ops->prepare) 1052 if (cb->cb_need_restart) {
1075 cb->cb_ops->prepare(cb); 1053 cb->cb_need_restart = false;
1054 } else {
1055 if (cb->cb_ops && cb->cb_ops->prepare)
1056 cb->cb_ops->prepare(cb);
1057 }
1076 1058
1077 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) 1059 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
1078 nfsd4_process_cb_update(cb); 1060 nfsd4_process_cb_update(cb);
@@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work)
1084 cb->cb_ops->release(cb); 1066 cb->cb_ops->release(cb);
1085 return; 1067 return;
1086 } 1068 }
1069
1070 /*
1071 * Don't send probe messages for 4.1 or later.
1072 */
1073 if (!cb->cb_ops && clp->cl_minorversion) {
1074 clp->cl_cb_state = NFSD4_CB_UP;
1075 return;
1076 }
1077
1087 cb->cb_msg.rpc_cred = clp->cl_cb_cred; 1078 cb->cb_msg.rpc_cred = clp->cl_cb_cred;
1088 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 1079 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
1089 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); 1080 cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
@@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
1098 cb->cb_msg.rpc_resp = cb; 1089 cb->cb_msg.rpc_resp = cb;
1099 cb->cb_ops = ops; 1090 cb->cb_ops = ops;
1100 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); 1091 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
1101 INIT_LIST_HEAD(&cb->cb_per_client); 1092 cb->cb_status = 0;
1102 cb->cb_done = true; 1093 cb->cb_need_restart = false;
1103} 1094}
1104 1095
1105void nfsd4_run_cb(struct nfsd4_callback *cb) 1096void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 38f2d7abe3a7..039f9c8a95e8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab;
94static struct kmem_cache *file_slab; 94static struct kmem_cache *file_slab;
95static struct kmem_cache *stateid_slab; 95static struct kmem_cache *stateid_slab;
96static struct kmem_cache *deleg_slab; 96static struct kmem_cache *deleg_slab;
97static struct kmem_cache *odstate_slab;
97 98
98static void free_session(struct nfsd4_session *); 99static void free_session(struct nfsd4_session *);
99 100
@@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi)
281 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { 282 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
282 hlist_del_rcu(&fi->fi_hash); 283 hlist_del_rcu(&fi->fi_hash);
283 spin_unlock(&state_lock); 284 spin_unlock(&state_lock);
285 WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
284 WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); 286 WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
285 call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); 287 call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
286 } 288 }
@@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
471 __nfs4_file_put_access(fp, O_RDONLY); 473 __nfs4_file_put_access(fp, O_RDONLY);
472} 474}
473 475
476/*
477 * Allocate a new open/delegation state counter. This is needed for
478 * pNFS for proper return on close semantics.
479 *
480 * Note that we only allocate it for pNFS-enabled exports, otherwise
481 * all pointers to struct nfs4_clnt_odstate are always NULL.
482 */
483static struct nfs4_clnt_odstate *
484alloc_clnt_odstate(struct nfs4_client *clp)
485{
486 struct nfs4_clnt_odstate *co;
487
488 co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
489 if (co) {
490 co->co_client = clp;
491 atomic_set(&co->co_odcount, 1);
492 }
493 return co;
494}
495
496static void
497hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
498{
499 struct nfs4_file *fp = co->co_file;
500
501 lockdep_assert_held(&fp->fi_lock);
502 list_add(&co->co_perfile, &fp->fi_clnt_odstate);
503}
504
505static inline void
506get_clnt_odstate(struct nfs4_clnt_odstate *co)
507{
508 if (co)
509 atomic_inc(&co->co_odcount);
510}
511
512static void
513put_clnt_odstate(struct nfs4_clnt_odstate *co)
514{
515 struct nfs4_file *fp;
516
517 if (!co)
518 return;
519
520 fp = co->co_file;
521 if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
522 list_del(&co->co_perfile);
523 spin_unlock(&fp->fi_lock);
524
525 nfsd4_return_all_file_layouts(co->co_client, fp);
526 kmem_cache_free(odstate_slab, co);
527 }
528}
529
530static struct nfs4_clnt_odstate *
531find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
532{
533 struct nfs4_clnt_odstate *co;
534 struct nfs4_client *cl;
535
536 if (!new)
537 return NULL;
538
539 cl = new->co_client;
540
541 spin_lock(&fp->fi_lock);
542 list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
543 if (co->co_client == cl) {
544 get_clnt_odstate(co);
545 goto out;
546 }
547 }
548 co = new;
549 co->co_file = fp;
550 hash_clnt_odstate_locked(new);
551out:
552 spin_unlock(&fp->fi_lock);
553 return co;
554}
555
474struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, 556struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
475 struct kmem_cache *slab) 557 struct kmem_cache *slab)
476{ 558{
@@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh)
606} 688}
607 689
608static struct nfs4_delegation * 690static struct nfs4_delegation *
609alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) 691alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
692 struct nfs4_clnt_odstate *odstate)
610{ 693{
611 struct nfs4_delegation *dp; 694 struct nfs4_delegation *dp;
612 long n; 695 long n;
@@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
631 INIT_LIST_HEAD(&dp->dl_perfile); 714 INIT_LIST_HEAD(&dp->dl_perfile);
632 INIT_LIST_HEAD(&dp->dl_perclnt); 715 INIT_LIST_HEAD(&dp->dl_perclnt);
633 INIT_LIST_HEAD(&dp->dl_recall_lru); 716 INIT_LIST_HEAD(&dp->dl_recall_lru);
717 dp->dl_clnt_odstate = odstate;
718 get_clnt_odstate(odstate);
634 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 719 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
635 dp->dl_retries = 1; 720 dp->dl_retries = 1;
636 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, 721 nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
714 spin_lock(&state_lock); 799 spin_lock(&state_lock);
715 unhash_delegation_locked(dp); 800 unhash_delegation_locked(dp);
716 spin_unlock(&state_lock); 801 spin_unlock(&state_lock);
802 put_clnt_odstate(dp->dl_clnt_odstate);
717 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 803 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
718 nfs4_put_stid(&dp->dl_stid); 804 nfs4_put_stid(&dp->dl_stid);
719} 805}
@@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
724 810
725 WARN_ON(!list_empty(&dp->dl_recall_lru)); 811 WARN_ON(!list_empty(&dp->dl_recall_lru));
726 812
813 put_clnt_odstate(dp->dl_clnt_odstate);
727 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 814 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
728 815
729 if (clp->cl_minorversion == 0) 816 if (clp->cl_minorversion == 0)
@@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
933{ 1020{
934 struct nfs4_ol_stateid *stp = openlockstateid(stid); 1021 struct nfs4_ol_stateid *stp = openlockstateid(stid);
935 1022
1023 put_clnt_odstate(stp->st_clnt_odstate);
936 release_all_access(stp); 1024 release_all_access(stp);
937 if (stp->st_stateowner) 1025 if (stp->st_stateowner)
938 nfs4_put_stateowner(stp->st_stateowner); 1026 nfs4_put_stateowner(stp->st_stateowner);
@@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1538 INIT_LIST_HEAD(&clp->cl_openowners); 1626 INIT_LIST_HEAD(&clp->cl_openowners);
1539 INIT_LIST_HEAD(&clp->cl_delegations); 1627 INIT_LIST_HEAD(&clp->cl_delegations);
1540 INIT_LIST_HEAD(&clp->cl_lru); 1628 INIT_LIST_HEAD(&clp->cl_lru);
1541 INIT_LIST_HEAD(&clp->cl_callbacks);
1542 INIT_LIST_HEAD(&clp->cl_revoked); 1629 INIT_LIST_HEAD(&clp->cl_revoked);
1543#ifdef CONFIG_NFSD_PNFS 1630#ifdef CONFIG_NFSD_PNFS
1544 INIT_LIST_HEAD(&clp->cl_lo_states); 1631 INIT_LIST_HEAD(&clp->cl_lo_states);
@@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp)
1634 while (!list_empty(&reaplist)) { 1721 while (!list_empty(&reaplist)) {
1635 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1722 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1636 list_del_init(&dp->dl_recall_lru); 1723 list_del_init(&dp->dl_recall_lru);
1724 put_clnt_odstate(dp->dl_clnt_odstate);
1637 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 1725 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
1638 nfs4_put_stid(&dp->dl_stid); 1726 nfs4_put_stid(&dp->dl_stid);
1639 } 1727 }
@@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3057 spin_lock_init(&fp->fi_lock); 3145 spin_lock_init(&fp->fi_lock);
3058 INIT_LIST_HEAD(&fp->fi_stateids); 3146 INIT_LIST_HEAD(&fp->fi_stateids);
3059 INIT_LIST_HEAD(&fp->fi_delegations); 3147 INIT_LIST_HEAD(&fp->fi_delegations);
3148 INIT_LIST_HEAD(&fp->fi_clnt_odstate);
3060 fh_copy_shallow(&fp->fi_fhandle, fh); 3149 fh_copy_shallow(&fp->fi_fhandle, fh);
3061 fp->fi_deleg_file = NULL; 3150 fp->fi_deleg_file = NULL;
3062 fp->fi_had_conflict = false; 3151 fp->fi_had_conflict = false;
@@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3073void 3162void
3074nfsd4_free_slabs(void) 3163nfsd4_free_slabs(void)
3075{ 3164{
3165 kmem_cache_destroy(odstate_slab);
3076 kmem_cache_destroy(openowner_slab); 3166 kmem_cache_destroy(openowner_slab);
3077 kmem_cache_destroy(lockowner_slab); 3167 kmem_cache_destroy(lockowner_slab);
3078 kmem_cache_destroy(file_slab); 3168 kmem_cache_destroy(file_slab);
@@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void)
3103 sizeof(struct nfs4_delegation), 0, 0, NULL); 3193 sizeof(struct nfs4_delegation), 0, 0, NULL);
3104 if (deleg_slab == NULL) 3194 if (deleg_slab == NULL)
3105 goto out_free_stateid_slab; 3195 goto out_free_stateid_slab;
3196 odstate_slab = kmem_cache_create("nfsd4_odstate",
3197 sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
3198 if (odstate_slab == NULL)
3199 goto out_free_deleg_slab;
3106 return 0; 3200 return 0;
3107 3201
3202out_free_deleg_slab:
3203 kmem_cache_destroy(deleg_slab);
3108out_free_stateid_slab: 3204out_free_stateid_slab:
3109 kmem_cache_destroy(stateid_slab); 3205 kmem_cache_destroy(stateid_slab);
3110out_free_file_slab: 3206out_free_file_slab:
@@ -3581,6 +3677,14 @@ alloc_stateid:
3581 open->op_stp = nfs4_alloc_open_stateid(clp); 3677 open->op_stp = nfs4_alloc_open_stateid(clp);
3582 if (!open->op_stp) 3678 if (!open->op_stp)
3583 return nfserr_jukebox; 3679 return nfserr_jukebox;
3680
3681 if (nfsd4_has_session(cstate) &&
3682 (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
3683 open->op_odstate = alloc_clnt_odstate(clp);
3684 if (!open->op_odstate)
3685 return nfserr_jukebox;
3686 }
3687
3584 return nfs_ok; 3688 return nfs_ok;
3585} 3689}
3586 3690
@@ -3869,7 +3973,7 @@ out_fput:
3869 3973
3870static struct nfs4_delegation * 3974static struct nfs4_delegation *
3871nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, 3975nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3872 struct nfs4_file *fp) 3976 struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
3873{ 3977{
3874 int status; 3978 int status;
3875 struct nfs4_delegation *dp; 3979 struct nfs4_delegation *dp;
@@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3877 if (fp->fi_had_conflict) 3981 if (fp->fi_had_conflict)
3878 return ERR_PTR(-EAGAIN); 3982 return ERR_PTR(-EAGAIN);
3879 3983
3880 dp = alloc_init_deleg(clp, fh); 3984 dp = alloc_init_deleg(clp, fh, odstate);
3881 if (!dp) 3985 if (!dp)
3882 return ERR_PTR(-ENOMEM); 3986 return ERR_PTR(-ENOMEM);
3883 3987
@@ -3903,6 +4007,7 @@ out_unlock:
3903 spin_unlock(&state_lock); 4007 spin_unlock(&state_lock);
3904out: 4008out:
3905 if (status) { 4009 if (status) {
4010 put_clnt_odstate(dp->dl_clnt_odstate);
3906 nfs4_put_stid(&dp->dl_stid); 4011 nfs4_put_stid(&dp->dl_stid);
3907 return ERR_PTR(status); 4012 return ERR_PTR(status);
3908 } 4013 }
@@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
3980 default: 4085 default:
3981 goto out_no_deleg; 4086 goto out_no_deleg;
3982 } 4087 }
3983 dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); 4088 dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
3984 if (IS_ERR(dp)) 4089 if (IS_ERR(dp))
3985 goto out_no_deleg; 4090 goto out_no_deleg;
3986 4091
@@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
4069 release_open_stateid(stp); 4174 release_open_stateid(stp);
4070 goto out; 4175 goto out;
4071 } 4176 }
4177
4178 stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
4179 open->op_odstate);
4180 if (stp->st_clnt_odstate == open->op_odstate)
4181 open->op_odstate = NULL;
4072 } 4182 }
4073 update_stateid(&stp->st_stid.sc_stateid); 4183 update_stateid(&stp->st_stid.sc_stateid);
4074 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4184 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
4129 kmem_cache_free(file_slab, open->op_file); 4239 kmem_cache_free(file_slab, open->op_file);
4130 if (open->op_stp) 4240 if (open->op_stp)
4131 nfs4_put_stid(&open->op_stp->st_stid); 4241 nfs4_put_stid(&open->op_stp->st_stid);
4242 if (open->op_odstate)
4243 kmem_cache_free(odstate_slab, open->op_odstate);
4132} 4244}
4133 4245
4134__be32 4246__be32
@@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
4385 return nfserr_old_stateid; 4497 return nfserr_old_stateid;
4386} 4498}
4387 4499
4500static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
4501{
4502 if (ols->st_stateowner->so_is_open_owner &&
4503 !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
4504 return nfserr_bad_stateid;
4505 return nfs_ok;
4506}
4507
4388static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) 4508static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
4389{ 4509{
4390 struct nfs4_stid *s; 4510 struct nfs4_stid *s;
4391 struct nfs4_ol_stateid *ols;
4392 __be32 status = nfserr_bad_stateid; 4511 __be32 status = nfserr_bad_stateid;
4393 4512
4394 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4513 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
@@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
4418 break; 4537 break;
4419 case NFS4_OPEN_STID: 4538 case NFS4_OPEN_STID:
4420 case NFS4_LOCK_STID: 4539 case NFS4_LOCK_STID:
4421 ols = openlockstateid(s); 4540 status = nfsd4_check_openowner_confirmed(openlockstateid(s));
4422 if (ols->st_stateowner->so_is_open_owner
4423 && !(openowner(ols->st_stateowner)->oo_flags
4424 & NFS4_OO_CONFIRMED))
4425 status = nfserr_bad_stateid;
4426 else
4427 status = nfs_ok;
4428 break; 4541 break;
4429 default: 4542 default:
4430 printk("unknown stateid type %x\n", s->sc_type); 4543 printk("unknown stateid type %x\n", s->sc_type);
@@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
4516 status = nfs4_check_fh(current_fh, stp); 4629 status = nfs4_check_fh(current_fh, stp);
4517 if (status) 4630 if (status)
4518 goto out; 4631 goto out;
4519 if (stp->st_stateowner->so_is_open_owner 4632 status = nfsd4_check_openowner_confirmed(stp);
4520 && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) 4633 if (status)
4521 goto out; 4634 goto out;
4522 status = nfs4_check_openmode(stp, flags); 4635 status = nfs4_check_openmode(stp, flags);
4523 if (status) 4636 if (status)
@@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4852 update_stateid(&stp->st_stid.sc_stateid); 4965 update_stateid(&stp->st_stid.sc_stateid);
4853 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4966 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4854 4967
4855 nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
4856 stp->st_stid.sc_file);
4857
4858 nfsd4_close_open_stateid(stp); 4968 nfsd4_close_open_stateid(stp);
4859 4969
4860 /* put reference from nfs4_preprocess_seqid_op */ 4970 /* put reference from nfs4_preprocess_seqid_op */
@@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net)
6488 list_for_each_safe(pos, next, &reaplist) { 6598 list_for_each_safe(pos, next, &reaplist) {
6489 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6599 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6490 list_del_init(&dp->dl_recall_lru); 6600 list_del_init(&dp->dl_recall_lru);
6601 put_clnt_odstate(dp->dl_clnt_odstate);
6491 nfs4_put_deleg_lease(dp->dl_stid.sc_file); 6602 nfs4_put_deleg_lease(dp->dl_stid.sc_file);
6492 nfs4_put_stid(&dp->dl_stid); 6603 nfs4_put_stid(&dp->dl_stid);
6493 } 6604 }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4f3bfeb11766..dbc4f85a5008 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -63,12 +63,12 @@ typedef struct {
63 63
64struct nfsd4_callback { 64struct nfsd4_callback {
65 struct nfs4_client *cb_clp; 65 struct nfs4_client *cb_clp;
66 struct list_head cb_per_client;
67 u32 cb_minorversion; 66 u32 cb_minorversion;
68 struct rpc_message cb_msg; 67 struct rpc_message cb_msg;
69 struct nfsd4_callback_ops *cb_ops; 68 struct nfsd4_callback_ops *cb_ops;
70 struct work_struct cb_work; 69 struct work_struct cb_work;
71 bool cb_done; 70 int cb_status;
71 bool cb_need_restart;
72}; 72};
73 73
74struct nfsd4_callback_ops { 74struct nfsd4_callback_ops {
@@ -126,6 +126,7 @@ struct nfs4_delegation {
126 struct list_head dl_perfile; 126 struct list_head dl_perfile;
127 struct list_head dl_perclnt; 127 struct list_head dl_perclnt;
128 struct list_head dl_recall_lru; /* delegation recalled */ 128 struct list_head dl_recall_lru; /* delegation recalled */
129 struct nfs4_clnt_odstate *dl_clnt_odstate;
129 u32 dl_type; 130 u32 dl_type;
130 time_t dl_time; 131 time_t dl_time;
131/* For recall: */ 132/* For recall: */
@@ -332,7 +333,6 @@ struct nfs4_client {
332 int cl_cb_state; 333 int cl_cb_state;
333 struct nfsd4_callback cl_cb_null; 334 struct nfsd4_callback cl_cb_null;
334 struct nfsd4_session *cl_cb_session; 335 struct nfsd4_session *cl_cb_session;
335 struct list_head cl_callbacks; /* list of in-progress callbacks */
336 336
337 /* for all client information that callback code might need: */ 337 /* for all client information that callback code might need: */
338 spinlock_t cl_lock; 338 spinlock_t cl_lock;
@@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
465} 465}
466 466
467/* 467/*
468 * Per-client state indicating no. of opens and outstanding delegations
469 * on a file from a particular client.'od' stands for 'open & delegation'
470 */
471struct nfs4_clnt_odstate {
472 struct nfs4_client *co_client;
473 struct nfs4_file *co_file;
474 struct list_head co_perfile;
475 atomic_t co_odcount;
476};
477
478/*
468 * nfs4_file: a file opened by some number of (open) nfs4_stateowners. 479 * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
469 * 480 *
470 * These objects are global. nfsd keeps one instance of a nfs4_file per 481 * These objects are global. nfsd keeps one instance of a nfs4_file per
@@ -485,6 +496,7 @@ struct nfs4_file {
485 struct list_head fi_delegations; 496 struct list_head fi_delegations;
486 struct rcu_head fi_rcu; 497 struct rcu_head fi_rcu;
487 }; 498 };
499 struct list_head fi_clnt_odstate;
488 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ 500 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
489 struct file * fi_fds[3]; 501 struct file * fi_fds[3];
490 /* 502 /*
@@ -526,6 +538,7 @@ struct nfs4_ol_stateid {
526 struct list_head st_perstateowner; 538 struct list_head st_perstateowner;
527 struct list_head st_locks; 539 struct list_head st_locks;
528 struct nfs4_stateowner * st_stateowner; 540 struct nfs4_stateowner * st_stateowner;
541 struct nfs4_clnt_odstate * st_clnt_odstate;
529 unsigned char st_access_bmap; 542 unsigned char st_access_bmap;
530 unsigned char st_deny_bmap; 543 unsigned char st_deny_bmap;
531 struct nfs4_ol_stateid * st_openstp; 544 struct nfs4_ol_stateid * st_openstp;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index f982ae84f0cd..2f8c092be2b3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -247,6 +247,7 @@ struct nfsd4_open {
247 struct nfs4_openowner *op_openowner; /* used during processing */ 247 struct nfs4_openowner *op_openowner; /* used during processing */
248 struct nfs4_file *op_file; /* used during processing */ 248 struct nfs4_file *op_file; /* used during processing */
249 struct nfs4_ol_stateid *op_stp; /* used during processing */ 249 struct nfs4_ol_stateid *op_stp; /* used during processing */
250 struct nfs4_clnt_odstate *op_odstate; /* used during processing */
250 struct nfs4_acl *op_acl; 251 struct nfs4_acl *op_acl;
251 struct xdr_netobj op_label; 252 struct xdr_netobj op_label;
252}; 253};
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 059f37137f9a..919fd5bb14a8 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
388 nchildren = nilfs_btree_node_get_nchildren(node); 388 nchildren = nilfs_btree_node_get_nchildren(node);
389 389
390 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 390 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
391 level > NILFS_BTREE_LEVEL_MAX || 391 level >= NILFS_BTREE_LEVEL_MAX ||
392 nchildren < 0 || 392 nchildren < 0 ||
393 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 393 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
394 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", 394 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a6944b25fd5b..fdf4b41d0609 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -757,6 +757,19 @@ lookup:
757 if (tmpres) { 757 if (tmpres) {
758 spin_unlock(&dlm->spinlock); 758 spin_unlock(&dlm->spinlock);
759 spin_lock(&tmpres->spinlock); 759 spin_lock(&tmpres->spinlock);
760
761 /*
762 * Right after dlm spinlock was released, dlm_thread could have
763 * purged the lockres. Check if lockres got unhashed. If so
764 * start over.
765 */
766 if (hlist_unhashed(&tmpres->hash_node)) {
767 spin_unlock(&tmpres->spinlock);
768 dlm_lockres_put(tmpres);
769 tmpres = NULL;
770 goto lookup;
771 }
772
760 /* Wait on the thread that is mastering the resource */ 773 /* Wait on the thread that is mastering the resource */
761 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 774 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
762 __dlm_wait_on_lockres(tmpres); 775 __dlm_wait_on_lockres(tmpres);
diff --git a/fs/splice.c b/fs/splice.c
index 476024bb6546..bfe62ae40f40 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1161 long ret, bytes; 1161 long ret, bytes;
1162 umode_t i_mode; 1162 umode_t i_mode;
1163 size_t len; 1163 size_t len;
1164 int i, flags; 1164 int i, flags, more;
1165 1165
1166 /* 1166 /*
1167 * We require the input being a regular file, as we don't want to 1167 * We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1204 * Don't block on output, we have to drain the direct pipe. 1204 * Don't block on output, we have to drain the direct pipe.
1205 */ 1205 */
1206 sd->flags &= ~SPLICE_F_NONBLOCK; 1206 sd->flags &= ~SPLICE_F_NONBLOCK;
1207 more = sd->flags & SPLICE_F_MORE;
1207 1208
1208 while (len) { 1209 while (len) {
1209 size_t read_len; 1210 size_t read_len;
@@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1217 sd->total_len = read_len; 1218 sd->total_len = read_len;
1218 1219
1219 /* 1220 /*
1221 * If more data is pending, set SPLICE_F_MORE
1222 * If this is the last data and SPLICE_F_MORE was not set
1223 * initially, clears it.
1224 */
1225 if (read_len < len)
1226 sd->flags |= SPLICE_F_MORE;
1227 else if (!more)
1228 sd->flags &= ~SPLICE_F_MORE;
1229 /*
1220 * NOTE: nonblocking mode only applies to the input. We 1230 * NOTE: nonblocking mode only applies to the input. We
1221 * must not do the output in nonblocking mode as then we 1231 * must not do the output in nonblocking mode as then we
1222 * could get stuck data in the internal pipe: 1232 * could get stuck data in the internal pipe: