aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/file.c19
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/file.c87
-rw-r--r--fs/btrfs/inode.c113
-rw-r--r--fs/btrfs/ordered-data.c7
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/send.c171
-rw-r--r--fs/btrfs/tests/inode-tests.c197
-rw-r--r--fs/btrfs/transaction.c42
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/xattr.c8
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/cifs/file.c1
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/smb2misc.c2
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h4
-rw-r--r--fs/ecryptfs/file.c34
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/fs-writeback.c93
-rw-r--r--fs/fuse/dev.c19
-rw-r--r--fs/hfsplus/brec.c20
-rw-r--r--fs/kernfs/file.c1
-rw-r--r--fs/locks.c10
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/delegation.c45
-rw-r--r--fs/nfs/dir.c22
-rw-r--r--fs/nfs/file.c11
-rw-r--r--fs/nfs/inode.c111
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs4client.c9
-rw-r--r--fs/nfs/nfs4proc.c31
-rw-r--r--fs/nfs/nfs4session.h1
-rw-r--r--fs/nfs/nfs4state.c18
-rw-r--r--fs/nfs/proc.c6
-rw-r--r--fs/nfs/write.c30
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/blocklayoutxdr.c6
-rw-r--r--fs/nfsd/nfs4layouts.c12
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfs4state.c6
-rw-r--r--fs/nfsd/nfs4xdr.c20
-rw-r--r--fs/nfsd/nfscache.c6
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/notify/fanotify/fanotify.c3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h15
-rw-r--r--fs/overlayfs/super.c33
-rw-r--r--fs/proc/task_mmu.c3
58 files changed, 1106 insertions, 256 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..a91795e01a7f 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
699 boff = tmp % bsize; 699 boff = tmp % bsize;
700 if (boff) { 700 if (boff) {
701 bh = affs_bread_ino(inode, bidx, 0); 701 bh = affs_bread_ino(inode, bidx, 0);
702 if (IS_ERR(bh)) 702 if (IS_ERR(bh)) {
703 return PTR_ERR(bh); 703 written = PTR_ERR(bh);
704 goto err_first_bh;
705 }
704 tmp = min(bsize - boff, to - from); 706 tmp = min(bsize - boff, to - from);
705 BUG_ON(boff + tmp > bsize || tmp > bsize); 707 BUG_ON(boff + tmp > bsize || tmp > bsize);
706 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 708 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
712 bidx++; 714 bidx++;
713 } else if (bidx) { 715 } else if (bidx) {
714 bh = affs_bread_ino(inode, bidx - 1, 0); 716 bh = affs_bread_ino(inode, bidx - 1, 0);
715 if (IS_ERR(bh)) 717 if (IS_ERR(bh)) {
716 return PTR_ERR(bh); 718 written = PTR_ERR(bh);
719 goto err_first_bh;
720 }
717 } 721 }
718 while (from + bsize <= to) { 722 while (from + bsize <= to) {
719 prev_bh = bh; 723 prev_bh = bh;
720 bh = affs_getemptyblk_ino(inode, bidx); 724 bh = affs_getemptyblk_ino(inode, bidx);
721 if (IS_ERR(bh)) 725 if (IS_ERR(bh))
722 goto out; 726 goto err_bh;
723 memcpy(AFFS_DATA(bh), data + from, bsize); 727 memcpy(AFFS_DATA(bh), data + from, bsize);
724 if (buffer_new(bh)) { 728 if (buffer_new(bh)) {
725 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 729 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
751 prev_bh = bh; 755 prev_bh = bh;
752 bh = affs_bread_ino(inode, bidx, 1); 756 bh = affs_bread_ino(inode, bidx, 1);
753 if (IS_ERR(bh)) 757 if (IS_ERR(bh))
754 goto out; 758 goto err_bh;
755 tmp = min(bsize, to - from); 759 tmp = min(bsize, to - from);
756 BUG_ON(tmp > bsize); 760 BUG_ON(tmp > bsize);
757 memcpy(AFFS_DATA(bh), data + from, tmp); 761 memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
790 if (tmp > inode->i_size) 794 if (tmp > inode->i_size)
791 inode->i_size = AFFS_I(inode)->mmu_private = tmp; 795 inode->i_size = AFFS_I(inode)->mmu_private = tmp;
792 796
797err_first_bh:
793 unlock_page(page); 798 unlock_page(page);
794 page_cache_release(page); 799 page_cache_release(page);
795 800
796 return written; 801 return written;
797 802
798out: 803err_bh:
799 bh = prev_bh; 804 bh = prev_bh;
800 if (!written) 805 if (!written)
801 written = PTR_ERR(bh); 806 written = PTR_ERR(bh);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 993642199326..6d67f32e648d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1645 1645
1646 parent_nritems = btrfs_header_nritems(parent); 1646 parent_nritems = btrfs_header_nritems(parent);
1647 blocksize = root->nodesize; 1647 blocksize = root->nodesize;
1648 end_slot = parent_nritems; 1648 end_slot = parent_nritems - 1;
1649 1649
1650 if (parent_nritems == 1) 1650 if (parent_nritems <= 1)
1651 return 0; 1651 return 0;
1652 1652
1653 btrfs_set_lock_blocking(parent); 1653 btrfs_set_lock_blocking(parent);
1654 1654
1655 for (i = start_slot; i < end_slot; i++) { 1655 for (i = start_slot; i <= end_slot; i++) {
1656 int close = 1; 1656 int close = 1;
1657 1657
1658 btrfs_node_key(parent, &disk_key, i); 1658 btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1669 other = btrfs_node_blockptr(parent, i - 1); 1669 other = btrfs_node_blockptr(parent, i - 1);
1670 close = close_blocks(blocknr, other, blocksize); 1670 close = close_blocks(blocknr, other, blocksize);
1671 } 1671 }
1672 if (!close && i < end_slot - 2) { 1672 if (!close && i < end_slot) {
1673 other = btrfs_node_blockptr(parent, i + 1); 1673 other = btrfs_node_blockptr(parent, i + 1);
1674 close = close_blocks(blocknr, other, blocksize); 1674 close = close_blocks(blocknr, other, blocksize);
1675 } 1675 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3387 3387
3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3389 struct btrfs_root *root); 3389 struct btrfs_root *root);
3390int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3391 struct btrfs_root *root);
3390int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 3392int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
3391int btrfs_free_block_groups(struct btrfs_fs_info *info); 3393int btrfs_free_block_groups(struct btrfs_fs_info *info);
3392int btrfs_read_block_groups(struct btrfs_root *root); 3394int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
3909 loff_t actual_len, u64 *alloc_hint); 3911 loff_t actual_len, u64 *alloc_hint);
3910int btrfs_inode_check_errors(struct inode *inode); 3912int btrfs_inode_check_errors(struct inode *inode);
3911extern const struct dentry_operations btrfs_dentry_operations; 3913extern const struct dentry_operations btrfs_dentry_operations;
3914#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3915void btrfs_test_inode_set_ops(struct inode *inode);
3916#endif
3912 3917
3913/* ioctl.c */ 3918/* ioctl.c */
3914long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3919long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3921 } 3921 }
3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) 3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
3923 + sizeof(struct btrfs_chunk)) { 3923 + sizeof(struct btrfs_chunk)) {
3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", 3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
3925 btrfs_super_sys_array_size(sb), 3925 btrfs_super_sys_array_size(sb),
3926 sizeof(struct btrfs_disk_key) 3926 sizeof(struct btrfs_disk_key)
3927 + sizeof(struct btrfs_chunk)); 3927 + sizeof(struct btrfs_chunk));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 571f402d3fc4..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3208 return 0; 3208 return 0;
3209 } 3209 }
3210 3210
3211 if (trans->aborted)
3212 return 0;
3211again: 3213again:
3212 inode = lookup_free_space_inode(root, block_group, path); 3214 inode = lookup_free_space_inode(root, block_group, path);
3213 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 3215 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
3243 */ 3245 */
3244 BTRFS_I(inode)->generation = 0; 3246 BTRFS_I(inode)->generation = 0;
3245 ret = btrfs_update_inode(trans, root, inode); 3247 ret = btrfs_update_inode(trans, root, inode);
3248 if (ret) {
3249 /*
3250 * So theoretically we could recover from this, simply set the
3251 * super cache generation to 0 so we know to invalidate the
3252 * cache, but then we'd have to keep track of the block groups
3253 * that fail this way so we know we _have_ to reset this cache
3254 * before the next commit or risk reading stale cache. So to
3255 * limit our exposure to horrible edge cases lets just abort the
3256 * transaction, this only happens in really bad situations
3257 * anyway.
3258 */
3259 btrfs_abort_transaction(trans, root, ret);
3260 goto out_put;
3261 }
3246 WARN_ON(ret); 3262 WARN_ON(ret);
3247 3263
3248 if (i_size_read(inode) > 0) { 3264 if (i_size_read(inode) > 0) {
@@ -3309,6 +3325,32 @@ out:
3309 return ret; 3325 return ret;
3310} 3326}
3311 3327
3328int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root)
3330{
3331 struct btrfs_block_group_cache *cache, *tmp;
3332 struct btrfs_transaction *cur_trans = trans->transaction;
3333 struct btrfs_path *path;
3334
3335 if (list_empty(&cur_trans->dirty_bgs) ||
3336 !btrfs_test_opt(root, SPACE_CACHE))
3337 return 0;
3338
3339 path = btrfs_alloc_path();
3340 if (!path)
3341 return -ENOMEM;
3342
3343 /* Could add new block groups, use _safe just in case */
3344 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3345 dirty_list) {
3346 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3347 cache_save_setup(cache, trans, path);
3348 }
3349
3350 btrfs_free_path(path);
3351 return 0;
3352}
3353
3312int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3354int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3313 struct btrfs_root *root) 3355 struct btrfs_root *root)
3314{ 3356{
@@ -5094,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5094 num_bytes = ALIGN(num_bytes, root->sectorsize); 5136 num_bytes = ALIGN(num_bytes, root->sectorsize);
5095 5137
5096 spin_lock(&BTRFS_I(inode)->lock); 5138 spin_lock(&BTRFS_I(inode)->lock);
5097 BTRFS_I(inode)->outstanding_extents++; 5139 nr_extents = (unsigned)div64_u64(num_bytes +
5140 BTRFS_MAX_EXTENT_SIZE - 1,
5141 BTRFS_MAX_EXTENT_SIZE);
5142 BTRFS_I(inode)->outstanding_extents += nr_extents;
5143 nr_extents = 0;
5098 5144
5099 if (BTRFS_I(inode)->outstanding_extents > 5145 if (BTRFS_I(inode)->outstanding_extents >
5100 BTRFS_I(inode)->reserved_extents) 5146 BTRFS_I(inode)->reserved_extents)
@@ -5239,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5239 if (dropped > 0) 5285 if (dropped > 0)
5240 to_free += btrfs_calc_trans_metadata_size(root, dropped); 5286 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5241 5287
5288 if (btrfs_test_is_dummy_root(root))
5289 return;
5290
5242 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5291 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5243 btrfs_ino(inode), to_free, 0); 5292 btrfs_ino(inode), to_free, 0);
5244 if (root->fs_info->quota_enabled) { 5293 if (root->fs_info->quota_enabled) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
4968 4968
4969 /* Should be safe to release our pages at this point */ 4969 /* Should be safe to release our pages at this point */
4970 btrfs_release_extent_buffer_page(eb); 4970 btrfs_release_extent_buffer_page(eb);
4971#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4972 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4973 __free_extent_buffer(eb);
4974 return 1;
4975 }
4976#endif
4971 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4977 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4972 return 1; 4978 return 1;
4973 } 4979 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbbac900d..30982bbd31c3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1811 mutex_unlock(&inode->i_mutex); 1811 mutex_unlock(&inode->i_mutex);
1812 1812
1813 /* 1813 /*
1814 * we want to make sure fsync finds this change
1815 * but we haven't joined a transaction running right now.
1816 *
1817 * Later on, someone is sure to update the inode and get the
1818 * real transid recorded.
1819 *
1820 * We set last_trans now to the fs_info generation + 1,
1821 * this will either be one more than the running transaction
1822 * or the generation used for the next transaction if there isn't
1823 * one running right now.
1824 *
1825 * We also have to set last_sub_trans to the current log transid, 1814 * We also have to set last_sub_trans to the current log transid,
1826 * otherwise subsequent syncs to a file that's been synced in this 1815 * otherwise subsequent syncs to a file that's been synced in this
1827 * transaction will appear to have already occured. 1816 * transaction will appear to have already occured.
1828 */ 1817 */
1829 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1830 BTRFS_I(inode)->last_sub_trans = root->log_transid; 1818 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1831 if (num_written > 0) { 1819 if (num_written > 0) {
1832 err = generic_write_sync(file, pos, num_written); 1820 err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1959 atomic_inc(&root->log_batch); 1947 atomic_inc(&root->log_batch);
1960 1948
1961 /* 1949 /*
1962 * check the transaction that last modified this inode 1950 * If the last transaction that changed this file was before the current
1963 * and see if its already been committed 1951 * transaction and we have the full sync flag set in our inode, we can
1964 */ 1952 * bail out now without any syncing.
1965 if (!BTRFS_I(inode)->last_trans) { 1953 *
1966 mutex_unlock(&inode->i_mutex); 1954 * Note that we can't bail out if the full sync flag isn't set. This is
1967 goto out; 1955 * because when the full sync flag is set we start all ordered extents
1968 } 1956 * and wait for them to fully complete - when they complete they update
1969 1957 * the inode's last_trans field through:
1970 /* 1958 *
1971 * if the last transaction that changed this file was before 1959 * btrfs_finish_ordered_io() ->
1972 * the current transaction, we can bail out now without any 1960 * btrfs_update_inode_fallback() ->
1973 * syncing 1961 * btrfs_update_inode() ->
1962 * btrfs_set_inode_last_trans()
1963 *
1964 * So we are sure that last_trans is up to date and can do this check to
1965 * bail out safely. For the fast path, when the full sync flag is not
1966 * set in our inode, we can not do it because we start only our ordered
1967 * extents and don't wait for them to complete (that is when
1968 * btrfs_finish_ordered_io runs), so here at this point their last_trans
1969 * value might be less than or equals to fs_info->last_trans_committed,
1970 * and setting a speculative last_trans for an inode when a buffered
1971 * write is made (such as fs_info->generation + 1 for example) would not
1972 * be reliable since after setting the value and before fsync is called
1973 * any number of transactions can start and commit (transaction kthread
1974 * commits the current transaction periodically), and a transaction
1975 * commit does not start nor waits for ordered extents to complete.
1974 */ 1976 */
1975 smp_mb(); 1977 smp_mb();
1976 if (btrfs_inode_in_log(inode, root->fs_info->generation) || 1978 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1977 BTRFS_I(inode)->last_trans <= 1979 (full_sync && BTRFS_I(inode)->last_trans <=
1978 root->fs_info->last_trans_committed) { 1980 root->fs_info->last_trans_committed)) {
1979 BTRFS_I(inode)->last_trans = 0;
1980
1981 /* 1981 /*
1982 * We'v had everything committed since the last time we were 1982 * We'v had everything committed since the last time we were
1983 * modified so clear this flag in case it was set for whatever 1983 * modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2275 bool same_page; 2275 bool same_page;
2276 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2276 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2277 u64 ino_size; 2277 u64 ino_size;
2278 bool truncated_page = false;
2279 bool updated_inode = false;
2278 2280
2279 ret = btrfs_wait_ordered_range(inode, offset, len); 2281 ret = btrfs_wait_ordered_range(inode, offset, len);
2280 if (ret) 2282 if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2306 * entire page. 2308 * entire page.
2307 */ 2309 */
2308 if (same_page && len < PAGE_CACHE_SIZE) { 2310 if (same_page && len < PAGE_CACHE_SIZE) {
2309 if (offset < ino_size) 2311 if (offset < ino_size) {
2312 truncated_page = true;
2310 ret = btrfs_truncate_page(inode, offset, len, 0); 2313 ret = btrfs_truncate_page(inode, offset, len, 0);
2314 } else {
2315 ret = 0;
2316 }
2311 goto out_only_mutex; 2317 goto out_only_mutex;
2312 } 2318 }
2313 2319
2314 /* zero back part of the first page */ 2320 /* zero back part of the first page */
2315 if (offset < ino_size) { 2321 if (offset < ino_size) {
2322 truncated_page = true;
2316 ret = btrfs_truncate_page(inode, offset, 0, 0); 2323 ret = btrfs_truncate_page(inode, offset, 0, 0);
2317 if (ret) { 2324 if (ret) {
2318 mutex_unlock(&inode->i_mutex); 2325 mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2348 if (!ret) { 2355 if (!ret) {
2349 /* zero the front end of the last page */ 2356 /* zero the front end of the last page */
2350 if (tail_start + tail_len < ino_size) { 2357 if (tail_start + tail_len < ino_size) {
2358 truncated_page = true;
2351 ret = btrfs_truncate_page(inode, 2359 ret = btrfs_truncate_page(inode,
2352 tail_start + tail_len, 0, 1); 2360 tail_start + tail_len, 0, 1);
2353 if (ret) 2361 if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2357 } 2365 }
2358 2366
2359 if (lockend < lockstart) { 2367 if (lockend < lockstart) {
2360 mutex_unlock(&inode->i_mutex); 2368 ret = 0;
2361 return 0; 2369 goto out_only_mutex;
2362 } 2370 }
2363 2371
2364 while (1) { 2372 while (1) {
@@ -2506,6 +2514,7 @@ out_trans:
2506 2514
2507 trans->block_rsv = &root->fs_info->trans_block_rsv; 2515 trans->block_rsv = &root->fs_info->trans_block_rsv;
2508 ret = btrfs_update_inode(trans, root, inode); 2516 ret = btrfs_update_inode(trans, root, inode);
2517 updated_inode = true;
2509 btrfs_end_transaction(trans, root); 2518 btrfs_end_transaction(trans, root);
2510 btrfs_btree_balance_dirty(root); 2519 btrfs_btree_balance_dirty(root);
2511out_free: 2520out_free:
@@ -2515,6 +2524,22 @@ out:
2515 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2524 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2516 &cached_state, GFP_NOFS); 2525 &cached_state, GFP_NOFS);
2517out_only_mutex: 2526out_only_mutex:
2527 if (!updated_inode && truncated_page && !ret && !err) {
2528 /*
2529 * If we only end up zeroing part of a page, we still need to
2530 * update the inode item, so that all the time fields are
2531 * updated as well as the necessary btrfs inode in memory fields
2532 * for detecting, at fsync time, if the inode isn't yet in the
2533 * log tree or it's there but not up to date.
2534 */
2535 trans = btrfs_start_transaction(root, 1);
2536 if (IS_ERR(trans)) {
2537 err = PTR_ERR(trans);
2538 } else {
2539 err = btrfs_update_inode(trans, root, inode);
2540 ret = btrfs_end_transaction(trans, root);
2541 }
2542 }
2518 mutex_unlock(&inode->i_mutex); 2543 mutex_unlock(&inode->i_mutex);
2519 if (ret && !err) 2544 if (ret && !err)
2520 err = ret; 2545 err = ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23dfcddb..d2e732d7af52 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
108 108
109static int btrfs_dirty_inode(struct inode *inode); 109static int btrfs_dirty_inode(struct inode *inode);
110 110
111#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
112void btrfs_test_inode_set_ops(struct inode *inode)
113{
114 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
115}
116#endif
117
111static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 118static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 struct inode *inode, struct inode *dir, 119 struct inode *inode, struct inode *dir,
113 const struct qstr *qstr) 120 const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
1542 u64 new_size; 1549 u64 new_size;
1543 1550
1544 /* 1551 /*
1545 * We need the largest size of the remaining extent to see if we 1552 * See the explanation in btrfs_merge_extent_hook, the same
1546 * need to add a new outstanding extent. Think of the following 1553 * applies here, just in reverse.
1547 * case
1548 *
1549 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
1550 *
1551 * The new_size would just be 4k and we'd think we had enough
1552 * outstanding extents for this if we only took one side of the
1553 * split, same goes for the other direction. We need to see if
1554 * the larger size still is the same amount of extents as the
1555 * original size, because if it is we need to add a new
1556 * outstanding extent. But if we split up and the larger size
1557 * is less than the original then we are good to go since we've
1558 * already accounted for the extra extent in our original
1559 * accounting.
1560 */ 1554 */
1561 new_size = orig->end - split + 1; 1555 new_size = orig->end - split + 1;
1562 if ((split - orig->start) > new_size) 1556 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1563 new_size = split - orig->start;
1564
1565 num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1566 BTRFS_MAX_EXTENT_SIZE); 1557 BTRFS_MAX_EXTENT_SIZE);
1567 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1558 new_size = split - orig->start;
1568 BTRFS_MAX_EXTENT_SIZE) < num_extents) 1559 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1560 BTRFS_MAX_EXTENT_SIZE);
1561 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1562 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1569 return; 1563 return;
1570 } 1564 }
1571 1565
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1591 if (!(other->state & EXTENT_DELALLOC)) 1585 if (!(other->state & EXTENT_DELALLOC))
1592 return; 1586 return;
1593 1587
1594 old_size = other->end - other->start + 1; 1588 if (new->start > other->start)
1595 new_size = old_size + (new->end - new->start + 1); 1589 new_size = new->end - other->start + 1;
1590 else
1591 new_size = other->end - new->start + 1;
1596 1592
1597 /* we're not bigger than the max, unreserve the space and go */ 1593 /* we're not bigger than the max, unreserve the space and go */
1598 if (new_size <= BTRFS_MAX_EXTENT_SIZE) { 1594 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1603 } 1599 }
1604 1600
1605 /* 1601 /*
1606 * If we grew by another max_extent, just return, we want to keep that 1602 * We have to add up either side to figure out how many extents were
1607 * reserved amount. 1603 * accounted for before we merged into one big extent. If the number of
1604 * extents we accounted for is <= the amount we need for the new range
1605 * then we can return, otherwise drop. Think of it like this
1606 *
1607 * [ 4k][MAX_SIZE]
1608 *
1609 * So we've grown the extent by a MAX_SIZE extent, this would mean we
1610 * need 2 outstanding extents, on one side we have 1 and the other side
1611 * we have 1 so they are == and we can return. But in this case
1612 *
1613 * [MAX_SIZE+4k][MAX_SIZE+4k]
1614 *
1615 * Each range on their own accounts for 2 extents, but merged together
1616 * they are only 3 extents worth of accounting, so we need to drop in
1617 * this case.
1608 */ 1618 */
1619 old_size = other->end - other->start + 1;
1609 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, 1620 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1610 BTRFS_MAX_EXTENT_SIZE); 1621 BTRFS_MAX_EXTENT_SIZE);
1622 old_size = new->end - new->start + 1;
1623 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1624 BTRFS_MAX_EXTENT_SIZE);
1625
1611 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1626 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1612 BTRFS_MAX_EXTENT_SIZE) > num_extents) 1627 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1613 return; 1628 return;
1614 1629
1615 spin_lock(&BTRFS_I(inode)->lock); 1630 spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
1686 spin_unlock(&BTRFS_I(inode)->lock); 1701 spin_unlock(&BTRFS_I(inode)->lock);
1687 } 1702 }
1688 1703
1704 /* For sanity tests */
1705 if (btrfs_test_is_dummy_root(root))
1706 return;
1707
1689 __percpu_counter_add(&root->fs_info->delalloc_bytes, len, 1708 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1690 root->fs_info->delalloc_batch); 1709 root->fs_info->delalloc_batch);
1691 spin_lock(&BTRFS_I(inode)->lock); 1710 spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1741 root != root->fs_info->tree_root) 1760 root != root->fs_info->tree_root)
1742 btrfs_delalloc_release_metadata(inode, len); 1761 btrfs_delalloc_release_metadata(inode, len);
1743 1762
1763 /* For sanity tests. */
1764 if (btrfs_test_is_dummy_root(root))
1765 return;
1766
1744 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1767 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1745 && do_list && !(state->state & EXTENT_NORESERVE)) 1768 && do_list && !(state->state & EXTENT_NORESERVE))
1746 btrfs_free_reserved_data_space(inode, len); 1769 btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7213 u64 start = iblock << inode->i_blkbits; 7236 u64 start = iblock << inode->i_blkbits;
7214 u64 lockstart, lockend; 7237 u64 lockstart, lockend;
7215 u64 len = bh_result->b_size; 7238 u64 len = bh_result->b_size;
7216 u64 orig_len = len; 7239 u64 *outstanding_extents = NULL;
7217 int unlock_bits = EXTENT_LOCKED; 7240 int unlock_bits = EXTENT_LOCKED;
7218 int ret = 0; 7241 int ret = 0;
7219 7242
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7225 lockstart = start; 7248 lockstart = start;
7226 lockend = start + len - 1; 7249 lockend = start + len - 1;
7227 7250
7251 if (current->journal_info) {
7252 /*
7253 * Need to pull our outstanding extents and set journal_info to NULL so
7254 * that anything that needs to check if there's a transction doesn't get
7255 * confused.
7256 */
7257 outstanding_extents = current->journal_info;
7258 current->journal_info = NULL;
7259 }
7260
7228 /* 7261 /*
7229 * If this errors out it's because we couldn't invalidate pagecache for 7262 * If this errors out it's because we couldn't invalidate pagecache for
7230 * this range and we need to fallback to buffered. 7263 * this range and we need to fallback to buffered.
@@ -7285,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7285 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && 7318 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7286 em->block_start != EXTENT_MAP_HOLE)) { 7319 em->block_start != EXTENT_MAP_HOLE)) {
7287 int type; 7320 int type;
7288 int ret;
7289 u64 block_start, orig_start, orig_block_len, ram_bytes; 7321 u64 block_start, orig_start, orig_block_len, ram_bytes;
7290 7322
7291 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 7323 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7349,11 +7381,20 @@ unlock:
7349 if (start + len > i_size_read(inode)) 7381 if (start + len > i_size_read(inode))
7350 i_size_write(inode, start + len); 7382 i_size_write(inode, start + len);
7351 7383
7352 if (len < orig_len) { 7384 /*
7385 * If we have an outstanding_extents count still set then we're
7386 * within our reservation, otherwise we need to adjust our inode
7387 * counter appropriately.
7388 */
7389 if (*outstanding_extents) {
7390 (*outstanding_extents)--;
7391 } else {
7353 spin_lock(&BTRFS_I(inode)->lock); 7392 spin_lock(&BTRFS_I(inode)->lock);
7354 BTRFS_I(inode)->outstanding_extents++; 7393 BTRFS_I(inode)->outstanding_extents++;
7355 spin_unlock(&BTRFS_I(inode)->lock); 7394 spin_unlock(&BTRFS_I(inode)->lock);
7356 } 7395 }
7396
7397 current->journal_info = outstanding_extents;
7357 btrfs_free_reserved_data_space(inode, len); 7398 btrfs_free_reserved_data_space(inode, len);
7358 } 7399 }
7359 7400
@@ -7377,6 +7418,8 @@ unlock:
7377unlock_err: 7418unlock_err:
7378 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7419 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7379 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7420 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7421 if (outstanding_extents)
7422 current->journal_info = outstanding_extents;
7380 return ret; 7423 return ret;
7381} 7424}
7382 7425
@@ -8076,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8076{ 8119{
8077 struct file *file = iocb->ki_filp; 8120 struct file *file = iocb->ki_filp;
8078 struct inode *inode = file->f_mapping->host; 8121 struct inode *inode = file->f_mapping->host;
8122 u64 outstanding_extents = 0;
8079 size_t count = 0; 8123 size_t count = 0;
8080 int flags = 0; 8124 int flags = 0;
8081 bool wakeup = true; 8125 bool wakeup = true;
@@ -8113,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8113 ret = btrfs_delalloc_reserve_space(inode, count); 8157 ret = btrfs_delalloc_reserve_space(inode, count);
8114 if (ret) 8158 if (ret)
8115 goto out; 8159 goto out;
8160 outstanding_extents = div64_u64(count +
8161 BTRFS_MAX_EXTENT_SIZE - 1,
8162 BTRFS_MAX_EXTENT_SIZE);
8163
8164 /*
8165 * We need to know how many extents we reserved so that we can
8166 * do the accounting properly if we go over the number we
8167 * originally calculated. Abuse current->journal_info for this.
8168 */
8169 current->journal_info = &outstanding_extents;
8116 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8170 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8117 &BTRFS_I(inode)->runtime_flags)) { 8171 &BTRFS_I(inode)->runtime_flags)) {
8118 inode_dio_done(inode); 8172 inode_dio_done(inode);
@@ -8125,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8125 iter, offset, btrfs_get_blocks_direct, NULL, 8179 iter, offset, btrfs_get_blocks_direct, NULL,
8126 btrfs_submit_direct, flags); 8180 btrfs_submit_direct, flags);
8127 if (rw & WRITE) { 8181 if (rw & WRITE) {
8182 current->journal_info = NULL;
8128 if (ret < 0 && ret != -EIOCBQUEUED) 8183 if (ret < 0 && ret != -EIOCBQUEUED)
8129 btrfs_delalloc_release_space(inode, count); 8184 btrfs_delalloc_release_space(inode, count);
8130 else if (ret >= 0 && (size_t)ret < count) 8185 else if (ret >= 0 && (size_t)ret < count)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
452 continue; 452 continue;
453 if (entry_end(ordered) <= start) 453 if (entry_end(ordered) <= start)
454 break; 454 break;
455 if (!list_empty(&ordered->log_list)) 455 if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
456 continue;
457 if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
458 continue; 456 continue;
459 list_add(&ordered->log_list, logged_list); 457 list_add(&ordered->log_list, logged_list);
460 atomic_inc(&ordered->refs); 458 atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
511 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 509 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
512 &ordered->flags)); 510 &ordered->flags));
513 511
514 if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 512 list_add_tail(&ordered->trans_list, &trans->ordered);
515 list_add_tail(&ordered->trans_list, &trans->ordered);
516 spin_lock_irq(&log->log_extents_lock[index]); 513 spin_lock_irq(&log->log_extents_lock[index]);
517 } 514 }
518 spin_unlock_irq(&log->log_extents_lock[index]); 515 spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1259 if (oper1->seq < oper2->seq) 1259 if (oper1->seq < oper2->seq)
1260 return -1; 1260 return -1;
1261 if (oper1->seq > oper2->seq) 1261 if (oper1->seq > oper2->seq)
1262 return -1; 1262 return 1;
1263 if (oper1->ref_root < oper2->ref_root) 1263 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1264 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1265 if (oper1->ref_root > oper2->ref_root)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe5857223515..d6033f540cc7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
230 u64 parent_ino; 230 u64 parent_ino;
231 u64 ino; 231 u64 ino;
232 u64 gen; 232 u64 gen;
233 bool is_orphan;
233 struct list_head update_refs; 234 struct list_head update_refs;
234}; 235};
235 236
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2984 u64 ino_gen, 2985 u64 ino_gen,
2985 u64 parent_ino, 2986 u64 parent_ino,
2986 struct list_head *new_refs, 2987 struct list_head *new_refs,
2987 struct list_head *deleted_refs) 2988 struct list_head *deleted_refs,
2989 const bool is_orphan)
2988{ 2990{
2989 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2991 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2990 struct rb_node *parent = NULL; 2992 struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2999 pm->parent_ino = parent_ino; 3001 pm->parent_ino = parent_ino;
3000 pm->ino = ino; 3002 pm->ino = ino;
3001 pm->gen = ino_gen; 3003 pm->gen = ino_gen;
3004 pm->is_orphan = is_orphan;
3002 INIT_LIST_HEAD(&pm->list); 3005 INIT_LIST_HEAD(&pm->list);
3003 INIT_LIST_HEAD(&pm->update_refs); 3006 INIT_LIST_HEAD(&pm->update_refs);
3004 RB_CLEAR_NODE(&pm->node); 3007 RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3131 rmdir_ino = dm->rmdir_ino; 3134 rmdir_ino = dm->rmdir_ino;
3132 free_waiting_dir_move(sctx, dm); 3135 free_waiting_dir_move(sctx, dm);
3133 3136
3134 ret = get_first_ref(sctx->parent_root, pm->ino, 3137 if (pm->is_orphan) {
3135 &parent_ino, &parent_gen, name); 3138 ret = gen_unique_name(sctx, pm->ino,
3136 if (ret < 0) 3139 pm->gen, from_path);
3137 goto out; 3140 } else {
3138 3141 ret = get_first_ref(sctx->parent_root, pm->ino,
3139 ret = get_cur_path(sctx, parent_ino, parent_gen, 3142 &parent_ino, &parent_gen, name);
3140 from_path); 3143 if (ret < 0)
3141 if (ret < 0) 3144 goto out;
3142 goto out; 3145 ret = get_cur_path(sctx, parent_ino, parent_gen,
3143 ret = fs_path_add_path(from_path, name); 3146 from_path);
3147 if (ret < 0)
3148 goto out;
3149 ret = fs_path_add_path(from_path, name);
3150 }
3144 if (ret < 0) 3151 if (ret < 0)
3145 goto out; 3152 goto out;
3146 3153
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3150 LIST_HEAD(deleted_refs); 3157 LIST_HEAD(deleted_refs);
3151 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); 3158 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3152 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, 3159 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3153 &pm->update_refs, &deleted_refs); 3160 &pm->update_refs, &deleted_refs,
3161 pm->is_orphan);
3154 if (ret < 0) 3162 if (ret < 0)
3155 goto out; 3163 goto out;
3156 if (rmdir_ino) { 3164 if (rmdir_ino) {
@@ -3283,6 +3291,127 @@ out:
3283 return ret; 3291 return ret;
3284} 3292}
3285 3293
3294/*
3295 * We might need to delay a directory rename even when no ancestor directory
3296 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3297 * renamed. This happens when we rename a directory to the old name (the name
3298 * in the parent root) of some other unrelated directory that got its rename
3299 * delayed due to some ancestor with higher number that got renamed.
3300 *
3301 * Example:
3302 *
3303 * Parent snapshot:
3304 * . (ino 256)
3305 * |---- a/ (ino 257)
3306 * | |---- file (ino 260)
3307 * |
3308 * |---- b/ (ino 258)
3309 * |---- c/ (ino 259)
3310 *
3311 * Send snapshot:
3312 * . (ino 256)
3313 * |---- a/ (ino 258)
3314 * |---- x/ (ino 259)
3315 * |---- y/ (ino 257)
3316 * |----- file (ino 260)
3317 *
3318 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
3319 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3320 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3321 * must issue is:
3322 *
3323 * 1 - rename 259 from 'c' to 'x'
3324 * 2 - rename 257 from 'a' to 'x/y'
3325 * 3 - rename 258 from 'b' to 'a'
3326 *
3327 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3328 * be done right away and < 0 on error.
3329 */
3330static int wait_for_dest_dir_move(struct send_ctx *sctx,
3331 struct recorded_ref *parent_ref,
3332 const bool is_orphan)
3333{
3334 struct btrfs_path *path;
3335 struct btrfs_key key;
3336 struct btrfs_key di_key;
3337 struct btrfs_dir_item *di;
3338 u64 left_gen;
3339 u64 right_gen;
3340 int ret = 0;
3341
3342 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3343 return 0;
3344
3345 path = alloc_path_for_send();
3346 if (!path)
3347 return -ENOMEM;
3348
3349 key.objectid = parent_ref->dir;
3350 key.type = BTRFS_DIR_ITEM_KEY;
3351 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
3352
3353 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
3354 if (ret < 0) {
3355 goto out;
3356 } else if (ret > 0) {
3357 ret = 0;
3358 goto out;
3359 }
3360
3361 di = btrfs_match_dir_item_name(sctx->parent_root, path,
3362 parent_ref->name, parent_ref->name_len);
3363 if (!di) {
3364 ret = 0;
3365 goto out;
3366 }
3367 /*
3368 * di_key.objectid has the number of the inode that has a dentry in the
3369 * parent directory with the same name that sctx->cur_ino is being
3370 * renamed to. We need to check if that inode is in the send root as
3371 * well and if it is currently marked as an inode with a pending rename,
3372 * if it is, we need to delay the rename of sctx->cur_ino as well, so
3373 * that it happens after that other inode is renamed.
3374 */
3375 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
3376 if (di_key.type != BTRFS_INODE_ITEM_KEY) {
3377 ret = 0;
3378 goto out;
3379 }
3380
3381 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
3382 &left_gen, NULL, NULL, NULL, NULL);
3383 if (ret < 0)
3384 goto out;
3385 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
3386 &right_gen, NULL, NULL, NULL, NULL);
3387 if (ret < 0) {
3388 if (ret == -ENOENT)
3389 ret = 0;
3390 goto out;
3391 }
3392
3393 /* Different inode, no need to delay the rename of sctx->cur_ino */
3394 if (right_gen != left_gen) {
3395 ret = 0;
3396 goto out;
3397 }
3398
3399 if (is_waiting_for_move(sctx, di_key.objectid)) {
3400 ret = add_pending_dir_move(sctx,
3401 sctx->cur_ino,
3402 sctx->cur_inode_gen,
3403 di_key.objectid,
3404 &sctx->new_refs,
3405 &sctx->deleted_refs,
3406 is_orphan);
3407 if (!ret)
3408 ret = 1;
3409 }
3410out:
3411 btrfs_free_path(path);
3412 return ret;
3413}
3414
3286static int wait_for_parent_move(struct send_ctx *sctx, 3415static int wait_for_parent_move(struct send_ctx *sctx,
3287 struct recorded_ref *parent_ref) 3416 struct recorded_ref *parent_ref)
3288{ 3417{
@@ -3349,7 +3478,8 @@ out:
3349 sctx->cur_inode_gen, 3478 sctx->cur_inode_gen,
3350 ino, 3479 ino,
3351 &sctx->new_refs, 3480 &sctx->new_refs,
3352 &sctx->deleted_refs); 3481 &sctx->deleted_refs,
3482 false);
3353 if (!ret) 3483 if (!ret)
3354 ret = 1; 3484 ret = 1;
3355 } 3485 }
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3372 int did_overwrite = 0; 3502 int did_overwrite = 0;
3373 int is_orphan = 0; 3503 int is_orphan = 0;
3374 u64 last_dir_ino_rm = 0; 3504 u64 last_dir_ino_rm = 0;
3505 bool can_rename = true;
3375 3506
3376verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3507verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3377 3508
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3490 } 3621 }
3491 } 3622 }
3492 3623
3624 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3625 ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3626 if (ret < 0)
3627 goto out;
3628 if (ret == 1) {
3629 can_rename = false;
3630 *pending_move = 1;
3631 }
3632 }
3633
3493 /* 3634 /*
3494 * link/move the ref to the new place. If we have an orphan 3635 * link/move the ref to the new place. If we have an orphan
3495 * inode, move it and update valid_path. If not, link or move 3636 * inode, move it and update valid_path. If not, link or move
3496 * it depending on the inode mode. 3637 * it depending on the inode mode.
3497 */ 3638 */
3498 if (is_orphan) { 3639 if (is_orphan && can_rename) {
3499 ret = send_rename(sctx, valid_path, cur->full_path); 3640 ret = send_rename(sctx, valid_path, cur->full_path);
3500 if (ret < 0) 3641 if (ret < 0)
3501 goto out; 3642 goto out;
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3503 ret = fs_path_copy(valid_path, cur->full_path); 3644 ret = fs_path_copy(valid_path, cur->full_path);
3504 if (ret < 0) 3645 if (ret < 0)
3505 goto out; 3646 goto out;
3506 } else { 3647 } else if (can_rename) {
3507 if (S_ISDIR(sctx->cur_inode_mode)) { 3648 if (S_ISDIR(sctx->cur_inode_mode)) {
3508 /* 3649 /*
3509 * Dirs can't be linked, so move it. For moved 3650 * Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
911 return ret; 911 return ret;
912} 912}
913 913
914static int test_extent_accounting(void)
915{
916 struct inode *inode = NULL;
917 struct btrfs_root *root = NULL;
918 int ret = -ENOMEM;
919
920 inode = btrfs_new_test_inode();
921 if (!inode) {
922 test_msg("Couldn't allocate inode\n");
923 return ret;
924 }
925
926 root = btrfs_alloc_dummy_root();
927 if (IS_ERR(root)) {
928 test_msg("Couldn't allocate root\n");
929 goto out;
930 }
931
932 root->fs_info = btrfs_alloc_dummy_fs_info();
933 if (!root->fs_info) {
934 test_msg("Couldn't allocate dummy fs info\n");
935 goto out;
936 }
937
938 BTRFS_I(inode)->root = root;
939 btrfs_test_inode_set_ops(inode);
940
941 /* [BTRFS_MAX_EXTENT_SIZE] */
942 BTRFS_I(inode)->outstanding_extents++;
943 ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
944 NULL);
945 if (ret) {
946 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
947 goto out;
948 }
949 if (BTRFS_I(inode)->outstanding_extents != 1) {
950 ret = -EINVAL;
951 test_msg("Miscount, wanted 1, got %u\n",
952 BTRFS_I(inode)->outstanding_extents);
953 goto out;
954 }
955
956 /* [BTRFS_MAX_EXTENT_SIZE][4k] */
957 BTRFS_I(inode)->outstanding_extents++;
958 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
959 BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
960 if (ret) {
961 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
962 goto out;
963 }
964 if (BTRFS_I(inode)->outstanding_extents != 2) {
965 ret = -EINVAL;
966 test_msg("Miscount, wanted 2, got %u\n",
967 BTRFS_I(inode)->outstanding_extents);
968 goto out;
969 }
970
971 /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
972 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
973 BTRFS_MAX_EXTENT_SIZE >> 1,
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS);
978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out;
981 }
982 if (BTRFS_I(inode)->outstanding_extents != 2) {
983 ret = -EINVAL;
984 test_msg("Miscount, wanted 2, got %u\n",
985 BTRFS_I(inode)->outstanding_extents);
986 goto out;
987 }
988
989 /* [BTRFS_MAX_EXTENT_SIZE][4K] */
990 BTRFS_I(inode)->outstanding_extents++;
991 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
992 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
993 NULL);
994 if (ret) {
995 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
996 goto out;
997 }
998 if (BTRFS_I(inode)->outstanding_extents != 2) {
999 ret = -EINVAL;
1000 test_msg("Miscount, wanted 2, got %u\n",
1001 BTRFS_I(inode)->outstanding_extents);
1002 goto out;
1003 }
1004
1005 /*
1006 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
1007 *
1008 * I'm artificially adding 2 to outstanding_extents because in the
1009 * buffered IO case we'd add things up as we go, but I don't feel like
1010 * doing that here, this isn't the interesting case we want to test.
1011 */
1012 BTRFS_I(inode)->outstanding_extents += 2;
1013 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
1014 (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
1015 NULL);
1016 if (ret) {
1017 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1018 goto out;
1019 }
1020 if (BTRFS_I(inode)->outstanding_extents != 4) {
1021 ret = -EINVAL;
1022 test_msg("Miscount, wanted 4, got %u\n",
1023 BTRFS_I(inode)->outstanding_extents);
1024 goto out;
1025 }
1026
1027 /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
1028 BTRFS_I(inode)->outstanding_extents++;
1029 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1030 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1031 if (ret) {
1032 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1033 goto out;
1034 }
1035 if (BTRFS_I(inode)->outstanding_extents != 3) {
1036 ret = -EINVAL;
1037 test_msg("Miscount, wanted 3, got %u\n",
1038 BTRFS_I(inode)->outstanding_extents);
1039 goto out;
1040 }
1041
1042 /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
1043 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
1044 BTRFS_MAX_EXTENT_SIZE+4096,
1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS);
1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out;
1052 }
1053 if (BTRFS_I(inode)->outstanding_extents != 4) {
1054 ret = -EINVAL;
1055 test_msg("Miscount, wanted 4, got %u\n",
1056 BTRFS_I(inode)->outstanding_extents);
1057 goto out;
1058 }
1059
1060 /*
1061 * Refill the hole again just for good measure, because I thought it
1062 * might fail and I'd rather satisfy my paranoia at this point.
1063 */
1064 BTRFS_I(inode)->outstanding_extents++;
1065 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1066 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1067 if (ret) {
1068 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1069 goto out;
1070 }
1071 if (BTRFS_I(inode)->outstanding_extents != 3) {
1072 ret = -EINVAL;
1073 test_msg("Miscount, wanted 3, got %u\n",
1074 BTRFS_I(inode)->outstanding_extents);
1075 goto out;
1076 }
1077
1078 /* Empty */
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS);
1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out;
1086 }
1087 if (BTRFS_I(inode)->outstanding_extents) {
1088 ret = -EINVAL;
1089 test_msg("Miscount, wanted 0, got %u\n",
1090 BTRFS_I(inode)->outstanding_extents);
1091 goto out;
1092 }
1093 ret = 0;
1094out:
1095 if (ret)
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS);
1100 iput(inode);
1101 btrfs_free_dummy_root(root);
1102 return ret;
1103}
1104
914int btrfs_test_inodes(void) 1105int btrfs_test_inodes(void)
915{ 1106{
916 int ret; 1107 int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
924 if (ret) 1115 if (ret)
925 return ret; 1116 return ret;
926 test_msg("Running hole first btrfs_get_extent test\n"); 1117 test_msg("Running hole first btrfs_get_extent test\n");
927 return test_hole_first(); 1118 ret = test_hole_first();
1119 if (ret)
1120 return ret;
1121 test_msg("Running outstanding_extents tests\n");
1122 return test_extent_accounting();
928} 1123}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7e80f32550a6..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1023 u64 old_root_bytenr; 1023 u64 old_root_bytenr;
1024 u64 old_root_used; 1024 u64 old_root_used;
1025 struct btrfs_root *tree_root = root->fs_info->tree_root; 1025 struct btrfs_root *tree_root = root->fs_info->tree_root;
1026 bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
1027 1026
1028 old_root_used = btrfs_root_used(&root->root_item); 1027 old_root_used = btrfs_root_used(&root->root_item);
1029 btrfs_write_dirty_block_groups(trans, root);
1030 1028
1031 while (1) { 1029 while (1) {
1032 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1030 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1033 if (old_root_bytenr == root->node->start && 1031 if (old_root_bytenr == root->node->start &&
1034 old_root_used == btrfs_root_used(&root->root_item) && 1032 old_root_used == btrfs_root_used(&root->root_item))
1035 (!extent_root ||
1036 list_empty(&trans->transaction->dirty_bgs)))
1037 break; 1033 break;
1038 1034
1039 btrfs_set_root_node(&root->root_item, root->node); 1035 btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,17 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1044 return ret; 1040 return ret;
1045 1041
1046 old_root_used = btrfs_root_used(&root->root_item); 1042 old_root_used = btrfs_root_used(&root->root_item);
1047 if (extent_root) {
1048 ret = btrfs_write_dirty_block_groups(trans, root);
1049 if (ret)
1050 return ret;
1051 }
1052 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1053 if (ret)
1054 return ret;
1055 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1056 if (ret)
1057 return ret;
1058 } 1043 }
1059 1044
1060 return 0; 1045 return 0;
@@ -1071,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1071 struct btrfs_root *root) 1056 struct btrfs_root *root)
1072{ 1057{
1073 struct btrfs_fs_info *fs_info = root->fs_info; 1058 struct btrfs_fs_info *fs_info = root->fs_info;
1059 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1074 struct list_head *next; 1060 struct list_head *next;
1075 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1076 int ret; 1062 int ret;
@@ -1098,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1098 if (ret) 1084 if (ret)
1099 return ret; 1085 return ret;
1100 1086
1087 ret = btrfs_setup_space_cache(trans, root);
1088 if (ret)
1089 return ret;
1090
1101 /* run_qgroups might have added some more refs */ 1091 /* run_qgroups might have added some more refs */
1102 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1092 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1103 if (ret) 1093 if (ret)
1104 return ret; 1094 return ret;
1105 1095again:
1106 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 1096 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
1107 next = fs_info->dirty_cowonly_roots.next; 1097 next = fs_info->dirty_cowonly_roots.next;
1108 list_del_init(next); 1098 list_del_init(next);
@@ -1115,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1115 ret = update_cowonly_root(trans, root); 1105 ret = update_cowonly_root(trans, root);
1116 if (ret) 1106 if (ret)
1117 return ret; 1107 return ret;
1108 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1109 if (ret)
1110 return ret;
1118 } 1111 }
1119 1112
1113 while (!list_empty(dirty_bgs)) {
1114 ret = btrfs_write_dirty_block_groups(trans, root);
1115 if (ret)
1116 return ret;
1117 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1118 if (ret)
1119 return ret;
1120 }
1121
1122 if (!list_empty(&fs_info->dirty_cowonly_roots))
1123 goto again;
1124
1120 list_add_tail(&fs_info->extent_root->dirty_list, 1125 list_add_tail(&fs_info->extent_root->dirty_list,
1121 &trans->transaction->switch_commits); 1126 &trans->transaction->switch_commits);
1122 btrfs_after_dev_replace_commit(fs_info); 1127 btrfs_after_dev_replace_commit(fs_info);
@@ -1814,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1814 1819
1815 wait_for_commit(root, cur_trans); 1820 wait_for_commit(root, cur_trans);
1816 1821
1822 if (unlikely(cur_trans->aborted))
1823 ret = cur_trans->aborted;
1824
1817 btrfs_put_transaction(cur_trans); 1825 btrfs_put_transaction(cur_trans);
1818 1826
1819 return ret; 1827 return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a37f8b39bae..c5b8ba37f88e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1012,7 +1012,7 @@ again:
1012 base = btrfs_item_ptr_offset(leaf, path->slots[0]); 1012 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
1013 1013
1014 while (cur_offset < item_size) { 1014 while (cur_offset < item_size) {
1015 extref = (struct btrfs_inode_extref *)base + cur_offset; 1015 extref = (struct btrfs_inode_extref *)(base + cur_offset);
1016 1016
1017 victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 1017 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
1018 1018
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..883b93623bc5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
111 name, name_len, -1); 111 name, name_len, -1);
112 if (!di && (flags & XATTR_REPLACE)) 112 if (!di && (flags & XATTR_REPLACE))
113 ret = -ENODATA; 113 ret = -ENODATA;
114 else if (IS_ERR(di))
115 ret = PTR_ERR(di);
114 else if (di) 116 else if (di)
115 ret = btrfs_delete_one_dir_name(trans, root, path, di); 117 ret = btrfs_delete_one_dir_name(trans, root, path, di);
116 goto out; 118 goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
127 ASSERT(mutex_is_locked(&inode->i_mutex)); 129 ASSERT(mutex_is_locked(&inode->i_mutex));
128 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), 130 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
129 name, name_len, 0); 131 name, name_len, 0);
130 if (!di) { 132 if (!di)
131 ret = -ENODATA; 133 ret = -ENODATA;
134 else if (IS_ERR(di))
135 ret = PTR_ERR(di);
136 if (ret)
132 goto out; 137 goto out;
133 }
134 btrfs_release_path(path); 138 btrfs_release_path(path);
135 di = NULL; 139 di = NULL;
136 } 140 }
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ac7445e6ec7..aa0dc2573374 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * fs/cifs/cifsencrypt.c 2 * fs/cifs/cifsencrypt.c
3 * 3 *
4 * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP
5 * for more detailed information
6 *
4 * Copyright (C) International Business Machines Corp., 2005,2013 7 * Copyright (C) International Business Machines Corp., 2005,2013
5 * Author(s): Steve French (sfrench@us.ibm.com) 8 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 9 *
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
515 __func__); 518 __func__);
516 return rc; 519 return rc;
517 } 520 }
518 } else if (ses->serverName) { 521 } else {
522 /* We use ses->serverName if no domain name available */
519 len = strlen(ses->serverName); 523 len = strlen(ses->serverName);
520 524
521 server = kmalloc(2 + (len * 2), GFP_KERNEL); 525 server = kmalloc(2 + (len * 2), GFP_KERNEL);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3aa999ab785..480cf9c81d50 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1599,6 +1599,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1599 pr_warn("CIFS: username too long\n"); 1599 pr_warn("CIFS: username too long\n");
1600 goto cifs_parse_mount_err; 1600 goto cifs_parse_mount_err;
1601 } 1601 }
1602
1603 kfree(vol->username);
1602 vol->username = kstrdup(string, GFP_KERNEL); 1604 vol->username = kstrdup(string, GFP_KERNEL);
1603 if (!vol->username) 1605 if (!vol->username)
1604 goto cifs_parse_mount_err; 1606 goto cifs_parse_mount_err;
@@ -1700,6 +1702,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1700 goto cifs_parse_mount_err; 1702 goto cifs_parse_mount_err;
1701 } 1703 }
1702 1704
1705 kfree(vol->domainname);
1703 vol->domainname = kstrdup(string, GFP_KERNEL); 1706 vol->domainname = kstrdup(string, GFP_KERNEL);
1704 if (!vol->domainname) { 1707 if (!vol->domainname) {
1705 pr_warn("CIFS: no memory for domainname\n"); 1708 pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1731 } 1734 }
1732 1735
1733 if (strncasecmp(string, "default", 7) != 0) { 1736 if (strncasecmp(string, "default", 7) != 0) {
1737 kfree(vol->iocharset);
1734 vol->iocharset = kstrdup(string, 1738 vol->iocharset = kstrdup(string,
1735 GFP_KERNEL); 1739 GFP_KERNEL);
1736 if (!vol->iocharset) { 1740 if (!vol->iocharset) {
@@ -2913,8 +2917,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2913 * calling name ends in null (byte 16) from old smb 2917 * calling name ends in null (byte 16) from old smb
2914 * convention. 2918 * convention.
2915 */ 2919 */
2916 if (server->workstation_RFC1001_name && 2920 if (server->workstation_RFC1001_name[0] != 0)
2917 server->workstation_RFC1001_name[0] != 0)
2918 rfc1002mangle(ses_init_buf->trailer. 2921 rfc1002mangle(ses_init_buf->trailer.
2919 session_req.calling_name, 2922 session_req.calling_name,
2920 server->workstation_RFC1001_name, 2923 server->workstation_RFC1001_name,
@@ -3692,6 +3695,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
3692#endif /* CIFS_WEAK_PW_HASH */ 3695#endif /* CIFS_WEAK_PW_HASH */
3693 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, 3696 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
3694 bcc_ptr, nls_codepage); 3697 bcc_ptr, nls_codepage);
3698 if (rc) {
3699 cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
3700 __func__, rc);
3701 cifs_buf_release(smb_buffer);
3702 return rc;
3703 }
3695 3704
3696 bcc_ptr += CIFS_AUTH_RESP_SIZE; 3705 bcc_ptr += CIFS_AUTH_RESP_SIZE;
3697 if (ses->capabilities & CAP_UNICODE) { 3706 if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a94b3e673182..ca30c391a894 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1823,6 +1823,7 @@ refind_writable:
1823 cifsFileInfo_put(inv_file); 1823 cifsFileInfo_put(inv_file);
1824 spin_lock(&cifs_file_list_lock); 1824 spin_lock(&cifs_file_list_lock);
1825 ++refind; 1825 ++refind;
1826 inv_file = NULL;
1826 goto refind_writable; 1827 goto refind_writable;
1827 } 1828 }
1828 } 1829 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2d4f37235ed0..3e126d7bb2ea 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
771 cifs_buf_release(srchinf->ntwrk_buf_start); 771 cifs_buf_release(srchinf->ntwrk_buf_start);
772 } 772 }
773 kfree(srchinf); 773 kfree(srchinf);
774 if (rc)
775 goto cgii_exit;
774 } else 776 } else
775 goto cgii_exit; 777 goto cgii_exit;
776 778
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 689f035915cf..22dfdf17d065 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
322 322
323 /* return pointer to beginning of data area, ie offset from SMB start */ 323 /* return pointer to beginning of data area, ie offset from SMB start */
324 if ((*off != 0) && (*len != 0)) 324 if ((*off != 0) && (*len != 0))
325 return hdr->ProtocolId + *off; 325 return (char *)(&hdr->ProtocolId[0]) + *off;
326 else 326 else
327 return NULL; 327 return NULL;
328} 328}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 96b5d40a2ece..eab05e1aa587 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid,
684 684
685 /* No need to change MaxChunks since already set to 1 */ 685 /* No need to change MaxChunks since already set to 1 */
686 chunk_sizes_updated = true; 686 chunk_sizes_updated = true;
687 } 687 } else
688 goto cchunk_out;
688 } 689 }
689 690
690cchunk_out: 691cchunk_out:
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3417340bf89e..65cd7a84c8bc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1218 struct smb2_ioctl_req *req; 1218 struct smb2_ioctl_req *req;
1219 struct smb2_ioctl_rsp *rsp; 1219 struct smb2_ioctl_rsp *rsp;
1220 struct TCP_Server_Info *server; 1220 struct TCP_Server_Info *server;
1221 struct cifs_ses *ses = tcon->ses; 1221 struct cifs_ses *ses;
1222 struct kvec iov[2]; 1222 struct kvec iov[2];
1223 int resp_buftype; 1223 int resp_buftype;
1224 int num_iovecs; 1224 int num_iovecs;
@@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1233 if (plen) 1233 if (plen)
1234 *plen = 0; 1234 *plen = 0;
1235 1235
1236 if (tcon)
1237 ses = tcon->ses;
1238 else
1239 return -EIO;
1240
1236 if (ses && (ses->server)) 1241 if (ses && (ses->server))
1237 server = ses->server; 1242 server = ses->server;
1238 else 1243 else
@@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1296 rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; 1301 rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
1297 1302
1298 if ((rc != 0) && (rc != -EINVAL)) { 1303 if ((rc != 0) && (rc != -EINVAL)) {
1299 if (tcon) 1304 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1300 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1301 goto ioctl_exit; 1305 goto ioctl_exit;
1302 } else if (rc == -EINVAL) { 1306 } else if (rc == -EINVAL) {
1303 if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && 1307 if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
1304 (opcode != FSCTL_SRV_COPYCHUNK)) { 1308 (opcode != FSCTL_SRV_COPYCHUNK)) {
1305 if (tcon) 1309 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1306 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1307 goto ioctl_exit; 1310 goto ioctl_exit;
1308 } 1311 }
1309 } 1312 }
@@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1629 1632
1630 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); 1633 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
1631 1634
1632 if ((rc != 0) && tcon) 1635 if (rc != 0)
1633 cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); 1636 cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
1634 1637
1635 free_rsp_buf(resp_buftype, iov[0].iov_base); 1638 free_rsp_buf(resp_buftype, iov[0].iov_base);
@@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
2114 struct kvec iov[2]; 2117 struct kvec iov[2];
2115 int rc = 0; 2118 int rc = 0;
2116 int len; 2119 int len;
2117 int resp_buftype; 2120 int resp_buftype = CIFS_NO_BUFFER;
2118 unsigned char *bufptr; 2121 unsigned char *bufptr;
2119 struct TCP_Server_Info *server; 2122 struct TCP_Server_Info *server;
2120 struct cifs_ses *ses = tcon->ses; 2123 struct cifs_ses *ses = tcon->ses;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 90d1882b306f..5ba029e627cc 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key)
124} 124}
125 125
126#define ECRYPTFS_MAX_KEYSET_SIZE 1024 126#define ECRYPTFS_MAX_KEYSET_SIZE 1024
127#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 127#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31
128#define ECRYPTFS_MAX_NUM_ENC_KEYS 64 128#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
129#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */ 129#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
130#define ECRYPTFS_SALT_BYTES 2 130#define ECRYPTFS_SALT_BYTES 2
@@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat {
237 struct crypto_ablkcipher *tfm; 237 struct crypto_ablkcipher *tfm;
238 struct crypto_hash *hash_tfm; /* Crypto context for generating 238 struct crypto_hash *hash_tfm; /* Crypto context for generating
239 * the initialization vectors */ 239 * the initialization vectors */
240 unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; 240 unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
241 unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; 241 unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
242 unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; 242 unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
243 struct list_head keysig_list; 243 struct list_head keysig_list;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b07731e68c0b..fd39bad6f1bd 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -303,9 +303,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
303 struct file *lower_file = ecryptfs_file_to_lower(file); 303 struct file *lower_file = ecryptfs_file_to_lower(file);
304 long rc = -ENOTTY; 304 long rc = -ENOTTY;
305 305
306 if (lower_file->f_op->unlocked_ioctl) 306 if (!lower_file->f_op->unlocked_ioctl)
307 return rc;
308
309 switch (cmd) {
310 case FITRIM:
311 case FS_IOC_GETFLAGS:
312 case FS_IOC_SETFLAGS:
313 case FS_IOC_GETVERSION:
314 case FS_IOC_SETVERSION:
307 rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); 315 rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
308 return rc; 316 fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
317
318 return rc;
319 default:
320 return rc;
321 }
309} 322}
310 323
311#ifdef CONFIG_COMPAT 324#ifdef CONFIG_COMPAT
@@ -315,9 +328,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
315 struct file *lower_file = ecryptfs_file_to_lower(file); 328 struct file *lower_file = ecryptfs_file_to_lower(file);
316 long rc = -ENOIOCTLCMD; 329 long rc = -ENOIOCTLCMD;
317 330
318 if (lower_file->f_op->compat_ioctl) 331 if (!lower_file->f_op->compat_ioctl)
332 return rc;
333
334 switch (cmd) {
335 case FITRIM:
336 case FS_IOC32_GETFLAGS:
337 case FS_IOC32_SETFLAGS:
338 case FS_IOC32_GETVERSION:
339 case FS_IOC32_SETVERSION:
319 rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); 340 rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
320 return rc; 341 fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
342
343 return rc;
344 default:
345 return rc;
346 }
321} 347}
322#endif 348#endif
323 349
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 917bd5c9776a..6bd67e2011f0 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
891 struct blkcipher_desc desc; 891 struct blkcipher_desc desc;
892 char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1]; 892 char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
893 char iv[ECRYPTFS_MAX_IV_BYTES]; 893 char iv[ECRYPTFS_MAX_IV_BYTES];
894 char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; 894 char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
895}; 895};
896 896
897/** 897/**
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1895d60f4122..c095d3264259 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
407 if (!cipher_name_set) { 407 if (!cipher_name_set) {
408 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); 408 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
409 409
410 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE); 410 BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE);
411 strcpy(mount_crypt_stat->global_default_cipher_name, 411 strcpy(mount_crypt_stat->global_default_cipher_name,
412 ECRYPTFS_DEFAULT_CIPHER); 412 ECRYPTFS_DEFAULT_CIPHER);
413 } 413 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
53 struct completion *done; /* set if the caller waits */ 53 struct completion *done; /* set if the caller waits */
54}; 54};
55 55
56/*
57 * If an inode is constantly having its pages dirtied, but then the
58 * updates stop dirtytime_expire_interval seconds in the past, it's
59 * possible for the worst case time between when an inode has its
60 * timestamps updated and when they finally get written out to be two
61 * dirtytime_expire_intervals. We set the default to 12 hours (in
62 * seconds), which means most of the time inodes will have their
63 * timestamps written to disk after 12 hours, but in the worst case a
64 * few inodes might not their timestamps updated for 24 hours.
65 */
66unsigned int dirtytime_expire_interval = 12 * 60 * 60;
67
56/** 68/**
57 * writeback_in_progress - determine whether there is writeback in progress 69 * writeback_in_progress - determine whether there is writeback in progress
58 * @bdi: the device's backing_dev_info structure. 70 * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
275 287
276 if ((flags & EXPIRE_DIRTY_ATIME) == 0) 288 if ((flags & EXPIRE_DIRTY_ATIME) == 0)
277 older_than_this = work->older_than_this; 289 older_than_this = work->older_than_this;
278 else if ((work->reason == WB_REASON_SYNC) == 0) { 290 else if (!work->for_sync) {
279 expire_time = jiffies - (HZ * 86400); 291 expire_time = jiffies - (dirtytime_expire_interval * HZ);
280 older_than_this = &expire_time; 292 older_than_this = &expire_time;
281 } 293 }
282 while (!list_empty(delaying_queue)) { 294 while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
458 */ 470 */
459 redirty_tail(inode, wb); 471 redirty_tail(inode, wb);
460 } else if (inode->i_state & I_DIRTY_TIME) { 472 } else if (inode->i_state & I_DIRTY_TIME) {
473 inode->dirtied_when = jiffies;
461 list_move(&inode->i_wb_list, &wb->b_dirty_time); 474 list_move(&inode->i_wb_list, &wb->b_dirty_time);
462 } else { 475 } else {
463 /* The inode is clean. Remove from writeback lists. */ 476 /* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
505 spin_lock(&inode->i_lock); 518 spin_lock(&inode->i_lock);
506 519
507 dirty = inode->i_state & I_DIRTY; 520 dirty = inode->i_state & I_DIRTY;
508 if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && 521 if (inode->i_state & I_DIRTY_TIME) {
509 (inode->i_state & I_DIRTY_TIME)) || 522 if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
510 (inode->i_state & I_DIRTY_TIME_EXPIRED)) { 523 unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
511 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; 524 unlikely(time_after(jiffies,
512 trace_writeback_lazytime(inode); 525 (inode->dirtied_time_when +
513 } 526 dirtytime_expire_interval * HZ)))) {
527 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
528 trace_writeback_lazytime(inode);
529 }
530 } else
531 inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
514 inode->i_state &= ~dirty; 532 inode->i_state &= ~dirty;
515 533
516 /* 534 /*
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1131 rcu_read_unlock(); 1149 rcu_read_unlock();
1132} 1150}
1133 1151
1152/*
1153 * Wake up bdi's periodically to make sure dirtytime inodes gets
1154 * written back periodically. We deliberately do *not* check the
1155 * b_dirtytime list in wb_has_dirty_io(), since this would cause the
1156 * kernel to be constantly waking up once there are any dirtytime
1157 * inodes on the system. So instead we define a separate delayed work
1158 * function which gets called much more rarely. (By default, only
1159 * once every 12 hours.)
1160 *
1161 * If there is any other write activity going on in the file system,
1162 * this function won't be necessary. But if the only thing that has
1163 * happened on the file system is a dirtytime inode caused by an atime
1164 * update, we need this infrastructure below to make sure that inode
1165 * eventually gets pushed out to disk.
1166 */
1167static void wakeup_dirtytime_writeback(struct work_struct *w);
1168static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
1169
1170static void wakeup_dirtytime_writeback(struct work_struct *w)
1171{
1172 struct backing_dev_info *bdi;
1173
1174 rcu_read_lock();
1175 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1176 if (list_empty(&bdi->wb.b_dirty_time))
1177 continue;
1178 bdi_wakeup_thread(bdi);
1179 }
1180 rcu_read_unlock();
1181 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1182}
1183
1184static int __init start_dirtytime_writeback(void)
1185{
1186 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1187 return 0;
1188}
1189__initcall(start_dirtytime_writeback);
1190
1191int dirtytime_interval_handler(struct ctl_table *table, int write,
1192 void __user *buffer, size_t *lenp, loff_t *ppos)
1193{
1194 int ret;
1195
1196 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1197 if (ret == 0 && write)
1198 mod_delayed_work(system_wq, &dirtytime_work, 0);
1199 return ret;
1200}
1201
1134static noinline void block_dump___mark_inode_dirty(struct inode *inode) 1202static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1135{ 1203{
1136 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 1204 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1269 } 1337 }
1270 1338
1271 inode->dirtied_when = jiffies; 1339 inode->dirtied_when = jiffies;
1272 list_move(&inode->i_wb_list, dirtytime ? 1340 if (dirtytime)
1273 &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); 1341 inode->dirtied_time_when = jiffies;
1342 if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
1343 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1344 else
1345 list_move(&inode->i_wb_list,
1346 &bdi->wb.b_dirty_time);
1274 spin_unlock(&bdi->wb.list_lock); 1347 spin_unlock(&bdi->wb.list_lock);
1275 trace_writeback_dirty_inode_enqueue(inode); 1348 trace_writeback_dirty_inode_enqueue(inode);
1276 1349
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d622fa..39706c57ad3c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -890,8 +890,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
890 890
891 newpage = buf->page; 891 newpage = buf->page;
892 892
893 if (WARN_ON(!PageUptodate(newpage))) 893 if (!PageUptodate(newpage))
894 return -EIO; 894 SetPageUptodate(newpage);
895 895
896 ClearPageMappedToDisk(newpage); 896 ClearPageMappedToDisk(newpage);
897 897
@@ -1353,6 +1353,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1353 return err; 1353 return err;
1354} 1354}
1355 1355
1356static int fuse_dev_open(struct inode *inode, struct file *file)
1357{
1358 /*
1359 * The fuse device's file's private_data is used to hold
1360 * the fuse_conn(ection) when it is mounted, and is used to
1361 * keep track of whether the file has been mounted already.
1362 */
1363 file->private_data = NULL;
1364 return 0;
1365}
1366
1356static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, 1367static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1357 unsigned long nr_segs, loff_t pos) 1368 unsigned long nr_segs, loff_t pos)
1358{ 1369{
@@ -1797,6 +1808,9 @@ copy_finish:
1797static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 1808static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1798 unsigned int size, struct fuse_copy_state *cs) 1809 unsigned int size, struct fuse_copy_state *cs)
1799{ 1810{
1811 /* Don't try to move pages (yet) */
1812 cs->move_pages = 0;
1813
1800 switch (code) { 1814 switch (code) {
1801 case FUSE_NOTIFY_POLL: 1815 case FUSE_NOTIFY_POLL:
1802 return fuse_notify_poll(fc, size, cs); 1816 return fuse_notify_poll(fc, size, cs);
@@ -2217,6 +2231,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
2217 2231
2218const struct file_operations fuse_dev_operations = { 2232const struct file_operations fuse_dev_operations = {
2219 .owner = THIS_MODULE, 2233 .owner = THIS_MODULE,
2234 .open = fuse_dev_open,
2220 .llseek = no_llseek, 2235 .llseek = no_llseek,
2221 .read = do_sync_read, 2236 .read = do_sync_read,
2222 .aio_read = fuse_dev_read, 2237 .aio_read = fuse_dev_read,
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
131 hfs_bnode_write(node, entry, data_off + key_len, entry_len); 131 hfs_bnode_write(node, entry, data_off + key_len, entry_len);
132 hfs_bnode_dump(node); 132 hfs_bnode_dump(node);
133 133
134 if (new_node) { 134 /*
135 /* update parent key if we inserted a key 135 * update parent key if we inserted a key
136 * at the start of the first node 136 * at the start of the node and it is not the new node
137 */ 137 */
138 if (!rec && new_node != node) 138 if (!rec && new_node != node) {
139 hfs_brec_update_parent(fd); 139 hfs_bnode_read_key(node, fd->search_key, data_off + size);
140 hfs_brec_update_parent(fd);
141 }
140 142
143 if (new_node) {
141 hfs_bnode_put(fd->bnode); 144 hfs_bnode_put(fd->bnode);
142 if (!new_node->parent) { 145 if (!new_node->parent) {
143 hfs_btree_inc_height(tree); 146 hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
168 goto again; 171 goto again;
169 } 172 }
170 173
171 if (!rec)
172 hfs_brec_update_parent(fd);
173
174 return 0; 174 return 0;
175} 175}
176 176
@@ -370,6 +370,8 @@ again:
370 if (IS_ERR(parent)) 370 if (IS_ERR(parent))
371 return PTR_ERR(parent); 371 return PTR_ERR(parent);
372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key); 372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
373 if (fd->record < 0)
374 return -ENOENT;
373 hfs_bnode_dump(parent); 375 hfs_bnode_dump(parent);
374 rec = fd->record; 376 rec = fd->record;
375 377
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b684e8a132e6..2bacb9988566 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
207 goto out_free; 207 goto out_free;
208 } 208 }
209 209
210 of->event = atomic_read(&of->kn->attr.open->event);
210 ops = kernfs_ops(of->kn); 211 ops = kernfs_ops(of->kn);
211 if (ops->read) 212 if (ops->read)
212 len = ops->read(of, buf, len, *ppos); 213 len = ops->read(of, buf, len, *ppos);
diff --git a/fs/locks.c b/fs/locks.c
index 365c82e1b3a9..40bc384728c0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1388,9 +1388,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1388int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) 1388int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1389{ 1389{
1390 int error = 0; 1390 int error = 0;
1391 struct file_lock *new_fl;
1392 struct file_lock_context *ctx = inode->i_flctx; 1391 struct file_lock_context *ctx = inode->i_flctx;
1393 struct file_lock *fl; 1392 struct file_lock *new_fl, *fl, *tmp;
1394 unsigned long break_time; 1393 unsigned long break_time;
1395 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1394 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1396 LIST_HEAD(dispose); 1395 LIST_HEAD(dispose);
@@ -1420,7 +1419,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1420 break_time++; /* so that 0 means no break time */ 1419 break_time++; /* so that 0 means no break time */
1421 } 1420 }
1422 1421
1423 list_for_each_entry(fl, &ctx->flc_lease, fl_list) { 1422 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1424 if (!leases_conflict(fl, new_fl)) 1423 if (!leases_conflict(fl, new_fl))
1425 continue; 1424 continue;
1426 if (want_write) { 1425 if (want_write) {
@@ -1665,7 +1664,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1665 } 1664 }
1666 1665
1667 if (my_fl != NULL) { 1666 if (my_fl != NULL) {
1668 error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); 1667 lease = my_fl;
1668 error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1669 if (error) 1669 if (error)
1670 goto out; 1670 goto out;
1671 goto out_setup; 1671 goto out_setup;
@@ -1727,7 +1727,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
1727 break; 1727 break;
1728 } 1728 }
1729 } 1729 }
1730 trace_generic_delete_lease(inode, fl); 1730 trace_generic_delete_lease(inode, victim);
1731 if (victim) 1731 if (victim)
1732 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); 1732 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1733 spin_unlock(&ctx->flc_lock); 1733 spin_unlock(&ctx->flc_lock);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f9f4845db989..19874151e95c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
433 433
434static bool nfs_client_init_is_complete(const struct nfs_client *clp) 434static bool nfs_client_init_is_complete(const struct nfs_client *clp)
435{ 435{
436 return clp->cl_cons_state != NFS_CS_INITING; 436 return clp->cl_cons_state <= NFS_CS_READY;
437} 437}
438 438
439int nfs_wait_client_init_complete(const struct nfs_client *clp) 439int nfs_wait_client_init_complete(const struct nfs_client *clp)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a1f0685b42ff..a6ad68865880 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
181 clear_bit(NFS_DELEGATION_NEED_RECLAIM, 181 clear_bit(NFS_DELEGATION_NEED_RECLAIM,
182 &delegation->flags); 182 &delegation->flags);
183 spin_unlock(&delegation->lock); 183 spin_unlock(&delegation->lock);
184 put_rpccred(oldcred);
185 rcu_read_unlock(); 184 rcu_read_unlock();
185 put_rpccred(oldcred);
186 trace_nfs4_reclaim_delegation(inode, res->delegation_type); 186 trace_nfs4_reclaim_delegation(inode, res->delegation_type);
187 } else { 187 } else {
188 /* We appear to have raced with a delegation return. */ 188 /* We appear to have raced with a delegation return. */
@@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
370 delegation = NULL; 370 delegation = NULL;
371 goto out; 371 goto out;
372 } 372 }
373 freeme = nfs_detach_delegation_locked(nfsi, 373 if (test_and_set_bit(NFS_DELEGATION_RETURNING,
374 &old_delegation->flags))
375 goto out;
376 freeme = nfs_detach_delegation_locked(nfsi,
374 old_delegation, clp); 377 old_delegation, clp);
375 if (freeme == NULL) 378 if (freeme == NULL)
376 goto out; 379 goto out;
@@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
433{ 436{
434 bool ret = false; 437 bool ret = false;
435 438
439 if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
440 goto out;
436 if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) 441 if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
437 ret = true; 442 ret = true;
438 if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { 443 if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
444 ret = true; 449 ret = true;
445 spin_unlock(&delegation->lock); 450 spin_unlock(&delegation->lock);
446 } 451 }
452out:
447 return ret; 453 return ret;
448} 454}
449 455
@@ -471,14 +477,20 @@ restart:
471 super_list) { 477 super_list) {
472 if (!nfs_delegation_need_return(delegation)) 478 if (!nfs_delegation_need_return(delegation))
473 continue; 479 continue;
474 inode = nfs_delegation_grab_inode(delegation); 480 if (!nfs_sb_active(server->super))
475 if (inode == NULL)
476 continue; 481 continue;
482 inode = nfs_delegation_grab_inode(delegation);
483 if (inode == NULL) {
484 rcu_read_unlock();
485 nfs_sb_deactive(server->super);
486 goto restart;
487 }
477 delegation = nfs_start_delegation_return_locked(NFS_I(inode)); 488 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
478 rcu_read_unlock(); 489 rcu_read_unlock();
479 490
480 err = nfs_end_delegation_return(inode, delegation, 0); 491 err = nfs_end_delegation_return(inode, delegation, 0);
481 iput(inode); 492 iput(inode);
493 nfs_sb_deactive(server->super);
482 if (!err) 494 if (!err)
483 goto restart; 495 goto restart;
484 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 496 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
@@ -809,19 +821,30 @@ restart:
809 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 821 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
810 list_for_each_entry_rcu(delegation, &server->delegations, 822 list_for_each_entry_rcu(delegation, &server->delegations,
811 super_list) { 823 super_list) {
824 if (test_bit(NFS_DELEGATION_RETURNING,
825 &delegation->flags))
826 continue;
812 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, 827 if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
813 &delegation->flags) == 0) 828 &delegation->flags) == 0)
814 continue; 829 continue;
815 inode = nfs_delegation_grab_inode(delegation); 830 if (!nfs_sb_active(server->super))
816 if (inode == NULL)
817 continue; 831 continue;
818 delegation = nfs_detach_delegation(NFS_I(inode), 832 inode = nfs_delegation_grab_inode(delegation);
819 delegation, server); 833 if (inode == NULL) {
834 rcu_read_unlock();
835 nfs_sb_deactive(server->super);
836 goto restart;
837 }
838 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
820 rcu_read_unlock(); 839 rcu_read_unlock();
821 840 if (delegation != NULL) {
822 if (delegation != NULL) 841 delegation = nfs_detach_delegation(NFS_I(inode),
823 nfs_free_delegation(delegation); 842 delegation, server);
843 if (delegation != NULL)
844 nfs_free_delegation(delegation);
845 }
824 iput(inode); 846 iput(inode);
847 nfs_sb_deactive(server->super);
825 goto restart; 848 goto restart;
826 } 849 }
827 } 850 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b0c55cb2a2e..c19e16f0b2d0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
408 return 0; 408 return 0;
409} 409}
410 410
411/* Match file and dirent using either filehandle or fileid
412 * Note: caller is responsible for checking the fsid
413 */
411static 414static
412int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) 415int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
413{ 416{
417 struct nfs_inode *nfsi;
418
414 if (dentry->d_inode == NULL) 419 if (dentry->d_inode == NULL)
415 goto different; 420 goto different;
416 if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0) 421
417 goto different; 422 nfsi = NFS_I(dentry->d_inode);
418 return 1; 423 if (entry->fattr->fileid == nfsi->fileid)
424 return 1;
425 if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
426 return 1;
419different: 427different:
420 return 0; 428 return 0;
421} 429}
@@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
469 struct inode *inode; 477 struct inode *inode;
470 int status; 478 int status;
471 479
480 if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
481 return;
482 if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
483 return;
472 if (filename.name[0] == '.') { 484 if (filename.name[0] == '.') {
473 if (filename.len == 1) 485 if (filename.len == 1)
474 return; 486 return;
@@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
479 491
480 dentry = d_lookup(parent, &filename); 492 dentry = d_lookup(parent, &filename);
481 if (dentry != NULL) { 493 if (dentry != NULL) {
494 /* Is there a mountpoint here? If so, just exit */
495 if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
496 &entry->fattr->fsid))
497 goto out;
482 if (nfs_same_file(dentry, entry)) { 498 if (nfs_same_file(dentry, entry)) {
483 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 499 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
484 status = nfs_refresh_inode(dentry->d_inode, entry->fattr); 500 status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94712fc781fa..e679d24c39d3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
178 iocb->ki_filp, 178 iocb->ki_filp,
179 iov_iter_count(to), (unsigned long) iocb->ki_pos); 179 iov_iter_count(to), (unsigned long) iocb->ki_pos);
180 180
181 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 181 result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
182 if (!result) { 182 if (!result) {
183 result = generic_file_read_iter(iocb, to); 183 result = generic_file_read_iter(iocb, to);
184 if (result > 0) 184 if (result > 0)
@@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
199 dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", 199 dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
200 filp, (unsigned long) count, (unsigned long long) *ppos); 200 filp, (unsigned long) count, (unsigned long long) *ppos);
201 201
202 res = nfs_revalidate_mapping(inode, filp->f_mapping); 202 res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
203 if (!res) { 203 if (!res) {
204 res = generic_file_splice_read(filp, ppos, pipe, count, flags); 204 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
205 if (res > 0) 205 if (res > 0)
@@ -372,6 +372,10 @@ start:
372 nfs_wait_bit_killable, TASK_KILLABLE); 372 nfs_wait_bit_killable, TASK_KILLABLE);
373 if (ret) 373 if (ret)
374 return ret; 374 return ret;
375 /*
376 * Wait for O_DIRECT to complete
377 */
378 nfs_inode_dio_wait(mapping->host);
375 379
376 page = grab_cache_page_write_begin(mapping, index, flags); 380 page = grab_cache_page_write_begin(mapping, index, flags);
377 if (!page) 381 if (!page)
@@ -619,6 +623,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
619 /* make sure the cache has finished storing the page */ 623 /* make sure the cache has finished storing the page */
620 nfs_fscache_wait_on_page_write(NFS_I(inode), page); 624 nfs_fscache_wait_on_page_write(NFS_I(inode), page);
621 625
626 wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
627 nfs_wait_bit_killable, TASK_KILLABLE);
628
622 lock_page(page); 629 lock_page(page);
623 mapping = page_file_mapping(page); 630 mapping = page_file_mapping(page);
624 if (mapping != inode->i_mapping) 631 if (mapping != inode->i_mapping)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..d42dff6d5e98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
556 * This is a copy of the common vmtruncate, but with the locking 556 * This is a copy of the common vmtruncate, but with the locking
557 * corrected to take into account the fact that NFS requires 557 * corrected to take into account the fact that NFS requires
558 * inode->i_size to be updated under the inode->i_lock. 558 * inode->i_size to be updated under the inode->i_lock.
559 * Note: must be called with inode->i_lock held!
559 */ 560 */
560static int nfs_vmtruncate(struct inode * inode, loff_t offset) 561static int nfs_vmtruncate(struct inode * inode, loff_t offset)
561{ 562{
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
565 if (err) 566 if (err)
566 goto out; 567 goto out;
567 568
568 spin_lock(&inode->i_lock);
569 i_size_write(inode, offset); 569 i_size_write(inode, offset);
570 /* Optimisation */ 570 /* Optimisation */
571 if (offset == 0) 571 if (offset == 0)
572 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; 572 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
573 spin_unlock(&inode->i_lock);
574 573
574 spin_unlock(&inode->i_lock);
575 truncate_pagecache(inode, offset); 575 truncate_pagecache(inode, offset);
576 spin_lock(&inode->i_lock);
576out: 577out:
577 return err; 578 return err;
578} 579}
@@ -585,10 +586,15 @@ out:
585 * Note: we do this in the *proc.c in order to ensure that 586 * Note: we do this in the *proc.c in order to ensure that
586 * it works for things like exclusive creates too. 587 * it works for things like exclusive creates too.
587 */ 588 */
588void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) 589void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
590 struct nfs_fattr *fattr)
589{ 591{
592 /* Barrier: bump the attribute generation count. */
593 nfs_fattr_set_barrier(fattr);
594
595 spin_lock(&inode->i_lock);
596 NFS_I(inode)->attr_gencount = fattr->gencount;
590 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { 597 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
591 spin_lock(&inode->i_lock);
592 if ((attr->ia_valid & ATTR_MODE) != 0) { 598 if ((attr->ia_valid & ATTR_MODE) != 0) {
593 int mode = attr->ia_mode & S_IALLUGO; 599 int mode = attr->ia_mode & S_IALLUGO;
594 mode |= inode->i_mode & ~S_IALLUGO; 600 mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
600 inode->i_gid = attr->ia_gid; 606 inode->i_gid = attr->ia_gid;
601 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS 607 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
602 | NFS_INO_INVALID_ACL); 608 | NFS_INO_INVALID_ACL);
603 spin_unlock(&inode->i_lock);
604 } 609 }
605 if ((attr->ia_valid & ATTR_SIZE) != 0) { 610 if ((attr->ia_valid & ATTR_SIZE) != 0) {
606 nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); 611 nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
607 nfs_vmtruncate(inode, attr->ia_size); 612 nfs_vmtruncate(inode, attr->ia_size);
608 } 613 }
614 nfs_update_inode(inode, fattr);
615 spin_unlock(&inode->i_lock);
609} 616}
610EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); 617EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
611 618
@@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
1028 1035
1029 if (mapping->nrpages != 0) { 1036 if (mapping->nrpages != 0) {
1030 if (S_ISREG(inode->i_mode)) { 1037 if (S_ISREG(inode->i_mode)) {
1038 unmap_mapping_range(mapping, 0, 0, 0);
1031 ret = nfs_sync_mapping(mapping); 1039 ret = nfs_sync_mapping(mapping);
1032 if (ret < 0) 1040 if (ret < 0)
1033 return ret; 1041 return ret;
@@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
1060} 1068}
1061 1069
1062/** 1070/**
1063 * nfs_revalidate_mapping - Revalidate the pagecache 1071 * __nfs_revalidate_mapping - Revalidate the pagecache
1064 * @inode - pointer to host inode 1072 * @inode - pointer to host inode
1065 * @mapping - pointer to mapping 1073 * @mapping - pointer to mapping
1074 * @may_lock - take inode->i_mutex?
1066 */ 1075 */
1067int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) 1076static int __nfs_revalidate_mapping(struct inode *inode,
1077 struct address_space *mapping,
1078 bool may_lock)
1068{ 1079{
1069 struct nfs_inode *nfsi = NFS_I(inode); 1080 struct nfs_inode *nfsi = NFS_I(inode);
1070 unsigned long *bitlock = &nfsi->flags; 1081 unsigned long *bitlock = &nfsi->flags;
@@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1113 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; 1124 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
1114 spin_unlock(&inode->i_lock); 1125 spin_unlock(&inode->i_lock);
1115 trace_nfs_invalidate_mapping_enter(inode); 1126 trace_nfs_invalidate_mapping_enter(inode);
1116 ret = nfs_invalidate_mapping(inode, mapping); 1127 if (may_lock) {
1128 mutex_lock(&inode->i_mutex);
1129 ret = nfs_invalidate_mapping(inode, mapping);
1130 mutex_unlock(&inode->i_mutex);
1131 } else
1132 ret = nfs_invalidate_mapping(inode, mapping);
1117 trace_nfs_invalidate_mapping_exit(inode, ret); 1133 trace_nfs_invalidate_mapping_exit(inode, ret);
1118 1134
1119 clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); 1135 clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1123,6 +1139,29 @@ out:
1123 return ret; 1139 return ret;
1124} 1140}
1125 1141
1142/**
1143 * nfs_revalidate_mapping - Revalidate the pagecache
1144 * @inode - pointer to host inode
1145 * @mapping - pointer to mapping
1146 */
1147int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1148{
1149 return __nfs_revalidate_mapping(inode, mapping, false);
1150}
1151
1152/**
1153 * nfs_revalidate_mapping_protected - Revalidate the pagecache
1154 * @inode - pointer to host inode
1155 * @mapping - pointer to mapping
1156 *
1157 * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
1158 * while invalidating the mapping.
1159 */
1160int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
1161{
1162 return __nfs_revalidate_mapping(inode, mapping, true);
1163}
1164
1126static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1165static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1127{ 1166{
1128 struct nfs_inode *nfsi = NFS_I(inode); 1167 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat
1231 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; 1270 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
1232} 1271}
1233 1272
1234static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
1235{
1236 if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
1237 return 0;
1238 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
1239}
1240
1241static atomic_long_t nfs_attr_generation_counter; 1273static atomic_long_t nfs_attr_generation_counter;
1242 1274
1243static unsigned long nfs_read_attr_generation_counter(void) 1275static unsigned long nfs_read_attr_generation_counter(void)
@@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void)
1249{ 1281{
1250 return atomic_long_inc_return(&nfs_attr_generation_counter); 1282 return atomic_long_inc_return(&nfs_attr_generation_counter);
1251} 1283}
1284EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter);
1252 1285
1253void nfs_fattr_init(struct nfs_fattr *fattr) 1286void nfs_fattr_init(struct nfs_fattr *fattr)
1254{ 1287{
@@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
1260} 1293}
1261EXPORT_SYMBOL_GPL(nfs_fattr_init); 1294EXPORT_SYMBOL_GPL(nfs_fattr_init);
1262 1295
1296/**
1297 * nfs_fattr_set_barrier
1298 * @fattr: attributes
1299 *
1300 * Used to set a barrier after an attribute was updated. This
1301 * barrier ensures that older attributes from RPC calls that may
1302 * have raced with our update cannot clobber these new values.
1303 * Note that you are still responsible for ensuring that other
1304 * operations which change the attribute on the server do not
1305 * collide.
1306 */
1307void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
1308{
1309 fattr->gencount = nfs_inc_attr_generation_counter();
1310}
1311
1263struct nfs_fattr *nfs_alloc_fattr(void) 1312struct nfs_fattr *nfs_alloc_fattr(void)
1264{ 1313{
1265 struct nfs_fattr *fattr; 1314 struct nfs_fattr *fattr;
@@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
1370 1419
1371 return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || 1420 return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
1372 nfs_ctime_need_update(inode, fattr) || 1421 nfs_ctime_need_update(inode, fattr) ||
1373 nfs_size_need_update(inode, fattr) ||
1374 ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); 1422 ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
1375} 1423}
1376 1424
@@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1460 int status; 1508 int status;
1461 1509
1462 spin_lock(&inode->i_lock); 1510 spin_lock(&inode->i_lock);
1511 nfs_fattr_set_barrier(fattr);
1463 status = nfs_post_op_update_inode_locked(inode, fattr); 1512 status = nfs_post_op_update_inode_locked(inode, fattr);
1464 spin_unlock(&inode->i_lock); 1513 spin_unlock(&inode->i_lock);
1465 1514
@@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1468EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); 1517EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
1469 1518
1470/** 1519/**
1471 * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache 1520 * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
1472 * @inode - pointer to inode 1521 * @inode - pointer to inode
1473 * @fattr - updated attributes 1522 * @fattr - updated attributes
1474 * 1523 *
@@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
1478 * 1527 *
1479 * This function is mainly designed to be used by the ->write_done() functions. 1528 * This function is mainly designed to be used by the ->write_done() functions.
1480 */ 1529 */
1481int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) 1530int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
1482{ 1531{
1483 int status; 1532 int status;
1484 1533
1485 spin_lock(&inode->i_lock);
1486 /* Don't do a WCC update if these attributes are already stale */ 1534 /* Don't do a WCC update if these attributes are already stale */
1487 if ((fattr->valid & NFS_ATTR_FATTR) == 0 || 1535 if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
1488 !nfs_inode_attrs_need_update(inode, fattr)) { 1536 !nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
1514 } 1562 }
1515out_noforce: 1563out_noforce:
1516 status = nfs_post_op_update_inode_locked(inode, fattr); 1564 status = nfs_post_op_update_inode_locked(inode, fattr);
1565 return status;
1566}
1567
1568/**
1569 * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
1570 * @inode - pointer to inode
1571 * @fattr - updated attributes
1572 *
1573 * After an operation that has changed the inode metadata, mark the
1574 * attribute cache as being invalid, then try to update it. Fake up
1575 * weak cache consistency data, if none exist.
1576 *
1577 * This function is mainly designed to be used by the ->write_done() functions.
1578 */
1579int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
1580{
1581 int status;
1582
1583 spin_lock(&inode->i_lock);
1584 nfs_fattr_set_barrier(fattr);
1585 status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
1517 spin_unlock(&inode->i_lock); 1586 spin_unlock(&inode->i_lock);
1518 return status; 1587 return status;
1519} 1588}
@@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1715 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); 1784 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
1716 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 1785 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
1717 nfsi->attrtimeo_timestamp = now; 1786 nfsi->attrtimeo_timestamp = now;
1787 /* Set barrier to be more recent than all outstanding updates */
1718 nfsi->attr_gencount = nfs_inc_attr_generation_counter(); 1788 nfsi->attr_gencount = nfs_inc_attr_generation_counter();
1719 } else { 1789 } else {
1720 if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { 1790 if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
@@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1722 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); 1792 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1723 nfsi->attrtimeo_timestamp = now; 1793 nfsi->attrtimeo_timestamp = now;
1724 } 1794 }
1795 /* Set the barrier to be more recent than this fattr */
1796 if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
1797 nfsi->attr_gencount = fattr->gencount;
1725 } 1798 }
1726 invalid &= ~NFS_INO_INVALID_ATTR; 1799 invalid &= ~NFS_INO_INVALID_ATTR;
1727 /* Don't invalidate the data if we were to blame */ 1800 /* Don't invalidate the data if we were to blame */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
459 struct nfs_commit_info *cinfo, 459 struct nfs_commit_info *cinfo,
460 u32 ds_commit_idx); 460 u32 ds_commit_idx);
461int nfs_write_need_commit(struct nfs_pgio_header *); 461int nfs_write_need_commit(struct nfs_pgio_header *);
462void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
462int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 463int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
463 int how, struct nfs_commit_info *cinfo); 464 int how, struct nfs_commit_info *cinfo);
464void nfs_retry_commit(struct list_head *page_list, 465void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
138 nfs_fattr_init(fattr); 138 nfs_fattr_init(fattr);
139 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 139 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
140 if (status == 0) 140 if (status == 0)
141 nfs_setattr_update_inode(inode, sattr); 141 nfs_setattr_update_inode(inode, sattr, fattr);
142 dprintk("NFS reply setattr: %d\n", status); 142 dprintk("NFS reply setattr: %d\n", status);
143 return status; 143 return status;
144} 144}
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
834 if (nfs3_async_handle_jukebox(task, inode)) 834 if (nfs3_async_handle_jukebox(task, inode))
835 return -EAGAIN; 835 return -EAGAIN;
836 if (task->tk_status >= 0) 836 if (task->tk_status >= 0)
837 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); 837 nfs_writeback_update_inode(hdr);
838 return 0; 838 return 0;
839} 839}
840 840
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 2a932fdc57cb..53852a4bd88b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
1987 if (entry->fattr->valid & NFS_ATTR_FATTR_V3) 1987 if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
1988 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); 1988 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
1989 1989
1990 if (entry->fattr->fileid != entry->ino) {
1991 entry->fattr->mounted_on_fileid = entry->ino;
1992 entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
1993 }
1994
1990 /* In fact, a post_op_fh3: */ 1995 /* In fact, a post_op_fh3: */
1991 p = xdr_inline_decode(xdr, 4); 1996 p = xdr_inline_decode(xdr, 4);
1992 if (unlikely(p == NULL)) 1997 if (unlikely(p == NULL))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8646af9b11d2..86d6214ea022 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new,
621 spin_lock(&nn->nfs_client_lock); 621 spin_lock(&nn->nfs_client_lock);
622 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 622 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
623 623
624 if (pos == new)
625 goto found;
626
624 if (pos->rpc_ops != new->rpc_ops) 627 if (pos->rpc_ops != new->rpc_ops)
625 continue; 628 continue;
626 629
@@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
639 prev = pos; 642 prev = pos;
640 643
641 status = nfs_wait_client_init_complete(pos); 644 status = nfs_wait_client_init_complete(pos);
642 if (pos->cl_cons_state == NFS_CS_SESSION_INITING) {
643 nfs4_schedule_lease_recovery(pos);
644 status = nfs4_wait_clnt_recover(pos);
645 }
646 spin_lock(&nn->nfs_client_lock); 645 spin_lock(&nn->nfs_client_lock);
647 if (status < 0) 646 if (status < 0)
648 break; 647 break;
@@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
668 */ 667 */
669 if (!nfs4_match_client_owner_id(pos, new)) 668 if (!nfs4_match_client_owner_id(pos, new))
670 continue; 669 continue;
671 670found:
672 atomic_inc(&pos->cl_count); 671 atomic_inc(&pos->cl_count);
673 *result = pos; 672 *result = pos;
674 status = 0; 673 status = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 88180ac5ea0e..627f37c44456 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
901 if (!cinfo->atomic || cinfo->before != dir->i_version) 901 if (!cinfo->atomic || cinfo->before != dir->i_version)
902 nfs_force_lookup_revalidate(dir); 902 nfs_force_lookup_revalidate(dir);
903 dir->i_version = cinfo->after; 903 dir->i_version = cinfo->after;
904 nfsi->attr_gencount = nfs_inc_attr_generation_counter();
904 nfs_fscache_invalidate(dir); 905 nfs_fscache_invalidate(dir);
905 spin_unlock(&dir->i_lock); 906 spin_unlock(&dir->i_lock);
906} 907}
@@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1552 1553
1553 opendata->o_arg.open_flags = 0; 1554 opendata->o_arg.open_flags = 0;
1554 opendata->o_arg.fmode = fmode; 1555 opendata->o_arg.fmode = fmode;
1556 opendata->o_arg.share_access = nfs4_map_atomic_open_share(
1557 NFS_SB(opendata->dentry->d_sb),
1558 fmode, 0);
1555 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 1559 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
1556 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 1560 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
1557 nfs4_init_opendata_res(opendata); 1561 nfs4_init_opendata_res(opendata);
@@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir,
2413 opendata->o_res.f_attr, sattr, 2417 opendata->o_res.f_attr, sattr,
2414 state, label, olabel); 2418 state, label, olabel);
2415 if (status == 0) { 2419 if (status == 0) {
2416 nfs_setattr_update_inode(state->inode, sattr); 2420 nfs_setattr_update_inode(state->inode, sattr,
2417 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); 2421 opendata->o_res.f_attr);
2418 nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); 2422 nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
2419 } 2423 }
2420 } 2424 }
@@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2651 case -NFS4ERR_BAD_STATEID: 2655 case -NFS4ERR_BAD_STATEID:
2652 case -NFS4ERR_EXPIRED: 2656 case -NFS4ERR_EXPIRED:
2653 if (!nfs4_stateid_match(&calldata->arg.stateid, 2657 if (!nfs4_stateid_match(&calldata->arg.stateid,
2654 &state->stateid)) { 2658 &state->open_stateid)) {
2655 rpc_restart_call_prepare(task); 2659 rpc_restart_call_prepare(task);
2656 goto out_release; 2660 goto out_release;
2657 } 2661 }
@@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2687 is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); 2691 is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
2688 is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); 2692 is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
2689 is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); 2693 is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
2690 nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid); 2694 nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
2691 /* Calculate the change in open mode */ 2695 /* Calculate the change in open mode */
2692 calldata->arg.fmode = 0; 2696 calldata->arg.fmode = 0;
2693 if (state->n_rdwr == 0) { 2697 if (state->n_rdwr == 0) {
@@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
3288 3292
3289 status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); 3293 status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
3290 if (status == 0) { 3294 if (status == 0) {
3291 nfs_setattr_update_inode(inode, sattr); 3295 nfs_setattr_update_inode(inode, sattr, fattr);
3292 nfs_setsecurity(inode, fattr, label); 3296 nfs_setsecurity(inode, fattr, label);
3293 } 3297 }
3294 nfs4_label_free(label); 3298 nfs4_label_free(label);
@@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
4234 } 4238 }
4235 if (task->tk_status >= 0) { 4239 if (task->tk_status >= 0) {
4236 renew_lease(NFS_SERVER(inode), hdr->timestamp); 4240 renew_lease(NFS_SERVER(inode), hdr->timestamp);
4237 nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); 4241 nfs_writeback_update_inode(hdr);
4238 } 4242 }
4239 return 0; 4243 return 0;
4240} 4244}
@@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
6893 6897
6894 if (status == 0) { 6898 if (status == 0) {
6895 clp->cl_clientid = res.clientid; 6899 clp->cl_clientid = res.clientid;
6896 clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); 6900 clp->cl_exchange_flags = res.flags;
6897 if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) 6901 /* Client ID is not confirmed */
6902 if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
6903 clear_bit(NFS4_SESSION_ESTABLISHED,
6904 &clp->cl_session->session_state);
6898 clp->cl_seqid = res.seqid; 6905 clp->cl_seqid = res.seqid;
6906 }
6899 6907
6900 kfree(clp->cl_serverowner); 6908 kfree(clp->cl_serverowner);
6901 clp->cl_serverowner = res.server_owner; 6909 clp->cl_serverowner = res.server_owner;
@@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session,
7227 struct nfs41_create_session_res *res) 7235 struct nfs41_create_session_res *res)
7228{ 7236{
7229 nfs4_copy_sessionid(&session->sess_id, &res->sessionid); 7237 nfs4_copy_sessionid(&session->sess_id, &res->sessionid);
7238 /* Mark client id and session as being confirmed */
7239 session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
7240 set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state);
7230 session->flags = res->flags; 7241 session->flags = res->flags;
7231 memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs)); 7242 memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs));
7232 if (res->flags & SESSION4_BACK_CHAN) 7243 if (res->flags & SESSION4_BACK_CHAN)
@@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
7322 dprintk("--> nfs4_proc_destroy_session\n"); 7333 dprintk("--> nfs4_proc_destroy_session\n");
7323 7334
7324 /* session is still being setup */ 7335 /* session is still being setup */
7325 if (session->clp->cl_cons_state != NFS_CS_READY) 7336 if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state))
7326 return status; 7337 return 0;
7327 7338
7328 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 7339 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
7329 trace_nfs4_destroy_session(session->clp, status); 7340 trace_nfs4_destroy_session(session->clp, status);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index fc46c7455898..e3ea2c5324d6 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -70,6 +70,7 @@ struct nfs4_session {
70 70
71enum nfs4_session_state { 71enum nfs4_session_state {
72 NFS4_SESSION_INITING, 72 NFS4_SESSION_INITING,
73 NFS4_SESSION_ESTABLISHED,
73}; 74};
74 75
75extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, 76extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ad908e9ce9c..f95e3b58bbc3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
346 status = nfs4_proc_exchange_id(clp, cred); 346 status = nfs4_proc_exchange_id(clp, cred);
347 if (status != NFS4_OK) 347 if (status != NFS4_OK)
348 return status; 348 return status;
349 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
350 349
351 return nfs41_walk_client_list(clp, result, cred); 350 status = nfs41_walk_client_list(clp, result, cred);
351 if (status < 0)
352 return status;
353 if (clp != *result)
354 return 0;
355
356 /* Purge state if the client id was established in a prior instance */
357 if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
358 set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
359 else
360 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
361 nfs4_schedule_state_manager(clp);
362 status = nfs_wait_client_init_complete(clp);
363 if (status < 0)
364 nfs_put_client(clp);
365 return status;
352} 366}
353 367
354#endif /* CONFIG_NFS_V4_1 */ 368#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
139 nfs_fattr_init(fattr); 139 nfs_fattr_init(fattr);
140 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 140 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
141 if (status == 0) 141 if (status == 0)
142 nfs_setattr_update_inode(inode, sattr); 142 nfs_setattr_update_inode(inode, sattr, fattr);
143 dprintk("NFS reply setattr: %d\n", status); 143 dprintk("NFS reply setattr: %d\n", status);
144 return status; 144 return status;
145} 145}
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
609 609
610static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) 610static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
611{ 611{
612 struct inode *inode = hdr->inode;
613
614 if (task->tk_status >= 0) 612 if (task->tk_status >= 0)
615 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); 613 nfs_writeback_update_inode(hdr);
616 return 0; 614 return 0;
617} 615}
618 616
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..849ed784d6ac 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
1377 return 0; 1377 return 0;
1378} 1378}
1379 1379
1380static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
1381 struct nfs_fattr *fattr)
1382{
1383 struct nfs_pgio_args *argp = &hdr->args;
1384 struct nfs_pgio_res *resp = &hdr->res;
1385
1386 if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
1387 return;
1388 if (argp->offset + resp->count != fattr->size)
1389 return;
1390 if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
1391 return;
1392 /* Set attribute barrier */
1393 nfs_fattr_set_barrier(fattr);
1394}
1395
1396void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
1397{
1398 struct nfs_fattr *fattr = hdr->res.fattr;
1399 struct inode *inode = hdr->inode;
1400
1401 if (fattr == NULL)
1402 return;
1403 spin_lock(&inode->i_lock);
1404 nfs_writeback_check_extend(hdr, fattr);
1405 nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
1406 spin_unlock(&inode->i_lock);
1407}
1408EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
1409
1380/* 1410/*
1381 * This function is called when the WRITE call is complete. 1411 * This function is called when the WRITE call is complete.
1382 */ 1412 */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdbc78c72542..03d647bf195d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
137 seg->offset = iomap.offset; 137 seg->offset = iomap.offset;
138 seg->length = iomap.length; 138 seg->length = iomap.length;
139 139
140 dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); 140 dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
141 return 0; 141 return 0;
142 142
143out_error: 143out_error:
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 9da89fddab33..9aa2796da90d 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
122 122
123 p = xdr_decode_hyper(p, &bex.foff); 123 p = xdr_decode_hyper(p, &bex.foff);
124 if (bex.foff & (block_size - 1)) { 124 if (bex.foff & (block_size - 1)) {
125 dprintk("%s: unaligned offset %lld\n", 125 dprintk("%s: unaligned offset 0x%llx\n",
126 __func__, bex.foff); 126 __func__, bex.foff);
127 goto fail; 127 goto fail;
128 } 128 }
129 p = xdr_decode_hyper(p, &bex.len); 129 p = xdr_decode_hyper(p, &bex.len);
130 if (bex.len & (block_size - 1)) { 130 if (bex.len & (block_size - 1)) {
131 dprintk("%s: unaligned length %lld\n", 131 dprintk("%s: unaligned length 0x%llx\n",
132 __func__, bex.foff); 132 __func__, bex.foff);
133 goto fail; 133 goto fail;
134 } 134 }
135 p = xdr_decode_hyper(p, &bex.soff); 135 p = xdr_decode_hyper(p, &bex.soff);
136 if (bex.soff & (block_size - 1)) { 136 if (bex.soff & (block_size - 1)) {
137 dprintk("%s: unaligned disk offset %lld\n", 137 dprintk("%s: unaligned disk offset 0x%llx\n",
138 __func__, bex.soff); 138 __func__, bex.soff);
139 goto fail; 139 goto fail;
140 } 140 }
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3c1bfa155571..6904213a4363 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
118{ 118{
119 struct super_block *sb = exp->ex_path.mnt->mnt_sb; 119 struct super_block *sb = exp->ex_path.mnt->mnt_sb;
120 120
121 if (exp->ex_flags & NFSEXP_NOPNFS) 121 if (!(exp->ex_flags & NFSEXP_PNFS))
122 return; 122 return;
123 123
124 if (sb->s_export_op->get_uuid && 124 if (sb->s_export_op->get_uuid &&
@@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
440 list_move_tail(&lp->lo_perstate, reaplist); 440 list_move_tail(&lp->lo_perstate, reaplist);
441 return; 441 return;
442 } 442 }
443 end = seg->offset; 443 lo->offset = layout_end(seg);
444 } else { 444 } else {
445 /* retain the whole layout segment on a split. */ 445 /* retain the whole layout segment on a split. */
446 if (layout_end(seg) < end) { 446 if (layout_end(seg) < end) {
447 dprintk("%s: split not supported\n", __func__); 447 dprintk("%s: split not supported\n", __func__);
448 return; 448 return;
449 } 449 }
450 450 end = seg->offset;
451 lo->offset = layout_end(seg);
452 } 451 }
453 452
454 layout_update_len(lo, end); 453 layout_update_len(lo, end);
@@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
513 512
514 spin_lock(&clp->cl_lock); 513 spin_lock(&clp->cl_lock);
515 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { 514 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
515 if (ls->ls_layout_type != lrp->lr_layout_type)
516 continue;
517
516 if (lrp->lr_return_type == RETURN_FSID && 518 if (lrp->lr_return_type == RETURN_FSID &&
517 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, 519 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
518 &cstate->current_fh.fh_handle)) 520 &cstate->current_fh.fh_handle))
@@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
587 589
588 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); 590 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
589 591
590 nfsd4_cb_layout_fail(ls); 592 trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
591 593
592 printk(KERN_WARNING 594 printk(KERN_WARNING
593 "nfsd: client %s failed to respond to layout recall. " 595 "nfsd: client %s failed to respond to layout recall. "
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d30bea8d0277..92b9d97aff4f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
1237 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); 1237 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
1238 1238
1239 gdp->gd_notify_types &= ops->notify_types; 1239 gdp->gd_notify_types &= ops->notify_types;
1240 exp_put(exp);
1241out: 1240out:
1241 exp_put(exp);
1242 return nfserr; 1242 return nfserr;
1243} 1243}
1244 1244
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6b2a09f793f..8ba1d888f1e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp)
1638 nfs4_put_stid(&dp->dl_stid); 1638 nfs4_put_stid(&dp->dl_stid);
1639 } 1639 }
1640 while (!list_empty(&clp->cl_revoked)) { 1640 while (!list_empty(&clp->cl_revoked)) {
1641 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1641 dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
1642 list_del_init(&dp->dl_recall_lru); 1642 list_del_init(&dp->dl_recall_lru);
1643 nfs4_put_stid(&dp->dl_stid); 1643 nfs4_put_stid(&dp->dl_stid);
1644 } 1644 }
@@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
3221 } else 3221 } else
3222 nfs4_free_openowner(&oo->oo_owner); 3222 nfs4_free_openowner(&oo->oo_owner);
3223 spin_unlock(&clp->cl_lock); 3223 spin_unlock(&clp->cl_lock);
3224 return oo; 3224 return ret;
3225} 3225}
3226 3226
3227static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 3227static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
@@ -5062,7 +5062,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
5062 } else 5062 } else
5063 nfs4_free_lockowner(&lo->lo_owner); 5063 nfs4_free_lockowner(&lo->lo_owner);
5064 spin_unlock(&clp->cl_lock); 5064 spin_unlock(&clp->cl_lock);
5065 return lo; 5065 return ret;
5066} 5066}
5067 5067
5068static void 5068static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index df5e66caf100..5fb7e78169a6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset); 1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length); 1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length);
1564 p = xdr_decode_hyper(p, &lgp->lg_minlength); 1564 p = xdr_decode_hyper(p, &lgp->lg_minlength);
1565 nfsd4_decode_stateid(argp, &lgp->lg_sid); 1565
1566 status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
1567 if (status)
1568 return status;
1569
1566 READ_BUF(4); 1570 READ_BUF(4);
1567 lgp->lg_maxcount = be32_to_cpup(p++); 1571 lgp->lg_maxcount = be32_to_cpup(p++);
1568 1572
@@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
1580 p = xdr_decode_hyper(p, &lcp->lc_seg.offset); 1584 p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
1581 p = xdr_decode_hyper(p, &lcp->lc_seg.length); 1585 p = xdr_decode_hyper(p, &lcp->lc_seg.length);
1582 lcp->lc_reclaim = be32_to_cpup(p++); 1586 lcp->lc_reclaim = be32_to_cpup(p++);
1583 nfsd4_decode_stateid(argp, &lcp->lc_sid); 1587
1588 status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
1589 if (status)
1590 return status;
1591
1584 READ_BUF(4); 1592 READ_BUF(4);
1585 lcp->lc_newoffset = be32_to_cpup(p++); 1593 lcp->lc_newoffset = be32_to_cpup(p++);
1586 if (lcp->lc_newoffset) { 1594 if (lcp->lc_newoffset) {
@@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
1628 READ_BUF(16); 1636 READ_BUF(16);
1629 p = xdr_decode_hyper(p, &lrp->lr_seg.offset); 1637 p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
1630 p = xdr_decode_hyper(p, &lrp->lr_seg.length); 1638 p = xdr_decode_hyper(p, &lrp->lr_seg.length);
1631 nfsd4_decode_stateid(argp, &lrp->lr_sid); 1639
1640 status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
1641 if (status)
1642 return status;
1643
1632 READ_BUF(4); 1644 READ_BUF(4);
1633 lrp->lrf_body_len = be32_to_cpup(p++); 1645 lrp->lrf_body_len = be32_to_cpup(p++);
1634 if (lrp->lrf_body_len > 0) { 1646 if (lrp->lrf_body_len > 0) {
@@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4123 return nfserr_resource; 4135 return nfserr_resource;
4124 *p++ = cpu_to_be32(lrp->lrs_present); 4136 *p++ = cpu_to_be32(lrp->lrs_present);
4125 if (lrp->lrs_present) 4137 if (lrp->lrs_present)
4126 nfsd4_encode_stateid(xdr, &lrp->lr_sid); 4138 return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
4127 return nfs_ok; 4139 return nfs_ok;
4128} 4140}
4129#endif /* CONFIG_NFSD_PNFS */ 4141#endif /* CONFIG_NFSD_PNFS */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 83a9694ec485..46ec934f5dee 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void)
165{ 165{
166 unsigned int hashsize; 166 unsigned int hashsize;
167 unsigned int i; 167 unsigned int i;
168 int status = 0;
168 169
169 max_drc_entries = nfsd_cache_size_limit(); 170 max_drc_entries = nfsd_cache_size_limit();
170 atomic_set(&num_drc_entries, 0); 171 atomic_set(&num_drc_entries, 0);
171 hashsize = nfsd_hashsize(max_drc_entries); 172 hashsize = nfsd_hashsize(max_drc_entries);
172 maskbits = ilog2(hashsize); 173 maskbits = ilog2(hashsize);
173 174
174 register_shrinker(&nfsd_reply_cache_shrinker); 175 status = register_shrinker(&nfsd_reply_cache_shrinker);
176 if (status)
177 return status;
178
175 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 179 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
176 0, 0, NULL); 180 0, 0, NULL);
177 if (!drc_slab) 181 if (!drc_slab)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 469086b9f99b..0c3f303baf32 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1907 struct the_nilfs *nilfs) 1907 struct the_nilfs *nilfs)
1908{ 1908{
1909 struct nilfs_inode_info *ii, *n; 1909 struct nilfs_inode_info *ii, *n;
1910 int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
1910 int defer_iput = false; 1911 int defer_iput = false;
1911 1912
1912 spin_lock(&nilfs->ns_inode_lock); 1913 spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1919 brelse(ii->i_bh); 1920 brelse(ii->i_bh);
1920 ii->i_bh = NULL; 1921 ii->i_bh = NULL;
1921 list_del_init(&ii->i_dirty); 1922 list_del_init(&ii->i_dirty);
1922 if (!ii->vfs_inode.i_nlink) { 1923 if (!ii->vfs_inode.i_nlink || during_mount) {
1923 /* 1924 /*
1924 * Defer calling iput() to avoid a deadlock 1925 * Defer calling iput() to avoid deadlocks if
1925 * over I_SYNC flag for inodes with i_nlink == 0 1926 * i_nlink == 0 or mount is not yet finished.
1926 */ 1927 */
1927 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); 1928 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1928 defer_iput = true; 1929 defer_iput = true;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9a66ff79ff27..d2f97ecca6a5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) 143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
144 return false; 144 return false;
145 145
146 if (event_mask & marks_mask & ~marks_ignored_mask) 146 if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
147 ~marks_ignored_mask)
147 return true; 148 return true;
148 149
149 return false; 150 return false;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8490c64d34fe..460c6c37e683 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
502 502
503static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb) 503static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
504{ 504{
505 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) 505 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
506 return 1; 506 return 1;
507 return 0; 507 return 0;
508} 508}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 20e37a3ed26f..db64ce2d4667 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -102,11 +102,11 @@
102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ 103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ 104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) 105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
106 | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
106#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 107#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
107 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 108 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
108 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \ 109 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
109 | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
110 110
111/* 111/*
112 * Heartbeat-only devices are missing journals and other files. The 112 * Heartbeat-only devices are missing journals and other files. The
@@ -179,6 +179,11 @@
179#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 179#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
180 180
181/* 181/*
182 * Append Direct IO support
183 */
184#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000
185
186/*
182 * backup superblock flag is used to indicate that this volume 187 * backup superblock flag is used to indicate that this volume
183 * has backup superblocks. 188 * has backup superblocks.
184 */ 189 */
@@ -200,10 +205,6 @@
200#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 205#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
201#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 206#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
202 207
203/*
204 * Append Direct IO support
205 */
206#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
207 208
208/* The byte offset of the first backup block will be 1G. 209/* The byte offset of the first backup block will be 1G.
209 * The following will be 4G, 16G, 64G, 256G and 1T. 210 * The following will be 4G, 16G, 64G, 256G and 1T.
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b90952f528b1..5f0d1993e6e3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
529{ 529{
530 struct ovl_fs *ufs = sb->s_fs_info; 530 struct ovl_fs *ufs = sb->s_fs_info;
531 531
532 if (!(*flags & MS_RDONLY) && 532 if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
533 (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
534 return -EROFS; 533 return -EROFS;
535 534
536 return 0; 535 return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
615 break; 614 break;
616 615
617 default: 616 default:
617 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
618 return -EINVAL; 618 return -EINVAL;
619 } 619 }
620 } 620 }
621
622 /* Workdir is useless in non-upper mount */
623 if (!config->upperdir && config->workdir) {
624 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
625 config->workdir);
626 kfree(config->workdir);
627 config->workdir = NULL;
628 }
629
621 return 0; 630 return 0;
622} 631}
623 632
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
837 846
838 sb->s_stack_depth = 0; 847 sb->s_stack_depth = 0;
839 if (ufs->config.upperdir) { 848 if (ufs->config.upperdir) {
840 /* FIXME: workdir is not needed for a R/O mount */
841 if (!ufs->config.workdir) { 849 if (!ufs->config.workdir) {
842 pr_err("overlayfs: missing 'workdir'\n"); 850 pr_err("overlayfs: missing 'workdir'\n");
843 goto out_free_config; 851 goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
847 if (err) 855 if (err)
848 goto out_free_config; 856 goto out_free_config;
849 857
858 /* Upper fs should not be r/o */
859 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
860 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
861 err = -EINVAL;
862 goto out_put_upperpath;
863 }
864
850 err = ovl_mount_dir(ufs->config.workdir, &workpath); 865 err = ovl_mount_dir(ufs->config.workdir, &workpath);
851 if (err) 866 if (err)
852 goto out_put_upperpath; 867 goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
869 884
870 err = -EINVAL; 885 err = -EINVAL;
871 stacklen = ovl_split_lowerdirs(lowertmp); 886 stacklen = ovl_split_lowerdirs(lowertmp);
872 if (stacklen > OVL_MAX_STACK) 887 if (stacklen > OVL_MAX_STACK) {
888 pr_err("overlayfs: too many lower directries, limit is %d\n",
889 OVL_MAX_STACK);
873 goto out_free_lowertmp; 890 goto out_free_lowertmp;
891 } else if (!ufs->config.upperdir && stacklen == 1) {
892 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
893 goto out_free_lowertmp;
894 }
874 895
875 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 896 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
876 if (!stack) 897 if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
932 ufs->numlower++; 953 ufs->numlower++;
933 } 954 }
934 955
935 /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ 956 /* If the upper fs is nonexistent, we mark overlayfs r/o too */
936 if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) 957 if (!ufs->upper_mnt)
937 sb->s_flags |= MS_RDONLY; 958 sb->s_flags |= MS_RDONLY;
938 959
939 sb->s_d_op = &ovl_dentry_operations; 960 sb->s_d_op = &ovl_dentry_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
1325 1325
1326static int pagemap_open(struct inode *inode, struct file *file) 1326static int pagemap_open(struct inode *inode, struct file *file)
1327{ 1327{
1328 /* do not disclose physical addresses: attack vector */
1329 if (!capable(CAP_SYS_ADMIN))
1330 return -EPERM;
1328 pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " 1331 pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
1329 "to stop being page-shift some time soon. See the " 1332 "to stop being page-shift some time soon. See the "
1330 "linux/Documentation/vm/pagemap.txt for details.\n"); 1333 "linux/Documentation/vm/pagemap.txt for details.\n");