aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTrond Myklebust <trond.myklebust@primarydata.com>2015-04-23 15:16:37 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2015-04-23 15:16:37 -0400
commitf139b6c676c7e49b66016b28bf3f8ec5c54be891 (patch)
tree742f00e431dded1daf642b44f4c199b318f255dc /fs
parent21330b667070fd64b2340d8d31c1b0800df78ec8 (diff)
parentd654788e98f74f2df8dfc6079fa314938f739486 (diff)
Merge tag 'nfs-rdma-for-4.1-1' of git://git.linux-nfs.org/projects/anna/nfs-rdma
NFS: NFSoRDMA Client Changes This patch series creates an operation vector for each of the different memory registration modes. This should make it easier to one day increase credit limit, rsize, and wsize. Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/file.c19
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/inode.c112
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/tests/inode-tests.c197
-rw-r--r--fs/btrfs/transaction.c39
-rw-r--r--fs/fuse/dev.c19
-rw-r--r--fs/hfsplus/brec.c20
-rw-r--r--fs/kernfs/file.c1
-rw-r--r--fs/locks.c2
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/notify/fanotify/fanotify.c3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h15
-rw-r--r--fs/overlayfs/super.c33
-rw-r--r--fs/proc/task_mmu.c3
20 files changed, 439 insertions, 85 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..a91795e01a7f 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
699 boff = tmp % bsize; 699 boff = tmp % bsize;
700 if (boff) { 700 if (boff) {
701 bh = affs_bread_ino(inode, bidx, 0); 701 bh = affs_bread_ino(inode, bidx, 0);
702 if (IS_ERR(bh)) 702 if (IS_ERR(bh)) {
703 return PTR_ERR(bh); 703 written = PTR_ERR(bh);
704 goto err_first_bh;
705 }
704 tmp = min(bsize - boff, to - from); 706 tmp = min(bsize - boff, to - from);
705 BUG_ON(boff + tmp > bsize || tmp > bsize); 707 BUG_ON(boff + tmp > bsize || tmp > bsize);
706 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 708 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
712 bidx++; 714 bidx++;
713 } else if (bidx) { 715 } else if (bidx) {
714 bh = affs_bread_ino(inode, bidx - 1, 0); 716 bh = affs_bread_ino(inode, bidx - 1, 0);
715 if (IS_ERR(bh)) 717 if (IS_ERR(bh)) {
716 return PTR_ERR(bh); 718 written = PTR_ERR(bh);
719 goto err_first_bh;
720 }
717 } 721 }
718 while (from + bsize <= to) { 722 while (from + bsize <= to) {
719 prev_bh = bh; 723 prev_bh = bh;
720 bh = affs_getemptyblk_ino(inode, bidx); 724 bh = affs_getemptyblk_ino(inode, bidx);
721 if (IS_ERR(bh)) 725 if (IS_ERR(bh))
722 goto out; 726 goto err_bh;
723 memcpy(AFFS_DATA(bh), data + from, bsize); 727 memcpy(AFFS_DATA(bh), data + from, bsize);
724 if (buffer_new(bh)) { 728 if (buffer_new(bh)) {
725 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 729 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
751 prev_bh = bh; 755 prev_bh = bh;
752 bh = affs_bread_ino(inode, bidx, 1); 756 bh = affs_bread_ino(inode, bidx, 1);
753 if (IS_ERR(bh)) 757 if (IS_ERR(bh))
754 goto out; 758 goto err_bh;
755 tmp = min(bsize, to - from); 759 tmp = min(bsize, to - from);
756 BUG_ON(tmp > bsize); 760 BUG_ON(tmp > bsize);
757 memcpy(AFFS_DATA(bh), data + from, tmp); 761 memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
790 if (tmp > inode->i_size) 794 if (tmp > inode->i_size)
791 inode->i_size = AFFS_I(inode)->mmu_private = tmp; 795 inode->i_size = AFFS_I(inode)->mmu_private = tmp;
792 796
797err_first_bh:
793 unlock_page(page); 798 unlock_page(page);
794 page_cache_release(page); 799 page_cache_release(page);
795 800
796 return written; 801 return written;
797 802
798out: 803err_bh:
799 bh = prev_bh; 804 bh = prev_bh;
800 if (!written) 805 if (!written)
801 written = PTR_ERR(bh); 806 written = PTR_ERR(bh);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3387 3387
3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3389 struct btrfs_root *root); 3389 struct btrfs_root *root);
3390int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3391 struct btrfs_root *root);
3390int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 3392int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
3391int btrfs_free_block_groups(struct btrfs_fs_info *info); 3393int btrfs_free_block_groups(struct btrfs_fs_info *info);
3392int btrfs_read_block_groups(struct btrfs_root *root); 3394int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
3909 loff_t actual_len, u64 *alloc_hint); 3911 loff_t actual_len, u64 *alloc_hint);
3910int btrfs_inode_check_errors(struct inode *inode); 3912int btrfs_inode_check_errors(struct inode *inode);
3911extern const struct dentry_operations btrfs_dentry_operations; 3913extern const struct dentry_operations btrfs_dentry_operations;
3914#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3915void btrfs_test_inode_set_ops(struct inode *inode);
3916#endif
3912 3917
3913/* ioctl.c */ 3918/* ioctl.c */
3914long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3919long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3921 } 3921 }
3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) 3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
3923 + sizeof(struct btrfs_chunk)) { 3923 + sizeof(struct btrfs_chunk)) {
3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", 3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
3925 btrfs_super_sys_array_size(sb), 3925 btrfs_super_sys_array_size(sb),
3926 sizeof(struct btrfs_disk_key) 3926 sizeof(struct btrfs_disk_key)
3927 + sizeof(struct btrfs_chunk)); 3927 + sizeof(struct btrfs_chunk));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6f080451fcb1..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3325,6 +3325,32 @@ out:
3325 return ret; 3325 return ret;
3326} 3326}
3327 3327
3328int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root)
3330{
3331 struct btrfs_block_group_cache *cache, *tmp;
3332 struct btrfs_transaction *cur_trans = trans->transaction;
3333 struct btrfs_path *path;
3334
3335 if (list_empty(&cur_trans->dirty_bgs) ||
3336 !btrfs_test_opt(root, SPACE_CACHE))
3337 return 0;
3338
3339 path = btrfs_alloc_path();
3340 if (!path)
3341 return -ENOMEM;
3342
3343 /* Could add new block groups, use _safe just in case */
3344 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3345 dirty_list) {
3346 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3347 cache_save_setup(cache, trans, path);
3348 }
3349
3350 btrfs_free_path(path);
3351 return 0;
3352}
3353
3328int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3354int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root) 3355 struct btrfs_root *root)
3330{ 3356{
@@ -5110,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5110 num_bytes = ALIGN(num_bytes, root->sectorsize); 5136 num_bytes = ALIGN(num_bytes, root->sectorsize);
5111 5137
5112 spin_lock(&BTRFS_I(inode)->lock); 5138 spin_lock(&BTRFS_I(inode)->lock);
5113 BTRFS_I(inode)->outstanding_extents++; 5139 nr_extents = (unsigned)div64_u64(num_bytes +
5140 BTRFS_MAX_EXTENT_SIZE - 1,
5141 BTRFS_MAX_EXTENT_SIZE);
5142 BTRFS_I(inode)->outstanding_extents += nr_extents;
5143 nr_extents = 0;
5114 5144
5115 if (BTRFS_I(inode)->outstanding_extents > 5145 if (BTRFS_I(inode)->outstanding_extents >
5116 BTRFS_I(inode)->reserved_extents) 5146 BTRFS_I(inode)->reserved_extents)
@@ -5255,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5255 if (dropped > 0) 5285 if (dropped > 0)
5256 to_free += btrfs_calc_trans_metadata_size(root, dropped); 5286 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5257 5287
5288 if (btrfs_test_is_dummy_root(root))
5289 return;
5290
5258 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5291 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5259 btrfs_ino(inode), to_free, 0); 5292 btrfs_ino(inode), to_free, 0);
5260 if (root->fs_info->quota_enabled) { 5293 if (root->fs_info->quota_enabled) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
4968 4968
4969 /* Should be safe to release our pages at this point */ 4969 /* Should be safe to release our pages at this point */
4970 btrfs_release_extent_buffer_page(eb); 4970 btrfs_release_extent_buffer_page(eb);
4971#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4972 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4973 __free_extent_buffer(eb);
4974 return 1;
4975 }
4976#endif
4971 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4977 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4972 return 1; 4978 return 1;
4973 } 4979 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da828cf5e8f8..d2e732d7af52 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
108 108
109static int btrfs_dirty_inode(struct inode *inode); 109static int btrfs_dirty_inode(struct inode *inode);
110 110
111#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
112void btrfs_test_inode_set_ops(struct inode *inode)
113{
114 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
115}
116#endif
117
111static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 118static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 struct inode *inode, struct inode *dir, 119 struct inode *inode, struct inode *dir,
113 const struct qstr *qstr) 120 const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
1542 u64 new_size; 1549 u64 new_size;
1543 1550
1544 /* 1551 /*
1545 * We need the largest size of the remaining extent to see if we 1552 * See the explanation in btrfs_merge_extent_hook, the same
1546 * need to add a new outstanding extent. Think of the following 1553 * applies here, just in reverse.
1547 * case
1548 *
1549 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
1550 *
1551 * The new_size would just be 4k and we'd think we had enough
1552 * outstanding extents for this if we only took one side of the
1553 * split, same goes for the other direction. We need to see if
1554 * the larger size still is the same amount of extents as the
1555 * original size, because if it is we need to add a new
1556 * outstanding extent. But if we split up and the larger size
1557 * is less than the original then we are good to go since we've
1558 * already accounted for the extra extent in our original
1559 * accounting.
1560 */ 1554 */
1561 new_size = orig->end - split + 1; 1555 new_size = orig->end - split + 1;
1562 if ((split - orig->start) > new_size) 1556 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1563 new_size = split - orig->start;
1564
1565 num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1566 BTRFS_MAX_EXTENT_SIZE); 1557 BTRFS_MAX_EXTENT_SIZE);
1567 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1558 new_size = split - orig->start;
1568 BTRFS_MAX_EXTENT_SIZE) < num_extents) 1559 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1560 BTRFS_MAX_EXTENT_SIZE);
1561 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1562 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1569 return; 1563 return;
1570 } 1564 }
1571 1565
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1591 if (!(other->state & EXTENT_DELALLOC)) 1585 if (!(other->state & EXTENT_DELALLOC))
1592 return; 1586 return;
1593 1587
1594 old_size = other->end - other->start + 1; 1588 if (new->start > other->start)
1595 new_size = old_size + (new->end - new->start + 1); 1589 new_size = new->end - other->start + 1;
1590 else
1591 new_size = other->end - new->start + 1;
1596 1592
1597 /* we're not bigger than the max, unreserve the space and go */ 1593 /* we're not bigger than the max, unreserve the space and go */
1598 if (new_size <= BTRFS_MAX_EXTENT_SIZE) { 1594 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1603 } 1599 }
1604 1600
1605 /* 1601 /*
1606 * If we grew by another max_extent, just return, we want to keep that 1602 * We have to add up either side to figure out how many extents were
1607 * reserved amount. 1603 * accounted for before we merged into one big extent. If the number of
1604 * extents we accounted for is <= the amount we need for the new range
1605 * then we can return, otherwise drop. Think of it like this
1606 *
1607 * [ 4k][MAX_SIZE]
1608 *
1609 * So we've grown the extent by a MAX_SIZE extent, this would mean we
1610 * need 2 outstanding extents, on one side we have 1 and the other side
1611 * we have 1 so they are == and we can return. But in this case
1612 *
1613 * [MAX_SIZE+4k][MAX_SIZE+4k]
1614 *
1615 * Each range on their own accounts for 2 extents, but merged together
1616 * they are only 3 extents worth of accounting, so we need to drop in
1617 * this case.
1608 */ 1618 */
1619 old_size = other->end - other->start + 1;
1609 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, 1620 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1610 BTRFS_MAX_EXTENT_SIZE); 1621 BTRFS_MAX_EXTENT_SIZE);
1622 old_size = new->end - new->start + 1;
1623 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1624 BTRFS_MAX_EXTENT_SIZE);
1625
1611 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1626 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1612 BTRFS_MAX_EXTENT_SIZE) > num_extents) 1627 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1613 return; 1628 return;
1614 1629
1615 spin_lock(&BTRFS_I(inode)->lock); 1630 spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
1686 spin_unlock(&BTRFS_I(inode)->lock); 1701 spin_unlock(&BTRFS_I(inode)->lock);
1687 } 1702 }
1688 1703
1704 /* For sanity tests */
1705 if (btrfs_test_is_dummy_root(root))
1706 return;
1707
1689 __percpu_counter_add(&root->fs_info->delalloc_bytes, len, 1708 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1690 root->fs_info->delalloc_batch); 1709 root->fs_info->delalloc_batch);
1691 spin_lock(&BTRFS_I(inode)->lock); 1710 spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1741 root != root->fs_info->tree_root) 1760 root != root->fs_info->tree_root)
1742 btrfs_delalloc_release_metadata(inode, len); 1761 btrfs_delalloc_release_metadata(inode, len);
1743 1762
1763 /* For sanity tests. */
1764 if (btrfs_test_is_dummy_root(root))
1765 return;
1766
1744 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1767 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1745 && do_list && !(state->state & EXTENT_NORESERVE)) 1768 && do_list && !(state->state & EXTENT_NORESERVE))
1746 btrfs_free_reserved_data_space(inode, len); 1769 btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7213 u64 start = iblock << inode->i_blkbits; 7236 u64 start = iblock << inode->i_blkbits;
7214 u64 lockstart, lockend; 7237 u64 lockstart, lockend;
7215 u64 len = bh_result->b_size; 7238 u64 len = bh_result->b_size;
7216 u64 orig_len = len; 7239 u64 *outstanding_extents = NULL;
7217 int unlock_bits = EXTENT_LOCKED; 7240 int unlock_bits = EXTENT_LOCKED;
7218 int ret = 0; 7241 int ret = 0;
7219 7242
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7225 lockstart = start; 7248 lockstart = start;
7226 lockend = start + len - 1; 7249 lockend = start + len - 1;
7227 7250
7251 if (current->journal_info) {
7252 /*
7253 * Need to pull our outstanding extents and set journal_info to NULL so
7254 * that anything that needs to check if there's a transction doesn't get
7255 * confused.
7256 */
7257 outstanding_extents = current->journal_info;
7258 current->journal_info = NULL;
7259 }
7260
7228 /* 7261 /*
7229 * If this errors out it's because we couldn't invalidate pagecache for 7262 * If this errors out it's because we couldn't invalidate pagecache for
7230 * this range and we need to fallback to buffered. 7263 * this range and we need to fallback to buffered.
@@ -7348,11 +7381,20 @@ unlock:
7348 if (start + len > i_size_read(inode)) 7381 if (start + len > i_size_read(inode))
7349 i_size_write(inode, start + len); 7382 i_size_write(inode, start + len);
7350 7383
7351 if (len < orig_len) { 7384 /*
7385 * If we have an outstanding_extents count still set then we're
7386 * within our reservation, otherwise we need to adjust our inode
7387 * counter appropriately.
7388 */
7389 if (*outstanding_extents) {
7390 (*outstanding_extents)--;
7391 } else {
7352 spin_lock(&BTRFS_I(inode)->lock); 7392 spin_lock(&BTRFS_I(inode)->lock);
7353 BTRFS_I(inode)->outstanding_extents++; 7393 BTRFS_I(inode)->outstanding_extents++;
7354 spin_unlock(&BTRFS_I(inode)->lock); 7394 spin_unlock(&BTRFS_I(inode)->lock);
7355 } 7395 }
7396
7397 current->journal_info = outstanding_extents;
7356 btrfs_free_reserved_data_space(inode, len); 7398 btrfs_free_reserved_data_space(inode, len);
7357 } 7399 }
7358 7400
@@ -7376,6 +7418,8 @@ unlock:
7376unlock_err: 7418unlock_err:
7377 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7419 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7378 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7420 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7421 if (outstanding_extents)
7422 current->journal_info = outstanding_extents;
7379 return ret; 7423 return ret;
7380} 7424}
7381 7425
@@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8075{ 8119{
8076 struct file *file = iocb->ki_filp; 8120 struct file *file = iocb->ki_filp;
8077 struct inode *inode = file->f_mapping->host; 8121 struct inode *inode = file->f_mapping->host;
8122 u64 outstanding_extents = 0;
8078 size_t count = 0; 8123 size_t count = 0;
8079 int flags = 0; 8124 int flags = 0;
8080 bool wakeup = true; 8125 bool wakeup = true;
@@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8112 ret = btrfs_delalloc_reserve_space(inode, count); 8157 ret = btrfs_delalloc_reserve_space(inode, count);
8113 if (ret) 8158 if (ret)
8114 goto out; 8159 goto out;
8160 outstanding_extents = div64_u64(count +
8161 BTRFS_MAX_EXTENT_SIZE - 1,
8162 BTRFS_MAX_EXTENT_SIZE);
8163
8164 /*
8165 * We need to know how many extents we reserved so that we can
8166 * do the accounting properly if we go over the number we
8167 * originally calculated. Abuse current->journal_info for this.
8168 */
8169 current->journal_info = &outstanding_extents;
8115 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8170 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8116 &BTRFS_I(inode)->runtime_flags)) { 8171 &BTRFS_I(inode)->runtime_flags)) {
8117 inode_dio_done(inode); 8172 inode_dio_done(inode);
@@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8124 iter, offset, btrfs_get_blocks_direct, NULL, 8179 iter, offset, btrfs_get_blocks_direct, NULL,
8125 btrfs_submit_direct, flags); 8180 btrfs_submit_direct, flags);
8126 if (rw & WRITE) { 8181 if (rw & WRITE) {
8182 current->journal_info = NULL;
8127 if (ret < 0 && ret != -EIOCBQUEUED) 8183 if (ret < 0 && ret != -EIOCBQUEUED)
8128 btrfs_delalloc_release_space(inode, count); 8184 btrfs_delalloc_release_space(inode, count);
8129 else if (ret >= 0 && (size_t)ret < count) 8185 else if (ret >= 0 && (size_t)ret < count)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1259 if (oper1->seq < oper2->seq) 1259 if (oper1->seq < oper2->seq)
1260 return -1; 1260 return -1;
1261 if (oper1->seq > oper2->seq) 1261 if (oper1->seq > oper2->seq)
1262 return -1; 1262 return 1;
1263 if (oper1->ref_root < oper2->ref_root) 1263 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1264 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1265 if (oper1->ref_root > oper2->ref_root)
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
911 return ret; 911 return ret;
912} 912}
913 913
914static int test_extent_accounting(void)
915{
916 struct inode *inode = NULL;
917 struct btrfs_root *root = NULL;
918 int ret = -ENOMEM;
919
920 inode = btrfs_new_test_inode();
921 if (!inode) {
922 test_msg("Couldn't allocate inode\n");
923 return ret;
924 }
925
926 root = btrfs_alloc_dummy_root();
927 if (IS_ERR(root)) {
928 test_msg("Couldn't allocate root\n");
929 goto out;
930 }
931
932 root->fs_info = btrfs_alloc_dummy_fs_info();
933 if (!root->fs_info) {
934 test_msg("Couldn't allocate dummy fs info\n");
935 goto out;
936 }
937
938 BTRFS_I(inode)->root = root;
939 btrfs_test_inode_set_ops(inode);
940
941 /* [BTRFS_MAX_EXTENT_SIZE] */
942 BTRFS_I(inode)->outstanding_extents++;
943 ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
944 NULL);
945 if (ret) {
946 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
947 goto out;
948 }
949 if (BTRFS_I(inode)->outstanding_extents != 1) {
950 ret = -EINVAL;
951 test_msg("Miscount, wanted 1, got %u\n",
952 BTRFS_I(inode)->outstanding_extents);
953 goto out;
954 }
955
956 /* [BTRFS_MAX_EXTENT_SIZE][4k] */
957 BTRFS_I(inode)->outstanding_extents++;
958 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
959 BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
960 if (ret) {
961 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
962 goto out;
963 }
964 if (BTRFS_I(inode)->outstanding_extents != 2) {
965 ret = -EINVAL;
966 test_msg("Miscount, wanted 2, got %u\n",
967 BTRFS_I(inode)->outstanding_extents);
968 goto out;
969 }
970
971 /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
972 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
973 BTRFS_MAX_EXTENT_SIZE >> 1,
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS);
978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out;
981 }
982 if (BTRFS_I(inode)->outstanding_extents != 2) {
983 ret = -EINVAL;
984 test_msg("Miscount, wanted 2, got %u\n",
985 BTRFS_I(inode)->outstanding_extents);
986 goto out;
987 }
988
989 /* [BTRFS_MAX_EXTENT_SIZE][4K] */
990 BTRFS_I(inode)->outstanding_extents++;
991 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
992 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
993 NULL);
994 if (ret) {
995 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
996 goto out;
997 }
998 if (BTRFS_I(inode)->outstanding_extents != 2) {
999 ret = -EINVAL;
1000 test_msg("Miscount, wanted 2, got %u\n",
1001 BTRFS_I(inode)->outstanding_extents);
1002 goto out;
1003 }
1004
1005 /*
1006 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
1007 *
1008 * I'm artificially adding 2 to outstanding_extents because in the
1009 * buffered IO case we'd add things up as we go, but I don't feel like
1010 * doing that here, this isn't the interesting case we want to test.
1011 */
1012 BTRFS_I(inode)->outstanding_extents += 2;
1013 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
1014 (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
1015 NULL);
1016 if (ret) {
1017 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1018 goto out;
1019 }
1020 if (BTRFS_I(inode)->outstanding_extents != 4) {
1021 ret = -EINVAL;
1022 test_msg("Miscount, wanted 4, got %u\n",
1023 BTRFS_I(inode)->outstanding_extents);
1024 goto out;
1025 }
1026
1027 /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
1028 BTRFS_I(inode)->outstanding_extents++;
1029 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1030 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1031 if (ret) {
1032 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1033 goto out;
1034 }
1035 if (BTRFS_I(inode)->outstanding_extents != 3) {
1036 ret = -EINVAL;
1037 test_msg("Miscount, wanted 3, got %u\n",
1038 BTRFS_I(inode)->outstanding_extents);
1039 goto out;
1040 }
1041
1042 /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
1043 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
1044 BTRFS_MAX_EXTENT_SIZE+4096,
1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS);
1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out;
1052 }
1053 if (BTRFS_I(inode)->outstanding_extents != 4) {
1054 ret = -EINVAL;
1055 test_msg("Miscount, wanted 4, got %u\n",
1056 BTRFS_I(inode)->outstanding_extents);
1057 goto out;
1058 }
1059
1060 /*
1061 * Refill the hole again just for good measure, because I thought it
1062 * might fail and I'd rather satisfy my paranoia at this point.
1063 */
1064 BTRFS_I(inode)->outstanding_extents++;
1065 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1066 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1067 if (ret) {
1068 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1069 goto out;
1070 }
1071 if (BTRFS_I(inode)->outstanding_extents != 3) {
1072 ret = -EINVAL;
1073 test_msg("Miscount, wanted 3, got %u\n",
1074 BTRFS_I(inode)->outstanding_extents);
1075 goto out;
1076 }
1077
1078 /* Empty */
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS);
1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out;
1086 }
1087 if (BTRFS_I(inode)->outstanding_extents) {
1088 ret = -EINVAL;
1089 test_msg("Miscount, wanted 0, got %u\n",
1090 BTRFS_I(inode)->outstanding_extents);
1091 goto out;
1092 }
1093 ret = 0;
1094out:
1095 if (ret)
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS);
1100 iput(inode);
1101 btrfs_free_dummy_root(root);
1102 return ret;
1103}
1104
914int btrfs_test_inodes(void) 1105int btrfs_test_inodes(void)
915{ 1106{
916 int ret; 1107 int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
924 if (ret) 1115 if (ret)
925 return ret; 1116 return ret;
926 test_msg("Running hole first btrfs_get_extent test\n"); 1117 test_msg("Running hole first btrfs_get_extent test\n");
927 return test_hole_first(); 1118 ret = test_hole_first();
1119 if (ret)
1120 return ret;
1121 test_msg("Running outstanding_extents tests\n");
1122 return test_extent_accounting();
928} 1123}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 88e51aded6bd..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1023 u64 old_root_bytenr; 1023 u64 old_root_bytenr;
1024 u64 old_root_used; 1024 u64 old_root_used;
1025 struct btrfs_root *tree_root = root->fs_info->tree_root; 1025 struct btrfs_root *tree_root = root->fs_info->tree_root;
1026 bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
1027 1026
1028 old_root_used = btrfs_root_used(&root->root_item); 1027 old_root_used = btrfs_root_used(&root->root_item);
1029 btrfs_write_dirty_block_groups(trans, root);
1030 1028
1031 while (1) { 1029 while (1) {
1032 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1030 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1033 if (old_root_bytenr == root->node->start && 1031 if (old_root_bytenr == root->node->start &&
1034 old_root_used == btrfs_root_used(&root->root_item) && 1032 old_root_used == btrfs_root_used(&root->root_item))
1035 (!extent_root ||
1036 list_empty(&trans->transaction->dirty_bgs)))
1037 break; 1033 break;
1038 1034
1039 btrfs_set_root_node(&root->root_item, root->node); 1035 btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,14 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1044 return ret; 1040 return ret;
1045 1041
1046 old_root_used = btrfs_root_used(&root->root_item); 1042 old_root_used = btrfs_root_used(&root->root_item);
1047 if (extent_root) {
1048 ret = btrfs_write_dirty_block_groups(trans, root);
1049 if (ret)
1050 return ret;
1051 }
1052 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1053 if (ret)
1054 return ret;
1055 } 1043 }
1056 1044
1057 return 0; 1045 return 0;
@@ -1068,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1068 struct btrfs_root *root) 1056 struct btrfs_root *root)
1069{ 1057{
1070 struct btrfs_fs_info *fs_info = root->fs_info; 1058 struct btrfs_fs_info *fs_info = root->fs_info;
1059 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1071 struct list_head *next; 1060 struct list_head *next;
1072 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1073 int ret; 1062 int ret;
@@ -1095,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1095 if (ret) 1084 if (ret)
1096 return ret; 1085 return ret;
1097 1086
1087 ret = btrfs_setup_space_cache(trans, root);
1088 if (ret)
1089 return ret;
1090
1098 /* run_qgroups might have added some more refs */ 1091 /* run_qgroups might have added some more refs */
1099 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1092 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1100 if (ret) 1093 if (ret)
1101 return ret; 1094 return ret;
1102 1095again:
1103 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 1096 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
1104 next = fs_info->dirty_cowonly_roots.next; 1097 next = fs_info->dirty_cowonly_roots.next;
1105 list_del_init(next); 1098 list_del_init(next);
@@ -1112,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1112 ret = update_cowonly_root(trans, root); 1105 ret = update_cowonly_root(trans, root);
1113 if (ret) 1106 if (ret)
1114 return ret; 1107 return ret;
1108 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1109 if (ret)
1110 return ret;
1115 } 1111 }
1116 1112
1113 while (!list_empty(dirty_bgs)) {
1114 ret = btrfs_write_dirty_block_groups(trans, root);
1115 if (ret)
1116 return ret;
1117 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1118 if (ret)
1119 return ret;
1120 }
1121
1122 if (!list_empty(&fs_info->dirty_cowonly_roots))
1123 goto again;
1124
1117 list_add_tail(&fs_info->extent_root->dirty_list, 1125 list_add_tail(&fs_info->extent_root->dirty_list,
1118 &trans->transaction->switch_commits); 1126 &trans->transaction->switch_commits);
1119 btrfs_after_dev_replace_commit(fs_info); 1127 btrfs_after_dev_replace_commit(fs_info);
@@ -1811,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1811 1819
1812 wait_for_commit(root, cur_trans); 1820 wait_for_commit(root, cur_trans);
1813 1821
1822 if (unlikely(cur_trans->aborted))
1823 ret = cur_trans->aborted;
1824
1814 btrfs_put_transaction(cur_trans); 1825 btrfs_put_transaction(cur_trans);
1815 1826
1816 return ret; 1827 return ret;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d622fa..39706c57ad3c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -890,8 +890,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
890 890
891 newpage = buf->page; 891 newpage = buf->page;
892 892
893 if (WARN_ON(!PageUptodate(newpage))) 893 if (!PageUptodate(newpage))
894 return -EIO; 894 SetPageUptodate(newpage);
895 895
896 ClearPageMappedToDisk(newpage); 896 ClearPageMappedToDisk(newpage);
897 897
@@ -1353,6 +1353,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1353 return err; 1353 return err;
1354} 1354}
1355 1355
1356static int fuse_dev_open(struct inode *inode, struct file *file)
1357{
1358 /*
1359 * The fuse device's file's private_data is used to hold
1360 * the fuse_conn(ection) when it is mounted, and is used to
1361 * keep track of whether the file has been mounted already.
1362 */
1363 file->private_data = NULL;
1364 return 0;
1365}
1366
1356static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, 1367static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1357 unsigned long nr_segs, loff_t pos) 1368 unsigned long nr_segs, loff_t pos)
1358{ 1369{
@@ -1797,6 +1808,9 @@ copy_finish:
1797static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 1808static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1798 unsigned int size, struct fuse_copy_state *cs) 1809 unsigned int size, struct fuse_copy_state *cs)
1799{ 1810{
1811 /* Don't try to move pages (yet) */
1812 cs->move_pages = 0;
1813
1800 switch (code) { 1814 switch (code) {
1801 case FUSE_NOTIFY_POLL: 1815 case FUSE_NOTIFY_POLL:
1802 return fuse_notify_poll(fc, size, cs); 1816 return fuse_notify_poll(fc, size, cs);
@@ -2217,6 +2231,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
2217 2231
2218const struct file_operations fuse_dev_operations = { 2232const struct file_operations fuse_dev_operations = {
2219 .owner = THIS_MODULE, 2233 .owner = THIS_MODULE,
2234 .open = fuse_dev_open,
2220 .llseek = no_llseek, 2235 .llseek = no_llseek,
2221 .read = do_sync_read, 2236 .read = do_sync_read,
2222 .aio_read = fuse_dev_read, 2237 .aio_read = fuse_dev_read,
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
131 hfs_bnode_write(node, entry, data_off + key_len, entry_len); 131 hfs_bnode_write(node, entry, data_off + key_len, entry_len);
132 hfs_bnode_dump(node); 132 hfs_bnode_dump(node);
133 133
134 if (new_node) { 134 /*
135 /* update parent key if we inserted a key 135 * update parent key if we inserted a key
136 * at the start of the first node 136 * at the start of the node and it is not the new node
137 */ 137 */
138 if (!rec && new_node != node) 138 if (!rec && new_node != node) {
139 hfs_brec_update_parent(fd); 139 hfs_bnode_read_key(node, fd->search_key, data_off + size);
140 hfs_brec_update_parent(fd);
141 }
140 142
143 if (new_node) {
141 hfs_bnode_put(fd->bnode); 144 hfs_bnode_put(fd->bnode);
142 if (!new_node->parent) { 145 if (!new_node->parent) {
143 hfs_btree_inc_height(tree); 146 hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
168 goto again; 171 goto again;
169 } 172 }
170 173
171 if (!rec)
172 hfs_brec_update_parent(fd);
173
174 return 0; 174 return 0;
175} 175}
176 176
@@ -370,6 +370,8 @@ again:
370 if (IS_ERR(parent)) 370 if (IS_ERR(parent))
371 return PTR_ERR(parent); 371 return PTR_ERR(parent);
372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key); 372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
373 if (fd->record < 0)
374 return -ENOENT;
373 hfs_bnode_dump(parent); 375 hfs_bnode_dump(parent);
374 rec = fd->record; 376 rec = fd->record;
375 377
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b684e8a132e6..2bacb9988566 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
207 goto out_free; 207 goto out_free;
208 } 208 }
209 209
210 of->event = atomic_read(&of->kn->attr.open->event);
210 ops = kernfs_ops(of->kn); 211 ops = kernfs_ops(of->kn);
211 if (ops->read) 212 if (ops->read)
212 len = ops->read(of, buf, len, *ppos); 213 len = ops->read(of, buf, len, *ppos);
diff --git a/fs/locks.c b/fs/locks.c
index f1bad681fc1c..528fedfda15e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1728,7 +1728,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
1728 break; 1728 break;
1729 } 1729 }
1730 } 1730 }
1731 trace_generic_delete_lease(inode, fl); 1731 trace_generic_delete_lease(inode, victim);
1732 if (victim) 1732 if (victim)
1733 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); 1733 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1734 spin_unlock(&ctx->flc_lock); 1734 spin_unlock(&ctx->flc_lock);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3c1bfa155571..1028a0629543 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -587,8 +587,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
587 587
588 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); 588 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
589 589
590 nfsd4_cb_layout_fail(ls);
591
592 printk(KERN_WARNING 590 printk(KERN_WARNING
593 "nfsd: client %s failed to respond to layout recall. " 591 "nfsd: client %s failed to respond to layout recall. "
594 " Fencing..\n", addr_str); 592 " Fencing..\n", addr_str);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 469086b9f99b..0c3f303baf32 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1907 struct the_nilfs *nilfs) 1907 struct the_nilfs *nilfs)
1908{ 1908{
1909 struct nilfs_inode_info *ii, *n; 1909 struct nilfs_inode_info *ii, *n;
1910 int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
1910 int defer_iput = false; 1911 int defer_iput = false;
1911 1912
1912 spin_lock(&nilfs->ns_inode_lock); 1913 spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1919 brelse(ii->i_bh); 1920 brelse(ii->i_bh);
1920 ii->i_bh = NULL; 1921 ii->i_bh = NULL;
1921 list_del_init(&ii->i_dirty); 1922 list_del_init(&ii->i_dirty);
1922 if (!ii->vfs_inode.i_nlink) { 1923 if (!ii->vfs_inode.i_nlink || during_mount) {
1923 /* 1924 /*
1924 * Defer calling iput() to avoid a deadlock 1925 * Defer calling iput() to avoid deadlocks if
1925 * over I_SYNC flag for inodes with i_nlink == 0 1926 * i_nlink == 0 or mount is not yet finished.
1926 */ 1927 */
1927 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); 1928 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1928 defer_iput = true; 1929 defer_iput = true;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9a66ff79ff27..d2f97ecca6a5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) 143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
144 return false; 144 return false;
145 145
146 if (event_mask & marks_mask & ~marks_ignored_mask) 146 if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
147 ~marks_ignored_mask)
147 return true; 148 return true;
148 149
149 return false; 150 return false;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8490c64d34fe..460c6c37e683 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
502 502
503static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb) 503static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
504{ 504{
505 if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) 505 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
506 return 1; 506 return 1;
507 return 0; 507 return 0;
508} 508}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 20e37a3ed26f..db64ce2d4667 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -102,11 +102,11 @@
102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ 103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ 104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) 105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
106 | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
106#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 107#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
107 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 108 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
108 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \ 109 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
109 | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
110 110
111/* 111/*
112 * Heartbeat-only devices are missing journals and other files. The 112 * Heartbeat-only devices are missing journals and other files. The
@@ -179,6 +179,11 @@
179#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 179#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
180 180
181/* 181/*
182 * Append Direct IO support
183 */
184#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000
185
186/*
182 * backup superblock flag is used to indicate that this volume 187 * backup superblock flag is used to indicate that this volume
183 * has backup superblocks. 188 * has backup superblocks.
184 */ 189 */
@@ -200,10 +205,6 @@
200#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 205#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
201#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 206#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
202 207
203/*
204 * Append Direct IO support
205 */
206#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
207 208
208/* The byte offset of the first backup block will be 1G. 209/* The byte offset of the first backup block will be 1G.
209 * The following will be 4G, 16G, 64G, 256G and 1T. 210 * The following will be 4G, 16G, 64G, 256G and 1T.
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b90952f528b1..5f0d1993e6e3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
529{ 529{
530 struct ovl_fs *ufs = sb->s_fs_info; 530 struct ovl_fs *ufs = sb->s_fs_info;
531 531
532 if (!(*flags & MS_RDONLY) && 532 if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
533 (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
534 return -EROFS; 533 return -EROFS;
535 534
536 return 0; 535 return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
615 break; 614 break;
616 615
617 default: 616 default:
617 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
618 return -EINVAL; 618 return -EINVAL;
619 } 619 }
620 } 620 }
621
622 /* Workdir is useless in non-upper mount */
623 if (!config->upperdir && config->workdir) {
624 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
625 config->workdir);
626 kfree(config->workdir);
627 config->workdir = NULL;
628 }
629
621 return 0; 630 return 0;
622} 631}
623 632
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
837 846
838 sb->s_stack_depth = 0; 847 sb->s_stack_depth = 0;
839 if (ufs->config.upperdir) { 848 if (ufs->config.upperdir) {
840 /* FIXME: workdir is not needed for a R/O mount */
841 if (!ufs->config.workdir) { 849 if (!ufs->config.workdir) {
842 pr_err("overlayfs: missing 'workdir'\n"); 850 pr_err("overlayfs: missing 'workdir'\n");
843 goto out_free_config; 851 goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
847 if (err) 855 if (err)
848 goto out_free_config; 856 goto out_free_config;
849 857
858 /* Upper fs should not be r/o */
859 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
860 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
861 err = -EINVAL;
862 goto out_put_upperpath;
863 }
864
850 err = ovl_mount_dir(ufs->config.workdir, &workpath); 865 err = ovl_mount_dir(ufs->config.workdir, &workpath);
851 if (err) 866 if (err)
852 goto out_put_upperpath; 867 goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
869 884
870 err = -EINVAL; 885 err = -EINVAL;
871 stacklen = ovl_split_lowerdirs(lowertmp); 886 stacklen = ovl_split_lowerdirs(lowertmp);
872 if (stacklen > OVL_MAX_STACK) 887 if (stacklen > OVL_MAX_STACK) {
888 pr_err("overlayfs: too many lower directries, limit is %d\n",
889 OVL_MAX_STACK);
873 goto out_free_lowertmp; 890 goto out_free_lowertmp;
891 } else if (!ufs->config.upperdir && stacklen == 1) {
892 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
893 goto out_free_lowertmp;
894 }
874 895
875 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 896 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
876 if (!stack) 897 if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
932 ufs->numlower++; 953 ufs->numlower++;
933 } 954 }
934 955
935 /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ 956 /* If the upper fs is nonexistent, we mark overlayfs r/o too */
936 if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) 957 if (!ufs->upper_mnt)
937 sb->s_flags |= MS_RDONLY; 958 sb->s_flags |= MS_RDONLY;
938 959
939 sb->s_d_op = &ovl_dentry_operations; 960 sb->s_d_op = &ovl_dentry_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
1325 1325
1326static int pagemap_open(struct inode *inode, struct file *file) 1326static int pagemap_open(struct inode *inode, struct file *file)
1327{ 1327{
1328 /* do not disclose physical addresses: attack vector */
1329 if (!capable(CAP_SYS_ADMIN))
1330 return -EPERM;
1328 pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " 1331 pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
1329 "to stop being page-shift some time soon. See the " 1332 "to stop being page-shift some time soon. See the "
1330 "linux/Documentation/vm/pagemap.txt for details.\n"); 1333 "linux/Documentation/vm/pagemap.txt for details.\n");