aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c7
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/btrfs/backref.c3
-rw-r--r--fs/btrfs/check-integrity.c2
-rw-r--r--fs/btrfs/ctree.c4
-rw-r--r--fs/btrfs/ctree.h8
-rw-r--r--fs/btrfs/delayed-ref.h1
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c52
-rw-r--r--fs/btrfs/extent-tree.c94
-rw-r--r--fs/btrfs/extent_io.c138
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-cache.c43
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/inode-map.c8
-rw-r--r--fs/btrfs/inode.c81
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/raid56.c2
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/scrub.c10
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/volumes.c54
-rw-r--r--fs/btrfs/volumes.h20
-rw-r--r--fs/cifs/cifs_dfs_ref.c141
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/connect.c23
-rw-r--r--fs/cifs/dns_resolve.c4
-rw-r--r--fs/cifs/inode.c3
-rw-r--r--fs/efivarfs/file.c14
-rw-r--r--fs/ext4/ext4.h8
-rw-r--r--fs/ext4/extents.c9
-rw-r--r--fs/ext4/extents_status.c17
-rw-r--r--fs/ext4/extents_status.h3
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/inode.c85
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/page-io.c121
-rw-r--r--fs/fat/inode.c15
-rw-r--r--fs/gfs2/Kconfig2
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/rgrp.c9
-rw-r--r--fs/hfs/bnode.c6
-rw-r--r--fs/hpfs/dir.c10
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/callback_xdr.c2
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/nfs4session.c4
-rw-r--r--fs/nfs/nfs4session.h13
-rw-r--r--fs/nfs/nfs4state.c15
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nilfs2/inode.c27
-rw-r--r--fs/ocfs2/extent_map.c2
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/pnode.c3
-rw-r--r--fs/qnx6/dir.c2
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/inode.c9
-rw-r--r--fs/reiserfs/xattr.c14
-rw-r--r--fs/reiserfs/xattr_acl.c3
-rw-r--r--fs/xfs/xfs_aops.c19
-rw-r--r--fs/xfs/xfs_attr_leaf.c96
-rw-r--r--fs/xfs/xfs_attr_remote.c408
-rw-r--r--fs/xfs/xfs_attr_remote.h10
-rw-r--r--fs/xfs/xfs_buf.c3
-rw-r--r--fs/xfs/xfs_buf_item.c7
-rw-r--r--fs/xfs/xfs_da_btree.c7
-rw-r--r--fs/xfs/xfs_dfrag.c8
-rw-r--r--fs/xfs/xfs_dir2_format.h1
-rw-r--r--fs/xfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/xfs_dir2_node.c13
-rw-r--r--fs/xfs/xfs_extfree_item.c5
-rw-r--r--fs/xfs/xfs_fs.h1
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_iops.c47
-rw-r--r--fs/xfs/xfs_log_cil.c2
-rw-r--r--fs/xfs/xfs_log_recover.c11
-rw-r--r--fs/xfs/xfs_qm_syscalls.c40
-rw-r--r--fs/xfs/xfs_symlink.c20
-rw-r--r--fs/xfs/xfs_vnodeops.c4
81 files changed, 1078 insertions, 785 deletions
diff --git a/fs/aio.c b/fs/aio.c
index c5b1a8c10411..7fe5bdee1630 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -307,7 +307,9 @@ static void free_ioctx(struct kioctx *ctx)
307 kunmap_atomic(ring); 307 kunmap_atomic(ring);
308 308
309 while (atomic_read(&ctx->reqs_active) > 0) { 309 while (atomic_read(&ctx->reqs_active) > 0) {
310 wait_event(ctx->wait, head != ctx->tail); 310 wait_event(ctx->wait,
311 head != ctx->tail ||
312 atomic_read(&ctx->reqs_active) <= 0);
311 313
312 avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head; 314 avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
313 315
@@ -1299,8 +1301,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1299 * < min_nr if the timeout specified by timeout has elapsed 1301 * < min_nr if the timeout specified by timeout has elapsed
1300 * before sufficient events are available, where timeout == NULL 1302 * before sufficient events are available, where timeout == NULL
1301 * specifies an infinite timeout. Note that the timeout pointed to by 1303 * specifies an infinite timeout. Note that the timeout pointed to by
1302 * timeout is relative and will be updated if not NULL and the 1304 * timeout is relative. Will fail with -ENOSYS if not implemented.
1303 * operation blocks. Will fail with -ENOSYS if not implemented.
1304 */ 1305 */
1305SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, 1306SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
1306 long, min_nr, 1307 long, min_nr,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8615ee89ab55..f95dddced968 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -265,8 +265,8 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
265 result = filldir(dirent, keybuf, keysize, filp->f_pos, 265 result = filldir(dirent, keybuf, keysize, filp->f_pos,
266 (ino_t) value, d_type); 266 (ino_t) value, d_type);
267 } 267 }
268 268 if (!result)
269 filp->f_pos++; 269 filp->f_pos++;
270 270
271 befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos); 271 befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos);
272 272
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b4fb41558111..290e347b6db3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -918,7 +918,8 @@ again:
918 ref->parent, bsz, 0); 918 ref->parent, bsz, 0);
919 if (!eb || !extent_buffer_uptodate(eb)) { 919 if (!eb || !extent_buffer_uptodate(eb)) {
920 free_extent_buffer(eb); 920 free_extent_buffer(eb);
921 return -EIO; 921 ret = -EIO;
922 goto out;
922 } 923 }
923 ret = find_extent_in_eb(eb, bytenr, 924 ret = find_extent_in_eb(eb, bytenr,
924 *extent_item_pos, &eie); 925 *extent_item_pos, &eie);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 18af6f48781a..1431a6965017 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
1700 unsigned int j; 1700 unsigned int j;
1701 DECLARE_COMPLETION_ONSTACK(complete); 1701 DECLARE_COMPLETION_ONSTACK(complete);
1702 1702
1703 bio = bio_alloc(GFP_NOFS, num_pages - i); 1703 bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
1704 if (!bio) { 1704 if (!bio) {
1705 printk(KERN_INFO 1705 printk(KERN_INFO
1706 "btrfsic: bio_alloc() for %u pages failed!\n", 1706 "btrfsic: bio_alloc() for %u pages failed!\n",
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index de6de8e60b46..02fae7f7e42c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -951,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
951 BUG_ON(ret); /* -ENOMEM */ 951 BUG_ON(ret); /* -ENOMEM */
952 } 952 }
953 if (new_flags != 0) { 953 if (new_flags != 0) {
954 int level = btrfs_header_level(buf);
955
954 ret = btrfs_set_disk_extent_flags(trans, root, 956 ret = btrfs_set_disk_extent_flags(trans, root,
955 buf->start, 957 buf->start,
956 buf->len, 958 buf->len,
957 new_flags, 0); 959 new_flags, level, 0);
958 if (ret) 960 if (ret)
959 return ret; 961 return ret;
960 } 962 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 63c328a9ce95..d6dd49b51ba8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -88,12 +88,12 @@ struct btrfs_ordered_sum;
88/* holds checksums of all the data extents */ 88/* holds checksums of all the data extents */
89#define BTRFS_CSUM_TREE_OBJECTID 7ULL 89#define BTRFS_CSUM_TREE_OBJECTID 7ULL
90 90
91/* for storing balance parameters in the root tree */
92#define BTRFS_BALANCE_OBJECTID -4ULL
93
94/* holds quota configuration and tracking */ 91/* holds quota configuration and tracking */
95#define BTRFS_QUOTA_TREE_OBJECTID 8ULL 92#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
96 93
94/* for storing balance parameters in the root tree */
95#define BTRFS_BALANCE_OBJECTID -4ULL
96
97/* orhpan objectid for tracking unlinked/truncated files */ 97/* orhpan objectid for tracking unlinked/truncated files */
98#define BTRFS_ORPHAN_OBJECTID -5ULL 98#define BTRFS_ORPHAN_OBJECTID -5ULL
99 99
@@ -3075,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3075int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3075int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3076 struct btrfs_root *root, 3076 struct btrfs_root *root,
3077 u64 bytenr, u64 num_bytes, u64 flags, 3077 u64 bytenr, u64 num_bytes, u64 flags,
3078 int is_data); 3078 int level, int is_data);
3079int btrfs_free_extent(struct btrfs_trans_handle *trans, 3079int btrfs_free_extent(struct btrfs_trans_handle *trans,
3080 struct btrfs_root *root, 3080 struct btrfs_root *root,
3081 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3081 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f75fcaf79aeb..70b962cc177d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {
60struct btrfs_delayed_extent_op { 60struct btrfs_delayed_extent_op {
61 struct btrfs_disk_key key; 61 struct btrfs_disk_key key;
62 u64 flags_to_set; 62 u64 flags_to_set;
63 int level;
63 unsigned int update_key:1; 64 unsigned int update_key:1;
64 unsigned int update_flags:1; 65 unsigned int update_flags:1;
65 unsigned int is_data:1; 66 unsigned int is_data:1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7ba7b3900cb8..65241f32d3f8 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
313 struct btrfs_device *tgt_device = NULL; 313 struct btrfs_device *tgt_device = NULL;
314 struct btrfs_device *src_device = NULL; 314 struct btrfs_device *src_device = NULL;
315 315
316 if (btrfs_fs_incompat(fs_info, RAID56)) {
317 pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
318 return -EINVAL;
319 }
320
316 switch (args->start.cont_reading_from_srcdev_mode) { 321 switch (args->start.cont_reading_from_srcdev_mode) {
317 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: 322 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
318 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: 323 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4e9ebe1f1827..e7b3cb5286a5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -152,7 +152,7 @@ static struct btrfs_lockdep_keyset {
152 { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, 152 { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
153 { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, 153 { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
154 { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, 154 { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
155 { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, 155 { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" },
156 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, 156 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
157 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 157 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
158 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, 158 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
@@ -1513,7 +1513,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1513 } 1513 }
1514 1514
1515 root->commit_root = btrfs_root_node(root); 1515 root->commit_root = btrfs_root_node(root);
1516 BUG_ON(!root->node); /* -ENOMEM */
1517out: 1516out:
1518 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1517 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1519 root->ref_cows = 1; 1518 root->ref_cows = 1;
@@ -1988,30 +1987,33 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1988{ 1987{
1989 free_extent_buffer(info->tree_root->node); 1988 free_extent_buffer(info->tree_root->node);
1990 free_extent_buffer(info->tree_root->commit_root); 1989 free_extent_buffer(info->tree_root->commit_root);
1991 free_extent_buffer(info->dev_root->node);
1992 free_extent_buffer(info->dev_root->commit_root);
1993 free_extent_buffer(info->extent_root->node);
1994 free_extent_buffer(info->extent_root->commit_root);
1995 free_extent_buffer(info->csum_root->node);
1996 free_extent_buffer(info->csum_root->commit_root);
1997 if (info->quota_root) {
1998 free_extent_buffer(info->quota_root->node);
1999 free_extent_buffer(info->quota_root->commit_root);
2000 }
2001
2002 info->tree_root->node = NULL; 1990 info->tree_root->node = NULL;
2003 info->tree_root->commit_root = NULL; 1991 info->tree_root->commit_root = NULL;
2004 info->dev_root->node = NULL; 1992
2005 info->dev_root->commit_root = NULL; 1993 if (info->dev_root) {
2006 info->extent_root->node = NULL; 1994 free_extent_buffer(info->dev_root->node);
2007 info->extent_root->commit_root = NULL; 1995 free_extent_buffer(info->dev_root->commit_root);
2008 info->csum_root->node = NULL; 1996 info->dev_root->node = NULL;
2009 info->csum_root->commit_root = NULL; 1997 info->dev_root->commit_root = NULL;
1998 }
1999 if (info->extent_root) {
2000 free_extent_buffer(info->extent_root->node);
2001 free_extent_buffer(info->extent_root->commit_root);
2002 info->extent_root->node = NULL;
2003 info->extent_root->commit_root = NULL;
2004 }
2005 if (info->csum_root) {
2006 free_extent_buffer(info->csum_root->node);
2007 free_extent_buffer(info->csum_root->commit_root);
2008 info->csum_root->node = NULL;
2009 info->csum_root->commit_root = NULL;
2010 }
2010 if (info->quota_root) { 2011 if (info->quota_root) {
2012 free_extent_buffer(info->quota_root->node);
2013 free_extent_buffer(info->quota_root->commit_root);
2011 info->quota_root->node = NULL; 2014 info->quota_root->node = NULL;
2012 info->quota_root->commit_root = NULL; 2015 info->quota_root->commit_root = NULL;
2013 } 2016 }
2014
2015 if (chunk_root) { 2017 if (chunk_root) {
2016 free_extent_buffer(info->chunk_root->node); 2018 free_extent_buffer(info->chunk_root->node);
2017 free_extent_buffer(info->chunk_root->commit_root); 2019 free_extent_buffer(info->chunk_root->commit_root);
@@ -3128,7 +3130,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3128 * caller 3130 * caller
3129 */ 3131 */
3130 device->flush_bio = NULL; 3132 device->flush_bio = NULL;
3131 bio = bio_alloc(GFP_NOFS, 0); 3133 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
3132 if (!bio) 3134 if (!bio)
3133 return -ENOMEM; 3135 return -ENOMEM;
3134 3136
@@ -3659,8 +3661,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3659 ordered_operations); 3661 ordered_operations);
3660 3662
3661 list_del_init(&btrfs_inode->ordered_operations); 3663 list_del_init(&btrfs_inode->ordered_operations);
3664 spin_unlock(&root->fs_info->ordered_extent_lock);
3662 3665
3663 btrfs_invalidate_inodes(btrfs_inode->root); 3666 btrfs_invalidate_inodes(btrfs_inode->root);
3667
3668 spin_lock(&root->fs_info->ordered_extent_lock);
3664 } 3669 }
3665 3670
3666 spin_unlock(&root->fs_info->ordered_extent_lock); 3671 spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3782,8 +3787,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3782 list_del_init(&btrfs_inode->delalloc_inodes); 3787 list_del_init(&btrfs_inode->delalloc_inodes);
3783 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, 3788 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
3784 &btrfs_inode->runtime_flags); 3789 &btrfs_inode->runtime_flags);
3790 spin_unlock(&root->fs_info->delalloc_lock);
3785 3791
3786 btrfs_invalidate_inodes(btrfs_inode->root); 3792 btrfs_invalidate_inodes(btrfs_inode->root);
3793
3794 spin_lock(&root->fs_info->delalloc_lock);
3787 } 3795 }
3788 3796
3789 spin_unlock(&root->fs_info->delalloc_lock); 3797 spin_unlock(&root->fs_info->delalloc_lock);
@@ -3808,7 +3816,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3808 while (start <= end) { 3816 while (start <= end) {
3809 eb = btrfs_find_tree_block(root, start, 3817 eb = btrfs_find_tree_block(root, start,
3810 root->leafsize); 3818 root->leafsize);
3811 start += eb->len; 3819 start += root->leafsize;
3812 if (!eb) 3820 if (!eb)
3813 continue; 3821 continue;
3814 wait_on_extent_buffer_writeback(eb); 3822 wait_on_extent_buffer_writeback(eb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2305b5c5cf00..df472ab1b5ac 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2070,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2070 u32 item_size; 2070 u32 item_size;
2071 int ret; 2071 int ret;
2072 int err = 0; 2072 int err = 0;
2073 int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2073 int metadata = !extent_op->is_data;
2074 node->type == BTRFS_SHARED_BLOCK_REF_KEY);
2075 2074
2076 if (trans->aborted) 2075 if (trans->aborted)
2077 return 0; 2076 return 0;
@@ -2086,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2086 key.objectid = node->bytenr; 2085 key.objectid = node->bytenr;
2087 2086
2088 if (metadata) { 2087 if (metadata) {
2089 struct btrfs_delayed_tree_ref *tree_ref;
2090
2091 tree_ref = btrfs_delayed_node_to_tree_ref(node);
2092 key.type = BTRFS_METADATA_ITEM_KEY; 2088 key.type = BTRFS_METADATA_ITEM_KEY;
2093 key.offset = tree_ref->level; 2089 key.offset = extent_op->level;
2094 } else { 2090 } else {
2095 key.type = BTRFS_EXTENT_ITEM_KEY; 2091 key.type = BTRFS_EXTENT_ITEM_KEY;
2096 key.offset = node->num_bytes; 2092 key.offset = node->num_bytes;
@@ -2719,7 +2715,7 @@ out:
2719int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2715int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2720 struct btrfs_root *root, 2716 struct btrfs_root *root,
2721 u64 bytenr, u64 num_bytes, u64 flags, 2717 u64 bytenr, u64 num_bytes, u64 flags,
2722 int is_data) 2718 int level, int is_data)
2723{ 2719{
2724 struct btrfs_delayed_extent_op *extent_op; 2720 struct btrfs_delayed_extent_op *extent_op;
2725 int ret; 2721 int ret;
@@ -2732,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2732 extent_op->update_flags = 1; 2728 extent_op->update_flags = 1;
2733 extent_op->update_key = 0; 2729 extent_op->update_key = 0;
2734 extent_op->is_data = is_data ? 1 : 0; 2730 extent_op->is_data = is_data ? 1 : 0;
2731 extent_op->level = level;
2735 2732
2736 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, 2733 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2737 num_bytes, extent_op); 2734 num_bytes, extent_op);
@@ -3109,6 +3106,11 @@ again:
3109 WARN_ON(ret); 3106 WARN_ON(ret);
3110 3107
3111 if (i_size_read(inode) > 0) { 3108 if (i_size_read(inode) > 0) {
3109 ret = btrfs_check_trunc_cache_free_space(root,
3110 &root->fs_info->global_block_rsv);
3111 if (ret)
3112 goto out_put;
3113
3112 ret = btrfs_truncate_free_space_cache(root, trans, path, 3114 ret = btrfs_truncate_free_space_cache(root, trans, path,
3113 inode); 3115 inode);
3114 if (ret) 3116 if (ret)
@@ -4562,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
4562 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 4564 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
4563 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 4565 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
4564 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 4566 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
4567 if (fs_info->quota_root)
4568 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
4565 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 4569 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
4566 4570
4567 update_global_block_rsv(fs_info); 4571 update_global_block_rsv(fs_info);
@@ -6651,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
6651 struct btrfs_block_rsv *block_rsv; 6655 struct btrfs_block_rsv *block_rsv;
6652 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 6656 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
6653 int ret; 6657 int ret;
6658 bool global_updated = false;
6654 6659
6655 block_rsv = get_block_rsv(trans, root); 6660 block_rsv = get_block_rsv(trans, root);
6656 6661
6657 if (block_rsv->size == 0) { 6662 if (unlikely(block_rsv->size == 0))
6658 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6663 goto try_reserve;
6659 BTRFS_RESERVE_NO_FLUSH); 6664again:
6660 /* 6665 ret = block_rsv_use_bytes(block_rsv, blocksize);
6661 * If we couldn't reserve metadata bytes try and use some from 6666 if (!ret)
6662 * the global reserve.
6663 */
6664 if (ret && block_rsv != global_rsv) {
6665 ret = block_rsv_use_bytes(global_rsv, blocksize);
6666 if (!ret)
6667 return global_rsv;
6668 return ERR_PTR(ret);
6669 } else if (ret) {
6670 return ERR_PTR(ret);
6671 }
6672 return block_rsv; 6667 return block_rsv;
6668
6669 if (block_rsv->failfast)
6670 return ERR_PTR(ret);
6671
6672 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
6673 global_updated = true;
6674 update_global_block_rsv(root->fs_info);
6675 goto again;
6673 } 6676 }
6674 6677
6675 ret = block_rsv_use_bytes(block_rsv, blocksize); 6678 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6679 static DEFINE_RATELIMIT_STATE(_rs,
6680 DEFAULT_RATELIMIT_INTERVAL * 10,
6681 /*DEFAULT_RATELIMIT_BURST*/ 1);
6682 if (__ratelimit(&_rs))
6683 WARN(1, KERN_DEBUG
6684 "btrfs: block rsv returned %d\n", ret);
6685 }
6686try_reserve:
6687 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
6688 BTRFS_RESERVE_NO_FLUSH);
6676 if (!ret) 6689 if (!ret)
6677 return block_rsv; 6690 return block_rsv;
6678 if (ret && !block_rsv->failfast) { 6691 /*
6679 if (btrfs_test_opt(root, ENOSPC_DEBUG)) { 6692 * If we couldn't reserve metadata bytes try and use some from
6680 static DEFINE_RATELIMIT_STATE(_rs, 6693 * the global reserve if its space type is the same as the global
6681 DEFAULT_RATELIMIT_INTERVAL * 10, 6694 * reservation.
6682 /*DEFAULT_RATELIMIT_BURST*/ 1); 6695 */
6683 if (__ratelimit(&_rs)) 6696 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
6684 WARN(1, KERN_DEBUG 6697 block_rsv->space_info == global_rsv->space_info) {
6685 "btrfs: block rsv returned %d\n", ret); 6698 ret = block_rsv_use_bytes(global_rsv, blocksize);
6686 } 6699 if (!ret)
6687 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6700 return global_rsv;
6688 BTRFS_RESERVE_NO_FLUSH);
6689 if (!ret) {
6690 return block_rsv;
6691 } else if (ret && block_rsv != global_rsv) {
6692 ret = block_rsv_use_bytes(global_rsv, blocksize);
6693 if (!ret)
6694 return global_rsv;
6695 }
6696 } 6701 }
6697 6702 return ERR_PTR(ret);
6698 return ERR_PTR(-ENOSPC);
6699} 6703}
6700 6704
6701static void unuse_block_rsv(struct btrfs_fs_info *fs_info, 6705static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6763,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6763 extent_op->update_key = 1; 6767 extent_op->update_key = 1;
6764 extent_op->update_flags = 1; 6768 extent_op->update_flags = 1;
6765 extent_op->is_data = 0; 6769 extent_op->is_data = 0;
6770 extent_op->level = level;
6766 6771
6767 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, 6772 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
6768 ins.objectid, 6773 ins.objectid,
@@ -6934,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6934 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 6939 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
6935 BUG_ON(ret); /* -ENOMEM */ 6940 BUG_ON(ret); /* -ENOMEM */
6936 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 6941 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
6937 eb->len, flag, 0); 6942 eb->len, flag,
6943 btrfs_header_level(eb), 0);
6938 BUG_ON(ret); /* -ENOMEM */ 6944 BUG_ON(ret); /* -ENOMEM */
6939 wc->flags[level] |= flag; 6945 wc->flags[level] |= flag;
6940 } 6946 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e93..e7e7afb4a872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
23 23
24static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
26static struct bio_set *btrfs_bioset;
26 27
27#ifdef CONFIG_BTRFS_DEBUG 28#ifdef CONFIG_BTRFS_DEBUG
28static LIST_HEAD(buffers); 29static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
125 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); 126 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
126 if (!extent_buffer_cache) 127 if (!extent_buffer_cache)
127 goto free_state_cache; 128 goto free_state_cache;
129
130 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
131 offsetof(struct btrfs_io_bio, bio));
132 if (!btrfs_bioset)
133 goto free_buffer_cache;
128 return 0; 134 return 0;
129 135
136free_buffer_cache:
137 kmem_cache_destroy(extent_buffer_cache);
138 extent_buffer_cache = NULL;
139
130free_state_cache: 140free_state_cache:
131 kmem_cache_destroy(extent_state_cache); 141 kmem_cache_destroy(extent_state_cache);
142 extent_state_cache = NULL;
132 return -ENOMEM; 143 return -ENOMEM;
133} 144}
134 145
@@ -145,6 +156,8 @@ void extent_io_exit(void)
145 kmem_cache_destroy(extent_state_cache); 156 kmem_cache_destroy(extent_state_cache);
146 if (extent_buffer_cache) 157 if (extent_buffer_cache)
147 kmem_cache_destroy(extent_buffer_cache); 158 kmem_cache_destroy(extent_buffer_cache);
159 if (btrfs_bioset)
160 bioset_free(btrfs_bioset);
148} 161}
149 162
150void extent_io_tree_init(struct extent_io_tree *tree, 163void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1948} 1961}
1949 1962
1950/* 1963/*
1951 * helper function to unlock a page if all the extents in the tree
1952 * for that page are unlocked
1953 */
1954static void check_page_locked(struct extent_io_tree *tree, struct page *page)
1955{
1956 u64 start = page_offset(page);
1957 u64 end = start + PAGE_CACHE_SIZE - 1;
1958 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1959 unlock_page(page);
1960}
1961
1962/*
1963 * helper function to end page writeback if all the extents
1964 * in the tree for that page are done with writeback
1965 */
1966static void check_page_writeback(struct extent_io_tree *tree,
1967 struct page *page)
1968{
1969 end_page_writeback(page);
1970}
1971
1972/*
1973 * When IO fails, either with EIO or csum verification fails, we 1964 * When IO fails, either with EIO or csum verification fails, we
1974 * try other mirrors that might have a good copy of the data. This 1965 * try other mirrors that might have a good copy of the data. This
1975 * io_failure_record is used to record state as we go through all the 1966 * io_failure_record is used to record state as we go through all the
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
2046 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num)) 2037 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
2047 return 0; 2038 return 0;
2048 2039
2049 bio = bio_alloc(GFP_NOFS, 1); 2040 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2050 if (!bio) 2041 if (!bio)
2051 return -EIO; 2042 return -EIO;
2052 bio->bi_private = &compl; 2043 bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2336 return -EIO; 2327 return -EIO;
2337 } 2328 }
2338 2329
2339 bio = bio_alloc(GFP_NOFS, 1); 2330 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2340 if (!bio) { 2331 if (!bio) {
2341 free_io_failure(inode, failrec, 0); 2332 free_io_failure(inode, failrec, 0);
2342 return -EIO; 2333 return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2398 struct extent_io_tree *tree; 2389 struct extent_io_tree *tree;
2399 u64 start; 2390 u64 start;
2400 u64 end; 2391 u64 end;
2401 int whole_page;
2402 2392
2403 do { 2393 do {
2404 struct page *page = bvec->bv_page; 2394 struct page *page = bvec->bv_page;
2405 tree = &BTRFS_I(page->mapping->host)->io_tree; 2395 tree = &BTRFS_I(page->mapping->host)->io_tree;
2406 2396
2407 start = page_offset(page) + bvec->bv_offset; 2397 /* We always issue full-page reads, but if some block
2408 end = start + bvec->bv_len - 1; 2398 * in a page fails to read, blk_update_request() will
2399 * advance bv_offset and adjust bv_len to compensate.
2400 * Print a warning for nonzero offsets, and an error
2401 * if they don't add up to a full page. */
2402 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
2403 printk("%s page write in btrfs with offset %u and length %u\n",
2404 bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
2405 ? KERN_ERR "partial" : KERN_INFO "incomplete",
2406 bvec->bv_offset, bvec->bv_len);
2409 2407
2410 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2408 start = page_offset(page);
2411 whole_page = 1; 2409 end = start + bvec->bv_offset + bvec->bv_len - 1;
2412 else
2413 whole_page = 0;
2414 2410
2415 if (--bvec >= bio->bi_io_vec) 2411 if (--bvec >= bio->bi_io_vec)
2416 prefetchw(&bvec->bv_page->flags); 2412 prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2418 if (end_extent_writepage(page, err, start, end)) 2414 if (end_extent_writepage(page, err, start, end))
2419 continue; 2415 continue;
2420 2416
2421 if (whole_page) 2417 end_page_writeback(page);
2422 end_page_writeback(page);
2423 else
2424 check_page_writeback(tree, page);
2425 } while (bvec >= bio->bi_io_vec); 2418 } while (bvec >= bio->bi_io_vec);
2426 2419
2427 bio_put(bio); 2420 bio_put(bio);
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2446 struct extent_io_tree *tree; 2439 struct extent_io_tree *tree;
2447 u64 start; 2440 u64 start;
2448 u64 end; 2441 u64 end;
2449 int whole_page;
2450 int mirror; 2442 int mirror;
2451 int ret; 2443 int ret;
2452 2444
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2457 struct page *page = bvec->bv_page; 2449 struct page *page = bvec->bv_page;
2458 struct extent_state *cached = NULL; 2450 struct extent_state *cached = NULL;
2459 struct extent_state *state; 2451 struct extent_state *state;
2452 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2460 2453
2461 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2454 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2462 "mirror=%ld\n", (u64)bio->bi_sector, err, 2455 "mirror=%lu\n", (u64)bio->bi_sector, err,
2463 (long int)bio->bi_bdev); 2456 io_bio->mirror_num);
2464 tree = &BTRFS_I(page->mapping->host)->io_tree; 2457 tree = &BTRFS_I(page->mapping->host)->io_tree;
2465 2458
2466 start = page_offset(page) + bvec->bv_offset; 2459 /* We always issue full-page reads, but if some block
2467 end = start + bvec->bv_len - 1; 2460 * in a page fails to read, blk_update_request() will
2461 * advance bv_offset and adjust bv_len to compensate.
2462 * Print a warning for nonzero offsets, and an error
2463 * if they don't add up to a full page. */
2464 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
2465 printk("%s page read in btrfs with offset %u and length %u\n",
2466 bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
2467 ? KERN_ERR "partial" : KERN_INFO "incomplete",
2468 bvec->bv_offset, bvec->bv_len);
2468 2469
2469 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2470 start = page_offset(page);
2470 whole_page = 1; 2471 end = start + bvec->bv_offset + bvec->bv_len - 1;
2471 else
2472 whole_page = 0;
2473 2472
2474 if (++bvec <= bvec_end) 2473 if (++bvec <= bvec_end)
2475 prefetchw(&bvec->bv_page->flags); 2474 prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2485 } 2484 }
2486 spin_unlock(&tree->lock); 2485 spin_unlock(&tree->lock);
2487 2486
2488 mirror = (int)(unsigned long)bio->bi_bdev; 2487 mirror = io_bio->mirror_num;
2489 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2488 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
2490 ret = tree->ops->readpage_end_io_hook(page, start, end, 2489 ret = tree->ops->readpage_end_io_hook(page, start, end,
2491 state, mirror); 2490 state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2528 } 2527 }
2529 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); 2528 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2530 2529
2531 if (whole_page) { 2530 if (uptodate) {
2532 if (uptodate) { 2531 SetPageUptodate(page);
2533 SetPageUptodate(page);
2534 } else {
2535 ClearPageUptodate(page);
2536 SetPageError(page);
2537 }
2538 unlock_page(page);
2539 } else { 2532 } else {
2540 if (uptodate) { 2533 ClearPageUptodate(page);
2541 check_page_uptodate(tree, page); 2534 SetPageError(page);
2542 } else {
2543 ClearPageUptodate(page);
2544 SetPageError(page);
2545 }
2546 check_page_locked(tree, page);
2547 } 2535 }
2536 unlock_page(page);
2548 } while (bvec <= bvec_end); 2537 } while (bvec <= bvec_end);
2549 2538
2550 bio_put(bio); 2539 bio_put(bio);
2551} 2540}
2552 2541
2542/*
2543 * this allocates from the btrfs_bioset. We're returning a bio right now
2544 * but you can call btrfs_io_bio for the appropriate container_of magic
2545 */
2553struct bio * 2546struct bio *
2554btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2547btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2555 gfp_t gfp_flags) 2548 gfp_t gfp_flags)
2556{ 2549{
2557 struct bio *bio; 2550 struct bio *bio;
2558 2551
2559 bio = bio_alloc(gfp_flags, nr_vecs); 2552 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
2560 2553
2561 if (bio == NULL && (current->flags & PF_MEMALLOC)) { 2554 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2562 while (!bio && (nr_vecs /= 2)) 2555 while (!bio && (nr_vecs /= 2)) {
2563 bio = bio_alloc(gfp_flags, nr_vecs); 2556 bio = bio_alloc_bioset(gfp_flags,
2557 nr_vecs, btrfs_bioset);
2558 }
2564 } 2559 }
2565 2560
2566 if (bio) { 2561 if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2571 return bio; 2566 return bio;
2572} 2567}
2573 2568
2569struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2570{
2571 return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
2572}
2573
2574
2575/* this also allocates from the btrfs_bioset */
2576struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2577{
2578 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2579}
2580
2581
2574static int __must_check submit_one_bio(int rw, struct bio *bio, 2582static int __must_check submit_one_bio(int rw, struct bio *bio,
2575 int mirror_num, unsigned long bio_flags) 2583 int mirror_num, unsigned long bio_flags)
2576{ 2584{
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3988 last_for_get_extent = isize; 3996 last_for_get_extent = isize;
3989 } 3997 }
3990 3998
3991 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3999 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
3992 &cached_state); 4000 &cached_state);
3993 4001
3994 em = get_extent_skip_holes(inode, start, last_for_get_extent, 4002 em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4075out_free: 4083out_free:
4076 free_extent_map(em); 4084 free_extent_map(em);
4077out: 4085out:
4078 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, 4086 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4079 &cached_state, GFP_NOFS); 4087 &cached_state, GFP_NOFS);
4080 return ret; 4088 return ret;
4081} 4089}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a2c03a175009..41fb81e7ec53 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
336struct bio * 336struct bio *
337btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 337btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
338 gfp_t gfp_flags); 338 gfp_t gfp_flags);
339struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
340struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
339 341
340struct btrfs_fs_info; 342struct btrfs_fs_info;
341 343
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ecca6c7375a6..e53009657f0e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -197,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root,
197 block_group->key.objectid); 197 block_group->key.objectid);
198} 198}
199 199
200int btrfs_truncate_free_space_cache(struct btrfs_root *root, 200int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
201 struct btrfs_trans_handle *trans, 201 struct btrfs_block_rsv *rsv)
202 struct btrfs_path *path,
203 struct inode *inode)
204{ 202{
205 struct btrfs_block_rsv *rsv;
206 u64 needed_bytes; 203 u64 needed_bytes;
207 loff_t oldsize; 204 int ret;
208 int ret = 0;
209
210 rsv = trans->block_rsv;
211 trans->block_rsv = &root->fs_info->global_block_rsv;
212 205
213 /* 1 for slack space, 1 for updating the inode */ 206 /* 1 for slack space, 1 for updating the inode */
214 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 207 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
215 btrfs_calc_trans_metadata_size(root, 1); 208 btrfs_calc_trans_metadata_size(root, 1);
216 209
217 spin_lock(&trans->block_rsv->lock); 210 spin_lock(&rsv->lock);
218 if (trans->block_rsv->reserved < needed_bytes) { 211 if (rsv->reserved < needed_bytes)
219 spin_unlock(&trans->block_rsv->lock); 212 ret = -ENOSPC;
220 trans->block_rsv = rsv; 213 else
221 return -ENOSPC; 214 ret = 0;
222 } 215 spin_unlock(&rsv->lock);
223 spin_unlock(&trans->block_rsv->lock); 216 return 0;
217}
218
219int btrfs_truncate_free_space_cache(struct btrfs_root *root,
220 struct btrfs_trans_handle *trans,
221 struct btrfs_path *path,
222 struct inode *inode)
223{
224 loff_t oldsize;
225 int ret = 0;
224 226
225 oldsize = i_size_read(inode); 227 oldsize = i_size_read(inode);
226 btrfs_i_size_write(inode, 0); 228 btrfs_i_size_write(inode, 0);
@@ -232,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
232 */ 234 */
233 ret = btrfs_truncate_inode_items(trans, root, inode, 235 ret = btrfs_truncate_inode_items(trans, root, inode,
234 0, BTRFS_EXTENT_DATA_KEY); 236 0, BTRFS_EXTENT_DATA_KEY);
235
236 if (ret) { 237 if (ret) {
237 trans->block_rsv = rsv;
238 btrfs_abort_transaction(trans, root, ret); 238 btrfs_abort_transaction(trans, root, ret);
239 return ret; 239 return ret;
240 } 240 }
@@ -242,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
242 ret = btrfs_update_inode(trans, root, inode); 242 ret = btrfs_update_inode(trans, root, inode);
243 if (ret) 243 if (ret)
244 btrfs_abort_transaction(trans, root, ret); 244 btrfs_abort_transaction(trans, root, ret);
245 trans->block_rsv = rsv;
246 245
247 return ret; 246 return ret;
248} 247}
@@ -920,10 +919,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
920 919
921 /* Make sure we can fit our crcs into the first page */ 920 /* Make sure we can fit our crcs into the first page */
922 if (io_ctl.check_crcs && 921 if (io_ctl.check_crcs &&
923 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { 922 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
924 WARN_ON(1);
925 goto out_nospc; 923 goto out_nospc;
926 }
927 924
928 io_ctl_set_generation(&io_ctl, trans->transid); 925 io_ctl_set_generation(&io_ctl, trans->transid);
929 926
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 4dc17d8809c7..8b7f19f44961 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,
54 struct btrfs_block_group_cache *block_group, 54 struct btrfs_block_group_cache *block_group,
55 struct btrfs_path *path); 55 struct btrfs_path *path);
56 56
57int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
58 struct btrfs_block_rsv *rsv);
57int btrfs_truncate_free_space_cache(struct btrfs_root *root, 59int btrfs_truncate_free_space_cache(struct btrfs_root *root,
58 struct btrfs_trans_handle *trans, 60 struct btrfs_trans_handle *trans,
59 struct btrfs_path *path, 61 struct btrfs_path *path,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index d26f67a59e36..2c66ddbbe670 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
429 num_bytes = trans->bytes_reserved; 429 num_bytes = trans->bytes_reserved;
430 /* 430 /*
431 * 1 item for inode item insertion if need 431 * 1 item for inode item insertion if need
432 * 3 items for inode item update (in the worst case) 432 * 4 items for inode item update (in the worst case)
433 * 1 item for slack space if we need to do truncation
433 * 1 item for free space object 434 * 1 item for free space object
434 * 3 items for pre-allocation 435 * 3 items for pre-allocation
435 */ 436 */
436 trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); 437 trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
437 ret = btrfs_block_rsv_add(root, trans->block_rsv, 438 ret = btrfs_block_rsv_add(root, trans->block_rsv,
438 trans->bytes_reserved, 439 trans->bytes_reserved,
439 BTRFS_RESERVE_NO_FLUSH); 440 BTRFS_RESERVE_NO_FLUSH);
@@ -468,7 +469,8 @@ again:
468 if (i_size_read(inode) > 0) { 469 if (i_size_read(inode) > 0) {
469 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 470 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
470 if (ret) { 471 if (ret) {
471 btrfs_abort_transaction(trans, root, ret); 472 if (ret != -ENOSPC)
473 btrfs_abort_transaction(trans, root, ret);
472 goto out_put; 474 goto out_put;
473 } 475 }
474 } 476 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9b31b3b091fc..af978f7682b3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -715,8 +715,10 @@ retry:
715 async_extent->ram_size - 1, 0); 715 async_extent->ram_size - 1, 0);
716 716
717 em = alloc_extent_map(); 717 em = alloc_extent_map();
718 if (!em) 718 if (!em) {
719 ret = -ENOMEM;
719 goto out_free_reserve; 720 goto out_free_reserve;
721 }
720 em->start = async_extent->start; 722 em->start = async_extent->start;
721 em->len = async_extent->ram_size; 723 em->len = async_extent->ram_size;
722 em->orig_start = em->start; 724 em->orig_start = em->start;
@@ -923,8 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
923 } 925 }
924 926
925 em = alloc_extent_map(); 927 em = alloc_extent_map();
926 if (!em) 928 if (!em) {
929 ret = -ENOMEM;
927 goto out_reserve; 930 goto out_reserve;
931 }
928 em->start = start; 932 em->start = start;
929 em->orig_start = em->start; 933 em->orig_start = em->start;
930 ram_size = ins.offset; 934 ram_size = ins.offset;
@@ -4724,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)
4724 btrfs_end_transaction(trans, root); 4728 btrfs_end_transaction(trans, root);
4725 btrfs_btree_balance_dirty(root); 4729 btrfs_btree_balance_dirty(root);
4726no_delete: 4730no_delete:
4731 btrfs_remove_delayed_node(inode);
4727 clear_inode(inode); 4732 clear_inode(inode);
4728 return; 4733 return;
4729} 4734}
@@ -4839,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)
4839 struct rb_node **p; 4844 struct rb_node **p;
4840 struct rb_node *parent; 4845 struct rb_node *parent;
4841 u64 ino = btrfs_ino(inode); 4846 u64 ino = btrfs_ino(inode);
4842again:
4843 p = &root->inode_tree.rb_node;
4844 parent = NULL;
4845 4847
4846 if (inode_unhashed(inode)) 4848 if (inode_unhashed(inode))
4847 return; 4849 return;
4848 4850again:
4851 parent = NULL;
4849 spin_lock(&root->inode_lock); 4852 spin_lock(&root->inode_lock);
4853 p = &root->inode_tree.rb_node;
4850 while (*p) { 4854 while (*p) {
4851 parent = *p; 4855 parent = *p;
4852 entry = rb_entry(parent, struct btrfs_inode, rb_node); 4856 entry = rb_entry(parent, struct btrfs_inode, rb_node);
@@ -6928,7 +6932,11 @@ struct btrfs_dio_private {
6928 /* IO errors */ 6932 /* IO errors */
6929 int errors; 6933 int errors;
6930 6934
6935 /* orig_bio is our btrfs_io_bio */
6931 struct bio *orig_bio; 6936 struct bio *orig_bio;
6937
6938 /* dio_bio came from fs/direct-io.c */
6939 struct bio *dio_bio;
6932}; 6940};
6933 6941
6934static void btrfs_endio_direct_read(struct bio *bio, int err) 6942static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6938,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6938 struct bio_vec *bvec = bio->bi_io_vec; 6946 struct bio_vec *bvec = bio->bi_io_vec;
6939 struct inode *inode = dip->inode; 6947 struct inode *inode = dip->inode;
6940 struct btrfs_root *root = BTRFS_I(inode)->root; 6948 struct btrfs_root *root = BTRFS_I(inode)->root;
6949 struct bio *dio_bio;
6941 u64 start; 6950 u64 start;
6942 6951
6943 start = dip->logical_offset; 6952 start = dip->logical_offset;
@@ -6977,14 +6986,15 @@ failed:
6977 6986
6978 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6987 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
6979 dip->logical_offset + dip->bytes - 1); 6988 dip->logical_offset + dip->bytes - 1);
6980 bio->bi_private = dip->private; 6989 dio_bio = dip->dio_bio;
6981 6990
6982 kfree(dip); 6991 kfree(dip);
6983 6992
6984 /* If we had a csum failure make sure to clear the uptodate flag */ 6993 /* If we had a csum failure make sure to clear the uptodate flag */
6985 if (err) 6994 if (err)
6986 clear_bit(BIO_UPTODATE, &bio->bi_flags); 6995 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
6987 dio_end_io(bio, err); 6996 dio_end_io(dio_bio, err);
6997 bio_put(bio);
6988} 6998}
6989 6999
6990static void btrfs_endio_direct_write(struct bio *bio, int err) 7000static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6995,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
6995 struct btrfs_ordered_extent *ordered = NULL; 7005 struct btrfs_ordered_extent *ordered = NULL;
6996 u64 ordered_offset = dip->logical_offset; 7006 u64 ordered_offset = dip->logical_offset;
6997 u64 ordered_bytes = dip->bytes; 7007 u64 ordered_bytes = dip->bytes;
7008 struct bio *dio_bio;
6998 int ret; 7009 int ret;
6999 7010
7000 if (err) 7011 if (err)
@@ -7022,14 +7033,15 @@ out_test:
7022 goto again; 7033 goto again;
7023 } 7034 }
7024out_done: 7035out_done:
7025 bio->bi_private = dip->private; 7036 dio_bio = dip->dio_bio;
7026 7037
7027 kfree(dip); 7038 kfree(dip);
7028 7039
7029 /* If we had an error make sure to clear the uptodate flag */ 7040 /* If we had an error make sure to clear the uptodate flag */
7030 if (err) 7041 if (err)
7031 clear_bit(BIO_UPTODATE, &bio->bi_flags); 7042 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
7032 dio_end_io(bio, err); 7043 dio_end_io(dio_bio, err);
7044 bio_put(bio);
7033} 7045}
7034 7046
7035static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, 7047static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7065,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
7065 if (!atomic_dec_and_test(&dip->pending_bios)) 7077 if (!atomic_dec_and_test(&dip->pending_bios))
7066 goto out; 7078 goto out;
7067 7079
7068 if (dip->errors) 7080 if (dip->errors) {
7069 bio_io_error(dip->orig_bio); 7081 bio_io_error(dip->orig_bio);
7070 else { 7082 } else {
7071 set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); 7083 set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
7072 bio_endio(dip->orig_bio, 0); 7084 bio_endio(dip->orig_bio, 0);
7073 } 7085 }
7074out: 7086out:
@@ -7243,25 +7255,34 @@ out_err:
7243 return 0; 7255 return 0;
7244} 7256}
7245 7257
7246static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 7258static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7247 loff_t file_offset) 7259 struct inode *inode, loff_t file_offset)
7248{ 7260{
7249 struct btrfs_root *root = BTRFS_I(inode)->root; 7261 struct btrfs_root *root = BTRFS_I(inode)->root;
7250 struct btrfs_dio_private *dip; 7262 struct btrfs_dio_private *dip;
7251 struct bio_vec *bvec = bio->bi_io_vec; 7263 struct bio_vec *bvec = dio_bio->bi_io_vec;
7264 struct bio *io_bio;
7252 int skip_sum; 7265 int skip_sum;
7253 int write = rw & REQ_WRITE; 7266 int write = rw & REQ_WRITE;
7254 int ret = 0; 7267 int ret = 0;
7255 7268
7256 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7269 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7257 7270
7271 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7272
7273 if (!io_bio) {
7274 ret = -ENOMEM;
7275 goto free_ordered;
7276 }
7277
7258 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7278 dip = kmalloc(sizeof(*dip), GFP_NOFS);
7259 if (!dip) { 7279 if (!dip) {
7260 ret = -ENOMEM; 7280 ret = -ENOMEM;
7261 goto free_ordered; 7281 goto free_io_bio;
7262 } 7282 }
7263 7283
7264 dip->private = bio->bi_private; 7284 dip->private = dio_bio->bi_private;
7285 io_bio->bi_private = dio_bio->bi_private;
7265 dip->inode = inode; 7286 dip->inode = inode;
7266 dip->logical_offset = file_offset; 7287 dip->logical_offset = file_offset;
7267 7288
@@ -7269,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
7269 do { 7290 do {
7270 dip->bytes += bvec->bv_len; 7291 dip->bytes += bvec->bv_len;
7271 bvec++; 7292 bvec++;
7272 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); 7293 } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
7273 7294
7274 dip->disk_bytenr = (u64)bio->bi_sector << 9; 7295 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
7275 bio->bi_private = dip; 7296 io_bio->bi_private = dip;
7276 dip->errors = 0; 7297 dip->errors = 0;
7277 dip->orig_bio = bio; 7298 dip->orig_bio = io_bio;
7299 dip->dio_bio = dio_bio;
7278 atomic_set(&dip->pending_bios, 0); 7300 atomic_set(&dip->pending_bios, 0);
7279 7301
7280 if (write) 7302 if (write)
7281 bio->bi_end_io = btrfs_endio_direct_write; 7303 io_bio->bi_end_io = btrfs_endio_direct_write;
7282 else 7304 else
7283 bio->bi_end_io = btrfs_endio_direct_read; 7305 io_bio->bi_end_io = btrfs_endio_direct_read;
7284 7306
7285 ret = btrfs_submit_direct_hook(rw, dip, skip_sum); 7307 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
7286 if (!ret) 7308 if (!ret)
7287 return; 7309 return;
7310
7311free_io_bio:
7312 bio_put(io_bio);
7313
7288free_ordered: 7314free_ordered:
7289 /* 7315 /*
7290 * If this is a write, we need to clean up the reserved space and kill 7316 * If this is a write, we need to clean up the reserved space and kill
@@ -7300,7 +7326,7 @@ free_ordered:
7300 btrfs_put_ordered_extent(ordered); 7326 btrfs_put_ordered_extent(ordered);
7301 btrfs_put_ordered_extent(ordered); 7327 btrfs_put_ordered_extent(ordered);
7302 } 7328 }
7303 bio_endio(bio, ret); 7329 bio_endio(dio_bio, ret);
7304} 7330}
7305 7331
7306static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, 7332static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
@@ -7979,7 +8005,6 @@ void btrfs_destroy_inode(struct inode *inode)
7979 inode_tree_del(inode); 8005 inode_tree_del(inode);
7980 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 8006 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
7981free: 8007free:
7982 btrfs_remove_delayed_node(inode);
7983 call_rcu(&inode->i_rcu, btrfs_i_callback); 8008 call_rcu(&inode->i_rcu, btrfs_i_callback);
7984} 8009}
7985 8010
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0de4a2fcfb24..0f81d67cdc8d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1801,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1801 item_off = btrfs_item_ptr_offset(leaf, i); 1801 item_off = btrfs_item_ptr_offset(leaf, i);
1802 item_len = btrfs_item_size_nr(leaf, i); 1802 item_len = btrfs_item_size_nr(leaf, i);
1803 1803
1804 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1804 btrfs_item_key_to_cpu(leaf, key, i);
1805 if (!key_in_sk(key, sk))
1806 continue;
1807
1808 if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
1805 item_len = 0; 1809 item_len = 0;
1806 1810
1807 if (sizeof(sh) + item_len + *sk_offset > 1811 if (sizeof(sh) + item_len + *sk_offset >
@@ -1810,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1810 goto overflow; 1814 goto overflow;
1811 } 1815 }
1812 1816
1813 btrfs_item_key_to_cpu(leaf, key, i);
1814 if (!key_in_sk(key, sk))
1815 continue;
1816
1817 sh.objectid = key->objectid; 1817 sh.objectid = key->objectid;
1818 sh.offset = key->offset; 1818 sh.offset = key->offset;
1819 sh.type = key->type; 1819 sh.type = key->type;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0740621daf6c..0525e1389f5b 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1050 } 1050 }
1051 1051
1052 /* put a new bio on the list */ 1052 /* put a new bio on the list */
1053 bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1); 1053 bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
1054 if (!bio) 1054 if (!bio)
1055 return -ENOMEM; 1055 return -ENOMEM;
1056 1056
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 704a1b8d2a2b..395b82031a42 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1773,7 +1773,7 @@ again:
1773 if (!eb || !extent_buffer_uptodate(eb)) { 1773 if (!eb || !extent_buffer_uptodate(eb)) {
1774 ret = (!eb) ? -ENOMEM : -EIO; 1774 ret = (!eb) ? -ENOMEM : -EIO;
1775 free_extent_buffer(eb); 1775 free_extent_buffer(eb);
1776 return ret; 1776 break;
1777 } 1777 }
1778 btrfs_tree_lock(eb); 1778 btrfs_tree_lock(eb);
1779 if (cow) { 1779 if (cow) {
@@ -3350,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3350 } 3350 }
3351 3351
3352truncate: 3352truncate:
3353 ret = btrfs_check_trunc_cache_free_space(root,
3354 &fs_info->global_block_rsv);
3355 if (ret)
3356 goto out;
3357
3353 path = btrfs_alloc_path(); 3358 path = btrfs_alloc_path();
3354 if (!path) { 3359 if (!path) {
3355 ret = -ENOMEM; 3360 ret = -ENOMEM;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f489e24659a4..79bd479317cb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1296 } 1296 }
1297 1297
1298 WARN_ON(!page->page); 1298 WARN_ON(!page->page);
1299 bio = bio_alloc(GFP_NOFS, 1); 1299 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1300 if (!bio) { 1300 if (!bio) {
1301 page->io_error = 1; 1301 page->io_error = 1;
1302 sblock->no_io_error_seen = 0; 1302 sblock->no_io_error_seen = 0;
@@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1431 return -EIO; 1431 return -EIO;
1432 } 1432 }
1433 1433
1434 bio = bio_alloc(GFP_NOFS, 1); 1434 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1435 if (!bio) 1435 if (!bio)
1436 return -EIO; 1436 return -EIO;
1437 bio->bi_bdev = page_bad->dev->bdev; 1437 bio->bi_bdev = page_bad->dev->bdev;
@@ -1522,7 +1522,7 @@ again:
1522 sbio->dev = wr_ctx->tgtdev; 1522 sbio->dev = wr_ctx->tgtdev;
1523 bio = sbio->bio; 1523 bio = sbio->bio;
1524 if (!bio) { 1524 if (!bio) {
1525 bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); 1525 bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
1526 if (!bio) { 1526 if (!bio) {
1527 mutex_unlock(&wr_ctx->wr_lock); 1527 mutex_unlock(&wr_ctx->wr_lock);
1528 return -ENOMEM; 1528 return -ENOMEM;
@@ -1930,7 +1930,7 @@ again:
1930 sbio->dev = spage->dev; 1930 sbio->dev = spage->dev;
1931 bio = sbio->bio; 1931 bio = sbio->bio;
1932 if (!bio) { 1932 if (!bio) {
1933 bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); 1933 bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
1934 if (!bio) 1934 if (!bio)
1935 return -ENOMEM; 1935 return -ENOMEM;
1936 sbio->bio = bio; 1936 sbio->bio = bio;
@@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
3307 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); 3307 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3308 return -EIO; 3308 return -EIO;
3309 } 3309 }
3310 bio = bio_alloc(GFP_NOFS, 1); 3310 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
3311 if (!bio) { 3311 if (!bio) {
3312 spin_lock(&sctx->stat_lock); 3312 spin_lock(&sctx->stat_lock);
3313 sctx->stat.malloc_errors++; 3313 sctx->stat.malloc_errors++;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a4807ced23cc..f0857e092a3c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1263,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1263 1263
1264 btrfs_dev_replace_suspend_for_unmount(fs_info); 1264 btrfs_dev_replace_suspend_for_unmount(fs_info);
1265 btrfs_scrub_cancel(fs_info); 1265 btrfs_scrub_cancel(fs_info);
1266 btrfs_pause_balance(fs_info);
1266 1267
1267 ret = btrfs_commit_super(root); 1268 ret = btrfs_commit_super(root);
1268 if (ret) 1269 if (ret)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e925ced971b..8bffb9174afb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3120,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3120 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 3120 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3121 if (num_devices == 1) 3121 if (num_devices == 1)
3122 allowed |= BTRFS_BLOCK_GROUP_DUP; 3122 allowed |= BTRFS_BLOCK_GROUP_DUP;
3123 else if (num_devices < 4) 3123 else if (num_devices > 1)
3124 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); 3124 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
3125 else 3125 if (num_devices > 2)
3126 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 3126 allowed |= BTRFS_BLOCK_GROUP_RAID5;
3127 BTRFS_BLOCK_GROUP_RAID10 | 3127 if (num_devices > 3)
3128 BTRFS_BLOCK_GROUP_RAID5 | 3128 allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
3129 BTRFS_BLOCK_GROUP_RAID6); 3129 BTRFS_BLOCK_GROUP_RAID6);
3130
3131 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3130 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3132 (!alloc_profile_is_valid(bctl->data.target, 1) || 3131 (!alloc_profile_is_valid(bctl->data.target, 1) ||
3133 (bctl->data.target & ~allowed))) { 3132 (bctl->data.target & ~allowed))) {
@@ -5019,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5019 return 0; 5018 return 0;
5020} 5019}
5021 5020
5022static void *merge_stripe_index_into_bio_private(void *bi_private,
5023 unsigned int stripe_index)
5024{
5025 /*
5026 * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
5027 * at most 1.
5028 * The alternative solution (instead of stealing bits from the
5029 * pointer) would be to allocate an intermediate structure
5030 * that contains the old private pointer plus the stripe_index.
5031 */
5032 BUG_ON((((uintptr_t)bi_private) & 3) != 0);
5033 BUG_ON(stripe_index > 3);
5034 return (void *)(((uintptr_t)bi_private) | stripe_index);
5035}
5036
5037static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
5038{
5039 return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
5040}
5041
5042static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
5043{
5044 return (unsigned int)((uintptr_t)bi_private) & 3;
5045}
5046
5047static void btrfs_end_bio(struct bio *bio, int err) 5021static void btrfs_end_bio(struct bio *bio, int err)
5048{ 5022{
5049 struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); 5023 struct btrfs_bio *bbio = bio->bi_private;
5050 int is_orig_bio = 0; 5024 int is_orig_bio = 0;
5051 5025
5052 if (err) { 5026 if (err) {
5053 atomic_inc(&bbio->error); 5027 atomic_inc(&bbio->error);
5054 if (err == -EIO || err == -EREMOTEIO) { 5028 if (err == -EIO || err == -EREMOTEIO) {
5055 unsigned int stripe_index = 5029 unsigned int stripe_index =
5056 extract_stripe_index_from_bio_private( 5030 btrfs_io_bio(bio)->stripe_index;
5057 bio->bi_private);
5058 struct btrfs_device *dev; 5031 struct btrfs_device *dev;
5059 5032
5060 BUG_ON(stripe_index >= bbio->num_stripes); 5033 BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5084,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
5084 } 5057 }
5085 bio->bi_private = bbio->private; 5058 bio->bi_private = bbio->private;
5086 bio->bi_end_io = bbio->end_io; 5059 bio->bi_end_io = bbio->end_io;
5087 bio->bi_bdev = (struct block_device *) 5060 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5088 (unsigned long)bbio->mirror_num;
5089 /* only send an error to the higher layers if it is 5061 /* only send an error to the higher layers if it is
5090 * beyond the tolerance of the btrfs bio 5062 * beyond the tolerance of the btrfs bio
5091 */ 5063 */
@@ -5211,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
5211 struct btrfs_device *dev = bbio->stripes[dev_nr].dev; 5183 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
5212 5184
5213 bio->bi_private = bbio; 5185 bio->bi_private = bbio;
5214 bio->bi_private = merge_stripe_index_into_bio_private( 5186 btrfs_io_bio(bio)->stripe_index = dev_nr;
5215 bio->bi_private, (unsigned int)dev_nr);
5216 bio->bi_end_io = btrfs_end_bio; 5187 bio->bi_end_io = btrfs_end_bio;
5217 bio->bi_sector = physical >> 9; 5188 bio->bi_sector = physical >> 9;
5218#ifdef DEBUG 5189#ifdef DEBUG
@@ -5273,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
5273 if (atomic_dec_and_test(&bbio->stripes_pending)) { 5244 if (atomic_dec_and_test(&bbio->stripes_pending)) {
5274 bio->bi_private = bbio->private; 5245 bio->bi_private = bbio->private;
5275 bio->bi_end_io = bbio->end_io; 5246 bio->bi_end_io = bbio->end_io;
5276 bio->bi_bdev = (struct block_device *) 5247 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5277 (unsigned long)bbio->mirror_num;
5278 bio->bi_sector = logical >> 9; 5248 bio->bi_sector = logical >> 9;
5279 kfree(bbio); 5249 kfree(bbio);
5280 bio_endio(bio, -EIO); 5250 bio_endio(bio, -EIO);
@@ -5352,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5352 } 5322 }
5353 5323
5354 if (dev_nr < total_devs - 1) { 5324 if (dev_nr < total_devs - 1) {
5355 bio = bio_clone(first_bio, GFP_NOFS); 5325 bio = btrfs_bio_clone(first_bio, GFP_NOFS);
5356 BUG_ON(!bio); /* -ENOMEM */ 5326 BUG_ON(!bio); /* -ENOMEM */
5357 } else { 5327 } else {
5358 bio = first_bio; 5328 bio = first_bio;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 845ccbb0d2e3..f6247e2a47f7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,26 @@ struct btrfs_fs_devices {
152 int rotating; 152 int rotating;
153}; 153};
154 154
155/*
156 * we need the mirror number and stripe index to be passed around
157 * the call chain while we are processing end_io (especially errors).
158 * Really, what we need is a btrfs_bio structure that has this info
159 * and is properly sized with its stripe array, but we're not there
160 * quite yet. We have our own btrfs bioset, and all of the bios
161 * we allocate are actually btrfs_io_bios. We'll cram as much of
162 * struct btrfs_bio as we can into this over time.
163 */
164struct btrfs_io_bio {
165 unsigned long mirror_num; /* mirror number carried to the end_io handlers */
166 unsigned long stripe_index; /* index of the stripe this bio was submitted to */
167 struct bio bio; /* embedded bio; btrfs_io_bio() maps a pointer to it back to this struct */
168};
169
170static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
171{
172 return container_of(bio, struct btrfs_io_bio, bio);
173}
174
155struct btrfs_bio_stripe { 175struct btrfs_bio_stripe {
156 struct btrfs_device *dev; 176 struct btrfs_device *dev;
157 u64 physical; 177 u64 physical;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8e33ec65847b..58df174deb10 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/vfs.h> 19#include <linux/vfs.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/inet.h>
21#include "cifsglob.h" 22#include "cifsglob.h"
22#include "cifsproto.h" 23#include "cifsproto.h"
23#include "cifsfs.h" 24#include "cifsfs.h"
@@ -48,58 +49,74 @@ void cifs_dfs_release_automount_timer(void)
48} 49}
49 50
50/** 51/**
51 * cifs_get_share_name - extracts share name from UNC 52 * cifs_build_devname - build a devicename from a UNC and optional prepath
52 * @node_name: pointer to UNC string 53 * @nodename: pointer to UNC string
54 * @prepath: pointer to prefixpath (or NULL if there isn't one)
53 * 55 *
54 * Extracts sharename form full UNC. 56 * Build a new cifs devicename after chasing a DFS referral. Allocate a buffer
55 * i.e. strips from UNC trailing path that is not part of share 57 * big enough to hold the final thing. Copy the UNC from the nodename, and
56 * name and fixup missing '\' in the beginning of DFS node refferal 58 * concatenate the prepath onto the end of it if there is one.
57 * if necessary. 59 *
58 * Returns pointer to share name on success or ERR_PTR on error. 60 * Returns pointer to the built string, or a ERR_PTR. Caller is responsible
59 * Caller is responsible for freeing returned string. 61 * for freeing the returned string.
60 */ 62 */
61static char *cifs_get_share_name(const char *node_name) 63static char *
64cifs_build_devname(char *nodename, const char *prepath)
62{ 65{
63 int len; 66 size_t pplen;
64 char *UNC; 67 size_t unclen;
65 char *pSep; 68 char *dev;
66 69 char *pos;
67 len = strlen(node_name); 70
68 UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */, 71 /* skip over any preceding delimiters */
69 GFP_KERNEL); 72 nodename += strspn(nodename, "\\");
70 if (!UNC) 73 if (!*nodename)
71 return ERR_PTR(-ENOMEM); 74 return ERR_PTR(-EINVAL);
72 75
73 /* get share name and server name */ 76 /* get length of UNC and set pos to last char */
74 if (node_name[1] != '\\') { 77 unclen = strlen(nodename);
75 UNC[0] = '\\'; 78 pos = nodename + unclen - 1;
76 strncpy(UNC+1, node_name, len);
77 len++;
78 UNC[len] = 0;
79 } else {
80 strncpy(UNC, node_name, len);
81 UNC[len] = 0;
82 }
83 79
84 /* find server name end */ 80 /* trim off any trailing delimiters */
85 pSep = memchr(UNC+2, '\\', len-2); 81 while (*pos == '\\') {
86 if (!pSep) { 82 --pos;
87 cifs_dbg(VFS, "%s: no server name end in node name: %s\n", 83 --unclen;
88 __func__, node_name);
89 kfree(UNC);
90 return ERR_PTR(-EINVAL);
91 } 84 }
92 85
93 /* find sharename end */ 86 /* allocate a buffer:
94 pSep++; 87 * +2 for preceding "//"
95 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); 88 * +1 for delimiter between UNC and prepath
96 if (pSep) { 89 * +1 for trailing NULL
97 /* trim path up to sharename end 90 */
98 * now we have share name in UNC */ 91 pplen = prepath ? strlen(prepath) : 0;
99 *pSep = 0; 92 dev = kmalloc(2 + unclen + 1 + pplen + 1, GFP_KERNEL);
93 if (!dev)
94 return ERR_PTR(-ENOMEM);
95
96 pos = dev;
97 /* add the initial "//" */
98 *pos = '/';
99 ++pos;
100 *pos = '/';
101 ++pos;
102
103 /* copy in the UNC portion from referral */
104 memcpy(pos, nodename, unclen);
105 pos += unclen;
106
107 /* copy the prefixpath remainder (if there is one) */
108 if (pplen) {
109 *pos = '/';
110 ++pos;
111 memcpy(pos, prepath, pplen);
112 pos += pplen;
100 } 113 }
101 114
102 return UNC; 115 /* NULL terminator */
116 *pos = '\0';
117
118 convert_delimiter(dev, '/');
119 return dev;
103} 120}
104 121
105 122
@@ -123,6 +140,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
123{ 140{
124 int rc; 141 int rc;
125 char *mountdata = NULL; 142 char *mountdata = NULL;
143 const char *prepath = NULL;
126 int md_len; 144 int md_len;
127 char *tkn_e; 145 char *tkn_e;
128 char *srvIP = NULL; 146 char *srvIP = NULL;
@@ -132,7 +150,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
132 if (sb_mountdata == NULL) 150 if (sb_mountdata == NULL)
133 return ERR_PTR(-EINVAL); 151 return ERR_PTR(-EINVAL);
134 152
135 *devname = cifs_get_share_name(ref->node_name); 153 if (strlen(fullpath) - ref->path_consumed)
154 prepath = fullpath + ref->path_consumed;
155
156 *devname = cifs_build_devname(ref->node_name, prepath);
136 if (IS_ERR(*devname)) { 157 if (IS_ERR(*devname)) {
137 rc = PTR_ERR(*devname); 158 rc = PTR_ERR(*devname);
138 *devname = NULL; 159 *devname = NULL;
@@ -146,12 +167,14 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
146 goto compose_mount_options_err; 167 goto compose_mount_options_err;
147 } 168 }
148 169
149 /* md_len = strlen(...) + 12 for 'sep+prefixpath=' 170 /*
150 * assuming that we have 'unc=' and 'ip=' in 171 * In most cases, we'll be building a shorter string than the original,
151 * the original sb_mountdata 172 * but we do have to assume that the address in the ip= option may be
173 * much longer than the original. Add the max length of an address
174 * string to the length of the original string to allow for worst case.
152 */ 175 */
153 md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12; 176 md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
154 mountdata = kzalloc(md_len+1, GFP_KERNEL); 177 mountdata = kzalloc(md_len + 1, GFP_KERNEL);
155 if (mountdata == NULL) { 178 if (mountdata == NULL) {
156 rc = -ENOMEM; 179 rc = -ENOMEM;
157 goto compose_mount_options_err; 180 goto compose_mount_options_err;
@@ -195,26 +218,6 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
195 strncat(mountdata, &sep, 1); 218 strncat(mountdata, &sep, 1);
196 strcat(mountdata, "ip="); 219 strcat(mountdata, "ip=");
197 strcat(mountdata, srvIP); 220 strcat(mountdata, srvIP);
198 strncat(mountdata, &sep, 1);
199 strcat(mountdata, "unc=");
200 strcat(mountdata, *devname);
201
202 /* find & copy prefixpath */
203 tkn_e = strchr(ref->node_name + 2, '\\');
204 if (tkn_e == NULL) {
205 /* invalid unc, missing share name*/
206 rc = -EINVAL;
207 goto compose_mount_options_err;
208 }
209
210 tkn_e = strchr(tkn_e + 1, '\\');
211 if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
212 strncat(mountdata, &sep, 1);
213 strcat(mountdata, "prefixpath=");
214 if (tkn_e)
215 strcat(mountdata, tkn_e + 1);
216 strcat(mountdata, fullpath + ref->path_consumed);
217 }
218 221
219 /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ 222 /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
220 /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ 223 /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 72e4efee1389..3752b9f6d9e4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -372,9 +372,6 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
372 cifs_show_security(s, tcon->ses->server); 372 cifs_show_security(s, tcon->ses->server);
373 cifs_show_cache_flavor(s, cifs_sb); 373 cifs_show_cache_flavor(s, cifs_sb);
374 374
375 seq_printf(s, ",unc=");
376 seq_escape(s, tcon->treeName, " \t\n\\");
377
378 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 375 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
379 seq_printf(s, ",multiuser"); 376 seq_printf(s, ",multiuser");
380 else if (tcon->ses->user_name) 377 else if (tcon->ses->user_name)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 99eeaa17ee00..5b97e56ddbca 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1061,6 +1061,7 @@ static int cifs_parse_security_flavors(char *value,
1061#endif 1061#endif
1062 case Opt_sec_none: 1062 case Opt_sec_none:
1063 vol->nullauth = 1; 1063 vol->nullauth = 1;
1064 vol->secFlg |= CIFSSEC_MAY_NTLM;
1064 break; 1065 break;
1065 default: 1066 default:
1066 cifs_dbg(VFS, "bad security option: %s\n", value); 1067 cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -1257,14 +1258,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1257 vol->backupuid_specified = false; /* no backup intent for a user */ 1258 vol->backupuid_specified = false; /* no backup intent for a user */
1258 vol->backupgid_specified = false; /* no backup intent for a group */ 1259 vol->backupgid_specified = false; /* no backup intent for a group */
1259 1260
1260 /* 1261 switch (cifs_parse_devname(devname, vol)) {
1261 * For now, we ignore -EINVAL errors under the assumption that the 1262 case 0:
1262 * unc= and prefixpath= options will be usable. 1263 break;
1263 */ 1264 case -ENOMEM:
1264 if (cifs_parse_devname(devname, vol) == -ENOMEM) { 1265 cifs_dbg(VFS, "Unable to allocate memory for devname.\n");
1265 printk(KERN_ERR "CIFS: Unable to allocate memory to parse " 1266 goto cifs_parse_mount_err;
1266 "device string.\n"); 1267 case -EINVAL:
1267 goto out_nomem; 1268 cifs_dbg(VFS, "Malformed UNC in devname.\n");
1269 goto cifs_parse_mount_err;
1270 default:
1271 cifs_dbg(VFS, "Unknown error parsing devname.\n");
1272 goto cifs_parse_mount_err;
1268 } 1273 }
1269 1274
1270 while ((data = strsep(&options, separator)) != NULL) { 1275 while ((data = strsep(&options, separator)) != NULL) {
@@ -1826,7 +1831,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1826 } 1831 }
1827#endif 1832#endif
1828 if (!vol->UNC) { 1833 if (!vol->UNC) {
1829 cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string or in unc= option!\n"); 1834 cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
1830 goto cifs_parse_mount_err; 1835 goto cifs_parse_mount_err;
1831 } 1836 }
1832 1837
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index e7512e497611..7ede7306599f 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -34,7 +34,7 @@
34 34
35/** 35/**
36 * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address. 36 * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
37 * @unc: UNC path specifying the server 37 * @unc: UNC path specifying the server (with '/' as delimiter)
38 * @ip_addr: Where to return the IP address. 38 * @ip_addr: Where to return the IP address.
39 * 39 *
40 * The IP address will be returned in string form, and the caller is 40 * The IP address will be returned in string form, and the caller is
@@ -64,7 +64,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
64 hostname = unc + 2; 64 hostname = unc + 2;
65 65
66 /* Search for server name delimiter */ 66 /* Search for server name delimiter */
67 sep = memchr(hostname, '\\', len); 67 sep = memchr(hostname, '/', len);
68 if (sep) 68 if (sep)
69 len = sep - hostname; 69 len = sep - hostname;
70 else 70 else
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fc3025199cb3..20efd81266c6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -171,7 +171,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
171 171
172 if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL) 172 if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
173 inode->i_flags |= S_AUTOMOUNT; 173 inode->i_flags |= S_AUTOMOUNT;
174 cifs_set_ops(inode); 174 if (inode->i_state & I_NEW)
175 cifs_set_ops(inode);
175} 176}
176 177
177void 178void
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index bfb531564319..8dd524f32284 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -44,8 +44,11 @@ static ssize_t efivarfs_file_write(struct file *file,
44 44
45 bytes = efivar_entry_set_get_size(var, attributes, &datasize, 45 bytes = efivar_entry_set_get_size(var, attributes, &datasize,
46 data, &set); 46 data, &set);
47 if (!set && bytes) 47 if (!set && bytes) {
48 if (bytes == -ENOENT)
49 bytes = -EIO;
48 goto out; 50 goto out;
51 }
49 52
50 if (bytes == -ENOENT) { 53 if (bytes == -ENOENT) {
51 drop_nlink(inode); 54 drop_nlink(inode);
@@ -76,7 +79,14 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
76 int err; 79 int err;
77 80
78 err = efivar_entry_size(var, &datasize); 81 err = efivar_entry_size(var, &datasize);
79 if (err) 82
83 /*
84 * efivarfs represents uncommitted variables with
85 * zero-length files. Reading them should return EOF.
86 */
87 if (err == -ENOENT)
88 return 0;
89 else if (err)
80 return err; 90 return err;
81 91
82 data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL); 92 data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0aabb344b02e..5aae3d12d400 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
209 ssize_t size; /* size of the extent */ 209 ssize_t size; /* size of the extent */
210 struct kiocb *iocb; /* iocb struct for AIO */ 210 struct kiocb *iocb; /* iocb struct for AIO */
211 int result; /* error value for AIO */ 211 int result; /* error value for AIO */
212 atomic_t count; /* reference counter */
213} ext4_io_end_t; 212} ext4_io_end_t;
214 213
215struct ext4_io_submit { 214struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2651 2650
2652/* page-io.c */ 2651/* page-io.c */
2653extern int __init ext4_init_pageio(void); 2652extern int __init ext4_init_pageio(void);
2653extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2654extern void ext4_exit_pageio(void); 2654extern void ext4_exit_pageio(void);
2655extern void ext4_ioend_shutdown(struct inode *); 2655extern void ext4_ioend_shutdown(struct inode *);
2656extern void ext4_free_io_end(ext4_io_end_t *io);
2656extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2657extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2657extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
2658extern int ext4_put_io_end(ext4_io_end_t *io_end);
2659extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
2660extern void ext4_io_submit_init(struct ext4_io_submit *io,
2661 struct writeback_control *wbc);
2662extern void ext4_end_io_work(struct work_struct *work); 2658extern void ext4_end_io_work(struct work_struct *work);
2663extern void ext4_io_submit(struct ext4_io_submit *io); 2659extern void ext4_io_submit(struct ext4_io_submit *io);
2664extern int ext4_bio_write_page(struct ext4_io_submit *io, 2660extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 107936db244e..bc0f1910b9cf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
3642{ 3642{
3643 struct extent_status es; 3643 struct extent_status es;
3644 3644
3645 ext4_es_find_delayed_extent(inode, lblk_start, &es); 3645 ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
3646 if (es.es_len == 0) 3646 if (es.es_len == 0)
3647 return 0; /* there is no delay extent in this tree */ 3647 return 0; /* there is no delay extent in this tree */
3648 else if (es.es_lblk <= lblk_start && 3648 else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
4608 struct extent_status es; 4608 struct extent_status es;
4609 ext4_lblk_t block, next_del; 4609 ext4_lblk_t block, next_del;
4610 4610
4611 ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
4612
4613 if (newes->es_pblk == 0) { 4611 if (newes->es_pblk == 0) {
4612 ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
4613 newes->es_lblk + newes->es_len - 1, &es);
4614
4614 /* 4615 /*
4615 * No extent in extent-tree contains block @newes->es_pblk, 4616 * No extent in extent-tree contains block @newes->es_pblk,
4616 * then the block may stay in 1)a hole or 2)delayed-extent. 4617 * then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
4630 } 4631 }
4631 4632
4632 block = newes->es_lblk + newes->es_len; 4633 block = newes->es_lblk + newes->es_len;
4633 ext4_es_find_delayed_extent(inode, block, &es); 4634 ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
4634 if (es.es_len == 0) 4635 if (es.es_len == 0)
4635 next_del = EXT_MAX_BLOCKS; 4636 next_del = EXT_MAX_BLOCKS;
4636 else 4637 else
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fe3337a85ede..e6941e622d31 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
232} 232}
233 233
234/* 234/*
235 * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk 235 * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
236 * if it exists, otherwise, the next extent after @es->lblk. 236 * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
237 * 237 *
238 * @inode: the inode which owns delayed extents 238 * @inode: the inode which owns delayed extents
239 * @lblk: the offset where we start to search 239 * @lblk: the offset where we start to search
240 * @end: the offset where we stop to search
240 * @es: delayed extent that we found 241 * @es: delayed extent that we found
241 */ 242 */
242void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, 243void ext4_es_find_delayed_extent_range(struct inode *inode,
244 ext4_lblk_t lblk, ext4_lblk_t end,
243 struct extent_status *es) 245 struct extent_status *es)
244{ 246{
245 struct ext4_es_tree *tree = NULL; 247 struct ext4_es_tree *tree = NULL;
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
247 struct rb_node *node; 249 struct rb_node *node;
248 250
249 BUG_ON(es == NULL); 251 BUG_ON(es == NULL);
250 trace_ext4_es_find_delayed_extent_enter(inode, lblk); 252 BUG_ON(end < lblk);
253 trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
251 254
252 read_lock(&EXT4_I(inode)->i_es_lock); 255 read_lock(&EXT4_I(inode)->i_es_lock);
253 tree = &EXT4_I(inode)->i_es_tree; 256 tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ out:
270 if (es1 && !ext4_es_is_delayed(es1)) { 273 if (es1 && !ext4_es_is_delayed(es1)) {
271 while ((node = rb_next(&es1->rb_node)) != NULL) { 274 while ((node = rb_next(&es1->rb_node)) != NULL) {
272 es1 = rb_entry(node, struct extent_status, rb_node); 275 es1 = rb_entry(node, struct extent_status, rb_node);
276 if (es1->es_lblk > end) {
277 es1 = NULL;
278 break;
279 }
273 if (ext4_es_is_delayed(es1)) 280 if (ext4_es_is_delayed(es1))
274 break; 281 break;
275 } 282 }
@@ -285,7 +292,7 @@ out:
285 read_unlock(&EXT4_I(inode)->i_es_lock); 292 read_unlock(&EXT4_I(inode)->i_es_lock);
286 293
287 ext4_es_lru_add(inode); 294 ext4_es_lru_add(inode);
288 trace_ext4_es_find_delayed_extent_exit(inode, es); 295 trace_ext4_es_find_delayed_extent_range_exit(inode, es);
289} 296}
290 297
291static struct extent_status * 298static struct extent_status *
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d8e2d4dc311e..f740eb03b707 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
62 unsigned long long status); 62 unsigned long long status);
63extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 63extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
64 ext4_lblk_t len); 64 ext4_lblk_t len);
65extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, 65extern void ext4_es_find_delayed_extent_range(struct inode *inode,
66 ext4_lblk_t lblk, ext4_lblk_t end,
66 struct extent_status *es); 67 struct extent_status *es);
67extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 68extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
68 struct extent_status *es); 69 struct extent_status *es);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4959e29573b6..b1b4d51b5d86 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -465,7 +465,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
465 * If there is a delay extent at this offset, 465 * If there is a delay extent at this offset,
466 * it will be as a data. 466 * it will be as a data.
467 */ 467 */
468 ext4_es_find_delayed_extent(inode, last, &es); 468 ext4_es_find_delayed_extent_range(inode, last, last, &es);
469 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { 469 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
470 if (last != start) 470 if (last != start)
471 dataoff = last << blkbits; 471 dataoff = last << blkbits;
@@ -548,7 +548,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
548 * If there is a delay extent at this offset, 548 * If there is a delay extent at this offset,
549 * we will skip this extent. 549 * we will skip this extent.
550 */ 550 */
551 ext4_es_find_delayed_extent(inode, last, &es); 551 ext4_es_find_delayed_extent_range(inode, last, last, &es);
552 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { 552 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
553 last = es.es_lblk + es.es_len; 553 last = es.es_lblk + es.es_len;
554 holeoff = last << blkbits; 554 holeoff = last << blkbits;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0723774bdfb5..d6382b89ecbd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1488,10 +1488,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1488 struct ext4_io_submit io_submit; 1488 struct ext4_io_submit io_submit;
1489 1489
1490 BUG_ON(mpd->next_page <= mpd->first_page); 1490 BUG_ON(mpd->next_page <= mpd->first_page);
1491 ext4_io_submit_init(&io_submit, mpd->wbc); 1491 memset(&io_submit, 0, sizeof(io_submit));
1492 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
1493 if (!io_submit.io_end)
1494 return -ENOMEM;
1495 /* 1492 /*
1496 * We need to start from the first_page to the next_page - 1 1493 * We need to start from the first_page to the next_page - 1
1497 * to make sure we also write the mapped dirty buffer_heads. 1494 * to make sure we also write the mapped dirty buffer_heads.
@@ -1579,8 +1576,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1579 pagevec_release(&pvec); 1576 pagevec_release(&pvec);
1580 } 1577 }
1581 ext4_io_submit(&io_submit); 1578 ext4_io_submit(&io_submit);
1582 /* Drop io_end reference we got from init */
1583 ext4_put_io_end_defer(io_submit.io_end);
1584 return ret; 1579 return ret;
1585} 1580}
1586 1581
@@ -2239,16 +2234,9 @@ static int ext4_writepage(struct page *page,
2239 */ 2234 */
2240 return __ext4_journalled_writepage(page, len); 2235 return __ext4_journalled_writepage(page, len);
2241 2236
2242 ext4_io_submit_init(&io_submit, wbc); 2237 memset(&io_submit, 0, sizeof(io_submit));
2243 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
2244 if (!io_submit.io_end) {
2245 redirty_page_for_writepage(wbc, page);
2246 return -ENOMEM;
2247 }
2248 ret = ext4_bio_write_page(&io_submit, page, len, wbc); 2238 ret = ext4_bio_write_page(&io_submit, page, len, wbc);
2249 ext4_io_submit(&io_submit); 2239 ext4_io_submit(&io_submit);
2250 /* Drop io_end reference we got from init */
2251 ext4_put_io_end_defer(io_submit.io_end);
2252 return ret; 2240 return ret;
2253} 2241}
2254 2242
@@ -3079,13 +3067,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3079 struct inode *inode = file_inode(iocb->ki_filp); 3067 struct inode *inode = file_inode(iocb->ki_filp);
3080 ext4_io_end_t *io_end = iocb->private; 3068 ext4_io_end_t *io_end = iocb->private;
3081 3069
3082 /* if not async direct IO just return */ 3070 /* if not async direct IO or dio with 0 bytes write, just return */
3083 if (!io_end) { 3071 if (!io_end || !size)
3084 inode_dio_done(inode); 3072 goto out;
3085 if (is_async)
3086 aio_complete(iocb, ret, 0);
3087 return;
3088 }
3089 3073
3090 ext_debug("ext4_end_io_dio(): io_end 0x%p " 3074 ext_debug("ext4_end_io_dio(): io_end 0x%p "
3091 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", 3075 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3093,13 +3077,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3093 size); 3077 size);
3094 3078
3095 iocb->private = NULL; 3079 iocb->private = NULL;
3080
3081 /* if not aio dio with unwritten extents, just free io and return */
3082 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3083 ext4_free_io_end(io_end);
3084out:
3085 inode_dio_done(inode);
3086 if (is_async)
3087 aio_complete(iocb, ret, 0);
3088 return;
3089 }
3090
3096 io_end->offset = offset; 3091 io_end->offset = offset;
3097 io_end->size = size; 3092 io_end->size = size;
3098 if (is_async) { 3093 if (is_async) {
3099 io_end->iocb = iocb; 3094 io_end->iocb = iocb;
3100 io_end->result = ret; 3095 io_end->result = ret;
3101 } 3096 }
3102 ext4_put_io_end_defer(io_end); 3097
3098 ext4_add_complete_io(io_end);
3103} 3099}
3104 3100
3105/* 3101/*
@@ -3133,7 +3129,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3133 get_block_t *get_block_func = NULL; 3129 get_block_t *get_block_func = NULL;
3134 int dio_flags = 0; 3130 int dio_flags = 0;
3135 loff_t final_size = offset + count; 3131 loff_t final_size = offset + count;
3136 ext4_io_end_t *io_end = NULL;
3137 3132
3138 /* Use the old path for reads and writes beyond i_size. */ 3133 /* Use the old path for reads and writes beyond i_size. */
3139 if (rw != WRITE || final_size > inode->i_size) 3134 if (rw != WRITE || final_size > inode->i_size)
@@ -3172,16 +3167,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3172 iocb->private = NULL; 3167 iocb->private = NULL;
3173 ext4_inode_aio_set(inode, NULL); 3168 ext4_inode_aio_set(inode, NULL);
3174 if (!is_sync_kiocb(iocb)) { 3169 if (!is_sync_kiocb(iocb)) {
3175 io_end = ext4_init_io_end(inode, GFP_NOFS); 3170 ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
3176 if (!io_end) { 3171 if (!io_end) {
3177 ret = -ENOMEM; 3172 ret = -ENOMEM;
3178 goto retake_lock; 3173 goto retake_lock;
3179 } 3174 }
3180 io_end->flag |= EXT4_IO_END_DIRECT; 3175 io_end->flag |= EXT4_IO_END_DIRECT;
3181 /* 3176 iocb->private = io_end;
3182 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
3183 */
3184 iocb->private = ext4_get_io_end(io_end);
3185 /* 3177 /*
3186 * we save the io structure for current async direct 3178 * we save the io structure for current async direct
3187 * IO, so that later ext4_map_blocks() could flag the 3179 * IO, so that later ext4_map_blocks() could flag the
@@ -3205,27 +3197,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3205 NULL, 3197 NULL,
3206 dio_flags); 3198 dio_flags);
3207 3199
3200 if (iocb->private)
3201 ext4_inode_aio_set(inode, NULL);
3208 /* 3202 /*
3209 * Put our reference to io_end. This can free the io_end structure e.g. 3203 * The io_end structure takes a reference to the inode, that
3210 * in sync IO case or in case of error. It can even perform extent 3204 * structure needs to be destroyed and the reference to the
3211 * conversion if all bios we submitted finished before we got here. 3205 * inode need to be dropped, when IO is complete, even with 0
3212 * Note that in that case iocb->private can be already set to NULL 3206 * byte write, or failed.
3213 * here. 3207 *
3208 * In the successful AIO DIO case, the io_end structure will
3209 * be destroyed and the reference to the inode will be dropped
3210 * after the end_io call back function is called.
3211 *
3212 * In the case there is 0 byte write, or error case, since VFS
3213 * direct IO won't invoke the end_io call back function, we
3214 * need to free the end_io structure here.
3214 */ 3215 */
3215 if (io_end) { 3216 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3216 ext4_inode_aio_set(inode, NULL); 3217 ext4_free_io_end(iocb->private);
3217 ext4_put_io_end(io_end); 3218 iocb->private = NULL;
3218 /* 3219 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3219 * In case of error or no write ext4_end_io_dio() was not
3220 * called so we have to put iocb's reference.
3221 */
3222 if (ret <= 0 && ret != -EIOCBQUEUED) {
3223 WARN_ON(iocb->private != io_end);
3224 ext4_put_io_end(io_end);
3225 iocb->private = NULL;
3226 }
3227 }
3228 if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3229 EXT4_STATE_DIO_UNWRITTEN)) { 3220 EXT4_STATE_DIO_UNWRITTEN)) {
3230 int err; 3221 int err;
3231 /* 3222 /*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1ed9e07434b..def84082a9a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2105,7 +2105,11 @@ repeat:
2105 group = ac->ac_g_ex.fe_group; 2105 group = ac->ac_g_ex.fe_group;
2106 2106
2107 for (i = 0; i < ngroups; group++, i++) { 2107 for (i = 0; i < ngroups; group++, i++) {
2108 if (group == ngroups) 2108 /*
2109 * Artificially restricted ngroups for non-extent
2110 * files makes group > ngroups possible on first loop.
2111 */
2112 if (group >= ngroups)
2109 group = 0; 2113 group = 0;
2110 2114
2111 /* This now checks without needing the buddy page */ 2115 /* This now checks without needing the buddy page */
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62a..4acf1f78881b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,28 +62,15 @@ void ext4_ioend_shutdown(struct inode *inode)
62 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); 62 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
63} 63}
64 64
65static void ext4_release_io_end(ext4_io_end_t *io_end) 65void ext4_free_io_end(ext4_io_end_t *io)
66{ 66{
67 BUG_ON(!list_empty(&io_end->list)); 67 BUG_ON(!io);
68 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); 68 BUG_ON(!list_empty(&io->list));
69 69 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
70 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
71 wake_up_all(ext4_ioend_wq(io_end->inode));
72 if (io_end->flag & EXT4_IO_END_DIRECT)
73 inode_dio_done(io_end->inode);
74 if (io_end->iocb)
75 aio_complete(io_end->iocb, io_end->result, 0);
76 kmem_cache_free(io_end_cachep, io_end);
77}
78
79static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
80{
81 struct inode *inode = io_end->inode;
82 70
83 io_end->flag &= ~EXT4_IO_END_UNWRITTEN; 71 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
84 /* Wake up anyone waiting on unwritten extent conversion */ 72 wake_up_all(ext4_ioend_wq(io->inode));
85 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 73 kmem_cache_free(io_end_cachep, io);
86 wake_up_all(ext4_ioend_wq(inode));
87} 74}
88 75
89/* check a range of space and convert unwritten extents to written. */ 76/* check a range of space and convert unwritten extents to written. */
@@ -106,8 +93,13 @@ static int ext4_end_io(ext4_io_end_t *io)
106 "(inode %lu, offset %llu, size %zd, error %d)", 93 "(inode %lu, offset %llu, size %zd, error %d)",
107 inode->i_ino, offset, size, ret); 94 inode->i_ino, offset, size, ret);
108 } 95 }
109 ext4_clear_io_unwritten_flag(io); 96 /* Wake up anyone waiting on unwritten extent conversion */
110 ext4_release_io_end(io); 97 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
98 wake_up_all(ext4_ioend_wq(inode));
99 if (io->flag & EXT4_IO_END_DIRECT)
100 inode_dio_done(inode);
101 if (io->iocb)
102 aio_complete(io->iocb, io->result, 0);
111 return ret; 103 return ret;
112} 104}
113 105
@@ -138,7 +130,7 @@ static void dump_completed_IO(struct inode *inode)
138} 130}
139 131
140/* Add the io_end to per-inode completed end_io list. */ 132/* Add the io_end to per-inode completed end_io list. */
141static void ext4_add_complete_io(ext4_io_end_t *io_end) 133void ext4_add_complete_io(ext4_io_end_t *io_end)
142{ 134{
143 struct ext4_inode_info *ei = EXT4_I(io_end->inode); 135 struct ext4_inode_info *ei = EXT4_I(io_end->inode);
144 struct workqueue_struct *wq; 136 struct workqueue_struct *wq;
@@ -175,6 +167,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
175 err = ext4_end_io(io); 167 err = ext4_end_io(io);
176 if (unlikely(!ret && err)) 168 if (unlikely(!ret && err))
177 ret = err; 169 ret = err;
170 io->flag &= ~EXT4_IO_END_UNWRITTEN;
171 ext4_free_io_end(io);
178 } 172 }
179 return ret; 173 return ret;
180} 174}
@@ -206,43 +200,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
206 atomic_inc(&EXT4_I(inode)->i_ioend_count); 200 atomic_inc(&EXT4_I(inode)->i_ioend_count);
207 io->inode = inode; 201 io->inode = inode;
208 INIT_LIST_HEAD(&io->list); 202 INIT_LIST_HEAD(&io->list);
209 atomic_set(&io->count, 1);
210 } 203 }
211 return io; 204 return io;
212} 205}
213 206
214void ext4_put_io_end_defer(ext4_io_end_t *io_end)
215{
216 if (atomic_dec_and_test(&io_end->count)) {
217 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
218 ext4_release_io_end(io_end);
219 return;
220 }
221 ext4_add_complete_io(io_end);
222 }
223}
224
225int ext4_put_io_end(ext4_io_end_t *io_end)
226{
227 int err = 0;
228
229 if (atomic_dec_and_test(&io_end->count)) {
230 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
231 err = ext4_convert_unwritten_extents(io_end->inode,
232 io_end->offset, io_end->size);
233 ext4_clear_io_unwritten_flag(io_end);
234 }
235 ext4_release_io_end(io_end);
236 }
237 return err;
238}
239
240ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
241{
242 atomic_inc(&io_end->count);
243 return io_end;
244}
245
246/* 207/*
247 * Print an buffer I/O error compatible with the fs/buffer.c. This 208 * Print an buffer I/O error compatible with the fs/buffer.c. This
248 * provides compatibility with dmesg scrapers that look for a specific 209 * provides compatibility with dmesg scrapers that look for a specific
@@ -325,7 +286,12 @@ static void ext4_end_bio(struct bio *bio, int error)
325 bi_sector >> (inode->i_blkbits - 9)); 286 bi_sector >> (inode->i_blkbits - 9));
326 } 287 }
327 288
328 ext4_put_io_end_defer(io_end); 289 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
290 ext4_free_io_end(io_end);
291 return;
292 }
293
294 ext4_add_complete_io(io_end);
329} 295}
330 296
331void ext4_io_submit(struct ext4_io_submit *io) 297void ext4_io_submit(struct ext4_io_submit *io)
@@ -339,37 +305,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
339 bio_put(io->io_bio); 305 bio_put(io->io_bio);
340 } 306 }
341 io->io_bio = NULL; 307 io->io_bio = NULL;
342} 308 io->io_op = 0;
343
344void ext4_io_submit_init(struct ext4_io_submit *io,
345 struct writeback_control *wbc)
346{
347 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
348 io->io_bio = NULL;
349 io->io_end = NULL; 309 io->io_end = NULL;
350} 310}
351 311
352static int io_submit_init_bio(struct ext4_io_submit *io, 312static int io_submit_init(struct ext4_io_submit *io,
353 struct buffer_head *bh) 313 struct inode *inode,
314 struct writeback_control *wbc,
315 struct buffer_head *bh)
354{ 316{
317 ext4_io_end_t *io_end;
318 struct page *page = bh->b_page;
355 int nvecs = bio_get_nr_vecs(bh->b_bdev); 319 int nvecs = bio_get_nr_vecs(bh->b_bdev);
356 struct bio *bio; 320 struct bio *bio;
357 321
322 io_end = ext4_init_io_end(inode, GFP_NOFS);
323 if (!io_end)
324 return -ENOMEM;
358 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); 325 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
359 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 326 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
360 bio->bi_bdev = bh->b_bdev; 327 bio->bi_bdev = bh->b_bdev;
328 bio->bi_private = io->io_end = io_end;
361 bio->bi_end_io = ext4_end_bio; 329 bio->bi_end_io = ext4_end_bio;
362 bio->bi_private = ext4_get_io_end(io->io_end); 330
363 if (!io->io_end->size) 331 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
364 io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT) 332
365 + bh_offset(bh);
366 io->io_bio = bio; 333 io->io_bio = bio;
334 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
367 io->io_next_block = bh->b_blocknr; 335 io->io_next_block = bh->b_blocknr;
368 return 0; 336 return 0;
369} 337}
370 338
371static int io_submit_add_bh(struct ext4_io_submit *io, 339static int io_submit_add_bh(struct ext4_io_submit *io,
372 struct inode *inode, 340 struct inode *inode,
341 struct writeback_control *wbc,
373 struct buffer_head *bh) 342 struct buffer_head *bh)
374{ 343{
375 ext4_io_end_t *io_end; 344 ext4_io_end_t *io_end;
@@ -380,18 +349,18 @@ submit_and_retry:
380 ext4_io_submit(io); 349 ext4_io_submit(io);
381 } 350 }
382 if (io->io_bio == NULL) { 351 if (io->io_bio == NULL) {
383 ret = io_submit_init_bio(io, bh); 352 ret = io_submit_init(io, inode, wbc, bh);
384 if (ret) 353 if (ret)
385 return ret; 354 return ret;
386 } 355 }
387 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
388 if (ret != bh->b_size)
389 goto submit_and_retry;
390 io_end = io->io_end; 356 io_end = io->io_end;
391 if (test_clear_buffer_uninit(bh)) 357 if (test_clear_buffer_uninit(bh))
392 ext4_set_io_unwritten_flag(inode, io_end); 358 ext4_set_io_unwritten_flag(inode, io_end);
393 io_end->size += bh->b_size; 359 io->io_end->size += bh->b_size;
394 io->io_next_block++; 360 io->io_next_block++;
361 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
362 if (ret != bh->b_size)
363 goto submit_and_retry;
395 return 0; 364 return 0;
396} 365}
397 366
@@ -463,7 +432,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
463 do { 432 do {
464 if (!buffer_async_write(bh)) 433 if (!buffer_async_write(bh))
465 continue; 434 continue;
466 ret = io_submit_add_bh(io, inode, bh); 435 ret = io_submit_add_bh(io, inode, wbc, bh);
467 if (ret) { 436 if (ret) {
468 /* 437 /*
469 * We only get here on ENOMEM. Not much else 438 * We only get here on ENOMEM. Not much else
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index dfce656ddb33..5d4513cb1b3c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1229,6 +1229,19 @@ static int fat_read_root(struct inode *inode)
1229 return 0; 1229 return 0;
1230} 1230}
1231 1231
1232static unsigned long calc_fat_clusters(struct super_block *sb)
1233{
1234 struct msdos_sb_info *sbi = MSDOS_SB(sb);
1235
1236 /* Divide first to avoid overflow */
1237 if (sbi->fat_bits != 12) {
1238 unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits;
1239 return ent_per_sec * sbi->fat_length;
1240 }
1241
1242 return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
1243}
1244
1232/* 1245/*
1233 * Read the super block of an MS-DOS FS. 1246 * Read the super block of an MS-DOS FS.
1234 */ 1247 */
@@ -1434,7 +1447,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1434 sbi->dirty = b->fat16.state & FAT_STATE_DIRTY; 1447 sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
1435 1448
1436 /* check that FAT table does not overflow */ 1449 /* check that FAT table does not overflow */
1437 fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; 1450 fat_clusters = calc_fat_clusters(sb);
1438 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1451 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
1439 if (total_clusters > MAX_FAT(sb)) { 1452 if (total_clusters > MAX_FAT(sb)) {
1440 if (!silent) 1453 if (!silent)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index eb08c9e43c2a..5a376ab81feb 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -26,7 +26,7 @@ config GFS2_FS
26config GFS2_FS_LOCKING_DLM 26config GFS2_FS_LOCKING_DLM
27 bool "GFS2 DLM locking" 27 bool "GFS2 DLM locking"
28 depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ 28 depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
29 HOTPLUG && DLM && CONFIGFS_FS && SYSFS 29 HOTPLUG && CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
30 help 30 help
31 Multiple node locking module for GFS2 31 Multiple node locking module for GFS2
32 32
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c5fa758fd844..68b4c8f1fce8 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -212,7 +212,7 @@ static void gfs2_end_log_write(struct bio *bio, int error)
212 fs_err(sdp, "Error %d writing to log\n", error); 212 fs_err(sdp, "Error %d writing to log\n", error);
213 } 213 }
214 214
215 bio_for_each_segment(bvec, bio, i) { 215 bio_for_each_segment_all(bvec, bio, i) {
216 page = bvec->bv_page; 216 page = bvec->bv_page;
217 if (page_has_buffers(page)) 217 if (page_has_buffers(page))
218 gfs2_end_log_write_bh(sdp, bvec, error); 218 gfs2_end_log_write_bh(sdp, bvec, error);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c7c840e916f8..c253b13722e8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -121,7 +121,7 @@ static u64 qd2index(struct gfs2_quota_data *qd)
121{ 121{
122 struct kqid qid = qd->qd_id; 122 struct kqid qid = qd->qd_id;
123 return (2 * (u64)from_kqid(&init_user_ns, qid)) + 123 return (2 * (u64)from_kqid(&init_user_ns, qid)) +
124 (qid.type == USRQUOTA) ? 0 : 1; 124 ((qid.type == USRQUOTA) ? 0 : 1);
125} 125}
126 126
127static u64 qd2offset(struct gfs2_quota_data *qd) 127static u64 qd2offset(struct gfs2_quota_data *qd)
@@ -721,7 +721,7 @@ get_a_page:
721 goto unlock_out; 721 goto unlock_out;
722 } 722 }
723 723
724 gfs2_trans_add_meta(ip->i_gl, bh); 724 gfs2_trans_add_data(ip->i_gl, bh);
725 725
726 kaddr = kmap_atomic(page); 726 kaddr = kmap_atomic(page);
727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0c5a575b513e..5232525934ae 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1401,9 +1401,14 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1401 u32 extlen; 1401 u32 extlen;
1402 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; 1402 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1403 int ret; 1403 int ret;
1404 struct inode *inode = &ip->i_inode;
1404 1405
1405 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); 1406 if (S_ISDIR(inode->i_mode))
1406 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); 1407 extlen = 1;
1408 else {
1409 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
1410 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1411 }
1407 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) 1412 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1408 return; 1413 return;
1409 1414
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index f3b1a15ccd59..d3fa6bd9503e 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -415,7 +415,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
415 spin_lock(&tree->hash_lock); 415 spin_lock(&tree->hash_lock);
416 node = hfs_bnode_findhash(tree, num); 416 node = hfs_bnode_findhash(tree, num);
417 spin_unlock(&tree->hash_lock); 417 spin_unlock(&tree->hash_lock);
418 BUG_ON(node); 418 if (node) {
419 pr_crit("new node %u already hashed?\n", num);
420 WARN_ON(1);
421 return node;
422 }
419 node = __hfs_bnode_create(tree, num); 423 node = __hfs_bnode_create(tree, num);
420 if (!node) 424 if (!node)
421 return ERR_PTR(-ENOMEM); 425 return ERR_PTR(-ENOMEM);
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 546f6d39713a..834ac13c04b7 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -33,25 +33,27 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
33 if (whence == SEEK_DATA || whence == SEEK_HOLE) 33 if (whence == SEEK_DATA || whence == SEEK_HOLE)
34 return -EINVAL; 34 return -EINVAL;
35 35
36 mutex_lock(&i->i_mutex);
36 hpfs_lock(s); 37 hpfs_lock(s);
37 38
38 /*printk("dir lseek\n");*/ 39 /*printk("dir lseek\n");*/
39 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; 40 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
40 mutex_lock(&i->i_mutex);
41 pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; 41 pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
42 while (pos != new_off) { 42 while (pos != new_off) {
43 if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); 43 if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
44 else goto fail; 44 else goto fail;
45 if (pos == 12) goto fail; 45 if (pos == 12) goto fail;
46 } 46 }
47 mutex_unlock(&i->i_mutex); 47 hpfs_add_pos(i, &filp->f_pos);
48ok: 48ok:
49 filp->f_pos = new_off;
49 hpfs_unlock(s); 50 hpfs_unlock(s);
50 return filp->f_pos = new_off;
51fail:
52 mutex_unlock(&i->i_mutex); 51 mutex_unlock(&i->i_mutex);
52 return new_off;
53fail:
53 /*printk("illegal lseek: %016llx\n", new_off);*/ 54 /*printk("illegal lseek: %016llx\n", new_off);*/
54 hpfs_unlock(s); 55 hpfs_unlock(s);
56 mutex_unlock(&i->i_mutex);
55 return -ESPIPE; 57 return -ESPIPE;
56} 58}
57 59
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index a13d26ede254..0bc27684ebfa 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -414,7 +414,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
414 414
415 spin_lock(&tbl->slot_tbl_lock); 415 spin_lock(&tbl->slot_tbl_lock);
416 /* state manager is resetting the session */ 416 /* state manager is resetting the session */
417 if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { 417 if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
418 spin_unlock(&tbl->slot_tbl_lock); 418 spin_unlock(&tbl->slot_tbl_lock);
419 status = htonl(NFS4ERR_DELAY); 419 status = htonl(NFS4ERR_DELAY);
420 /* Return NFS4ERR_BADSESSION if we're draining the session 420 /* Return NFS4ERR_BADSESSION if we're draining the session
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 59461c957d9d..a35582c9d444 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -763,7 +763,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
763 * A single slot, so highest used slotid is either 0 or -1 763 * A single slot, so highest used slotid is either 0 or -1
764 */ 764 */
765 tbl->highest_used_slotid = NFS4_NO_SLOT; 765 tbl->highest_used_slotid = NFS4_NO_SLOT;
766 nfs4_session_drain_complete(session, tbl); 766 nfs4_slot_tbl_drain_complete(tbl);
767 spin_unlock(&tbl->slot_tbl_lock); 767 spin_unlock(&tbl->slot_tbl_lock);
768} 768}
769 769
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 947b0c908aa9..4cbad5d6b276 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -203,7 +203,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
203 __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); 203 __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
204 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); 204 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
205 if (error == -EINVAL) 205 if (error == -EINVAL)
206 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_NULL); 206 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
207 if (error < 0) 207 if (error < 0)
208 goto error; 208 goto error;
209 209
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8fbc10054115..d7ba5616989c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -572,7 +572,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
572 task->tk_timeout = 0; 572 task->tk_timeout = 0;
573 573
574 spin_lock(&tbl->slot_tbl_lock); 574 spin_lock(&tbl->slot_tbl_lock);
575 if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && 575 if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) &&
576 !args->sa_privileged) { 576 !args->sa_privileged) {
577 /* The state manager will wait until the slot table is empty */ 577 /* The state manager will wait until the slot table is empty */
578 dprintk("%s session is draining\n", __func__); 578 dprintk("%s session is draining\n", __func__);
@@ -1078,7 +1078,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1078 struct nfs4_state *state = opendata->state; 1078 struct nfs4_state *state = opendata->state;
1079 struct nfs_inode *nfsi = NFS_I(state->inode); 1079 struct nfs_inode *nfsi = NFS_I(state->inode);
1080 struct nfs_delegation *delegation; 1080 struct nfs_delegation *delegation;
1081 int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); 1081 int open_mode = opendata->o_arg.open_flags;
1082 fmode_t fmode = opendata->o_arg.fmode; 1082 fmode_t fmode = opendata->o_arg.fmode;
1083 nfs4_stateid stateid; 1083 nfs4_stateid stateid;
1084 int ret = -EAGAIN; 1084 int ret = -EAGAIN;
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index ebda5f4a031b..c4e225e4a9af 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -73,7 +73,7 @@ void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
73 tbl->highest_used_slotid = new_max; 73 tbl->highest_used_slotid = new_max;
74 else { 74 else {
75 tbl->highest_used_slotid = NFS4_NO_SLOT; 75 tbl->highest_used_slotid = NFS4_NO_SLOT;
76 nfs4_session_drain_complete(tbl->session, tbl); 76 nfs4_slot_tbl_drain_complete(tbl);
77 } 77 }
78 } 78 }
79 dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, 79 dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
@@ -226,7 +226,7 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
226 struct nfs4_slot *slot = pslot; 226 struct nfs4_slot *slot = pslot;
227 struct nfs4_slot_table *tbl = slot->table; 227 struct nfs4_slot_table *tbl = slot->table;
228 228
229 if (nfs4_session_draining(tbl->session) && !args->sa_privileged) 229 if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
230 return false; 230 return false;
231 slot->generation = tbl->generation; 231 slot->generation = tbl->generation;
232 args->sa_slot = slot; 232 args->sa_slot = slot;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 6f3cb39386d4..ff7d9f0f8a65 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -25,6 +25,10 @@ struct nfs4_slot {
25}; 25};
26 26
27/* Sessions */ 27/* Sessions */
28enum nfs4_slot_tbl_state {
29 NFS4_SLOT_TBL_DRAINING,
30};
31
28#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) 32#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
29struct nfs4_slot_table { 33struct nfs4_slot_table {
30 struct nfs4_session *session; /* Parent session */ 34 struct nfs4_session *session; /* Parent session */
@@ -43,6 +47,7 @@ struct nfs4_slot_table {
43 unsigned long generation; /* Generation counter for 47 unsigned long generation; /* Generation counter for
44 target_highest_slotid */ 48 target_highest_slotid */
45 struct completion complete; 49 struct completion complete;
50 unsigned long slot_tbl_state;
46}; 51};
47 52
48/* 53/*
@@ -68,7 +73,6 @@ struct nfs4_session {
68 73
69enum nfs4_session_state { 74enum nfs4_session_state {
70 NFS4_SESSION_INITING, 75 NFS4_SESSION_INITING,
71 NFS4_SESSION_DRAINING,
72}; 76};
73 77
74#if defined(CONFIG_NFS_V4_1) 78#if defined(CONFIG_NFS_V4_1)
@@ -88,12 +92,11 @@ extern void nfs4_destroy_session(struct nfs4_session *session);
88extern int nfs4_init_session(struct nfs_server *server); 92extern int nfs4_init_session(struct nfs_server *server);
89extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); 93extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
90 94
91extern void nfs4_session_drain_complete(struct nfs4_session *session, 95extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
92 struct nfs4_slot_table *tbl);
93 96
94static inline bool nfs4_session_draining(struct nfs4_session *session) 97static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
95{ 98{
96 return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); 99 return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
97} 100}
98 101
99bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, 102bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 300d17d85c0e..1fab140764c4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -241,7 +241,7 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
241 if (ses == NULL) 241 if (ses == NULL)
242 return; 242 return;
243 tbl = &ses->fc_slot_table; 243 tbl = &ses->fc_slot_table;
244 if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { 244 if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
245 spin_lock(&tbl->slot_tbl_lock); 245 spin_lock(&tbl->slot_tbl_lock);
246 nfs41_wake_slot_table(tbl); 246 nfs41_wake_slot_table(tbl);
247 spin_unlock(&tbl->slot_tbl_lock); 247 spin_unlock(&tbl->slot_tbl_lock);
@@ -251,15 +251,15 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
251/* 251/*
252 * Signal state manager thread if session fore channel is drained 252 * Signal state manager thread if session fore channel is drained
253 */ 253 */
254void nfs4_session_drain_complete(struct nfs4_session *session, 254void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
255 struct nfs4_slot_table *tbl)
256{ 255{
257 if (nfs4_session_draining(session)) 256 if (nfs4_slot_tbl_draining(tbl))
258 complete(&tbl->complete); 257 complete(&tbl->complete);
259} 258}
260 259
261static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) 260static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
262{ 261{
262 set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
263 spin_lock(&tbl->slot_tbl_lock); 263 spin_lock(&tbl->slot_tbl_lock);
264 if (tbl->highest_used_slotid != NFS4_NO_SLOT) { 264 if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
265 INIT_COMPLETION(tbl->complete); 265 INIT_COMPLETION(tbl->complete);
@@ -275,13 +275,12 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
275 struct nfs4_session *ses = clp->cl_session; 275 struct nfs4_session *ses = clp->cl_session;
276 int ret = 0; 276 int ret = 0;
277 277
278 set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
279 /* back channel */ 278 /* back channel */
280 ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table); 279 ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
281 if (ret) 280 if (ret)
282 return ret; 281 return ret;
283 /* fore channel */ 282 /* fore channel */
284 return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); 283 return nfs4_drain_slot_tbl(&ses->fc_slot_table);
285} 284}
286 285
287static void nfs41_finish_session_reset(struct nfs_client *clp) 286static void nfs41_finish_session_reset(struct nfs_client *clp)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a366107a7331..2d7525fbcf25 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1942,6 +1942,7 @@ static int nfs23_validate_mount_data(void *options,
1942 args->namlen = data->namlen; 1942 args->namlen = data->namlen;
1943 args->bsize = data->bsize; 1943 args->bsize = data->bsize;
1944 1944
1945 args->auth_flavors[0] = RPC_AUTH_UNIX;
1945 if (data->flags & NFS_MOUNT_SECFLAVOUR) 1946 if (data->flags & NFS_MOUNT_SECFLAVOUR)
1946 args->auth_flavors[0] = data->pseudoflavor; 1947 args->auth_flavors[0] = data->pseudoflavor;
1947 if (!args->nfs_server.hostname) 1948 if (!args->nfs_server.hostname)
@@ -2637,6 +2638,7 @@ static int nfs4_validate_mount_data(void *options,
2637 goto out_no_address; 2638 goto out_no_address;
2638 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); 2639 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
2639 2640
2641 args->auth_flavors[0] = RPC_AUTH_UNIX;
2640 if (data->auth_flavourlen) { 2642 if (data->auth_flavourlen) {
2641 if (data->auth_flavourlen > 1) 2643 if (data->auth_flavourlen > 1)
2642 goto out_inval_auth; 2644 goto out_inval_auth;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 689fb608648e..bccfec8343c5 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -219,13 +219,32 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
219 219
220static int nilfs_set_page_dirty(struct page *page) 220static int nilfs_set_page_dirty(struct page *page)
221{ 221{
222 int ret = __set_page_dirty_buffers(page); 222 int ret = __set_page_dirty_nobuffers(page);
223 223
224 if (ret) { 224 if (page_has_buffers(page)) {
225 struct inode *inode = page->mapping->host; 225 struct inode *inode = page->mapping->host;
226 unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); 226 unsigned nr_dirty = 0;
227 struct buffer_head *bh, *head;
227 228
228 nilfs_set_file_dirty(inode, nr_dirty); 229 /*
230 * This page is locked by callers, and no other thread
231 * concurrently marks its buffers dirty since they are
232 * only dirtied through routines in fs/buffer.c in
233 * which call sites of mark_buffer_dirty are protected
234 * by page lock.
235 */
236 bh = head = page_buffers(page);
237 do {
238 /* Do not mark hole blocks dirty */
239 if (buffer_dirty(bh) || !buffer_mapped(bh))
240 continue;
241
242 set_buffer_dirty(bh);
243 nr_dirty++;
244 } while (bh = bh->b_this_page, bh != head);
245
246 if (nr_dirty)
247 nilfs_set_file_dirty(inode, nr_dirty);
229 } 248 }
230 return ret; 249 return ret;
231} 250}
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 1c39efb71bab..2487116d0d33 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -790,7 +790,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
790 &hole_size, &rec, &is_last); 790 &hole_size, &rec, &is_last);
791 if (ret) { 791 if (ret) {
792 mlog_errno(ret); 792 mlog_errno(ret);
793 goto out; 793 goto out_unlock;
794 } 794 }
795 795
796 if (rec.e_blkno == 0ULL) { 796 if (rec.e_blkno == 0ULL) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8a7509f9e6f5..ff54014a24ec 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2288,7 +2288,7 @@ relock:
2288 ret = ocfs2_inode_lock(inode, NULL, 1); 2288 ret = ocfs2_inode_lock(inode, NULL, 1);
2289 if (ret < 0) { 2289 if (ret < 0) {
2290 mlog_errno(ret); 2290 mlog_errno(ret);
2291 goto out_sems; 2291 goto out;
2292 } 2292 }
2293 2293
2294 ocfs2_inode_unlock(inode, 1); 2294 ocfs2_inode_unlock(inode, 1);
diff --git a/fs/pnode.c b/fs/pnode.c
index 3d2a7141b87a..9af0df15256e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -83,7 +83,8 @@ static int do_make_slave(struct mount *mnt)
83 if (peer_mnt == mnt) 83 if (peer_mnt == mnt)
84 peer_mnt = NULL; 84 peer_mnt = NULL;
85 } 85 }
86 if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share)) 86 if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
87 list_empty(&mnt->mnt_share))
87 mnt_release_group_id(mnt); 88 mnt_release_group_id(mnt);
88 89
89 list_del_init(&mnt->mnt_share); 90 list_del_init(&mnt->mnt_share);
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 8798d065e400..afa6be6fc397 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -120,7 +120,7 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
120 struct inode *inode = file_inode(filp); 120 struct inode *inode = file_inode(filp);
121 struct super_block *s = inode->i_sb; 121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s); 122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1); 123 loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
124 unsigned long npages = dir_pages(inode); 124 unsigned long npages = dir_pages(inode);
125 unsigned long n = pos >> PAGE_CACHE_SHIFT; 125 unsigned long n = pos >> PAGE_CACHE_SHIFT;
126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; 126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 66c53b642a88..6c2d136561cb 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -204,6 +204,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
204 next_pos = deh_offset(deh) + 1; 204 next_pos = deh_offset(deh) + 1;
205 205
206 if (item_moved(&tmp_ih, &path_to_entry)) { 206 if (item_moved(&tmp_ih, &path_to_entry)) {
207 set_cpu_key_k_offset(&pos_key,
208 next_pos);
207 goto research; 209 goto research;
208 } 210 }
209 } /* for */ 211 } /* for */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77d6d47abc83..f844533792ee 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1811,11 +1811,16 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1811 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); 1811 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1812 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); 1812 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1813 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); 1813 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1814 if (insert_inode_locked4(inode, args.objectid, 1814
1815 reiserfs_find_actor, &args) < 0) { 1815 reiserfs_write_unlock(inode->i_sb);
1816 err = insert_inode_locked4(inode, args.objectid,
1817 reiserfs_find_actor, &args);
1818 reiserfs_write_lock(inode->i_sb);
1819 if (err) {
1816 err = -EINVAL; 1820 err = -EINVAL;
1817 goto out_bad_inode; 1821 goto out_bad_inode;
1818 } 1822 }
1823
1819 if (old_format_only(sb)) 1824 if (old_format_only(sb))
1820 /* not a perfect generation count, as object ids can be reused, but 1825 /* not a perfect generation count, as object ids can be reused, but
1821 ** this is as good as reiserfs can do right now. 1826 ** this is as good as reiserfs can do right now.
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4cce1d9552fb..821bcf70e467 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -318,7 +318,19 @@ static int delete_one_xattr(struct dentry *dentry, void *data)
318static int chown_one_xattr(struct dentry *dentry, void *data) 318static int chown_one_xattr(struct dentry *dentry, void *data)
319{ 319{
320 struct iattr *attrs = data; 320 struct iattr *attrs = data;
321 return reiserfs_setattr(dentry, attrs); 321 int ia_valid = attrs->ia_valid;
322 int err;
323
324 /*
325 * We only want the ownership bits. Otherwise, we'll do
326 * things like change a directory to a regular file if
327 * ATTR_MODE is set.
328 */
329 attrs->ia_valid &= (ATTR_UID|ATTR_GID);
330 err = reiserfs_setattr(dentry, attrs);
331 attrs->ia_valid = ia_valid;
332
333 return err;
322} 334}
323 335
324/* No i_mutex, but the inode is unconnected. */ 336/* No i_mutex, but the inode is unconnected. */
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d7c01ef64eda..6c8767fdfc6a 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -443,6 +443,9 @@ int reiserfs_acl_chmod(struct inode *inode)
443 int depth; 443 int depth;
444 int error; 444 int error;
445 445
446 if (IS_PRIVATE(inode))
447 return 0;
448
446 if (S_ISLNK(inode->i_mode)) 449 if (S_ISLNK(inode->i_mode))
447 return -EOPNOTSUPP; 450 return -EOPNOTSUPP;
448 451
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2b2691b73428..41a695048be7 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -725,6 +725,25 @@ xfs_convert_page(
725 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, 725 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
726 i_size_read(inode)); 726 i_size_read(inode));
727 727
728 /*
729 * If the current map does not span the entire page we are about to try
730 * to write, then give up. The only way we can write a page that spans
731 * multiple mappings in a single writeback iteration is via the
732 * xfs_vm_writepage() function. Data integrity writeback requires the
733 * entire page to be written in a single attempt, otherwise the part of
734 * the page we don't write here doesn't get written as part of the data
735 * integrity sync.
736 *
737 * For normal writeback, we also don't attempt to write partial pages
738 * here as it simply means that write_cache_pages() will see it under
739 * writeback and ignore the page until some point in the future, at
740 * which time this will be the only page in the file that needs
741 * writeback. Hence for more optimal IO patterns, we should always
742 * avoid partial page writeback due to multiple mappings on a page here.
743 */
744 if (!xfs_imap_valid(inode, imap, end_offset))
745 goto fail_unlock_page;
746
728 len = 1 << inode->i_blkbits; 747 len = 1 << inode->i_blkbits;
729 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), 748 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
730 PAGE_CACHE_SIZE); 749 PAGE_CACHE_SIZE);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 08d5457c948e..d788302e506a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -931,20 +931,22 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
931 */ 931 */
932int 932int
933xfs_attr_shortform_allfit( 933xfs_attr_shortform_allfit(
934 struct xfs_buf *bp, 934 struct xfs_buf *bp,
935 struct xfs_inode *dp) 935 struct xfs_inode *dp)
936{ 936{
937 xfs_attr_leafblock_t *leaf; 937 struct xfs_attr_leafblock *leaf;
938 xfs_attr_leaf_entry_t *entry; 938 struct xfs_attr_leaf_entry *entry;
939 xfs_attr_leaf_name_local_t *name_loc; 939 xfs_attr_leaf_name_local_t *name_loc;
940 int bytes, i; 940 struct xfs_attr3_icleaf_hdr leafhdr;
941 int bytes;
942 int i;
941 943
942 leaf = bp->b_addr; 944 leaf = bp->b_addr;
943 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 945 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
946 entry = xfs_attr3_leaf_entryp(leaf);
944 947
945 entry = &leaf->entries[0];
946 bytes = sizeof(struct xfs_attr_sf_hdr); 948 bytes = sizeof(struct xfs_attr_sf_hdr);
947 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 949 for (i = 0; i < leafhdr.count; entry++, i++) {
948 if (entry->flags & XFS_ATTR_INCOMPLETE) 950 if (entry->flags & XFS_ATTR_INCOMPLETE)
949 continue; /* don't copy partial entries */ 951 continue; /* don't copy partial entries */
950 if (!(entry->flags & XFS_ATTR_LOCAL)) 952 if (!(entry->flags & XFS_ATTR_LOCAL))
@@ -954,15 +956,15 @@ xfs_attr_shortform_allfit(
954 return(0); 956 return(0);
955 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) 957 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
956 return(0); 958 return(0);
957 bytes += sizeof(struct xfs_attr_sf_entry)-1 959 bytes += sizeof(struct xfs_attr_sf_entry) - 1
958 + name_loc->namelen 960 + name_loc->namelen
959 + be16_to_cpu(name_loc->valuelen); 961 + be16_to_cpu(name_loc->valuelen);
960 } 962 }
961 if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && 963 if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
962 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && 964 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
963 (bytes == sizeof(struct xfs_attr_sf_hdr))) 965 (bytes == sizeof(struct xfs_attr_sf_hdr)))
964 return(-1); 966 return -1;
965 return(xfs_attr_shortform_bytesfit(dp, bytes)); 967 return xfs_attr_shortform_bytesfit(dp, bytes);
966} 968}
967 969
968/* 970/*
@@ -1410,7 +1412,7 @@ xfs_attr3_leaf_add_work(
1410 name_rmt->valuelen = 0; 1412 name_rmt->valuelen = 0;
1411 name_rmt->valueblk = 0; 1413 name_rmt->valueblk = 0;
1412 args->rmtblkno = 1; 1414 args->rmtblkno = 1;
1413 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); 1415 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
1414 } 1416 }
1415 xfs_trans_log_buf(args->trans, bp, 1417 xfs_trans_log_buf(args->trans, bp,
1416 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), 1418 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -1443,11 +1445,12 @@ xfs_attr3_leaf_add_work(
1443STATIC void 1445STATIC void
1444xfs_attr3_leaf_compact( 1446xfs_attr3_leaf_compact(
1445 struct xfs_da_args *args, 1447 struct xfs_da_args *args,
1446 struct xfs_attr3_icleaf_hdr *ichdr_d, 1448 struct xfs_attr3_icleaf_hdr *ichdr_dst,
1447 struct xfs_buf *bp) 1449 struct xfs_buf *bp)
1448{ 1450{
1449 xfs_attr_leafblock_t *leaf_s, *leaf_d; 1451 struct xfs_attr_leafblock *leaf_src;
1450 struct xfs_attr3_icleaf_hdr ichdr_s; 1452 struct xfs_attr_leafblock *leaf_dst;
1453 struct xfs_attr3_icleaf_hdr ichdr_src;
1451 struct xfs_trans *trans = args->trans; 1454 struct xfs_trans *trans = args->trans;
1452 struct xfs_mount *mp = trans->t_mountp; 1455 struct xfs_mount *mp = trans->t_mountp;
1453 char *tmpbuffer; 1456 char *tmpbuffer;
@@ -1455,29 +1458,38 @@ xfs_attr3_leaf_compact(
1455 trace_xfs_attr_leaf_compact(args); 1458 trace_xfs_attr_leaf_compact(args);
1456 1459
1457 tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); 1460 tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
1458 ASSERT(tmpbuffer != NULL);
1459 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); 1461 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
1460 memset(bp->b_addr, 0, XFS_LBSIZE(mp)); 1462 memset(bp->b_addr, 0, XFS_LBSIZE(mp));
1463 leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
1464 leaf_dst = bp->b_addr;
1461 1465
1462 /* 1466 /*
1463 * Copy basic information 1467 * Copy the on-disk header back into the destination buffer to ensure
1468 * all the information in the header that is not part of the incore
1469 * header structure is preserved.
1464 */ 1470 */
1465 leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; 1471 memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
1466 leaf_d = bp->b_addr; 1472
1467 ichdr_s = *ichdr_d; /* struct copy */ 1473 /* Initialise the incore headers */
1468 ichdr_d->firstused = XFS_LBSIZE(mp); 1474 ichdr_src = *ichdr_dst; /* struct copy */
1469 ichdr_d->usedbytes = 0; 1475 ichdr_dst->firstused = XFS_LBSIZE(mp);
1470 ichdr_d->count = 0; 1476 ichdr_dst->usedbytes = 0;
1471 ichdr_d->holes = 0; 1477 ichdr_dst->count = 0;
1472 ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s); 1478 ichdr_dst->holes = 0;
1473 ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; 1479 ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
1480 ichdr_dst->freemap[0].size = ichdr_dst->firstused -
1481 ichdr_dst->freemap[0].base;
1482
1483
1484 /* write the header back to initialise the underlying buffer */
1485 xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
1474 1486
1475 /* 1487 /*
1476 * Copy all entries in the same (sorted) order, 1488 * Copy all entries in the same (sorted) order,
1477 * but allocate name/value pairs packed and in sequence. 1489 * but allocate name/value pairs packed and in sequence.
1478 */ 1490 */
1479 xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0, 1491 xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
1480 ichdr_s.count, mp); 1492 ichdr_src.count, mp);
1481 /* 1493 /*
1482 * this logs the entire buffer, but the caller must write the header 1494 * this logs the entire buffer, but the caller must write the header
1483 * back to the buffer when it is finished modifying it. 1495 * back to the buffer when it is finished modifying it.
@@ -2179,14 +2191,24 @@ xfs_attr3_leaf_unbalance(
2179 struct xfs_attr_leafblock *tmp_leaf; 2191 struct xfs_attr_leafblock *tmp_leaf;
2180 struct xfs_attr3_icleaf_hdr tmphdr; 2192 struct xfs_attr3_icleaf_hdr tmphdr;
2181 2193
2182 tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP); 2194 tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
2183 memset(tmp_leaf, 0, state->blocksize); 2195
2184 memset(&tmphdr, 0, sizeof(tmphdr)); 2196 /*
2197 * Copy the header into the temp leaf so that all the stuff
2198 * not in the incore header is present and gets copied back in
2199 * once we've moved all the entries.
2200 */
2201 memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
2185 2202
2203 memset(&tmphdr, 0, sizeof(tmphdr));
2186 tmphdr.magic = savehdr.magic; 2204 tmphdr.magic = savehdr.magic;
2187 tmphdr.forw = savehdr.forw; 2205 tmphdr.forw = savehdr.forw;
2188 tmphdr.back = savehdr.back; 2206 tmphdr.back = savehdr.back;
2189 tmphdr.firstused = state->blocksize; 2207 tmphdr.firstused = state->blocksize;
2208
2209 /* write the header to the temp buffer to initialise it */
2210 xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
2211
2190 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, 2212 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
2191 drop_blk->bp, &drophdr)) { 2213 drop_blk->bp, &drophdr)) {
2192 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, 2214 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
@@ -2330,9 +2352,11 @@ xfs_attr3_leaf_lookup_int(
2330 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2352 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2331 continue; 2353 continue;
2332 args->index = probe; 2354 args->index = probe;
2355 args->valuelen = be32_to_cpu(name_rmt->valuelen);
2333 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2356 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2334 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, 2357 args->rmtblkcnt = xfs_attr3_rmt_blocks(
2335 be32_to_cpu(name_rmt->valuelen)); 2358 args->dp->i_mount,
2359 args->valuelen);
2336 return XFS_ERROR(EEXIST); 2360 return XFS_ERROR(EEXIST);
2337 } 2361 }
2338 } 2362 }
@@ -2383,7 +2407,8 @@ xfs_attr3_leaf_getvalue(
2383 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2407 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2384 valuelen = be32_to_cpu(name_rmt->valuelen); 2408 valuelen = be32_to_cpu(name_rmt->valuelen);
2385 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2409 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2386 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen); 2410 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
2411 valuelen);
2387 if (args->flags & ATTR_KERNOVAL) { 2412 if (args->flags & ATTR_KERNOVAL) {
2388 args->valuelen = valuelen; 2413 args->valuelen = valuelen;
2389 return 0; 2414 return 0;
@@ -2709,7 +2734,8 @@ xfs_attr3_leaf_list_int(
2709 args.valuelen = valuelen; 2734 args.valuelen = valuelen;
2710 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); 2735 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
2711 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 2736 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
2712 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); 2737 args.rmtblkcnt = xfs_attr3_rmt_blocks(
2738 args.dp->i_mount, valuelen);
2713 retval = xfs_attr_rmtval_get(&args); 2739 retval = xfs_attr_rmtval_get(&args);
2714 if (retval) 2740 if (retval)
2715 return retval; 2741 return retval;
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index dee84466dcc9..ef6b0c124528 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -47,22 +47,55 @@
47 * Each contiguous block has a header, so it is not just a simple attribute 47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion. 48 * length to FSB conversion.
49 */ 49 */
50static int 50int
51xfs_attr3_rmt_blocks( 51xfs_attr3_rmt_blocks(
52 struct xfs_mount *mp, 52 struct xfs_mount *mp,
53 int attrlen) 53 int attrlen)
54{ 54{
55 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, 55 if (xfs_sb_version_hascrc(&mp->m_sb)) {
56 mp->m_sb.sb_blocksize); 56 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
57 return (attrlen + buflen - 1) / buflen; 57 return (attrlen + buflen - 1) / buflen;
58 }
59 return XFS_B_TO_FSB(mp, attrlen);
60}
61
62/*
63 * Checking of the remote attribute header is split into two parts. The verifier
64 * does CRC, location and bounds checking, the unpacking function checks the
65 * attribute parameters and owner.
66 */
67static bool
68xfs_attr3_rmt_hdr_ok(
69 struct xfs_mount *mp,
70 void *ptr,
71 xfs_ino_t ino,
72 uint32_t offset,
73 uint32_t size,
74 xfs_daddr_t bno)
75{
76 struct xfs_attr3_rmt_hdr *rmt = ptr;
77
78 if (bno != be64_to_cpu(rmt->rm_blkno))
79 return false;
80 if (offset != be32_to_cpu(rmt->rm_offset))
81 return false;
82 if (size != be32_to_cpu(rmt->rm_bytes))
83 return false;
84 if (ino != be64_to_cpu(rmt->rm_owner))
85 return false;
86
87 /* ok */
88 return true;
58} 89}
59 90
60static bool 91static bool
61xfs_attr3_rmt_verify( 92xfs_attr3_rmt_verify(
62 struct xfs_buf *bp) 93 struct xfs_mount *mp,
94 void *ptr,
95 int fsbsize,
96 xfs_daddr_t bno)
63{ 97{
64 struct xfs_mount *mp = bp->b_target->bt_mount; 98 struct xfs_attr3_rmt_hdr *rmt = ptr;
65 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
66 99
67 if (!xfs_sb_version_hascrc(&mp->m_sb)) 100 if (!xfs_sb_version_hascrc(&mp->m_sb))
68 return false; 101 return false;
@@ -70,7 +103,9 @@ xfs_attr3_rmt_verify(
70 return false; 103 return false;
71 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) 104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
72 return false; 105 return false;
73 if (bp->b_bn != be64_to_cpu(rmt->rm_blkno)) 106 if (be64_to_cpu(rmt->rm_blkno) != bno)
107 return false;
108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
74 return false; 109 return false;
75 if (be32_to_cpu(rmt->rm_offset) + 110 if (be32_to_cpu(rmt->rm_offset) +
76 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) 111 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
@@ -86,17 +121,40 @@ xfs_attr3_rmt_read_verify(
86 struct xfs_buf *bp) 121 struct xfs_buf *bp)
87{ 122{
88 struct xfs_mount *mp = bp->b_target->bt_mount; 123 struct xfs_mount *mp = bp->b_target->bt_mount;
124 char *ptr;
125 int len;
126 bool corrupt = false;
127 xfs_daddr_t bno;
89 128
90 /* no verification of non-crc buffers */ 129 /* no verification of non-crc buffers */
91 if (!xfs_sb_version_hascrc(&mp->m_sb)) 130 if (!xfs_sb_version_hascrc(&mp->m_sb))
92 return; 131 return;
93 132
94 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 133 ptr = bp->b_addr;
95 XFS_ATTR3_RMT_CRC_OFF) || 134 bno = bp->b_bn;
96 !xfs_attr3_rmt_verify(bp)) { 135 len = BBTOB(bp->b_length);
136 ASSERT(len >= XFS_LBSIZE(mp));
137
138 while (len > 0) {
139 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
140 XFS_ATTR3_RMT_CRC_OFF)) {
141 corrupt = true;
142 break;
143 }
144 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
145 corrupt = true;
146 break;
147 }
148 len -= XFS_LBSIZE(mp);
149 ptr += XFS_LBSIZE(mp);
150 bno += mp->m_bsize;
151 }
152
153 if (corrupt) {
97 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 154 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
98 xfs_buf_ioerror(bp, EFSCORRUPTED); 155 xfs_buf_ioerror(bp, EFSCORRUPTED);
99 } 156 } else
157 ASSERT(len == 0);
100} 158}
101 159
102static void 160static void
@@ -105,23 +163,39 @@ xfs_attr3_rmt_write_verify(
105{ 163{
106 struct xfs_mount *mp = bp->b_target->bt_mount; 164 struct xfs_mount *mp = bp->b_target->bt_mount;
107 struct xfs_buf_log_item *bip = bp->b_fspriv; 165 struct xfs_buf_log_item *bip = bp->b_fspriv;
166 char *ptr;
167 int len;
168 xfs_daddr_t bno;
108 169
109 /* no verification of non-crc buffers */ 170 /* no verification of non-crc buffers */
110 if (!xfs_sb_version_hascrc(&mp->m_sb)) 171 if (!xfs_sb_version_hascrc(&mp->m_sb))
111 return; 172 return;
112 173
113 if (!xfs_attr3_rmt_verify(bp)) { 174 ptr = bp->b_addr;
114 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 175 bno = bp->b_bn;
115 xfs_buf_ioerror(bp, EFSCORRUPTED); 176 len = BBTOB(bp->b_length);
116 return; 177 ASSERT(len >= XFS_LBSIZE(mp));
117 } 178
179 while (len > 0) {
180 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
181 XFS_CORRUPTION_ERROR(__func__,
182 XFS_ERRLEVEL_LOW, mp, bp->b_addr);
183 xfs_buf_ioerror(bp, EFSCORRUPTED);
184 return;
185 }
186 if (bip) {
187 struct xfs_attr3_rmt_hdr *rmt;
188
189 rmt = (struct xfs_attr3_rmt_hdr *)ptr;
190 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
191 }
192 xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
118 193
119 if (bip) { 194 len -= XFS_LBSIZE(mp);
120 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 195 ptr += XFS_LBSIZE(mp);
121 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); 196 bno += mp->m_bsize;
122 } 197 }
123 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 198 ASSERT(len == 0);
124 XFS_ATTR3_RMT_CRC_OFF);
125} 199}
126 200
127const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 201const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
@@ -129,15 +203,16 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
129 .verify_write = xfs_attr3_rmt_write_verify, 203 .verify_write = xfs_attr3_rmt_write_verify,
130}; 204};
131 205
132static int 206STATIC int
133xfs_attr3_rmt_hdr_set( 207xfs_attr3_rmt_hdr_set(
134 struct xfs_mount *mp, 208 struct xfs_mount *mp,
209 void *ptr,
135 xfs_ino_t ino, 210 xfs_ino_t ino,
136 uint32_t offset, 211 uint32_t offset,
137 uint32_t size, 212 uint32_t size,
138 struct xfs_buf *bp) 213 xfs_daddr_t bno)
139{ 214{
140 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 215 struct xfs_attr3_rmt_hdr *rmt = ptr;
141 216
142 if (!xfs_sb_version_hascrc(&mp->m_sb)) 217 if (!xfs_sb_version_hascrc(&mp->m_sb))
143 return 0; 218 return 0;
@@ -147,36 +222,107 @@ xfs_attr3_rmt_hdr_set(
147 rmt->rm_bytes = cpu_to_be32(size); 222 rmt->rm_bytes = cpu_to_be32(size);
148 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); 223 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
149 rmt->rm_owner = cpu_to_be64(ino); 224 rmt->rm_owner = cpu_to_be64(ino);
150 rmt->rm_blkno = cpu_to_be64(bp->b_bn); 225 rmt->rm_blkno = cpu_to_be64(bno);
151 bp->b_ops = &xfs_attr3_rmt_buf_ops;
152 226
153 return sizeof(struct xfs_attr3_rmt_hdr); 227 return sizeof(struct xfs_attr3_rmt_hdr);
154} 228}
155 229
156/* 230/*
157 * Checking of the remote attribute header is split into two parts. the verifier 231 * Helper functions to copy attribute data in and out of the on-disk extents
158 * does CRC, location and bounds checking, the unpacking function checks the
159 * attribute parameters and owner.
160 */ 232 */
161static bool 233STATIC int
162xfs_attr3_rmt_hdr_ok( 234xfs_attr_rmtval_copyout(
163 struct xfs_mount *mp, 235 struct xfs_mount *mp,
164 xfs_ino_t ino, 236 struct xfs_buf *bp,
165 uint32_t offset, 237 xfs_ino_t ino,
166 uint32_t size, 238 int *offset,
167 struct xfs_buf *bp) 239 int *valuelen,
240 char **dst)
168{ 241{
169 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 242 char *src = bp->b_addr;
243 xfs_daddr_t bno = bp->b_bn;
244 int len = BBTOB(bp->b_length);
170 245
171 if (offset != be32_to_cpu(rmt->rm_offset)) 246 ASSERT(len >= XFS_LBSIZE(mp));
172 return false;
173 if (size != be32_to_cpu(rmt->rm_bytes))
174 return false;
175 if (ino != be64_to_cpu(rmt->rm_owner))
176 return false;
177 247
178 /* ok */ 248 while (len > 0 && *valuelen > 0) {
179 return true; 249 int hdr_size = 0;
250 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
251
252 byte_cnt = min_t(int, *valuelen, byte_cnt);
253
254 if (xfs_sb_version_hascrc(&mp->m_sb)) {
255 if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
256 byte_cnt, bno)) {
257 xfs_alert(mp,
258"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
259 bno, *offset, byte_cnt, ino);
260 return EFSCORRUPTED;
261 }
262 hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
263 }
264
265 memcpy(*dst, src + hdr_size, byte_cnt);
266
267 /* roll buffer forwards */
268 len -= XFS_LBSIZE(mp);
269 src += XFS_LBSIZE(mp);
270 bno += mp->m_bsize;
271
272 /* roll attribute data forwards */
273 *valuelen -= byte_cnt;
274 *dst += byte_cnt;
275 *offset += byte_cnt;
276 }
277 return 0;
278}
279
280STATIC void
281xfs_attr_rmtval_copyin(
282 struct xfs_mount *mp,
283 struct xfs_buf *bp,
284 xfs_ino_t ino,
285 int *offset,
286 int *valuelen,
287 char **src)
288{
289 char *dst = bp->b_addr;
290 xfs_daddr_t bno = bp->b_bn;
291 int len = BBTOB(bp->b_length);
292
293 ASSERT(len >= XFS_LBSIZE(mp));
294
295 while (len > 0 && *valuelen > 0) {
296 int hdr_size;
297 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
298
299 byte_cnt = min(*valuelen, byte_cnt);
300 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
301 byte_cnt, bno);
302
303 memcpy(dst + hdr_size, *src, byte_cnt);
304
305 /*
306 * If this is the last block, zero the remainder of it.
307 * Check that we are actually the last block, too.
308 */
309 if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
310 ASSERT(*valuelen - byte_cnt == 0);
311 ASSERT(len == XFS_LBSIZE(mp));
312 memset(dst + hdr_size + byte_cnt, 0,
313 XFS_LBSIZE(mp) - hdr_size - byte_cnt);
314 }
315
316 /* roll buffer forwards */
317 len -= XFS_LBSIZE(mp);
318 dst += XFS_LBSIZE(mp);
319 bno += mp->m_bsize;
320
321 /* roll attribute data forwards */
322 *valuelen -= byte_cnt;
323 *src += byte_cnt;
324 *offset += byte_cnt;
325 }
180} 326}
181 327
182/* 328/*
@@ -190,13 +336,12 @@ xfs_attr_rmtval_get(
190 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 336 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
191 struct xfs_mount *mp = args->dp->i_mount; 337 struct xfs_mount *mp = args->dp->i_mount;
192 struct xfs_buf *bp; 338 struct xfs_buf *bp;
193 xfs_daddr_t dblkno;
194 xfs_dablk_t lblkno = args->rmtblkno; 339 xfs_dablk_t lblkno = args->rmtblkno;
195 void *dst = args->value; 340 char *dst = args->value;
196 int valuelen = args->valuelen; 341 int valuelen = args->valuelen;
197 int nmap; 342 int nmap;
198 int error; 343 int error;
199 int blkcnt; 344 int blkcnt = args->rmtblkcnt;
200 int i; 345 int i;
201 int offset = 0; 346 int offset = 0;
202 347
@@ -207,52 +352,36 @@ xfs_attr_rmtval_get(
207 while (valuelen > 0) { 352 while (valuelen > 0) {
208 nmap = ATTR_RMTVALUE_MAPSIZE; 353 nmap = ATTR_RMTVALUE_MAPSIZE;
209 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 354 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
210 args->rmtblkcnt, map, &nmap, 355 blkcnt, map, &nmap,
211 XFS_BMAPI_ATTRFORK); 356 XFS_BMAPI_ATTRFORK);
212 if (error) 357 if (error)
213 return error; 358 return error;
214 ASSERT(nmap >= 1); 359 ASSERT(nmap >= 1);
215 360
216 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 361 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
217 int byte_cnt; 362 xfs_daddr_t dblkno;
218 char *src; 363 int dblkcnt;
219 364
220 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 365 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
221 (map[i].br_startblock != HOLESTARTBLOCK)); 366 (map[i].br_startblock != HOLESTARTBLOCK));
222 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 367 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
223 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 368 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
224 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 369 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
225 dblkno, blkcnt, 0, &bp, 370 dblkno, dblkcnt, 0, &bp,
226 &xfs_attr3_rmt_buf_ops); 371 &xfs_attr3_rmt_buf_ops);
227 if (error) 372 if (error)
228 return error; 373 return error;
229 374
230 byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length)); 375 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
231 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt); 376 &offset, &valuelen,
232 377 &dst);
233 src = bp->b_addr;
234 if (xfs_sb_version_hascrc(&mp->m_sb)) {
235 if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
236 offset, byte_cnt, bp)) {
237 xfs_alert(mp,
238"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
239 offset, byte_cnt, args->dp->i_ino);
240 xfs_buf_relse(bp);
241 return EFSCORRUPTED;
242
243 }
244
245 src += sizeof(struct xfs_attr3_rmt_hdr);
246 }
247
248 memcpy(dst, src, byte_cnt);
249 xfs_buf_relse(bp); 378 xfs_buf_relse(bp);
379 if (error)
380 return error;
250 381
251 offset += byte_cnt; 382 /* roll attribute extent map forwards */
252 dst += byte_cnt;
253 valuelen -= byte_cnt;
254
255 lblkno += map[i].br_blockcount; 383 lblkno += map[i].br_blockcount;
384 blkcnt -= map[i].br_blockcount;
256 } 385 }
257 } 386 }
258 ASSERT(valuelen == 0); 387 ASSERT(valuelen == 0);
@@ -270,17 +399,13 @@ xfs_attr_rmtval_set(
270 struct xfs_inode *dp = args->dp; 399 struct xfs_inode *dp = args->dp;
271 struct xfs_mount *mp = dp->i_mount; 400 struct xfs_mount *mp = dp->i_mount;
272 struct xfs_bmbt_irec map; 401 struct xfs_bmbt_irec map;
273 struct xfs_buf *bp;
274 xfs_daddr_t dblkno;
275 xfs_dablk_t lblkno; 402 xfs_dablk_t lblkno;
276 xfs_fileoff_t lfileoff = 0; 403 xfs_fileoff_t lfileoff = 0;
277 void *src = args->value; 404 char *src = args->value;
278 int blkcnt; 405 int blkcnt;
279 int valuelen; 406 int valuelen;
280 int nmap; 407 int nmap;
281 int error; 408 int error;
282 int hdrcnt = 0;
283 bool crcs = xfs_sb_version_hascrc(&mp->m_sb);
284 int offset = 0; 409 int offset = 0;
285 410
286 trace_xfs_attr_rmtval_set(args); 411 trace_xfs_attr_rmtval_set(args);
@@ -289,24 +414,14 @@ xfs_attr_rmtval_set(
289 * Find a "hole" in the attribute address space large enough for 414 * Find a "hole" in the attribute address space large enough for
290 * us to drop the new attribute's value into. Because CRC enabled 415 * us to drop the new attribute's value into. Because CRC enabled
291 * attributes have headers, we can't just do a straight byte to FSB 416 * attributes have headers, we can't just do a straight byte to FSB
292 * conversion. We calculate the worst case block count in this case 417 * conversion and have to take the header space into account.
293 * and we may not need that many, so we have to handle this when
294 * allocating the blocks below.
295 */ 418 */
296 if (!crcs) 419 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
297 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
298 else
299 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
300
301 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 420 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
302 XFS_ATTR_FORK); 421 XFS_ATTR_FORK);
303 if (error) 422 if (error)
304 return error; 423 return error;
305 424
306 /* Start with the attribute data. We'll allocate the rest afterwards. */
307 if (crcs)
308 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
309
310 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 425 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
311 args->rmtblkcnt = blkcnt; 426 args->rmtblkcnt = blkcnt;
312 427
@@ -349,26 +464,6 @@ xfs_attr_rmtval_set(
349 (map.br_startblock != HOLESTARTBLOCK)); 464 (map.br_startblock != HOLESTARTBLOCK));
350 lblkno += map.br_blockcount; 465 lblkno += map.br_blockcount;
351 blkcnt -= map.br_blockcount; 466 blkcnt -= map.br_blockcount;
352 hdrcnt++;
353
354 /*
355 * If we have enough blocks for the attribute data, calculate
356 * how many extra blocks we need for headers. We might run
357 * through this multiple times in the case that the additional
358 * headers in the blocks needed for the data fragments spills
359 * into requiring more blocks. e.g. for 512 byte blocks, we'll
360 * spill for another block every 9 headers we require in this
361 * loop.
362 */
363 if (crcs && blkcnt == 0) {
364 int total_len;
365
366 total_len = args->valuelen +
367 hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
368 blkcnt = XFS_B_TO_FSB(mp, total_len);
369 blkcnt -= args->rmtblkcnt;
370 args->rmtblkcnt += blkcnt;
371 }
372 467
373 /* 468 /*
374 * Start the next trans in the chain. 469 * Start the next trans in the chain.
@@ -385,18 +480,19 @@ xfs_attr_rmtval_set(
385 * the INCOMPLETE flag. 480 * the INCOMPLETE flag.
386 */ 481 */
387 lblkno = args->rmtblkno; 482 lblkno = args->rmtblkno;
483 blkcnt = args->rmtblkcnt;
388 valuelen = args->valuelen; 484 valuelen = args->valuelen;
389 while (valuelen > 0) { 485 while (valuelen > 0) {
390 int byte_cnt; 486 struct xfs_buf *bp;
391 char *buf; 487 xfs_daddr_t dblkno;
488 int dblkcnt;
489
490 ASSERT(blkcnt > 0);
392 491
393 /*
394 * Try to remember where we decided to put the value.
395 */
396 xfs_bmap_init(args->flist, args->firstblock); 492 xfs_bmap_init(args->flist, args->firstblock);
397 nmap = 1; 493 nmap = 1;
398 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 494 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
399 args->rmtblkcnt, &map, &nmap, 495 blkcnt, &map, &nmap,
400 XFS_BMAPI_ATTRFORK); 496 XFS_BMAPI_ATTRFORK);
401 if (error) 497 if (error)
402 return(error); 498 return(error);
@@ -405,41 +501,27 @@ xfs_attr_rmtval_set(
405 (map.br_startblock != HOLESTARTBLOCK)); 501 (map.br_startblock != HOLESTARTBLOCK));
406 502
407 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 503 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
408 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 504 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
409 505
410 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0); 506 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
411 if (!bp) 507 if (!bp)
412 return ENOMEM; 508 return ENOMEM;
413 bp->b_ops = &xfs_attr3_rmt_buf_ops; 509 bp->b_ops = &xfs_attr3_rmt_buf_ops;
414 510
415 byte_cnt = BBTOB(bp->b_length); 511 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
416 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt); 512 &valuelen, &src);
417 if (valuelen < byte_cnt)
418 byte_cnt = valuelen;
419
420 buf = bp->b_addr;
421 buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
422 byte_cnt, bp);
423 memcpy(buf, src, byte_cnt);
424
425 if (byte_cnt < BBTOB(bp->b_length))
426 xfs_buf_zero(bp, byte_cnt,
427 BBTOB(bp->b_length) - byte_cnt);
428 513
429 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 514 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
430 xfs_buf_relse(bp); 515 xfs_buf_relse(bp);
431 if (error) 516 if (error)
432 return error; 517 return error;
433 518
434 src += byte_cnt;
435 valuelen -= byte_cnt;
436 offset += byte_cnt;
437 hdrcnt--;
438 519
520 /* roll attribute extent map forwards */
439 lblkno += map.br_blockcount; 521 lblkno += map.br_blockcount;
522 blkcnt -= map.br_blockcount;
440 } 523 }
441 ASSERT(valuelen == 0); 524 ASSERT(valuelen == 0);
442 ASSERT(hdrcnt == 0);
443 return 0; 525 return 0;
444} 526}
445 527
@@ -448,33 +530,40 @@ xfs_attr_rmtval_set(
448 * out-of-line buffer that it is stored on. 530 * out-of-line buffer that it is stored on.
449 */ 531 */
450int 532int
451xfs_attr_rmtval_remove(xfs_da_args_t *args) 533xfs_attr_rmtval_remove(
534 struct xfs_da_args *args)
452{ 535{
453 xfs_mount_t *mp; 536 struct xfs_mount *mp = args->dp->i_mount;
454 xfs_bmbt_irec_t map; 537 xfs_dablk_t lblkno;
455 xfs_buf_t *bp; 538 int blkcnt;
456 xfs_daddr_t dblkno; 539 int error;
457 xfs_dablk_t lblkno; 540 int done;
458 int valuelen, blkcnt, nmap, error, done, committed;
459 541
460 trace_xfs_attr_rmtval_remove(args); 542 trace_xfs_attr_rmtval_remove(args);
461 543
462 mp = args->dp->i_mount;
463
464 /* 544 /*
465 * Roll through the "value", invalidating the attribute value's 545 * Roll through the "value", invalidating the attribute value's blocks.
466 * blocks. 546 * Note that args->rmtblkcnt is the minimum number of data blocks we'll
547 * see for a CRC enabled remote attribute. Each extent will have a
548 * header, and so we may have more blocks than we realise here. If we
549 * fail to map the blocks correctly, we'll have problems with the buffer
550 * lookups.
467 */ 551 */
468 lblkno = args->rmtblkno; 552 lblkno = args->rmtblkno;
469 valuelen = args->rmtblkcnt; 553 blkcnt = args->rmtblkcnt;
470 while (valuelen > 0) { 554 while (blkcnt > 0) {
555 struct xfs_bmbt_irec map;
556 struct xfs_buf *bp;
557 xfs_daddr_t dblkno;
558 int dblkcnt;
559 int nmap;
560
471 /* 561 /*
472 * Try to remember where we decided to put the value. 562 * Try to remember where we decided to put the value.
473 */ 563 */
474 nmap = 1; 564 nmap = 1;
475 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 565 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
476 args->rmtblkcnt, &map, &nmap, 566 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
477 XFS_BMAPI_ATTRFORK);
478 if (error) 567 if (error)
479 return(error); 568 return(error);
480 ASSERT(nmap == 1); 569 ASSERT(nmap == 1);
@@ -482,21 +571,20 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
482 (map.br_startblock != HOLESTARTBLOCK)); 571 (map.br_startblock != HOLESTARTBLOCK));
483 572
484 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 573 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
485 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 574 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
486 575
487 /* 576 /*
488 * If the "remote" value is in the cache, remove it. 577 * If the "remote" value is in the cache, remove it.
489 */ 578 */
490 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); 579 bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
491 if (bp) { 580 if (bp) {
492 xfs_buf_stale(bp); 581 xfs_buf_stale(bp);
493 xfs_buf_relse(bp); 582 xfs_buf_relse(bp);
494 bp = NULL; 583 bp = NULL;
495 } 584 }
496 585
497 valuelen -= map.br_blockcount;
498
499 lblkno += map.br_blockcount; 586 lblkno += map.br_blockcount;
587 blkcnt -= map.br_blockcount;
500 } 588 }
501 589
502 /* 590 /*
@@ -506,6 +594,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
506 blkcnt = args->rmtblkcnt; 594 blkcnt = args->rmtblkcnt;
507 done = 0; 595 done = 0;
508 while (!done) { 596 while (!done) {
597 int committed;
598
509 xfs_bmap_init(args->flist, args->firstblock); 599 xfs_bmap_init(args->flist, args->firstblock);
510 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 600 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
511 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 601 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index c7cca60a062a..92a8fd7977cc 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -20,6 +20,14 @@
20 20
21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ 21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
22 22
23/*
24 * There is one of these headers per filesystem block in a remote attribute.
25 * This is done to ensure there is a 1:1 mapping between the attribute value
26 * length and the number of blocks needed to store the attribute. This makes the
27 * verification of a buffer a little more complex, but greatly simplifies the
28 * allocation, reading and writing of these attributes as we don't have to guess
29 * the number of blocks needed to store the attribute data.
30 */
23struct xfs_attr3_rmt_hdr { 31struct xfs_attr3_rmt_hdr {
24 __be32 rm_magic; 32 __be32 rm_magic;
25 __be32 rm_offset; 33 __be32 rm_offset;
@@ -39,6 +47,8 @@ struct xfs_attr3_rmt_hdr {
39 47
40extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; 48extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
41 49
50int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
51
42int xfs_attr_rmtval_get(struct xfs_da_args *args); 52int xfs_attr_rmtval_get(struct xfs_da_args *args);
43int xfs_attr_rmtval_set(struct xfs_da_args *args); 53int xfs_attr_rmtval_set(struct xfs_da_args *args);
44int xfs_attr_rmtval_remove(struct xfs_da_args *args); 54int xfs_attr_rmtval_remove(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 82b70bda9f47..1b2472a46e46 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -513,6 +513,7 @@ _xfs_buf_find(
513 xfs_alert(btp->bt_mount, 513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs); 515 __func__, blkno, eofs);
516 WARN_ON(1);
516 return NULL; 517 return NULL;
517 } 518 }
518 519
@@ -1649,7 +1650,7 @@ xfs_alloc_buftarg(
1649{ 1650{
1650 xfs_buftarg_t *btp; 1651 xfs_buftarg_t *btp;
1651 1652
1652 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); 1653 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);
1653 1654
1654 btp->bt_mount = mp; 1655 btp->bt_mount = mp;
1655 btp->bt_dev = bdev->bd_dev; 1656 btp->bt_dev = bdev->bd_dev;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cf263476d6b4..4ec431777048 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -262,12 +262,7 @@ xfs_buf_item_format_segment(
262 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 262 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
263 vecp->i_len = nbits * XFS_BLF_CHUNK; 263 vecp->i_len = nbits * XFS_BLF_CHUNK;
264 vecp->i_type = XLOG_REG_TYPE_BCHUNK; 264 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
265/* 265 nvecs++;
266 * You would think we need to bump the nvecs here too, but we do not
267 * this number is used by recovery, and it gets confused by the boundary
268 * split here
269 * nvecs++;
270 */
271 vecp++; 266 vecp++;
272 first_bit = next_bit; 267 first_bit = next_bit;
273 last_bit = next_bit; 268 last_bit = next_bit;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9b26a99ebfe9..0b8b2a13cd24 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -270,6 +270,7 @@ xfs_da3_node_read_verify(
270 break; 270 break;
271 return; 271 return;
272 case XFS_ATTR_LEAF_MAGIC: 272 case XFS_ATTR_LEAF_MAGIC:
273 case XFS_ATTR3_LEAF_MAGIC:
273 bp->b_ops = &xfs_attr3_leaf_buf_ops; 274 bp->b_ops = &xfs_attr3_leaf_buf_ops;
274 bp->b_ops->verify_read(bp); 275 bp->b_ops->verify_read(bp);
275 return; 276 return;
@@ -2464,7 +2465,8 @@ xfs_buf_map_from_irec(
2464 ASSERT(nirecs >= 1); 2465 ASSERT(nirecs >= 1);
2465 2466
2466 if (nirecs > 1) { 2467 if (nirecs > 1) {
2467 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP); 2468 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
2469 KM_SLEEP | KM_NOFS);
2468 if (!map) 2470 if (!map)
2469 return ENOMEM; 2471 return ENOMEM;
2470 *mapp = map; 2472 *mapp = map;
@@ -2520,7 +2522,8 @@ xfs_dabuf_map(
2520 * Optimize the one-block case. 2522 * Optimize the one-block case.
2521 */ 2523 */
2522 if (nfsb != 1) 2524 if (nfsb != 1)
2523 irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP); 2525 irecs = kmem_zalloc(sizeof(irec) * nfsb,
2526 KM_SLEEP | KM_NOFS);
2524 2527
2525 nirecs = nfsb; 2528 nirecs = nfsb;
2526 error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, 2529 error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f852b082a084..c407e1ccff43 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -219,6 +219,14 @@ xfs_swap_extents(
219 int taforkblks = 0; 219 int taforkblks = 0;
220 __uint64_t tmp; 220 __uint64_t tmp;
221 221
222 /*
223 * We have no way of updating owner information in the BMBT blocks for
224 * each inode on CRC enabled filesystems, so to avoid corrupting
225 * this metadata we simply don't allow extent swaps to occur.
226 */
227 if (xfs_sb_version_hascrc(&mp->m_sb))
228 return XFS_ERROR(EINVAL);
229
222 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 230 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
223 if (!tempifp) { 231 if (!tempifp) {
224 error = XFS_ERROR(ENOMEM); 232 error = XFS_ERROR(ENOMEM);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a3b1bd841a80..995f1f505a52 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -715,6 +715,7 @@ struct xfs_dir3_free_hdr {
715 __be32 firstdb; /* db of first entry */ 715 __be32 firstdb; /* db of first entry */
716 __be32 nvalid; /* count of valid entries */ 716 __be32 nvalid; /* count of valid entries */
717 __be32 nused; /* count of used entries */ 717 __be32 nused; /* count of used entries */
718 __be32 pad; /* 64 bit alignment. */
718}; 719};
719 720
720struct xfs_dir3_free { 721struct xfs_dir3_free {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 721ba2fe8e54..da71a1819d78 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1336,7 +1336,7 @@ xfs_dir2_leaf_getdents(
1336 mp->m_sb.sb_blocksize); 1336 mp->m_sb.sb_blocksize);
1337 map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + 1337 map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
1338 (length * sizeof(struct xfs_bmbt_irec)), 1338 (length * sizeof(struct xfs_bmbt_irec)),
1339 KM_SLEEP); 1339 KM_SLEEP | KM_NOFS);
1340 map_info->map_size = length; 1340 map_info->map_size = length;
1341 1341
1342 /* 1342 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5246de4912d4..2226a00acd15 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -263,18 +263,19 @@ xfs_dir3_free_get_buf(
263 * Initialize the new block to be empty, and remember 263 * Initialize the new block to be empty, and remember
264 * its first slot as our empty slot. 264 * its first slot as our empty slot.
265 */ 265 */
266 hdr.magic = XFS_DIR2_FREE_MAGIC; 266 memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
267 hdr.firstdb = 0; 267 memset(&hdr, 0, sizeof(hdr));
268 hdr.nused = 0; 268
269 hdr.nvalid = 0;
270 if (xfs_sb_version_hascrc(&mp->m_sb)) { 269 if (xfs_sb_version_hascrc(&mp->m_sb)) {
271 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 270 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
272 271
273 hdr.magic = XFS_DIR3_FREE_MAGIC; 272 hdr.magic = XFS_DIR3_FREE_MAGIC;
273
274 hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); 274 hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
275 hdr3->hdr.owner = cpu_to_be64(dp->i_ino); 275 hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
276 uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); 276 uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
277 } 277 } else
278 hdr.magic = XFS_DIR2_FREE_MAGIC;
278 xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr); 279 xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
279 *bpp = bp; 280 *bpp = bp;
280 return 0; 281 return 0;
@@ -1921,8 +1922,6 @@ xfs_dir2_node_addname_int(
1921 */ 1922 */
1922 freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * 1923 freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1923 xfs_dir3_free_max_bests(mp); 1924 xfs_dir3_free_max_bests(mp);
1924 free->hdr.nvalid = 0;
1925 free->hdr.nused = 0;
1926 } else { 1925 } else {
1927 free = fbp->b_addr; 1926 free = fbp->b_addr;
1928 bests = xfs_dir3_free_bests_p(mp, free); 1927 bests = xfs_dir3_free_bests_p(mp, free);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index c0f375087efc..452920a3f03f 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -305,11 +305,12 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
305{ 305{
306 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents); 306 ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
307 if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) { 307 if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
308 __xfs_efi_release(efip);
309
310 /* recovery needs us to drop the EFI reference, too */ 308 /* recovery needs us to drop the EFI reference, too */
311 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) 309 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
312 __xfs_efi_release(efip); 310 __xfs_efi_release(efip);
311
312 __xfs_efi_release(efip);
313 /* efip may now have been freed, do not reference it again. */
313 } 314 }
314} 315}
315 316
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6dda3f949b04..d04695545397 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -236,6 +236,7 @@ typedef struct xfs_fsop_resblks {
236#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ 236#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */
237#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ 237#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
239 240
240 241
241/* 242/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 87595b211da1..3c3644ea825b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -99,7 +99,9 @@ xfs_fs_geometry(
99 (xfs_sb_version_hasattr2(&mp->m_sb) ? 99 (xfs_sb_version_hasattr2(&mp->m_sb) ?
100 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | 100 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
101 (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? 101 (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
102 XFS_FSOP_GEOM_FLAGS_PROJID32 : 0); 102 XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
103 (xfs_sb_version_hascrc(&mp->m_sb) ?
104 XFS_FSOP_GEOM_FLAGS_V5SB : 0);
103 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 105 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
104 mp->m_sb.sb_logsectsize : BBSIZE; 106 mp->m_sb.sb_logsectsize : BBSIZE;
105 geo->rtsectsize = mp->m_sb.sb_blocksize; 107 geo->rtsectsize = mp->m_sb.sb_blocksize;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d82efaa2ac73..ca9ecaa81112 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -455,6 +455,28 @@ xfs_vn_getattr(
455 return 0; 455 return 0;
456} 456}
457 457
458static void
459xfs_setattr_mode(
460 struct xfs_trans *tp,
461 struct xfs_inode *ip,
462 struct iattr *iattr)
463{
464 struct inode *inode = VFS_I(ip);
465 umode_t mode = iattr->ia_mode;
466
467 ASSERT(tp);
468 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
469
470 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
471 mode &= ~S_ISGID;
472
473 ip->i_d.di_mode &= S_IFMT;
474 ip->i_d.di_mode |= mode & ~S_IFMT;
475
476 inode->i_mode &= S_IFMT;
477 inode->i_mode |= mode & ~S_IFMT;
478}
479
458int 480int
459xfs_setattr_nonsize( 481xfs_setattr_nonsize(
460 struct xfs_inode *ip, 482 struct xfs_inode *ip,
@@ -606,18 +628,8 @@ xfs_setattr_nonsize(
606 /* 628 /*
607 * Change file access modes. 629 * Change file access modes.
608 */ 630 */
609 if (mask & ATTR_MODE) { 631 if (mask & ATTR_MODE)
610 umode_t mode = iattr->ia_mode; 632 xfs_setattr_mode(tp, ip, iattr);
611
612 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
613 mode &= ~S_ISGID;
614
615 ip->i_d.di_mode &= S_IFMT;
616 ip->i_d.di_mode |= mode & ~S_IFMT;
617
618 inode->i_mode &= S_IFMT;
619 inode->i_mode |= mode & ~S_IFMT;
620 }
621 633
622 /* 634 /*
623 * Change file access or modified times. 635 * Change file access or modified times.
@@ -714,9 +726,8 @@ xfs_setattr_size(
714 return XFS_ERROR(error); 726 return XFS_ERROR(error);
715 727
716 ASSERT(S_ISREG(ip->i_d.di_mode)); 728 ASSERT(S_ISREG(ip->i_d.di_mode));
717 ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 729 ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
718 ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| 730 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
719 ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
720 731
721 if (!(flags & XFS_ATTR_NOLOCK)) { 732 if (!(flags & XFS_ATTR_NOLOCK)) {
722 lock_flags |= XFS_IOLOCK_EXCL; 733 lock_flags |= XFS_IOLOCK_EXCL;
@@ -860,6 +871,12 @@ xfs_setattr_size(
860 xfs_inode_clear_eofblocks_tag(ip); 871 xfs_inode_clear_eofblocks_tag(ip);
861 } 872 }
862 873
874 /*
875 * Change file access modes.
876 */
877 if (mask & ATTR_MODE)
878 xfs_setattr_mode(tp, ip, iattr);
879
863 if (mask & ATTR_CTIME) { 880 if (mask & ATTR_CTIME) {
864 inode->i_ctime = iattr->ia_ctime; 881 inode->i_ctime = iattr->ia_ctime;
865 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 882 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index e3d0b85d852b..d0833b54e55d 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -139,7 +139,7 @@ xlog_cil_prepare_log_vecs(
139 139
140 new_lv = kmem_zalloc(sizeof(*new_lv) + 140 new_lv = kmem_zalloc(sizeof(*new_lv) +
141 niovecs * sizeof(struct xfs_log_iovec), 141 niovecs * sizeof(struct xfs_log_iovec),
142 KM_SLEEP); 142 KM_SLEEP|KM_NOFS);
143 143
144 /* The allocated iovec region lies beyond the log vector. */ 144 /* The allocated iovec region lies beyond the log vector. */
145 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; 145 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..d9e4d3c3991a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2097,6 +2097,17 @@ xlog_recover_do_reg_buffer(
2097 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 2097 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
2098 2098
2099 /* 2099 /*
2100 * The dirty regions logged in the buffer, even though
2101 * contiguous, may span multiple chunks. This is because the
2102 * dirty region may span a physical page boundary in a buffer
2103 * and hence be split into two separate vectors for writing into
2104 * the log. Hence we need to trim nbits back to the length of
2105 * the current region being copied out of the log.
2106 */
2107 if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
2108 nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
2109
2110 /*
2100 * Do a sanity check if this is a dquot buffer. Just checking 2111 * Do a sanity check if this is a dquot buffer. Just checking
2101 * the first dquot in the buffer should do. XXXThis is 2112 * the first dquot in the buffer should do. XXXThis is
2102 * probably a good thing to do for other buf types also. 2113 * probably a good thing to do for other buf types also.
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index c41190cad6e9..6cdf6ffc36a1 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -489,31 +489,36 @@ xfs_qm_scall_setqlim(
489 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) 489 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
496 xfs_trans_cancel(tp, 0);
497 return (error);
498 }
499
500 /* 492 /*
501 * We don't want to race with a quotaoff so take the quotaoff lock. 493 * We don't want to race with a quotaoff so take the quotaoff lock.
502 * (We don't hold an inode lock, so there's nothing else to stop 494 * We don't hold an inode lock, so there's nothing else to stop
503 * a quotaoff from happening). (XXXThis doesn't currently happen 495 * a quotaoff from happening.
504 * because we take the vfslock before calling xfs_qm_sysent).
505 */ 496 */
506 mutex_lock(&q->qi_quotaofflock); 497 mutex_lock(&q->qi_quotaofflock);
507 498
508 /* 499 /*
509 * Get the dquot (locked), and join it to the transaction. 500 * Get the dquot (locked) before we start, as we need to do a
510 * Allocate the dquot if this doesn't exist. 501 * transaction to allocate it if it doesn't exist. Once we have the
502 * dquot, unlock it so we can start the next transaction safely. We hold
503 * a reference to the dquot, so it's safe to do this unlock/lock without
504 * it being reclaimed in the meantime.
511 */ 505 */
512 if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { 506 error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
513 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 507 if (error) {
514 ASSERT(error != ENOENT); 508 ASSERT(error != ENOENT);
515 goto out_unlock; 509 goto out_unlock;
516 } 510 }
511 xfs_dqunlock(dqp);
512
513 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
514 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
515 0, 0, XFS_DEFAULT_LOG_COUNT);
516 if (error) {
517 xfs_trans_cancel(tp, 0);
518 goto out_rele;
519 }
520
521 xfs_dqlock(dqp);
517 xfs_trans_dqjoin(tp, dqp); 522 xfs_trans_dqjoin(tp, dqp);
518 ddq = &dqp->q_core; 523 ddq = &dqp->q_core;
519 524
@@ -621,9 +626,10 @@ xfs_qm_scall_setqlim(
621 xfs_trans_log_dquot(tp, dqp); 626 xfs_trans_log_dquot(tp, dqp);
622 627
623 error = xfs_trans_commit(tp, 0); 628 error = xfs_trans_commit(tp, 0);
624 xfs_qm_dqrele(dqp);
625 629
626 out_unlock: 630out_rele:
631 xfs_qm_dqrele(dqp);
632out_unlock:
627 mutex_unlock(&q->qi_quotaofflock); 633 mutex_unlock(&q->qi_quotaofflock);
628 return error; 634 return error;
629} 635}
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5f234389327c..195a403e1522 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -56,16 +56,9 @@ xfs_symlink_blocks(
56 struct xfs_mount *mp, 56 struct xfs_mount *mp,
57 int pathlen) 57 int pathlen)
58{ 58{
59 int fsblocks = 0; 59 int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
60 int len = pathlen;
61 60
62 do { 61 return (pathlen + buflen - 1) / buflen;
63 fsblocks++;
64 len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
65 } while (len > 0);
66
67 ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
68 return fsblocks;
69} 62}
70 63
71static int 64static int
@@ -405,7 +398,7 @@ xfs_symlink(
405 if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version)) 398 if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
406 fs_blocks = 0; 399 fs_blocks = 0;
407 else 400 else
408 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 401 fs_blocks = xfs_symlink_blocks(mp, pathlen);
409 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 402 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
410 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 403 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
411 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 404 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
@@ -512,7 +505,7 @@ xfs_symlink(
512 cur_chunk = target_path; 505 cur_chunk = target_path;
513 offset = 0; 506 offset = 0;
514 for (n = 0; n < nmaps; n++) { 507 for (n = 0; n < nmaps; n++) {
515 char *buf; 508 char *buf;
516 509
517 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 510 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
518 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 511 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
@@ -525,9 +518,7 @@ xfs_symlink(
525 bp->b_ops = &xfs_symlink_buf_ops; 518 bp->b_ops = &xfs_symlink_buf_ops;
526 519
527 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); 520 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
528 if (pathlen < byte_cnt) { 521 byte_cnt = min(byte_cnt, pathlen);
529 byte_cnt = pathlen;
530 }
531 522
532 buf = bp->b_addr; 523 buf = bp->b_addr;
533 buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset, 524 buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
@@ -542,6 +533,7 @@ xfs_symlink(
542 xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) - 533 xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
543 (char *)bp->b_addr); 534 (char *)bp->b_addr);
544 } 535 }
536 ASSERT(pathlen == 0);
545 } 537 }
546 538
547 /* 539 /*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1501f4fa51a6..0176bb21f09a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1453,7 +1453,7 @@ xfs_free_file_space(
1453 xfs_mount_t *mp; 1453 xfs_mount_t *mp;
1454 int nimap; 1454 int nimap;
1455 uint resblks; 1455 uint resblks;
1456 uint rounding; 1456 xfs_off_t rounding;
1457 int rt; 1457 int rt;
1458 xfs_fileoff_t startoffset_fsb; 1458 xfs_fileoff_t startoffset_fsb;
1459 xfs_trans_t *tp; 1459 xfs_trans_t *tp;
@@ -1482,7 +1482,7 @@ xfs_free_file_space(
1482 inode_dio_wait(VFS_I(ip)); 1482 inode_dio_wait(VFS_I(ip));
1483 } 1483 }
1484 1484
1485 rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1485 rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1486 ioffset = offset & ~(rounding - 1); 1486 ioffset = offset & ~(rounding - 1);
1487 error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1487 error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1488 ioffset, -1); 1488 ioffset, -1);