aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/expire.c36
-rw-r--r--fs/bio.c13
-rw-r--r--fs/block_dev.c3
-rw-r--r--fs/btrfs/backref.c4
-rw-r--r--fs/btrfs/compression.c1
-rw-r--r--fs/btrfs/ctree.c9
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/delayed-inode.c12
-rw-r--r--fs/btrfs/delayed-ref.c163
-rw-r--r--fs/btrfs/delayed-ref.h4
-rw-r--r--fs/btrfs/disk-io.c53
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c123
-rw-r--r--fs/btrfs/extent_io.c17
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/inode.c329
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/locking.c2
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/qgroup.c12
-rw-r--r--fs/btrfs/root-tree.c4
-rw-r--r--fs/btrfs/super.c19
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/btrfs/volumes.c37
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/buffer.c66
-rw-r--r--fs/ceph/debugfs.c1
-rw-r--r--fs/ceph/inode.c15
-rw-r--r--fs/ceph/ioctl.c3
-rw-r--r--fs/cifs/cifssmb.c11
-rw-r--r--fs/cifs/dir.c9
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c24
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/smb2misc.c16
-rw-r--r--fs/cifs/smb2pdu.h10
-rw-r--r--fs/cifs/transport.c9
-rw-r--r--fs/compat.c10
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/ecryptfs/file.c10
-rw-r--r--fs/ecryptfs/inode.c5
-rw-r--r--fs/ecryptfs/main.c1
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exofs/inode.c27
-rw-r--r--fs/exofs/ore.c14
-rw-r--r--fs/exofs/super.c11
-rw-r--r--fs/ext3/inode.c25
-rw-r--r--fs/ext3/super.c11
-rw-r--r--fs/ext4/balloc.c62
-rw-r--r--fs/ext4/bitmap.c1
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/inode.c10
-rw-r--r--fs/ext4/super.c17
-rw-r--r--fs/fuse/control.c4
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/file.c15
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c44
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/jbd/journal.c9
-rw-r--r--fs/jbd2/journal.c7
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/inode.c18
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/readwrite.c1
-rw-r--r--fs/logfs/segment.c2
-rw-r--r--fs/namei.c10
-rw-r--r--fs/nfs/Makefile18
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/idmap.c62
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4proc.c127
-rw-r--r--fs/nfs/nfs4super.c15
-rw-r--r--fs/nfs/nfs4xdr.c41
-rw-r--r--fs/nfs/objlayout/objio_osd.c55
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs.c39
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/super.c41
-rw-r--r--fs/nfs/write.c15
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/nilfs2/super.c4
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/open.c9
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/reiserfs/bitmap.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c40
-rw-r--r--fs/ubifs/debug.h2
-rw-r--r--fs/ubifs/file.c10
-rw-r--r--fs/ubifs/lpt.c5
-rw-r--r--fs/ubifs/recovery.c2
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/super.c5
-rw-r--r--fs/udf/file.c35
-rw-r--r--fs/udf/inode.c5
-rw-r--r--fs/udf/super.c7
-rw-r--r--fs/xfs/xfs_discard.c6
-rw-r--r--fs/xfs/xfs_ialloc.c17
-rw-r--r--fs/xfs/xfs_rtalloc.c2
110 files changed, 1104 insertions, 889 deletions
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 1feb68ecef95..842d00048a65 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -94,25 +94,21 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev,
94{ 94{
95 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); 95 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
96 struct list_head *next; 96 struct list_head *next;
97 struct dentry *p, *q; 97 struct dentry *q;
98 98
99 spin_lock(&sbi->lookup_lock); 99 spin_lock(&sbi->lookup_lock);
100 spin_lock(&root->d_lock);
100 101
101 if (prev == NULL) { 102 if (prev)
102 spin_lock(&root->d_lock); 103 next = prev->d_u.d_child.next;
104 else {
103 prev = dget_dlock(root); 105 prev = dget_dlock(root);
104 next = prev->d_subdirs.next; 106 next = prev->d_subdirs.next;
105 p = prev;
106 goto start;
107 } 107 }
108 108
109 p = prev; 109cont:
110 spin_lock(&p->d_lock);
111again:
112 next = p->d_u.d_child.next;
113start:
114 if (next == &root->d_subdirs) { 110 if (next == &root->d_subdirs) {
115 spin_unlock(&p->d_lock); 111 spin_unlock(&root->d_lock);
116 spin_unlock(&sbi->lookup_lock); 112 spin_unlock(&sbi->lookup_lock);
117 dput(prev); 113 dput(prev);
118 return NULL; 114 return NULL;
@@ -121,16 +117,15 @@ start:
121 q = list_entry(next, struct dentry, d_u.d_child); 117 q = list_entry(next, struct dentry, d_u.d_child);
122 118
123 spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); 119 spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
124 /* Negative dentry - try next */ 120 /* Already gone or negative dentry (under construction) - try next */
125 if (!simple_positive(q)) { 121 if (q->d_count == 0 || !simple_positive(q)) {
126 spin_unlock(&p->d_lock); 122 spin_unlock(&q->d_lock);
127 lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_); 123 next = q->d_u.d_child.next;
128 p = q; 124 goto cont;
129 goto again;
130 } 125 }
131 dget_dlock(q); 126 dget_dlock(q);
132 spin_unlock(&q->d_lock); 127 spin_unlock(&q->d_lock);
133 spin_unlock(&p->d_lock); 128 spin_unlock(&root->d_lock);
134 spin_unlock(&sbi->lookup_lock); 129 spin_unlock(&sbi->lookup_lock);
135 130
136 dput(prev); 131 dput(prev);
@@ -404,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
404 DPRINTK("checking mountpoint %p %.*s", 399 DPRINTK("checking mountpoint %p %.*s",
405 dentry, (int)dentry->d_name.len, dentry->d_name.name); 400 dentry, (int)dentry->d_name.len, dentry->d_name.name);
406 401
407 /* Path walk currently on this dentry? */
408 ino_count = atomic_read(&ino->count) + 2;
409 if (dentry->d_count > ino_count)
410 goto next;
411
412 /* Can we umount this guy */ 402 /* Can we umount this guy */
413 if (autofs4_mount_busy(mnt, dentry)) 403 if (autofs4_mount_busy(mnt, dentry))
414 goto next; 404 goto next;
diff --git a/fs/bio.c b/fs/bio.c
index 73922abba832..71072ab99128 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
73{ 73{
74 unsigned int sz = sizeof(struct bio) + extra_size; 74 unsigned int sz = sizeof(struct bio) + extra_size;
75 struct kmem_cache *slab = NULL; 75 struct kmem_cache *slab = NULL;
76 struct bio_slab *bslab; 76 struct bio_slab *bslab, *new_bio_slabs;
77 unsigned int i, entry = -1; 77 unsigned int i, entry = -1;
78 78
79 mutex_lock(&bio_slab_lock); 79 mutex_lock(&bio_slab_lock);
@@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
97 97
98 if (bio_slab_nr == bio_slab_max && entry == -1) { 98 if (bio_slab_nr == bio_slab_max && entry == -1) {
99 bio_slab_max <<= 1; 99 bio_slab_max <<= 1;
100 bio_slabs = krealloc(bio_slabs, 100 new_bio_slabs = krealloc(bio_slabs,
101 bio_slab_max * sizeof(struct bio_slab), 101 bio_slab_max * sizeof(struct bio_slab),
102 GFP_KERNEL); 102 GFP_KERNEL);
103 if (!bio_slabs) 103 if (!new_bio_slabs)
104 goto out_unlock; 104 goto out_unlock;
105 bio_slabs = new_bio_slabs;
105 } 106 }
106 if (entry == -1) 107 if (entry == -1)
107 entry = bio_slab_nr++; 108 entry = bio_slab_nr++;
@@ -1312,7 +1313,7 @@ EXPORT_SYMBOL(bio_copy_kern);
1312 * Note that this code is very hard to test under normal circumstances because 1313 * Note that this code is very hard to test under normal circumstances because
1313 * direct-io pins the pages with get_user_pages(). This makes 1314 * direct-io pins the pages with get_user_pages(). This makes
1314 * is_page_cache_freeable return false, and the VM will not clean the pages. 1315 * is_page_cache_freeable return false, and the VM will not clean the pages.
1315 * But other code (eg, pdflush) could clean the pages if they are mapped 1316 * But other code (eg, flusher threads) could clean the pages if they are mapped
1316 * pagecache. 1317 * pagecache.
1317 * 1318 *
1318 * Simply disabling the call to bio_set_pages_dirty() is a good way to test the 1319 * Simply disabling the call to bio_set_pages_dirty() is a good way to test the
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e519195d45b..38e721b35d45 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1578 unsigned long nr_segs, loff_t pos) 1578 unsigned long nr_segs, loff_t pos)
1579{ 1579{
1580 struct file *file = iocb->ki_filp; 1580 struct file *file = iocb->ki_filp;
1581 struct blk_plug plug;
1581 ssize_t ret; 1582 ssize_t ret;
1582 1583
1583 BUG_ON(iocb->ki_pos != pos); 1584 BUG_ON(iocb->ki_pos != pos);
1584 1585
1586 blk_start_plug(&plug);
1585 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 1587 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1586 if (ret > 0 || ret == -EIOCBQUEUED) { 1588 if (ret > 0 || ret == -EIOCBQUEUED) {
1587 ssize_t err; 1589 ssize_t err;
@@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1590 if (err < 0 && ret > 0) 1592 if (err < 0 && ret > 0)
1591 ret = err; 1593 ret = err;
1592 } 1594 }
1595 blk_finish_plug(&plug);
1593 return ret; 1596 return ret;
1594} 1597}
1595EXPORT_SYMBOL_GPL(blkdev_aio_write); 1598EXPORT_SYMBOL_GPL(blkdev_aio_write);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a845..ff6475f409d6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
1438 ret = extent_from_logical(fs_info, logical, path, 1438 ret = extent_from_logical(fs_info, logical, path,
1439 &found_key); 1439 &found_key);
1440 btrfs_release_path(path); 1440 btrfs_release_path(path);
1441 if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1442 ret = -EINVAL;
1443 if (ret < 0) 1441 if (ret < 0)
1444 return ret; 1442 return ret;
1443 if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1444 return -EINVAL;
1445 1445
1446 extent_item_pos = logical - found_key.objectid; 1446 extent_item_pos = logical - found_key.objectid;
1447 ret = iterate_extent_inodes(fs_info, found_key.objectid, 1447 ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab78..43d1c5a3a030 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
818 btrfs_compress_op[idx]->free_workspace(workspace); 818 btrfs_compress_op[idx]->free_workspace(workspace);
819 atomic_dec(alloc_workspace); 819 atomic_dec(alloc_workspace);
820wake: 820wake:
821 smp_mb();
821 if (waitqueue_active(workspace_wait)) 822 if (waitqueue_active(workspace_wait))
822 wake_up(workspace_wait); 823 wake_up(workspace_wait);
823} 824}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271ff..6d183f60d63a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
421 spin_unlock(&fs_info->tree_mod_seq_lock); 421 spin_unlock(&fs_info->tree_mod_seq_lock);
422 422
423 /* 423 /*
424 * we removed the lowest blocker from the blocker list, so there may be
425 * more processible delayed refs.
426 */
427 wake_up(&fs_info->tree_mod_seq_wait);
428
429 /*
430 * anything that's lower than the lowest existing (read: blocked) 424 * anything that's lower than the lowest existing (read: blocked)
431 * sequence number can be removed from the tree. 425 * sequence number can be removed from the tree.
432 */ 426 */
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
631 u32 nritems; 625 u32 nritems;
632 int ret; 626 int ret;
633 627
628 if (btrfs_header_level(eb) == 0)
629 return;
630
634 nritems = btrfs_header_nritems(eb); 631 nritems = btrfs_header_nritems(eb);
635 for (i = nritems - 1; i >= 0; i--) { 632 for (i = nritems - 1; i >= 0; i--) {
636 ret = tree_mod_log_insert_key_locked(fs_info, eb, i, 633 ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227ad..0d195b507660 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
1252 atomic_t tree_mod_seq; 1252 atomic_t tree_mod_seq;
1253 struct list_head tree_mod_seq_list; 1253 struct list_head tree_mod_seq_list;
1254 struct seq_list tree_mod_seq_elem; 1254 struct seq_list tree_mod_seq_elem;
1255 wait_queue_head_t tree_mod_seq_wait;
1256 1255
1257 /* this protects tree_mod_log */ 1256 /* this protects tree_mod_log */
1258 rwlock_t tree_mod_log_lock; 1257 rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
3192int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 3191int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
3193 struct bio *bio, u32 *dst); 3192 struct bio *bio, u32 *dst);
3194int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 3193int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
3195 struct bio *bio, u64 logical_offset, u32 *dst); 3194 struct bio *bio, u64 logical_offset);
3196int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3195int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3197 struct btrfs_root *root, 3196 struct btrfs_root *root,
3198 u64 objectid, u64 pos, 3197 u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8ceab..07d5eeb1e6f1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
512 512
513 rb_erase(&delayed_item->rb_node, root); 513 rb_erase(&delayed_item->rb_node, root);
514 delayed_item->delayed_node->count--; 514 delayed_item->delayed_node->count--;
515 atomic_dec(&delayed_root->items); 515 if (atomic_dec_return(&delayed_root->items) <
516 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && 516 BTRFS_DELAYED_BACKGROUND &&
517 waitqueue_active(&delayed_root->wait)) 517 waitqueue_active(&delayed_root->wait))
518 wake_up(&delayed_root->wait); 518 wake_up(&delayed_root->wait);
519} 519}
@@ -1028,9 +1028,10 @@ do_again:
1028 btrfs_release_delayed_item(prev); 1028 btrfs_release_delayed_item(prev);
1029 ret = 0; 1029 ret = 0;
1030 btrfs_release_path(path); 1030 btrfs_release_path(path);
1031 if (curr) 1031 if (curr) {
1032 mutex_unlock(&node->mutex);
1032 goto do_again; 1033 goto do_again;
1033 else 1034 } else
1034 goto delete_fail; 1035 goto delete_fail;
1035 } 1036 }
1036 1037
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1055 delayed_node->count--; 1056 delayed_node->count--;
1056 1057
1057 delayed_root = delayed_node->root->fs_info->delayed_root; 1058 delayed_root = delayed_node->root->fs_info->delayed_root;
1058 atomic_dec(&delayed_root->items); 1059 if (atomic_dec_return(&delayed_root->items) <
1059 if (atomic_read(&delayed_root->items) <
1060 BTRFS_DELAYED_BACKGROUND && 1060 BTRFS_DELAYED_BACKGROUND &&
1061 waitqueue_active(&delayed_root->wait)) 1061 waitqueue_active(&delayed_root->wait))
1062 wake_up(&delayed_root->wait); 1062 wake_up(&delayed_root->wait);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01bb..ae9411773397 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
38static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, 38static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
39 struct btrfs_delayed_tree_ref *ref1) 39 struct btrfs_delayed_tree_ref *ref1)
40{ 40{
41 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { 41 if (ref1->root < ref2->root)
42 if (ref1->root < ref2->root) 42 return -1;
43 return -1; 43 if (ref1->root > ref2->root)
44 if (ref1->root > ref2->root) 44 return 1;
45 return 1; 45 if (ref1->parent < ref2->parent)
46 } else { 46 return -1;
47 if (ref1->parent < ref2->parent) 47 if (ref1->parent > ref2->parent)
48 return -1; 48 return 1;
49 if (ref1->parent > ref2->parent)
50 return 1;
51 }
52 return 0; 49 return 0;
53} 50}
54 51
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
85 * type of the delayed backrefs and content of delayed backrefs. 82 * type of the delayed backrefs and content of delayed backrefs.
86 */ 83 */
87static int comp_entry(struct btrfs_delayed_ref_node *ref2, 84static int comp_entry(struct btrfs_delayed_ref_node *ref2,
88 struct btrfs_delayed_ref_node *ref1) 85 struct btrfs_delayed_ref_node *ref1,
86 bool compare_seq)
89{ 87{
90 if (ref1->bytenr < ref2->bytenr) 88 if (ref1->bytenr < ref2->bytenr)
91 return -1; 89 return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
102 if (ref1->type > ref2->type) 100 if (ref1->type > ref2->type)
103 return 1; 101 return 1;
104 /* merging of sequenced refs is not allowed */ 102 /* merging of sequenced refs is not allowed */
105 if (ref1->seq < ref2->seq) 103 if (compare_seq) {
106 return -1; 104 if (ref1->seq < ref2->seq)
107 if (ref1->seq > ref2->seq) 105 return -1;
108 return 1; 106 if (ref1->seq > ref2->seq)
107 return 1;
108 }
109 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || 109 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
110 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { 110 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
111 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), 111 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
139 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, 139 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
140 rb_node); 140 rb_node);
141 141
142 cmp = comp_entry(entry, ins); 142 cmp = comp_entry(entry, ins, 1);
143 if (cmp < 0) 143 if (cmp < 0)
144 p = &(*p)->rb_left; 144 p = &(*p)->rb_left;
145 else if (cmp > 0) 145 else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
233 return 0; 233 return 0;
234} 234}
235 235
236static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
237 struct btrfs_delayed_ref_root *delayed_refs,
238 struct btrfs_delayed_ref_node *ref)
239{
240 rb_erase(&ref->rb_node, &delayed_refs->root);
241 ref->in_tree = 0;
242 btrfs_put_delayed_ref(ref);
243 delayed_refs->num_entries--;
244 if (trans->delayed_ref_updates)
245 trans->delayed_ref_updates--;
246}
247
248static int merge_ref(struct btrfs_trans_handle *trans,
249 struct btrfs_delayed_ref_root *delayed_refs,
250 struct btrfs_delayed_ref_node *ref, u64 seq)
251{
252 struct rb_node *node;
253 int merged = 0;
254 int mod = 0;
255 int done = 0;
256
257 node = rb_prev(&ref->rb_node);
258 while (node) {
259 struct btrfs_delayed_ref_node *next;
260
261 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
262 node = rb_prev(node);
263 if (next->bytenr != ref->bytenr)
264 break;
265 if (seq && next->seq >= seq)
266 break;
267 if (comp_entry(ref, next, 0))
268 continue;
269
270 if (ref->action == next->action) {
271 mod = next->ref_mod;
272 } else {
273 if (ref->ref_mod < next->ref_mod) {
274 struct btrfs_delayed_ref_node *tmp;
275
276 tmp = ref;
277 ref = next;
278 next = tmp;
279 done = 1;
280 }
281 mod = -next->ref_mod;
282 }
283
284 merged++;
285 drop_delayed_ref(trans, delayed_refs, next);
286 ref->ref_mod += mod;
287 if (ref->ref_mod == 0) {
288 drop_delayed_ref(trans, delayed_refs, ref);
289 break;
290 } else {
291 /*
292 * You can't have multiples of the same ref on a tree
293 * block.
294 */
295 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
296 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
297 }
298
299 if (done)
300 break;
301 node = rb_prev(&ref->rb_node);
302 }
303
304 return merged;
305}
306
307void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
308 struct btrfs_fs_info *fs_info,
309 struct btrfs_delayed_ref_root *delayed_refs,
310 struct btrfs_delayed_ref_head *head)
311{
312 struct rb_node *node;
313 u64 seq = 0;
314
315 spin_lock(&fs_info->tree_mod_seq_lock);
316 if (!list_empty(&fs_info->tree_mod_seq_list)) {
317 struct seq_list *elem;
318
319 elem = list_first_entry(&fs_info->tree_mod_seq_list,
320 struct seq_list, list);
321 seq = elem->seq;
322 }
323 spin_unlock(&fs_info->tree_mod_seq_lock);
324
325 node = rb_prev(&head->node.rb_node);
326 while (node) {
327 struct btrfs_delayed_ref_node *ref;
328
329 ref = rb_entry(node, struct btrfs_delayed_ref_node,
330 rb_node);
331 if (ref->bytenr != head->node.bytenr)
332 break;
333
334 /* We can't merge refs that are outside of our seq count */
335 if (seq && ref->seq >= seq)
336 break;
337 if (merge_ref(trans, delayed_refs, ref, seq))
338 node = rb_prev(&head->node.rb_node);
339 else
340 node = rb_prev(node);
341 }
342}
343
236int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, 344int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
237 struct btrfs_delayed_ref_root *delayed_refs, 345 struct btrfs_delayed_ref_root *delayed_refs,
238 u64 seq) 346 u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
336 * every changing the extent allocation tree. 444 * every changing the extent allocation tree.
337 */ 445 */
338 existing->ref_mod--; 446 existing->ref_mod--;
339 if (existing->ref_mod == 0) { 447 if (existing->ref_mod == 0)
340 rb_erase(&existing->rb_node, 448 drop_delayed_ref(trans, delayed_refs, existing);
341 &delayed_refs->root); 449 else
342 existing->in_tree = 0;
343 btrfs_put_delayed_ref(existing);
344 delayed_refs->num_entries--;
345 if (trans->delayed_ref_updates)
346 trans->delayed_ref_updates--;
347 } else {
348 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 450 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
349 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 451 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
350 }
351 } else { 452 } else {
352 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 453 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
353 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 454 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
662 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 763 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
663 num_bytes, parent, ref_root, level, action, 764 num_bytes, parent, ref_root, level, action,
664 for_cow); 765 for_cow);
665 if (!need_ref_seq(for_cow, ref_root) &&
666 waitqueue_active(&fs_info->tree_mod_seq_wait))
667 wake_up(&fs_info->tree_mod_seq_wait);
668 spin_unlock(&delayed_refs->lock); 766 spin_unlock(&delayed_refs->lock);
669 if (need_ref_seq(for_cow, ref_root)) 767 if (need_ref_seq(for_cow, ref_root))
670 btrfs_qgroup_record_ref(trans, &ref->node, extent_op); 768 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
713 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 811 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
714 num_bytes, parent, ref_root, owner, offset, 812 num_bytes, parent, ref_root, owner, offset,
715 action, for_cow); 813 action, for_cow);
716 if (!need_ref_seq(for_cow, ref_root) &&
717 waitqueue_active(&fs_info->tree_mod_seq_wait))
718 wake_up(&fs_info->tree_mod_seq_wait);
719 spin_unlock(&delayed_refs->lock); 814 spin_unlock(&delayed_refs->lock);
720 if (need_ref_seq(for_cow, ref_root)) 815 if (need_ref_seq(for_cow, ref_root))
721 btrfs_qgroup_record_ref(trans, &ref->node, extent_op); 816 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
744 num_bytes, BTRFS_UPDATE_DELAYED_HEAD, 839 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
745 extent_op->is_data); 840 extent_op->is_data);
746 841
747 if (waitqueue_active(&fs_info->tree_mod_seq_wait))
748 wake_up(&fs_info->tree_mod_seq_wait);
749 spin_unlock(&delayed_refs->lock); 842 spin_unlock(&delayed_refs->lock);
750 return 0; 843 return 0;
751} 844}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b6..ab5300595847 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
167 struct btrfs_trans_handle *trans, 167 struct btrfs_trans_handle *trans,
168 u64 bytenr, u64 num_bytes, 168 u64 bytenr, u64 num_bytes,
169 struct btrfs_delayed_extent_op *extent_op); 169 struct btrfs_delayed_extent_op *extent_op);
170void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
171 struct btrfs_fs_info *fs_info,
172 struct btrfs_delayed_ref_root *delayed_refs,
173 struct btrfs_delayed_ref_head *head);
170 174
171struct btrfs_delayed_ref_head * 175struct btrfs_delayed_ref_head *
172btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 176btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e25..22e98e04c2ea 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
377 ret = read_extent_buffer_pages(io_tree, eb, start, 377 ret = read_extent_buffer_pages(io_tree, eb, start,
378 WAIT_COMPLETE, 378 WAIT_COMPLETE,
379 btree_get_extent, mirror_num); 379 btree_get_extent, mirror_num);
380 if (!ret && !verify_parent_transid(io_tree, eb, 380 if (!ret) {
381 if (!verify_parent_transid(io_tree, eb,
381 parent_transid, 0)) 382 parent_transid, 0))
382 break; 383 break;
384 else
385 ret = -EIO;
386 }
383 387
384 /* 388 /*
385 * This buffer's crc is fine, but its contents are corrupted, so 389 * This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
754 limit = btrfs_async_submit_limit(fs_info); 758 limit = btrfs_async_submit_limit(fs_info);
755 limit = limit * 2 / 3; 759 limit = limit * 2 / 3;
756 760
757 atomic_dec(&fs_info->nr_async_submits); 761 if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
758
759 if (atomic_read(&fs_info->nr_async_submits) < limit &&
760 waitqueue_active(&fs_info->async_submit_wait)) 762 waitqueue_active(&fs_info->async_submit_wait))
761 wake_up(&fs_info->async_submit_wait); 763 wake_up(&fs_info->async_submit_wait);
762 764
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
2032 fs_info->free_chunk_space = 0; 2034 fs_info->free_chunk_space = 0;
2033 fs_info->tree_mod_log = RB_ROOT; 2035 fs_info->tree_mod_log = RB_ROOT;
2034 2036
2035 init_waitqueue_head(&fs_info->tree_mod_seq_wait);
2036
2037 /* readahead state */ 2037 /* readahead state */
2038 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2038 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
2039 spin_lock_init(&fs_info->reada_lock); 2039 spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
2528 goto fail_trans_kthread; 2528 goto fail_trans_kthread;
2529 2529
2530 /* do not make disk changes in broken FS */ 2530 /* do not make disk changes in broken FS */
2531 if (btrfs_super_log_root(disk_super) != 0 && 2531 if (btrfs_super_log_root(disk_super) != 0) {
2532 !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
2533 u64 bytenr = btrfs_super_log_root(disk_super); 2532 u64 bytenr = btrfs_super_log_root(disk_super);
2534 2533
2535 if (fs_devices->rw_devices == 0) { 2534 if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
3189 /* clear out the rbtree of defraggable inodes */ 3188 /* clear out the rbtree of defraggable inodes */
3190 btrfs_run_defrag_inodes(fs_info); 3189 btrfs_run_defrag_inodes(fs_info);
3191 3190
3192 /*
3193 * Here come 2 situations when btrfs is broken to flip readonly:
3194 *
3195 * 1. when btrfs flips readonly somewhere else before
3196 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
3197 * and btrfs will skip to write sb directly to keep
3198 * ERROR state on disk.
3199 *
3200 * 2. when btrfs flips readonly just in btrfs_commit_super,
3201 * and in such case, btrfs cannot write sb via btrfs_commit_super,
3202 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
3203 * btrfs will cleanup all FS resources first and write sb then.
3204 */
3205 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3191 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3206 ret = btrfs_commit_super(root); 3192 ret = btrfs_commit_super(root);
3207 if (ret) 3193 if (ret)
3208 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3194 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3209 } 3195 }
3210 3196
3211 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 3197 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
3212 ret = btrfs_error_commit_super(root); 3198 btrfs_error_commit_super(root);
3213 if (ret)
3214 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3215 }
3216 3199
3217 btrfs_put_block_group_cache(fs_info); 3200 btrfs_put_block_group_cache(fs_info);
3218 3201
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3434 if (read_only) 3417 if (read_only)
3435 return 0; 3418 return 0;
3436 3419
3437 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3438 printk(KERN_WARNING "warning: mount fs with errors, "
3439 "running btrfsck is recommended\n");
3440 }
3441
3442 return 0; 3420 return 0;
3443} 3421}
3444 3422
3445int btrfs_error_commit_super(struct btrfs_root *root) 3423void btrfs_error_commit_super(struct btrfs_root *root)
3446{ 3424{
3447 int ret;
3448
3449 mutex_lock(&root->fs_info->cleaner_mutex); 3425 mutex_lock(&root->fs_info->cleaner_mutex);
3450 btrfs_run_delayed_iputs(root); 3426 btrfs_run_delayed_iputs(root);
3451 mutex_unlock(&root->fs_info->cleaner_mutex); 3427 mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
3455 3431
3456 /* cleanup FS via transaction */ 3432 /* cleanup FS via transaction */
3457 btrfs_cleanup_transaction(root); 3433 btrfs_cleanup_transaction(root);
3458
3459 ret = write_ctree_super(NULL, root, 0);
3460
3461 return ret;
3462} 3434}
3463 3435
3464static void btrfs_destroy_ordered_operations(struct btrfs_root *root) 3436static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3782 /* FIXME: cleanup wait for commit */ 3754 /* FIXME: cleanup wait for commit */
3783 t->in_commit = 1; 3755 t->in_commit = 1;
3784 t->blocked = 1; 3756 t->blocked = 1;
3757 smp_mb();
3785 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3758 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3786 wake_up(&root->fs_info->transaction_blocked_wait); 3759 wake_up(&root->fs_info->transaction_blocked_wait);
3787 3760
3788 t->blocked = 0; 3761 t->blocked = 0;
3762 smp_mb();
3789 if (waitqueue_active(&root->fs_info->transaction_wait)) 3763 if (waitqueue_active(&root->fs_info->transaction_wait))
3790 wake_up(&root->fs_info->transaction_wait); 3764 wake_up(&root->fs_info->transaction_wait);
3791 3765
3792 t->commit_done = 1; 3766 t->commit_done = 1;
3767 smp_mb();
3793 if (waitqueue_active(&t->commit_wait)) 3768 if (waitqueue_active(&t->commit_wait))
3794 wake_up(&t->commit_wait); 3769 wake_up(&t->commit_wait);
3795 3770
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea239..c5b00a735fef 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, int max_mirrors); 54 struct btrfs_root *root, int max_mirrors);
55struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); 55struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
56int btrfs_commit_super(struct btrfs_root *root); 56int btrfs_commit_super(struct btrfs_root *root);
57int btrfs_error_commit_super(struct btrfs_root *root); 57void btrfs_error_commit_super(struct btrfs_root *root);
58struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 58struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
59 u64 bytenr, u32 blocksize); 59 u64 bytenr, u32 blocksize);
60struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 60struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c47..ba58024d40d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2252 } 2252 }
2253 2253
2254 /* 2254 /*
2255 * We need to try and merge add/drops of the same ref since we
2256 * can run into issues with relocate dropping the implicit ref
2257 * and then it being added back again before the drop can
2258 * finish. If we merged anything we need to re-loop so we can
2259 * get a good ref.
2260 */
2261 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2262 locked_ref);
2263
2264 /*
2255 * locked_ref is the head node, so we have to go one 2265 * locked_ref is the head node, so we have to go one
2256 * node back for any delayed ref updates 2266 * node back for any delayed ref updates
2257 */ 2267 */
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2318 ref->in_tree = 0; 2328 ref->in_tree = 0;
2319 rb_erase(&ref->rb_node, &delayed_refs->root); 2329 rb_erase(&ref->rb_node, &delayed_refs->root);
2320 delayed_refs->num_entries--; 2330 delayed_refs->num_entries--;
2321 /* 2331 if (locked_ref) {
2322 * we modified num_entries, but as we're currently running 2332 /*
2323 * delayed refs, skip 2333 * when we play the delayed ref, also correct the
2324 * wake_up(&delayed_refs->seq_wait); 2334 * ref_mod on head
2325 * here. 2335 */
2326 */ 2336 switch (ref->action) {
2337 case BTRFS_ADD_DELAYED_REF:
2338 case BTRFS_ADD_DELAYED_EXTENT:
2339 locked_ref->node.ref_mod -= ref->ref_mod;
2340 break;
2341 case BTRFS_DROP_DELAYED_REF:
2342 locked_ref->node.ref_mod += ref->ref_mod;
2343 break;
2344 default:
2345 WARN_ON(1);
2346 }
2347 }
2327 spin_unlock(&delayed_refs->lock); 2348 spin_unlock(&delayed_refs->lock);
2328 2349
2329 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2350 ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
2350 return count; 2371 return count;
2351} 2372}
2352 2373
2353static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
2354 struct btrfs_delayed_ref_root *delayed_refs,
2355 unsigned long num_refs,
2356 struct list_head *first_seq)
2357{
2358 spin_unlock(&delayed_refs->lock);
2359 pr_debug("waiting for more refs (num %ld, first %p)\n",
2360 num_refs, first_seq);
2361 wait_event(fs_info->tree_mod_seq_wait,
2362 num_refs != delayed_refs->num_entries ||
2363 fs_info->tree_mod_seq_list.next != first_seq);
2364 pr_debug("done waiting for more refs (num %ld, first %p)\n",
2365 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
2366 spin_lock(&delayed_refs->lock);
2367}
2368
2369#ifdef SCRAMBLE_DELAYED_REFS 2374#ifdef SCRAMBLE_DELAYED_REFS
2370/* 2375/*
2371 * Normally delayed refs get processed in ascending bytenr order. This 2376 * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2460 struct btrfs_delayed_ref_root *delayed_refs; 2465 struct btrfs_delayed_ref_root *delayed_refs;
2461 struct btrfs_delayed_ref_node *ref; 2466 struct btrfs_delayed_ref_node *ref;
2462 struct list_head cluster; 2467 struct list_head cluster;
2463 struct list_head *first_seq = NULL;
2464 int ret; 2468 int ret;
2465 u64 delayed_start; 2469 u64 delayed_start;
2466 int run_all = count == (unsigned long)-1; 2470 int run_all = count == (unsigned long)-1;
2467 int run_most = 0; 2471 int run_most = 0;
2468 unsigned long num_refs = 0; 2472 int loops;
2469 int consider_waiting;
2470 2473
2471 /* We'll clean this up in btrfs_cleanup_transaction */ 2474 /* We'll clean this up in btrfs_cleanup_transaction */
2472 if (trans->aborted) 2475 if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2484 delayed_refs = &trans->transaction->delayed_refs; 2487 delayed_refs = &trans->transaction->delayed_refs;
2485 INIT_LIST_HEAD(&cluster); 2488 INIT_LIST_HEAD(&cluster);
2486again: 2489again:
2487 consider_waiting = 0; 2490 loops = 0;
2488 spin_lock(&delayed_refs->lock); 2491 spin_lock(&delayed_refs->lock);
2489 2492
2490#ifdef SCRAMBLE_DELAYED_REFS 2493#ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
2512 if (ret) 2515 if (ret)
2513 break; 2516 break;
2514 2517
2515 if (delayed_start >= delayed_refs->run_delayed_start) {
2516 if (consider_waiting == 0) {
2517 /*
2518 * btrfs_find_ref_cluster looped. let's do one
2519 * more cycle. if we don't run any delayed ref
2520 * during that cycle (because we can't because
2521 * all of them are blocked) and if the number of
2522 * refs doesn't change, we avoid busy waiting.
2523 */
2524 consider_waiting = 1;
2525 num_refs = delayed_refs->num_entries;
2526 first_seq = root->fs_info->tree_mod_seq_list.next;
2527 } else {
2528 wait_for_more_refs(root->fs_info, delayed_refs,
2529 num_refs, first_seq);
2530 /*
2531 * after waiting, things have changed. we
2532 * dropped the lock and someone else might have
2533 * run some refs, built new clusters and so on.
2534 * therefore, we restart staleness detection.
2535 */
2536 consider_waiting = 0;
2537 }
2538 }
2539
2540 ret = run_clustered_refs(trans, root, &cluster); 2518 ret = run_clustered_refs(trans, root, &cluster);
2541 if (ret < 0) { 2519 if (ret < 0) {
2542 spin_unlock(&delayed_refs->lock); 2520 spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
2549 if (count == 0) 2527 if (count == 0)
2550 break; 2528 break;
2551 2529
2552 if (ret || delayed_refs->run_delayed_start == 0) { 2530 if (delayed_start >= delayed_refs->run_delayed_start) {
2531 if (loops == 0) {
2532 /*
2533 * btrfs_find_ref_cluster looped. let's do one
2534 * more cycle. if we don't run any delayed ref
2535 * during that cycle (because we can't because
2536 * all of them are blocked), bail out.
2537 */
2538 loops = 1;
2539 } else {
2540 /*
2541 * no runnable refs left, stop trying
2542 */
2543 BUG_ON(run_all);
2544 break;
2545 }
2546 }
2547 if (ret) {
2553 /* refs were run, let's reset staleness detection */ 2548 /* refs were run, let's reset staleness detection */
2554 consider_waiting = 0; 2549 loops = 0;
2555 } 2550 }
2556 } 2551 }
2557 2552
@@ -3007,17 +3002,16 @@ again:
3007 } 3002 }
3008 spin_unlock(&block_group->lock); 3003 spin_unlock(&block_group->lock);
3009 3004
3010 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 3005 /*
3006 * Try to preallocate enough space based on how big the block group is.
3007 * Keep in mind this has to include any pinned space which could end up
3008 * taking up quite a bit since it's not folded into the other space
3009 * cache.
3010 */
3011 num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
3011 if (!num_pages) 3012 if (!num_pages)
3012 num_pages = 1; 3013 num_pages = 1;
3013 3014
3014 /*
3015 * Just to make absolutely sure we have enough space, we're going to
3016 * preallocate 12 pages worth of space for each block group. In
3017 * practice we ought to use at most 8, but we need extra space so we can
3018 * add our header and have a terminator between the extents and the
3019 * bitmaps.
3020 */
3021 num_pages *= 16; 3015 num_pages *= 16;
3022 num_pages *= PAGE_CACHE_SIZE; 3016 num_pages *= PAGE_CACHE_SIZE;
3023 3017
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4571 if (root->fs_info->quota_enabled) { 4565 if (root->fs_info->quota_enabled) {
4572 ret = btrfs_qgroup_reserve(root, num_bytes + 4566 ret = btrfs_qgroup_reserve(root, num_bytes +
4573 nr_extents * root->leafsize); 4567 nr_extents * root->leafsize);
4574 if (ret) 4568 if (ret) {
4569 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4575 return ret; 4570 return ret;
4571 }
4576 } 4572 }
4577 4573
4578 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4574 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5294 rb_erase(&head->node.rb_node, &delayed_refs->root); 5290 rb_erase(&head->node.rb_node, &delayed_refs->root);
5295 5291
5296 delayed_refs->num_entries--; 5292 delayed_refs->num_entries--;
5297 smp_mb();
5298 if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
5299 wake_up(&root->fs_info->tree_mod_seq_wait);
5300 5293
5301 /* 5294 /*
5302 * we don't take a ref on the node because we're removing it from the 5295 * we don't take a ref on the node because we're removing it from the
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac82..4c878476bb91 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2330,23 +2330,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2330 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2330 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
2331 ret = tree->ops->readpage_end_io_hook(page, start, end, 2331 ret = tree->ops->readpage_end_io_hook(page, start, end,
2332 state, mirror); 2332 state, mirror);
2333 if (ret) { 2333 if (ret)
2334 /* no IO indicated but software detected errors
2335 * in the block, either checksum errors or
2336 * issues with the contents */
2337 struct btrfs_root *root =
2338 BTRFS_I(page->mapping->host)->root;
2339 struct btrfs_device *device;
2340
2341 uptodate = 0; 2334 uptodate = 0;
2342 device = btrfs_find_device_for_logical( 2335 else
2343 root, start, mirror);
2344 if (device)
2345 btrfs_dev_stat_inc_and_print(device,
2346 BTRFS_DEV_STAT_CORRUPTION_ERRS);
2347 } else {
2348 clean_io_failure(start, page); 2336 clean_io_failure(start, page);
2349 }
2350 } 2337 }
2351 2338
2352 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2339 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21d..857d93cd01dc 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
272} 272}
273 273
274int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 274int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
275 struct bio *bio, u64 offset, u32 *dst) 275 struct bio *bio, u64 offset)
276{ 276{
277 return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); 277 return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
278} 278}
279 279
280int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 280int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 83baec24946d..ec154f954646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow,
324 * If this code finds it can't get good compression, it puts an 324 * If this code finds it can't get good compression, it puts an
325 * entry onto the work queue to write the uncompressed bytes. This 325 * entry onto the work queue to write the uncompressed bytes. This
326 * makes sure that both compressed inodes and uncompressed inodes 326 * makes sure that both compressed inodes and uncompressed inodes
327 * are written in the same order that pdflush sent them down. 327 * are written in the same order that the flusher thread sent them
328 * down.
328 */ 329 */
329static noinline int compress_file_range(struct inode *inode, 330static noinline int compress_file_range(struct inode *inode,
330 struct page *locked_page, 331 struct page *locked_page,
@@ -1007,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
1007 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> 1008 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
1008 PAGE_CACHE_SHIFT; 1009 PAGE_CACHE_SHIFT;
1009 1010
1010 atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); 1011 if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
1011
1012 if (atomic_read(&root->fs_info->async_delalloc_pages) <
1013 5 * 1024 * 1024 && 1012 5 * 1024 * 1024 &&
1014 waitqueue_active(&root->fs_info->async_submit_wait)) 1013 waitqueue_active(&root->fs_info->async_submit_wait))
1015 wake_up(&root->fs_info->async_submit_wait); 1014 wake_up(&root->fs_info->async_submit_wait);
@@ -1884,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1884 trans = btrfs_join_transaction_nolock(root); 1883 trans = btrfs_join_transaction_nolock(root);
1885 else 1884 else
1886 trans = btrfs_join_transaction(root); 1885 trans = btrfs_join_transaction(root);
1887 if (IS_ERR(trans)) 1886 if (IS_ERR(trans)) {
1888 return PTR_ERR(trans); 1887 ret = PTR_ERR(trans);
1888 trans = NULL;
1889 goto out;
1890 }
1889 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1891 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1890 ret = btrfs_update_inode_fallback(trans, root, inode); 1892 ret = btrfs_update_inode_fallback(trans, root, inode);
1891 if (ret) /* -ENOMEM or corruption */ 1893 if (ret) /* -ENOMEM or corruption */
@@ -3173,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3173 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3175 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3174 inode_inc_iversion(dir); 3176 inode_inc_iversion(dir);
3175 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3177 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3176 ret = btrfs_update_inode(trans, root, dir); 3178 ret = btrfs_update_inode_fallback(trans, root, dir);
3177 if (ret) 3179 if (ret)
3178 btrfs_abort_transaction(trans, root, ret); 3180 btrfs_abort_transaction(trans, root, ret);
3179out: 3181out:
@@ -5773,18 +5775,112 @@ out:
5773 return ret; 5775 return ret;
5774} 5776}
5775 5777
5778static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
5779 struct extent_state **cached_state, int writing)
5780{
5781 struct btrfs_ordered_extent *ordered;
5782 int ret = 0;
5783
5784 while (1) {
5785 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5786 0, cached_state);
5787 /*
5788 * We're concerned with the entire range that we're going to be
5789 * doing DIO to, so we need to make sure theres no ordered
5790 * extents in this range.
5791 */
5792 ordered = btrfs_lookup_ordered_range(inode, lockstart,
5793 lockend - lockstart + 1);
5794
5795 /*
5796 * We need to make sure there are no buffered pages in this
5797 * range either, we could have raced between the invalidate in
5798 * generic_file_direct_write and locking the extent. The
5799 * invalidate needs to happen so that reads after a write do not
5800 * get stale data.
5801 */
5802 if (!ordered && (!writing ||
5803 !test_range_bit(&BTRFS_I(inode)->io_tree,
5804 lockstart, lockend, EXTENT_UPTODATE, 0,
5805 *cached_state)))
5806 break;
5807
5808 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5809 cached_state, GFP_NOFS);
5810
5811 if (ordered) {
5812 btrfs_start_ordered_extent(inode, ordered, 1);
5813 btrfs_put_ordered_extent(ordered);
5814 } else {
5815 /* Screw you mmap */
5816 ret = filemap_write_and_wait_range(inode->i_mapping,
5817 lockstart,
5818 lockend);
5819 if (ret)
5820 break;
5821
5822 /*
5823 * If we found a page that couldn't be invalidated just
5824 * fall back to buffered.
5825 */
5826 ret = invalidate_inode_pages2_range(inode->i_mapping,
5827 lockstart >> PAGE_CACHE_SHIFT,
5828 lockend >> PAGE_CACHE_SHIFT);
5829 if (ret)
5830 break;
5831 }
5832
5833 cond_resched();
5834 }
5835
5836 return ret;
5837}
5838
5776static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, 5839static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5777 struct buffer_head *bh_result, int create) 5840 struct buffer_head *bh_result, int create)
5778{ 5841{
5779 struct extent_map *em; 5842 struct extent_map *em;
5780 struct btrfs_root *root = BTRFS_I(inode)->root; 5843 struct btrfs_root *root = BTRFS_I(inode)->root;
5844 struct extent_state *cached_state = NULL;
5781 u64 start = iblock << inode->i_blkbits; 5845 u64 start = iblock << inode->i_blkbits;
5846 u64 lockstart, lockend;
5782 u64 len = bh_result->b_size; 5847 u64 len = bh_result->b_size;
5783 struct btrfs_trans_handle *trans; 5848 struct btrfs_trans_handle *trans;
5849 int unlock_bits = EXTENT_LOCKED;
5850 int ret;
5851
5852 if (create) {
5853 ret = btrfs_delalloc_reserve_space(inode, len);
5854 if (ret)
5855 return ret;
5856 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
5857 } else {
5858 len = min_t(u64, len, root->sectorsize);
5859 }
5860
5861 lockstart = start;
5862 lockend = start + len - 1;
5863
5864 /*
5865 * If this errors out it's because we couldn't invalidate pagecache for
5866 * this range and we need to fallback to buffered.
5867 */
5868 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
5869 return -ENOTBLK;
5870
5871 if (create) {
5872 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
5873 lockend, EXTENT_DELALLOC, NULL,
5874 &cached_state, GFP_NOFS);
5875 if (ret)
5876 goto unlock_err;
5877 }
5784 5878
5785 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 5879 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
5786 if (IS_ERR(em)) 5880 if (IS_ERR(em)) {
5787 return PTR_ERR(em); 5881 ret = PTR_ERR(em);
5882 goto unlock_err;
5883 }
5788 5884
5789 /* 5885 /*
5790 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered 5886 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5803,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5803 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || 5899 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
5804 em->block_start == EXTENT_MAP_INLINE) { 5900 em->block_start == EXTENT_MAP_INLINE) {
5805 free_extent_map(em); 5901 free_extent_map(em);
5806 return -ENOTBLK; 5902 ret = -ENOTBLK;
5903 goto unlock_err;
5807 } 5904 }
5808 5905
5809 /* Just a good old fashioned hole, return */ 5906 /* Just a good old fashioned hole, return */
5810 if (!create && (em->block_start == EXTENT_MAP_HOLE || 5907 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
5811 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 5908 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5812 free_extent_map(em); 5909 free_extent_map(em);
5813 /* DIO will do one hole at a time, so just unlock a sector */ 5910 ret = 0;
5814 unlock_extent(&BTRFS_I(inode)->io_tree, start, 5911 goto unlock_err;
5815 start + root->sectorsize - 1);
5816 return 0;
5817 } 5912 }
5818 5913
5819 /* 5914 /*
@@ -5826,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5826 * 5921 *
5827 */ 5922 */
5828 if (!create) { 5923 if (!create) {
5829 len = em->len - (start - em->start); 5924 len = min(len, em->len - (start - em->start));
5830 goto map; 5925 lockstart = start + len;
5926 goto unlock;
5831 } 5927 }
5832 5928
5833 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 5929 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5859,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5859 btrfs_end_transaction(trans, root); 5955 btrfs_end_transaction(trans, root);
5860 if (ret) { 5956 if (ret) {
5861 free_extent_map(em); 5957 free_extent_map(em);
5862 return ret; 5958 goto unlock_err;
5863 } 5959 }
5864 goto unlock; 5960 goto unlock;
5865 } 5961 }
@@ -5872,14 +5968,12 @@ must_cow:
5872 */ 5968 */
5873 len = bh_result->b_size; 5969 len = bh_result->b_size;
5874 em = btrfs_new_extent_direct(inode, em, start, len); 5970 em = btrfs_new_extent_direct(inode, em, start, len);
5875 if (IS_ERR(em)) 5971 if (IS_ERR(em)) {
5876 return PTR_ERR(em); 5972 ret = PTR_ERR(em);
5973 goto unlock_err;
5974 }
5877 len = min(len, em->len - (start - em->start)); 5975 len = min(len, em->len - (start - em->start));
5878unlock: 5976unlock:
5879 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
5880 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
5881 0, NULL, GFP_NOFS);
5882map:
5883 bh_result->b_blocknr = (em->block_start + (start - em->start)) >> 5977 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
5884 inode->i_blkbits; 5978 inode->i_blkbits;
5885 bh_result->b_size = len; 5979 bh_result->b_size = len;
@@ -5897,9 +5991,44 @@ map:
5897 i_size_write(inode, start + len); 5991 i_size_write(inode, start + len);
5898 } 5992 }
5899 5993
5994 /*
5995 * In the case of write we need to clear and unlock the entire range,
5996 * in the case of read we need to unlock only the end area that we
5997 * aren't using if there is any left over space.
5998 */
5999 if (lockstart < lockend) {
6000 if (create && len < lockend - lockstart) {
6001 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6002 lockstart + len - 1, unlock_bits, 1, 0,
6003 &cached_state, GFP_NOFS);
6004 /*
6005 * Beside unlock, we also need to cleanup reserved space
6006 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6007 */
6008 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6009 lockstart + len, lockend,
6010 unlock_bits | EXTENT_DO_ACCOUNTING,
6011 1, 0, NULL, GFP_NOFS);
6012 } else {
6013 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6014 lockend, unlock_bits, 1, 0,
6015 &cached_state, GFP_NOFS);
6016 }
6017 } else {
6018 free_extent_state(cached_state);
6019 }
6020
5900 free_extent_map(em); 6021 free_extent_map(em);
5901 6022
5902 return 0; 6023 return 0;
6024
6025unlock_err:
6026 if (create)
6027 unlock_bits |= EXTENT_DO_ACCOUNTING;
6028
6029 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6030 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6031 return ret;
5903} 6032}
5904 6033
5905struct btrfs_dio_private { 6034struct btrfs_dio_private {
@@ -5907,7 +6036,6 @@ struct btrfs_dio_private {
5907 u64 logical_offset; 6036 u64 logical_offset;
5908 u64 disk_bytenr; 6037 u64 disk_bytenr;
5909 u64 bytes; 6038 u64 bytes;
5910 u32 *csums;
5911 void *private; 6039 void *private;
5912 6040
5913 /* number of bios pending for this dio */ 6041 /* number of bios pending for this dio */
@@ -5927,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5927 struct inode *inode = dip->inode; 6055 struct inode *inode = dip->inode;
5928 struct btrfs_root *root = BTRFS_I(inode)->root; 6056 struct btrfs_root *root = BTRFS_I(inode)->root;
5929 u64 start; 6057 u64 start;
5930 u32 *private = dip->csums;
5931 6058
5932 start = dip->logical_offset; 6059 start = dip->logical_offset;
5933 do { 6060 do {
@@ -5935,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5935 struct page *page = bvec->bv_page; 6062 struct page *page = bvec->bv_page;
5936 char *kaddr; 6063 char *kaddr;
5937 u32 csum = ~(u32)0; 6064 u32 csum = ~(u32)0;
6065 u64 private = ~(u32)0;
5938 unsigned long flags; 6066 unsigned long flags;
5939 6067
6068 if (get_state_private(&BTRFS_I(inode)->io_tree,
6069 start, &private))
6070 goto failed;
5940 local_irq_save(flags); 6071 local_irq_save(flags);
5941 kaddr = kmap_atomic(page); 6072 kaddr = kmap_atomic(page);
5942 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 6073 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5946,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5946 local_irq_restore(flags); 6077 local_irq_restore(flags);
5947 6078
5948 flush_dcache_page(bvec->bv_page); 6079 flush_dcache_page(bvec->bv_page);
5949 if (csum != *private) { 6080 if (csum != private) {
6081failed:
5950 printk(KERN_ERR "btrfs csum failed ino %llu off" 6082 printk(KERN_ERR "btrfs csum failed ino %llu off"
5951 " %llu csum %u private %u\n", 6083 " %llu csum %u private %u\n",
5952 (unsigned long long)btrfs_ino(inode), 6084 (unsigned long long)btrfs_ino(inode),
5953 (unsigned long long)start, 6085 (unsigned long long)start,
5954 csum, *private); 6086 csum, (unsigned)private);
5955 err = -EIO; 6087 err = -EIO;
5956 } 6088 }
5957 } 6089 }
5958 6090
5959 start += bvec->bv_len; 6091 start += bvec->bv_len;
5960 private++;
5961 bvec++; 6092 bvec++;
5962 } while (bvec <= bvec_end); 6093 } while (bvec <= bvec_end);
5963 6094
@@ -5965,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5965 dip->logical_offset + dip->bytes - 1); 6096 dip->logical_offset + dip->bytes - 1);
5966 bio->bi_private = dip->private; 6097 bio->bi_private = dip->private;
5967 6098
5968 kfree(dip->csums);
5969 kfree(dip); 6099 kfree(dip);
5970 6100
5971 /* If we had a csum failure make sure to clear the uptodate flag */ 6101 /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6071,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
6071 6201
6072static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 6202static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6073 int rw, u64 file_offset, int skip_sum, 6203 int rw, u64 file_offset, int skip_sum,
6074 u32 *csums, int async_submit) 6204 int async_submit)
6075{ 6205{
6076 int write = rw & REQ_WRITE; 6206 int write = rw & REQ_WRITE;
6077 struct btrfs_root *root = BTRFS_I(inode)->root; 6207 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6104,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6104 if (ret) 6234 if (ret)
6105 goto err; 6235 goto err;
6106 } else if (!skip_sum) { 6236 } else if (!skip_sum) {
6107 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6237 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
6108 file_offset, csums);
6109 if (ret) 6238 if (ret)
6110 goto err; 6239 goto err;
6111 } 6240 }
@@ -6131,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6131 u64 submit_len = 0; 6260 u64 submit_len = 0;
6132 u64 map_length; 6261 u64 map_length;
6133 int nr_pages = 0; 6262 int nr_pages = 0;
6134 u32 *csums = dip->csums;
6135 int ret = 0; 6263 int ret = 0;
6136 int async_submit = 0; 6264 int async_submit = 0;
6137 int write = rw & REQ_WRITE;
6138 6265
6139 map_length = orig_bio->bi_size; 6266 map_length = orig_bio->bi_size;
6140 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6267 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6170,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6170 atomic_inc(&dip->pending_bios); 6297 atomic_inc(&dip->pending_bios);
6171 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6298 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6172 file_offset, skip_sum, 6299 file_offset, skip_sum,
6173 csums, async_submit); 6300 async_submit);
6174 if (ret) { 6301 if (ret) {
6175 bio_put(bio); 6302 bio_put(bio);
6176 atomic_dec(&dip->pending_bios); 6303 atomic_dec(&dip->pending_bios);
6177 goto out_err; 6304 goto out_err;
6178 } 6305 }
6179 6306
6180 /* Write's use the ordered csums */
6181 if (!write && !skip_sum)
6182 csums = csums + nr_pages;
6183 start_sector += submit_len >> 9; 6307 start_sector += submit_len >> 9;
6184 file_offset += submit_len; 6308 file_offset += submit_len;
6185 6309
@@ -6209,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6209 6333
6210submit: 6334submit:
6211 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6335 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6212 csums, async_submit); 6336 async_submit);
6213 if (!ret) 6337 if (!ret)
6214 return 0; 6338 return 0;
6215 6339
@@ -6245,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6245 ret = -ENOMEM; 6369 ret = -ENOMEM;
6246 goto free_ordered; 6370 goto free_ordered;
6247 } 6371 }
6248 dip->csums = NULL;
6249
6250 /* Write's use the ordered csum stuff, so we don't need dip->csums */
6251 if (!write && !skip_sum) {
6252 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6253 if (!dip->csums) {
6254 kfree(dip);
6255 ret = -ENOMEM;
6256 goto free_ordered;
6257 }
6258 }
6259 6372
6260 dip->private = bio->bi_private; 6373 dip->private = bio->bi_private;
6261 dip->inode = inode; 6374 dip->inode = inode;
@@ -6340,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6340out: 6453out:
6341 return retval; 6454 return retval;
6342} 6455}
6456
6343static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 6457static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6344 const struct iovec *iov, loff_t offset, 6458 const struct iovec *iov, loff_t offset,
6345 unsigned long nr_segs) 6459 unsigned long nr_segs)
6346{ 6460{
6347 struct file *file = iocb->ki_filp; 6461 struct file *file = iocb->ki_filp;
6348 struct inode *inode = file->f_mapping->host; 6462 struct inode *inode = file->f_mapping->host;
6349 struct btrfs_ordered_extent *ordered;
6350 struct extent_state *cached_state = NULL;
6351 u64 lockstart, lockend;
6352 ssize_t ret;
6353 int writing = rw & WRITE;
6354 int write_bits = 0;
6355 size_t count = iov_length(iov, nr_segs);
6356 6463
6357 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6464 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6358 offset, nr_segs)) { 6465 offset, nr_segs))
6359 return 0; 6466 return 0;
6360 }
6361
6362 lockstart = offset;
6363 lockend = offset + count - 1;
6364
6365 if (writing) {
6366 ret = btrfs_delalloc_reserve_space(inode, count);
6367 if (ret)
6368 goto out;
6369 }
6370
6371 while (1) {
6372 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6373 0, &cached_state);
6374 /*
6375 * We're concerned with the entire range that we're going to be
6376 * doing DIO to, so we need to make sure theres no ordered
6377 * extents in this range.
6378 */
6379 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6380 lockend - lockstart + 1);
6381
6382 /*
6383 * We need to make sure there are no buffered pages in this
6384 * range either, we could have raced between the invalidate in
6385 * generic_file_direct_write and locking the extent. The
6386 * invalidate needs to happen so that reads after a write do not
6387 * get stale data.
6388 */
6389 if (!ordered && (!writing ||
6390 !test_range_bit(&BTRFS_I(inode)->io_tree,
6391 lockstart, lockend, EXTENT_UPTODATE, 0,
6392 cached_state)))
6393 break;
6394
6395 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6396 &cached_state, GFP_NOFS);
6397
6398 if (ordered) {
6399 btrfs_start_ordered_extent(inode, ordered, 1);
6400 btrfs_put_ordered_extent(ordered);
6401 } else {
6402 /* Screw you mmap */
6403 ret = filemap_write_and_wait_range(file->f_mapping,
6404 lockstart,
6405 lockend);
6406 if (ret)
6407 goto out;
6408
6409 /*
6410 * If we found a page that couldn't be invalidated just
6411 * fall back to buffered.
6412 */
6413 ret = invalidate_inode_pages2_range(file->f_mapping,
6414 lockstart >> PAGE_CACHE_SHIFT,
6415 lockend >> PAGE_CACHE_SHIFT);
6416 if (ret) {
6417 if (ret == -EBUSY)
6418 ret = 0;
6419 goto out;
6420 }
6421 }
6422
6423 cond_resched();
6424 }
6425 6467
6426 /* 6468 return __blockdev_direct_IO(rw, iocb, inode,
6427 * we don't use btrfs_set_extent_delalloc because we don't want
6428 * the dirty or uptodate bits
6429 */
6430 if (writing) {
6431 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
6432 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6433 EXTENT_DELALLOC, NULL, &cached_state,
6434 GFP_NOFS);
6435 if (ret) {
6436 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6437 lockend, EXTENT_LOCKED | write_bits,
6438 1, 0, &cached_state, GFP_NOFS);
6439 goto out;
6440 }
6441 }
6442
6443 free_extent_state(cached_state);
6444 cached_state = NULL;
6445
6446 ret = __blockdev_direct_IO(rw, iocb, inode,
6447 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 6469 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
6448 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 6470 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
6449 btrfs_submit_direct, 0); 6471 btrfs_submit_direct, 0);
6450
6451 if (ret < 0 && ret != -EIOCBQUEUED) {
6452 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
6453 offset + iov_length(iov, nr_segs) - 1,
6454 EXTENT_LOCKED | write_bits, 1, 0,
6455 &cached_state, GFP_NOFS);
6456 } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
6457 /*
6458 * We're falling back to buffered, unlock the section we didn't
6459 * do IO on.
6460 */
6461 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
6462 offset + iov_length(iov, nr_segs) - 1,
6463 EXTENT_LOCKED | write_bits, 1, 0,
6464 &cached_state, GFP_NOFS);
6465 }
6466out:
6467 free_extent_state(cached_state);
6468 return ret;
6469} 6472}
6470 6473
6471static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6474static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bc2f6ffff3cf..9df50fa8a078 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
424 uuid_le_gen(&new_uuid); 424 uuid_le_gen(&new_uuid);
425 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 425 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
426 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); 426 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
427 root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); 427 root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
428 root_item.ctime = root_item.otime; 428 root_item.ctime = root_item.otime;
429 btrfs_set_root_ctransid(&root_item, trans->transid); 429 btrfs_set_root_ctransid(&root_item, trans->transid);
430 btrfs_set_root_otransid(&root_item, trans->transid); 430 btrfs_set_root_otransid(&root_item, trans->transid);
@@ -664,10 +664,6 @@ static noinline int btrfs_mksubvol(struct path *parent,
664 struct dentry *dentry; 664 struct dentry *dentry;
665 int error; 665 int error;
666 666
667 error = mnt_want_write(parent->mnt);
668 if (error)
669 return error;
670
671 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 667 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
672 668
673 dentry = lookup_one_len(name, parent->dentry, namelen); 669 dentry = lookup_one_len(name, parent->dentry, namelen);
@@ -703,7 +699,6 @@ out_dput:
703 dput(dentry); 699 dput(dentry);
704out_unlock: 700out_unlock:
705 mutex_unlock(&dir->i_mutex); 701 mutex_unlock(&dir->i_mutex);
706 mnt_drop_write(parent->mnt);
707 return error; 702 return error;
708} 703}
709 704
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff074805..2a1762c66041 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
67{ 67{
68 if (eb->lock_nested) { 68 if (eb->lock_nested) {
69 read_lock(&eb->lock); 69 read_lock(&eb->lock);
70 if (&eb->lock_nested && current->pid == eb->lock_owner) { 70 if (eb->lock_nested && current->pid == eb->lock_owner) {
71 read_unlock(&eb->lock); 71 read_unlock(&eb->lock);
72 return; 72 return;
73 } 73 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 643335a4fe3c..051c7fe551dd 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -596,7 +596,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
596 /* 596 /*
597 * pages in the range can be dirty, clean or writeback. We 597 * pages in the range can be dirty, clean or writeback. We
598 * start IO on any dirty ones so the wait doesn't stall waiting 598 * start IO on any dirty ones so the wait doesn't stall waiting
599 * for pdflush to find them 599 * for the flusher thread to find them
600 */ 600 */
601 if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) 601 if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
602 filemap_fdatawrite_range(inode->i_mapping, start, end); 602 filemap_fdatawrite_range(inode->i_mapping, start, end);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae5a81a..38b42e7bc91d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,13 +1364,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1364 spin_lock(&fs_info->qgroup_lock); 1364 spin_lock(&fs_info->qgroup_lock);
1365 1365
1366 dstgroup = add_qgroup_rb(fs_info, objectid); 1366 dstgroup = add_qgroup_rb(fs_info, objectid);
1367 if (!dstgroup) 1367 if (IS_ERR(dstgroup)) {
1368 ret = PTR_ERR(dstgroup);
1368 goto unlock; 1369 goto unlock;
1370 }
1369 1371
1370 if (srcid) { 1372 if (srcid) {
1371 srcgroup = find_qgroup_rb(fs_info, srcid); 1373 srcgroup = find_qgroup_rb(fs_info, srcid);
1372 if (!srcgroup) 1374 if (!srcgroup) {
1375 ret = -EINVAL;
1373 goto unlock; 1376 goto unlock;
1377 }
1374 dstgroup->rfer = srcgroup->rfer - level_size; 1378 dstgroup->rfer = srcgroup->rfer - level_size;
1375 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; 1379 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1376 srcgroup->excl = level_size; 1380 srcgroup->excl = level_size;
@@ -1379,8 +1383,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1379 qgroup_dirty(fs_info, srcgroup); 1383 qgroup_dirty(fs_info, srcgroup);
1380 } 1384 }
1381 1385
1382 if (!inherit) 1386 if (!inherit) {
1387 ret = -EINVAL;
1383 goto unlock; 1388 goto unlock;
1389 }
1384 1390
1385 i_qgroups = (u64 *)(inherit + 1); 1391 i_qgroups = (u64 *)(inherit + 1);
1386 for (i = 0; i < inherit->num_qgroups; ++i) { 1392 for (i = 0; i < inherit->num_qgroups; ++i) {
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465cca20f..10d8e4d88071 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
544 struct timespec ct = CURRENT_TIME; 544 struct timespec ct = CURRENT_TIME;
545 545
546 spin_lock(&root->root_times_lock); 546 spin_lock(&root->root_times_lock);
547 item->ctransid = trans->transid; 547 item->ctransid = cpu_to_le64(trans->transid);
548 item->ctime.sec = cpu_to_le64(ct.tv_sec); 548 item->ctime.sec = cpu_to_le64(ct.tv_sec);
549 item->ctime.nsec = cpu_to_le64(ct.tv_nsec); 549 item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
550 spin_unlock(&root->root_times_lock); 550 spin_unlock(&root->root_times_lock);
551} 551}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8c6e61d6eed5..83d6f9f9c220 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -100,10 +100,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
100 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; 100 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
101} 101}
102 102
103/* NOTE:
104 * We move write_super stuff at umount in order to avoid deadlock
105 * for umount hold all lock.
106 */
107static void save_error_info(struct btrfs_fs_info *fs_info) 103static void save_error_info(struct btrfs_fs_info *fs_info)
108{ 104{
109 __save_error_info(fs_info); 105 __save_error_info(fs_info);
@@ -842,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
842 struct btrfs_trans_handle *trans; 838 struct btrfs_trans_handle *trans;
843 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 839 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
844 struct btrfs_root *root = fs_info->tree_root; 840 struct btrfs_root *root = fs_info->tree_root;
845 int ret;
846 841
847 trace_btrfs_sync_fs(wait); 842 trace_btrfs_sync_fs(wait);
848 843
@@ -853,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
853 848
854 btrfs_wait_ordered_extents(root, 0, 0); 849 btrfs_wait_ordered_extents(root, 0, 0);
855 850
856 trans = btrfs_start_transaction(root, 0); 851 spin_lock(&fs_info->trans_lock);
852 if (!fs_info->running_transaction) {
853 spin_unlock(&fs_info->trans_lock);
854 return 0;
855 }
856 spin_unlock(&fs_info->trans_lock);
857
858 trans = btrfs_join_transaction(root);
857 if (IS_ERR(trans)) 859 if (IS_ERR(trans))
858 return PTR_ERR(trans); 860 return PTR_ERR(trans);
859 ret = btrfs_commit_transaction(trans, root); 861 return btrfs_commit_transaction(trans, root);
860 return ret;
861} 862}
862 863
863static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) 864static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1534,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1534 while (cur_devices) { 1535 while (cur_devices) {
1535 head = &cur_devices->devices; 1536 head = &cur_devices->devices;
1536 list_for_each_entry(dev, head, dev_list) { 1537 list_for_each_entry(dev, head, dev_list) {
1538 if (dev->missing)
1539 continue;
1537 if (!first_dev || dev->devid < first_dev->devid) 1540 if (!first_dev || dev->devid < first_dev->devid)
1538 first_dev = dev; 1541 first_dev = dev;
1539 } 1542 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3dedacba..27c26004e050 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1031 1031
1032 btrfs_i_size_write(parent_inode, parent_inode->i_size + 1032 btrfs_i_size_write(parent_inode, parent_inode->i_size +
1033 dentry->d_name.len * 2); 1033 dentry->d_name.len * 2);
1034 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1034 ret = btrfs_update_inode(trans, parent_root, parent_inode); 1035 ret = btrfs_update_inode(trans, parent_root, parent_inode);
1035 if (ret) 1036 if (ret)
1036 goto abort_trans_dput; 1037 goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1066 memcpy(new_root_item->parent_uuid, root->root_item.uuid, 1067 memcpy(new_root_item->parent_uuid, root->root_item.uuid,
1067 BTRFS_UUID_SIZE); 1068 BTRFS_UUID_SIZE);
1068 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); 1069 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
1069 new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); 1070 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
1070 btrfs_set_root_otransid(new_root_item, trans->transid); 1071 btrfs_set_root_otransid(new_root_item, trans->transid);
1071 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); 1072 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
1072 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); 1073 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b8708f994e67..88b969aeeb71 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@ loop_lock:
227 cur = pending; 227 cur = pending;
228 pending = pending->bi_next; 228 pending = pending->bi_next;
229 cur->bi_next = NULL; 229 cur->bi_next = NULL;
230 atomic_dec(&fs_info->nr_async_bios);
231 230
232 if (atomic_read(&fs_info->nr_async_bios) < limit && 231 if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
233 waitqueue_active(&fs_info->async_submit_wait)) 232 waitqueue_active(&fs_info->async_submit_wait))
234 wake_up(&fs_info->async_submit_wait); 233 wake_up(&fs_info->async_submit_wait);
235 234
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
569 memcpy(new_device, device, sizeof(*new_device)); 568 memcpy(new_device, device, sizeof(*new_device));
570 569
571 /* Safe because we are under uuid_mutex */ 570 /* Safe because we are under uuid_mutex */
572 name = rcu_string_strdup(device->name->str, GFP_NOFS); 571 if (device->name) {
573 BUG_ON(device->name && !name); /* -ENOMEM */ 572 name = rcu_string_strdup(device->name->str, GFP_NOFS);
574 rcu_assign_pointer(new_device->name, name); 573 BUG_ON(device->name && !name); /* -ENOMEM */
574 rcu_assign_pointer(new_device->name, name);
575 }
575 new_device->bdev = NULL; 576 new_device->bdev = NULL;
576 new_device->writeable = 0; 577 new_device->writeable = 0;
577 new_device->in_fs_metadata = 0; 578 new_device->in_fs_metadata = 0;
@@ -1744,10 +1745,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1744 1745
1745 device->fs_devices = root->fs_info->fs_devices; 1746 device->fs_devices = root->fs_info->fs_devices;
1746 1747
1747 /*
1748 * we don't want write_supers to jump in here with our device
1749 * half setup
1750 */
1751 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1748 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1752 list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); 1749 list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
1753 list_add(&device->dev_alloc_list, 1750 list_add(&device->dev_alloc_list,
@@ -4609,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4609 return ret; 4606 return ret;
4610} 4607}
4611 4608
4612struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
4613 u64 logical, int mirror_num)
4614{
4615 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
4616 int ret;
4617 u64 map_length = 0;
4618 struct btrfs_bio *bbio = NULL;
4619 struct btrfs_device *device;
4620
4621 BUG_ON(mirror_num == 0);
4622 ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
4623 mirror_num);
4624 if (ret) {
4625 BUG_ON(bbio != NULL);
4626 return NULL;
4627 }
4628 BUG_ON(mirror_num != bbio->mirror_num);
4629 device = bbio->stripes[mirror_num - 1].dev;
4630 kfree(bbio);
4631 return device;
4632}
4633
4634int btrfs_read_chunk_tree(struct btrfs_root *root) 4609int btrfs_read_chunk_tree(struct btrfs_root *root)
4635{ 4610{
4636 struct btrfs_path *path; 4611 struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325987b3..53c06af92e8d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
289int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 289int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
290int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 290int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
291 u64 *start, u64 *max_avail); 291 u64 *start, u64 *max_avail);
292struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
293 u64 logical, int mirror_num);
294void btrfs_dev_stat_print_on_error(struct btrfs_device *device); 292void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
295void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 293void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
296int btrfs_get_dev_stats(struct btrfs_root *root, 294int btrfs_get_dev_stats(struct btrfs_root *root,
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f6d2e41281d..58e2e7b77372 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
914/* 914/*
915 * Initialise the state of a blockdev page's buffers. 915 * Initialise the state of a blockdev page's buffers.
916 */ 916 */
917static void 917static sector_t
918init_page_buffers(struct page *page, struct block_device *bdev, 918init_page_buffers(struct page *page, struct block_device *bdev,
919 sector_t block, int size) 919 sector_t block, int size)
920{ 920{
@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev,
936 block++; 936 block++;
937 bh = bh->b_this_page; 937 bh = bh->b_this_page;
938 } while (bh != head); 938 } while (bh != head);
939
940 /*
941 * Caller needs to validate requested block against end of device.
942 */
943 return end_block;
939} 944}
940 945
941/* 946/*
942 * Create the page-cache page that contains the requested block. 947 * Create the page-cache page that contains the requested block.
943 * 948 *
944 * This is user purely for blockdev mappings. 949 * This is used purely for blockdev mappings.
945 */ 950 */
946static struct page * 951static int
947grow_dev_page(struct block_device *bdev, sector_t block, 952grow_dev_page(struct block_device *bdev, sector_t block,
948 pgoff_t index, int size) 953 pgoff_t index, int size, int sizebits)
949{ 954{
950 struct inode *inode = bdev->bd_inode; 955 struct inode *inode = bdev->bd_inode;
951 struct page *page; 956 struct page *page;
952 struct buffer_head *bh; 957 struct buffer_head *bh;
958 sector_t end_block;
959 int ret = 0; /* Will call free_more_memory() */
953 960
954 page = find_or_create_page(inode->i_mapping, index, 961 page = find_or_create_page(inode->i_mapping, index,
955 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); 962 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
956 if (!page) 963 if (!page)
957 return NULL; 964 return ret;
958 965
959 BUG_ON(!PageLocked(page)); 966 BUG_ON(!PageLocked(page));
960 967
961 if (page_has_buffers(page)) { 968 if (page_has_buffers(page)) {
962 bh = page_buffers(page); 969 bh = page_buffers(page);
963 if (bh->b_size == size) { 970 if (bh->b_size == size) {
964 init_page_buffers(page, bdev, block, size); 971 end_block = init_page_buffers(page, bdev,
965 return page; 972 index << sizebits, size);
973 goto done;
966 } 974 }
967 if (!try_to_free_buffers(page)) 975 if (!try_to_free_buffers(page))
968 goto failed; 976 goto failed;
@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block,
982 */ 990 */
983 spin_lock(&inode->i_mapping->private_lock); 991 spin_lock(&inode->i_mapping->private_lock);
984 link_dev_buffers(page, bh); 992 link_dev_buffers(page, bh);
985 init_page_buffers(page, bdev, block, size); 993 end_block = init_page_buffers(page, bdev, index << sizebits, size);
986 spin_unlock(&inode->i_mapping->private_lock); 994 spin_unlock(&inode->i_mapping->private_lock);
987 return page; 995done:
988 996 ret = (block < end_block) ? 1 : -ENXIO;
989failed: 997failed:
990 unlock_page(page); 998 unlock_page(page);
991 page_cache_release(page); 999 page_cache_release(page);
992 return NULL; 1000 return ret;
993} 1001}
994 1002
995/* 1003/*
@@ -999,7 +1007,6 @@ failed:
999static int 1007static int
1000grow_buffers(struct block_device *bdev, sector_t block, int size) 1008grow_buffers(struct block_device *bdev, sector_t block, int size)
1001{ 1009{
1002 struct page *page;
1003 pgoff_t index; 1010 pgoff_t index;
1004 int sizebits; 1011 int sizebits;
1005 1012
@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1023 bdevname(bdev, b)); 1030 bdevname(bdev, b));
1024 return -EIO; 1031 return -EIO;
1025 } 1032 }
1026 block = index << sizebits; 1033
1027 /* Create a page with the proper size buffers.. */ 1034 /* Create a page with the proper size buffers.. */
1028 page = grow_dev_page(bdev, block, index, size); 1035 return grow_dev_page(bdev, block, index, size, sizebits);
1029 if (!page)
1030 return 0;
1031 unlock_page(page);
1032 page_cache_release(page);
1033 return 1;
1034} 1036}
1035 1037
1036static struct buffer_head * 1038static struct buffer_head *
1037__getblk_slow(struct block_device *bdev, sector_t block, int size) 1039__getblk_slow(struct block_device *bdev, sector_t block, int size)
1038{ 1040{
1039 int ret;
1040 struct buffer_head *bh;
1041
1042 /* Size must be multiple of hard sectorsize */ 1041 /* Size must be multiple of hard sectorsize */
1043 if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 1042 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1044 (size < 512 || size > PAGE_SIZE))) { 1043 (size < 512 || size > PAGE_SIZE))) {
@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1051 return NULL; 1050 return NULL;
1052 } 1051 }
1053 1052
1054retry: 1053 for (;;) {
1055 bh = __find_get_block(bdev, block, size); 1054 struct buffer_head *bh;
1056 if (bh) 1055 int ret;
1057 return bh;
1058 1056
1059 ret = grow_buffers(bdev, block, size);
1060 if (ret == 0) {
1061 free_more_memory();
1062 goto retry;
1063 } else if (ret > 0) {
1064 bh = __find_get_block(bdev, block, size); 1057 bh = __find_get_block(bdev, block, size);
1065 if (bh) 1058 if (bh)
1066 return bh; 1059 return bh;
1060
1061 ret = grow_buffers(bdev, block, size);
1062 if (ret < 0)
1063 return NULL;
1064 if (ret == 0)
1065 free_more_memory();
1067 } 1066 }
1068 return NULL;
1069} 1067}
1070 1068
1071/* 1069/*
@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block);
1321 * which corresponds to the passed block_device, block and size. The 1319 * which corresponds to the passed block_device, block and size. The
1322 * returned buffer has its reference count incremented. 1320 * returned buffer has its reference count incremented.
1323 * 1321 *
1324 * __getblk() cannot fail - it just keeps trying. If you pass it an
1325 * illegal block number, __getblk() will happily return a buffer_head
1326 * which represents the non-existent block. Very weird.
1327 *
1328 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() 1322 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
1329 * attempt is failing. FIXME, perhaps? 1323 * attempt is failing. FIXME, perhaps?
1330 */ 1324 */
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962efdacee..6d59006bfa27 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
201 int err = -ENOMEM; 201 int err = -ENOMEM;
202 202
203 dout("ceph_fs_debugfs_init\n"); 203 dout("ceph_fs_debugfs_init\n");
204 BUG_ON(!fsc->client->debugfs_dir);
204 fsc->debugfs_congestion_kb = 205 fsc->debugfs_congestion_kb =
205 debugfs_create_file("writeback_congestion_kb", 206 debugfs_create_file("writeback_congestion_kb",
206 0600, 207 0600,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9fff9f3b17e4..4b5762ef7c2b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
992 if (rinfo->head->is_dentry) { 992 if (rinfo->head->is_dentry) {
993 struct inode *dir = req->r_locked_dir; 993 struct inode *dir = req->r_locked_dir;
994 994
995 err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, 995 if (dir) {
996 session, req->r_request_started, -1, 996 err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
997 &req->r_caps_reservation); 997 session, req->r_request_started, -1,
998 if (err < 0) 998 &req->r_caps_reservation);
999 return err; 999 if (err < 0)
1000 return err;
1001 } else {
1002 WARN_ON_ONCE(1);
1003 }
1000 } 1004 }
1001 1005
1002 /* 1006 /*
@@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1004 * will have trouble splicing in the virtual snapdir later 1008 * will have trouble splicing in the virtual snapdir later
1005 */ 1009 */
1006 if (rinfo->head->is_dentry && !req->r_aborted && 1010 if (rinfo->head->is_dentry && !req->r_aborted &&
1011 req->r_locked_dir &&
1007 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, 1012 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
1008 fsc->mount_options->snapdir_name, 1013 fsc->mount_options->snapdir_name,
1009 req->r_dentry->d_name.len))) { 1014 req->r_dentry->d_name.len))) {
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8e3fb69fbe62..1396ceb46797 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
42 /* validate striping parameters */ 42 /* validate striping parameters */
43 if ((l->object_size & ~PAGE_MASK) || 43 if ((l->object_size & ~PAGE_MASK) ||
44 (l->stripe_unit & ~PAGE_MASK) || 44 (l->stripe_unit & ~PAGE_MASK) ||
45 ((unsigned)l->object_size % (unsigned)l->stripe_unit)) 45 (l->stripe_unit != 0 &&
46 ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
46 return -EINVAL; 47 return -EINVAL;
47 48
48 /* make sure it's a valid data pool */ 49 /* make sure it's a valid data pool */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 074923ce593d..f0cf934ba877 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1576,9 +1576,14 @@ cifs_readv_callback(struct mid_q_entry *mid)
1576 /* result already set, check signature */ 1576 /* result already set, check signature */
1577 if (server->sec_mode & 1577 if (server->sec_mode &
1578 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 1578 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1579 if (cifs_verify_signature(rdata->iov, rdata->nr_iov, 1579 int rc = 0;
1580 server, mid->sequence_number + 1)) 1580
1581 cERROR(1, "Unexpected SMB signature"); 1581 rc = cifs_verify_signature(rdata->iov, rdata->nr_iov,
1582 server,
1583 mid->sequence_number + 1);
1584 if (rc)
1585 cERROR(1, "SMB signature verification returned "
1586 "error = %d", rc);
1582 } 1587 }
1583 /* FIXME: should this be counted toward the initiating task? */ 1588 /* FIXME: should this be counted toward the initiating task? */
1584 task_io_account_read(rdata->bytes); 1589 task_io_account_read(rdata->bytes);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index cbe709ad6663..781025be48bc 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -356,19 +356,12 @@ cifs_create_get_file_info:
356cifs_create_set_dentry: 356cifs_create_set_dentry:
357 if (rc != 0) { 357 if (rc != 0) {
358 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 358 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
359 CIFSSMBClose(xid, tcon, *fileHandle);
359 goto out; 360 goto out;
360 } 361 }
361 d_drop(direntry); 362 d_drop(direntry);
362 d_add(direntry, newinode); 363 d_add(direntry, newinode);
363 364
364 /* ENOENT for create? How weird... */
365 rc = -ENOENT;
366 if (!newinode) {
367 CIFSSMBClose(xid, tcon, *fileHandle);
368 goto out;
369 }
370 rc = 0;
371
372out: 365out:
373 kfree(buf); 366 kfree(buf);
374 kfree(full_path); 367 kfree(full_path);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9154192b0683..71e9ad9f5961 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -917,7 +917,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
917 if (!buf) { 917 if (!buf) {
918 mutex_unlock(&cinode->lock_mutex); 918 mutex_unlock(&cinode->lock_mutex);
919 free_xid(xid); 919 free_xid(xid);
920 return rc; 920 return -ENOMEM;
921 } 921 }
922 922
923 for (i = 0; i < 2; i++) { 923 for (i = 0; i < 2; i++) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7354877fa3bd..cb79c7edecb0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -124,10 +124,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
124{ 124{
125 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 125 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
126 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 126 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
127 unsigned long oldtime = cifs_i->time;
128 127
129 cifs_revalidate_cache(inode, fattr); 128 cifs_revalidate_cache(inode, fattr);
130 129
130 spin_lock(&inode->i_lock);
131 inode->i_atime = fattr->cf_atime; 131 inode->i_atime = fattr->cf_atime;
132 inode->i_mtime = fattr->cf_mtime; 132 inode->i_mtime = fattr->cf_mtime;
133 inode->i_ctime = fattr->cf_ctime; 133 inode->i_ctime = fattr->cf_ctime;
@@ -148,9 +148,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
148 else 148 else
149 cifs_i->time = jiffies; 149 cifs_i->time = jiffies;
150 150
151 cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode,
152 oldtime, cifs_i->time);
153
154 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; 151 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
155 152
156 cifs_i->server_eof = fattr->cf_eof; 153 cifs_i->server_eof = fattr->cf_eof;
@@ -158,7 +155,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
158 * Can't safely change the file size here if the client is writing to 155 * Can't safely change the file size here if the client is writing to
159 * it due to potential races. 156 * it due to potential races.
160 */ 157 */
161 spin_lock(&inode->i_lock);
162 if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { 158 if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
163 i_size_write(inode, fattr->cf_eof); 159 i_size_write(inode, fattr->cf_eof);
164 160
@@ -859,12 +855,14 @@ struct inode *cifs_root_iget(struct super_block *sb)
859 855
860 if (rc && tcon->ipc) { 856 if (rc && tcon->ipc) {
861 cFYI(1, "ipc connection - fake read inode"); 857 cFYI(1, "ipc connection - fake read inode");
858 spin_lock(&inode->i_lock);
862 inode->i_mode |= S_IFDIR; 859 inode->i_mode |= S_IFDIR;
863 set_nlink(inode, 2); 860 set_nlink(inode, 2);
864 inode->i_op = &cifs_ipc_inode_ops; 861 inode->i_op = &cifs_ipc_inode_ops;
865 inode->i_fop = &simple_dir_operations; 862 inode->i_fop = &simple_dir_operations;
866 inode->i_uid = cifs_sb->mnt_uid; 863 inode->i_uid = cifs_sb->mnt_uid;
867 inode->i_gid = cifs_sb->mnt_gid; 864 inode->i_gid = cifs_sb->mnt_gid;
865 spin_unlock(&inode->i_lock);
868 } else if (rc) { 866 } else if (rc) {
869 iget_failed(inode); 867 iget_failed(inode);
870 inode = ERR_PTR(rc); 868 inode = ERR_PTR(rc);
@@ -1110,6 +1108,15 @@ undo_setattr:
1110 goto out_close; 1108 goto out_close;
1111} 1109}
1112 1110
1111/* copied from fs/nfs/dir.c with small changes */
1112static void
1113cifs_drop_nlink(struct inode *inode)
1114{
1115 spin_lock(&inode->i_lock);
1116 if (inode->i_nlink > 0)
1117 drop_nlink(inode);
1118 spin_unlock(&inode->i_lock);
1119}
1113 1120
1114/* 1121/*
1115 * If dentry->d_inode is null (usually meaning the cached dentry 1122 * If dentry->d_inode is null (usually meaning the cached dentry
@@ -1166,13 +1173,13 @@ retry_std_delete:
1166psx_del_no_retry: 1173psx_del_no_retry:
1167 if (!rc) { 1174 if (!rc) {
1168 if (inode) 1175 if (inode)
1169 drop_nlink(inode); 1176 cifs_drop_nlink(inode);
1170 } else if (rc == -ENOENT) { 1177 } else if (rc == -ENOENT) {
1171 d_drop(dentry); 1178 d_drop(dentry);
1172 } else if (rc == -ETXTBSY) { 1179 } else if (rc == -ETXTBSY) {
1173 rc = cifs_rename_pending_delete(full_path, dentry, xid); 1180 rc = cifs_rename_pending_delete(full_path, dentry, xid);
1174 if (rc == 0) 1181 if (rc == 0)
1175 drop_nlink(inode); 1182 cifs_drop_nlink(inode);
1176 } else if ((rc == -EACCES) && (dosattr == 0) && inode) { 1183 } else if ((rc == -EACCES) && (dosattr == 0) && inode) {
1177 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); 1184 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
1178 if (attrs == NULL) { 1185 if (attrs == NULL) {
@@ -1241,9 +1248,10 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
1241 * setting nlink not necessary except in cases where we failed to get it 1248 * setting nlink not necessary except in cases where we failed to get it
1242 * from the server or was set bogus 1249 * from the server or was set bogus
1243 */ 1250 */
1251 spin_lock(&dentry->d_inode->i_lock);
1244 if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) 1252 if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2))
1245 set_nlink(dentry->d_inode, 2); 1253 set_nlink(dentry->d_inode, 2);
1246 1254 spin_unlock(&dentry->d_inode->i_lock);
1247 mode &= ~current_umask(); 1255 mode &= ~current_umask();
1248 /* must turn on setgid bit if parent dir has it */ 1256 /* must turn on setgid bit if parent dir has it */
1249 if (inode->i_mode & S_ISGID) 1257 if (inode->i_mode & S_ISGID)
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 09e4b3ae4564..e6ce3b112875 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -433,7 +433,9 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
433 if (old_file->d_inode) { 433 if (old_file->d_inode) {
434 cifsInode = CIFS_I(old_file->d_inode); 434 cifsInode = CIFS_I(old_file->d_inode);
435 if (rc == 0) { 435 if (rc == 0) {
436 spin_lock(&old_file->d_inode->i_lock);
436 inc_nlink(old_file->d_inode); 437 inc_nlink(old_file->d_inode);
438 spin_unlock(&old_file->d_inode->i_lock);
437/* BB should we make this contingent on superblock flag NOATIME? */ 439/* BB should we make this contingent on superblock flag NOATIME? */
438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ 440/* old_file->d_inode->i_ctime = CURRENT_TIME;*/
439 /* parent dir timestamps will update from srv 441 /* parent dir timestamps will update from srv
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index a4ff5d547554..e4d3b9964167 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -52,7 +52,8 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
52 cERROR(1, "Bad protocol string signature header %x", 52 cERROR(1, "Bad protocol string signature header %x",
53 *(unsigned int *) hdr->ProtocolId); 53 *(unsigned int *) hdr->ProtocolId);
54 if (mid != hdr->MessageId) 54 if (mid != hdr->MessageId)
55 cERROR(1, "Mids do not match"); 55 cERROR(1, "Mids do not match: %llu and %llu", mid,
56 hdr->MessageId);
56 } 57 }
57 cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); 58 cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId);
58 return 1; 59 return 1;
@@ -107,7 +108,7 @@ smb2_check_message(char *buf, unsigned int length)
107 * ie Validate the wct via smb2_struct_sizes table above 108 * ie Validate the wct via smb2_struct_sizes table above
108 */ 109 */
109 110
110 if (length < 2 + sizeof(struct smb2_hdr)) { 111 if (length < sizeof(struct smb2_pdu)) {
111 if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { 112 if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
112 pdu->StructureSize2 = 0; 113 pdu->StructureSize2 = 0;
113 /* 114 /*
@@ -121,15 +122,15 @@ smb2_check_message(char *buf, unsigned int length)
121 return 1; 122 return 1;
122 } 123 }
123 if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { 124 if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) {
124 cERROR(1, "SMB length greater than maximum, mid=%lld", mid); 125 cERROR(1, "SMB length greater than maximum, mid=%llu", mid);
125 return 1; 126 return 1;
126 } 127 }
127 128
128 if (check_smb2_hdr(hdr, mid)) 129 if (check_smb2_hdr(hdr, mid))
129 return 1; 130 return 1;
130 131
131 if (hdr->StructureSize != SMB2_HEADER_SIZE) { 132 if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
132 cERROR(1, "Illegal structure size %d", 133 cERROR(1, "Illegal structure size %u",
133 le16_to_cpu(hdr->StructureSize)); 134 le16_to_cpu(hdr->StructureSize));
134 return 1; 135 return 1;
135 } 136 }
@@ -161,8 +162,9 @@ smb2_check_message(char *buf, unsigned int length)
161 if (4 + len != clc_len) { 162 if (4 + len != clc_len) {
162 cFYI(1, "Calculated size %u length %u mismatch mid %llu", 163 cFYI(1, "Calculated size %u length %u mismatch mid %llu",
163 clc_len, 4 + len, mid); 164 clc_len, 4 + len, mid);
164 if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */ 165 /* server can return one byte more */
165 return 0; /* BB workaround Samba 3 bug SessSetup rsp */ 166 if (clc_len == 4 + len + 1)
167 return 0;
166 return 1; 168 return 1;
167 } 169 }
168 return 0; 170 return 0;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f37a1b41b402..15dc8eea8273 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -87,10 +87,6 @@
87 87
88#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) 88#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
89 89
90#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64)
91
92#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9)
93
94/* 90/*
95 * SMB2 Header Definition 91 * SMB2 Header Definition
96 * 92 *
@@ -99,6 +95,9 @@
99 * "PDU" : "Protocol Data Unit" (ie a network "frame") 95 * "PDU" : "Protocol Data Unit" (ie a network "frame")
100 * 96 *
101 */ 97 */
98
99#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
100
102struct smb2_hdr { 101struct smb2_hdr {
103 __be32 smb2_buf_length; /* big endian on wire */ 102 __be32 smb2_buf_length; /* big endian on wire */
104 /* length is only two or three bytes - with 103 /* length is only two or three bytes - with
@@ -140,6 +139,9 @@ struct smb2_pdu {
140 * command code name for the struct. Note that structures must be packed. 139 * command code name for the struct. Note that structures must be packed.
141 * 140 *
142 */ 141 */
142
143#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
144
143struct smb2_err_rsp { 145struct smb2_err_rsp {
144 struct smb2_hdr hdr; 146 struct smb2_hdr hdr;
145 __le16 StructureSize; 147 __le16 StructureSize;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 83867ef348df..d9b639b95fa8 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -503,13 +503,16 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
503 /* convert the length into a more usable form */ 503 /* convert the length into a more usable form */
504 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 504 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
505 struct kvec iov; 505 struct kvec iov;
506 int rc = 0;
506 507
507 iov.iov_base = mid->resp_buf; 508 iov.iov_base = mid->resp_buf;
508 iov.iov_len = len; 509 iov.iov_len = len;
509 /* FIXME: add code to kill session */ 510 /* FIXME: add code to kill session */
510 if (cifs_verify_signature(&iov, 1, server, 511 rc = cifs_verify_signature(&iov, 1, server,
511 mid->sequence_number + 1) != 0) 512 mid->sequence_number + 1);
512 cERROR(1, "Unexpected SMB signature"); 513 if (rc)
514 cERROR(1, "SMB signature verification returned error = "
515 "%d", rc);
513 } 516 }
514 517
515 /* BB special case reconnect tid and uid here? */ 518 /* BB special case reconnect tid and uid here? */
diff --git a/fs/compat.c b/fs/compat.c
index 6161255fac45..1bdb350ea5d3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1155,11 +1155,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1155 struct file *file; 1155 struct file *file;
1156 int fput_needed; 1156 int fput_needed;
1157 ssize_t ret; 1157 ssize_t ret;
1158 loff_t pos;
1158 1159
1159 file = fget_light(fd, &fput_needed); 1160 file = fget_light(fd, &fput_needed);
1160 if (!file) 1161 if (!file)
1161 return -EBADF; 1162 return -EBADF;
1162 ret = compat_readv(file, vec, vlen, &file->f_pos); 1163 pos = file->f_pos;
1164 ret = compat_readv(file, vec, vlen, &pos);
1165 file->f_pos = pos;
1163 fput_light(file, fput_needed); 1166 fput_light(file, fput_needed);
1164 return ret; 1167 return ret;
1165} 1168}
@@ -1221,11 +1224,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1221 struct file *file; 1224 struct file *file;
1222 int fput_needed; 1225 int fput_needed;
1223 ssize_t ret; 1226 ssize_t ret;
1227 loff_t pos;
1224 1228
1225 file = fget_light(fd, &fput_needed); 1229 file = fget_light(fd, &fput_needed);
1226 if (!file) 1230 if (!file)
1227 return -EBADF; 1231 return -EBADF;
1228 ret = compat_writev(file, vec, vlen, &file->f_pos); 1232 pos = file->f_pos;
1233 ret = compat_writev(file, vec, vlen, &pos);
1234 file->f_pos = pos;
1229 fput_light(file, fput_needed); 1235 fput_light(file, fput_needed);
1230 return ret; 1236 return ret;
1231} 1237}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1faf4cb56f39..f86c720dba0e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1062 unsigned long user_addr; 1062 unsigned long user_addr;
1063 size_t bytes; 1063 size_t bytes;
1064 struct buffer_head map_bh = { 0, }; 1064 struct buffer_head map_bh = { 0, };
1065 struct blk_plug plug;
1065 1066
1066 if (rw & WRITE) 1067 if (rw & WRITE)
1067 rw = WRITE_ODIRECT; 1068 rw = WRITE_ODIRECT;
@@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1177 PAGE_SIZE - user_addr / PAGE_SIZE); 1178 PAGE_SIZE - user_addr / PAGE_SIZE);
1178 } 1179 }
1179 1180
1181 blk_start_plug(&plug);
1182
1180 for (seg = 0; seg < nr_segs; seg++) { 1183 for (seg = 0; seg < nr_segs; seg++) {
1181 user_addr = (unsigned long)iov[seg].iov_base; 1184 user_addr = (unsigned long)iov[seg].iov_base;
1182 sdio.size += bytes = iov[seg].iov_len; 1185 sdio.size += bytes = iov[seg].iov_len;
@@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1235 if (sdio.bio) 1238 if (sdio.bio)
1236 dio_bio_submit(dio, &sdio); 1239 dio_bio_submit(dio, &sdio);
1237 1240
1241 blk_finish_plug(&plug);
1242
1238 /* 1243 /*
1239 * It is possible that, we return short IO due to end of file. 1244 * It is possible that, we return short IO due to end of file.
1240 * In that case, we need to release all the pages we got hold on. 1245 * In that case, we need to release all the pages we got hold on.
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 44ce5c6a541d..d45ba4568128 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -275,8 +275,14 @@ out:
275 275
276static int ecryptfs_flush(struct file *file, fl_owner_t td) 276static int ecryptfs_flush(struct file *file, fl_owner_t td)
277{ 277{
278 return file->f_mode & FMODE_WRITE 278 struct file *lower_file = ecryptfs_file_to_lower(file);
279 ? filemap_write_and_wait(file->f_mapping) : 0; 279
280 if (lower_file->f_op && lower_file->f_op->flush) {
281 filemap_write_and_wait(file->f_mapping);
282 return lower_file->f_op->flush(lower_file, td);
283 }
284
285 return 0;
280} 286}
281 287
282static int ecryptfs_release(struct inode *inode, struct file *file) 288static int ecryptfs_release(struct inode *inode, struct file *file)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 534b129ea676..cc7709e7c508 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
619 struct dentry *lower_old_dir_dentry; 619 struct dentry *lower_old_dir_dentry;
620 struct dentry *lower_new_dir_dentry; 620 struct dentry *lower_new_dir_dentry;
621 struct dentry *trap = NULL; 621 struct dentry *trap = NULL;
622 struct inode *target_inode;
622 623
623 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 624 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
624 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 625 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
626 dget(lower_new_dentry); 627 dget(lower_new_dentry);
627 lower_old_dir_dentry = dget_parent(lower_old_dentry); 628 lower_old_dir_dentry = dget_parent(lower_old_dentry);
628 lower_new_dir_dentry = dget_parent(lower_new_dentry); 629 lower_new_dir_dentry = dget_parent(lower_new_dentry);
630 target_inode = new_dentry->d_inode;
629 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 631 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
630 /* source should not be ancestor of target */ 632 /* source should not be ancestor of target */
631 if (trap == lower_old_dentry) { 633 if (trap == lower_old_dentry) {
@@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
641 lower_new_dir_dentry->d_inode, lower_new_dentry); 643 lower_new_dir_dentry->d_inode, lower_new_dentry);
642 if (rc) 644 if (rc)
643 goto out_lock; 645 goto out_lock;
646 if (target_inode)
647 fsstack_copy_attr_all(target_inode,
648 ecryptfs_inode_to_lower(target_inode));
644 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); 649 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
645 if (new_dir != old_dir) 650 if (new_dir != old_dir)
646 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); 651 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 2768138eefee..9b627c15010a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode)
162 inode_info = ecryptfs_inode_to_private(inode); 162 inode_info = ecryptfs_inode_to_private(inode);
163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, 163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
164 &inode_info->lower_file_mutex)) { 164 &inode_info->lower_file_mutex)) {
165 filemap_write_and_wait(inode->i_mapping);
165 fput(inode_info->lower_file); 166 fput(inode_info->lower_file);
166 inode_info->lower_file = NULL; 167 inode_info->lower_file = NULL;
167 mutex_unlock(&inode_info->lower_file_mutex); 168 mutex_unlock(&inode_info->lower_file_mutex);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1c8b55670804..eedec84c1809 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1654 error = PTR_ERR(file); 1654 error = PTR_ERR(file);
1655 goto out_free_fd; 1655 goto out_free_fd;
1656 } 1656 }
1657 fd_install(fd, file);
1658 ep->file = file; 1657 ep->file = file;
1658 fd_install(fd, file);
1659 return fd; 1659 return fd;
1660 1660
1661out_free_fd: 1661out_free_fd:
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5badb0c039de..1562c27a2fab 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,15 +37,12 @@
37 37
38#define EXOFS_DBGMSG2(M...) do {} while (0) 38#define EXOFS_DBGMSG2(M...) do {} while (0)
39 39
40enum {MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), };
41
42unsigned exofs_max_io_pages(struct ore_layout *layout, 40unsigned exofs_max_io_pages(struct ore_layout *layout,
43 unsigned expected_pages) 41 unsigned expected_pages)
44{ 42{
45 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); 43 unsigned pages = min_t(unsigned, expected_pages,
44 layout->max_io_length / PAGE_SIZE);
46 45
47 /* TODO: easily support bio chaining */
48 pages = min_t(unsigned, pages, layout->max_io_length / PAGE_SIZE);
49 return pages; 46 return pages;
50} 47}
51 48
@@ -101,7 +98,8 @@ static void _pcol_reset(struct page_collect *pcol)
101 * it might not end here. don't be left with nothing 98 * it might not end here. don't be left with nothing
102 */ 99 */
103 if (!pcol->expected_pages) 100 if (!pcol->expected_pages)
104 pcol->expected_pages = MAX_PAGES_KMALLOC; 101 pcol->expected_pages =
102 exofs_max_io_pages(&pcol->sbi->layout, ~0);
105} 103}
106 104
107static int pcol_try_alloc(struct page_collect *pcol) 105static int pcol_try_alloc(struct page_collect *pcol)
@@ -389,6 +387,8 @@ static int readpage_strip(void *data, struct page *page)
389 size_t len; 387 size_t len;
390 int ret; 388 int ret;
391 389
390 BUG_ON(!PageLocked(page));
391
392 /* FIXME: Just for debugging, will be removed */ 392 /* FIXME: Just for debugging, will be removed */
393 if (PageUptodate(page)) 393 if (PageUptodate(page))
394 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, 394 EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
@@ -572,8 +572,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
572 572
573 if (!pcol->that_locked_page || 573 if (!pcol->that_locked_page ||
574 (pcol->that_locked_page->index != index)) { 574 (pcol->that_locked_page->index != index)) {
575 struct page *page = find_get_page(pcol->inode->i_mapping, index); 575 struct page *page;
576 loff_t i_size = i_size_read(pcol->inode);
577
578 if (offset >= i_size) {
579 *uptodate = true;
580 EXOFS_DBGMSG("offset >= i_size index=0x%lx\n", index);
581 return ZERO_PAGE(0);
582 }
576 583
584 page = find_get_page(pcol->inode->i_mapping, index);
577 if (!page) { 585 if (!page) {
578 page = find_or_create_page(pcol->inode->i_mapping, 586 page = find_or_create_page(pcol->inode->i_mapping,
579 index, GFP_NOFS); 587 index, GFP_NOFS);
@@ -602,12 +610,13 @@ static void __r4w_put_page(void *priv, struct page *page)
602{ 610{
603 struct page_collect *pcol = priv; 611 struct page_collect *pcol = priv;
604 612
605 if (pcol->that_locked_page != page) { 613 if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) {
606 EXOFS_DBGMSG("index=0x%lx\n", page->index); 614 EXOFS_DBGMSG("index=0x%lx\n", page->index);
607 page_cache_release(page); 615 page_cache_release(page);
608 return; 616 return;
609 } 617 }
610 EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index); 618 EXOFS_DBGMSG("that_locked_page index=0x%lx\n",
619 ZERO_PAGE(0) == page ? -1 : page->index);
611} 620}
612 621
613static const struct _ore_r4w_op _r4w_op = { 622static const struct _ore_r4w_op _r4w_op = {
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 24a49d47e935..1585db1aa365 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -837,11 +837,11 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
837 bio->bi_rw |= REQ_WRITE; 837 bio->bi_rw |= REQ_WRITE;
838 } 838 }
839 839
840 osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, 840 osd_req_write(or, _ios_obj(ios, cur_comp),
841 bio, per_dev->length); 841 per_dev->offset, bio, per_dev->length);
842 ORE_DBGMSG("write(0x%llx) offset=0x%llx " 842 ORE_DBGMSG("write(0x%llx) offset=0x%llx "
843 "length=0x%llx dev=%d\n", 843 "length=0x%llx dev=%d\n",
844 _LLU(_ios_obj(ios, dev)->id), 844 _LLU(_ios_obj(ios, cur_comp)->id),
845 _LLU(per_dev->offset), 845 _LLU(per_dev->offset),
846 _LLU(per_dev->length), dev); 846 _LLU(per_dev->length), dev);
847 } else if (ios->kern_buff) { 847 } else if (ios->kern_buff) {
@@ -853,20 +853,20 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
853 (ios->si.unit_off + ios->length > 853 (ios->si.unit_off + ios->length >
854 ios->layout->stripe_unit)); 854 ios->layout->stripe_unit));
855 855
856 ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev), 856 ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp),
857 per_dev->offset, 857 per_dev->offset,
858 ios->kern_buff, ios->length); 858 ios->kern_buff, ios->length);
859 if (unlikely(ret)) 859 if (unlikely(ret))
860 goto out; 860 goto out;
861 ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " 861 ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
862 "length=0x%llx dev=%d\n", 862 "length=0x%llx dev=%d\n",
863 _LLU(_ios_obj(ios, dev)->id), 863 _LLU(_ios_obj(ios, cur_comp)->id),
864 _LLU(per_dev->offset), 864 _LLU(per_dev->offset),
865 _LLU(ios->length), per_dev->dev); 865 _LLU(ios->length), per_dev->dev);
866 } else { 866 } else {
867 osd_req_set_attributes(or, _ios_obj(ios, dev)); 867 osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
868 ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", 868 ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
869 _LLU(_ios_obj(ios, dev)->id), 869 _LLU(_ios_obj(ios, cur_comp)->id),
870 ios->out_attr_len, dev); 870 ios->out_attr_len, dev);
871 } 871 }
872 872
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 433783624d10..dde41a75c7c8 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -400,8 +400,6 @@ static int exofs_sync_fs(struct super_block *sb, int wait)
400 ret = ore_write(ios); 400 ret = ore_write(ios);
401 if (unlikely(ret)) 401 if (unlikely(ret))
402 EXOFS_ERR("%s: ore_write failed.\n", __func__); 402 EXOFS_ERR("%s: ore_write failed.\n", __func__);
403 else
404 sb->s_dirt = 0;
405 403
406 404
407 unlock_super(sb); 405 unlock_super(sb);
@@ -412,14 +410,6 @@ out:
412 return ret; 410 return ret;
413} 411}
414 412
415static void exofs_write_super(struct super_block *sb)
416{
417 if (!(sb->s_flags & MS_RDONLY))
418 exofs_sync_fs(sb, 1);
419 else
420 sb->s_dirt = 0;
421}
422
423static void _exofs_print_device(const char *msg, const char *dev_path, 413static void _exofs_print_device(const char *msg, const char *dev_path,
424 struct osd_dev *od, u64 pid) 414 struct osd_dev *od, u64 pid)
425{ 415{
@@ -952,7 +942,6 @@ static const struct super_operations exofs_sops = {
952 .write_inode = exofs_write_inode, 942 .write_inode = exofs_write_inode,
953 .evict_inode = exofs_evict_inode, 943 .evict_inode = exofs_evict_inode,
954 .put_super = exofs_put_super, 944 .put_super = exofs_put_super,
955 .write_super = exofs_write_super,
956 .sync_fs = exofs_sync_fs, 945 .sync_fs = exofs_sync_fs,
957 .statfs = exofs_statfs, 946 .statfs = exofs_statfs,
958}; 947};
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9a4a5c48b1c9..ff574b4e345e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle,
3072 struct ext3_inode_info *ei = EXT3_I(inode); 3072 struct ext3_inode_info *ei = EXT3_I(inode);
3073 struct buffer_head *bh = iloc->bh; 3073 struct buffer_head *bh = iloc->bh;
3074 int err = 0, rc, block; 3074 int err = 0, rc, block;
3075 int need_datasync = 0;
3076 __le32 disksize;
3075 uid_t i_uid; 3077 uid_t i_uid;
3076 gid_t i_gid; 3078 gid_t i_gid;
3077 3079
@@ -3113,7 +3115,11 @@ again:
3113 raw_inode->i_gid_high = 0; 3115 raw_inode->i_gid_high = 0;
3114 } 3116 }
3115 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 3117 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3116 raw_inode->i_size = cpu_to_le32(ei->i_disksize); 3118 disksize = cpu_to_le32(ei->i_disksize);
3119 if (disksize != raw_inode->i_size) {
3120 need_datasync = 1;
3121 raw_inode->i_size = disksize;
3122 }
3117 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 3123 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
3118 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 3124 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
3119 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 3125 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3129,8 +3135,11 @@ again:
3129 if (!S_ISREG(inode->i_mode)) { 3135 if (!S_ISREG(inode->i_mode)) {
3130 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); 3136 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
3131 } else { 3137 } else {
3132 raw_inode->i_size_high = 3138 disksize = cpu_to_le32(ei->i_disksize >> 32);
3133 cpu_to_le32(ei->i_disksize >> 32); 3139 if (disksize != raw_inode->i_size_high) {
3140 raw_inode->i_size_high = disksize;
3141 need_datasync = 1;
3142 }
3134 if (ei->i_disksize > 0x7fffffffULL) { 3143 if (ei->i_disksize > 0x7fffffffULL) {
3135 struct super_block *sb = inode->i_sb; 3144 struct super_block *sb = inode->i_sb;
3136 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 3145 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3183,6 +3192,8 @@ again:
3183 ext3_clear_inode_state(inode, EXT3_STATE_NEW); 3192 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
3184 3193
3185 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); 3194 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
3195 if (need_datasync)
3196 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
3186out_brelse: 3197out_brelse:
3187 brelse (bh); 3198 brelse (bh);
3188 ext3_std_error(inode->i_sb, err); 3199 ext3_std_error(inode->i_sb, err);
@@ -3459,14 +3470,6 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
3459 * inode out, but prune_icache isn't a user-visible syncing function. 3470 * inode out, but prune_icache isn't a user-visible syncing function.
3460 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) 3471 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
3461 * we start and wait on commits. 3472 * we start and wait on commits.
3462 *
3463 * Is this efficient/effective? Well, we're being nice to the system
3464 * by cleaning up our inodes proactively so they can be reaped
3465 * without I/O. But we are potentially leaving up to five seconds'
3466 * worth of inodes floating about which prune_icache wants us to
3467 * write out. One way to fix that would be to get prune_icache()
3468 * to do a write_super() to free up some memory. It has the desired
3469 * effect.
3470 */ 3473 */
3471int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) 3474int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3472{ 3475{
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ff9bcdc5b0d5..8c892e93d8e7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -64,11 +64,6 @@ static int ext3_freeze(struct super_block *sb);
64 64
65/* 65/*
66 * Wrappers for journal_start/end. 66 * Wrappers for journal_start/end.
67 *
68 * The only special thing we need to do here is to make sure that all
69 * journal_end calls result in the superblock being marked dirty, so
70 * that sync() will call the filesystem's write_super callback if
71 * appropriate.
72 */ 67 */
73handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) 68handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
74{ 69{
@@ -90,12 +85,6 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
90 return journal_start(journal, nblocks); 85 return journal_start(journal, nblocks);
91} 86}
92 87
93/*
94 * The only special thing we need to do here is to make sure that all
95 * journal_stop calls result in the superblock being marked dirty, so
96 * that sync() will call the filesystem's write_super callback if
97 * appropriate.
98 */
99int __ext3_journal_stop(const char *where, handle_t *handle) 88int __ext3_journal_stop(const char *where, handle_t *handle)
100{ 89{
101 struct super_block *sb; 90 struct super_block *sb;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d23b31ca9d7a..1b5089067d01 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
280 return desc; 280 return desc;
281} 281}
282 282
283static int ext4_valid_block_bitmap(struct super_block *sb, 283/*
284 struct ext4_group_desc *desc, 284 * Return the block number which was discovered to be invalid, or 0 if
285 unsigned int block_group, 285 * the block bitmap is valid.
286 struct buffer_head *bh) 286 */
287static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
288 struct ext4_group_desc *desc,
289 unsigned int block_group,
290 struct buffer_head *bh)
287{ 291{
288 ext4_grpblk_t offset; 292 ext4_grpblk_t offset;
289 ext4_grpblk_t next_zero_bit; 293 ext4_grpblk_t next_zero_bit;
290 ext4_fsblk_t bitmap_blk; 294 ext4_fsblk_t blk;
291 ext4_fsblk_t group_first_block; 295 ext4_fsblk_t group_first_block;
292 296
293 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 297 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
@@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
297 * or it has to also read the block group where the bitmaps 301 * or it has to also read the block group where the bitmaps
298 * are located to verify they are set. 302 * are located to verify they are set.
299 */ 303 */
300 return 1; 304 return 0;
301 } 305 }
302 group_first_block = ext4_group_first_block_no(sb, block_group); 306 group_first_block = ext4_group_first_block_no(sb, block_group);
303 307
304 /* check whether block bitmap block number is set */ 308 /* check whether block bitmap block number is set */
305 bitmap_blk = ext4_block_bitmap(sb, desc); 309 blk = ext4_block_bitmap(sb, desc);
306 offset = bitmap_blk - group_first_block; 310 offset = blk - group_first_block;
307 if (!ext4_test_bit(offset, bh->b_data)) 311 if (!ext4_test_bit(offset, bh->b_data))
308 /* bad block bitmap */ 312 /* bad block bitmap */
309 goto err_out; 313 return blk;
310 314
311 /* check whether the inode bitmap block number is set */ 315 /* check whether the inode bitmap block number is set */
312 bitmap_blk = ext4_inode_bitmap(sb, desc); 316 blk = ext4_inode_bitmap(sb, desc);
313 offset = bitmap_blk - group_first_block; 317 offset = blk - group_first_block;
314 if (!ext4_test_bit(offset, bh->b_data)) 318 if (!ext4_test_bit(offset, bh->b_data))
315 /* bad block bitmap */ 319 /* bad block bitmap */
316 goto err_out; 320 return blk;
317 321
318 /* check whether the inode table block number is set */ 322 /* check whether the inode table block number is set */
319 bitmap_blk = ext4_inode_table(sb, desc); 323 blk = ext4_inode_table(sb, desc);
320 offset = bitmap_blk - group_first_block; 324 offset = blk - group_first_block;
321 next_zero_bit = ext4_find_next_zero_bit(bh->b_data, 325 next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
322 offset + EXT4_SB(sb)->s_itb_per_group, 326 offset + EXT4_SB(sb)->s_itb_per_group,
323 offset); 327 offset);
324 if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) 328 if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group)
325 /* good bitmap for inode tables */ 329 /* bad bitmap for inode tables */
326 return 1; 330 return blk;
327
328err_out:
329 ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
330 block_group, bitmap_blk);
331 return 0; 331 return 0;
332} 332}
333 333
@@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb,
336 unsigned int block_group, 336 unsigned int block_group,
337 struct buffer_head *bh) 337 struct buffer_head *bh)
338{ 338{
339 ext4_fsblk_t blk;
340
339 if (buffer_verified(bh)) 341 if (buffer_verified(bh))
340 return; 342 return;
341 343
342 ext4_lock_group(sb, block_group); 344 ext4_lock_group(sb, block_group);
343 if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && 345 blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
344 ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, 346 if (unlikely(blk != 0)) {
345 EXT4_BLOCKS_PER_GROUP(sb) / 8)) 347 ext4_unlock_group(sb, block_group);
346 set_buffer_verified(bh); 348 ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
349 block_group, blk);
350 return;
351 }
352 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
353 desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
354 ext4_unlock_group(sb, block_group);
355 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
356 return;
357 }
358 set_buffer_verified(bh);
347 ext4_unlock_group(sb, block_group); 359 ext4_unlock_group(sb, block_group);
348} 360}
349 361
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index f8716eab9995..5c2d1813ebe9 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
79 if (provided == calculated) 79 if (provided == calculated)
80 return 1; 80 return 1;
81 81
82 ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group);
83 return 0; 82 return 0;
84} 83}
85 84
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd0c7ed06772..aabbb3f53683 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2662,6 +2662,7 @@ cont:
2662 } 2662 }
2663 path[0].p_depth = depth; 2663 path[0].p_depth = depth;
2664 path[0].p_hdr = ext_inode_hdr(inode); 2664 path[0].p_hdr = ext_inode_hdr(inode);
2665 i = 0;
2665 2666
2666 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2667 if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
2667 err = -EIO; 2668 err = -EIO;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6324f74e0342..dff171c3a123 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1970,7 +1970,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
1970 * This function can get called via... 1970 * This function can get called via...
1971 * - ext4_da_writepages after taking page lock (have journal handle) 1971 * - ext4_da_writepages after taking page lock (have journal handle)
1972 * - journal_submit_inode_data_buffers (no journal handle) 1972 * - journal_submit_inode_data_buffers (no journal handle)
1973 * - shrink_page_list via pdflush (no journal handle) 1973 * - shrink_page_list via the kswapd/direct reclaim (no journal handle)
1974 * - grab_page_cache when doing write_begin (have journal handle) 1974 * - grab_page_cache when doing write_begin (have journal handle)
1975 * 1975 *
1976 * We don't do any block allocation in this function. If we have page with 1976 * We don't do any block allocation in this function. If we have page with
@@ -4589,14 +4589,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
4589 * inode out, but prune_icache isn't a user-visible syncing function. 4589 * inode out, but prune_icache isn't a user-visible syncing function.
4590 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) 4590 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
4591 * we start and wait on commits. 4591 * we start and wait on commits.
4592 *
4593 * Is this efficient/effective? Well, we're being nice to the system
4594 * by cleaning up our inodes proactively so they can be reaped
4595 * without I/O. But we are potentially leaving up to five seconds'
4596 * worth of inodes floating about which prune_icache wants us to
4597 * write out. One way to fix that would be to get prune_icache()
4598 * to do a write_super() to free up some memory. It has the desired
4599 * effect.
4600 */ 4592 */
4601int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) 4593int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
4602{ 4594{
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d76ec8277d3f..c6e0cb3d1f4a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -326,11 +326,6 @@ static void ext4_put_nojournal(handle_t *handle)
326 326
327/* 327/*
328 * Wrappers for jbd2_journal_start/end. 328 * Wrappers for jbd2_journal_start/end.
329 *
330 * The only special thing we need to do here is to make sure that all
331 * journal_end calls result in the superblock being marked dirty, so
332 * that sync() will call the filesystem's write_super callback if
333 * appropriate.
334 */ 329 */
335handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 330handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
336{ 331{
@@ -356,12 +351,6 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
356 return jbd2_journal_start(journal, nblocks); 351 return jbd2_journal_start(journal, nblocks);
357} 352}
358 353
359/*
360 * The only special thing we need to do here is to make sure that all
361 * jbd2_journal_stop calls result in the superblock being marked dirty, so
362 * that sync() will call the filesystem's write_super callback if
363 * appropriate.
364 */
365int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) 354int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
366{ 355{
367 struct super_block *sb; 356 struct super_block *sb;
@@ -959,6 +948,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
959 ei->i_reserved_meta_blocks = 0; 948 ei->i_reserved_meta_blocks = 0;
960 ei->i_allocated_meta_blocks = 0; 949 ei->i_allocated_meta_blocks = 0;
961 ei->i_da_metadata_calc_len = 0; 950 ei->i_da_metadata_calc_len = 0;
951 ei->i_da_metadata_calc_last_lblock = 0;
962 spin_lock_init(&(ei->i_block_reservation_lock)); 952 spin_lock_init(&(ei->i_block_reservation_lock));
963#ifdef CONFIG_QUOTA 953#ifdef CONFIG_QUOTA
964 ei->i_reserved_quota = 0; 954 ei->i_reserved_quota = 0;
@@ -3119,6 +3109,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
3119 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3109 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3120 int s, j, count = 0; 3110 int s, j, count = 0;
3121 3111
3112 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3113 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3114 sbi->s_itb_per_group + 2);
3115
3122 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + 3116 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3123 (grp * EXT4_BLOCKS_PER_GROUP(sb)); 3117 (grp * EXT4_BLOCKS_PER_GROUP(sb));
3124 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; 3118 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
@@ -4430,6 +4424,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
4430 ext4_commit_super(sb, 1); 4424 ext4_commit_super(sb, 1);
4431 4425
4432 jbd2_journal_clear_err(journal); 4426 jbd2_journal_clear_err(journal);
4427 jbd2_journal_update_sb_errno(journal);
4433 } 4428 }
4434} 4429}
4435 4430
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 03ff5b1eba93..75a20c092dd4 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
117 const char __user *buf, 117 const char __user *buf,
118 size_t count, loff_t *ppos) 118 size_t count, loff_t *ppos)
119{ 119{
120 unsigned val; 120 unsigned uninitialized_var(val);
121 ssize_t ret; 121 ssize_t ret;
122 122
123 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, 123 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -154,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
154 const char __user *buf, 154 const char __user *buf,
155 size_t count, loff_t *ppos) 155 size_t count, loff_t *ppos)
156{ 156{
157 unsigned val; 157 unsigned uninitialized_var(val);
158 ssize_t ret; 158 ssize_t ret;
159 159
160 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, 160 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 3426521f3205..ee8d55042298 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -396,7 +396,7 @@ err_device:
396err_region: 396err_region:
397 unregister_chrdev_region(devt, 1); 397 unregister_chrdev_region(devt, 1);
398err: 398err:
399 fc->conn_error = 1; 399 fuse_conn_kill(fc);
400 goto out; 400 goto out;
401} 401}
402 402
@@ -532,8 +532,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
532 cdev_del(cc->cdev); 532 cdev_del(cc->cdev);
533 } 533 }
534 534
535 /* kill connection and shutdown channel */
536 fuse_conn_kill(&cc->fc);
537 rc = fuse_dev_release(inode, file); /* puts the base reference */ 535 rc = fuse_dev_release(inode, file); /* puts the base reference */
538 536
539 return rc; 537 return rc;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e8fbe1..f4246cfc8d87 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1576,6 +1576,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1576 req->pages[req->num_pages] = page; 1576 req->pages[req->num_pages] = page;
1577 req->num_pages++; 1577 req->num_pages++;
1578 1578
1579 offset = 0;
1579 num -= this_num; 1580 num -= this_num;
1580 total_len += this_num; 1581 total_len += this_num;
1581 index++; 1582 index++;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8964cf3999b2..324bc0850534 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
383 struct fuse_entry_out outentry; 383 struct fuse_entry_out outentry;
384 struct fuse_file *ff; 384 struct fuse_file *ff;
385 385
386 /* Userspace expects S_IFREG in create mode */
387 BUG_ON((mode & S_IFMT) != S_IFREG);
388
386 forget = fuse_alloc_forget(); 389 forget = fuse_alloc_forget();
387 err = -ENOMEM; 390 err = -ENOMEM;
388 if (!forget) 391 if (!forget)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 93d8d6c9494d..aba15f1b7ad2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -703,13 +703,16 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
703 unsigned long nr_segs, loff_t pos) 703 unsigned long nr_segs, loff_t pos)
704{ 704{
705 struct inode *inode = iocb->ki_filp->f_mapping->host; 705 struct inode *inode = iocb->ki_filp->f_mapping->host;
706 struct fuse_conn *fc = get_fuse_conn(inode);
706 707
707 if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) { 708 /*
709 * In auto invalidate mode, always update attributes on read.
710 * Otherwise, only update if we attempt to read past EOF (to ensure
711 * i_size is up to date).
712 */
713 if (fc->auto_inval_data ||
714 (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
708 int err; 715 int err;
709 /*
710 * If trying to read past EOF, make sure the i_size
711 * attribute is up-to-date.
712 */
713 err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); 716 err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
714 if (err) 717 if (err)
715 return err; 718 return err;
@@ -1700,7 +1703,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
1700 size_t n; 1703 size_t n;
1701 u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; 1704 u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
1702 1705
1703 for (n = 0; n < count; n++) { 1706 for (n = 0; n < count; n++, iov++) {
1704 if (iov->iov_len > (size_t) max) 1707 if (iov->iov_len > (size_t) max)
1705 return -ENOMEM; 1708 return -ENOMEM;
1706 max -= iov->iov_len; 1709 max -= iov->iov_len;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 771fb6322c07..e24dd74e3068 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -484,6 +484,9 @@ struct fuse_conn {
484 /** Is fallocate not implemented by fs? */ 484 /** Is fallocate not implemented by fs? */
485 unsigned no_fallocate:1; 485 unsigned no_fallocate:1;
486 486
487 /** Use enhanced/automatic page cache invalidation. */
488 unsigned auto_inval_data:1;
489
487 /** The number of requests waiting for completion */ 490 /** The number of requests waiting for completion */
488 atomic_t num_waiting; 491 atomic_t num_waiting;
489 492
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1cd61652018c..fca222dabe3c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
197 struct fuse_conn *fc = get_fuse_conn(inode); 197 struct fuse_conn *fc = get_fuse_conn(inode);
198 struct fuse_inode *fi = get_fuse_inode(inode); 198 struct fuse_inode *fi = get_fuse_inode(inode);
199 loff_t oldsize; 199 loff_t oldsize;
200 struct timespec old_mtime;
200 201
201 spin_lock(&fc->lock); 202 spin_lock(&fc->lock);
202 if (attr_version != 0 && fi->attr_version > attr_version) { 203 if (attr_version != 0 && fi->attr_version > attr_version) {
@@ -204,15 +205,35 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
204 return; 205 return;
205 } 206 }
206 207
208 old_mtime = inode->i_mtime;
207 fuse_change_attributes_common(inode, attr, attr_valid); 209 fuse_change_attributes_common(inode, attr, attr_valid);
208 210
209 oldsize = inode->i_size; 211 oldsize = inode->i_size;
210 i_size_write(inode, attr->size); 212 i_size_write(inode, attr->size);
211 spin_unlock(&fc->lock); 213 spin_unlock(&fc->lock);
212 214
213 if (S_ISREG(inode->i_mode) && oldsize != attr->size) { 215 if (S_ISREG(inode->i_mode)) {
214 truncate_pagecache(inode, oldsize, attr->size); 216 bool inval = false;
215 invalidate_inode_pages2(inode->i_mapping); 217
218 if (oldsize != attr->size) {
219 truncate_pagecache(inode, oldsize, attr->size);
220 inval = true;
221 } else if (fc->auto_inval_data) {
222 struct timespec new_mtime = {
223 .tv_sec = attr->mtime,
224 .tv_nsec = attr->mtimensec,
225 };
226
227 /*
228 * Auto inval mode also checks and invalidates if mtime
229 * has changed.
230 */
231 if (!timespec_equal(&old_mtime, &new_mtime))
232 inval = true;
233 }
234
235 if (inval)
236 invalidate_inode_pages2(inode->i_mapping);
216 } 237 }
217} 238}
218 239
@@ -346,11 +367,6 @@ void fuse_conn_kill(struct fuse_conn *fc)
346 wake_up_all(&fc->waitq); 367 wake_up_all(&fc->waitq);
347 wake_up_all(&fc->blocked_waitq); 368 wake_up_all(&fc->blocked_waitq);
348 wake_up_all(&fc->reserved_req_waitq); 369 wake_up_all(&fc->reserved_req_waitq);
349 mutex_lock(&fuse_mutex);
350 list_del(&fc->entry);
351 fuse_ctl_remove_conn(fc);
352 mutex_unlock(&fuse_mutex);
353 fuse_bdi_destroy(fc);
354} 370}
355EXPORT_SYMBOL_GPL(fuse_conn_kill); 371EXPORT_SYMBOL_GPL(fuse_conn_kill);
356 372
@@ -359,7 +375,14 @@ static void fuse_put_super(struct super_block *sb)
359 struct fuse_conn *fc = get_fuse_conn_super(sb); 375 struct fuse_conn *fc = get_fuse_conn_super(sb);
360 376
361 fuse_send_destroy(fc); 377 fuse_send_destroy(fc);
378
362 fuse_conn_kill(fc); 379 fuse_conn_kill(fc);
380 mutex_lock(&fuse_mutex);
381 list_del(&fc->entry);
382 fuse_ctl_remove_conn(fc);
383 mutex_unlock(&fuse_mutex);
384 fuse_bdi_destroy(fc);
385
363 fuse_conn_put(fc); 386 fuse_conn_put(fc);
364} 387}
365 388
@@ -834,6 +857,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
834 fc->big_writes = 1; 857 fc->big_writes = 1;
835 if (arg->flags & FUSE_DONT_MASK) 858 if (arg->flags & FUSE_DONT_MASK)
836 fc->dont_mask = 1; 859 fc->dont_mask = 1;
860 if (arg->flags & FUSE_AUTO_INVAL_DATA)
861 fc->auto_inval_data = 1;
837 } else { 862 } else {
838 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 863 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
839 fc->no_lock = 1; 864 fc->no_lock = 1;
@@ -859,7 +884,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
859 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 884 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
860 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 885 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
861 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 886 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
862 FUSE_FLOCK_LOCKS; 887 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
888 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA;
863 req->in.h.opcode = FUSE_INIT; 889 req->in.h.opcode = FUSE_INIT;
864 req->in.numargs = 1; 890 req->in.numargs = 1;
865 req->in.args[0].size = sizeof(*arg); 891 req->in.args[0].size = sizeof(*arg);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3a56c8d94de0..22255d96b27e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -52,7 +52,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
52 /* 52 /*
53 * If it's a fully non-blocking write attempt and we cannot 53 * If it's a fully non-blocking write attempt and we cannot
54 * lock the buffer then redirty the page. Note that this can 54 * lock the buffer then redirty the page. Note that this can
55 * potentially cause a busy-wait loop from pdflush and kswapd 55 * potentially cause a busy-wait loop from flusher thread and kswapd
56 * activity, but those code paths have their own higher-level 56 * activity, but those code paths have their own higher-level
57 * throttling. 57 * throttling.
58 */ 58 */
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 5fd51a5833ff..b7ec224910c5 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -236,10 +236,10 @@ out:
236 * hfs_mdb_commit() 236 * hfs_mdb_commit()
237 * 237 *
238 * Description: 238 * Description:
239 * This updates the MDB on disk (look also at hfs_write_super()). 239 * This updates the MDB on disk.
240 * It does not check, if the superblock has been modified, or 240 * It does not check, if the superblock has been modified, or
241 * if the filesystem has been mounted read-only. It is mainly 241 * if the filesystem has been mounted read-only. It is mainly
242 * called by hfs_write_super() and hfs_btree_extend(). 242 * called by hfs_sync_fs() and flush_mdb().
243 * Input Variable(s): 243 * Input Variable(s):
244 * struct hfs_mdb *mdb: Pointer to the hfs MDB 244 * struct hfs_mdb *mdb: Pointer to the hfs MDB
245 * int backup; 245 * int backup;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 425c2f2cf170..a2862339323b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -534,8 +534,8 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
534 ret = 1; 534 ret = 1;
535 } else if (journal->j_committing_transaction) { 535 } else if (journal->j_committing_transaction) {
536 /* 536 /*
537 * If ext3_write_super() recently started a commit, then we 537 * If commit has been started, then we have to wait for
538 * have to wait for completion of that transaction 538 * completion of that transaction.
539 */ 539 */
540 if (ptid) 540 if (ptid)
541 *ptid = journal->j_committing_transaction->t_tid; 541 *ptid = journal->j_committing_transaction->t_tid;
@@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal)
1113 1113
1114 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1114 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1115 spin_lock(&journal->j_state_lock); 1115 spin_lock(&journal->j_state_lock);
1116 /* Is it already empty? */
1117 if (sb->s_start == 0) {
1118 spin_unlock(&journal->j_state_lock);
1119 return;
1120 }
1116 jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", 1121 jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
1117 journal->j_tail_sequence); 1122 journal->j_tail_sequence);
1118 1123
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e9a3c4c85594..e149b99a7ffb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -612,8 +612,8 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
612 ret = 1; 612 ret = 1;
613 } else if (journal->j_committing_transaction) { 613 } else if (journal->j_committing_transaction) {
614 /* 614 /*
615 * If ext3_write_super() recently started a commit, then we 615 * If commit has been started, then we have to wait for
616 * have to wait for completion of that transaction 616 * completion of that transaction.
617 */ 617 */
618 if (ptid) 618 if (ptid)
619 *ptid = journal->j_committing_transaction->t_tid; 619 *ptid = journal->j_committing_transaction->t_tid;
@@ -1377,7 +1377,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
1377 * Update a journal's errno. Write updated superblock to disk waiting for IO 1377 * Update a journal's errno. Write updated superblock to disk waiting for IO
1378 * to complete. 1378 * to complete.
1379 */ 1379 */
1380static void jbd2_journal_update_sb_errno(journal_t *journal) 1380void jbd2_journal_update_sb_errno(journal_t *journal)
1381{ 1381{
1382 journal_superblock_t *sb = journal->j_superblock; 1382 journal_superblock_t *sb = journal->j_superblock;
1383 1383
@@ -1390,6 +1390,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
1390 1390
1391 jbd2_write_superblock(journal, WRITE_SYNC); 1391 jbd2_write_superblock(journal, WRITE_SYNC);
1392} 1392}
1393EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
1393 1394
1394/* 1395/*
1395 * Read the superblock for a given journal, performing initial 1396 * Read the superblock for a given journal, performing initial
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index df0de27c2733..e784a217b500 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
26 struct completion complete; 26 struct completion complete;
27 27
28 bio_init(&bio); 28 bio_init(&bio);
29 bio.bi_max_vecs = 1;
29 bio.bi_io_vec = &bio_vec; 30 bio.bi_io_vec = &bio_vec;
30 bio_vec.bv_page = page; 31 bio_vec.bv_page = page;
31 bio_vec.bv_len = PAGE_SIZE; 32 bio_vec.bv_len = PAGE_SIZE;
@@ -95,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
95 struct address_space *mapping = super->s_mapping_inode->i_mapping; 96 struct address_space *mapping = super->s_mapping_inode->i_mapping;
96 struct bio *bio; 97 struct bio *bio;
97 struct page *page; 98 struct page *page;
98 struct request_queue *q = bdev_get_queue(sb->s_bdev); 99 unsigned int max_pages;
99 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
100 int i; 100 int i;
101 101
102 if (max_pages > BIO_MAX_PAGES) 102 max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
103 max_pages = BIO_MAX_PAGES; 103
104 bio = bio_alloc(GFP_NOFS, max_pages); 104 bio = bio_alloc(GFP_NOFS, max_pages);
105 BUG_ON(!bio); 105 BUG_ON(!bio);
106 106
@@ -190,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
190{ 190{
191 struct logfs_super *super = logfs_super(sb); 191 struct logfs_super *super = logfs_super(sb);
192 struct bio *bio; 192 struct bio *bio;
193 struct request_queue *q = bdev_get_queue(sb->s_bdev); 193 unsigned int max_pages;
194 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
195 int i; 194 int i;
196 195
197 if (max_pages > BIO_MAX_PAGES) 196 max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
198 max_pages = BIO_MAX_PAGES; 197
199 bio = bio_alloc(GFP_NOFS, max_pages); 198 bio = bio_alloc(GFP_NOFS, max_pages);
200 BUG_ON(!bio); 199 BUG_ON(!bio);
201 200
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index a422f42238b2..6984562738d3 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode)
156 call_rcu(&inode->i_rcu, logfs_i_callback); 156 call_rcu(&inode->i_rcu, logfs_i_callback);
157} 157}
158 158
159static void __logfs_destroy_meta_inode(struct inode *inode)
160{
161 struct logfs_inode *li = logfs_inode(inode);
162 BUG_ON(li->li_block);
163 call_rcu(&inode->i_rcu, logfs_i_callback);
164}
165
159static void logfs_destroy_inode(struct inode *inode) 166static void logfs_destroy_inode(struct inode *inode)
160{ 167{
161 struct logfs_inode *li = logfs_inode(inode); 168 struct logfs_inode *li = logfs_inode(inode);
162 169
170 if (inode->i_ino < LOGFS_RESERVED_INOS) {
171 /*
172 * The reserved inodes are never destroyed unless we are in
173 * unmount path.
174 */
175 __logfs_destroy_meta_inode(inode);
176 return;
177 }
178
163 BUG_ON(list_empty(&li->li_freeing_list)); 179 BUG_ON(list_empty(&li->li_freeing_list));
164 spin_lock(&logfs_inode_lock); 180 spin_lock(&logfs_inode_lock);
165 li->li_refcount--; 181 li->li_refcount--;
@@ -373,8 +389,8 @@ static void logfs_put_super(struct super_block *sb)
373{ 389{
374 struct logfs_super *super = logfs_super(sb); 390 struct logfs_super *super = logfs_super(sb);
375 /* kill the meta-inodes */ 391 /* kill the meta-inodes */
376 iput(super->s_master_inode);
377 iput(super->s_segfile_inode); 392 iput(super->s_segfile_inode);
393 iput(super->s_master_inode);
378 iput(super->s_mapping_inode); 394 iput(super->s_mapping_inode);
379} 395}
380 396
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 1e1c369df22b..2a09b8d73989 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area,
565 index = ofs >> PAGE_SHIFT; 565 index = ofs >> PAGE_SHIFT;
566 page_ofs = ofs & (PAGE_SIZE - 1); 566 page_ofs = ofs & (PAGE_SIZE - 1);
567 567
568 page = find_lock_page(mapping, index); 568 page = find_or_create_page(mapping, index, GFP_NOFS);
569 BUG_ON(!page); 569 BUG_ON(!page);
570 memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); 570 memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
571 unlock_page(page); 571 unlock_page(page);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index f1cb512c5019..5be0abef603d 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode)
2189 return; 2189 return;
2190 } 2190 }
2191 2191
2192 BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
2193 page = inode_to_page(inode); 2192 page = inode_to_page(inode);
2194 BUG_ON(!page); /* FIXME: Use emergency page */ 2193 BUG_ON(!page); /* FIXME: Use emergency page */
2195 logfs_put_write_page(page); 2194 logfs_put_write_page(page);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index e28d090c98d6..038da0991794 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb)
886 886
887static void map_invalidatepage(struct page *page, unsigned long l) 887static void map_invalidatepage(struct page *page, unsigned long l)
888{ 888{
889 BUG(); 889 return;
890} 890}
891 891
892static int map_releasepage(struct page *page, gfp_t g) 892static int map_releasepage(struct page *page, gfp_t g)
diff --git a/fs/namei.c b/fs/namei.c
index 1b464390dde8..dd1ed1b8e98e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask)
352/** 352/**
353 * sb_permission - Check superblock-level permissions 353 * sb_permission - Check superblock-level permissions
354 * @sb: Superblock of inode to check permission on 354 * @sb: Superblock of inode to check permission on
355 * @inode: Inode to check permission on
355 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 356 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
356 * 357 *
357 * Separate out file-system wide checks from inode-specific permission checks. 358 * Separate out file-system wide checks from inode-specific permission checks.
@@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1;
656/** 657/**
657 * may_follow_link - Check symlink following for unsafe situations 658 * may_follow_link - Check symlink following for unsafe situations
658 * @link: The path of the symlink 659 * @link: The path of the symlink
660 * @nd: nameidata pathwalk data
659 * 661 *
660 * In the case of the sysctl_protected_symlinks sysctl being enabled, 662 * In the case of the sysctl_protected_symlinks sysctl being enabled,
661 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is 663 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
@@ -2414,7 +2416,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2414 goto out; 2416 goto out;
2415 } 2417 }
2416 2418
2417 mode = op->mode & S_IALLUGO; 2419 mode = op->mode;
2418 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) 2420 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2419 mode &= ~current_umask(); 2421 mode &= ~current_umask();
2420 2422
@@ -2452,7 +2454,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2452 } 2454 }
2453 2455
2454 if (open_flag & O_CREAT) { 2456 if (open_flag & O_CREAT) {
2455 error = may_o_create(&nd->path, dentry, op->mode); 2457 error = may_o_create(&nd->path, dentry, mode);
2456 if (error) { 2458 if (error) {
2457 create_error = error; 2459 create_error = error;
2458 if (open_flag & O_EXCL) 2460 if (open_flag & O_EXCL)
@@ -2489,6 +2491,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2489 dput(dentry); 2491 dput(dentry);
2490 dentry = file->f_path.dentry; 2492 dentry = file->f_path.dentry;
2491 } 2493 }
2494 if (create_error && dentry->d_inode == NULL) {
2495 error = create_error;
2496 goto out;
2497 }
2492 goto looked_up; 2498 goto looked_up;
2493 } 2499 }
2494 2500
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8bf3a3f6925a..b7db60897f91 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
12nfs-$(CONFIG_SYSCTL) += sysctl.o 12nfs-$(CONFIG_SYSCTL) += sysctl.o
13nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o 13nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
14 14
15obj-$(CONFIG_NFS_V2) += nfs2.o 15obj-$(CONFIG_NFS_V2) += nfsv2.o
16nfs2-y := nfs2super.o proc.o nfs2xdr.o 16nfsv2-y := nfs2super.o proc.o nfs2xdr.o
17 17
18obj-$(CONFIG_NFS_V3) += nfs3.o 18obj-$(CONFIG_NFS_V3) += nfsv3.o
19nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o 19nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
20nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 20nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
21 21
22obj-$(CONFIG_NFS_V4) += nfs4.o 22obj-$(CONFIG_NFS_V4) += nfsv4.o
23nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ 23nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
24 delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ 24 delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
25 nfs4namespace.o nfs4getroot.o nfs4client.o 25 nfs4namespace.o nfs4getroot.o nfs4client.o
26nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o 26nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
27nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o 27nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
28 28
29obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o 29obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
30nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o 30nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9fc0d9dfc91b..99694442b93f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
105 105
106 if (IS_ERR(nfs)) { 106 if (IS_ERR(nfs)) {
107 mutex_lock(&nfs_version_mutex); 107 mutex_lock(&nfs_version_mutex);
108 request_module("nfs%d", version); 108 request_module("nfsv%d", version);
109 nfs = find_nfs_version(version); 109 nfs = find_nfs_version(version);
110 mutex_unlock(&nfs_version_mutex); 110 mutex_unlock(&nfs_version_mutex);
111 } 111 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 75d6d0a3d32e..6a7fcab7ecb3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
287 struct inode *inode = file->f_path.dentry->d_inode; 287 struct inode *inode = file->f_path.dentry->d_inode;
288 288
289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
290 if (ret != 0)
291 goto out;
290 mutex_lock(&inode->i_mutex); 292 mutex_lock(&inode->i_mutex);
291 ret = nfs_file_fsync_commit(file, start, end, datasync); 293 ret = nfs_file_fsync_commit(file, start, end, datasync);
292 mutex_unlock(&inode->i_mutex); 294 mutex_unlock(&inode->i_mutex);
293 295out:
294 return ret; 296 return ret;
295} 297}
296 298
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b701358c39c3..a850079467d8 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -61,6 +61,12 @@ struct idmap {
61 struct mutex idmap_mutex; 61 struct mutex idmap_mutex;
62}; 62};
63 63
64struct idmap_legacy_upcalldata {
65 struct rpc_pipe_msg pipe_msg;
66 struct idmap_msg idmap_msg;
67 struct idmap *idmap;
68};
69
64/** 70/**
65 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 71 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
66 * @fattr: fully initialised struct nfs_fattr 72 * @fattr: fully initialised struct nfs_fattr
@@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
324 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, 330 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
325 name, namelen, type, data, 331 name, namelen, type, data,
326 data_size, idmap); 332 data_size, idmap);
333 idmap->idmap_key_cons = NULL;
327 mutex_unlock(&idmap->idmap_mutex); 334 mutex_unlock(&idmap->idmap_mutex);
328 } 335 }
329 return ret; 336 return ret;
@@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = {
380static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); 387static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
381static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 388static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
382 size_t); 389 size_t);
390static void idmap_release_pipe(struct inode *);
383static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); 391static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
384 392
385static const struct rpc_pipe_ops idmap_upcall_ops = { 393static const struct rpc_pipe_ops idmap_upcall_ops = {
386 .upcall = rpc_pipe_generic_upcall, 394 .upcall = rpc_pipe_generic_upcall,
387 .downcall = idmap_pipe_downcall, 395 .downcall = idmap_pipe_downcall,
396 .release_pipe = idmap_release_pipe,
388 .destroy_msg = idmap_pipe_destroy_msg, 397 .destroy_msg = idmap_pipe_destroy_msg,
389}; 398};
390 399
@@ -616,7 +625,8 @@ void nfs_idmap_quit(void)
616 nfs_idmap_quit_keyring(); 625 nfs_idmap_quit_keyring();
617} 626}
618 627
619static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, 628static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
629 struct idmap_msg *im,
620 struct rpc_pipe_msg *msg) 630 struct rpc_pipe_msg *msg)
621{ 631{
622 substring_t substr; 632 substring_t substr;
@@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
659 const char *op, 669 const char *op,
660 void *aux) 670 void *aux)
661{ 671{
672 struct idmap_legacy_upcalldata *data;
662 struct rpc_pipe_msg *msg; 673 struct rpc_pipe_msg *msg;
663 struct idmap_msg *im; 674 struct idmap_msg *im;
664 struct idmap *idmap = (struct idmap *)aux; 675 struct idmap *idmap = (struct idmap *)aux;
@@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
666 int ret = -ENOMEM; 677 int ret = -ENOMEM;
667 678
668 /* msg and im are freed in idmap_pipe_destroy_msg */ 679 /* msg and im are freed in idmap_pipe_destroy_msg */
669 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 680 data = kmalloc(sizeof(*data), GFP_KERNEL);
670 if (!msg) 681 if (!data)
671 goto out0;
672
673 im = kmalloc(sizeof(*im), GFP_KERNEL);
674 if (!im)
675 goto out1; 682 goto out1;
676 683
677 ret = nfs_idmap_prepare_message(key->description, im, msg); 684 msg = &data->pipe_msg;
685 im = &data->idmap_msg;
686 data->idmap = idmap;
687
688 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
678 if (ret < 0) 689 if (ret < 0)
679 goto out2; 690 goto out2;
680 691
@@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
683 694
684 ret = rpc_queue_upcall(idmap->idmap_pipe, msg); 695 ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
685 if (ret < 0) 696 if (ret < 0)
686 goto out2; 697 goto out3;
687 698
688 return ret; 699 return ret;
689 700
701out3:
702 idmap->idmap_key_cons = NULL;
690out2: 703out2:
691 kfree(im); 704 kfree(data);
692out1: 705out1:
693 kfree(msg);
694out0:
695 complete_request_key(cons, ret); 706 complete_request_key(cons, ret);
696 return ret; 707 return ret;
697} 708}
@@ -749,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
749 } 760 }
750 761
751 if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { 762 if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
752 ret = mlen; 763 ret = -ENOKEY;
753 complete_request_key(cons, -ENOKEY); 764 goto out;
754 goto out_incomplete;
755 } 765 }
756 766
757 namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); 767 namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
@@ -768,16 +778,32 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
768 778
769out: 779out:
770 complete_request_key(cons, ret); 780 complete_request_key(cons, ret);
771out_incomplete:
772 return ret; 781 return ret;
773} 782}
774 783
775static void 784static void
776idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) 785idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
777{ 786{
787 struct idmap_legacy_upcalldata *data = container_of(msg,
788 struct idmap_legacy_upcalldata,
789 pipe_msg);
790 struct idmap *idmap = data->idmap;
791 struct key_construction *cons;
792 if (msg->errno) {
793 cons = ACCESS_ONCE(idmap->idmap_key_cons);
794 idmap->idmap_key_cons = NULL;
795 complete_request_key(cons, msg->errno);
796 }
778 /* Free memory allocated in nfs_idmap_legacy_upcall() */ 797 /* Free memory allocated in nfs_idmap_legacy_upcall() */
779 kfree(msg->data); 798 kfree(data);
780 kfree(msg); 799}
800
801static void
802idmap_release_pipe(struct inode *inode)
803{
804 struct rpc_inode *rpci = RPC_I(inode);
805 struct idmap *idmap = (struct idmap *)rpci->private;
806 idmap->idmap_key_cons = NULL;
781} 807}
782 808
783int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 809int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6e895f0fbf3..9b47610338f5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode)
154 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 154 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
155 nfsi->attrtimeo_timestamp = jiffies; 155 nfsi->attrtimeo_timestamp = jiffies;
156 156
157 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); 157 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
158 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) 158 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
159 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 159 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
160 else 160 else
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0952c791df36..69322096c325 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
69 nfs_fattr_init(info->fattr); 69 nfs_fattr_init(info->fattr);
70 status = rpc_call_sync(client, &msg, 0); 70 status = rpc_call_sync(client, &msg, 0);
71 dprintk("%s: reply fsinfo: %d\n", __func__, status); 71 dprintk("%s: reply fsinfo: %d\n", __func__, status);
72 if (!(info->fattr->valid & NFS_ATTR_FATTR)) { 72 if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
73 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; 73 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
74 msg.rpc_resp = info->fattr; 74 msg.rpc_resp = info->fattr;
75 status = rpc_call_sync(client, &msg, 0); 75 status = rpc_call_sync(client, &msg, 0);
@@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
643 u64 cookie, struct page **pages, unsigned int count, int plus) 643 u64 cookie, struct page **pages, unsigned int count, int plus)
644{ 644{
645 struct inode *dir = dentry->d_inode; 645 struct inode *dir = dentry->d_inode;
646 __be32 *verf = NFS_COOKIEVERF(dir); 646 __be32 *verf = NFS_I(dir)->cookieverf;
647 struct nfs3_readdirargs arg = { 647 struct nfs3_readdirargs arg = {
648 .fh = NFS_FH(dir), 648 .fh = NFS_FH(dir),
649 .cookie = cookie, 649 .cookie = cookie,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3b950dd81e81..da0618aeeadb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations;
205int nfs_atomic_open(struct inode *, struct dentry *, struct file *, 205int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
206 unsigned, umode_t, int *); 206 unsigned, umode_t, int *);
207 207
208/* super.c */
209extern struct file_system_type nfs4_fs_type;
210
208/* nfs4namespace.c */ 211/* nfs4namespace.c */
209rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 212rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
210struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 213struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index cbcdfaf32505..24eb663f8ed5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
74 return clp; 74 return clp;
75 75
76error: 76error:
77 kfree(clp); 77 nfs_free_client(clp);
78 return ERR_PTR(err); 78 return ERR_PTR(err);
79} 79}
80 80
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index acb65e7887f8..eb5eb8eef4d3 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
96 struct inode *inode = file->f_path.dentry->d_inode; 96 struct inode *inode = file->f_path.dentry->d_inode;
97 97
98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
99 if (ret != 0)
100 goto out;
99 mutex_lock(&inode->i_mutex); 101 mutex_lock(&inode->i_mutex);
100 ret = nfs_file_fsync_commit(file, start, end, datasync); 102 ret = nfs_file_fsync_commit(file, start, end, datasync);
101 if (!ret && !datasync) 103 if (!ret && !datasync)
102 /* application has asked for meta-data sync */ 104 /* application has asked for meta-data sync */
103 ret = pnfs_layoutcommit_inode(inode, true); 105 ret = pnfs_layoutcommit_inode(inode, true);
104 mutex_unlock(&inode->i_mutex); 106 mutex_unlock(&inode->i_mutex);
105 107out:
106 return ret; 108 return ret;
107} 109}
108 110
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a99a8d948721..1e50326d00dd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
3215 dentry->d_parent->d_name.name, 3215 dentry->d_parent->d_name.name,
3216 dentry->d_name.name, 3216 dentry->d_name.name,
3217 (unsigned long long)cookie); 3217 (unsigned long long)cookie);
3218 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 3218 nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
3219 res.pgbase = args.pgbase; 3219 res.pgbase = args.pgbase;
3220 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); 3220 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
3221 if (status >= 0) { 3221 if (status >= 0) {
3222 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 3222 memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
3223 status += args.pgbase; 3223 status += args.pgbase;
3224 } 3224 }
3225 3225
@@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
3653 && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); 3653 && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
3654} 3654}
3655 3655
3656/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that 3656/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
3657 * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on 3657 * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
3658 * the stack. 3658 * the stack.
3659 */ 3659 */
3660#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) 3660#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
3661 3661
3662static int buf_to_pages_noslab(const void *buf, size_t buflen, 3662static int buf_to_pages_noslab(const void *buf, size_t buflen,
3663 struct page **pages, unsigned int *pgbase) 3663 struct page **pages, unsigned int *pgbase)
@@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
3668 spages = pages; 3668 spages = pages;
3669 3669
3670 do { 3670 do {
3671 len = min_t(size_t, PAGE_CACHE_SIZE, buflen); 3671 len = min_t(size_t, PAGE_SIZE, buflen);
3672 newpage = alloc_page(GFP_KERNEL); 3672 newpage = alloc_page(GFP_KERNEL);
3673 3673
3674 if (newpage == NULL) 3674 if (newpage == NULL)
@@ -3737,9 +3737,10 @@ out:
3737static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) 3737static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
3738{ 3738{
3739 struct nfs4_cached_acl *acl; 3739 struct nfs4_cached_acl *acl;
3740 size_t buflen = sizeof(*acl) + acl_len;
3740 3741
3741 if (pages && acl_len <= PAGE_SIZE) { 3742 if (buflen <= PAGE_SIZE) {
3742 acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); 3743 acl = kmalloc(buflen, GFP_KERNEL);
3743 if (acl == NULL) 3744 if (acl == NULL)
3744 goto out; 3745 goto out;
3745 acl->cached = 1; 3746 acl->cached = 1;
@@ -3781,17 +3782,15 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3781 .rpc_argp = &args, 3782 .rpc_argp = &args,
3782 .rpc_resp = &res, 3783 .rpc_resp = &res,
3783 }; 3784 };
3784 int ret = -ENOMEM, npages, i; 3785 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
3785 size_t acl_len = 0; 3786 int ret = -ENOMEM, i;
3786 3787
3787 npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
3788 /* As long as we're doing a round trip to the server anyway, 3788 /* As long as we're doing a round trip to the server anyway,
3789 * let's be prepared for a page of acl data. */ 3789 * let's be prepared for a page of acl data. */
3790 if (npages == 0) 3790 if (npages == 0)
3791 npages = 1; 3791 npages = 1;
3792 3792 if (npages > ARRAY_SIZE(pages))
3793 /* Add an extra page to handle the bitmap returned */ 3793 return -ERANGE;
3794 npages++;
3795 3794
3796 for (i = 0; i < npages; i++) { 3795 for (i = 0; i < npages; i++) {
3797 pages[i] = alloc_page(GFP_KERNEL); 3796 pages[i] = alloc_page(GFP_KERNEL);
@@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3807 args.acl_len = npages * PAGE_SIZE; 3806 args.acl_len = npages * PAGE_SIZE;
3808 args.acl_pgbase = 0; 3807 args.acl_pgbase = 0;
3809 3808
3810 /* Let decode_getfacl know not to fail if the ACL data is larger than
3811 * the page we send as a guess */
3812 if (buf == NULL)
3813 res.acl_flags |= NFS4_ACL_LEN_REQUEST;
3814
3815 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", 3809 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
3816 __func__, buf, buflen, npages, args.acl_len); 3810 __func__, buf, buflen, npages, args.acl_len);
3817 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), 3811 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
@@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3819 if (ret) 3813 if (ret)
3820 goto out_free; 3814 goto out_free;
3821 3815
3822 acl_len = res.acl_len - res.acl_data_offset; 3816 /* Handle the case where the passed-in buffer is too short */
3823 if (acl_len > args.acl_len) 3817 if (res.acl_flags & NFS4_ACL_TRUNC) {
3824 nfs4_write_cached_acl(inode, NULL, 0, acl_len); 3818 /* Did the user only issue a request for the acl length? */
3825 else 3819 if (buf == NULL)
3826 nfs4_write_cached_acl(inode, pages, res.acl_data_offset, 3820 goto out_ok;
3827 acl_len);
3828 if (buf) {
3829 ret = -ERANGE; 3821 ret = -ERANGE;
3830 if (acl_len > buflen) 3822 goto out_free;
3831 goto out_free;
3832 _copy_from_pages(buf, pages, res.acl_data_offset,
3833 acl_len);
3834 } 3823 }
3835 ret = acl_len; 3824 nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
3825 if (buf)
3826 _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
3827out_ok:
3828 ret = res.acl_len;
3836out_free: 3829out_free:
3837 for (i = 0; i < npages; i++) 3830 for (i = 0; i < npages; i++)
3838 if (pages[i]) 3831 if (pages[i])
@@ -3890,10 +3883,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3890 .rpc_argp = &arg, 3883 .rpc_argp = &arg,
3891 .rpc_resp = &res, 3884 .rpc_resp = &res,
3892 }; 3885 };
3886 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
3893 int ret, i; 3887 int ret, i;
3894 3888
3895 if (!nfs4_server_supports_acls(server)) 3889 if (!nfs4_server_supports_acls(server))
3896 return -EOPNOTSUPP; 3890 return -EOPNOTSUPP;
3891 if (npages > ARRAY_SIZE(pages))
3892 return -ERANGE;
3897 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3893 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3898 if (i < 0) 3894 if (i < 0)
3899 return i; 3895 return i;
@@ -6223,11 +6219,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6223 dprintk("<-- %s\n", __func__); 6219 dprintk("<-- %s\n", __func__);
6224} 6220}
6225 6221
6222static size_t max_response_pages(struct nfs_server *server)
6223{
6224 u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
6225 return nfs_page_array_len(0, max_resp_sz);
6226}
6227
6228static void nfs4_free_pages(struct page **pages, size_t size)
6229{
6230 int i;
6231
6232 if (!pages)
6233 return;
6234
6235 for (i = 0; i < size; i++) {
6236 if (!pages[i])
6237 break;
6238 __free_page(pages[i]);
6239 }
6240 kfree(pages);
6241}
6242
6243static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
6244{
6245 struct page **pages;
6246 int i;
6247
6248 pages = kcalloc(size, sizeof(struct page *), gfp_flags);
6249 if (!pages) {
6250 dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
6251 return NULL;
6252 }
6253
6254 for (i = 0; i < size; i++) {
6255 pages[i] = alloc_page(gfp_flags);
6256 if (!pages[i]) {
6257 dprintk("%s: failed to allocate page\n", __func__);
6258 nfs4_free_pages(pages, size);
6259 return NULL;
6260 }
6261 }
6262
6263 return pages;
6264}
6265
6226static void nfs4_layoutget_release(void *calldata) 6266static void nfs4_layoutget_release(void *calldata)
6227{ 6267{
6228 struct nfs4_layoutget *lgp = calldata; 6268 struct nfs4_layoutget *lgp = calldata;
6269 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
6270 size_t max_pages = max_response_pages(server);
6229 6271
6230 dprintk("--> %s\n", __func__); 6272 dprintk("--> %s\n", __func__);
6273 nfs4_free_pages(lgp->args.layout.pages, max_pages);
6231 put_nfs_open_context(lgp->args.ctx); 6274 put_nfs_open_context(lgp->args.ctx);
6232 kfree(calldata); 6275 kfree(calldata);
6233 dprintk("<-- %s\n", __func__); 6276 dprintk("<-- %s\n", __func__);
@@ -6239,9 +6282,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
6239 .rpc_release = nfs4_layoutget_release, 6282 .rpc_release = nfs4_layoutget_release,
6240}; 6283};
6241 6284
6242int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) 6285void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6243{ 6286{
6244 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 6287 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
6288 size_t max_pages = max_response_pages(server);
6245 struct rpc_task *task; 6289 struct rpc_task *task;
6246 struct rpc_message msg = { 6290 struct rpc_message msg = {
6247 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], 6291 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
@@ -6259,12 +6303,19 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
6259 6303
6260 dprintk("--> %s\n", __func__); 6304 dprintk("--> %s\n", __func__);
6261 6305
6306 lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
6307 if (!lgp->args.layout.pages) {
6308 nfs4_layoutget_release(lgp);
6309 return;
6310 }
6311 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
6312
6262 lgp->res.layoutp = &lgp->args.layout; 6313 lgp->res.layoutp = &lgp->args.layout;
6263 lgp->res.seq_res.sr_slot = NULL; 6314 lgp->res.seq_res.sr_slot = NULL;
6264 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); 6315 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
6265 task = rpc_run_task(&task_setup_data); 6316 task = rpc_run_task(&task_setup_data);
6266 if (IS_ERR(task)) 6317 if (IS_ERR(task))
6267 return PTR_ERR(task); 6318 return;
6268 status = nfs4_wait_for_completion_rpc_task(task); 6319 status = nfs4_wait_for_completion_rpc_task(task);
6269 if (status == 0) 6320 if (status == 0)
6270 status = task->tk_status; 6321 status = task->tk_status;
@@ -6272,7 +6323,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
6272 status = pnfs_layout_process(lgp); 6323 status = pnfs_layout_process(lgp);
6273 rpc_put_task(task); 6324 rpc_put_task(task);
6274 dprintk("<-- %s status=%d\n", __func__, status); 6325 dprintk("<-- %s status=%d\n", __func__, status);
6275 return status; 6326 return;
6276} 6327}
6277 6328
6278static void 6329static void
@@ -6304,12 +6355,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
6304 return; 6355 return;
6305 } 6356 }
6306 spin_lock(&lo->plh_inode->i_lock); 6357 spin_lock(&lo->plh_inode->i_lock);
6307 if (task->tk_status == 0) { 6358 if (task->tk_status == 0 && lrp->res.lrs_present)
6308 if (lrp->res.lrs_present) { 6359 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
6309 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
6310 } else
6311 BUG_ON(!list_empty(&lo->plh_segs));
6312 }
6313 lo->plh_block_lgets--; 6360 lo->plh_block_lgets--;
6314 spin_unlock(&lo->plh_inode->i_lock); 6361 spin_unlock(&lo->plh_inode->i_lock);
6315 dprintk("<-- %s\n", __func__); 6362 dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 12a31a9dbcdd..bd61221ad2c5 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
23static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, 23static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
24 int flags, const char *dev_name, void *raw_data); 24 int flags, const char *dev_name, void *raw_data);
25 25
26static struct file_system_type nfs4_fs_type = {
27 .owner = THIS_MODULE,
28 .name = "nfs4",
29 .mount = nfs_fs_mount,
30 .kill_sb = nfs_kill_super,
31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
32};
33
34static struct file_system_type nfs4_remote_fs_type = { 26static struct file_system_type nfs4_remote_fs_type = {
35 .owner = THIS_MODULE, 27 .owner = THIS_MODULE,
36 .name = "nfs4", 28 .name = "nfs4",
@@ -344,14 +336,8 @@ static int __init init_nfs_v4(void)
344 if (err) 336 if (err)
345 goto out1; 337 goto out1;
346 338
347 err = register_filesystem(&nfs4_fs_type);
348 if (err < 0)
349 goto out2;
350
351 register_nfs_version(&nfs_v4); 339 register_nfs_version(&nfs_v4);
352 return 0; 340 return 0;
353out2:
354 nfs4_unregister_sysctl();
355out1: 341out1:
356 nfs_idmap_quit(); 342 nfs_idmap_quit();
357out: 343out:
@@ -361,7 +347,6 @@ out:
361static void __exit exit_nfs_v4(void) 347static void __exit exit_nfs_v4(void)
362{ 348{
363 unregister_nfs_version(&nfs_v4); 349 unregister_nfs_version(&nfs_v4);
364 unregister_filesystem(&nfs4_fs_type);
365 nfs4_unregister_sysctl(); 350 nfs4_unregister_sysctl();
366 nfs_idmap_quit(); 351 nfs_idmap_quit();
367} 352}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ca13483edd60..8dba6bd48557 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5045,22 +5045,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
5045 struct nfs_getaclres *res) 5045 struct nfs_getaclres *res)
5046{ 5046{
5047 unsigned int savep; 5047 unsigned int savep;
5048 __be32 *bm_p;
5049 uint32_t attrlen, 5048 uint32_t attrlen,
5050 bitmap[3] = {0}; 5049 bitmap[3] = {0};
5051 int status; 5050 int status;
5052 size_t page_len = xdr->buf->page_len; 5051 unsigned int pg_offset;
5053 5052
5054 res->acl_len = 0; 5053 res->acl_len = 0;
5055 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 5054 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
5056 goto out; 5055 goto out;
5057 5056
5058 bm_p = xdr->p; 5057 xdr_enter_page(xdr, xdr->buf->page_len);
5059 res->acl_data_offset = be32_to_cpup(bm_p) + 2; 5058
5060 res->acl_data_offset <<= 2; 5059 /* Calculate the offset of the page data */
5061 /* Check if the acl data starts beyond the allocated buffer */ 5060 pg_offset = xdr->buf->head[0].iov_len;
5062 if (res->acl_data_offset > page_len)
5063 return -ERANGE;
5064 5061
5065 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 5062 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
5066 goto out; 5063 goto out;
@@ -5074,23 +5071,16 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
5074 /* The bitmap (xdr len + bitmaps) and the attr xdr len words 5071 /* The bitmap (xdr len + bitmaps) and the attr xdr len words
5075 * are stored with the acl data to handle the problem of 5072 * are stored with the acl data to handle the problem of
5076 * variable length bitmaps.*/ 5073 * variable length bitmaps.*/
5077 xdr->p = bm_p; 5074 res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
5078
5079 /* We ignore &savep and don't do consistency checks on
5080 * the attr length. Let userspace figure it out.... */
5081 attrlen += res->acl_data_offset;
5082 if (attrlen > page_len) {
5083 if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
5084 /* getxattr interface called with a NULL buf */
5085 res->acl_len = attrlen;
5086 goto out;
5087 }
5088 dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
5089 attrlen, page_len);
5090 return -EINVAL;
5091 }
5092 xdr_read_pages(xdr, attrlen);
5093 res->acl_len = attrlen; 5075 res->acl_len = attrlen;
5076
5077 /* Check for receive buffer overflow */
5078 if (res->acl_len > (xdr->nwords << 2) ||
5079 res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
5080 res->acl_flags |= NFS4_ACL_TRUNC;
5081 dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
5082 attrlen, xdr->nwords << 2);
5083 }
5094 } else 5084 } else
5095 status = -EOPNOTSUPP; 5085 status = -EOPNOTSUPP;
5096 5086
@@ -6235,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6235 status = decode_open(xdr, res); 6225 status = decode_open(xdr, res);
6236 if (status) 6226 if (status)
6237 goto out; 6227 goto out;
6238 if (decode_getfh(xdr, &res->fh) != 0) 6228 status = decode_getfh(xdr, &res->fh);
6229 if (status)
6239 goto out; 6230 goto out;
6240 decode_getfattr(xdr, res->f_attr, res->server); 6231 decode_getfattr(xdr, res->f_attr, res->server);
6241out: 6232out:
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index f50d3e8d6f22..ea6d111b03e9 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
570 return false; 570 return false;
571 571
572 return pgio->pg_count + req->wb_bytes <= 572 return pgio->pg_count + req->wb_bytes <=
573 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; 573 (unsigned long)pgio->pg_layout_private;
574}
575
576void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
577{
578 pnfs_generic_pg_init_read(pgio, req);
579 if (unlikely(pgio->pg_lseg == NULL))
580 return; /* Not pNFS */
581
582 pgio->pg_layout_private = (void *)
583 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
584}
585
586static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
587 unsigned long *stripe_end)
588{
589 u32 stripe_off;
590 unsigned stripe_size;
591
592 if (layout->raid_algorithm == PNFS_OSD_RAID_0)
593 return true;
594
595 stripe_size = layout->stripe_unit *
596 (layout->group_width - layout->parity);
597
598 div_u64_rem(offset, stripe_size, &stripe_off);
599 if (!stripe_off)
600 return true;
601
602 *stripe_end = stripe_size - stripe_off;
603 return false;
604}
605
606void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
607{
608 unsigned long stripe_end = 0;
609
610 pnfs_generic_pg_init_write(pgio, req);
611 if (unlikely(pgio->pg_lseg == NULL))
612 return; /* Not pNFS */
613
614 if (req->wb_offset ||
615 !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
616 &OBJIO_LSEG(pgio->pg_lseg)->layout,
617 &stripe_end)) {
618 pgio->pg_layout_private = (void *)stripe_end;
619 } else {
620 pgio->pg_layout_private = (void *)
621 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
622 }
574} 623}
575 624
576static const struct nfs_pageio_ops objio_pg_read_ops = { 625static const struct nfs_pageio_ops objio_pg_read_ops = {
577 .pg_init = pnfs_generic_pg_init_read, 626 .pg_init = objio_init_read,
578 .pg_test = objio_pg_test, 627 .pg_test = objio_pg_test,
579 .pg_doio = pnfs_generic_pg_readpages, 628 .pg_doio = pnfs_generic_pg_readpages,
580}; 629};
581 630
582static const struct nfs_pageio_ops objio_pg_write_ops = { 631static const struct nfs_pageio_ops objio_pg_write_ops = {
583 .pg_init = pnfs_generic_pg_init_write, 632 .pg_init = objio_init_write,
584 .pg_test = objio_pg_test, 633 .pg_test = objio_pg_test,
585 .pg_doio = pnfs_generic_pg_writepages, 634 .pg_doio = pnfs_generic_pg_writepages,
586}; 635};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 1a6732ed04a4..311a79681e2b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
49 hdr->io_start = req_offset(hdr->req); 49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count; 50 hdr->good_bytes = desc->pg_count;
51 hdr->dreq = desc->pg_dreq; 51 hdr->dreq = desc->pg_dreq;
52 hdr->layout_private = desc->pg_layout_private;
52 hdr->release = release; 53 hdr->release = release;
53 hdr->completion_ops = desc->pg_completion_ops; 54 hdr->completion_ops = desc->pg_completion_ops;
54 if (hdr->completion_ops->init_hdr) 55 if (hdr->completion_ops->init_hdr)
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
268 desc->pg_error = 0; 269 desc->pg_error = 0;
269 desc->pg_lseg = NULL; 270 desc->pg_lseg = NULL;
270 desc->pg_dreq = NULL; 271 desc->pg_dreq = NULL;
272 desc->pg_layout_private = NULL;
271} 273}
272EXPORT_SYMBOL_GPL(nfs_pageio_init); 274EXPORT_SYMBOL_GPL(nfs_pageio_init);
273 275
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 76875bfcf19c..2e00feacd4be 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
583 struct nfs_server *server = NFS_SERVER(ino); 583 struct nfs_server *server = NFS_SERVER(ino);
584 struct nfs4_layoutget *lgp; 584 struct nfs4_layoutget *lgp;
585 struct pnfs_layout_segment *lseg = NULL; 585 struct pnfs_layout_segment *lseg = NULL;
586 struct page **pages = NULL;
587 int i;
588 u32 max_resp_sz, max_pages;
589 586
590 dprintk("--> %s\n", __func__); 587 dprintk("--> %s\n", __func__);
591 588
@@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
594 if (lgp == NULL) 591 if (lgp == NULL)
595 return NULL; 592 return NULL;
596 593
597 /* allocate pages for xdr post processing */
598 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
599 max_pages = nfs_page_array_len(0, max_resp_sz);
600
601 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
602 if (!pages)
603 goto out_err_free;
604
605 for (i = 0; i < max_pages; i++) {
606 pages[i] = alloc_page(gfp_flags);
607 if (!pages[i])
608 goto out_err_free;
609 }
610
611 lgp->args.minlength = PAGE_CACHE_SIZE; 594 lgp->args.minlength = PAGE_CACHE_SIZE;
612 if (lgp->args.minlength > range->length) 595 if (lgp->args.minlength > range->length)
613 lgp->args.minlength = range->length; 596 lgp->args.minlength = range->length;
@@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo,
616 lgp->args.type = server->pnfs_curr_ld->id; 599 lgp->args.type = server->pnfs_curr_ld->id;
617 lgp->args.inode = ino; 600 lgp->args.inode = ino;
618 lgp->args.ctx = get_nfs_open_context(ctx); 601 lgp->args.ctx = get_nfs_open_context(ctx);
619 lgp->args.layout.pages = pages;
620 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
621 lgp->lsegpp = &lseg; 602 lgp->lsegpp = &lseg;
622 lgp->gfp_flags = gfp_flags; 603 lgp->gfp_flags = gfp_flags;
623 604
624 /* Synchronously retrieve layout information from server and 605 /* Synchronously retrieve layout information from server and
625 * store in lseg. 606 * store in lseg.
626 */ 607 */
627 nfs4_proc_layoutget(lgp); 608 nfs4_proc_layoutget(lgp, gfp_flags);
628 if (!lseg) { 609 if (!lseg) {
629 /* remember that LAYOUTGET failed and suspend trying */ 610 /* remember that LAYOUTGET failed and suspend trying */
630 set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); 611 set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
631 } 612 }
632 613
633 /* free xdr pages */
634 for (i = 0; i < max_pages; i++)
635 __free_page(pages[i]);
636 kfree(pages);
637
638 return lseg; 614 return lseg;
639
640out_err_free:
641 /* free any allocated xdr pages, lgp as it's not used */
642 if (pages) {
643 for (i = 0; i < max_pages; i++) {
644 if (!pages[i])
645 break;
646 __free_page(pages[i]);
647 }
648 kfree(pages);
649 }
650 kfree(lgp);
651 return NULL;
652} 615}
653 616
654/* 617/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2c6c80503ba4..745aa1b39e7c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
172 struct pnfs_devicelist *devlist); 172 struct pnfs_devicelist *devlist);
173extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, 173extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
174 struct pnfs_device *dev); 174 struct pnfs_device *dev);
175extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); 175extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
176extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); 176extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
177 177
178/* pnfs.c */ 178/* pnfs.c */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ac6a3c55dce4..b8eda700584b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops);
319static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); 319static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
320static int nfs4_validate_mount_data(void *options, 320static int nfs4_validate_mount_data(void *options,
321 struct nfs_parsed_mount_data *args, const char *dev_name); 321 struct nfs_parsed_mount_data *args, const char *dev_name);
322
323struct file_system_type nfs4_fs_type = {
324 .owner = THIS_MODULE,
325 .name = "nfs4",
326 .mount = nfs_fs_mount,
327 .kill_sb = nfs_kill_super,
328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
329};
330EXPORT_SYMBOL_GPL(nfs4_fs_type);
331
332static int __init register_nfs4_fs(void)
333{
334 return register_filesystem(&nfs4_fs_type);
335}
336
337static void unregister_nfs4_fs(void)
338{
339 unregister_filesystem(&nfs4_fs_type);
340}
341#else
342static int __init register_nfs4_fs(void)
343{
344 return 0;
345}
346
347static void unregister_nfs4_fs(void)
348{
349}
322#endif 350#endif
323 351
324static struct shrinker acl_shrinker = { 352static struct shrinker acl_shrinker = {
@@ -337,12 +365,18 @@ int __init register_nfs_fs(void)
337 if (ret < 0) 365 if (ret < 0)
338 goto error_0; 366 goto error_0;
339 367
340 ret = nfs_register_sysctl(); 368 ret = register_nfs4_fs();
341 if (ret < 0) 369 if (ret < 0)
342 goto error_1; 370 goto error_1;
371
372 ret = nfs_register_sysctl();
373 if (ret < 0)
374 goto error_2;
343 register_shrinker(&acl_shrinker); 375 register_shrinker(&acl_shrinker);
344 return 0; 376 return 0;
345 377
378error_2:
379 unregister_nfs4_fs();
346error_1: 380error_1:
347 unregister_filesystem(&nfs_fs_type); 381 unregister_filesystem(&nfs_fs_type);
348error_0: 382error_0:
@@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void)
356{ 390{
357 unregister_shrinker(&acl_shrinker); 391 unregister_shrinker(&acl_shrinker);
358 nfs_unregister_sysctl(); 392 nfs_unregister_sysctl();
393 unregister_nfs4_fs();
359 unregister_filesystem(&nfs_fs_type); 394 unregister_filesystem(&nfs_fs_type);
360} 395}
361 396
@@ -1832,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,
1832 1867
1833 memcpy(sap, &data->addr, sizeof(data->addr)); 1868 memcpy(sap, &data->addr, sizeof(data->addr));
1834 args->nfs_server.addrlen = sizeof(data->addr); 1869 args->nfs_server.addrlen = sizeof(data->addr);
1870 args->nfs_server.port = ntohs(data->addr.sin_port);
1835 if (!nfs_verify_server_address(sap)) 1871 if (!nfs_verify_server_address(sap))
1836 goto out_no_address; 1872 goto out_no_address;
1837 1873
@@ -2529,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options,
2529 return -EFAULT; 2565 return -EFAULT;
2530 if (!nfs_verify_server_address(sap)) 2566 if (!nfs_verify_server_address(sap))
2531 goto out_no_address; 2567 goto out_no_address;
2568 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
2532 2569
2533 if (data->auth_flavourlen) { 2570 if (data->auth_flavourlen) {
2534 if (data->auth_flavourlen > 1) 2571 if (data->auth_flavourlen > 1)
@@ -2645,4 +2682,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
2645module_param(send_implementation_id, ushort, 0644); 2682module_param(send_implementation_id, ushort, 0644);
2646MODULE_PARM_DESC(send_implementation_id, 2683MODULE_PARM_DESC(send_implementation_id,
2647 "Send implementation ID with NFSv4.1 exchange_id"); 2684 "Send implementation ID with NFSv4.1 exchange_id");
2685MODULE_ALIAS("nfs4");
2686
2648#endif /* CONFIG_NFS_V4 */ 2687#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5829d0ce7cfb..e3b55372726c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void)
1814 nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, 1814 nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
1815 nfs_wdata_cachep); 1815 nfs_wdata_cachep);
1816 if (nfs_wdata_mempool == NULL) 1816 if (nfs_wdata_mempool == NULL)
1817 return -ENOMEM; 1817 goto out_destroy_write_cache;
1818 1818
1819 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", 1819 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1820 sizeof(struct nfs_commit_data), 1820 sizeof(struct nfs_commit_data),
1821 0, SLAB_HWCACHE_ALIGN, 1821 0, SLAB_HWCACHE_ALIGN,
1822 NULL); 1822 NULL);
1823 if (nfs_cdata_cachep == NULL) 1823 if (nfs_cdata_cachep == NULL)
1824 return -ENOMEM; 1824 goto out_destroy_write_mempool;
1825 1825
1826 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, 1826 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1827 nfs_wdata_cachep); 1827 nfs_wdata_cachep);
1828 if (nfs_commit_mempool == NULL) 1828 if (nfs_commit_mempool == NULL)
1829 return -ENOMEM; 1829 goto out_destroy_commit_cache;
1830 1830
1831 /* 1831 /*
1832 * NFS congestion size, scale with available memory. 1832 * NFS congestion size, scale with available memory.
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void)
1849 nfs_congestion_kb = 256*1024; 1849 nfs_congestion_kb = 256*1024;
1850 1850
1851 return 0; 1851 return 0;
1852
1853out_destroy_commit_cache:
1854 kmem_cache_destroy(nfs_cdata_cachep);
1855out_destroy_write_mempool:
1856 mempool_destroy(nfs_wdata_mempool);
1857out_destroy_write_cache:
1858 kmem_cache_destroy(nfs_wdata_cachep);
1859 return -ENOMEM;
1852} 1860}
1853 1861
1854void nfs_destroy_writepagecache(void) 1862void nfs_destroy_writepagecache(void)
1855{ 1863{
1856 mempool_destroy(nfs_commit_mempool); 1864 mempool_destroy(nfs_commit_mempool);
1865 kmem_cache_destroy(nfs_cdata_cachep);
1857 mempool_destroy(nfs_wdata_mempool); 1866 mempool_destroy(nfs_wdata_mempool);
1858 kmem_cache_destroy(nfs_wdata_cachep); 1867 kmem_cache_destroy(nfs_wdata_cachep);
1859} 1868}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cbaf4f8bb7b7..4c7bd35b1876 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
651 651
652 if (clp->cl_minorversion == 0) { 652 if (clp->cl_minorversion == 0) {
653 if (!clp->cl_cred.cr_principal && 653 if (!clp->cl_cred.cr_principal &&
654 (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 654 (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
655 return -EINVAL; 655 return -EINVAL;
656 args.client_name = clp->cl_cred.cr_principal; 656 args.client_name = clp->cl_cred.cr_principal;
657 args.prognumber = conn->cb_prog, 657 args.prognumber = conn->cb_prog,
658 args.protocol = XPRT_TRANSPORT_TCP; 658 args.protocol = XPRT_TRANSPORT_TCP;
659 args.authflavor = clp->cl_flavor; 659 args.authflavor = clp->cl_cred.cr_flavor;
660 clp->cl_cb_ident = conn->cb_ident; 660 clp->cl_cb_ident = conn->cb_ident;
661 } else { 661 } else {
662 if (!conn->cb_xprt) 662 if (!conn->cb_xprt)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e6173147f982..22bd0a66c356 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -231,7 +231,6 @@ struct nfs4_client {
231 nfs4_verifier cl_verifier; /* generated by client */ 231 nfs4_verifier cl_verifier; /* generated by client */
232 time_t cl_time; /* time of last lease renewal */ 232 time_t cl_time; /* time of last lease renewal */
233 struct sockaddr_storage cl_addr; /* client ipaddress */ 233 struct sockaddr_storage cl_addr; /* client ipaddress */
234 u32 cl_flavor; /* setclientid pseudoflavor */
235 struct svc_cred cl_cred; /* setclientid principal */ 234 struct svc_cred cl_cred; /* setclientid principal */
236 clientid_t cl_clientid; /* generated by server */ 235 clientid_t cl_clientid; /* generated by server */
237 nfs4_verifier cl_confirm; /* generated by server */ 236 nfs4_verifier cl_confirm; /* generated by server */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6522cac6057c..6a10812711c1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -676,17 +676,13 @@ static const struct super_operations nilfs_sops = {
676 .alloc_inode = nilfs_alloc_inode, 676 .alloc_inode = nilfs_alloc_inode,
677 .destroy_inode = nilfs_destroy_inode, 677 .destroy_inode = nilfs_destroy_inode,
678 .dirty_inode = nilfs_dirty_inode, 678 .dirty_inode = nilfs_dirty_inode,
679 /* .write_inode = nilfs_write_inode, */
680 /* .drop_inode = nilfs_drop_inode, */
681 .evict_inode = nilfs_evict_inode, 679 .evict_inode = nilfs_evict_inode,
682 .put_super = nilfs_put_super, 680 .put_super = nilfs_put_super,
683 /* .write_super = nilfs_write_super, */
684 .sync_fs = nilfs_sync_fs, 681 .sync_fs = nilfs_sync_fs,
685 .freeze_fs = nilfs_freeze, 682 .freeze_fs = nilfs_freeze,
686 .unfreeze_fs = nilfs_unfreeze, 683 .unfreeze_fs = nilfs_unfreeze,
687 .statfs = nilfs_statfs, 684 .statfs = nilfs_statfs,
688 .remount_fs = nilfs_remount, 685 .remount_fs = nilfs_remount,
689 /* .umount_begin */
690 .show_options = nilfs_show_options 686 .show_options = nilfs_show_options
691}; 687};
692 688
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 6eee4177807b..be1267a34cea 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -107,8 +107,6 @@ struct the_nilfs {
107 * used for 107 * used for
108 * - loading the latest checkpoint exclusively. 108 * - loading the latest checkpoint exclusively.
109 * - allocating a new full segment. 109 * - allocating a new full segment.
110 * - protecting s_dirt in the super_block struct
111 * (see nilfs_write_super) and the following fields.
112 */ 110 */
113 struct buffer_head *ns_sbh[2]; 111 struct buffer_head *ns_sbh[2];
114 struct nilfs_super_block *ns_sbp[2]; 112 struct nilfs_super_block *ns_sbp[2];
diff --git a/fs/open.c b/fs/open.c
index f3d96e7e7b19..e1f2cdb91a4d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -717,7 +717,7 @@ cleanup_all:
717 * here, so just reset the state. 717 * here, so just reset the state.
718 */ 718 */
719 file_reset_write(f); 719 file_reset_write(f);
720 mnt_drop_write(f->f_path.mnt); 720 __mnt_drop_write(f->f_path.mnt);
721 } 721 }
722 } 722 }
723cleanup_file: 723cleanup_file:
@@ -852,9 +852,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
852 int lookup_flags = 0; 852 int lookup_flags = 0;
853 int acc_mode; 853 int acc_mode;
854 854
855 if (!(flags & O_CREAT)) 855 if (flags & O_CREAT)
856 mode = 0; 856 op->mode = (mode & S_IALLUGO) | S_IFREG;
857 op->mode = mode; 857 else
858 op->mode = 0;
858 859
859 /* Must never be set by userspace */ 860 /* Must never be set by userspace */
860 flags &= ~FMODE_NONOTIFY; 861 flags &= ~FMODE_NONOTIFY;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 36a29b753c79..c495a3055e2a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1589 goto out; 1589 goto out;
1590 } 1590 }
1591 1591
1592 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1593 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1592 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1594 warn[cnt].w_type = QUOTA_NL_NOWARN; 1593 warn[cnt].w_type = QUOTA_NL_NOWARN;
1595 1594
1595 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1596 spin_lock(&dq_data_lock); 1596 spin_lock(&dq_data_lock);
1597 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1597 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1598 if (!dquots[cnt]) 1598 if (!dquots[cnt])
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 4c0c7d163d15..a98b7740a0fc 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1334 else if (bitmap == 0) 1334 else if (bitmap == 0)
1335 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; 1335 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1336 1336
1337 reiserfs_write_unlock(sb);
1338 bh = sb_bread(sb, block); 1337 bh = sb_bread(sb, block);
1339 reiserfs_write_lock(sb);
1340 if (bh == NULL) 1338 if (bh == NULL)
1341 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " 1339 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1342 "reading failed", __func__, block); 1340 "reading failed", __func__, block);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a6d4268fb6c1..855da58db145 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode)
76 ; 76 ;
77 } 77 }
78 out: 78 out:
79 reiserfs_write_unlock_once(inode->i_sb, depth);
79 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 80 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
80 dquot_drop(inode); 81 dquot_drop(inode);
81 inode->i_blocks = 0; 82 inode->i_blocks = 0;
82 reiserfs_write_unlock_once(inode->i_sb, depth);
83 return; 83 return;
84 84
85no_delete: 85no_delete:
diff --git a/fs/stat.c b/fs/stat.c
index b6ff11825fc8..40780229a032 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(vfs_getattr);
58int vfs_fstat(unsigned int fd, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 int fput_needed; 60 int fput_needed;
61 struct file *f = fget_light(fd, &fput_needed); 61 struct file *f = fget_raw_light(fd, &fput_needed);
62 int error = -EBADF; 62 int error = -EBADF;
63 63
64 if (f) { 64 if (f) {
diff --git a/fs/super.c b/fs/super.c
index b05cf47463d0..0902cfa6a12e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -537,46 +537,6 @@ void drop_super(struct super_block *sb)
537EXPORT_SYMBOL(drop_super); 537EXPORT_SYMBOL(drop_super);
538 538
539/** 539/**
540 * sync_supers - helper for periodic superblock writeback
541 *
542 * Call the write_super method if present on all dirty superblocks in
543 * the system. This is for the periodic writeback used by most older
544 * filesystems. For data integrity superblock writeback use
545 * sync_filesystems() instead.
546 *
547 * Note: check the dirty flag before waiting, so we don't
548 * hold up the sync while mounting a device. (The newly
549 * mounted device won't need syncing.)
550 */
551void sync_supers(void)
552{
553 struct super_block *sb, *p = NULL;
554
555 spin_lock(&sb_lock);
556 list_for_each_entry(sb, &super_blocks, s_list) {
557 if (hlist_unhashed(&sb->s_instances))
558 continue;
559 if (sb->s_op->write_super && sb->s_dirt) {
560 sb->s_count++;
561 spin_unlock(&sb_lock);
562
563 down_read(&sb->s_umount);
564 if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN))
565 sb->s_op->write_super(sb);
566 up_read(&sb->s_umount);
567
568 spin_lock(&sb_lock);
569 if (p)
570 __put_super(p);
571 p = sb;
572 }
573 }
574 if (p)
575 __put_super(p);
576 spin_unlock(&sb_lock);
577}
578
579/**
580 * iterate_supers - call function for all active superblocks 540 * iterate_supers - call function for all active superblocks
581 * @f: function to call 541 * @f: function to call
582 * @arg: argument to pass to it 542 * @arg: argument to pass to it
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 8b8cc4e945f4..760de723dadb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -167,7 +167,7 @@ struct ubifs_global_debug_info {
167#define ubifs_dbg_msg(type, fmt, ...) \ 167#define ubifs_dbg_msg(type, fmt, ...) \
168 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) 168 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
169 169
170#define DBG_KEY_BUF_LEN 32 170#define DBG_KEY_BUF_LEN 48
171#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ 171#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
172 char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ 172 char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
173 pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ 173 pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 35389ca2d267..7bd6e72afd11 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -37,11 +37,11 @@
37 * 37 *
38 * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we 38 * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
39 * implement. However, this is not true for 'ubifs_writepage()', which may be 39 * implement. However, this is not true for 'ubifs_writepage()', which may be
40 * called with @i_mutex unlocked. For example, when pdflush is doing background 40 * called with @i_mutex unlocked. For example, when flusher thread is doing
41 * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" 41 * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex.
42 * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the 42 * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g.
43 * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' 43 * in the "sys_write -> alloc_pages -> direct reclaim path". So, in
44 * we are only guaranteed that the page is locked. 44 * 'ubifs_writepage()' we are only guaranteed that the page is locked.
45 * 45 *
46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ce33b2beb151..8640920766ed 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
1749 return 0; 1749 return 0;
1750 1750
1751out_err: 1751out_err:
1752 ubifs_lpt_free(c, 0); 1752 if (wr)
1753 ubifs_lpt_free(c, 1);
1754 if (rd)
1755 ubifs_lpt_free(c, 0);
1753 return err; 1756 return err;
1754} 1757}
1755 1758
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c30d976b4be8..edeec499c048 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
788 788
789corrupted_rescan: 789corrupted_rescan:
790 /* Re-scan the corrupted data with verbose messages */ 790 /* Re-scan the corrupted data with verbose messages */
791 ubifs_err("corruptio %d", ret); 791 ubifs_err("corruption %d", ret);
792 ubifs_scan_a_node(c, buf, len, lnum, offs, 1); 792 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
793corrupted: 793corrupted:
794 ubifs_scanned_corruption(c, lnum, offs, buf); 794 ubifs_scanned_corruption(c, lnum, offs, buf);
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eba46d4a7619..94d78fc5d4e0 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c)
1026 c->replaying = 1; 1026 c->replaying = 1;
1027 lnum = c->ltail_lnum = c->lhead_lnum; 1027 lnum = c->ltail_lnum = c->lhead_lnum;
1028 1028
1029 lnum = UBIFS_LOG_LNUM;
1030 do { 1029 do {
1031 err = replay_log_leb(c, lnum, 0, c->sbuf); 1030 err = replay_log_leb(c, lnum, 0, c->sbuf);
1032 if (err == 1) 1031 if (err == 1)
@@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
1035 if (err) 1034 if (err)
1036 goto out; 1035 goto out;
1037 lnum = ubifs_next_log_lnum(c, lnum); 1036 lnum = ubifs_next_log_lnum(c, lnum);
1038 } while (lnum != UBIFS_LOG_LNUM); 1037 } while (lnum != c->ltail_lnum);
1039 1038
1040 err = replay_buds(c); 1039 err = replay_buds(c);
1041 if (err) 1040 if (err)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1c766c39c038..71a197f0f93d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -303,7 +303,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
303 mutex_lock(&ui->ui_mutex); 303 mutex_lock(&ui->ui_mutex);
304 /* 304 /*
305 * Due to races between write-back forced by budgeting 305 * Due to races between write-back forced by budgeting
306 * (see 'sync_some_inodes()') and pdflush write-back, the inode may 306 * (see 'sync_some_inodes()') and background write-back, the inode may
307 * have already been synchronized, do not do this again. This might 307 * have already been synchronized, do not do this again. This might
308 * also happen if it was synchronized in an VFS operation, e.g. 308 * also happen if it was synchronized in an VFS operation, e.g.
309 * 'ubifs_link()'. 309 * 'ubifs_link()'.
@@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c)
1157 * 1157 *
1158 * This function mounts UBIFS file system. Returns zero in case of success and 1158 * This function mounts UBIFS file system. Returns zero in case of success and
1159 * a negative error code in case of failure. 1159 * a negative error code in case of failure.
1160 *
1161 * Note, the function does not de-allocate resources it it fails half way
1162 * through, and the caller has to do this instead.
1163 */ 1160 */
1164static int mount_ubifs(struct ubifs_info *c) 1161static int mount_ubifs(struct ubifs_info *c)
1165{ 1162{
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba3df6e..d1c6093fd3d3 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -39,20 +39,24 @@
39#include "udf_i.h" 39#include "udf_i.h"
40#include "udf_sb.h" 40#include "udf_sb.h"
41 41
42static int udf_adinicb_readpage(struct file *file, struct page *page) 42static void __udf_adinicb_readpage(struct page *page)
43{ 43{
44 struct inode *inode = page->mapping->host; 44 struct inode *inode = page->mapping->host;
45 char *kaddr; 45 char *kaddr;
46 struct udf_inode_info *iinfo = UDF_I(inode); 46 struct udf_inode_info *iinfo = UDF_I(inode);
47 47
48 BUG_ON(!PageLocked(page));
49
50 kaddr = kmap(page); 48 kaddr = kmap(page);
51 memset(kaddr, 0, PAGE_CACHE_SIZE);
52 memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); 49 memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
50 memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
53 flush_dcache_page(page); 51 flush_dcache_page(page);
54 SetPageUptodate(page); 52 SetPageUptodate(page);
55 kunmap(page); 53 kunmap(page);
54}
55
56static int udf_adinicb_readpage(struct file *file, struct page *page)
57{
58 BUG_ON(!PageLocked(page));
59 __udf_adinicb_readpage(page);
56 unlock_page(page); 60 unlock_page(page);
57 61
58 return 0; 62 return 0;
@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page,
77 return 0; 81 return 0;
78} 82}
79 83
84static int udf_adinicb_write_begin(struct file *file,
85 struct address_space *mapping, loff_t pos,
86 unsigned len, unsigned flags, struct page **pagep,
87 void **fsdata)
88{
89 struct page *page;
90
91 if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
92 return -EIO;
93 page = grab_cache_page_write_begin(mapping, 0, flags);
94 if (!page)
95 return -ENOMEM;
96 *pagep = page;
97
98 if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
99 __udf_adinicb_readpage(page);
100 return 0;
101}
102
80static int udf_adinicb_write_end(struct file *file, 103static int udf_adinicb_write_end(struct file *file,
81 struct address_space *mapping, 104 struct address_space *mapping,
82 loff_t pos, unsigned len, unsigned copied, 105 loff_t pos, unsigned len, unsigned copied,
@@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file,
98const struct address_space_operations udf_adinicb_aops = { 121const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 122 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 123 .writepage = udf_adinicb_writepage,
101 .write_begin = simple_write_begin, 124 .write_begin = udf_adinicb_write_begin,
102 .write_end = udf_adinicb_write_end, 125 .write_end = udf_adinicb_write_end,
103}; 126};
104 127
105static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 128static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad795cd6..aa233469b3c1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize)
1124 if (err) 1124 if (err)
1125 return err; 1125 return err;
1126 down_write(&iinfo->i_data_sem); 1126 down_write(&iinfo->i_data_sem);
1127 } else 1127 } else {
1128 iinfo->i_lenAlloc = newsize; 1128 iinfo->i_lenAlloc = newsize;
1129 goto set_size;
1130 }
1129 } 1131 }
1130 err = udf_extend_file(inode, newsize); 1132 err = udf_extend_file(inode, newsize);
1131 if (err) { 1133 if (err) {
1132 up_write(&iinfo->i_data_sem); 1134 up_write(&iinfo->i_data_sem);
1133 return err; 1135 return err;
1134 } 1136 }
1137set_size:
1135 truncate_setsize(inode, newsize); 1138 truncate_setsize(inode, newsize);
1136 up_write(&iinfo->i_data_sem); 1139 up_write(&iinfo->i_data_sem);
1137 } else { 1140 } else {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index dcbf98722afc..18fc038a438d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1344 udf_err(sb, "error loading logical volume descriptor: " 1344 udf_err(sb, "error loading logical volume descriptor: "
1345 "Partition table too long (%u > %lu)\n", table_len, 1345 "Partition table too long (%u > %lu)\n", table_len,
1346 sb->s_blocksize - sizeof(*lvd)); 1346 sb->s_blocksize - sizeof(*lvd));
1347 ret = 1;
1347 goto out_bh; 1348 goto out_bh;
1348 } 1349 }
1349 1350
@@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1388 UDF_ID_SPARABLE, 1389 UDF_ID_SPARABLE,
1389 strlen(UDF_ID_SPARABLE))) { 1390 strlen(UDF_ID_SPARABLE))) {
1390 if (udf_load_sparable_map(sb, map, 1391 if (udf_load_sparable_map(sb, map,
1391 (struct sparablePartitionMap *)gpm) < 0) 1392 (struct sparablePartitionMap *)gpm) < 0) {
1393 ret = 1;
1392 goto out_bh; 1394 goto out_bh;
1395 }
1393 } else if (!strncmp(upm2->partIdent.ident, 1396 } else if (!strncmp(upm2->partIdent.ident,
1394 UDF_ID_METADATA, 1397 UDF_ID_METADATA,
1395 strlen(UDF_ID_METADATA))) { 1398 strlen(UDF_ID_METADATA))) {
@@ -2000,6 +2003,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2000 if (!silent) 2003 if (!silent)
2001 pr_notice("Rescanning with blocksize %d\n", 2004 pr_notice("Rescanning with blocksize %d\n",
2002 UDF_DEFAULT_BLOCKSIZE); 2005 UDF_DEFAULT_BLOCKSIZE);
2006 brelse(sbi->s_lvid_bh);
2007 sbi->s_lvid_bh = NULL;
2003 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; 2008 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
2004 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 2009 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
2005 } 2010 }
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index f9c3fe304a17..69cf4fcde03e 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,12 +179,14 @@ xfs_ioc_trim(
179 * used by the fstrim application. In the end it really doesn't 179 * used by the fstrim application. In the end it really doesn't
180 * matter as trimming blocks is an advisory interface. 180 * matter as trimming blocks is an advisory interface.
181 */ 181 */
182 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
183 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
184 return -XFS_ERROR(EINVAL);
185
182 start = BTOBB(range.start); 186 start = BTOBB(range.start);
183 end = start + BTOBBT(range.len) - 1; 187 end = start + BTOBBT(range.len) - 1;
184 minlen = BTOBB(max_t(u64, granularity, range.minlen)); 188 minlen = BTOBB(max_t(u64, granularity, range.minlen));
185 189
186 if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
187 return -XFS_ERROR(EINVAL);
188 if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) 190 if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
189 end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; 191 end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
190 192
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 21e37b55f7e5..5aceb3f8ecd6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -962,23 +962,22 @@ xfs_dialloc(
962 if (!pag->pagi_freecount && !okalloc) 962 if (!pag->pagi_freecount && !okalloc)
963 goto nextag; 963 goto nextag;
964 964
965 /*
966 * Then read in the AGI buffer and recheck with the AGI buffer
967 * lock held.
968 */
965 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 969 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
966 if (error) 970 if (error)
967 goto out_error; 971 goto out_error;
968 972
969 /*
970 * Once the AGI has been read in we have to recheck
971 * pagi_freecount with the AGI buffer lock held.
972 */
973 if (pag->pagi_freecount) { 973 if (pag->pagi_freecount) {
974 xfs_perag_put(pag); 974 xfs_perag_put(pag);
975 goto out_alloc; 975 goto out_alloc;
976 } 976 }
977 977
978 if (!okalloc) { 978 if (!okalloc)
979 xfs_trans_brelse(tp, agbp); 979 goto nextag_relse_buffer;
980 goto nextag; 980
981 }
982 981
983 error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); 982 error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
984 if (error) { 983 if (error) {
@@ -1007,6 +1006,8 @@ xfs_dialloc(
1007 return 0; 1006 return 0;
1008 } 1007 }
1009 1008
1009nextag_relse_buffer:
1010 xfs_trans_brelse(tp, agbp);
1010nextag: 1011nextag:
1011 xfs_perag_put(pag); 1012 xfs_perag_put(pag);
1012 if (++agno == mp->m_sb.sb_agcount) 1013 if (++agno == mp->m_sb.sb_agcount)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 92d4331cd4f1..ca28a4ba4b54 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -857,7 +857,7 @@ xfs_rtbuf_get(
857 xfs_buf_t *bp; /* block buffer, result */ 857 xfs_buf_t *bp; /* block buffer, result */
858 xfs_inode_t *ip; /* bitmap or summary inode */ 858 xfs_inode_t *ip; /* bitmap or summary inode */
859 xfs_bmbt_irec_t map; 859 xfs_bmbt_irec_t map;
860 int nmap; 860 int nmap = 1;
861 int error; /* error value */ 861 int error; /* error value */
862 862
863 ip = issum ? mp->m_rsumip : mp->m_rbmip; 863 ip = issum ? mp->m_rsumip : mp->m_rbmip;